diff --git a/.gitignore b/.gitignore index c2ed4ecb0acd28126d8703e17e86775ccbed1bb6..0c39aa20b6ba8d2afbc69670636b9223b3bf23fa 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ *.lzo *.patch *.gcno +*.ll modules.builtin Module.symvers *.dwo diff --git a/.mailmap b/.mailmap index 1d6f4e7280dc67f630b79456b3f1e5a4c0a41343..5273cfd70ad62996ba8374698fb5e22e91d33f45 100644 --- a/.mailmap +++ b/.mailmap @@ -111,6 +111,7 @@ Mauro Carvalho Chehab Mauro Carvalho Chehab Matt Ranostay Matthew Ranostay Matt Ranostay +Matt Ranostay Mayuresh Janorkar Michael Buesch Michel Dänzer @@ -145,6 +146,8 @@ Santosh Shilimkar Santosh Shilimkar Sascha Hauer S.Çağlar Onur +Sebastian Reichel +Sebastian Reichel Shiraz Hashim Shuah Khan Shuah Khan diff --git a/CREDITS b/CREDITS index c5626bf06264e41e136fbe0c0420881e72de0ce2..5d09c26d69cdc0f4709cf48f5f3ff6195d53e85d 100644 --- a/CREDITS +++ b/CREDITS @@ -1034,6 +1034,10 @@ S: 2037 Walnut #6 S: Boulder, Colorado 80302 S: USA +N: Hans-Christian Noren Egtvedt +E: egtvedt@samfundet.no +D: AVR32 architecture maintainer. + N: Heiko Eißfeldt E: heiko@colossus.escape.de heiko@unifix.de D: verify_area stuff, generic SCSI fixes @@ -3398,6 +3402,10 @@ S: Suite 101 S: Markham, Ontario L3R 2Z6 S: Canada +N: Haavard Skinnemoen +M: Haavard Skinnemoen +D: AVR32 architecture port to Linux and maintainer. + N: Rick Sladkey E: jrs@world.std.com D: utility hacker: Emacs, NFS server, mount, kmem-ps, UPS debugger, strace, GDB diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 793acf999e9eac87057af3214ca1f98ad65b922f..ed3e5e949fce303efec625259b527d503cf82f8d 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -412,6 +412,8 @@ sysctl/ - directory with info on the /proc/sys/* files. target/ - directory with info on generating TCM v4 fabric .ko modules +tee.txt + - info on the TEE subsystem and drivers this_cpu_ops.txt - List rationale behind and the way to use this_cpu operations. thermal/ diff --git a/Documentation/ABI/obsolete/sysfs-firmware-acpi b/Documentation/ABI/obsolete/sysfs-firmware-acpi new file mode 100644 index 0000000000000000000000000000000000000000..6715a71bec3d71a27d3f4f97bce1b19dde154764 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-firmware-acpi @@ -0,0 +1,8 @@ +What: /sys/firmware/acpi/hotplug/force_remove +Date: Mar 2017 +Contact: Rafael J. Wysocki +Description: + Since the force_remove is inherently broken and dangerous to + use for some hotplugable resources like memory (because ignoring + the offline failure might lead to memory corruption and crashes) + enabling this knob is not safe and thus unsupported. diff --git a/Documentation/ABI/stable/sysfs-bus-usb b/Documentation/ABI/stable/sysfs-bus-usb index 831f15d9672f29e90cca5650356d2f69599e14b8..b832eeff999914b0afa4e4a99d7379e4f852dea8 100644 --- a/Documentation/ABI/stable/sysfs-bus-usb +++ b/Documentation/ABI/stable/sysfs-bus-usb @@ -9,7 +9,7 @@ Description: hubs this facility is always enabled and their device directories will not contain this file. - For more information, see Documentation/usb/persist.txt. + For more information, see Documentation/driver-api/usb/persist.rst. What: /sys/bus/usb/devices/.../power/autosuspend Date: March 2007 diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso index 7cdfc28cc2c6d93b861d6ec3acb05bc5aca8bc70..55406ec8a35a34d9c0f67713344e769d5fc76021 100644 --- a/Documentation/ABI/stable/vdso +++ b/Documentation/ABI/stable/vdso @@ -16,7 +16,8 @@ The vDSO uses symbol versioning; whenever you request a symbol from the vDSO, specify the version you are expecting. Programs that dynamically link to glibc will use the vDSO automatically. -Otherwise, you can use the reference parser in Documentation/vDSO/parse_vdso.c. +Otherwise, you can use the reference parser in +tools/testing/selftests/vDSO/parse_vdso.c. Unless otherwise noted, the set of symbols with any given version and the ABI of those symbols is considered stable. It may vary across architectures, diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 2da04ce6aeef482645bfd9edd924ce195275ea26..dea212db9df3531f3dfc69204c8e4fcac8ec2bb5 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -213,14 +213,8 @@ What: /sys/block//queue/discard_zeroes_data Date: May 2011 Contact: Martin K. Petersen Description: - Devices that support discard functionality may return - stale or random data when a previously discarded block - is read back. This can cause problems if the filesystem - expects discarded blocks to be explicitly cleared. If a - device reports that it deterministically returns zeroes - when a discarded area is read the discard_zeroes_data - parameter will be set to one. Otherwise it will be 0 and - the result of reading a discarded area is undefined. + Will always return 0. Don't rely on any specific behavior + for discards, and don't read this file. What: /sys/block//queue/write_same_max_bytes Date: January 2012 diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 530809ccfacf336710a40cf501401ccae0bb6ae4..8c24d0892f61e36727fdeee50c5bd39313ecc04a 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -55,6 +55,7 @@ Description: then it is to be found in the base device directory. What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency_available +What: /sys/bus/iio/devices/iio:deviceX/in_proximity_sampling_frequency_available What: /sys/.../iio:deviceX/buffer/sampling_frequency_available What: /sys/bus/iio/devices/triggerX/sampling_frequency_available KernelVersion: 2.6.35 @@ -1593,7 +1594,7 @@ Description: can be processed to siemens per meter. What: /sys/bus/iio/devices/iio:deviceX/in_countY_raw -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Raw counter device counts from channel Y. For quadrature @@ -1601,10 +1602,24 @@ Description: the counts of a single quadrature signal phase from channel Y. What: /sys/bus/iio/devices/iio:deviceX/in_indexY_raw -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Raw counter device index value from channel Y. This attribute provides an absolute positional reference (e.g. a pulse once per revolution) which may be used to home positional systems as required. + +What: /sys/bus/iio/devices/iio:deviceX/in_count_count_direction_available +KernelVersion: 4.12 +Contact: linux-iio@vger.kernel.org +Description: + A list of possible counting directions which are: + - "up" : counter device is increasing. + - "down": counter device is decreasing. + +What: /sys/bus/iio/devices/iio:deviceX/in_countY_count_direction +KernelVersion: 4.12 +Contact: linux-iio@vger.kernel.org +Description: + Raw counter device counters direction for channel Y. diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611 b/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611 new file mode 100644 index 0000000000000000000000000000000000000000..6d2d2b094941d34f8dba2ce28473dcf5a22fcbad --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-max9611 @@ -0,0 +1,17 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_power_shunt_resistor +Date: March 2017 +KernelVersion: 4.12 +Contact: linux-iio@vger.kernel.org +Description: The value of the shunt resistor used to compute power drain on + common input voltage pin (RS+). In Ohms. + +What: /sys/bus/iio/devices/iio:deviceX/in_current_shunt_resistor +Date: March 2017 +KernelVersion: 4.12 +Contact: linux-iio@vger.kernel.org +Description: The value of the shunt resistor used to compute current flowing + between RS+ and RS- voltage sense inputs. In Ohms. + +These attributes describe a single physical component, exposed as two distinct +attributes as it is used to calculate two different values: power load and +current flowing between RS+ and RS- inputs. diff --git a/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8 b/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8 index ba676520b9530e7c6d955ef0cdd6e68a2fd93823..7fac2c268d9a9050e06d7248bcc33dbdf30978bb 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8 +++ b/Documentation/ABI/testing/sysfs-bus-iio-counter-104-quad-8 @@ -1,24 +1,16 @@ -What: /sys/bus/iio/devices/iio:deviceX/in_count_count_direction_available What: /sys/bus/iio/devices/iio:deviceX/in_count_count_mode_available What: /sys/bus/iio/devices/iio:deviceX/in_count_noise_error_available What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available What: /sys/bus/iio/devices/iio:deviceX/in_index_index_polarity_available What: /sys/bus/iio/devices/iio:deviceX/in_index_synchronous_mode_available -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Discrete set of available values for the respective counter configuration are listed in this file. -What: /sys/bus/iio/devices/iio:deviceX/in_countY_count_direction -KernelVersion: 4.9 -Contact: linux-iio@vger.kernel.org -Description: - Read-only attribute that indicates whether the counter for - channel Y is counting up or down. - What: /sys/bus/iio/devices/iio:deviceX/in_countY_count_mode -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Count mode for channel Y. Four count modes are available: @@ -52,7 +44,7 @@ Description: continuously throughout. What: /sys/bus/iio/devices/iio:deviceX/in_countY_noise_error -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Read-only attribute that indicates whether excessive noise is @@ -60,14 +52,14 @@ Description: irrelevant in non-quadrature clock mode. What: /sys/bus/iio/devices/iio:deviceX/in_countY_preset -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: If the counter device supports preset registers, the preset count for channel Y is provided by this attribute. What: /sys/bus/iio/devices/iio:deviceX/in_countY_quadrature_mode -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Configure channel Y counter for non-quadrature or quadrature @@ -88,7 +80,7 @@ Description: decoded for UP/DN clock. What: /sys/bus/iio/devices/iio:deviceX/in_countY_set_to_preset_on_index -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Whether to set channel Y counter with channel Y preset value @@ -96,14 +88,14 @@ Description: Valid attribute values are boolean. What: /sys/bus/iio/devices/iio:deviceX/in_indexY_index_polarity -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Active level of channel Y index input; irrelevant in non-synchronous load mode. What: /sys/bus/iio/devices/iio:deviceX/in_indexY_synchronous_mode -KernelVersion: 4.9 +KernelVersion: 4.10 Contact: linux-iio@vger.kernel.org Description: Configure channel Y counter for non-synchronous or synchronous diff --git a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 index 6534a60037ffa6129c7a1f90d7dca62f68b23c54..230020e06677d73a2b8b24c031962e3fbda19c45 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 +++ b/Documentation/ABI/testing/sysfs-bus-iio-timer-stm32 @@ -3,11 +3,15 @@ KernelVersion: 4.11 Contact: benjamin.gaignard@st.com Description: Reading returns the list possible master modes which are: - - "reset" : The UG bit from the TIMx_EGR register is used as trigger output (TRGO). - - "enable" : The Counter Enable signal CNT_EN is used as trigger output. + - "reset" : The UG bit from the TIMx_EGR register is + used as trigger output (TRGO). + - "enable" : The Counter Enable signal CNT_EN is used + as trigger output. - "update" : The update event is selected as trigger output. - For instance a master timer can then be used as a prescaler for a slave timer. - - "compare_pulse" : The trigger output send a positive pulse when the CC1IF flag is to be set. + For instance a master timer can then be used + as a prescaler for a slave timer. + - "compare_pulse" : The trigger output send a positive pulse + when the CC1IF flag is to be set. - "OC1REF" : OC1REF signal is used as trigger output. - "OC2REF" : OC2REF signal is used as trigger output. - "OC3REF" : OC3REF signal is used as trigger output. @@ -27,3 +31,62 @@ Description: Reading returns the current sampling frequency. Writing an value different of 0 set and start sampling. Writing 0 stop sampling. + +What: /sys/bus/iio/devices/iio:deviceX/in_count0_preset +KernelVersion: 4.12 +Contact: benjamin.gaignard@st.com +Description: + Reading returns the current preset value. + Writing sets the preset value. + When counting up the counter starts from 0 and fires an + event when reach preset value. + When counting down the counter start from preset value + and fire event when reach 0. + +What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available +KernelVersion: 4.12 +Contact: benjamin.gaignard@st.com +Description: + Reading returns the list possible quadrature modes. + +What: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode +KernelVersion: 4.12 +Contact: benjamin.gaignard@st.com +Description: + Configure the device counter quadrature modes: + channel_A: + Encoder A input servers as the count input and B as + the UP/DOWN direction control input. + + channel_B: + Encoder B input serves as the count input and A as + the UP/DOWN direction control input. + + quadrature: + Encoder A and B inputs are mixed to get direction + and count with a scale of 0.25. + +What: /sys/bus/iio/devices/iio:deviceX/in_count_enable_mode_available +KernelVersion: 4.12 +Contact: benjamin.gaignard@st.com +Description: + Reading returns the list possible enable modes. + +What: /sys/bus/iio/devices/iio:deviceX/in_count0_enable_mode +KernelVersion: 4.12 +Contact: benjamin.gaignard@st.com +Description: + Configure the device counter enable modes, in all case + counting direction is set by in_count0_count_direction + attribute and the counter is clocked by the internal clock. + always: + Counter is always ON. + + gated: + Counting is enabled when connected trigger signal + level is high else counting is disabled. + + triggered: + Counting is enabled on rising edge of the connected + trigger, and remains enabled for the duration of this + selected mode. diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index e4e90104d7c38ff89bae718df7485337ee3e9108..44d4b2be92fd4a56ab2420944f76669a2466a304 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -301,3 +301,25 @@ Contact: Emil Velikov Description: This file contains the revision field of the PCI device. The value comes from device config space. The file is read only. + +What: /sys/bus/pci/devices/.../sriov_drivers_autoprobe +Date: April 2017 +Contact: Bodong Wang +Description: + This file is associated with the PF of a device that + supports SR-IOV. It determines whether newly-enabled VFs + are immediately bound to a driver. It initially contains + 1, which means the kernel automatically binds VFs to a + compatible driver immediately after they are enabled. If + an application writes 0 to the file before enabling VFs, + the kernel will not bind VFs to a driver. + + A typical use case is to write 0 to this file, then enable + VFs, then assign the newly-created VFs to virtual machines. + Note that changing this file does not affect already- + enabled VFs. In this scenario, the user must first disable + the VFs, write 0 to sriov_drivers_autoprobe, then re-enable + the VFs. + + This is similar to /sys/bus/pci/drivers_autoprobe, but + affects only the VFs associated with a specific PF. diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi index fa5a00bb114372bfbc67008e8d0ebe8d1566ed6b..7122d6264c49d6c02c2c0074e145f19b93fc63dd 100644 --- a/Documentation/ABI/testing/sysfs-class-net-qmi +++ b/Documentation/ABI/testing/sysfs-class-net-qmi @@ -21,3 +21,30 @@ Description: is responsible for coordination of driver and firmware link framing mode, changing this setting to 'Y' if the firmware is configured for 'raw-ip' mode. + +What: /sys/class/net//qmi/add_mux +Date: March 2017 +KernelVersion: 4.11 +Contact: Bjørn Mork +Description: + Unsigned integer. + + Write a number ranging from 1 to 127 to add a qmap mux + based network device, supported by recent Qualcomm based + modems. + + The network device will be called qmimux. + + Userspace is in charge of managing the qmux network device + activation and data stream setup on the modem side by + using the proper QMI protocol requests. + +What: /sys/class/net//qmi/del_mux +Date: March 2017 +KernelVersion: 4.11 +Contact: Bjørn Mork +Description: + Unsigned integer. + + Write a number ranging from 1 to 127 to delete a previously + created qmap mux based network device. diff --git a/Documentation/ABI/testing/sysfs-class-switchtec b/Documentation/ABI/testing/sysfs-class-switchtec new file mode 100644 index 0000000000000000000000000000000000000000..48cb4c15e430995c61f5af818368c0feeac8317b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-switchtec @@ -0,0 +1,96 @@ +switchtec - Microsemi Switchtec PCI Switch Management Endpoint + +For details on this subsystem look at Documentation/switchtec.txt. + +What: /sys/class/switchtec +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: The switchtec class subsystem folder. + Each registered switchtec driver is represented by a switchtecX + subfolder (X being an integer >= 0). + + +What: /sys/class/switchtec/switchtec[0-9]+/component_id +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Component identifier as stored in the hardware (eg. PM8543) + (read only) +Values: arbitrary string. + + +What: /sys/class/switchtec/switchtec[0-9]+/component_revision +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Component revision stored in the hardware (read only) +Values: integer. + + +What: /sys/class/switchtec/switchtec[0-9]+/component_vendor +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Component vendor as stored in the hardware (eg. MICROSEM) + (read only) +Values: arbitrary string. + + +What: /sys/class/switchtec/switchtec[0-9]+/device_version +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Device version as stored in the hardware (read only) +Values: integer. + + +What: /sys/class/switchtec/switchtec[0-9]+/fw_version +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Currently running firmware version (read only) +Values: integer (in hexadecimal). + + +What: /sys/class/switchtec/switchtec[0-9]+/partition +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Partition number for this device in the switch (read only) +Values: integer. + + +What: /sys/class/switchtec/switchtec[0-9]+/partition_count +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Total number of partitions in the switch (read only) +Values: integer. + + +What: /sys/class/switchtec/switchtec[0-9]+/product_id +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Product identifier as stored in the hardware (eg. PSX 48XG3) + (read only) +Values: arbitrary string. + + +What: /sys/class/switchtec/switchtec[0-9]+/product_revision +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Product revision stored in the hardware (eg. RevB) + (read only) +Values: arbitrary string. + + +What: /sys/class/switchtec/switchtec[0-9]+/product_vendor +Date: 05-Jan-2017 +KernelVersion: v4.11 +Contact: Logan Gunthorpe +Description: Product vendor as stored in the hardware (eg. MICROSEM) + (read only) +Values: arbitrary string. diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec new file mode 100644 index 0000000000000000000000000000000000000000..d4a3d23eb09c94adbf51b60dc26b55646e4fceeb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-typec @@ -0,0 +1,276 @@ +USB Type-C port devices (eg. /sys/class/typec/port0/) + +What: /sys/class/typec//data_role +Date: April 2017 +Contact: Heikki Krogerus +Description: + The supported USB data roles. This attribute can be used for + requesting data role swapping on the port. Swapping is supported + as synchronous operation, so write(2) to the attribute will not + return until the operation has finished. The attribute is + notified about role changes so that poll(2) on the attribute + wakes up. Change on the role will also generate uevent + KOBJ_CHANGE on the port. The current role is show in brackets, + for example "[host] device" when DRP port is in host mode. + + Valid values: host, device + +What: /sys/class/typec//power_role +Date: April 2017 +Contact: Heikki Krogerus +Description: + The supported power roles. This attribute can be used to request + power role swap on the port when the port supports USB Power + Delivery. Swapping is supported as synchronous operation, so + write(2) to the attribute will not return until the operation + has finished. The attribute is notified about role changes so + that poll(2) on the attribute wakes up. Change on the role will + also generate uevent KOBJ_CHANGE. The current role is show in + brackets, for example "[source] sink" when in source mode. + + Valid values: source, sink + +What: /sys/class/typec//vconn_source +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows is the port VCONN Source. This attribute can be used to + request VCONN swap to change the VCONN Source during connection + when both the port and the partner support USB Power Delivery. + Swapping is supported as synchronous operation, so write(2) to + the attribute will not return until the operation has finished. + The attribute is notified about VCONN source changes so that + poll(2) on the attribute wakes up. Change on VCONN source also + generates uevent KOBJ_CHANGE. + + Valid values: + - "no" when the port is not the VCONN Source + - "yes" when the port is the VCONN Source + +What: /sys/class/typec//power_operation_mode +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows the current power operational mode the port is in. The + power operation mode means current level for VBUS. In case USB + Power Delivery communication is used for negotiating the levels, + power operation mode should show "usb_power_delivery". + + Valid values: + - default + - 1.5A + - 3.0A + - usb_power_delivery + +What: /sys/class/typec//preferred_role +Date: April 2017 +Contact: Heikki Krogerus +Description: + The user space can notify the driver about the preferred role. + It should be handled as enabling of Try.SRC or Try.SNK, as + defined in USB Type-C specification, in the port drivers. By + default the preferred role should come from the platform. + + Valid values: source, sink, none (to remove preference) + +What: /sys/class/typec//supported_accessory_modes +Date: April 2017 +Contact: Heikki Krogerus +Description: + Space separated list of accessory modes, defined in the USB + Type-C specification, the port supports. + +What: /sys/class/typec//usb_power_delivery_revision +Date: April 2017 +Contact: Heikki Krogerus +Description: + Revision number of the supported USB Power Delivery + specification, or 0 when USB Power Delivery is not supported. + +What: /sys/class/typec//usb_typec_revision +Date: April 2017 +Contact: Heikki Krogerus +Description: + Revision number of the supported USB Type-C specification. + + +USB Type-C partner devices (eg. /sys/class/typec/port0-partner/) + +What: /sys/class/typec/-partner/accessory_mode +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows the Accessory Mode name when the partner is an Accessory. + The Accessory Modes are defined in USB Type-C Specification. + +What: /sys/class/typec/-partner/supports_usb_power_delivery +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows if the partner supports USB Power Delivery communication: + Valid values: yes, no + +What: /sys/class/typec/-partner>/identity/ +Date: April 2017 +Contact: Heikki Krogerus +Description: + This directory appears only if the port device driver is capable + of showing the result of Discover Identity USB power delivery + command. That will not always be possible even when USB power + delivery is supported, for example when USB power delivery + communication for the port is mostly handled in firmware. If the + directory exists, it will have an attribute file for every VDO + in Discover Identity command result. + +What: /sys/class/typec/-partner/identity/id_header +Date: April 2017 +Contact: Heikki Krogerus +Description: + ID Header VDO part of Discover Identity command result. The + value will show 0 until Discover Identity command result becomes + available. The value can be polled. + +What: /sys/class/typec/-partner/identity/cert_stat +Date: April 2017 +Contact: Heikki Krogerus +Description: + Cert Stat VDO part of Discover Identity command result. The + value will show 0 until Discover Identity command result becomes + available. The value can be polled. + +What: /sys/class/typec/-partner/identity/product +Date: April 2017 +Contact: Heikki Krogerus +Description: + Product VDO part of Discover Identity command result. The value + will show 0 until Discover Identity command result becomes + available. The value can be polled. + + +USB Type-C cable devices (eg. /sys/class/typec/port0-cable/) + +Note: Electronically Marked Cables will have a device also for one cable plug +(eg. /sys/class/typec/port0-plug0). If the cable is active and has also SOP +Double Prime controller (USB Power Deliver specification ch. 2.4) it will have +second device also for the other plug. Both plugs may have alternate modes as +described in USB Type-C and USB Power Delivery specifications. + +What: /sys/class/typec/-cable/type +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows if the cable is active. + Valid values: active, passive + +What: /sys/class/typec/-cable/plug_type +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows type of the plug on the cable: + - type-a - Standard A + - type-b - Standard B + - type-c + - captive + +What: /sys/class/typec/-cable/identity/ +Date: April 2017 +Contact: Heikki Krogerus +Description: + This directory appears only if the port device driver is capable + of showing the result of Discover Identity USB power delivery + command. That will not always be possible even when USB power + delivery is supported. If the directory exists, it will have an + attribute for every VDO returned by Discover Identity command. + +What: /sys/class/typec/-cable/identity/id_header +Date: April 2017 +Contact: Heikki Krogerus +Description: + ID Header VDO part of Discover Identity command result. The + value will show 0 until Discover Identity command result becomes + available. The value can be polled. + +What: /sys/class/typec/-cable/identity/cert_stat +Date: April 2017 +Contact: Heikki Krogerus +Description: + Cert Stat VDO part of Discover Identity command result. The + value will show 0 until Discover Identity command result becomes + available. The value can be polled. + +What: /sys/class/typec/-cable/identity/product +Date: April 2017 +Contact: Heikki Krogerus +Description: + Product VDO part of Discover Identity command result. The value + will show 0 until Discover Identity command result becomes + available. The value can be polled. + + +Alternate Mode devices. + +The alternate modes will have Standard or Vendor ID (SVID) assigned by USB-IF. +The ports, partners and cable plugs can have alternate modes. A supported SVID +will consist of a set of modes. Every SVID a port/partner/plug supports will +have a device created for it, and every supported mode for a supported SVID will +have its own directory under that device. Below refers to the device for +the alternate mode. + +What: /sys/class/typec///svid +Date: April 2017 +Contact: Heikki Krogerus +Description: + The SVID (Standard or Vendor ID) assigned by USB-IF for this + alternate mode. + +What: /sys/class/typec///mode/ +Date: April 2017 +Contact: Heikki Krogerus +Description: + Every supported mode will have its own directory. The name of + a mode will be "mode" (for example mode1), where + is the actual index to the mode VDO returned by Discover Modes + USB power delivery command. + +What: /sys/class/typec///mode/description +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows description of the mode. The description is optional for + the drivers, just like with the Billboard Devices. + +What: /sys/class/typec///mode/vdo +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows the VDO in hexadecimal returned by Discover Modes command + for this mode. + +What: /sys/class/typec///mode/active +Date: April 2017 +Contact: Heikki Krogerus +Description: + Shows if the mode is active or not. The attribute can be used + for entering/exiting the mode with partners and cable plugs, and + with the port alternate modes it can be used for disabling + support for specific alternate modes. Entering/exiting modes is + supported as synchronous operation so write(2) to the attribute + does not return until the enter/exit mode operation has + finished. The attribute is notified when the mode is + entered/exited so poll(2) on the attribute wakes up. + Entering/exiting a mode will also generate uevent KOBJ_CHANGE. + + Valid values: yes, no + +What: /sys/class/typec///mode/supported_roles +Date: April 2017 +Contact: Heikki Krogerus +Description: + Space separated list of the supported roles. + + This attribute is available for the devices describing the + alternate modes a port supports, and it will not be exposed with + the devices presenting the alternate modes the partners or cable + plugs support. + + Valid values: source, sink diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 2a4a423d08e0d3ed8bce7cc0c9bcd36bb30bb19d..f3d5817c4ef0fae54150bc3e772e7cc535bde805 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -366,3 +366,10 @@ Contact: Linux ARM Kernel Mailing list Description: AArch64 CPU registers 'identification' directory exposes the CPU ID registers for identifying model and revision of the CPU. + +What: /sys/devices/system/cpu/cpu#/cpu_capacity +Date: December 2016 +Contact: Linux kernel mailing list +Description: information about CPUs heterogeneity. + + cpu_capacity: capacity of cpu#. diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index c7fc72d4495ca0cbd2203e40cfba522bdb2fcfa8..613f42a9d5cdcd7b6a9f754b59387065daa1576f 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -44,16 +44,6 @@ Description: or 0 (unset). Attempts to write any other values to it will cause -EINVAL to be returned. -What: /sys/firmware/acpi/hotplug/force_remove -Date: May 2013 -Contact: Rafael J. Wysocki -Description: - The number in this file (0 or 1) determines whether (1) or not - (0) the ACPI subsystem will allow devices to be hot-removed even - if they cannot be put offline gracefully (from the kernel's - viewpoint). That number can be changed by writing a boolean - value to this file. - What: /sys/firmware/acpi/interrupts/ Date: February 2008 Contact: Len Brown diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch index da87f43aec584de74945714247c6849bebf0cfa7..d5d39748382f4dbdd9f4ad14163a9af29235fb56 100644 --- a/Documentation/ABI/testing/sysfs-kernel-livepatch +++ b/Documentation/ABI/testing/sysfs-kernel-livepatch @@ -25,6 +25,14 @@ Description: code is currently applied. Writing 0 will disable the patch while writing 1 will re-enable the patch. +What: /sys/kernel/livepatch//transition +Date: Feb 2017 +KernelVersion: 4.12.0 +Contact: live-patching@vger.kernel.org +Description: + An attribute which indicates whether the patch is currently in + transition. + What: /sys/kernel/livepatch// Date: Nov 2014 KernelVersion: 3.19.0 diff --git a/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 b/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 new file mode 100644 index 0000000000000000000000000000000000000000..b0f4684a83fe7ab6d72a9ff780565933833c64a0 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-platform-chipidea-usb2 @@ -0,0 +1,9 @@ +What: /sys/bus/platform/devices/ci_hdrc.0/role +Date: Mar 2017 +Contact: Peter Chen +Description: + It returns string "gadget" or "host" when read it, it indicates + current controller role. + + It will do role switch when write "gadget" or "host" to it. + Only controller at dual-role configuration supports writing. diff --git a/Documentation/ABI/testing/sysfs-platform-renesas_usb3 b/Documentation/ABI/testing/sysfs-platform-renesas_usb3 new file mode 100644 index 0000000000000000000000000000000000000000..5621c15d5dc0c30756f77971e44919211834a458 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-platform-renesas_usb3 @@ -0,0 +1,15 @@ +What: /sys/devices/platform//role +Date: March 2017 +KernelVersion: 4.13 +Contact: Yoshihiro Shimoda +Description: + This file can be read and write. + The file can show/change the drd mode of usb. + + Write the following string to change the mode: + "host" - switching mode from peripheral to host. + "peripheral" - switching mode from host to peripheral. + + Read the file, then it shows the following strings: + "host" - The mode is host now. + "peripheral" - The mode is peripheral now. diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 164c1c76971f0962cf3a44bf9bc3afd4876597bb..85916f13d330b78f675ba198636b4c19d0b825f0 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -8,12 +8,11 @@ DOCBOOKS := z8530book.xml \ kernel-hacking.xml kernel-locking.xml \ - writing_usb_driver.xml networking.xml \ - kernel-api.xml filesystems.xml lsm.xml kgdb.xml \ - gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ - genericirq.xml s390-drivers.xml scsi.xml \ - sh.xml w1.xml \ - writing_musb_glue_layer.xml + networking.xml \ + filesystems.xml lsm.xml kgdb.xml \ + libata.xml mtdnand.xml librs.xml rapidio.xml \ + s390-drivers.xml scsi.xml \ + sh.xml w1.xml ifeq ($(DOCBOOKS),) @@ -62,11 +61,14 @@ MAN := $(patsubst %.xml, %.9, $(BOOKS)) mandocs: $(MAN) find $(obj)/man -name '*.9' | xargs gzip -nf +# Default location for installed man pages +export INSTALL_MAN_PATH = $(objtree)/usr + installmandocs: mandocs - mkdir -p /usr/local/man/man9/ + mkdir -p $(INSTALL_MAN_PATH)/man/man9/ find $(obj)/man -name '*.9.gz' -printf '%h %f\n' | \ sort -k 2 -k 1 | uniq -f 1 | sed -e 's: :/:' | \ - xargs install -m 644 -t /usr/local/man/man9/ + xargs install -m 644 -t $(INSTALL_MAN_PATH)/man/man9/ # no-op for the DocBook toolchain epubdocs: @@ -238,7 +240,9 @@ dochelp: @echo ' psdocs - Postscript' @echo ' xmldocs - XML DocBook' @echo ' mandocs - man pages' - @echo ' installmandocs - install man pages generated by mandocs' + @echo ' installmandocs - install man pages generated by mandocs to INSTALL_MAN_PATH'; \ + echo ' (default: $(INSTALL_MAN_PATH))'; \ + echo '' @echo ' cleandocs - clean all generated DocBook files' @echo @echo ' make DOCBOOKS="s1.xml s2.xml" [target] Generate only docs s1.xml s2.xml' diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl deleted file mode 100644 index 64162922117651344d698ffd53ece06eafca3324..0000000000000000000000000000000000000000 --- a/Documentation/DocBook/gadget.tmpl +++ /dev/null @@ -1,793 +0,0 @@ - - - - - - USB Gadget API for Linux - 20 August 2004 - 20 August 2004 - - - - This documentation is free software; you can redistribute - it and/or modify it under the terms of the GNU General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later - version. - - - - This program is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307 USA - - - - For more details see the file COPYING in the source - distribution of Linux. - - - - 2003-2004 - David Brownell - - - - David - Brownell - -
dbrownell@users.sourceforge.net
-
-
-
- - - -Introduction - -This document presents a Linux-USB "Gadget" -kernel mode -API, for use within peripherals and other USB devices -that embed Linux. -It provides an overview of the API structure, -and shows how that fits into a system development project. -This is the first such API released on Linux to address -a number of important problems, including: - - - Supports USB 2.0, for high speed devices which - can stream data at several dozen megabytes per second. - - Handles devices with dozens of endpoints just as - well as ones with just two fixed-function ones. Gadget drivers - can be written so they're easy to port to new hardware. - - Flexible enough to expose more complex USB device - capabilities such as multiple configurations, multiple interfaces, - composite devices, - and alternate interface settings. - - USB "On-The-Go" (OTG) support, in conjunction - with updates to the Linux-USB host side. - - Sharing data structures and API models with the - Linux-USB host side API. This helps the OTG support, and - looks forward to more-symmetric frameworks (where the same - I/O model is used by both host and device side drivers). - - Minimalist, so it's easier to support new device - controller hardware. I/O processing doesn't imply large - demands for memory or CPU resources. - - - - -Most Linux developers will not be able to use this API, since they -have USB "host" hardware in a PC, workstation, or server. -Linux users with embedded systems are more likely to -have USB peripheral hardware. -To distinguish drivers running inside such hardware from the -more familiar Linux "USB device drivers", -which are host side proxies for the real USB devices, -a different term is used: -the drivers inside the peripherals are "USB gadget drivers". -In USB protocol interactions, the device driver is the master -(or "client driver") -and the gadget driver is the slave (or "function driver"). - - -The gadget API resembles the host side Linux-USB API in that both -use queues of request objects to package I/O buffers, and those requests -may be submitted or canceled. -They share common definitions for the standard USB -Chapter 9 messages, structures, and constants. -Also, both APIs bind and unbind drivers to devices. -The APIs differ in detail, since the host side's current -URB framework exposes a number of implementation details -and assumptions that are inappropriate for a gadget API. -While the model for control transfers and configuration -management is necessarily different (one side is a hardware-neutral master, -the other is a hardware-aware slave), the endpoint I/0 API used here -should also be usable for an overhead-reduced host side API. - - - - -Structure of Gadget Drivers - -A system running inside a USB peripheral -normally has at least three layers inside the kernel to handle -USB protocol processing, and may have additional layers in -user space code. -The "gadget" API is used by the middle layer to interact -with the lowest level (which directly handles hardware). - - -In Linux, from the bottom up, these layers are: - - - - - - USB Controller Driver - - - This is the lowest software level. - It is the only layer that talks to hardware, - through registers, fifos, dma, irqs, and the like. - The <linux/usb/gadget.h> API abstracts - the peripheral controller endpoint hardware. - That hardware is exposed through endpoint objects, which accept - streams of IN/OUT buffers, and through callbacks that interact - with gadget drivers. - Since normal USB devices only have one upstream - port, they only have one of these drivers. - The controller driver can support any number of different - gadget drivers, but only one of them can be used at a time. - - - Examples of such controller hardware include - the PCI-based NetChip 2280 USB 2.0 high speed controller, - the SA-11x0 or PXA-25x UDC (found within many PDAs), - and a variety of other products. - - - - - - Gadget Driver - - - The lower boundary of this driver implements hardware-neutral - USB functions, using calls to the controller driver. - Because such hardware varies widely in capabilities and restrictions, - and is used in embedded environments where space is at a premium, - the gadget driver is often configured at compile time - to work with endpoints supported by one particular controller. - Gadget drivers may be portable to several different controllers, - using conditional compilation. - (Recent kernels substantially simplify the work involved in - supporting new hardware, by autoconfiguring - endpoints automatically for many bulk-oriented drivers.) - Gadget driver responsibilities include: - - - handling setup requests (ep0 protocol responses) - possibly including class-specific functionality - - returning configuration and string descriptors - - (re)setting configurations and interface - altsettings, including enabling and configuring endpoints - - handling life cycle events, such as managing - bindings to hardware, - USB suspend/resume, remote wakeup, - and disconnection from the USB host. - - managing IN and OUT transfers on all currently - enabled endpoints - - - - - Such drivers may be modules of proprietary code, although - that approach is discouraged in the Linux community. - - - - - Upper Level - - - Most gadget drivers have an upper boundary that connects - to some Linux driver or framework in Linux. - Through that boundary flows the data which the gadget driver - produces and/or consumes through protocol transfers over USB. - Examples include: - - - user mode code, using generic (gadgetfs) - or application specific files in - /dev - - networking subsystem (for network gadgets, - like the CDC Ethernet Model gadget driver) - - data capture drivers, perhaps video4Linux or - a scanner driver; or test and measurement hardware. - - input subsystem (for HID gadgets) - - sound subsystem (for audio gadgets) - - file system (for PTP gadgets) - - block i/o subsystem (for usb-storage gadgets) - - ... and more - - - - - Additional Layers - - - Other layers may exist. - These could include kernel layers, such as network protocol stacks, - as well as user mode applications building on standard POSIX - system call APIs such as - open(), close(), - read() and write(). - On newer systems, POSIX Async I/O calls may be an option. - Such user mode code will not necessarily be subject to - the GNU General Public License (GPL). - - - - - - -OTG-capable systems will also need to include a standard Linux-USB -host side stack, -with usbcore, -one or more Host Controller Drivers (HCDs), -USB Device Drivers to support -the OTG "Targeted Peripheral List", -and so forth. -There will also be an OTG Controller Driver, -which is visible to gadget and device driver developers only indirectly. -That helps the host and device side USB controllers implement the -two new OTG protocols (HNP and SRP). -Roles switch (host to peripheral, or vice versa) using HNP -during USB suspend processing, and SRP can be viewed as a -more battery-friendly kind of device wakeup protocol. - - -Over time, reusable utilities are evolving to help make some -gadget driver tasks simpler. -For example, building configuration descriptors from vectors of -descriptors for the configurations interfaces and endpoints is -now automated, and many drivers now use autoconfiguration to -choose hardware endpoints and initialize their descriptors. - -A potential example of particular interest -is code implementing standard USB-IF protocols for -HID, networking, storage, or audio classes. -Some developers are interested in KDB or KGDB hooks, to let -target hardware be remotely debugged. -Most such USB protocol code doesn't need to be hardware-specific, -any more than network protocols like X11, HTTP, or NFS are. -Such gadget-side interface drivers should eventually be combined, -to implement composite devices. - - - - - -Kernel Mode Gadget API - -Gadget drivers declare themselves through a -struct usb_gadget_driver, which is responsible for -most parts of enumeration for a struct usb_gadget. -The response to a set_configuration usually involves -enabling one or more of the struct usb_ep objects -exposed by the gadget, and submitting one or more -struct usb_request buffers to transfer data. -Understand those four data types, and their operations, and -you will understand how this API works. - - -Incomplete Data Type Descriptions - -This documentation was prepared using the standard Linux -kernel docproc tool, which turns text -and in-code comments into SGML DocBook and then into usable -formats such as HTML or PDF. -Other than the "Chapter 9" data types, most of the significant -data types and functions are described here. - - -However, docproc does not understand all the C constructs -that are used, so some relevant information is likely omitted from -what you are reading. -One example of such information is endpoint autoconfiguration. -You'll have to read the header file, and use example source -code (such as that for "Gadget Zero"), to fully understand the API. - - -The part of the API implementing some basic -driver capabilities is specific to the version of the -Linux kernel that's in use. -The 2.6 kernel includes a driver model -framework that has no analogue on earlier kernels; -so those parts of the gadget API are not fully portable. -(They are implemented on 2.4 kernels, but in a different way.) -The driver model state is another part of this API that is -ignored by the kerneldoc tools. - - - -The core API does not expose -every possible hardware feature, only the most widely available ones. -There are significant hardware features, such as device-to-device DMA -(without temporary storage in a memory buffer) -that would be added using hardware-specific APIs. - - -This API allows drivers to use conditional compilation to handle -endpoint capabilities of different hardware, but doesn't require that. -Hardware tends to have arbitrary restrictions, relating to -transfer types, addressing, packet sizes, buffering, and availability. -As a rule, such differences only matter for "endpoint zero" logic -that handles device configuration and management. -The API supports limited run-time -detection of capabilities, through naming conventions for endpoints. -Many drivers will be able to at least partially autoconfigure -themselves. -In particular, driver init sections will often have endpoint -autoconfiguration logic that scans the hardware's list of endpoints -to find ones matching the driver requirements -(relying on those conventions), to eliminate some of the most -common reasons for conditional compilation. - - -Like the Linux-USB host side API, this API exposes -the "chunky" nature of USB messages: I/O requests are in terms -of one or more "packets", and packet boundaries are visible to drivers. -Compared to RS-232 serial protocols, USB resembles -synchronous protocols like HDLC -(N bytes per frame, multipoint addressing, host as the primary -station and devices as secondary stations) -more than asynchronous ones -(tty style: 8 data bits per frame, no parity, one stop bit). -So for example the controller drivers won't buffer -two single byte writes into a single two-byte USB IN packet, -although gadget drivers may do so when they implement -protocols where packet boundaries (and "short packets") -are not significant. - - -Driver Life Cycle - -Gadget drivers make endpoint I/O requests to hardware without -needing to know many details of the hardware, but driver -setup/configuration code needs to handle some differences. -Use the API like this: - - - - -Register a driver for the particular device side -usb controller hardware, -such as the net2280 on PCI (USB 2.0), -sa11x0 or pxa25x as found in Linux PDAs, -and so on. -At this point the device is logically in the USB ch9 initial state -("attached"), drawing no power and not usable -(since it does not yet support enumeration). -Any host should not see the device, since it's not -activated the data line pullup used by the host to -detect a device, even if VBUS power is available. - - -Register a gadget driver that implements some higher level -device function. That will then bind() to a usb_gadget, which -activates the data line pullup sometime after detecting VBUS. - - -The hardware driver can now start enumerating. -The steps it handles are to accept USB power and set_address requests. -Other steps are handled by the gadget driver. -If the gadget driver module is unloaded before the host starts to -enumerate, steps before step 7 are skipped. - - -The gadget driver's setup() call returns usb descriptors, -based both on what the bus interface hardware provides and on the -functionality being implemented. -That can involve alternate settings or configurations, -unless the hardware prevents such operation. -For OTG devices, each configuration descriptor includes -an OTG descriptor. - - -The gadget driver handles the last step of enumeration, -when the USB host issues a set_configuration call. -It enables all endpoints used in that configuration, -with all interfaces in their default settings. -That involves using a list of the hardware's endpoints, enabling each -endpoint according to its descriptor. -It may also involve using usb_gadget_vbus_draw -to let more power be drawn from VBUS, as allowed by that configuration. -For OTG devices, setting a configuration may also involve reporting -HNP capabilities through a user interface. - - -Do real work and perform data transfers, possibly involving -changes to interface settings or switching to new configurations, until the -device is disconnect()ed from the host. -Queue any number of transfer requests to each endpoint. -It may be suspended and resumed several times before being disconnected. -On disconnect, the drivers go back to step 3 (above). - - -When the gadget driver module is being unloaded, -the driver unbind() callback is issued. That lets the controller -driver be unloaded. - - - - -Drivers will normally be arranged so that just loading the -gadget driver module (or statically linking it into a Linux kernel) -allows the peripheral device to be enumerated, but some drivers -will defer enumeration until some higher level component (like -a user mode daemon) enables it. -Note that at this lowest level there are no policies about how -ep0 configuration logic is implemented, -except that it should obey USB specifications. -Such issues are in the domain of gadget drivers, -including knowing about implementation constraints -imposed by some USB controllers -or understanding that composite devices might happen to -be built by integrating reusable components. - - -Note that the lifecycle above can be slightly different -for OTG devices. -Other than providing an additional OTG descriptor in each -configuration, only the HNP-related differences are particularly -visible to driver code. -They involve reporting requirements during the SET_CONFIGURATION -request, and the option to invoke HNP during some suspend callbacks. -Also, SRP changes the semantics of -usb_gadget_wakeup -slightly. - - - - -USB 2.0 Chapter 9 Types and Constants - -Gadget drivers -rely on common USB structures and constants -defined in the -<linux/usb/ch9.h> -header file, which is standard in Linux 2.6 kernels. -These are the same types and constants used by host -side drivers (and usbcore). - - -!Iinclude/linux/usb/ch9.h - - -Core Objects and Methods - -These are declared in -<linux/usb/gadget.h>, -and are used by gadget drivers to interact with -USB peripheral controller drivers. - - - - -!Iinclude/linux/usb/gadget.h - - -Optional Utilities - -The core API is sufficient for writing a USB Gadget Driver, -but some optional utilities are provided to simplify common tasks. -These utilities include endpoint autoconfiguration. - - -!Edrivers/usb/gadget/usbstring.c -!Edrivers/usb/gadget/config.c - - - -Composite Device Framework - -The core API is sufficient for writing drivers for composite -USB devices (with more than one function in a given configuration), -and also multi-configuration devices (also more than one function, -but not necessarily sharing a given configuration). -There is however an optional framework which makes it easier to -reuse and combine functions. - - -Devices using this framework provide a struct -usb_composite_driver, which in turn provides one or -more struct usb_configuration instances. -Each such configuration includes at least one -struct usb_function, which packages a user -visible role such as "network link" or "mass storage device". -Management functions may also exist, such as "Device Firmware -Upgrade". - - -!Iinclude/linux/usb/composite.h -!Edrivers/usb/gadget/composite.c - - - -Composite Device Functions - -At this writing, a few of the current gadget drivers have -been converted to this framework. -Near-term plans include converting all of them, except for "gadgetfs". - - -!Edrivers/usb/gadget/function/f_acm.c -!Edrivers/usb/gadget/function/f_ecm.c -!Edrivers/usb/gadget/function/f_subset.c -!Edrivers/usb/gadget/function/f_obex.c -!Edrivers/usb/gadget/function/f_serial.c - - - - - - -Peripheral Controller Drivers - -The first hardware supporting this API was the NetChip 2280 -controller, which supports USB 2.0 high speed and is based on PCI. -This is the net2280 driver module. -The driver supports Linux kernel versions 2.4 and 2.6; -contact NetChip Technologies for development boards and product -information. - - -Other hardware working in the "gadget" framework includes: -Intel's PXA 25x and IXP42x series processors -(pxa2xx_udc), -Toshiba TC86c001 "Goku-S" (goku_udc), -Renesas SH7705/7727 (sh_udc), -MediaQ 11xx (mq11xx_udc), -Hynix HMS30C7202 (h7202_udc), -National 9303/4 (n9604_udc), -Texas Instruments OMAP (omap_udc), -Sharp LH7A40x (lh7a40x_udc), -and more. -Most of those are full speed controllers. - - -At this writing, there are people at work on drivers in -this framework for several other USB device controllers, -with plans to make many of them be widely available. - - - - -A partial USB simulator, -the dummy_hcd driver, is available. -It can act like a net2280, a pxa25x, or an sa11x0 in terms -of available endpoints and device speeds; and it simulates -control, bulk, and to some extent interrupt transfers. -That lets you develop some parts of a gadget driver on a normal PC, -without any special hardware, and perhaps with the assistance -of tools such as GDB running with User Mode Linux. -At least one person has expressed interest in adapting that -approach, hooking it up to a simulator for a microcontroller. -Such simulators can help debug subsystems where the runtime hardware -is unfriendly to software development, or is not yet available. - - -Support for other controllers is expected to be developed -and contributed -over time, as this driver framework evolves. - - - - -Gadget Drivers - -In addition to Gadget Zero -(used primarily for testing and development with drivers -for usb controller hardware), other gadget drivers exist. - - -There's an ethernet gadget -driver, which implements one of the most useful -Communications Device Class (CDC) models. -One of the standards for cable modem interoperability even -specifies the use of this ethernet model as one of two -mandatory options. -Gadgets using this code look to a USB host as if they're -an Ethernet adapter. -It provides access to a network where the gadget's CPU is one host, -which could easily be bridging, routing, or firewalling -access to other networks. -Since some hardware can't fully implement the CDC Ethernet -requirements, this driver also implements a "good parts only" -subset of CDC Ethernet. -(That subset doesn't advertise itself as CDC Ethernet, -to avoid creating problems.) - - -Support for Microsoft's RNDIS -protocol has been contributed by Pengutronix and Auerswald GmbH. -This is like CDC Ethernet, but it runs on more slightly USB hardware -(but less than the CDC subset). -However, its main claim to fame is being able to connect directly to -recent versions of Windows, using drivers that Microsoft bundles -and supports, making it much simpler to network with Windows. - - -There is also support for user mode gadget drivers, -using gadgetfs. -This provides a User Mode API that presents -each endpoint as a single file descriptor. I/O is done using -normal read() and read() calls. -Familiar tools like GDB and pthreads can be used to -develop and debug user mode drivers, so that once a robust -controller driver is available many applications for it -won't require new kernel mode software. -Linux 2.6 Async I/O (AIO) -support is available, so that user mode software -can stream data with only slightly more overhead -than a kernel driver. - - -There's a USB Mass Storage class driver, which provides -a different solution for interoperability with systems such -as MS-Windows and MacOS. -That Mass Storage driver uses a -file or block device as backing store for a drive, -like the loop driver. -The USB host uses the BBB, CB, or CBI versions of the mass -storage class specification, using transparent SCSI commands -to access the data from the backing store. - - -There's a "serial line" driver, useful for TTY style -operation over USB. -The latest version of that driver supports CDC ACM style -operation, like a USB modem, and so on most hardware it can -interoperate easily with MS-Windows. -One interesting use of that driver is in boot firmware (like a BIOS), -which can sometimes use that model with very small systems without -real serial lines. - - -Support for other kinds of gadget is expected to -be developed and contributed -over time, as this driver framework evolves. - - - - -USB On-The-GO (OTG) - -USB OTG support on Linux 2.6 was initially developed -by Texas Instruments for -OMAP 16xx and 17xx -series processors. -Other OTG systems should work in similar ways, but the -hardware level details could be very different. - - -Systems need specialized hardware support to implement OTG, -notably including a special Mini-AB jack -and associated transceiver to support Dual-Role -operation: -they can act either as a host, using the standard -Linux-USB host side driver stack, -or as a peripheral, using this "gadget" framework. -To do that, the system software relies on small additions -to those programming interfaces, -and on a new internal component (here called an "OTG Controller") -affecting which driver stack connects to the OTG port. -In each role, the system can re-use the existing pool of -hardware-neutral drivers, layered on top of the controller -driver interfaces (usb_bus or -usb_gadget). -Such drivers need at most minor changes, and most of the calls -added to support OTG can also benefit non-OTG products. - - - - Gadget drivers test the is_otg - flag, and use it to determine whether or not to include - an OTG descriptor in each of their configurations. - - Gadget drivers may need changes to support the - two new OTG protocols, exposed in new gadget attributes - such as b_hnp_enable flag. - HNP support should be reported through a user interface - (two LEDs could suffice), and is triggered in some cases - when the host suspends the peripheral. - SRP support can be user-initiated just like remote wakeup, - probably by pressing the same button. - - On the host side, USB device drivers need - to be taught to trigger HNP at appropriate moments, using - usb_suspend_device(). - That also conserves battery power, which is useful even - for non-OTG configurations. - - Also on the host side, a driver must support the - OTG "Targeted Peripheral List". That's just a whitelist, - used to reject peripherals not supported with a given - Linux OTG host. - This whitelist is product-specific; - each product must modify otg_whitelist.h - to match its interoperability specification. - - - Non-OTG Linux hosts, like PCs and workstations, - normally have some solution for adding drivers, so that - peripherals that aren't recognized can eventually be supported. - That approach is unreasonable for consumer products that may - never have their firmware upgraded, and where it's usually - unrealistic to expect traditional PC/workstation/server kinds - of support model to work. - For example, it's often impractical to change device firmware - once the product has been distributed, so driver bugs can't - normally be fixed if they're found after shipment. - - - - -Additional changes are needed below those hardware-neutral -usb_bus and usb_gadget -driver interfaces; those aren't discussed here in any detail. -Those affect the hardware-specific code for each USB Host or Peripheral -controller, and how the HCD initializes (since OTG can be active only -on a single port). -They also involve what may be called an OTG Controller -Driver, managing the OTG transceiver and the OTG state -machine logic as well as much of the root hub behavior for the -OTG port. -The OTG controller driver needs to activate and deactivate USB -controllers depending on the relevant device role. -Some related changes were needed inside usbcore, so that it -can identify OTG-capable devices and respond appropriately -to HNP or SRP protocols. - - - - -
- diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl deleted file mode 100644 index 59fb5c077541b15f6f319a17cbf259195c9849f0..0000000000000000000000000000000000000000 --- a/Documentation/DocBook/genericirq.tmpl +++ /dev/null @@ -1,520 +0,0 @@ - - - - - - Linux generic IRQ handling - - - - Thomas - Gleixner - -
- tglx@linutronix.de -
-
-
- - Ingo - Molnar - -
- mingo@elte.hu -
-
-
-
- - - 2005-2010 - Thomas Gleixner - - - 2005-2006 - Ingo Molnar - - - - - This documentation is free software; you can redistribute - it and/or modify it under the terms of the GNU General Public - License version 2 as published by the Free Software Foundation. - - - - This program is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307 USA - - - - For more details see the file COPYING in the source - distribution of Linux. - - -
- - - - - Introduction - - The generic interrupt handling layer is designed to provide a - complete abstraction of interrupt handling for device drivers. - It is able to handle all the different types of interrupt controller - hardware. Device drivers use generic API functions to request, enable, - disable and free interrupts. The drivers do not have to know anything - about interrupt hardware details, so they can be used on different - platforms without code changes. - - - This documentation is provided to developers who want to implement - an interrupt subsystem based for their architecture, with the help - of the generic IRQ handling layer. - - - - - Rationale - - The original implementation of interrupt handling in Linux uses - the __do_IRQ() super-handler, which is able to deal with every - type of interrupt logic. - - - Originally, Russell King identified different types of handlers to - build a quite universal set for the ARM interrupt handler - implementation in Linux 2.5/2.6. He distinguished between: - - Level type - Edge type - Simple type - - During the implementation we identified another type: - - Fast EOI type - - In the SMP world of the __do_IRQ() super-handler another type - was identified: - - Per CPU type - - - - This split implementation of high-level IRQ handlers allows us to - optimize the flow of the interrupt handling for each specific - interrupt type. This reduces complexity in that particular code path - and allows the optimized handling of a given type. - - - The original general IRQ implementation used hw_interrupt_type - structures and their ->ack(), ->end() [etc.] callbacks to - differentiate the flow control in the super-handler. This leads to - a mix of flow logic and low-level hardware logic, and it also leads - to unnecessary code duplication: for example in i386, there is an - ioapic_level_irq and an ioapic_edge_irq IRQ-type which share many - of the low-level details but have different flow handling. - - - A more natural abstraction is the clean separation of the - 'irq flow' and the 'chip details'. - - - Analysing a couple of architecture's IRQ subsystem implementations - reveals that most of them can use a generic set of 'irq flow' - methods and only need to add the chip-level specific code. - The separation is also valuable for (sub)architectures - which need specific quirks in the IRQ flow itself but not in the - chip details - and thus provides a more transparent IRQ subsystem - design. - - - Each interrupt descriptor is assigned its own high-level flow - handler, which is normally one of the generic - implementations. (This high-level flow handler implementation also - makes it simple to provide demultiplexing handlers which can be - found in embedded platforms on various architectures.) - - - The separation makes the generic interrupt handling layer more - flexible and extensible. For example, an (sub)architecture can - use a generic IRQ-flow implementation for 'level type' interrupts - and add a (sub)architecture specific 'edge type' implementation. - - - To make the transition to the new model easier and prevent the - breakage of existing implementations, the __do_IRQ() super-handler - is still available. This leads to a kind of duality for the time - being. Over time the new model should be used in more and more - architectures, as it enables smaller and cleaner IRQ subsystems. - It's deprecated for three years now and about to be removed. - - - - Known Bugs And Assumptions - - None (knock on wood). - - - - - Abstraction layers - - There are three main levels of abstraction in the interrupt code: - - High-level driver API - High-level IRQ flow handlers - Chip-level hardware encapsulation - - - - Interrupt control flow - - Each interrupt is described by an interrupt descriptor structure - irq_desc. The interrupt is referenced by an 'unsigned int' numeric - value which selects the corresponding interrupt description structure - in the descriptor structures array. - The descriptor structure contains status information and pointers - to the interrupt flow method and the interrupt chip structure - which are assigned to this interrupt. - - - Whenever an interrupt triggers, the low-level architecture code calls - into the generic interrupt code by calling desc->handle_irq(). - This high-level IRQ handling function only uses desc->irq_data.chip - primitives referenced by the assigned chip descriptor structure. - - - - High-level Driver API - - The high-level Driver API consists of following functions: - - request_irq() - free_irq() - disable_irq() - enable_irq() - disable_irq_nosync() (SMP only) - synchronize_irq() (SMP only) - irq_set_irq_type() - irq_set_irq_wake() - irq_set_handler_data() - irq_set_chip() - irq_set_chip_data() - - See the autogenerated function documentation for details. - - - - High-level IRQ flow handlers - - The generic layer provides a set of pre-defined irq-flow methods: - - handle_level_irq - handle_edge_irq - handle_fasteoi_irq - handle_simple_irq - handle_percpu_irq - handle_edge_eoi_irq - handle_bad_irq - - The interrupt flow handlers (either pre-defined or architecture - specific) are assigned to specific interrupts by the architecture - either during bootup or during device initialization. - - - Default flow implementations - - Helper functions - - The helper functions call the chip primitives and - are used by the default flow implementations. - The following helper functions are implemented (simplified excerpt): - -default_enable(struct irq_data *data) -{ - desc->irq_data.chip->irq_unmask(data); -} - -default_disable(struct irq_data *data) -{ - if (!delay_disable(data)) - desc->irq_data.chip->irq_mask(data); -} - -default_ack(struct irq_data *data) -{ - chip->irq_ack(data); -} - -default_mask_ack(struct irq_data *data) -{ - if (chip->irq_mask_ack) { - chip->irq_mask_ack(data); - } else { - chip->irq_mask(data); - chip->irq_ack(data); - } -} - -noop(struct irq_data *data)) -{ -} - - - - - - - Default flow handler implementations - - Default Level IRQ flow handler - - handle_level_irq provides a generic implementation - for level-triggered interrupts. - - - The following control flow is implemented (simplified excerpt): - -desc->irq_data.chip->irq_mask_ack(); -handle_irq_event(desc->action); -desc->irq_data.chip->irq_unmask(); - - - - - Default Fast EOI IRQ flow handler - - handle_fasteoi_irq provides a generic implementation - for interrupts, which only need an EOI at the end of - the handler. - - - The following control flow is implemented (simplified excerpt): - -handle_irq_event(desc->action); -desc->irq_data.chip->irq_eoi(); - - - - - Default Edge IRQ flow handler - - handle_edge_irq provides a generic implementation - for edge-triggered interrupts. - - - The following control flow is implemented (simplified excerpt): - -if (desc->status & running) { - desc->irq_data.chip->irq_mask_ack(); - desc->status |= pending | masked; - return; -} -desc->irq_data.chip->irq_ack(); -desc->status |= running; -do { - if (desc->status & masked) - desc->irq_data.chip->irq_unmask(); - desc->status &= ~pending; - handle_irq_event(desc->action); -} while (status & pending); -desc->status &= ~running; - - - - - Default simple IRQ flow handler - - handle_simple_irq provides a generic implementation - for simple interrupts. - - - Note: The simple flow handler does not call any - handler/chip primitives. - - - The following control flow is implemented (simplified excerpt): - -handle_irq_event(desc->action); - - - - - Default per CPU flow handler - - handle_percpu_irq provides a generic implementation - for per CPU interrupts. - - - Per CPU interrupts are only available on SMP and - the handler provides a simplified version without - locking. - - - The following control flow is implemented (simplified excerpt): - -if (desc->irq_data.chip->irq_ack) - desc->irq_data.chip->irq_ack(); -handle_irq_event(desc->action); -if (desc->irq_data.chip->irq_eoi) - desc->irq_data.chip->irq_eoi(); - - - - - EOI Edge IRQ flow handler - - handle_edge_eoi_irq provides an abnomination of the edge - handler which is solely used to tame a badly wreckaged - irq controller on powerpc/cell. - - - - Bad IRQ flow handler - - handle_bad_irq is used for spurious interrupts which - have no real handler assigned.. - - - - - Quirks and optimizations - - The generic functions are intended for 'clean' architectures and chips, - which have no platform-specific IRQ handling quirks. If an architecture - needs to implement quirks on the 'flow' level then it can do so by - overriding the high-level irq-flow handler. - - - - Delayed interrupt disable - - This per interrupt selectable feature, which was introduced by Russell - King in the ARM interrupt implementation, does not mask an interrupt - at the hardware level when disable_irq() is called. The interrupt is - kept enabled and is masked in the flow handler when an interrupt event - happens. This prevents losing edge interrupts on hardware which does - not store an edge interrupt event while the interrupt is disabled at - the hardware level. When an interrupt arrives while the IRQ_DISABLED - flag is set, then the interrupt is masked at the hardware level and - the IRQ_PENDING bit is set. When the interrupt is re-enabled by - enable_irq() the pending bit is checked and if it is set, the - interrupt is resent either via hardware or by a software resend - mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when - you want to use the delayed interrupt disable feature and your - hardware is not capable of retriggering an interrupt.) - The delayed interrupt disable is not configurable. - - - - - Chip-level hardware encapsulation - - The chip-level hardware descriptor structure irq_chip - contains all the direct chip relevant functions, which - can be utilized by the irq flow implementations. - - irq_ack() - irq_mask_ack() - Optional, recommended for performance - irq_mask() - irq_unmask() - irq_eoi() - Optional, required for EOI flow handlers - irq_retrigger() - Optional - irq_set_type() - Optional - irq_set_wake() - Optional - - These primitives are strictly intended to mean what they say: ack means - ACK, masking means masking of an IRQ line, etc. It is up to the flow - handler(s) to use these basic units of low-level functionality. - - - - - - __do_IRQ entry point - - The original implementation __do_IRQ() was an alternative entry - point for all types of interrupts. It no longer exists. - - - This handler turned out to be not suitable for all - interrupt hardware and was therefore reimplemented with split - functionality for edge/level/simple/percpu interrupts. This is not - only a functional optimization. It also shortens code paths for - interrupts. - - - - - Locking on SMP - - The locking of chip registers is up to the architecture that - defines the chip primitives. The per-irq structure is - protected via desc->lock, by the generic layer. - - - - - Generic interrupt chip - - To avoid copies of identical implementations of IRQ chips the - core provides a configurable generic interrupt chip - implementation. Developers should check carefully whether the - generic chip fits their needs before implementing the same - functionality slightly differently themselves. - -!Ekernel/irq/generic-chip.c - - - - Structures - - This chapter contains the autogenerated documentation of the structures which are - used in the generic IRQ layer. - -!Iinclude/linux/irq.h -!Iinclude/linux/interrupt.h - - - - Public Functions Provided - - This chapter contains the autogenerated documentation of the kernel API functions - which are exported. - -!Ekernel/irq/manage.c -!Ekernel/irq/chip.c - - - - Internal Functions Provided - - This chapter contains the autogenerated documentation of the internal functions. - -!Ikernel/irq/irqdesc.c -!Ikernel/irq/handle.c -!Ikernel/irq/chip.c - - - - Credits - - The following people have contributed to this document: - - Thomas Gleixnertglx@linutronix.de - Ingo Molnarmingo@elte.hu - - - -
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl deleted file mode 100644 index ecfd0ea406619b0a2c76071b2024c7219b9c7a6a..0000000000000000000000000000000000000000 --- a/Documentation/DocBook/kernel-api.tmpl +++ /dev/null @@ -1,331 +0,0 @@ - - - - - - The Linux Kernel API - - - - This documentation is free software; you can redistribute - it and/or modify it under the terms of the GNU General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later - version. - - - - This program is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307 USA - - - - For more details see the file COPYING in the source - distribution of Linux. - - - - - - - - Data Types - Doubly Linked Lists -!Iinclude/linux/list.h - - - - - Basic C Library Functions - - - When writing drivers, you cannot in general use routines which are - from the C Library. Some of the functions have been found generally - useful and they are listed below. The behaviour of these functions - may vary slightly from those defined by ANSI, and these deviations - are noted in the text. - - - String Conversions -!Elib/vsprintf.c -!Finclude/linux/kernel.h kstrtol -!Finclude/linux/kernel.h kstrtoul -!Elib/kstrtox.c - - String Manipulation - -!Elib/string.c - - Bit Operations -!Iarch/x86/include/asm/bitops.h - - - - - Basic Kernel Library Functions - - - The Linux kernel provides more basic utility functions. - - - Bitmap Operations -!Elib/bitmap.c -!Ilib/bitmap.c - - - Command-line Parsing -!Elib/cmdline.c - - - CRC Functions -!Elib/crc7.c -!Elib/crc16.c -!Elib/crc-itu-t.c -!Elib/crc32.c -!Elib/crc-ccitt.c - - - idr/ida Functions -!Pinclude/linux/idr.h idr sync -!Plib/idr.c IDA description -!Elib/idr.c - - - - - Memory Management in Linux - The Slab Cache -!Iinclude/linux/slab.h -!Emm/slab.c -!Emm/util.c - - User Space Memory Access -!Iarch/x86/include/asm/uaccess_32.h -!Earch/x86/lib/usercopy_32.c - - More Memory Management Functions -!Emm/readahead.c -!Emm/filemap.c -!Emm/memory.c -!Emm/vmalloc.c -!Imm/page_alloc.c -!Emm/mempool.c -!Emm/dmapool.c -!Emm/page-writeback.c -!Emm/truncate.c - - - - - - Kernel IPC facilities - - IPC utilities -!Iipc/util.c - - - - - FIFO Buffer - kfifo interface -!Iinclude/linux/kfifo.h - - - - - relay interface support - - - Relay interface support - is designed to provide an efficient mechanism for tools and - facilities to relay large amounts of data from kernel space to - user space. - - - relay interface -!Ekernel/relay.c -!Ikernel/relay.c - - - - - Module Support - Module Loading -!Ekernel/kmod.c - - Inter Module support - - Refer to the file kernel/module.c for more information. - - - - - - - Hardware Interfaces - Interrupt Handling -!Ekernel/irq/manage.c - - - DMA Channels -!Ekernel/dma.c - - - Resources Management -!Ikernel/resource.c -!Ekernel/resource.c - - - MTRR Handling -!Earch/x86/kernel/cpu/mtrr/main.c - - - PCI Support Library -!Edrivers/pci/pci.c -!Edrivers/pci/pci-driver.c -!Edrivers/pci/remove.c -!Edrivers/pci/search.c -!Edrivers/pci/msi.c -!Edrivers/pci/bus.c -!Edrivers/pci/access.c -!Edrivers/pci/irq.c -!Edrivers/pci/htirq.c - -!Edrivers/pci/probe.c -!Edrivers/pci/slot.c -!Edrivers/pci/rom.c -!Edrivers/pci/iov.c -!Idrivers/pci/pci-sysfs.c - - PCI Hotplug Support Library -!Edrivers/pci/hotplug/pci_hotplug_core.c - - - - - Firmware Interfaces - DMI Interfaces -!Edrivers/firmware/dmi_scan.c - - EDD Interfaces -!Idrivers/firmware/edd.c - - - - - Security Framework -!Isecurity/security.c -!Esecurity/inode.c - - - - Audit Interfaces -!Ekernel/audit.c -!Ikernel/auditsc.c -!Ikernel/auditfilter.c - - - - Accounting Framework -!Ikernel/acct.c - - - - Block Devices -!Eblock/blk-core.c -!Iblock/blk-core.c -!Eblock/blk-map.c -!Iblock/blk-sysfs.c -!Eblock/blk-settings.c -!Eblock/blk-exec.c -!Eblock/blk-flush.c -!Eblock/blk-lib.c -!Eblock/blk-tag.c -!Iblock/blk-tag.c -!Eblock/blk-integrity.c -!Ikernel/trace/blktrace.c -!Iblock/genhd.c -!Eblock/genhd.c - - - - Char devices -!Efs/char_dev.c - - - - Miscellaneous Devices -!Edrivers/char/misc.c - - - - Clock Framework - - - The clock framework defines programming interfaces to support - software management of the system clock tree. - This framework is widely used with System-On-Chip (SOC) platforms - to support power management and various devices which may need - custom clock rates. - Note that these "clocks" don't relate to timekeeping or real - time clocks (RTCs), each of which have separate frameworks. - These struct clk instances may be used - to manage for example a 96 MHz signal that is used to shift bits - into and out of peripherals or busses, or otherwise trigger - synchronous state machine transitions in system hardware. - - - - Power management is supported by explicit software clock gating: - unused clocks are disabled, so the system doesn't waste power - changing the state of transistors that aren't in active use. - On some systems this may be backed by hardware clock gating, - where clocks are gated without being disabled in software. - Sections of chips that are powered but not clocked may be able - to retain their last state. - This low power state is often called a retention - mode. - This mode still incurs leakage currents, especially with finer - circuit geometries, but for CMOS circuits power is mostly used - by clocked state changes. - - - - Power-aware drivers only enable their clocks when the device - they manage is in active use. Also, system sleep states often - differ according to which clock domains are active: while a - "standby" state may allow wakeup from several active domains, a - "mem" (suspend-to-RAM) state may require a more wholesale shutdown - of clocks derived from higher speed PLLs and oscillators, limiting - the number of possible wakeup event sources. A driver's suspend - method may need to be aware of system-specific clock constraints - on the target sleep state. - - - - Some platforms support programmable clock generators. These - can be used by external chips of various kinds, such as other - CPUs, multimedia codecs, and devices with strict requirements - for interface clocking. - - -!Iinclude/linux/clk.h - - - diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl index 50479360d84590640c0dd6deba02935a295239fb..ac3cca3399a1e4d4ee205bdbfa3435e8b02f74a7 100644 --- a/Documentation/DocBook/rapidio.tmpl +++ b/Documentation/DocBook/rapidio.tmpl @@ -128,9 +128,6 @@ Device model support !Idrivers/rapidio/rio-driver.c - - Sysfs support -!Idrivers/rapidio/rio-sysfs.c PPC32 support !Iarch/powerpc/sysdev/fsl_rio.c diff --git a/Documentation/DocBook/writing_musb_glue_layer.tmpl b/Documentation/DocBook/writing_musb_glue_layer.tmpl deleted file mode 100644 index 837eca77f274236464225ba280d32a4898640f54..0000000000000000000000000000000000000000 --- a/Documentation/DocBook/writing_musb_glue_layer.tmpl +++ /dev/null @@ -1,873 +0,0 @@ - - - - - - Writing an MUSB Glue Layer - - - - Apelete - Seketeli - -
- apelete at seketeli.net -
-
-
-
- - - 2014 - Apelete Seketeli - - - - - This documentation is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - - - This documentation is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - - You should have received a copy of the GNU General Public License - along with this documentation; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA - - - - For more details see the file COPYING in the Linux kernel source - tree. - - -
- - - - - Introduction - - The Linux MUSB subsystem is part of the larger Linux USB - subsystem. It provides support for embedded USB Device Controllers - (UDC) that do not use Universal Host Controller Interface (UHCI) - or Open Host Controller Interface (OHCI). - - - Instead, these embedded UDC rely on the USB On-the-Go (OTG) - specification which they implement at least partially. The silicon - reference design used in most cases is the Multipoint USB - Highspeed Dual-Role Controller (MUSB HDRC) found in the Mentor - Graphics Inventra™ design. - - - As a self-taught exercise I have written an MUSB glue layer for - the Ingenic JZ4740 SoC, modelled after the many MUSB glue layers - in the kernel source tree. This layer can be found at - drivers/usb/musb/jz4740.c. In this documentation I will walk - through the basics of the jz4740.c glue layer, explaining the - different pieces and what needs to be done in order to write your - own device glue layer. - - - - - Linux MUSB Basics - - To get started on the topic, please read USB On-the-Go Basics (see - Resources) which provides an introduction of USB OTG operation at - the hardware level. A couple of wiki pages by Texas Instruments - and Analog Devices also provide an overview of the Linux kernel - MUSB configuration, albeit focused on some specific devices - provided by these companies. Finally, getting acquainted with the - USB specification at USB home page may come in handy, with - practical instance provided through the Writing USB Device Drivers - documentation (again, see Resources). - - - Linux USB stack is a layered architecture in which the MUSB - controller hardware sits at the lowest. The MUSB controller driver - abstract the MUSB controller hardware to the Linux USB stack. - - - ------------------------ - | | <------- drivers/usb/gadget - | Linux USB Core Stack | <------- drivers/usb/host - | | <------- drivers/usb/core - ------------------------ - ⬍ - -------------------------- - | | <------ drivers/usb/musb/musb_gadget.c - | MUSB Controller driver | <------ drivers/usb/musb/musb_host.c - | | <------ drivers/usb/musb/musb_core.c - -------------------------- - ⬍ - --------------------------------- - | MUSB Platform Specific Driver | - | | <-- drivers/usb/musb/jz4740.c - | aka "Glue Layer" | - --------------------------------- - ⬍ - --------------------------------- - | MUSB Controller Hardware | - --------------------------------- - - - As outlined above, the glue layer is actually the platform - specific code sitting in between the controller driver and the - controller hardware. - - - Just like a Linux USB driver needs to register itself with the - Linux USB subsystem, the MUSB glue layer needs first to register - itself with the MUSB controller driver. This will allow the - controller driver to know about which device the glue layer - supports and which functions to call when a supported device is - detected or released; remember we are talking about an embedded - controller chip here, so no insertion or removal at run-time. - - - All of this information is passed to the MUSB controller driver - through a platform_driver structure defined in the glue layer as: - - -static struct platform_driver jz4740_driver = { - .probe = jz4740_probe, - .remove = jz4740_remove, - .driver = { - .name = "musb-jz4740", - }, -}; - - - The probe and remove function pointers are called when a matching - device is detected and, respectively, released. The name string - describes the device supported by this glue layer. In the current - case it matches a platform_device structure declared in - arch/mips/jz4740/platform.c. Note that we are not using device - tree bindings here. - - - In order to register itself to the controller driver, the glue - layer goes through a few steps, basically allocating the - controller hardware resources and initialising a couple of - circuits. To do so, it needs to keep track of the information used - throughout these steps. This is done by defining a private - jz4740_glue structure: - - -struct jz4740_glue { - struct device *dev; - struct platform_device *musb; - struct clk *clk; -}; - - - The dev and musb members are both device structure variables. The - first one holds generic information about the device, since it's - the basic device structure, and the latter holds information more - closely related to the subsystem the device is registered to. The - clk variable keeps information related to the device clock - operation. - - - Let's go through the steps of the probe function that leads the - glue layer to register itself to the controller driver. - - - N.B.: For the sake of readability each function will be split in - logical parts, each part being shown as if it was independent from - the others. - - -static int jz4740_probe(struct platform_device *pdev) -{ - struct platform_device *musb; - struct jz4740_glue *glue; - struct clk *clk; - int ret; - - glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL); - if (!glue) - return -ENOMEM; - - musb = platform_device_alloc("musb-hdrc", PLATFORM_DEVID_AUTO); - if (!musb) { - dev_err(&pdev->dev, "failed to allocate musb device\n"); - return -ENOMEM; - } - - clk = devm_clk_get(&pdev->dev, "udc"); - if (IS_ERR(clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - ret = PTR_ERR(clk); - goto err_platform_device_put; - } - - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(&pdev->dev, "failed to enable clock\n"); - goto err_platform_device_put; - } - - musb->dev.parent = &pdev->dev; - - glue->dev = &pdev->dev; - glue->musb = musb; - glue->clk = clk; - - return 0; - -err_platform_device_put: - platform_device_put(musb); - return ret; -} - - - The first few lines of the probe function allocate and assign the - glue, musb and clk variables. The GFP_KERNEL flag (line 8) allows - the allocation process to sleep and wait for memory, thus being - usable in a blocking situation. The PLATFORM_DEVID_AUTO flag (line - 12) allows automatic allocation and management of device IDs in - order to avoid device namespace collisions with explicit IDs. With - devm_clk_get() (line 18) the glue layer allocates the clock -- the - devm_ prefix indicates that clk_get() is - managed: it automatically frees the allocated clock resource data - when the device is released -- and enable it. - - - Then comes the registration steps: - - -static int jz4740_probe(struct platform_device *pdev) -{ - struct musb_hdrc_platform_data *pdata = &jz4740_musb_platform_data; - - pdata->platform_ops = &jz4740_musb_ops; - - platform_set_drvdata(pdev, glue); - - ret = platform_device_add_resources(musb, pdev->resource, - pdev->num_resources); - if (ret) { - dev_err(&pdev->dev, "failed to add resources\n"); - goto err_clk_disable; - } - - ret = platform_device_add_data(musb, pdata, sizeof(*pdata)); - if (ret) { - dev_err(&pdev->dev, "failed to add platform_data\n"); - goto err_clk_disable; - } - - return 0; - -err_clk_disable: - clk_disable_unprepare(clk); -err_platform_device_put: - platform_device_put(musb); - return ret; -} - - - The first step is to pass the device data privately held by the - glue layer on to the controller driver through - platform_set_drvdata() (line 7). Next is passing on the device - resources information, also privately held at that point, through - platform_device_add_resources() (line 9). - - - Finally comes passing on the platform specific data to the - controller driver (line 16). Platform data will be discussed in - Chapter 4, but here - we are looking at the platform_ops function pointer (line 5) in - musb_hdrc_platform_data structure (line 3). This function - pointer allows the MUSB controller driver to know which function - to call for device operation: - - -static const struct musb_platform_ops jz4740_musb_ops = { - .init = jz4740_musb_init, - .exit = jz4740_musb_exit, -}; - - - Here we have the minimal case where only init and exit functions - are called by the controller driver when needed. Fact is the - JZ4740 MUSB controller is a basic controller, lacking some - features found in other controllers, otherwise we may also have - pointers to a few other functions like a power management function - or a function to switch between OTG and non-OTG modes, for - instance. - - - At that point of the registration process, the controller driver - actually calls the init function: - - -static int jz4740_musb_init(struct musb *musb) -{ - musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); - if (!musb->xceiv) { - pr_err("HS UDC: no transceiver configured\n"); - return -ENODEV; - } - - /* Silicon does not implement ConfigData register. - * Set dyn_fifo to avoid reading EP config from hardware. - */ - musb->dyn_fifo = true; - - musb->isr = jz4740_musb_interrupt; - - return 0; -} - - - The goal of jz4740_musb_init() is to get hold of the transceiver - driver data of the MUSB controller hardware and pass it on to the - MUSB controller driver, as usual. The transceiver is the circuitry - inside the controller hardware responsible for sending/receiving - the USB data. Since it is an implementation of the physical layer - of the OSI model, the transceiver is also referred to as PHY. - - - Getting hold of the MUSB PHY driver data is done with - usb_get_phy() which returns a pointer to the structure - containing the driver instance data. The next couple of - instructions (line 12 and 14) are used as a quirk and to setup - IRQ handling respectively. Quirks and IRQ handling will be - discussed later in Chapter - 5 and Chapter 3. - - -static int jz4740_musb_exit(struct musb *musb) -{ - usb_put_phy(musb->xceiv); - - return 0; -} - - - Acting as the counterpart of init, the exit function releases the - MUSB PHY driver when the controller hardware itself is about to be - released. - - - Again, note that init and exit are fairly simple in this case due - to the basic set of features of the JZ4740 controller hardware. - When writing an musb glue layer for a more complex controller - hardware, you might need to take care of more processing in those - two functions. - - - Returning from the init function, the MUSB controller driver jumps - back into the probe function: - - -static int jz4740_probe(struct platform_device *pdev) -{ - ret = platform_device_add(musb); - if (ret) { - dev_err(&pdev->dev, "failed to register musb device\n"); - goto err_clk_disable; - } - - return 0; - -err_clk_disable: - clk_disable_unprepare(clk); -err_platform_device_put: - platform_device_put(musb); - return ret; -} - - - This is the last part of the device registration process where the - glue layer adds the controller hardware device to Linux kernel - device hierarchy: at this stage, all known information about the - device is passed on to the Linux USB core stack. - - -static int jz4740_remove(struct platform_device *pdev) -{ - struct jz4740_glue *glue = platform_get_drvdata(pdev); - - platform_device_unregister(glue->musb); - clk_disable_unprepare(glue->clk); - - return 0; -} - - - Acting as the counterpart of probe, the remove function unregister - the MUSB controller hardware (line 5) and disable the clock (line - 6), allowing it to be gated. - - - - - Handling IRQs - - Additionally to the MUSB controller hardware basic setup and - registration, the glue layer is also responsible for handling the - IRQs: - - -static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) -{ - unsigned long flags; - irqreturn_t retval = IRQ_NONE; - struct musb *musb = __hci; - - spin_lock_irqsave(&musb->lock, flags); - - musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB); - musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX); - musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX); - - /* - * The controller is gadget only, the state of the host mode IRQ bits is - * undefined. Mask them to make sure that the musb driver core will - * never see them set - */ - musb->int_usb &= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME | - MUSB_INTR_RESET | MUSB_INTR_SOF; - - if (musb->int_usb || musb->int_tx || musb->int_rx) - retval = musb_interrupt(musb); - - spin_unlock_irqrestore(&musb->lock, flags); - - return retval; -} - - - Here the glue layer mostly has to read the relevant hardware - registers and pass their values on to the controller driver which - will handle the actual event that triggered the IRQ. - - - The interrupt handler critical section is protected by the - spin_lock_irqsave() and counterpart spin_unlock_irqrestore() - functions (line 7 and 24 respectively), which prevent the - interrupt handler code to be run by two different threads at the - same time. - - - Then the relevant interrupt registers are read (line 9 to 11): - - - - - MUSB_INTRUSB: indicates which USB interrupts are currently - active, - - - - - MUSB_INTRTX: indicates which of the interrupts for TX - endpoints are currently active, - - - - - MUSB_INTRRX: indicates which of the interrupts for TX - endpoints are currently active. - - - - - Note that musb_readb() is used to read 8-bit registers at most, - while musb_readw() allows us to read at most 16-bit registers. - There are other functions that can be used depending on the size - of your device registers. See musb_io.h for more information. - - - Instruction on line 18 is another quirk specific to the JZ4740 - USB device controller, which will be discussed later in Chapter 5. - - - The glue layer still needs to register the IRQ handler though. - Remember the instruction on line 14 of the init function: - - -static int jz4740_musb_init(struct musb *musb) -{ - musb->isr = jz4740_musb_interrupt; - - return 0; -} - - - This instruction sets a pointer to the glue layer IRQ handler - function, in order for the controller hardware to call the handler - back when an IRQ comes from the controller hardware. The interrupt - handler is now implemented and registered. - - - - - Device Platform Data - - In order to write an MUSB glue layer, you need to have some data - describing the hardware capabilities of your controller hardware, - which is called the platform data. - - - Platform data is specific to your hardware, though it may cover a - broad range of devices, and is generally found somewhere in the - arch/ directory, depending on your device architecture. - - - For instance, platform data for the JZ4740 SoC is found in - arch/mips/jz4740/platform.c. In the platform.c file each device of - the JZ4740 SoC is described through a set of structures. - - - Here is the part of arch/mips/jz4740/platform.c that covers the - USB Device Controller (UDC): - - -/* USB Device Controller */ -struct platform_device jz4740_udc_xceiv_device = { - .name = "usb_phy_gen_xceiv", - .id = 0, -}; - -static struct resource jz4740_udc_resources[] = { - [0] = { - .start = JZ4740_UDC_BASE_ADDR, - .end = JZ4740_UDC_BASE_ADDR + 0x10000 - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = JZ4740_IRQ_UDC, - .end = JZ4740_IRQ_UDC, - .flags = IORESOURCE_IRQ, - .name = "mc", - }, -}; - -struct platform_device jz4740_udc_device = { - .name = "musb-jz4740", - .id = -1, - .dev = { - .dma_mask = &jz4740_udc_device.dev.coherent_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, - .num_resources = ARRAY_SIZE(jz4740_udc_resources), - .resource = jz4740_udc_resources, -}; - - - The jz4740_udc_xceiv_device platform device structure (line 2) - describes the UDC transceiver with a name and id number. - - - At the time of this writing, note that - "usb_phy_gen_xceiv" is the specific name to be used for - all transceivers that are either built-in with reference USB IP or - autonomous and doesn't require any PHY programming. You will need - to set CONFIG_NOP_USB_XCEIV=y in the kernel configuration to make - use of the corresponding transceiver driver. The id field could be - set to -1 (equivalent to PLATFORM_DEVID_NONE), -2 (equivalent to - PLATFORM_DEVID_AUTO) or start with 0 for the first device of this - kind if we want a specific id number. - - - The jz4740_udc_resources resource structure (line 7) defines the - UDC registers base addresses. - - - The first array (line 9 to 11) defines the UDC registers base - memory addresses: start points to the first register memory - address, end points to the last register memory address and the - flags member defines the type of resource we are dealing with. So - IORESOURCE_MEM is used to define the registers memory addresses. - The second array (line 14 to 17) defines the UDC IRQ registers - addresses. Since there is only one IRQ register available for the - JZ4740 UDC, start and end point at the same address. The - IORESOURCE_IRQ flag tells that we are dealing with IRQ resources, - and the name "mc" is in fact hard-coded in the MUSB core - in order for the controller driver to retrieve this IRQ resource - by querying it by its name. - - - Finally, the jz4740_udc_device platform device structure (line 21) - describes the UDC itself. - - - The "musb-jz4740" name (line 22) defines the MUSB - driver that is used for this device; remember this is in fact - the name that we used in the jz4740_driver platform driver - structure in Chapter - 2. The id field (line 23) is set to -1 (equivalent to - PLATFORM_DEVID_NONE) since we do not need an id for the device: - the MUSB controller driver was already set to allocate an - automatic id in Chapter - 2. In the dev field we care for DMA related information - here. The dma_mask field (line 25) defines the width of the DMA - mask that is going to be used, and coherent_dma_mask (line 26) - has the same purpose but for the alloc_coherent DMA mappings: in - both cases we are using a 32 bits mask. Then the resource field - (line 29) is simply a pointer to the resource structure defined - before, while the num_resources field (line 28) keeps track of - the number of arrays defined in the resource structure (in this - case there were two resource arrays defined before). - - - With this quick overview of the UDC platform data at the arch/ - level now done, let's get back to the MUSB glue layer specific - platform data in drivers/usb/musb/jz4740.c: - - -static struct musb_hdrc_config jz4740_musb_config = { - /* Silicon does not implement USB OTG. */ - .multipoint = 0, - /* Max EPs scanned, driver will decide which EP can be used. */ - .num_eps = 4, - /* RAMbits needed to configure EPs from table */ - .ram_bits = 9, - .fifo_cfg = jz4740_musb_fifo_cfg, - .fifo_cfg_size = ARRAY_SIZE(jz4740_musb_fifo_cfg), -}; - -static struct musb_hdrc_platform_data jz4740_musb_platform_data = { - .mode = MUSB_PERIPHERAL, - .config = &jz4740_musb_config, -}; - - - First the glue layer configures some aspects of the controller - driver operation related to the controller hardware specifics. - This is done through the jz4740_musb_config musb_hdrc_config - structure. - - - Defining the OTG capability of the controller hardware, the - multipoint member (line 3) is set to 0 (equivalent to false) - since the JZ4740 UDC is not OTG compatible. Then num_eps (line - 5) defines the number of USB endpoints of the controller - hardware, including endpoint 0: here we have 3 endpoints + - endpoint 0. Next is ram_bits (line 7) which is the width of the - RAM address bus for the MUSB controller hardware. This - information is needed when the controller driver cannot - automatically configure endpoints by reading the relevant - controller hardware registers. This issue will be discussed when - we get to device quirks in Chapter - 5. Last two fields (line 8 and 9) are also about device - quirks: fifo_cfg points to the USB endpoints configuration table - and fifo_cfg_size keeps track of the size of the number of - entries in that configuration table. More on that later in Chapter 5. - - - Then this configuration is embedded inside - jz4740_musb_platform_data musb_hdrc_platform_data structure (line - 11): config is a pointer to the configuration structure itself, - and mode tells the controller driver if the controller hardware - may be used as MUSB_HOST only, MUSB_PERIPHERAL only or MUSB_OTG - which is a dual mode. - - - Remember that jz4740_musb_platform_data is then used to convey - platform data information as we have seen in the probe function - in Chapter 2 - - - - - Device Quirks - - Completing the platform data specific to your device, you may also - need to write some code in the glue layer to work around some - device specific limitations. These quirks may be due to some - hardware bugs, or simply be the result of an incomplete - implementation of the USB On-the-Go specification. - - - The JZ4740 UDC exhibits such quirks, some of which we will discuss - here for the sake of insight even though these might not be found - in the controller hardware you are working on. - - - Let's get back to the init function first: - - -static int jz4740_musb_init(struct musb *musb) -{ - musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); - if (!musb->xceiv) { - pr_err("HS UDC: no transceiver configured\n"); - return -ENODEV; - } - - /* Silicon does not implement ConfigData register. - * Set dyn_fifo to avoid reading EP config from hardware. - */ - musb->dyn_fifo = true; - - musb->isr = jz4740_musb_interrupt; - - return 0; -} - - - Instruction on line 12 helps the MUSB controller driver to work - around the fact that the controller hardware is missing registers - that are used for USB endpoints configuration. - - - Without these registers, the controller driver is unable to read - the endpoints configuration from the hardware, so we use line 12 - instruction to bypass reading the configuration from silicon, and - rely on a hard-coded table that describes the endpoints - configuration instead: - - -static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { -{ .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, -{ .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, -{ .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, }, -}; - - - Looking at the configuration table above, we see that each - endpoints is described by three fields: hw_ep_num is the endpoint - number, style is its direction (either FIFO_TX for the controller - driver to send packets in the controller hardware, or FIFO_RX to - receive packets from hardware), and maxpacket defines the maximum - size of each data packet that can be transmitted over that - endpoint. Reading from the table, the controller driver knows that - endpoint 1 can be used to send and receive USB data packets of 512 - bytes at once (this is in fact a bulk in/out endpoint), and - endpoint 2 can be used to send data packets of 64 bytes at once - (this is in fact an interrupt endpoint). - - - Note that there is no information about endpoint 0 here: that one - is implemented by default in every silicon design, with a - predefined configuration according to the USB specification. For - more examples of endpoint configuration tables, see musb_core.c. - - - Let's now get back to the interrupt handler function: - - -static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) -{ - unsigned long flags; - irqreturn_t retval = IRQ_NONE; - struct musb *musb = __hci; - - spin_lock_irqsave(&musb->lock, flags); - - musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB); - musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX); - musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX); - - /* - * The controller is gadget only, the state of the host mode IRQ bits is - * undefined. Mask them to make sure that the musb driver core will - * never see them set - */ - musb->int_usb &= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME | - MUSB_INTR_RESET | MUSB_INTR_SOF; - - if (musb->int_usb || musb->int_tx || musb->int_rx) - retval = musb_interrupt(musb); - - spin_unlock_irqrestore(&musb->lock, flags); - - return retval; -} - - - Instruction on line 18 above is a way for the controller driver to - work around the fact that some interrupt bits used for USB host - mode operation are missing in the MUSB_INTRUSB register, thus left - in an undefined hardware state, since this MUSB controller - hardware is used in peripheral mode only. As a consequence, the - glue layer masks these missing bits out to avoid parasite - interrupts by doing a logical AND operation between the value read - from MUSB_INTRUSB and the bits that are actually implemented in - the register. - - - These are only a couple of the quirks found in the JZ4740 USB - device controller. Some others were directly addressed in the MUSB - core since the fixes were generic enough to provide a better - handling of the issues for others controller hardware eventually. - - - - - Conclusion - - Writing a Linux MUSB glue layer should be a more accessible task, - as this documentation tries to show the ins and outs of this - exercise. - - - The JZ4740 USB device controller being fairly simple, I hope its - glue layer serves as a good example for the curious mind. Used - with the current MUSB glue layers, this documentation should - provide enough guidance to get started; should anything gets out - of hand, the linux-usb mailing list archive is another helpful - resource to browse through. - - - - - Acknowledgements - - Many thanks to Lars-Peter Clausen and Maarten ter Huurne for - answering my questions while I was writing the JZ4740 glue layer - and for helping me out getting the code in good shape. - - - I would also like to thank the Qi-Hardware community at large for - its cheerful guidance and support. - - - - - Resources - - USB Home Page: - http://www.usb.org - - - linux-usb Mailing List Archives: - http://marc.info/?l=linux-usb - - - USB On-the-Go Basics: - http://www.maximintegrated.com/app-notes/index.mvp/id/1822 - - - Writing USB Device Drivers: - https://www.kernel.org/doc/htmldocs/writing_usb_driver/index.html - - - Texas Instruments USB Configuration Wiki Page: - http://processors.wiki.ti.com/index.php/Usbgeneralpage - - - Analog Devices Blackfin MUSB Configuration: - http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:musb - - - -
diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl deleted file mode 100644 index 3210dcf741c9743c53924e56d3f2768353674157..0000000000000000000000000000000000000000 --- a/Documentation/DocBook/writing_usb_driver.tmpl +++ /dev/null @@ -1,412 +0,0 @@ - - - - - - Writing USB Device Drivers - - - - Greg - Kroah-Hartman - -
- greg@kroah.com -
-
-
-
- - - 2001-2002 - Greg Kroah-Hartman - - - - - This documentation is free software; you can redistribute - it and/or modify it under the terms of the GNU General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later - version. - - - - This program is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307 USA - - - - For more details see the file COPYING in the source - distribution of Linux. - - - - This documentation is based on an article published in - Linux Journal Magazine, October 2001, Issue 90. - - -
- - - - - Introduction - - The Linux USB subsystem has grown from supporting only two different - types of devices in the 2.2.7 kernel (mice and keyboards), to over 20 - different types of devices in the 2.4 kernel. Linux currently supports - almost all USB class devices (standard types of devices like keyboards, - mice, modems, printers and speakers) and an ever-growing number of - vendor-specific devices (such as USB to serial converters, digital - cameras, Ethernet devices and MP3 players). For a full list of the - different USB devices currently supported, see Resources. - - - The remaining kinds of USB devices that do not have support on Linux are - almost all vendor-specific devices. Each vendor decides to implement a - custom protocol to talk to their device, so a custom driver usually needs - to be created. Some vendors are open with their USB protocols and help - with the creation of Linux drivers, while others do not publish them, and - developers are forced to reverse-engineer. See Resources for some links - to handy reverse-engineering tools. - - - Because each different protocol causes a new driver to be created, I have - written a generic USB driver skeleton, modelled after the pci-skeleton.c - file in the kernel source tree upon which many PCI network drivers have - been based. This USB skeleton can be found at drivers/usb/usb-skeleton.c - in the kernel source tree. In this article I will walk through the basics - of the skeleton driver, explaining the different pieces and what needs to - be done to customize it to your specific device. - - - - - Linux USB Basics - - If you are going to write a Linux USB driver, please become familiar with - the USB protocol specification. It can be found, along with many other - useful documents, at the USB home page (see Resources). An excellent - introduction to the Linux USB subsystem can be found at the USB Working - Devices List (see Resources). It explains how the Linux USB subsystem is - structured and introduces the reader to the concept of USB urbs - (USB Request Blocks), which are essential to USB drivers. - - - The first thing a Linux USB driver needs to do is register itself with - the Linux USB subsystem, giving it some information about which devices - the driver supports and which functions to call when a device supported - by the driver is inserted or removed from the system. All of this - information is passed to the USB subsystem in the usb_driver structure. - The skeleton driver declares a usb_driver as: - - -static struct usb_driver skel_driver = { - .name = "skeleton", - .probe = skel_probe, - .disconnect = skel_disconnect, - .fops = &skel_fops, - .minor = USB_SKEL_MINOR_BASE, - .id_table = skel_table, -}; - - - The variable name is a string that describes the driver. It is used in - informational messages printed to the system log. The probe and - disconnect function pointers are called when a device that matches the - information provided in the id_table variable is either seen or removed. - - - The fops and minor variables are optional. Most USB drivers hook into - another kernel subsystem, such as the SCSI, network or TTY subsystem. - These types of drivers register themselves with the other kernel - subsystem, and any user-space interactions are provided through that - interface. But for drivers that do not have a matching kernel subsystem, - such as MP3 players or scanners, a method of interacting with user space - is needed. The USB subsystem provides a way to register a minor device - number and a set of file_operations function pointers that enable this - user-space interaction. The skeleton driver needs this kind of interface, - so it provides a minor starting number and a pointer to its - file_operations functions. - - - The USB driver is then registered with a call to usb_register, usually in - the driver's init function, as shown here: - - -static int __init usb_skel_init(void) -{ - int result; - - /* register this driver with the USB subsystem */ - result = usb_register(&skel_driver); - if (result < 0) { - err("usb_register failed for the "__FILE__ "driver." - "Error number %d", result); - return -1; - } - - return 0; -} -module_init(usb_skel_init); - - - When the driver is unloaded from the system, it needs to deregister - itself with the USB subsystem. This is done with the usb_deregister - function: - - -static void __exit usb_skel_exit(void) -{ - /* deregister this driver with the USB subsystem */ - usb_deregister(&skel_driver); -} -module_exit(usb_skel_exit); - - - To enable the linux-hotplug system to load the driver automatically when - the device is plugged in, you need to create a MODULE_DEVICE_TABLE. The - following code tells the hotplug scripts that this module supports a - single device with a specific vendor and product ID: - - -/* table of devices that work with this driver */ -static struct usb_device_id skel_table [] = { - { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) }, - { } /* Terminating entry */ -}; -MODULE_DEVICE_TABLE (usb, skel_table); - - - There are other macros that can be used in describing a usb_device_id for - drivers that support a whole class of USB drivers. See usb.h for more - information on this. - - - - - Device operation - - When a device is plugged into the USB bus that matches the device ID - pattern that your driver registered with the USB core, the probe function - is called. The usb_device structure, interface number and the interface ID - are passed to the function: - - -static int skel_probe(struct usb_interface *interface, - const struct usb_device_id *id) - - - The driver now needs to verify that this device is actually one that it - can accept. If so, it returns 0. - If not, or if any error occurs during initialization, an errorcode - (such as -ENOMEM or -ENODEV) - is returned from the probe function. - - - In the skeleton driver, we determine what end points are marked as bulk-in - and bulk-out. We create buffers to hold the data that will be sent and - received from the device, and a USB urb to write data to the device is - initialized. - - - Conversely, when the device is removed from the USB bus, the disconnect - function is called with the device pointer. The driver needs to clean any - private data that has been allocated at this time and to shut down any - pending urbs that are in the USB system. - - - Now that the device is plugged into the system and the driver is bound to - the device, any of the functions in the file_operations structure that - were passed to the USB subsystem will be called from a user program trying - to talk to the device. The first function called will be open, as the - program tries to open the device for I/O. We increment our private usage - count and save a pointer to our internal structure in the file - structure. This is done so that future calls to file operations will - enable the driver to determine which device the user is addressing. All - of this is done with the following code: - - -/* increment our usage count for the module */ -++skel->open_count; - -/* save our object in the file's private structure */ -file->private_data = dev; - - - After the open function is called, the read and write functions are called - to receive and send data to the device. In the skel_write function, we - receive a pointer to some data that the user wants to send to the device - and the size of the data. The function determines how much data it can - send to the device based on the size of the write urb it has created (this - size depends on the size of the bulk out end point that the device has). - Then it copies the data from user space to kernel space, points the urb to - the data and submits the urb to the USB subsystem. This can be seen in - the following code: - - -/* we can only write as much as 1 urb will hold */ -bytes_written = (count > skel->bulk_out_size) ? skel->bulk_out_size : count; - -/* copy the data from user space into our urb */ -copy_from_user(skel->write_urb->transfer_buffer, buffer, bytes_written); - -/* set up our urb */ -usb_fill_bulk_urb(skel->write_urb, - skel->dev, - usb_sndbulkpipe(skel->dev, skel->bulk_out_endpointAddr), - skel->write_urb->transfer_buffer, - bytes_written, - skel_write_bulk_callback, - skel); - -/* send the data out the bulk port */ -result = usb_submit_urb(skel->write_urb); -if (result) { - err("Failed submitting write urb, error %d", result); -} - - - When the write urb is filled up with the proper information using the - usb_fill_bulk_urb function, we point the urb's completion callback to call our - own skel_write_bulk_callback function. This function is called when the - urb is finished by the USB subsystem. The callback function is called in - interrupt context, so caution must be taken not to do very much processing - at that time. Our implementation of skel_write_bulk_callback merely - reports if the urb was completed successfully or not and then returns. - - - The read function works a bit differently from the write function in that - we do not use an urb to transfer data from the device to the driver. - Instead we call the usb_bulk_msg function, which can be used to send or - receive data from a device without having to create urbs and handle - urb completion callback functions. We call the usb_bulk_msg function, - giving it a buffer into which to place any data received from the device - and a timeout value. If the timeout period expires without receiving any - data from the device, the function will fail and return an error message. - This can be shown with the following code: - - -/* do an immediate bulk read to get data from the device */ -retval = usb_bulk_msg (skel->dev, - usb_rcvbulkpipe (skel->dev, - skel->bulk_in_endpointAddr), - skel->bulk_in_buffer, - skel->bulk_in_size, - &count, HZ*10); -/* if the read was successful, copy the data to user space */ -if (!retval) { - if (copy_to_user (buffer, skel->bulk_in_buffer, count)) - retval = -EFAULT; - else - retval = count; -} - - - The usb_bulk_msg function can be very useful for doing single reads or - writes to a device; however, if you need to read or write constantly to a - device, it is recommended to set up your own urbs and submit them to the - USB subsystem. - - - When the user program releases the file handle that it has been using to - talk to the device, the release function in the driver is called. In this - function we decrement our private usage count and wait for possible - pending writes: - - -/* decrement our usage count for the device */ ---skel->open_count; - - - One of the more difficult problems that USB drivers must be able to handle - smoothly is the fact that the USB device may be removed from the system at - any point in time, even if a program is currently talking to it. It needs - to be able to shut down any current reads and writes and notify the - user-space programs that the device is no longer there. The following - code (function skel_delete) - is an example of how to do this: - -static inline void skel_delete (struct usb_skel *dev) -{ - kfree (dev->bulk_in_buffer); - if (dev->bulk_out_buffer != NULL) - usb_free_coherent (dev->udev, dev->bulk_out_size, - dev->bulk_out_buffer, - dev->write_urb->transfer_dma); - usb_free_urb (dev->write_urb); - kfree (dev); -} - - - If a program currently has an open handle to the device, we reset the flag - device_present. For - every read, write, release and other functions that expect a device to be - present, the driver first checks this flag to see if the device is - still present. If not, it releases that the device has disappeared, and a - -ENODEV error is returned to the user-space program. When the release - function is eventually called, it determines if there is no device - and if not, it does the cleanup that the skel_disconnect - function normally does if there are no open files on the device (see - Listing 5). - - - - - Isochronous Data - - This usb-skeleton driver does not have any examples of interrupt or - isochronous data being sent to or from the device. Interrupt data is sent - almost exactly as bulk data is, with a few minor exceptions. Isochronous - data works differently with continuous streams of data being sent to or - from the device. The audio and video camera drivers are very good examples - of drivers that handle isochronous data and will be useful if you also - need to do this. - - - - - Conclusion - - Writing Linux USB device drivers is not a difficult task as the - usb-skeleton driver shows. This driver, combined with the other current - USB drivers, should provide enough examples to help a beginning author - create a working driver in a minimal amount of time. The linux-usb-devel - mailing list archives also contain a lot of helpful information. - - - - - Resources - - The Linux USB Project: http://www.linux-usb.org/ - - - Linux Hotplug Project: http://linux-hotplug.sourceforge.net/ - - - Linux USB Working Devices List: http://www.qbik.ch/usb/devices/ - - - linux-usb-devel Mailing List Archives: http://marc.theaimsgroup.com/?l=linux-usb-devel - - - Programming Guide for Linux USB Device Drivers: http://usb.cs.tum.edu/usbdoc - - - USB Home Page: http://www.usb.org - - - -
diff --git a/Documentation/PCI/00-INDEX b/Documentation/PCI/00-INDEX index 147231f1613ef3a0a8df8d9addc2a754d5ac69af..00c9a90b6f3854050f1778961be557b10c25fa0d 100644 --- a/Documentation/PCI/00-INDEX +++ b/Documentation/PCI/00-INDEX @@ -12,3 +12,13 @@ pci.txt - info on the PCI subsystem for device driver authors pcieaer-howto.txt - the PCI Express Advanced Error Reporting Driver Guide HOWTO +endpoint/pci-endpoint.txt + - guide to add endpoint controller driver and endpoint function driver. +endpoint/pci-endpoint-cfs.txt + - guide to use configfs to configure the PCI endpoint function. +endpoint/pci-test-function.txt + - specification of *PCI test* function device. +endpoint/pci-test-howto.txt + - userguide for PCI endpoint test function. +endpoint/function/binding/ + - binding documentation for PCI endpoint function diff --git a/Documentation/PCI/endpoint/function/binding/pci-test.txt b/Documentation/PCI/endpoint/function/binding/pci-test.txt new file mode 100644 index 0000000000000000000000000000000000000000..3b68b955fb503374b0426e75de246971111dd463 --- /dev/null +++ b/Documentation/PCI/endpoint/function/binding/pci-test.txt @@ -0,0 +1,17 @@ +PCI TEST ENDPOINT FUNCTION + +name: Should be "pci_epf_test" to bind to the pci_epf_test driver. + +Configurable Fields: +vendorid : should be 0x104c +deviceid : should be 0xb500 for DRA74x and 0xb501 for DRA72x +revid : don't care +progif_code : don't care +subclass_code : don't care +baseclass_code : should be 0xff +cache_line_size : don't care +subsys_vendor_id : don't care +subsys_id : don't care +interrupt_pin : Should be 1 - INTA, 2 - INTB, 3 - INTC, 4 -INTD +msi_interrupts : Should be 1 to 32 depending on the number of MSI interrupts + to test diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.txt b/Documentation/PCI/endpoint/pci-endpoint-cfs.txt new file mode 100644 index 0000000000000000000000000000000000000000..d740f29960a47edc21e1811d3f496496c8865ecd --- /dev/null +++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.txt @@ -0,0 +1,105 @@ + CONFIGURING PCI ENDPOINT USING CONFIGFS + Kishon Vijay Abraham I + +The PCI Endpoint Core exposes configfs entry (pci_ep) to configure the +PCI endpoint function and to bind the endpoint function +with the endpoint controller. (For introducing other mechanisms to +configure the PCI Endpoint Function refer to [1]). + +*) Mounting configfs + +The PCI Endpoint Core layer creates pci_ep directory in the mounted configfs +directory. configfs can be mounted using the following command. + + mount -t configfs none /sys/kernel/config + +*) Directory Structure + +The pci_ep configfs has two directories at its root: controllers and +functions. Every EPC device present in the system will have an entry in +the *controllers* directory and and every EPF driver present in the system +will have an entry in the *functions* directory. + +/sys/kernel/config/pci_ep/ + .. controllers/ + .. functions/ + +*) Creating EPF Device + +Every registered EPF driver will be listed in controllers directory. The +entries corresponding to EPF driver will be created by the EPF core. + +/sys/kernel/config/pci_ep/functions/ + .. / + ... / + ... / + .. / + ... / + ... / + +In order to create a of the type probed by , the +user has to create a directory inside . + +Every directory consists of the following entries that can be +used to configure the standard configuration header of the endpoint function. +(These entries are created by the framework when any new is +created) + + .. / + ... / + ... vendorid + ... deviceid + ... revid + ... progif_code + ... subclass_code + ... baseclass_code + ... cache_line_size + ... subsys_vendor_id + ... subsys_id + ... interrupt_pin + +*) EPC Device + +Every registered EPC device will be listed in controllers directory. The +entries corresponding to EPC device will be created by the EPC core. + +/sys/kernel/config/pci_ep/controllers/ + .. / + ... / + ... / + ... start + .. / + ... / + ... / + ... start + +The directory will have a list of symbolic links to +. These symbolic links should be created by the user to +represent the functions present in the endpoint device. + +The directory will also have a *start* field. Once +"1" is written to this field, the endpoint device will be ready to +establish the link with the host. This is usually done after +all the EPF devices are created and linked with the EPC device. + + + | controllers/ + | / + | + | start + | functions/ + | / + | / + | vendorid + | deviceid + | revid + | progif_code + | subclass_code + | baseclass_code + | cache_line_size + | subsys_vendor_id + | subsys_id + | interrupt_pin + | function + +[1] -> Documentation/PCI/endpoint/pci-endpoint.txt diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b1d668292904373a8a15b563c5cb41903699416 --- /dev/null +++ b/Documentation/PCI/endpoint/pci-endpoint.txt @@ -0,0 +1,215 @@ + PCI ENDPOINT FRAMEWORK + Kishon Vijay Abraham I + +This document is a guide to use the PCI Endpoint Framework in order to create +endpoint controller driver, endpoint function driver, and using configfs +interface to bind the function driver to the controller driver. + +1. Introduction + +Linux has a comprehensive PCI subsystem to support PCI controllers that +operates in Root Complex mode. The subsystem has capability to scan PCI bus, +assign memory resources and IRQ resources, load PCI driver (based on +vendor ID, device ID), support other services like hot-plug, power management, +advanced error reporting and virtual channels. + +However the PCI controller IP integrated in some SoCs is capable of operating +either in Root Complex mode or Endpoint mode. PCI Endpoint Framework will +add endpoint mode support in Linux. This will help to run Linux in an +EP system which can have a wide variety of use cases from testing or +validation, co-processor accelerator, etc. + +2. PCI Endpoint Core + +The PCI Endpoint Core layer comprises 3 components: the Endpoint Controller +library, the Endpoint Function library, and the configfs layer to bind the +endpoint function with the endpoint controller. + +2.1 PCI Endpoint Controller(EPC) Library + +The EPC library provides APIs to be used by the controller that can operate +in endpoint mode. It also provides APIs to be used by function driver/library +in order to implement a particular endpoint function. + +2.1.1 APIs for the PCI controller Driver + +This section lists the APIs that the PCI Endpoint core provides to be used +by the PCI controller driver. + +*) devm_pci_epc_create()/pci_epc_create() + + The PCI controller driver should implement the following ops: + * write_header: ops to populate configuration space header + * set_bar: ops to configure the BAR + * clear_bar: ops to reset the BAR + * alloc_addr_space: ops to allocate in PCI controller address space + * free_addr_space: ops to free the allocated address space + * raise_irq: ops to raise a legacy or MSI interrupt + * start: ops to start the PCI link + * stop: ops to stop the PCI link + + The PCI controller driver can then create a new EPC device by invoking + devm_pci_epc_create()/pci_epc_create(). + +*) devm_pci_epc_destroy()/pci_epc_destroy() + + The PCI controller driver can destroy the EPC device created by either + devm_pci_epc_create() or pci_epc_create() using devm_pci_epc_destroy() or + pci_epc_destroy(). + +*) pci_epc_linkup() + + In order to notify all the function devices that the EPC device to which + they are linked has established a link with the host, the PCI controller + driver should invoke pci_epc_linkup(). + +*) pci_epc_mem_init() + + Initialize the pci_epc_mem structure used for allocating EPC addr space. + +*) pci_epc_mem_exit() + + Cleanup the pci_epc_mem structure allocated during pci_epc_mem_init(). + +2.1.2 APIs for the PCI Endpoint Function Driver + +This section lists the APIs that the PCI Endpoint core provides to be used +by the PCI endpoint function driver. + +*) pci_epc_write_header() + + The PCI endpoint function driver should use pci_epc_write_header() to + write the standard configuration header to the endpoint controller. + +*) pci_epc_set_bar() + + The PCI endpoint function driver should use pci_epc_set_bar() to configure + the Base Address Register in order for the host to assign PCI addr space. + Register space of the function driver is usually configured + using this API. + +*) pci_epc_clear_bar() + + The PCI endpoint function driver should use pci_epc_clear_bar() to reset + the BAR. + +*) pci_epc_raise_irq() + + The PCI endpoint function driver should use pci_epc_raise_irq() to raise + Legacy Interrupt or MSI Interrupt. + +*) pci_epc_mem_alloc_addr() + + The PCI endpoint function driver should use pci_epc_mem_alloc_addr(), to + allocate memory address from EPC addr space which is required to access + RC's buffer + +*) pci_epc_mem_free_addr() + + The PCI endpoint function driver should use pci_epc_mem_free_addr() to + free the memory space allocated using pci_epc_mem_alloc_addr(). + +2.1.3 Other APIs + +There are other APIs provided by the EPC library. These are used for binding +the EPF device with EPC device. pci-ep-cfs.c can be used as reference for +using these APIs. + +*) pci_epc_get() + + Get a reference to the PCI endpoint controller based on the device name of + the controller. + +*) pci_epc_put() + + Release the reference to the PCI endpoint controller obtained using + pci_epc_get() + +*) pci_epc_add_epf() + + Add a PCI endpoint function to a PCI endpoint controller. A PCIe device + can have up to 8 functions according to the specification. + +*) pci_epc_remove_epf() + + Remove the PCI endpoint function from PCI endpoint controller. + +*) pci_epc_start() + + The PCI endpoint function driver should invoke pci_epc_start() once it + has configured the endpoint function and wants to start the PCI link. + +*) pci_epc_stop() + + The PCI endpoint function driver should invoke pci_epc_stop() to stop + the PCI LINK. + +2.2 PCI Endpoint Function(EPF) Library + +The EPF library provides APIs to be used by the function driver and the EPC +library to provide endpoint mode functionality. + +2.2.1 APIs for the PCI Endpoint Function Driver + +This section lists the APIs that the PCI Endpoint core provides to be used +by the PCI endpoint function driver. + +*) pci_epf_register_driver() + + The PCI Endpoint Function driver should implement the following ops: + * bind: ops to perform when a EPC device has been bound to EPF device + * unbind: ops to perform when a binding has been lost between a EPC + device and EPF device + * linkup: ops to perform when the EPC device has established a + connection with a host system + + The PCI Function driver can then register the PCI EPF driver by using + pci_epf_register_driver(). + +*) pci_epf_unregister_driver() + + The PCI Function driver can unregister the PCI EPF driver by using + pci_epf_unregister_driver(). + +*) pci_epf_alloc_space() + + The PCI Function driver can allocate space for a particular BAR using + pci_epf_alloc_space(). + +*) pci_epf_free_space() + + The PCI Function driver can free the allocated space + (using pci_epf_alloc_space) by invoking pci_epf_free_space(). + +2.2.2 APIs for the PCI Endpoint Controller Library +This section lists the APIs that the PCI Endpoint core provides to be used +by the PCI endpoint controller library. + +*) pci_epf_linkup() + + The PCI endpoint controller library invokes pci_epf_linkup() when the + EPC device has established the connection to the host. + +2.2.2 Other APIs +There are other APIs provided by the EPF library. These are used to notify +the function driver when the EPF device is bound to the EPC device. +pci-ep-cfs.c can be used as reference for using these APIs. + +*) pci_epf_create() + + Create a new PCI EPF device by passing the name of the PCI EPF device. + This name will be used to bind the the EPF device to a EPF driver. + +*) pci_epf_destroy() + + Destroy the created PCI EPF device. + +*) pci_epf_bind() + + pci_epf_bind() should be invoked when the EPF device has been bound to + a EPC device. + +*) pci_epf_unbind() + + pci_epf_unbind() should be invoked when the binding between EPC device + and EPF device is lost. diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt new file mode 100644 index 0000000000000000000000000000000000000000..0c519c9bf94a35802bff554004ab23021e1382a6 --- /dev/null +++ b/Documentation/PCI/endpoint/pci-test-function.txt @@ -0,0 +1,66 @@ + PCI TEST + Kishon Vijay Abraham I + +Traditionally PCI RC has always been validated by using standard +PCI cards like ethernet PCI cards or USB PCI cards or SATA PCI cards. +However with the addition of EP-core in linux kernel, it is possible +to configure a PCI controller that can operate in EP mode to work as +a test device. + +The PCI endpoint test device is a virtual device (defined in software) +used to test the endpoint functionality and serve as a sample driver +for other PCI endpoint devices (to use the EP framework). + +The PCI endpoint test device has the following registers: + + 1) PCI_ENDPOINT_TEST_MAGIC + 2) PCI_ENDPOINT_TEST_COMMAND + 3) PCI_ENDPOINT_TEST_STATUS + 4) PCI_ENDPOINT_TEST_SRC_ADDR + 5) PCI_ENDPOINT_TEST_DST_ADDR + 6) PCI_ENDPOINT_TEST_SIZE + 7) PCI_ENDPOINT_TEST_CHECKSUM + +*) PCI_ENDPOINT_TEST_MAGIC + +This register will be used to test BAR0. A known pattern will be written +and read back from MAGIC register to verify BAR0. + +*) PCI_ENDPOINT_TEST_COMMAND: + +This register will be used by the host driver to indicate the function +that the endpoint device must perform. + +Bitfield Description: + Bit 0 : raise legacy IRQ + Bit 1 : raise MSI IRQ + Bit 2 - 7 : MSI interrupt number + Bit 8 : read command (read data from RC buffer) + Bit 9 : write command (write data to RC buffer) + Bit 10 : copy command (copy data from one RC buffer to another + RC buffer) + +*) PCI_ENDPOINT_TEST_STATUS + +This register reflects the status of the PCI endpoint device. + +Bitfield Description: + Bit 0 : read success + Bit 1 : read fail + Bit 2 : write success + Bit 3 : write fail + Bit 4 : copy success + Bit 5 : copy fail + Bit 6 : IRQ raised + Bit 7 : source address is invalid + Bit 8 : destination address is invalid + +*) PCI_ENDPOINT_TEST_SRC_ADDR + +This register contains the source address (RC buffer address) for the +COPY/READ command. + +*) PCI_ENDPOINT_TEST_DST_ADDR + +This register contains the destination address (RC buffer address) for +the COPY/WRITE command. diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt new file mode 100644 index 0000000000000000000000000000000000000000..75f48c3bb19135b11300d23d09b5446de431bf5e --- /dev/null +++ b/Documentation/PCI/endpoint/pci-test-howto.txt @@ -0,0 +1,179 @@ + PCI TEST USERGUIDE + Kishon Vijay Abraham I + +This document is a guide to help users use pci-epf-test function driver +and pci_endpoint_test host driver for testing PCI. The list of steps to +be followed in the host side and EP side is given below. + +1. Endpoint Device + +1.1 Endpoint Controller Devices + +To find the list of endpoint controller devices in the system: + + # ls /sys/class/pci_epc/ + 51000000.pcie_ep + +If PCI_ENDPOINT_CONFIGFS is enabled + # ls /sys/kernel/config/pci_ep/controllers + 51000000.pcie_ep + +1.2 Endpoint Function Drivers + +To find the list of endpoint function drivers in the system: + + # ls /sys/bus/pci-epf/drivers + pci_epf_test + +If PCI_ENDPOINT_CONFIGFS is enabled + # ls /sys/kernel/config/pci_ep/functions + pci_epf_test + +1.3 Creating pci-epf-test Device + +PCI endpoint function device can be created using the configfs. To create +pci-epf-test device, the following commands can be used + + # mount -t configfs none /sys/kernel/config + # cd /sys/kernel/config/pci_ep/ + # mkdir functions/pci_epf_test/func1 + +The "mkdir func1" above creates the pci-epf-test function device that will +be probed by pci_epf_test driver. + +The PCI endpoint framework populates the directory with the following +configurable fields. + + # ls functions/pci_epf_test/func1 + baseclass_code interrupt_pin revid subsys_vendor_id + cache_line_size msi_interrupts subclass_code vendorid + deviceid progif_code subsys_id + +The PCI endpoint function driver populates these entries with default values +when the device is bound to the driver. The pci-epf-test driver populates +vendorid with 0xffff and interrupt_pin with 0x0001 + + # cat functions/pci_epf_test/func1/vendorid + 0xffff + # cat functions/pci_epf_test/func1/interrupt_pin + 0x0001 + +1.4 Configuring pci-epf-test Device + +The user can configure the pci-epf-test device using configfs entry. In order +to change the vendorid and the number of MSI interrupts used by the function +device, the following commands can be used. + + # echo 0x104c > functions/pci_epf_test/func1/vendorid + # echo 0xb500 > functions/pci_epf_test/func1/deviceid + # echo 16 > functions/pci_epf_test/func1/msi_interrupts + +1.5 Binding pci-epf-test Device to EP Controller + +In order for the endpoint function device to be useful, it has to be bound to +a PCI endpoint controller driver. Use the configfs to bind the function +device to one of the controller driver present in the system. + + # ln -s functions/pci_epf_test/func1 controllers/51000000.pcie_ep/ + +Once the above step is completed, the PCI endpoint is ready to establish a link +with the host. + +1.6 Start the Link + +In order for the endpoint device to establish a link with the host, the _start_ +field should be populated with '1'. + + # echo 1 > controllers/51000000.pcie_ep/start + +2. RootComplex Device + +2.1 lspci Output + +Note that the devices listed here correspond to the value populated in 1.4 above + + 00:00.0 PCI bridge: Texas Instruments Device 8888 (rev 01) + 01:00.0 Unassigned class [ff00]: Texas Instruments Device b500 + +2.2 Using Endpoint Test function Device + +pcitest.sh added in tools/pci/ can be used to run all the default PCI endpoint +tests. Before pcitest.sh can be used pcitest.c should be compiled using the +following commands. + + cd + make headers_install ARCH=arm + arm-linux-gnueabihf-gcc -Iusr/include tools/pci/pcitest.c -o pcitest + cp pcitest /usr/sbin/ + cp tools/pci/pcitest.sh + +2.2.1 pcitest.sh Output + # ./pcitest.sh + BAR tests + + BAR0: OKAY + BAR1: OKAY + BAR2: OKAY + BAR3: OKAY + BAR4: NOT OKAY + BAR5: NOT OKAY + + Interrupt tests + + LEGACY IRQ: NOT OKAY + MSI1: OKAY + MSI2: OKAY + MSI3: OKAY + MSI4: OKAY + MSI5: OKAY + MSI6: OKAY + MSI7: OKAY + MSI8: OKAY + MSI9: OKAY + MSI10: OKAY + MSI11: OKAY + MSI12: OKAY + MSI13: OKAY + MSI14: OKAY + MSI15: OKAY + MSI16: OKAY + MSI17: NOT OKAY + MSI18: NOT OKAY + MSI19: NOT OKAY + MSI20: NOT OKAY + MSI21: NOT OKAY + MSI22: NOT OKAY + MSI23: NOT OKAY + MSI24: NOT OKAY + MSI25: NOT OKAY + MSI26: NOT OKAY + MSI27: NOT OKAY + MSI28: NOT OKAY + MSI29: NOT OKAY + MSI30: NOT OKAY + MSI31: NOT OKAY + MSI32: NOT OKAY + + Read Tests + + READ ( 1 bytes): OKAY + READ ( 1024 bytes): OKAY + READ ( 1025 bytes): OKAY + READ (1024000 bytes): OKAY + READ (1024001 bytes): OKAY + + Write Tests + + WRITE ( 1 bytes): OKAY + WRITE ( 1024 bytes): OKAY + WRITE ( 1025 bytes): OKAY + WRITE (1024000 bytes): OKAY + WRITE (1024001 bytes): OKAY + + Copy Tests + + COPY ( 1 bytes): OKAY + COPY ( 1024 bytes): OKAY + COPY ( 1025 bytes): OKAY + COPY (1024000 bytes): OKAY + COPY (1024001 bytes): OKAY diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt index da3b2176d5da74a4e279266c7b0326e27bfdf8cf..0b6bb3ef449ee7a8ee45afe9b4be61a2859ac8e3 100644 --- a/Documentation/PCI/pci-error-recovery.txt +++ b/Documentation/PCI/pci-error-recovery.txt @@ -11,7 +11,7 @@ Many PCI bus controllers are able to detect a variety of hardware PCI errors on the bus, such as parity errors on the data and address -busses, as well as SERR and PERR errors. Some of the more advanced +buses, as well as SERR and PERR errors. Some of the more advanced chipsets are able to deal with these errors; these include PCI-E chipsets, and the PCI-host bridges found on IBM Power4, Power5 and Power6-based pSeries boxes. A typical action taken is to disconnect the affected device, @@ -173,7 +173,7 @@ is STEP 6 (Permanent Failure). >>> a value of 0xff on read, and writes will be dropped. If more than >>> EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH >>> assumes that the device driver has gone into an infinite loop ->>> and prints an error to syslog. A reboot is then required to +>>> and prints an error to syslog. A reboot is then required to >>> get the device working again. STEP 2: MMIO Enabled @@ -231,14 +231,14 @@ proceeds to STEP 4 (Slot Reset) STEP 3: Link Reset ------------------ The platform resets the link. This is a PCI-Express specific step -and is done whenever a non-fatal error has been detected that can be +and is done whenever a fatal error has been detected that can be "solved" by resetting the link. STEP 4: Slot Reset ------------------ In response to a return value of PCI_ERS_RESULT_NEED_RESET, the -the platform will perform a slot reset on the requesting PCI device(s). +the platform will perform a slot reset on the requesting PCI device(s). The actual steps taken by a platform to perform a slot reset will be platform-dependent. Upon completion of slot reset, the platform will call the device slot_reset() callback. @@ -258,7 +258,7 @@ configuration registers to initialize to their default conditions. For most PCI devices, a soft reset will be sufficient for recovery. Optional fundamental reset is provided to support a limited number -of PCI Express PCI devices for which a soft reset is not sufficient +of PCI Express devices for which a soft reset is not sufficient for recovery. If the platform supports PCI hotplug, then the reset might be @@ -303,7 +303,7 @@ driver performs device init only from PCI function 0: Same as above. Drivers for PCI Express cards that require a fundamental reset must -set the needs_freset bit in the pci_dev structure in their probe function. +set the needs_freset bit in the pci_dev structure in their probe function. For example, the QLogic qla2xxx driver sets the needs_freset bit for certain PCI card types: diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt index 2d91ae25198295dc8559d2ef8d00cae1b5314989..d2a84151e99c0a37242d5dd754a3e571ab58ac53 100644 --- a/Documentation/PCI/pci-iov-howto.txt +++ b/Documentation/PCI/pci-iov-howto.txt @@ -68,6 +68,18 @@ To disable SR-IOV capability: echo 0 > \ /sys/bus/pci/devices//sriov_numvfs +To enable auto probing VFs by a compatible driver on the host, run +command below before enabling SR-IOV capabilities. This is the +default behavior. + echo 1 > \ + /sys/bus/pci/devices//sriov_drivers_autoprobe + +To disable auto probing VFs by a compatible driver on the host, run +command below before enabling SR-IOV capabilities. Updating this +entry will not affect VFs which are already probed. + echo 0 > \ + /sys/bus/pci/devices//sriov_drivers_autoprobe + 3.2 Usage example Following piece of code illustrates the usage of the SR-IOV API. diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index f773a264ae02918ef6b79f002ff17d0bd4e5df65..1672573b037a73ebe4b169f7044e525b7a4bf246 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -17,7 +17,7 @@ rcu_dereference.txt rcubarrier.txt - RCU and Unloadable Modules rculist_nulls.txt - - RCU list primitives for use with SLAB_DESTROY_BY_RCU + - RCU list primitives for use with SLAB_TYPESAFE_BY_RCU rcuref.txt - Reference-count design for elements of lists/arrays protected by RCU rcu.txt diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index d583c653a703f0645c10c11cd8c1dfe761095cd6..38d6d800761f7cd944c7b290eeea19dbc0d3866e 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -19,6 +19,8 @@ to each other. The rcu_state Structure
  • The rcu_node Structure +
  • + The rcu_segcblist Structure
  • The rcu_data Structure
  • @@ -841,6 +843,134 @@ for lockdep lock-class names. Finally, lines 64-66 produce an error if the maximum number of CPUs is too large for the specified fanout. +

    +The rcu_segcblist Structure

    + +The rcu_segcblist structure maintains a segmented list of +callbacks as follows: + +
    + 1 #define RCU_DONE_TAIL        0
    + 2 #define RCU_WAIT_TAIL        1
    + 3 #define RCU_NEXT_READY_TAIL  2
    + 4 #define RCU_NEXT_TAIL        3
    + 5 #define RCU_CBLIST_NSEGS     4
    + 6
    + 7 struct rcu_segcblist {
    + 8   struct rcu_head *head;
    + 9   struct rcu_head **tails[RCU_CBLIST_NSEGS];
    +10   unsigned long gp_seq[RCU_CBLIST_NSEGS];
    +11   long len;
    +12   long len_lazy;
    +13 };
    +
    + +

    +The segments are as follows: + +

      +
    1. RCU_DONE_TAIL: Callbacks whose grace periods have elapsed. + These callbacks are ready to be invoked. +
    2. RCU_WAIT_TAIL: Callbacks that are waiting for the + current grace period. + Note that different CPUs can have different ideas about which + grace period is current, hence the ->gp_seq field. +
    3. RCU_NEXT_READY_TAIL: Callbacks waiting for the next + grace period to start. +
    4. RCU_NEXT_TAIL: Callbacks that have not yet been + associated with a grace period. +
    + +

    +The ->head pointer references the first callback or +is NULL if the list contains no callbacks (which is +not the same as being empty). +Each element of the ->tails[] array references the +->next pointer of the last callback in the corresponding +segment of the list, or the list's ->head pointer if +that segment and all previous segments are empty. +If the corresponding segment is empty but some previous segment is +not empty, then the array element is identical to its predecessor. +Older callbacks are closer to the head of the list, and new callbacks +are added at the tail. +This relationship between the ->head pointer, the +->tails[] array, and the callbacks is shown in this +diagram: + +

    nxtlist.svg + +

    In this figure, the ->head pointer references the +first +RCU callback in the list. +The ->tails[RCU_DONE_TAIL] array element references +the ->head pointer itself, indicating that none +of the callbacks is ready to invoke. +The ->tails[RCU_WAIT_TAIL] array element references callback +CB 2's ->next pointer, which indicates that +CB 1 and CB 2 are both waiting on the current grace period, +give or take possible disagreements about exactly which grace period +is the current one. +The ->tails[RCU_NEXT_READY_TAIL] array element +references the same RCU callback that ->tails[RCU_WAIT_TAIL] +does, which indicates that there are no callbacks waiting on the next +RCU grace period. +The ->tails[RCU_NEXT_TAIL] array element references +CB 4's ->next pointer, indicating that all the +remaining RCU callbacks have not yet been assigned to an RCU grace +period. +Note that the ->tails[RCU_NEXT_TAIL] array element +always references the last RCU callback's ->next pointer +unless the callback list is empty, in which case it references +the ->head pointer. + +

    +There is one additional important special case for the +->tails[RCU_NEXT_TAIL] array element: It can be NULL +when this list is disabled. +Lists are disabled when the corresponding CPU is offline or when +the corresponding CPU's callbacks are offloaded to a kthread, +both of which are described elsewhere. + +

    CPUs advance their callbacks from the +RCU_NEXT_TAIL to the RCU_NEXT_READY_TAIL to the +RCU_WAIT_TAIL to the RCU_DONE_TAIL list segments +as grace periods advance. + +

    The ->gp_seq[] array records grace-period +numbers corresponding to the list segments. +This is what allows different CPUs to have different ideas as to +which is the current grace period while still avoiding premature +invocation of their callbacks. +In particular, this allows CPUs that go idle for extended periods +to determine which of their callbacks are ready to be invoked after +reawakening. + +

    The ->len counter contains the number of +callbacks in ->head, and the +->len_lazy contains the number of those callbacks that +are known to only free memory, and whose invocation can therefore +be safely deferred. + +

    Important note: It is the ->len field that +determines whether or not there are callbacks associated with +this rcu_segcblist structure, not the ->head +pointer. +The reason for this is that all the ready-to-invoke callbacks +(that is, those in the RCU_DONE_TAIL segment) are extracted +all at once at callback-invocation time. +If callback invocation must be postponed, for example, because a +high-priority process just woke up on this CPU, then the remaining +callbacks are placed back on the RCU_DONE_TAIL segment. +Either way, the ->len and ->len_lazy counts +are adjusted after the corresponding callbacks have been invoked, and so +again it is the ->len count that accurately reflects whether +or not there are callbacks associated with this rcu_segcblist +structure. +Of course, off-CPU sampling of the ->len count requires +the use of appropriate synchronization, for example, memory barriers. +This synchronization can be a bit subtle, particularly in the case +of rcu_barrier(). +

    The rcu_data Structure

    @@ -983,62 +1113,18 @@ choice. as follows:
    - 1 struct rcu_head *nxtlist;
    - 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
    - 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
    - 4 long qlen_lazy;
    - 5 long qlen;
    - 6 long qlen_last_fqs_check;
    + 1 struct rcu_segcblist cblist;
    + 2 long qlen_last_fqs_check;
    + 3 unsigned long n_cbs_invoked;
    + 4 unsigned long n_nocbs_invoked;
    + 5 unsigned long n_cbs_orphaned;
    + 6 unsigned long n_cbs_adopted;
      7 unsigned long n_force_qs_snap;
    - 8 unsigned long n_cbs_invoked;
    - 9 unsigned long n_cbs_orphaned;
    -10 unsigned long n_cbs_adopted;
    -11 long blimit;
    + 8 long blimit;
     
    -

    The ->nxtlist pointer and the -->nxttail[] array form a four-segment list with -older callbacks near the head and newer ones near the tail. -Each segment contains callbacks with the corresponding relationship -to the current grace period. -The pointer out of the end of each of the four segments is referenced -by the element of the ->nxttail[] array indexed by -RCU_DONE_TAIL (for callbacks handled by a prior grace period), -RCU_WAIT_TAIL (for callbacks waiting on the current grace period), -RCU_NEXT_READY_TAIL (for callbacks that will wait on the next -grace period), and -RCU_NEXT_TAIL (for callbacks that are not yet associated -with a specific grace period) -respectively, as shown in the following figure. - -

    nxtlist.svg - -

    In this figure, the ->nxtlist pointer references the -first -RCU callback in the list. -The ->nxttail[RCU_DONE_TAIL] array element references -the ->nxtlist pointer itself, indicating that none -of the callbacks is ready to invoke. -The ->nxttail[RCU_WAIT_TAIL] array element references callback -CB 2's ->next pointer, which indicates that -CB 1 and CB 2 are both waiting on the current grace period. -The ->nxttail[RCU_NEXT_READY_TAIL] array element -references the same RCU callback that ->nxttail[RCU_WAIT_TAIL] -does, which indicates that there are no callbacks waiting on the next -RCU grace period. -The ->nxttail[RCU_NEXT_TAIL] array element references -CB 4's ->next pointer, indicating that all the -remaining RCU callbacks have not yet been assigned to an RCU grace -period. -Note that the ->nxttail[RCU_NEXT_TAIL] array element -always references the last RCU callback's ->next pointer -unless the callback list is empty, in which case it references -the ->nxtlist pointer. - -

    CPUs advance their callbacks from the -RCU_NEXT_TAIL to the RCU_NEXT_READY_TAIL to the -RCU_WAIT_TAIL to the RCU_DONE_TAIL list segments -as grace periods advance. +

    The ->cblist structure is the segmented callback list +described earlier. The CPU advances the callbacks in its rcu_data structure whenever it notices that another RCU grace period has completed. The CPU detects the completion of an RCU grace period by noticing @@ -1049,16 +1135,7 @@ Recall that each rcu_node structure's ->completed field is updated at the end of each grace period. -

    The ->nxtcompleted[] array records grace-period -numbers corresponding to the list segments. -This allows CPUs that go idle for extended periods to determine -which of their callbacks are ready to be invoked after reawakening. - -

    The ->qlen counter contains the number of -callbacks in ->nxtlist, and the -->qlen_lazy contains the number of those callbacks that -are known to only free memory, and whose invocation can therefore -be safely deferred. +

    The ->qlen_last_fqs_check and ->n_force_qs_snap coordinate the forcing of quiescent states from call_rcu() and friends when callback @@ -1069,6 +1146,10 @@ lists grow excessively long. fields count the number of callbacks invoked, sent to other CPUs when this CPU goes offline, and received from other CPUs when those other CPUs go offline. +The ->n_nocbs_invoked is used when the CPU's callbacks +are offloaded to a kthread. + +

    Finally, the ->blimit counter is the maximum number of RCU callbacks that may be invoked at a given time. @@ -1104,6 +1185,9 @@ Its fields are as follows: 1 int dynticks_nesting; 2 int dynticks_nmi_nesting; 3 atomic_t dynticks; + 4 bool rcu_need_heavy_qs; + 5 unsigned long rcu_qs_ctr; + 6 bool rcu_urgent_qs;

    The ->dynticks_nesting field counts the @@ -1117,11 +1201,32 @@ NMIs are counted by the ->dynticks_nmi_nesting field, except that NMIs that interrupt non-dyntick-idle execution are not counted. -

    Finally, the ->dynticks field counts the corresponding +

    The ->dynticks field counts the corresponding CPU's transitions to and from dyntick-idle mode, so that this counter has an even value when the CPU is in dyntick-idle mode and an odd value otherwise. +

    The ->rcu_need_heavy_qs field is used +to record the fact that the RCU core code would really like to +see a quiescent state from the corresponding CPU, so much so that +it is willing to call for heavy-weight dyntick-counter operations. +This flag is checked by RCU's context-switch and cond_resched() +code, which provide a momentary idle sojourn in response. + +

    The ->rcu_qs_ctr field is used to record +quiescent states from cond_resched(). +Because cond_resched() can execute quite frequently, this +must be quite lightweight, as in a non-atomic increment of this +per-CPU field. + +

    Finally, the ->rcu_urgent_qs field is used to record +the fact that the RCU core code would really like to see a quiescent +state from the corresponding CPU, with the various other fields indicating +just how badly RCU wants this quiescent state. +This flag is checked by RCU's context-switch and cond_resched() +code, which, if nothing else, non-atomically increment ->rcu_qs_ctr +in response. + diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg index abc4cc73a0977195317d7a0397f1f5e7c52f35b3..0223e79c38e0036373dcfc919b7296bd87fa300e 100644 --- a/Documentation/RCU/Design/Data-Structures/nxtlist.svg +++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg @@ -19,7 +19,7 @@ id="svg2" version="1.1" inkscape:version="0.48.4 r9939" - sodipodi:docname="nxtlist.fig"> + sodipodi:docname="segcblist.svg"> @@ -28,7 +28,7 @@ image/svg+xml - + @@ -241,61 +241,51 @@ xml:space="preserve" x="225" y="675" - fill="#000000" - font-family="Courier" font-style="normal" font-weight="bold" font-size="324" - text-anchor="start" - id="text64">nxtlist + id="text64" + style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->headnxttail[RCU_DONE_TAIL] + id="text66" + style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_DONE_TAIL]nxttail[RCU_WAIT_TAIL] + id="text68" + style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_WAIT_TAIL]nxttail[RCU_NEXT_READY_TAIL] + id="text70" + style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_NEXT_READY_TAIL]nxttail[RCU_NEXT_TAIL] + id="text72" + style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_NEXT_TAIL] Funnel locking and wait/wakeup.
  • Use of Workqueues.
  • Stall warnings. +
  • Mid-boot operation.

    Idle-CPU Checks

    @@ -524,7 +525,7 @@ their grace periods and carrying out their wakeups. In earlier implementations, the task requesting the expedited grace period also drove it to completion. This straightforward approach had the disadvantage of needing to -account for signals sent to user tasks, +account for POSIX signals sent to user tasks, so more recent implemementations use the Linux kernel's workqueues. @@ -533,8 +534,8 @@ The requesting task still does counter snapshotting and funnel-lock processing, but the task reaching the top of the funnel lock does a schedule_work() (from _synchronize_rcu_expedited() so that a workqueue kthread does the actual grace-period processing. -Because workqueue kthreads do not accept signals, grace-period-wait -processing need not allow for signals. +Because workqueue kthreads do not accept POSIX signals, grace-period-wait +processing need not allow for POSIX signals. In addition, this approach allows wakeups for the previous expedited grace period to be overlapped with processing for the next expedited @@ -586,6 +587,46 @@ blocking the current grace period are printed. Each stall warning results in another pass through the loop, but the second and subsequent passes use longer stall times. +

    Mid-boot operation

    + +

    +The use of workqueues has the advantage that the expedited +grace-period code need not worry about POSIX signals. +Unfortunately, it has the +corresponding disadvantage that workqueues cannot be used until +they are initialized, which does not happen until some time after +the scheduler spawns the first task. +Given that there are parts of the kernel that really do want to +execute grace periods during this mid-boot “dead zone”, +expedited grace periods must do something else during thie time. + +

    +What they do is to fall back to the old practice of requiring that the +requesting task drive the expedited grace period, as was the case +before the use of workqueues. +However, the requesting task is only required to drive the grace period +during the mid-boot dead zone. +Before mid-boot, a synchronous grace period is a no-op. +Some time after mid-boot, workqueues are used. + +

    +Non-expedited non-SRCU synchronous grace periods must also operate +normally during mid-boot. +This is handled by causing non-expedited grace periods to take the +expedited code path during mid-boot. + +

    +The current code assumes that there are no POSIX signals during +the mid-boot dead zone. +However, if an overwhelming need for POSIX signals somehow arises, +appropriate adjustments can be made to the expedited stall-warning code. +One such adjustment would reinstate the pre-workqueue stall-warning +checks, but only during the mid-boot dead zone. + +

    +With this refinement, synchronous grace periods can now be used from +task context pretty much any time during the life of the kernel. +

    Summary

    diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 21593496aca6f617957b667f4f3aa5d627e86c4d..f60adf112663aad038e928e7b7cce04a65752277 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -659,8 +659,9 @@ systems with more than one CPU: In other words, a given instance of synchronize_rcu() can avoid waiting on a given RCU read-side critical section only if it can prove that synchronize_rcu() started first. + -

    +

    A related question is “When rcu_read_lock() doesn't generate any code, why does it matter how it relates to a grace period?” @@ -675,8 +676,9 @@ systems with more than one CPU: within the critical section, in which case none of the accesses within the critical section may observe the effects of any access following the grace period. + -

    +

    As of late 2016, mathematical models of RCU take this viewpoint, for example, see slides 62 and 63 of the @@ -1616,8 +1618,8 @@ CPUs should at least make reasonable forward progress. In return for its shorter latencies, synchronize_rcu_expedited() is permitted to impose modest degradation of real-time latency on non-idle online CPUs. -That said, it will likely be necessary to take further steps to reduce this -degradation, hopefully to roughly that of a scheduling-clock interrupt. +Here, “modest” means roughly the same latency +degradation as a scheduling-clock interrupt.

    There are a number of situations where even @@ -1913,12 +1915,9 @@ This requirement is another factor driving batching of grace periods, but it is also the driving force behind the checks for large numbers of queued RCU callbacks in the call_rcu() code path. Finally, high update rates should not delay RCU read-side critical -sections, although some read-side delays can occur when using +sections, although some small read-side delays can occur when using synchronize_rcu_expedited(), courtesy of this function's use -of try_stop_cpus(). -(In the future, synchronize_rcu_expedited() will be -converted to use lighter-weight inter-processor interrupts (IPIs), -but this will still disturb readers, though to a much smaller degree.) +of smp_call_function_single().

    Although all three of these corner cases were understood in the early @@ -2154,7 +2153,8 @@ as will rcu_assign_pointer().

    Although call_rcu() may be invoked at any time during boot, callbacks are not guaranteed to be invoked until after -the scheduler is fully up and running. +all of RCU's kthreads have been spawned, which occurs at +early_initcall() time. This delay in callback invocation is due to the fact that RCU does not invoke callbacks until it is fully initialized, and this full initialization cannot occur until after the scheduler has initialized itself to the @@ -2167,8 +2167,10 @@ on what operations those callbacks could invoke. Perhaps surprisingly, synchronize_rcu(), synchronize_rcu_bh() (discussed below), -and -synchronize_sched() +synchronize_sched(), +synchronize_rcu_expedited(), +synchronize_rcu_bh_expedited(), and +synchronize_sched_expedited() will all operate normally during very early boot, the reason being that there is only one CPU and preemption is disabled. @@ -2178,45 +2180,59 @@ state and thus a grace period, so the early-boot implementation can be a no-op.

    -Both synchronize_rcu_bh() and synchronize_sched() -continue to operate normally through the remainder of boot, courtesy -of the fact that preemption is disabled across their RCU read-side -critical sections and also courtesy of the fact that there is still -only one CPU. -However, once the scheduler starts initializing, preemption is enabled. -There is still only a single CPU, but the fact that preemption is enabled -means that the no-op implementation of synchronize_rcu() no -longer works in CONFIG_PREEMPT=y kernels. -Therefore, as soon as the scheduler starts initializing, the early-boot -fastpath is disabled. -This means that synchronize_rcu() switches to its runtime -mode of operation where it posts callbacks, which in turn means that -any call to synchronize_rcu() will block until the corresponding -callback is invoked. -Unfortunately, the callback cannot be invoked until RCU's runtime -grace-period machinery is up and running, which cannot happen until -the scheduler has initialized itself sufficiently to allow RCU's -kthreads to be spawned. -Therefore, invoking synchronize_rcu() during scheduler -initialization can result in deadlock. +However, once the scheduler has spawned its first kthread, this early +boot trick fails for synchronize_rcu() (as well as for +synchronize_rcu_expedited()) in CONFIG_PREEMPT=y +kernels. +The reason is that an RCU read-side critical section might be preempted, +which means that a subsequent synchronize_rcu() really does have +to wait for something, as opposed to simply returning immediately. +Unfortunately, synchronize_rcu() can't do this until all of +its kthreads are spawned, which doesn't happen until some time during +early_initcalls() time. +But this is no excuse: RCU is nevertheless required to correctly handle +synchronous grace periods during this time period. +Once all of its kthreads are up and running, RCU starts running +normally.

  •  
    Quick Quiz:
     
    Quick Quiz:
    - So what happens with synchronize_rcu() during - scheduler initialization for CONFIG_PREEMPT=n - kernels? + How can RCU possibly handle grace periods before all of its + kthreads have been spawned???
    Answer:
    - In CONFIG_PREEMPT=n kernel, synchronize_rcu() - maps directly to synchronize_sched(). - Therefore, synchronize_rcu() works normally throughout - boot in CONFIG_PREEMPT=n kernels. - However, your code must also work in CONFIG_PREEMPT=y kernels, - so it is still necessary to avoid invoking synchronize_rcu() - during scheduler initialization. + Very carefully! + + +

    + During the “dead zone” between the time that the + scheduler spawns the first task and the time that all of RCU's + kthreads have been spawned, all synchronous grace periods are + handled by the expedited grace-period mechanism. + At runtime, this expedited mechanism relies on workqueues, but + during the dead zone the requesting task itself drives the + desired expedited grace period. + Because dead-zone execution takes place within task context, + everything works. + Once the dead zone ends, expedited grace periods go back to + using workqueues, as is required to avoid problems that would + otherwise occur when a user task received a POSIX signal while + driving an expedited grace period. + + +

    + And yes, this does mean that it is unhelpful to send POSIX + signals to random tasks between the time that the scheduler + spawns its first kthread and the time that RCU's kthreads + have all been spawned. + If there ever turns out to be a good reason for sending POSIX + signals during that time, appropriate adjustments will be made. + (If it turns out that POSIX signals are sent during this time for + no good reason, other adjustments will be made, appropriate + or otherwise.)

     
    @@ -2295,12 +2311,61 @@ situation, and Dipankar Sarma incorporated rcu_barrier() into RCU. The need for rcu_barrier() for module unloading became apparent later. +

    +Important note: The rcu_barrier() function is not, +repeat, not, obligated to wait for a grace period. +It is instead only required to wait for RCU callbacks that have +already been posted. +Therefore, if there are no RCU callbacks posted anywhere in the system, +rcu_barrier() is within its rights to return immediately. +Even if there are callbacks posted, rcu_barrier() does not +necessarily need to wait for a grace period. + + + + + + + + +
     
    Quick Quiz:
    + Wait a minute! + Each RCU callbacks must wait for a grace period to complete, + and rcu_barrier() must wait for each pre-existing + callback to be invoked. + Doesn't rcu_barrier() therefore need to wait for + a full grace period if there is even one callback posted anywhere + in the system? +
    Answer:
    + Absolutely not!!! + + +

    + Yes, each RCU callbacks must wait for a grace period to complete, + but it might well be partly (or even completely) finished waiting + by the time rcu_barrier() is invoked. + In that case, rcu_barrier() need only wait for the + remaining portion of the grace period to elapse. + So even if there are quite a few callbacks posted, + rcu_barrier() might well return quite quickly. + + +

    + So if you need to wait for a grace period as well as for all + pre-existing callbacks, you will need to invoke both + synchronize_rcu() and rcu_barrier(). + If latency is a concern, you can always use workqueues + to invoke them concurrently. +

     
    +

    Hotplug CPU

    The Linux kernel supports CPU hotplug, which means that CPUs can come and go. -It is of course illegal to use any RCU API member from an offline CPU. +It is of course illegal to use any RCU API member from an offline CPU, +with the exception of SRCU read-side +critical sections. This requirement was present from day one in DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug implementation is “interesting.” @@ -2310,19 +2375,18 @@ The Linux-kernel CPU-hotplug implementation has notifiers that are used to allow the various kernel subsystems (including RCU) to respond appropriately to a given CPU-hotplug operation. Most RCU operations may be invoked from CPU-hotplug notifiers, -including even normal synchronous grace-period operations -such as synchronize_rcu(). -However, expedited grace-period operations such as -synchronize_rcu_expedited() are not supported, -due to the fact that current implementations block CPU-hotplug -operations, which could result in deadlock. +including even synchronous grace-period operations such as +synchronize_rcu() and synchronize_rcu_expedited().

    -In addition, all-callback-wait operations such as +However, all-callback-wait operations such as rcu_barrier() are also not supported, due to the fact that there are phases of CPU-hotplug operations where the outgoing CPU's callbacks will not be invoked until after the CPU-hotplug operation ends, which could also result in deadlock. +Furthermore, rcu_barrier() blocks CPU-hotplug operations +during its execution, which results in another type of deadlock +when invoked from a CPU-hotplug notifier.

    Scheduler and RCU

    @@ -2863,6 +2927,27 @@ It also motivates the smp_mb__after_srcu_read_unlock() API, which, in combination with srcu_read_unlock(), guarantees a full memory barrier. +

    +Also unlike other RCU flavors, SRCU's callbacks-wait function +srcu_barrier() may be invoked from CPU-hotplug notifiers, +though this is not necessarily a good idea. +The reason that this is possible is that SRCU is insensitive +to whether or not a CPU is online, which means that srcu_barrier() +need not exclude CPU-hotplug operations. + +

    +As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating +a locking bottleneck present in prior kernel versions. +Although this will allow users to put much heavier stress on +call_srcu(), it is important to note that SRCU does not +yet take any special steps to deal with callback flooding. +So if you are posting (say) 10,000 SRCU callbacks per second per CPU, +you are probably totally OK, but if you intend to post (say) 1,000,000 +SRCU callbacks per second per CPU, please run some tests first. +SRCU just might need a few adjustment to deal with that sort of load. +Of course, your mileage may vary based on the speed of your CPUs and +the size of your memory. +

    The SRCU API @@ -3021,8 +3106,8 @@ to do some redesign to avoid this scalability problem.

    RCU disables CPU hotplug in a few places, perhaps most notably in the -expedited grace-period and rcu_barrier() operations. -If there is a strong reason to use expedited grace periods in CPU-hotplug +rcu_barrier() operations. +If there is a strong reason to use rcu_barrier() in CPU-hotplug notifiers, it will be necessary to avoid disabling CPU hotplug. This would introduce some complexity, so there had better be a very good reason. @@ -3096,9 +3181,5 @@ Andy Lutomirski for their help in rendering this article human readable, and to Michelle Rankin for her support of this effort. Other contributions are acknowledged in the Linux kernel's git archive. -The cartoon is copyright (c) 2013 by Melissa Broussard, -and is provided -under the terms of the Creative Commons Attribution-Share Alike 3.0 -United States license. diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt index c0bf2441a2baf5f254ed64d6e4c117dda808fd2c..b2a613f16d747828e35fd182a0c9fe06c1107d0f 100644 --- a/Documentation/RCU/rcu_dereference.txt +++ b/Documentation/RCU/rcu_dereference.txt @@ -138,6 +138,15 @@ o Be very careful about comparing pointers obtained from This sort of comparison occurs frequently when scanning RCU-protected circular linked lists. + Note that if checks for being within an RCU read-side + critical section are not required and the pointer is never + dereferenced, rcu_access_pointer() should be used in place + of rcu_dereference(). The rcu_access_pointer() primitive + does not require an enclosing read-side critical section, + and also omits the smp_read_barrier_depends() included in + rcu_dereference(), which in turn should provide a small + performance gain in some CPUs (e.g., the DEC Alpha). + o The comparison is against a pointer that references memory that was initialized "a long time ago." The reason this is safe is that even if misordering occurs, the diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt index 18f9651ff23d411e96737ec070d4fc6bc29c50fe..8151f0195f7688386e8057ad34fdc9cc153b7fb8 100644 --- a/Documentation/RCU/rculist_nulls.txt +++ b/Documentation/RCU/rculist_nulls.txt @@ -1,5 +1,5 @@ Using hlist_nulls to protect read-mostly linked lists and -objects using SLAB_DESTROY_BY_RCU allocations. +objects using SLAB_TYPESAFE_BY_RCU allocations. Please read the basics in Documentation/RCU/listRCU.txt @@ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way to solve following problem : A typical RCU linked list managing objects which are -allocated with SLAB_DESTROY_BY_RCU kmem_cache can +allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use following algos : 1) Lookup algo @@ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock() 3) Remove algo -------------- Nothing special here, we can use a standard RCU hlist deletion. -But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused +But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused very very fast (before the end of RCU grace period) if (put_last_reference_on(obj) { diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index e93d04133fe7ae58cfb6c3da3e0717b0640f3b9a..96a3d81837e1b120098eccfd1b95b0bfc4947be9 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -1,9 +1,102 @@ Using RCU's CPU Stall Detector -The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall -detector, which detects conditions that unduly delay RCU grace periods. -This module parameter enables CPU stall detection by default, but -may be overridden via boot-time parameter or at runtime via sysfs. +This document first discusses what sorts of issues RCU's CPU stall +detector can locate, and then discusses kernel parameters and Kconfig +options that can be used to fine-tune the detector's operation. Finally, +this document explains the stall detector's "splat" format. + + +What Causes RCU CPU Stall Warnings? + +So your kernel printed an RCU CPU stall warning. The next question is +"What caused it?" The following problems can result in RCU CPU stall +warnings: + +o A CPU looping in an RCU read-side critical section. + +o A CPU looping with interrupts disabled. + +o A CPU looping with preemption disabled. This condition can + result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh + stalls. + +o A CPU looping with bottom halves disabled. This condition can + result in RCU-sched and RCU-bh stalls. + +o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the + kernel without invoking schedule(). Note that cond_resched() + does not necessarily prevent RCU CPU stall warnings. Therefore, + if the looping in the kernel is really expected and desirable + behavior, you might need to replace some of the cond_resched() + calls with calls to cond_resched_rcu_qs(). + +o Booting Linux using a console connection that is too slow to + keep up with the boot-time console-message rate. For example, + a 115Kbaud serial console can be -way- too slow to keep up + with boot-time message rates, and will frequently result in + RCU CPU stall warning messages. Especially if you have added + debug printk()s. + +o Anything that prevents RCU's grace-period kthreads from running. + This can result in the "All QSes seen" console-log message. + This message will include information on when the kthread last + ran and how often it should be expected to run. + +o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might + happen to preempt a low-priority task in the middle of an RCU + read-side critical section. This is especially damaging if + that low-priority task is not permitted to run on any other CPU, + in which case the next RCU grace period can never complete, which + will eventually cause the system to run out of memory and hang. + While the system is in the process of running itself out of + memory, you might see stall-warning messages. + +o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that + is running at a higher priority than the RCU softirq threads. + This will prevent RCU callbacks from ever being invoked, + and in a CONFIG_PREEMPT_RCU kernel will further prevent + RCU grace periods from ever completing. Either way, the + system will eventually run out of memory and hang. In the + CONFIG_PREEMPT_RCU case, you might see stall-warning + messages. + +o A hardware or software issue shuts off the scheduler-clock + interrupt on a CPU that is not in dyntick-idle mode. This + problem really has happened, and seems to be most likely to + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. + +o A bug in the RCU implementation. + +o A hardware failure. This is quite unlikely, but has occurred + at least once in real life. A CPU failed in a running system, + becoming unresponsive, but not causing an immediate crash. + This resulted in a series of RCU CPU stall warnings, eventually + leading the realization that the CPU had failed. + +The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall +warning. Note that SRCU does -not- have CPU stall warnings. Please note +that RCU only detects CPU stalls when there is a grace period in progress. +No grace period, no CPU stall warnings. + +To diagnose the cause of the stall, inspect the stack traces. +The offending function will usually be near the top of the stack. +If you have a series of stall warnings from a single extended stall, +comparing the stack traces can often help determine where the stall +is occurring, which will usually be in the function nearest the top of +that portion of the stack which remains the same from trace to trace. +If you can reliably trigger the stall, ftrace can be quite helpful. + +RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE +and with RCU's event tracing. For information on RCU's event tracing, +see include/trace/events/rcu.h. + + +Fine-Tuning the RCU CPU Stall Detector + +The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's +CPU stall detector, which detects conditions that unduly delay RCU grace +periods. This module parameter enables CPU stall detection by default, +but may be overridden via boot-time parameter or at runtime via sysfs. The stall detector's idea of what constitutes "unduly delayed" is controlled by a set of kernel configuration variables and cpp macros: @@ -56,6 +149,9 @@ rcupdate.rcu_task_stall_timeout And continues with the output of sched_show_task() for each task stalling the current RCU-tasks grace period. + +Interpreting RCU's CPU Stall-Detector "Splats" + For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling, it will print a message similar to the following: @@ -178,89 +274,3 @@ grace period is in flight. It is entirely possible to see stall warnings from normal and from expedited grace periods at about the same time from the same run. - - -What Causes RCU CPU Stall Warnings? - -So your kernel printed an RCU CPU stall warning. The next question is -"What caused it?" The following problems can result in RCU CPU stall -warnings: - -o A CPU looping in an RCU read-side critical section. - -o A CPU looping with interrupts disabled. This condition can - result in RCU-sched and RCU-bh stalls. - -o A CPU looping with preemption disabled. This condition can - result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh - stalls. - -o A CPU looping with bottom halves disabled. This condition can - result in RCU-sched and RCU-bh stalls. - -o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the - kernel without invoking schedule(). Note that cond_resched() - does not necessarily prevent RCU CPU stall warnings. Therefore, - if the looping in the kernel is really expected and desirable - behavior, you might need to replace some of the cond_resched() - calls with calls to cond_resched_rcu_qs(). - -o Booting Linux using a console connection that is too slow to - keep up with the boot-time console-message rate. For example, - a 115Kbaud serial console can be -way- too slow to keep up - with boot-time message rates, and will frequently result in - RCU CPU stall warning messages. Especially if you have added - debug printk()s. - -o Anything that prevents RCU's grace-period kthreads from running. - This can result in the "All QSes seen" console-log message. - This message will include information on when the kthread last - ran and how often it should be expected to run. - -o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might - happen to preempt a low-priority task in the middle of an RCU - read-side critical section. This is especially damaging if - that low-priority task is not permitted to run on any other CPU, - in which case the next RCU grace period can never complete, which - will eventually cause the system to run out of memory and hang. - While the system is in the process of running itself out of - memory, you might see stall-warning messages. - -o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that - is running at a higher priority than the RCU softirq threads. - This will prevent RCU callbacks from ever being invoked, - and in a CONFIG_PREEMPT_RCU kernel will further prevent - RCU grace periods from ever completing. Either way, the - system will eventually run out of memory and hang. In the - CONFIG_PREEMPT_RCU case, you might see stall-warning - messages. - -o A hardware or software issue shuts off the scheduler-clock - interrupt on a CPU that is not in dyntick-idle mode. This - problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. - -o A bug in the RCU implementation. - -o A hardware failure. This is quite unlikely, but has occurred - at least once in real life. A CPU failed in a running system, - becoming unresponsive, but not causing an immediate crash. - This resulted in a series of RCU CPU stall warnings, eventually - leading the realization that the CPU had failed. - -The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall -warning. Note that SRCU does -not- have CPU stall warnings. Please note -that RCU only detects CPU stalls when there is a grace period in progress. -No grace period, no CPU stall warnings. - -To diagnose the cause of the stall, inspect the stack traces. -The offending function will usually be near the top of the stack. -If you have a series of stall warnings from a single extended stall, -comparing the stack traces can often help determine where the stall -is occurring, which will usually be in the function nearest the top of -that portion of the stack which remains the same from trace to trace. -If you can reliably trigger the stall, ftrace can be quite helpful. - -RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE -and with RCU's event tracing. For information on RCU's event tracing, -see include/trace/events/rcu.h. diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 5cbd8b2395b811489c68acb7da4616b4a1ef9523..8ed6c9f6133c45a54c442d04ed0814dc2dcd1a45 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -562,7 +562,9 @@ This section presents a "toy" RCU implementation that is based on familiar locking primitives. Its overhead makes it a non-starter for real-life use, as does its lack of scalability. It is also unsuitable for realtime use, since it allows scheduling latency to "bleed" from -one read-side critical section to another. +one read-side critical section to another. It also assumes recursive +reader-writer locks: If you try this with non-recursive locks, and +you allow nested rcu_read_lock() calls, you can deadlock. However, it is probably the easiest implementation to relate to, so is a good starting point. @@ -587,20 +589,21 @@ It is extremely simple: write_unlock(&rcu_gp_mutex); } -[You can ignore rcu_assign_pointer() and rcu_dereference() without -missing much. But here they are anyway. And whatever you do, don't -forget about them when submitting patches making use of RCU!] +[You can ignore rcu_assign_pointer() and rcu_dereference() without missing +much. But here are simplified versions anyway. And whatever you do, +don't forget about them when submitting patches making use of RCU!] - #define rcu_assign_pointer(p, v) ({ \ - smp_wmb(); \ - (p) = (v); \ - }) + #define rcu_assign_pointer(p, v) \ + ({ \ + smp_store_release(&(p), (v)); \ + }) - #define rcu_dereference(p) ({ \ - typeof(p) _________p1 = p; \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) + #define rcu_dereference(p) \ + ({ \ + typeof(p) _________p1 = p; \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) The rcu_read_lock() and rcu_read_unlock() primitive read-acquire @@ -925,7 +928,8 @@ d. Do you need RCU grace periods to complete even in the face e. Is your workload too update-intensive for normal use of RCU, but inappropriate for other synchronization mechanisms? - If so, consider SLAB_DESTROY_BY_RCU. But please be careful! + If so, consider SLAB_TYPESAFE_BY_RCU (which was originally + named SLAB_DESTROY_BY_RCU). But please be careful! f. Do you need read-side critical sections that are respected even though they are in the middle of the idle loop, during diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt index 22cb3091f29776b2acfb0df339db150393a8ad83..effe7af3a5af95d25f86efadaa7dd2b127a0dea9 100644 --- a/Documentation/acpi/acpi-lid.txt +++ b/Documentation/acpi/acpi-lid.txt @@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues. If the userspace hasn't been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users can use the following kernel parameters to handle the possible issues: -A. button.lid_init_state=open: +A. button.lid_init_state=method: + When this option is specified, the ACPI button driver reports the + initial lid state using the returning value of the _LID control method + and whether the "opened"/"closed" events are paired fully relies on the + firmware implementation. + This option can be used to fix some platforms where the returning value + of the _LID control method is reliable but the initial lid state + notification is missing. + This option is the default behavior during the period the userspace + isn't ready to handle the buggy AML tables. +B. button.lid_init_state=open: When this option is specified, the ACPI button driver always reports the initial lid state as "opened" and whether the "opened"/"closed" events are paired fully relies on the firmware implementation. This may fix some platforms where the returning value of the _LID control method is not reliable and the initial lid state notification is missing. - This option is the default behavior during the period the userspace - isn't ready to handle the buggy AML tables. If the userspace has been prepared to ignore the unreliable "opened" events and the unreliable initial state notification, Linux users should always use the following kernel parameter: -B. button.lid_init_state=ignore: +C. button.lid_init_state=ignore: When this option is specified, the ACPI button driver never reports the initial lid state and there is a compensation mechanism implemented to ensure that the reliable "closed" notifications can always be delievered diff --git a/Documentation/acpi/aml-debugger.txt b/Documentation/acpi/aml-debugger.txt index 5f62aa4a493b60ef9ab8043dbf29eb12f7537361..e851cc5de63f29e8e8b89bc9c5f95e660f51f856 100644 --- a/Documentation/acpi/aml-debugger.txt +++ b/Documentation/acpi/aml-debugger.txt @@ -15,7 +15,7 @@ kernel. CONFIG_ACPI_DEBUGGER=y CONFIG_ACPI_DEBUGGER_USER=m - The userspace utlities can be built from the kernel source tree using + The userspace utilities can be built from the kernel source tree using the following commands: $ cd tools diff --git a/Documentation/acpi/dsd/graph.txt b/Documentation/acpi/dsd/graph.txt new file mode 100644 index 0000000000000000000000000000000000000000..ac09e3138b791213c9bdd4e74f698d818a6ecbd6 --- /dev/null +++ b/Documentation/acpi/dsd/graph.txt @@ -0,0 +1,162 @@ +Graphs + + +_DSD +---- + +_DSD (Device Specific Data) [7] is a predefined ACPI device +configuration object that can be used to convey information on +hardware features which are not specifically covered by the ACPI +specification [1][6]. There are two _DSD extensions that are relevant +for graphs: property [4] and hierarchical data extensions [5]. The +property extension provides generic key-value pairs whereas the +hierarchical data extension supports nodes with references to other +nodes, forming a tree. The nodes in the tree may contain properties as +defined by the property extension. The two extensions together provide +a tree-like structure with zero or more properties (key-value pairs) +in each node of the tree. + +The data structure may be accessed at runtime by using the device_* +and fwnode_* functions defined in include/linux/fwnode.h . + +Fwnode represents a generic firmware node object. It is independent on +the firmware type. In ACPI, fwnodes are _DSD hierarchical data +extensions objects. A device's _DSD object is represented by an +fwnode. + +The data structure may be referenced to elsewhere in the ACPI tables +by using a hard reference to the device itself and an index to the +hierarchical data extension array on each depth. + + +Ports and endpoints +------------------- + +The port and endpoint concepts are very similar to those in Devicetree +[3]. A port represents an interface in a device, and an endpoint +represents a connection to that interface. + +All port nodes are located under the device's "_DSD" node in the +hierarchical data extension tree. The property extension related to +each port node must contain the key "port" and an integer value which +is the number of the port. The object it refers to should be called "PRTX", +where "X" is the number of the port. + +Further on, endpoints are located under the individual port nodes. The +first hierarchical data extension package list entry of the endpoint +nodes must begin with "endpoint" and must be followed by the number +of the endpoint. The object it refers to should be called "EPXY", where +"X" is the number of the port and "Y" is the number of the endpoint. + +Each port node contains a property extension key "port", the value of +which is the number of the port node. The each endpoint is similarly numbered +with a property extension key "endpoint". Port numbers must be unique within a +device and endpoint numbers must be unique within a port. + +The endpoint reference uses property extension with "remote-endpoint" property +name followed by a reference in the same package. Such references consist of the +the remote device reference, number of the port in the device and finally the +number of the endpoint in that port. Individual references thus appear as: + + Package() { device, port_number, endpoint_number } + +The references to endpoints must be always done both ways, to the +remote endpoint and back from the referred remote endpoint node. + +A simple example of this is show below: + + Scope (\_SB.PCI0.I2C2) + { + Device (CAM0) + { + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { "compatible", Package () { "nokia,smia" } }, + }, + ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), + Package () { + Package () { "port0", "PRT0" }, + } + }) + Name (PRT0, Package() { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { "port", 0 }, + }, + ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), + Package () { + Package () { "endpoint0", "EP00" }, + } + }) + Name (EP00, Package() { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { "endpoint", 0 }, + Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, 4, 0 } }, + } + }) + } + } + + Scope (\_SB.PCI0) + { + Device (ISP) + { + Name (_DSD, Package () { + ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), + Package () { + Package () { "port4", "PRT4" }, + } + }) + + Name (PRT4, Package() { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { "port", 4 }, /* CSI-2 port number */ + }, + ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), + Package () { + Package () { "endpoint0", "EP40" }, + } + }) + + Name (EP40, Package() { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () { "endpoint", 0 }, + Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, 0, 0 } }, + } + }) + } + } + +Here, the port 0 of the "CAM0" device is connected to the port 4 of +the "ISP" device and vice versa. + + +References +---------- + +[1] _DSD (Device Specific Data) Implementation Guide. + , + referenced 2016-10-03. + +[2] Devicetree. , referenced 2016-10-03. + +[3] Documentation/devicetree/bindings/graph.txt + +[4] Device Properties UUID For _DSD. + , + referenced 2016-10-04. + +[5] Hierarchical Data Extension UUID For _DSD. + , + referenced 2016-10-04. + +[6] Advanced Configuration and Power Interface Specification. + , + referenced 2016-10-04. + +[7] _DSD Device Properties Usage Rules. + Documentation/acpi/DSD-properties-rules.txt diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index 209a5eba6b8711e34000b102f68814e76bfcded8..7bcf9c3d9fbe27520f2167e77cba45c294cd9e89 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -367,10 +367,10 @@ resulting child platform device. Device Tree namespace link device ID ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The Device Tree protocol uses device indentification based on the "compatible" +The Device Tree protocol uses device identification based on the "compatible" property whose value is a string or an array of strings recognized as device identifiers by drivers and the driver core. The set of all those strings may be -regarded as a device indentification namespace analogous to the ACPI/PNP device +regarded as a device identification namespace analogous to the ACPI/PNP device ID namespace. Consequently, in principle it should not be necessary to allocate a new (and arguably redundant) ACPI/PNP device ID for a devices with an existing identification string in the Device Tree (DT) namespace, especially if that ID @@ -381,7 +381,7 @@ In ACPI, the device identification object called _CID (Compatible ID) is used to list the IDs of devices the given one is compatible with, but those IDs must belong to one of the namespaces prescribed by the ACPI specification (see Section 6.1.2 of ACPI 6.0 for details) and the DT namespace is not one of them. -Moreover, the specification mandates that either a _HID or an _ADR identificaion +Moreover, the specification mandates that either a _HID or an _ADR identification object be present for all ACPI objects representing devices (Section 6.1 of ACPI 6.0). For non-enumerable bus types that object must be _HID and its value must be a device ID from one of the namespaces prescribed by the specification too. diff --git a/Documentation/acpi/linuxized-acpica.txt b/Documentation/acpi/linuxized-acpica.txt index defe2eec5331066a0eeb4206dda20ff84b7124e7..3ad7b0dfb083377d043573de0de94206881853f4 100644 --- a/Documentation/acpi/linuxized-acpica.txt +++ b/Documentation/acpi/linuxized-acpica.txt @@ -24,7 +24,7 @@ upstream. The homepage of ACPICA project is: www.acpica.org, it is maintained and supported by Intel Corporation. - The following figure depicts the Linux ACPI subystem where the ACPICA + The following figure depicts the Linux ACPI subsystem where the ACPICA adaptation is included: +---------------------------------------------------------+ @@ -110,7 +110,7 @@ upstream. Linux patches. The patches generated by this process are referred to as "linuxized ACPICA patches". The release process is carried out on a local copy the ACPICA git repository. Each commit in the monthly release is - converted into a linuxized ACPICA patch. Together, they form the montly + converted into a linuxized ACPICA patch. Together, they form the monthly ACPICA release patchset for the Linux ACPI community. This process is illustrated in the following figure: @@ -165,7 +165,7 @@ upstream. . Before the linuxized ACPICA patches are sent to the Linux ACPI community - for review, there is a quality ensurance build test process to reduce + for review, there is a quality assurance build test process to reduce porting issues. Currently this build process only takes care of the following kernel configuration options: CONFIG_ACPI/CONFIG_ACPI_DEBUG/CONFIG_ACPI_DEBUGGER @@ -195,12 +195,12 @@ upstream. release utilities (please refer to Section 4 below for the details). 3. Linux specific features - Sometimes it's impossible to use the current ACPICA APIs to implement features required by the Linux kernel, - so Linux developers occasionaly have to change ACPICA code directly. + so Linux developers occasionally have to change ACPICA code directly. Those changes may not be acceptable by ACPICA upstream and in such cases they are left as committed ACPICA divergences unless the ACPICA side can implement new mechanisms as replacements for them. 4. ACPICA release fixups - ACPICA only tests commits using a set of the - user space simulation utilies, thus the linuxized ACPICA patches may + user space simulation utilities, thus the linuxized ACPICA patches may break the Linux kernel, leaving us build/boot failures. In order to avoid breaking Linux bisection, fixes are applied directly to the linuxized ACPICA patches during the release process. When the release diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 697a00ccec25eebf2d4b063fc4820fc6fd1d71c7..b96e80f79e853109abdb0d73a6a688453f9689cc 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -27,7 +27,7 @@ On what hardware does it run? today Linux also runs on (at least) the Compaq Alpha AXP, Sun SPARC and UltraSPARC, Motorola 68000, PowerPC, PowerPC64, ARM, Hitachi SuperH, Cell, IBM S/390, MIPS, HP PA-RISC, Intel IA-64, DEC VAX, AMD x86-64, AXIS CRIS, - Xtensa, Tilera TILE, AVR32, ARC and Renesas M32R architectures. + Xtensa, Tilera TILE, ARC and Renesas M32R architectures. Linux is easily portable to most general-purpose 32- or 64-bit architectures as long as they have a paged memory management unit (PMMU) and a port of the @@ -362,7 +362,7 @@ If something goes wrong as is, otherwise you will have to use the ``ksymoops`` program to make sense of the dump (but compiling with CONFIG_KALLSYMS is usually preferred). This utility can be downloaded from - ftp://ftp..kernel.org/pub/linux/utils/kernel/ksymoops/ . + https://www.kernel.org/pub/linux/utils/kernel/ksymoops/ . Alternatively, you can do the dump lookup by hand: - In debugging dumps like the above, it helps enormously if you can diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 8ddae4e4299aa10e84f27ab480a3ec3f79c97261..8c60a8a32a1a1c82a4d539021a2dbd6449340f4f 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -60,6 +60,7 @@ configure specific aspects of kernel behavior to your liking. mono java ras + pm/index .. only:: subproject and html diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b516164999a801abf47eae22f90a28116f20eb2b..d76ab3907e2bd3b07cba39de90434f6e0490d0e7 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -1,3 +1,5 @@ +.. _kernelparameters: + The kernel's command-line parameters ==================================== @@ -86,7 +88,6 @@ parameter is applicable:: APIC APIC support is enabled. APM Advanced Power Management support is enabled. ARM ARM architecture is enabled. - AVR32 AVR32 architecture is enabled. AX25 Appropriate AX.25 support is enabled. BLACKFIN Blackfin architecture is enabled. CLK Common clock infrastructure is enabled. @@ -197,7 +198,7 @@ and is between 256 and 4096 characters. It is defined in the file Finally, the [KMG] suffix is commonly described after a number of kernel parameter values. These 'K', 'M', and 'G' letters represent the _binary_ -multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30 +multipliers 'Kilo', 'Mega', and 'Giga', equaling 2^10, 2^20, and 2^30 bytes respectively. Such letter suffixes can also be entirely omitted: .. include:: kernel-parameters.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index facc20a3f96280472396ad3f7d2e8f2dba62fecc..7737ab5d04b2941621df23b7fc83a4cdba36e5ba 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -531,7 +531,6 @@ [ACPI] acpi_pm [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, pxa_timer,timer3,32k_counter,timer0_1 - [AVR32] avr32 [X86-32] pit,hpet,tsc; scx200_hrt on Geode; cyclone on IBM x440 [MIPS] MIPS @@ -867,6 +866,15 @@ dscc4.setup= [NET] + dt_cpu_ftrs= [PPC] + Format: {"off" | "known"} + Control how the dt_cpu_ftrs device-tree binding is + used for CPU feature discovery and setup (if it + exists). + off: Do not use it, fall back to legacy cpu table. + known: Do not pass through unknown features to guests + or userspace, only those that the kernel is aware of. + dump_apple_properties [X86] Dump name and content of EFI device properties on x86 Macs. Useful for driver authors to determine @@ -973,7 +981,7 @@ A valid base address must be provided, and the serial port must already be setup and configured. - armada3700_uart, + ar3700_uart, Start an early, polled-mode console on the Armada 3700 serial port at the specified address. The serial port must already be setup @@ -989,6 +997,7 @@ earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] earlyprintk=pciserial,bus:device.function[,baudrate] + earlyprintk=xdbc[xhciController#] earlyprintk is useful when the kernel crashes before the normal console is initialized. It is not enabled by @@ -1578,6 +1587,15 @@ extended tables themselves, and also PASID support. With this option set, extended tables will not be used even on hardware which claims to support them. + tboot_noforce [Default Off] + Do not force the Intel IOMMU enabled under tboot. + By default, tboot will force Intel IOMMU on, which + could harm performance of some high-throughput + devices like 40GBit network cards, even if identity + mapping is enabled. + Note that using this option lowers the security + provided by tboot because it makes the system + vulnerable to DMA attacks. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. @@ -1644,6 +1662,12 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.passthrough= + [ARM64] Configure DMA to bypass the IOMMU by default. + Format: { "0" | "1" } + 0 - Use IOMMU translation for DMA. + 1 - Bypass the IOMMU for DMA. + unset - Use IOMMU translation for DMA. io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in @@ -2419,13 +2443,7 @@ and gids from such clients. This is intended to ease migration from NFSv2/v3. - objlayoutdriver.osd_login_prog= - [NFS] [OBJLAYOUT] sets the pathname to the program which - is used to automatically discover and login into new - osd-targets. Please see: - Documentation/filesystems/pnfs.txt for more explanations - - nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take + nmi_debug= [KNL,SH] Specify one or more actions to take when a NMI is triggered. Format: [state][,regs][,debounce][,die] @@ -3178,6 +3196,12 @@ ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. + ras=option[,option,...] [KNL] RAS-specific options + + cec_disable [X86] + Disable the Correctable Errors Collector, + see CONFIG_RAS_CEC help text. + rcu_nocbs= [KNL] The argument is a cpu list, as described above. @@ -3779,6 +3803,21 @@ spia_pedr= spia_peddr= + srcutree.exp_holdoff [KNL] + Specifies how many nanoseconds must elapse + since the end of the last SRCU grace period for + a given srcu_struct until the next normal SRCU + grace period will be considered for automatic + expediting. Set to zero to disable automatic + expediting. + + stack_guard_gap= [MM] + override the default stack gap protection. The value + is in page units and it defines how many pages prior + to (for stacks growing down) resp. after (for stacks + growing up) the main stack are reserved for no other + mapping. Default value is 256 pages. + stacktrace [FTRACE] Enabled the stack tracer on boot up. @@ -4121,6 +4160,9 @@ usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. + usbhid.jspoll= + [USBHID] The interval which joysticks are to be polled at. + usb-storage.delay_use= [UMS] The delay in seconds before a new device is scanned for Logical Units (default 1). diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst index 1e61bf50595c84c936cfe8788bb37114a6f7d5f0..84de718f24a469ee455b6e5e3aab5b6608f3f1dc 100644 --- a/Documentation/admin-guide/md.rst +++ b/Documentation/admin-guide/md.rst @@ -276,14 +276,14 @@ All md devices contain: array creation it will default to 0, though starting the array as ``clean`` will set it much larger. - new_dev + new_dev This file can be written but not read. The value written should be a block device number as major:minor. e.g. 8:0 This will cause that device to be attached to the array, if it is available. It will then appear at md/dev-XXX (depending on the name of the device) and further configuration is then possible. - safe_mode_delay + safe_mode_delay When an md array has seen no write requests for a certain period of time, it will be marked as ``clean``. When another write request arrives, the array is marked as ``dirty`` before the write @@ -292,7 +292,7 @@ All md devices contain: period as a number of seconds. The default is 200msec (0.200). Writing a value of 0 disables safemode. - array_state + array_state This file contains a single word which describes the current state of the array. In many cases, the state can be set by writing the word for the desired state, however some states @@ -401,7 +401,30 @@ All md devices contain: once the array becomes non-degraded, and this fact has been recorded in the metadata. + consistency_policy + This indicates how the array maintains consistency in case of unexpected + shutdown. It can be: + none + Array has no redundancy information, e.g. raid0, linear. + + resync + Full resync is performed and all redundancy is regenerated when the + array is started after unclean shutdown. + + bitmap + Resync assisted by a write-intent bitmap. + + journal + For raid4/5/6, journal device is used to log transactions and replay + after unclean shutdown. + + ppl + For raid5 only, Partial Parity Log is used to close the write hole and + eliminate resync. + + The accepted values when writing to this file are ``ppl`` and ``resync``, + used to enable and disable PPL. As component devices are added to an md array, they appear in the ``md`` @@ -563,6 +586,9 @@ Each directory contains: adds bad blocks without acknowledging them. This is largely for testing. + ppl_sector, ppl_size + Location and size (in sectors) of the space used for Partial Parity Log + on this device. An active md device will also contain an entry for each active device diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst new file mode 100644 index 0000000000000000000000000000000000000000..09aa2e9497875acec984dc68727e14139c5d23ad --- /dev/null +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -0,0 +1,701 @@ +.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy ` +.. |intel_pstate| replace:: :doc:`intel_pstate ` + +======================= +CPU Performance Scaling +======================= + +:: + + Copyright (c) 2017 Intel Corp., Rafael J. Wysocki + +The Concept of CPU Performance Scaling +====================================== + +The majority of modern processors are capable of operating in a number of +different clock frequency and voltage configurations, often referred to as +Operating Performance Points or P-states (in ACPI terminology). As a rule, +the higher the clock frequency and the higher the voltage, the more instructions +can be retired by the CPU over a unit of time, but also the higher the clock +frequency and the higher the voltage, the more energy is consumed over a unit of +time (or the more power is drawn) by the CPU in the given P-state. Therefore +there is a natural tradeoff between the CPU capacity (the number of instructions +that can be executed over a unit of time) and the power drawn by the CPU. + +In some situations it is desirable or even necessary to run the program as fast +as possible and then there is no reason to use any P-states different from the +highest one (i.e. the highest-performance frequency/voltage configuration +available). In some other cases, however, it may not be necessary to execute +instructions so quickly and maintaining the highest available CPU capacity for a +relatively long time without utilizing it entirely may be regarded as wasteful. +It also may not be physically possible to maintain maximum CPU capacity for too +long for thermal or power supply capacity reasons or similar. To cover those +cases, there are hardware interfaces allowing CPUs to be switched between +different frequency/voltage configurations or (in the ACPI terminology) to be +put into different P-states. + +Typically, they are used along with algorithms to estimate the required CPU +capacity, so as to decide which P-states to put the CPUs into. Of course, since +the utilization of the system generally changes over time, that has to be done +repeatedly on a regular basis. The activity by which this happens is referred +to as CPU performance scaling or CPU frequency scaling (because it involves +adjusting the CPU clock frequency). + + +CPU Performance Scaling in Linux +================================ + +The Linux kernel supports CPU performance scaling by means of the ``CPUFreq`` +(CPU Frequency scaling) subsystem that consists of three layers of code: the +core, scaling governors and scaling drivers. + +The ``CPUFreq`` core provides the common code infrastructure and user space +interfaces for all platforms that support CPU performance scaling. It defines +the basic framework in which the other components operate. + +Scaling governors implement algorithms to estimate the required CPU capacity. +As a rule, each governor implements one, possibly parametrized, scaling +algorithm. + +Scaling drivers talk to the hardware. They provide scaling governors with +information on the available P-states (or P-state ranges in some cases) and +access platform-specific hardware interfaces to change CPU P-states as requested +by scaling governors. + +In principle, all available scaling governors can be used with every scaling +driver. That design is based on the observation that the information used by +performance scaling algorithms for P-state selection can be represented in a +platform-independent form in the majority of cases, so it should be possible +to use the same performance scaling algorithm implemented in exactly the same +way regardless of which scaling driver is used. Consequently, the same set of +scaling governors should be suitable for every supported platform. + +However, that observation may not hold for performance scaling algorithms +based on information provided by the hardware itself, for example through +feedback registers, as that information is typically specific to the hardware +interface it comes from and may not be easily represented in an abstract, +platform-independent way. For this reason, ``CPUFreq`` allows scaling drivers +to bypass the governor layer and implement their own performance scaling +algorithms. That is done by the |intel_pstate| scaling driver. + + +``CPUFreq`` Policy Objects +========================== + +In some cases the hardware interface for P-state control is shared by multiple +CPUs. That is, for example, the same register (or set of registers) is used to +control the P-state of multiple CPUs at the same time and writing to it affects +all of those CPUs simultaneously. + +Sets of CPUs sharing hardware P-state control interfaces are represented by +``CPUFreq`` as |struct cpufreq_policy| objects. For consistency, +|struct cpufreq_policy| is also used when there is only one CPU in the given +set. + +The ``CPUFreq`` core maintains a pointer to a |struct cpufreq_policy| object for +every CPU in the system, including CPUs that are currently offline. If multiple +CPUs share the same hardware P-state control interface, all of the pointers +corresponding to them point to the same |struct cpufreq_policy| object. + +``CPUFreq`` uses |struct cpufreq_policy| as its basic data type and the design +of its user space interface is based on the policy concept. + + +CPU Initialization +================== + +First of all, a scaling driver has to be registered for ``CPUFreq`` to work. +It is only possible to register one scaling driver at a time, so the scaling +driver is expected to be able to handle all CPUs in the system. + +The scaling driver may be registered before or after CPU registration. If +CPUs are registered earlier, the driver core invokes the ``CPUFreq`` core to +take a note of all of the already registered CPUs during the registration of the +scaling driver. In turn, if any CPUs are registered after the registration of +the scaling driver, the ``CPUFreq`` core will be invoked to take note of them +at their registration time. + +In any case, the ``CPUFreq`` core is invoked to take note of any logical CPU it +has not seen so far as soon as it is ready to handle that CPU. [Note that the +logical CPU may be a physical single-core processor, or a single core in a +multicore processor, or a hardware thread in a physical processor or processor +core. In what follows "CPU" always means "logical CPU" unless explicitly stated +otherwise and the word "processor" is used to refer to the physical part +possibly including multiple logical CPUs.] + +Once invoked, the ``CPUFreq`` core checks if the policy pointer is already set +for the given CPU and if so, it skips the policy object creation. Otherwise, +a new policy object is created and initialized, which involves the creation of +a new policy directory in ``sysfs``, and the policy pointer corresponding to +the given CPU is set to the new policy object's address in memory. + +Next, the scaling driver's ``->init()`` callback is invoked with the policy +pointer of the new CPU passed to it as the argument. That callback is expected +to initialize the performance scaling hardware interface for the given CPU (or, +more precisely, for the set of CPUs sharing the hardware interface it belongs +to, represented by its policy object) and, if the policy object it has been +called for is new, to set parameters of the policy, like the minimum and maximum +frequencies supported by the hardware, the table of available frequencies (if +the set of supported P-states is not a continuous range), and the mask of CPUs +that belong to the same policy (including both online and offline CPUs). That +mask is then used by the core to populate the policy pointers for all of the +CPUs in it. + +The next major initialization step for a new policy object is to attach a +scaling governor to it (to begin with, that is the default scaling governor +determined by the kernel configuration, but it may be changed later +via ``sysfs``). First, a pointer to the new policy object is passed to the +governor's ``->init()`` callback which is expected to initialize all of the +data structures necessary to handle the given policy and, possibly, to add +a governor ``sysfs`` interface to it. Next, the governor is started by +invoking its ``->start()`` callback. + +That callback it expected to register per-CPU utilization update callbacks for +all of the online CPUs belonging to the given policy with the CPU scheduler. +The utilization update callbacks will be invoked by the CPU scheduler on +important events, like task enqueue and dequeue, on every iteration of the +scheduler tick or generally whenever the CPU utilization may change (from the +scheduler's perspective). They are expected to carry out computations needed +to determine the P-state to use for the given policy going forward and to +invoke the scaling driver to make changes to the hardware in accordance with +the P-state selection. The scaling driver may be invoked directly from +scheduler context or asynchronously, via a kernel thread or workqueue, depending +on the configuration and capabilities of the scaling driver and the governor. + +Similar steps are taken for policy objects that are not new, but were "inactive" +previously, meaning that all of the CPUs belonging to them were offline. The +only practical difference in that case is that the ``CPUFreq`` core will attempt +to use the scaling governor previously used with the policy that became +"inactive" (and is re-initialized now) instead of the default governor. + +In turn, if a previously offline CPU is being brought back online, but some +other CPUs sharing the policy object with it are online already, there is no +need to re-initialize the policy object at all. In that case, it only is +necessary to restart the scaling governor so that it can take the new online CPU +into account. That is achieved by invoking the governor's ``->stop`` and +``->start()`` callbacks, in this order, for the entire policy. + +As mentioned before, the |intel_pstate| scaling driver bypasses the scaling +governor layer of ``CPUFreq`` and provides its own P-state selection algorithms. +Consequently, if |intel_pstate| is used, scaling governors are not attached to +new policy objects. Instead, the driver's ``->setpolicy()`` callback is invoked +to register per-CPU utilization update callbacks for each policy. These +callbacks are invoked by the CPU scheduler in the same way as for scaling +governors, but in the |intel_pstate| case they both determine the P-state to +use and change the hardware configuration accordingly in one go from scheduler +context. + +The policy objects created during CPU initialization and other data structures +associated with them are torn down when the scaling driver is unregistered +(which happens when the kernel module containing it is unloaded, for example) or +when the last CPU belonging to the given policy in unregistered. + + +Policy Interface in ``sysfs`` +============================= + +During the initialization of the kernel, the ``CPUFreq`` core creates a +``sysfs`` directory (kobject) called ``cpufreq`` under +:file:`/sys/devices/system/cpu/`. + +That directory contains a ``policyX`` subdirectory (where ``X`` represents an +integer number) for every policy object maintained by the ``CPUFreq`` core. +Each ``policyX`` directory is pointed to by ``cpufreq`` symbolic links +under :file:`/sys/devices/system/cpu/cpuY/` (where ``Y`` represents an integer +that may be different from the one represented by ``X``) for all of the CPUs +associated with (or belonging to) the given policy. The ``policyX`` directories +in :file:`/sys/devices/system/cpu/cpufreq` each contain policy-specific +attributes (files) to control ``CPUFreq`` behavior for the corresponding policy +objects (that is, for all of the CPUs associated with them). + +Some of those attributes are generic. They are created by the ``CPUFreq`` core +and their behavior generally does not depend on what scaling driver is in use +and what scaling governor is attached to the given policy. Some scaling drivers +also add driver-specific attributes to the policy directories in ``sysfs`` to +control policy-specific aspects of driver behavior. + +The generic attributes under :file:`/sys/devices/system/cpu/cpufreq/policyX/` +are the following: + +``affected_cpus`` + List of online CPUs belonging to this policy (i.e. sharing the hardware + performance scaling interface represented by the ``policyX`` policy + object). + +``bios_limit`` + If the platform firmware (BIOS) tells the OS to apply an upper limit to + CPU frequencies, that limit will be reported through this attribute (if + present). + + The existence of the limit may be a result of some (often unintentional) + BIOS settings, restrictions coming from a service processor or another + BIOS/HW-based mechanisms. + + This does not cover ACPI thermal limitations which can be discovered + through a generic thermal driver. + + This attribute is not present if the scaling driver in use does not + support it. + +``cpuinfo_max_freq`` + Maximum possible operating frequency the CPUs belonging to this policy + can run at (in kHz). + +``cpuinfo_min_freq`` + Minimum possible operating frequency the CPUs belonging to this policy + can run at (in kHz). + +``cpuinfo_transition_latency`` + The time it takes to switch the CPUs belonging to this policy from one + P-state to another, in nanoseconds. + + If unknown or if known to be so high that the scaling driver does not + work with the `ondemand`_ governor, -1 (:c:macro:`CPUFREQ_ETERNAL`) + will be returned by reads from this attribute. + +``related_cpus`` + List of all (online and offline) CPUs belonging to this policy. + +``scaling_available_governors`` + List of ``CPUFreq`` scaling governors present in the kernel that can + be attached to this policy or (if the |intel_pstate| scaling driver is + in use) list of scaling algorithms provided by the driver that can be + applied to this policy. + + [Note that some governors are modular and it may be necessary to load a + kernel module for the governor held by it to become available and be + listed by this attribute.] + +``scaling_cur_freq`` + Current frequency of all of the CPUs belonging to this policy (in kHz). + + For the majority of scaling drivers, this is the frequency of the last + P-state requested by the driver from the hardware using the scaling + interface provided by it, which may or may not reflect the frequency + the CPU is actually running at (due to hardware design and other + limitations). + + Some scaling drivers (e.g. |intel_pstate|) attempt to provide + information more precisely reflecting the current CPU frequency through + this attribute, but that still may not be the exact current CPU + frequency as seen by the hardware at the moment. + +``scaling_driver`` + The scaling driver currently in use. + +``scaling_governor`` + The scaling governor currently attached to this policy or (if the + |intel_pstate| scaling driver is in use) the scaling algorithm + provided by the driver that is currently applied to this policy. + + This attribute is read-write and writing to it will cause a new scaling + governor to be attached to this policy or a new scaling algorithm + provided by the scaling driver to be applied to it (in the + |intel_pstate| case), as indicated by the string written to this + attribute (which must be one of the names listed by the + ``scaling_available_governors`` attribute described above). + +``scaling_max_freq`` + Maximum frequency the CPUs belonging to this policy are allowed to be + running at (in kHz). + + This attribute is read-write and writing a string representing an + integer to it will cause a new limit to be set (it must not be lower + than the value of the ``scaling_min_freq`` attribute). + +``scaling_min_freq`` + Minimum frequency the CPUs belonging to this policy are allowed to be + running at (in kHz). + + This attribute is read-write and writing a string representing a + non-negative integer to it will cause a new limit to be set (it must not + be higher than the value of the ``scaling_max_freq`` attribute). + +``scaling_setspeed`` + This attribute is functional only if the `userspace`_ scaling governor + is attached to the given policy. + + It returns the last frequency requested by the governor (in kHz) or can + be written to in order to set a new frequency for the policy. + + +Generic Scaling Governors +========================= + +``CPUFreq`` provides generic scaling governors that can be used with all +scaling drivers. As stated before, each of them implements a single, possibly +parametrized, performance scaling algorithm. + +Scaling governors are attached to policy objects and different policy objects +can be handled by different scaling governors at the same time (although that +may lead to suboptimal results in some cases). + +The scaling governor for a given policy object can be changed at any time with +the help of the ``scaling_governor`` policy attribute in ``sysfs``. + +Some governors expose ``sysfs`` attributes to control or fine-tune the scaling +algorithms implemented by them. Those attributes, referred to as governor +tunables, can be either global (system-wide) or per-policy, depending on the +scaling driver in use. If the driver requires governor tunables to be +per-policy, they are located in a subdirectory of each policy directory. +Otherwise, they are located in a subdirectory under +:file:`/sys/devices/system/cpu/cpufreq/`. In either case the name of the +subdirectory containing the governor tunables is the name of the governor +providing them. + +``performance`` +--------------- + +When attached to a policy object, this governor causes the highest frequency, +within the ``scaling_max_freq`` policy limit, to be requested for that policy. + +The request is made once at that time the governor for the policy is set to +``performance`` and whenever the ``scaling_max_freq`` or ``scaling_min_freq`` +policy limits change after that. + +``powersave`` +------------- + +When attached to a policy object, this governor causes the lowest frequency, +within the ``scaling_min_freq`` policy limit, to be requested for that policy. + +The request is made once at that time the governor for the policy is set to +``powersave`` and whenever the ``scaling_max_freq`` or ``scaling_min_freq`` +policy limits change after that. + +``userspace`` +------------- + +This governor does not do anything by itself. Instead, it allows user space +to set the CPU frequency for the policy it is attached to by writing to the +``scaling_setspeed`` attribute of that policy. + +``schedutil`` +------------- + +This governor uses CPU utilization data available from the CPU scheduler. It +generally is regarded as a part of the CPU scheduler, so it can access the +scheduler's internal data structures directly. + +It runs entirely in scheduler context, although in some cases it may need to +invoke the scaling driver asynchronously when it decides that the CPU frequency +should be changed for a given policy (that depends on whether or not the driver +is capable of changing the CPU frequency from scheduler context). + +The actions of this governor for a particular CPU depend on the scheduling class +invoking its utilization update callback for that CPU. If it is invoked by the +RT or deadline scheduling classes, the governor will increase the frequency to +the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn, +if it is invoked by the CFS scheduling class, the governor will use the +Per-Entity Load Tracking (PELT) metric for the root control group of the +given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_ +LWN.net article for a description of the PELT mechanism). Then, the new +CPU frequency to apply is computed in accordance with the formula + + f = 1.25 * ``f_0`` * ``util`` / ``max`` + +where ``util`` is the PELT number, ``max`` is the theoretical maximum of +``util``, and ``f_0`` is either the maximum possible CPU frequency for the given +policy (if the PELT number is frequency-invariant), or the current CPU frequency +(otherwise). + +This governor also employs a mechanism allowing it to temporarily bump up the +CPU frequency for tasks that have been waiting on I/O most recently, called +"IO-wait boosting". That happens when the :c:macro:`SCHED_CPUFREQ_IOWAIT` flag +is passed by the scheduler to the governor callback which causes the frequency +to go up to the allowed maximum immediately and then draw back to the value +returned by the above formula over time. + +This governor exposes only one tunable: + +``rate_limit_us`` + Minimum time (in microseconds) that has to pass between two consecutive + runs of governor computations (default: 1000 times the scaling driver's + transition latency). + + The purpose of this tunable is to reduce the scheduler context overhead + of the governor which might be excessive without it. + +This governor generally is regarded as a replacement for the older `ondemand`_ +and `conservative`_ governors (described below), as it is simpler and more +tightly integrated with the CPU scheduler, its overhead in terms of CPU context +switches and similar is less significant, and it uses the scheduler's own CPU +utilization metric, so in principle its decisions should not contradict the +decisions made by the other parts of the scheduler. + +``ondemand`` +------------ + +This governor uses CPU load as a CPU frequency selection metric. + +In order to estimate the current CPU load, it measures the time elapsed between +consecutive invocations of its worker routine and computes the fraction of that +time in which the given CPU was not idle. The ratio of the non-idle (active) +time to the total CPU time is taken as an estimate of the load. + +If this governor is attached to a policy shared by multiple CPUs, the load is +estimated for all of them and the greatest result is taken as the load estimate +for the entire policy. + +The worker routine of this governor has to run in process context, so it is +invoked asynchronously (via a workqueue) and CPU P-states are updated from +there if necessary. As a result, the scheduler context overhead from this +governor is minimum, but it causes additional CPU context switches to happen +relatively often and the CPU P-state updates triggered by it can be relatively +irregular. Also, it affects its own CPU load metric by running code that +reduces the CPU idle time (even though the CPU idle time is only reduced very +slightly by it). + +It generally selects CPU frequencies proportional to the estimated load, so that +the value of the ``cpuinfo_max_freq`` policy attribute corresponds to the load of +1 (or 100%), and the value of the ``cpuinfo_min_freq`` policy attribute +corresponds to the load of 0, unless when the load exceeds a (configurable) +speedup threshold, in which case it will go straight for the highest frequency +it is allowed to use (the ``scaling_max_freq`` policy limit). + +This governor exposes the following tunables: + +``sampling_rate`` + This is how often the governor's worker routine should run, in + microseconds. + + Typically, it is set to values of the order of 10000 (10 ms). Its + default value is equal to the value of ``cpuinfo_transition_latency`` + for each policy this governor is attached to (but since the unit here + is greater by 1000, this means that the time represented by + ``sampling_rate`` is 1000 times greater than the transition latency by + default). + + If this tunable is per-policy, the following shell command sets the time + represented by it to be 750 times as high as the transition latency:: + + # echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate + + +``min_sampling_rate`` + The minimum value of ``sampling_rate``. + + Equal to 10000 (10 ms) if :c:macro:`CONFIG_NO_HZ_COMMON` and + :c:data:`tick_nohz_active` are both set or to 20 times the value of + :c:data:`jiffies` in microseconds otherwise. + +``up_threshold`` + If the estimated CPU load is above this value (in percent), the governor + will set the frequency to the maximum value allowed for the policy. + Otherwise, the selected frequency will be proportional to the estimated + CPU load. + +``ignore_nice_load`` + If set to 1 (default 0), it will cause the CPU load estimation code to + treat the CPU time spent on executing tasks with "nice" levels greater + than 0 as CPU idle time. + + This may be useful if there are tasks in the system that should not be + taken into account when deciding what frequency to run the CPUs at. + Then, to make that happen it is sufficient to increase the "nice" level + of those tasks above 0 and set this attribute to 1. + +``sampling_down_factor`` + Temporary multiplier, between 1 (default) and 100 inclusive, to apply to + the ``sampling_rate`` value if the CPU load goes above ``up_threshold``. + + This causes the next execution of the governor's worker routine (after + setting the frequency to the allowed maximum) to be delayed, so the + frequency stays at the maximum level for a longer time. + + Frequency fluctuations in some bursty workloads may be avoided this way + at the cost of additional energy spent on maintaining the maximum CPU + capacity. + +``powersave_bias`` + Reduction factor to apply to the original frequency target of the + governor (including the maximum value used when the ``up_threshold`` + value is exceeded by the estimated CPU load) or sensitivity threshold + for the AMD frequency sensitivity powersave bias driver + (:file:`drivers/cpufreq/amd_freq_sensitivity.c`), between 0 and 1000 + inclusive. + + If the AMD frequency sensitivity powersave bias driver is not loaded, + the effective frequency to apply is given by + + f * (1 - ``powersave_bias`` / 1000) + + where f is the governor's original frequency target. The default value + of this attribute is 0 in that case. + + If the AMD frequency sensitivity powersave bias driver is loaded, the + value of this attribute is 400 by default and it is used in a different + way. + + On Family 16h (and later) AMD processors there is a mechanism to get a + measured workload sensitivity, between 0 and 100% inclusive, from the + hardware. That value can be used to estimate how the performance of the + workload running on a CPU will change in response to frequency changes. + + The performance of a workload with the sensitivity of 0 (memory-bound or + IO-bound) is not expected to increase at all as a result of increasing + the CPU frequency, whereas workloads with the sensitivity of 100% + (CPU-bound) are expected to perform much better if the CPU frequency is + increased. + + If the workload sensitivity is less than the threshold represented by + the ``powersave_bias`` value, the sensitivity powersave bias driver + will cause the governor to select a frequency lower than its original + target, so as to avoid over-provisioning workloads that will not benefit + from running at higher CPU frequencies. + +``conservative`` +---------------- + +This governor uses CPU load as a CPU frequency selection metric. + +It estimates the CPU load in the same way as the `ondemand`_ governor described +above, but the CPU frequency selection algorithm implemented by it is different. + +Namely, it avoids changing the frequency significantly over short time intervals +which may not be suitable for systems with limited power supply capacity (e.g. +battery-powered). To achieve that, it changes the frequency in relatively +small steps, one step at a time, up or down - depending on whether or not a +(configurable) threshold has been exceeded by the estimated CPU load. + +This governor exposes the following tunables: + +``freq_step`` + Frequency step in percent of the maximum frequency the governor is + allowed to set (the ``scaling_max_freq`` policy limit), between 0 and + 100 (5 by default). + + This is how much the frequency is allowed to change in one go. Setting + it to 0 will cause the default frequency step (5 percent) to be used + and setting it to 100 effectively causes the governor to periodically + switch the frequency between the ``scaling_min_freq`` and + ``scaling_max_freq`` policy limits. + +``down_threshold`` + Threshold value (in percent, 20 by default) used to determine the + frequency change direction. + + If the estimated CPU load is greater than this value, the frequency will + go up (by ``freq_step``). If the load is less than this value (and the + ``sampling_down_factor`` mechanism is not in effect), the frequency will + go down. Otherwise, the frequency will not be changed. + +``sampling_down_factor`` + Frequency decrease deferral factor, between 1 (default) and 10 + inclusive. + + It effectively causes the frequency to go down ``sampling_down_factor`` + times slower than it ramps up. + + +Frequency Boost Support +======================= + +Background +---------- + +Some processors support a mechanism to raise the operating frequency of some +cores in a multicore package temporarily (and above the sustainable frequency +threshold for the whole package) under certain conditions, for example if the +whole chip is not fully utilized and below its intended thermal or power budget. + +Different names are used by different vendors to refer to this functionality. +For Intel processors it is referred to as "Turbo Boost", AMD calls it +"Turbo-Core" or (in technical documentation) "Core Performance Boost" and so on. +As a rule, it also is implemented differently by different vendors. The simple +term "frequency boost" is used here for brevity to refer to all of those +implementations. + +The frequency boost mechanism may be either hardware-based or software-based. +If it is hardware-based (e.g. on x86), the decision to trigger the boosting is +made by the hardware (although in general it requires the hardware to be put +into a special state in which it can control the CPU frequency within certain +limits). If it is software-based (e.g. on ARM), the scaling driver decides +whether or not to trigger boosting and when to do that. + +The ``boost`` File in ``sysfs`` +------------------------------- + +This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls +the "boost" setting for the whole system. It is not present if the underlying +scaling driver does not support the frequency boost mechanism (or supports it, +but provides a driver-specific interface for controlling it, like +|intel_pstate|). + +If the value in this file is 1, the frequency boost mechanism is enabled. This +means that either the hardware can be put into states in which it is able to +trigger boosting (in the hardware-based case), or the software is allowed to +trigger boosting (in the software-based case). It does not mean that boosting +is actually in use at the moment on any CPUs in the system. It only means a +permission to use the frequency boost mechanism (which still may never be used +for other reasons). + +If the value in this file is 0, the frequency boost mechanism is disabled and +cannot be used at all. + +The only values that can be written to this file are 0 and 1. + +Rationale for Boost Control Knob +-------------------------------- + +The frequency boost mechanism is generally intended to help to achieve optimum +CPU performance on time scales below software resolution (e.g. below the +scheduler tick interval) and it is demonstrably suitable for many workloads, but +it may lead to problems in certain situations. + +For this reason, many systems make it possible to disable the frequency boost +mechanism in the platform firmware (BIOS) setup, but that requires the system to +be restarted for the setting to be adjusted as desired, which may not be +practical at least in some cases. For example: + + 1. Boosting means overclocking the processor, although under controlled + conditions. Generally, the processor's energy consumption increases + as a result of increasing its frequency and voltage, even temporarily. + That may not be desirable on systems that switch to power sources of + limited capacity, such as batteries, so the ability to disable the boost + mechanism while the system is running may help there (but that depends on + the workload too). + + 2. In some situations deterministic behavior is more important than + performance or energy consumption (or both) and the ability to disable + boosting while the system is running may be useful then. + + 3. To examine the impact of the frequency boost mechanism itself, it is useful + to be able to run tests with and without boosting, preferably without + restarting the system in the meantime. + + 4. Reproducible results are important when running benchmarks. Since + the boosting functionality depends on the load of the whole package, + single-thread performance may vary because of it which may lead to + unreproducible results sometimes. That can be avoided by disabling the + frequency boost mechanism before running benchmarks sensitive to that + issue. + +Legacy AMD ``cpb`` Knob +----------------------- + +The AMD powernow-k8 scaling driver supports a ``sysfs`` knob very similar to +the global ``boost`` one. It is used for disabling/enabling the "Core +Performance Boost" feature of some AMD processors. + +If present, that knob is located in every ``CPUFreq`` policy directory in +``sysfs`` (:file:`/sys/devices/system/cpu/cpufreq/policyX/`) and is called +``cpb``, which indicates a more fine grained control interface. The actual +implementation, however, works on the system-wide basis and setting that knob +for one policy causes the same value of it to be set for all of the other +policies at the same time. + +That knob is still supported on AMD processors that support its underlying +hardware feature, but it may be configured out of the kernel (via the +:c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option) and the global +``boost`` knob is present regardless. Thus it is always possible use the +``boost`` knob instead of the ``cpb`` one which is highly recommended, as that +is more consistent with what all of the other systems do (and the ``cpb`` knob +may not be supported any more in the future). + +The ``cpb`` knob is never present for any processors without the underlying +hardware feature (e.g. all Intel ones), even if the +:c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set. + + +.. _Per-entity load tracking: https://lwn.net/Articles/531853/ diff --git a/Documentation/admin-guide/pm/index.rst b/Documentation/admin-guide/pm/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..7f148f76f432cbfc62ed663a6be7b9849b66b3c6 --- /dev/null +++ b/Documentation/admin-guide/pm/index.rst @@ -0,0 +1,16 @@ +================ +Power Management +================ + +.. toctree:: + :maxdepth: 2 + + cpufreq + intel_pstate + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst new file mode 100644 index 0000000000000000000000000000000000000000..33d703989ea80b3b6cf6ee3abcc57576185d987c --- /dev/null +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -0,0 +1,755 @@ +=============================================== +``intel_pstate`` CPU Performance Scaling Driver +=============================================== + +:: + + Copyright (c) 2017 Intel Corp., Rafael J. Wysocki + + +General Information +=================== + +``intel_pstate`` is a part of the +:doc:`CPU performance scaling subsystem ` in the Linux kernel +(``CPUFreq``). It is a scaling driver for the Sandy Bridge and later +generations of Intel processors. Note, however, that some of those processors +may not be supported. [To understand ``intel_pstate`` it is necessary to know +how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if +you have not done that yet.] + +For the processors supported by ``intel_pstate``, the P-state concept is broader +than just an operating frequency or an operating performance point (see the +`LinuxCon Europe 2015 presentation by Kristen Accardi `_ for more +information about that). For this reason, the representation of P-states used +by ``intel_pstate`` internally follows the hardware specification (for details +refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual +Volume 3: System Programming Guide `_). However, the ``CPUFreq`` core +uses frequencies for identifying operating performance points of CPUs and +frequencies are involved in the user space interface exposed by it, so +``intel_pstate`` maps its internal representation of P-states to frequencies too +(fortunately, that mapping is unambiguous). At the same time, it would not be +practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of +available frequencies due to the possible size of it, so the driver does not do +that. Some functionality of the core is limited by that. + +Since the hardware P-state selection interface used by ``intel_pstate`` is +available at the logical CPU level, the driver always works with individual +CPUs. Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy +object corresponds to one logical CPU and ``CPUFreq`` policies are effectively +equivalent to CPUs. In particular, this means that they become "inactive" every +time the corresponding CPU is taken offline and need to be re-initialized when +it goes back online. + +``intel_pstate`` is not modular, so it cannot be unloaded, which means that the +only way to pass early-configuration-time parameters to it is via the kernel +command line. However, its configuration can be adjusted via ``sysfs`` to a +great extent. In some configurations it even is possible to unregister it via +``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and +registered (see `below `_). + + +Operation Modes +=============== + +``intel_pstate`` can operate in three different modes: in the active mode with +or without hardware-managed P-states support and in the passive mode. Which of +them will be in effect depends on what kernel command line options are used and +on the capabilities of the processor. + +Active Mode +----------- + +This is the default operation mode of ``intel_pstate``. If it works in this +mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq`` +policies contains the string "intel_pstate". + +In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and +provides its own scaling algorithms for P-state selection. Those algorithms +can be applied to ``CPUFreq`` policies in the same way as generic scaling +governors (that is, through the ``scaling_governor`` policy attribute in +``sysfs``). [Note that different P-state selection algorithms may be chosen for +different policies, but that is not recommended.] + +They are not generic scaling governors, but their names are the same as the +names of some of those governors. Moreover, confusingly enough, they generally +do not work in the same way as the generic governors they share the names with. +For example, the ``powersave`` P-state selection algorithm provided by +``intel_pstate`` is not a counterpart of the generic ``powersave`` governor +(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors). + +There are two P-state selection algorithms provided by ``intel_pstate`` in the +active mode: ``powersave`` and ``performance``. The way they both operate +depends on whether or not the hardware-managed P-states (HWP) feature has been +enabled in the processor and possibly on the processor model. + +Which of the P-state selection algorithms is used by default depends on the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option. +Namely, if that option is set, the ``performance`` algorithm will be used by +default, and the other one will be used by default if it is not set. + +Active Mode With HWP +~~~~~~~~~~~~~~~~~~~~ + +If the processor supports the HWP feature, it will be enabled during the +processor initialization and cannot be disabled after that. It is possible +to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the +kernel in the command line. + +If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to +select P-states by itself, but still it can give hints to the processor's +internal P-state selection logic. What those hints are depends on which P-state +selection algorithm has been applied to the given policy (or to the CPU it +corresponds to). + +Even though the P-state selection is carried out by the processor automatically, +``intel_pstate`` registers utilization update callbacks with the CPU scheduler +in this mode. However, they are not used for running a P-state selection +algorithm, but for periodic updates of the current CPU frequency information to +be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``. + +HWP + ``performance`` +..................... + +In this configuration ``intel_pstate`` will write 0 to the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's +internal P-state selection logic is expected to focus entirely on performance. + +This will override the EPP/EPB setting coming from the ``sysfs`` interface +(see `Energy vs Performance Hints`_ below). + +Also, in this configuration the range of P-states available to the processor's +internal P-state selection logic is always restricted to the upper boundary +(that is, the maximum P-state that the driver is allowed to use). + +HWP + ``powersave`` +................... + +In this configuration ``intel_pstate`` will set the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was +previously set to via ``sysfs`` (or whatever default value it was +set to by the platform firmware). This usually causes the processor's +internal P-state selection logic to be less performance-focused. + +Active Mode Without HWP +~~~~~~~~~~~~~~~~~~~~~~~ + +This is the default operation mode for processors that do not support the HWP +feature. It also is used by default with the ``intel_pstate=no_hwp`` argument +in the kernel command line. However, in this mode ``intel_pstate`` may refuse +to work with the given processor if it does not recognize it. [Note that +``intel_pstate`` will never refuse to work with any processor with the HWP +feature enabled.] + +In this mode ``intel_pstate`` registers utilization update callbacks with the +CPU scheduler in order to run a P-state selection algorithm, either +``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy +setting in ``sysfs``. The current CPU frequency information to be made +available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is +periodically updated by those utilization update callbacks too. + +``performance`` +............... + +Without HWP, this P-state selection algorithm is always the same regardless of +the processor model and platform configuration. + +It selects the maximum P-state it is allowed to use, subject to limits set via +``sysfs``, every time the P-state selection computations are carried out by the +driver's utilization update callback for the given CPU (that does not happen +more often than every 10 ms), but the hardware configuration will not be changed +if the new P-state is the same as the current one. + +This is the default P-state selection algorithm if the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option +is set. + +``powersave`` +............. + +Without HWP, this P-state selection algorithm generally depends on the +processor model and/or the system profile setting in the ACPI tables and there +are two variants of it. + +One of them is used with processors from the Atom line and (regardless of the +processor model) on platforms with the system profile in the ACPI tables set to +"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or +"workstation". It is also used with processors supporting the HWP feature if +that feature has not been enabled (that is, with the ``intel_pstate=no_hwp`` +argument in the kernel command line). It is similar to the algorithm +implemented by the generic ``schedutil`` scaling governor except that the +utilization metric used by it is based on numbers coming from feedback +registers of the CPU. It generally selects P-states proportional to the +current CPU utilization, so it is referred to as the "proportional" algorithm. + +The second variant of the ``powersave`` P-state selection algorithm, used in all +of the other cases (generally, on processors from the Core line, so it is +referred to as the "Core" algorithm), is based on the values read from the APERF +and MPERF feedback registers and the previously requested target P-state. +It does not really take CPU utilization into account explicitly, but as a rule +it causes the CPU P-state to ramp up very quickly in response to increased +utilization which is generally desirable in server environments. + +Regardless of the variant, this algorithm is run by the driver's utilization +update callback for the given CPU when it is invoked by the CPU scheduler, but +not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this +particular case `_). Like in the ``performance`` +case, the hardware configuration is not touched if the new P-state turns out to +be the same as the current one. + +This is the default P-state selection algorithm if the +:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option +is not set. + +Passive Mode +------------ + +This mode is used if the ``intel_pstate=passive`` argument is passed to the +kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too). +Like in the active mode without HWP support, in this mode ``intel_pstate`` may +refuse to work with the given processor if it does not recognize it. + +If the driver works in this mode, the ``scaling_driver`` policy attribute in +``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq". +Then, the driver behaves like a regular ``CPUFreq`` scaling driver. That is, +it is invoked by generic scaling governors when necessary to talk to the +hardware in order to change the P-state of a CPU (in particular, the +``schedutil`` governor can invoke it directly from scheduler context). + +While in this mode, ``intel_pstate`` can be used with all of the (generic) +scaling governors listed by the ``scaling_available_governors`` policy attribute +in ``sysfs`` (and the P-state selection algorithms described above are not +used). Then, it is responsible for the configuration of policy objects +corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling +governors attached to the policy objects) with accurate information on the +maximum and minimum operating frequencies supported by the hardware (including +the so-called "turbo" frequency ranges). In other words, in the passive mode +the entire range of available P-states is exposed by ``intel_pstate`` to the +``CPUFreq`` core. However, in this mode the driver does not register +utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq`` +information comes from the ``CPUFreq`` core (and is the last frequency selected +by the current scaling governor for the given policy). + + +.. _turbo: + +Turbo P-states Support +====================== + +In the majority of cases, the entire range of P-states available to +``intel_pstate`` can be divided into two sub-ranges that correspond to +different types of processor behavior, above and below a boundary that +will be referred to as the "turbo threshold" in what follows. + +The P-states above the turbo threshold are referred to as "turbo P-states" and +the whole sub-range of P-states they belong to is referred to as the "turbo +range". These names are related to the Turbo Boost technology allowing a +multicore processor to opportunistically increase the P-state of one or more +cores if there is enough power to do that and if that is not going to cause the +thermal envelope of the processor package to be exceeded. + +Specifically, if software sets the P-state of a CPU core within the turbo range +(that is, above the turbo threshold), the processor is permitted to take over +performance scaling control for that core and put it into turbo P-states of its +choice going forward. However, that permission is interpreted differently by +different processor generations. Namely, the Sandy Bridge generation of +processors will never use any P-states above the last one set by software for +the given core, even if it is within the turbo range, whereas all of the later +processor generations will take it as a license to use any P-states from the +turbo range, even above the one set by software. In other words, on those +processors setting any P-state from the turbo range will enable the processor +to put the given core into all turbo P-states up to and including the maximum +supported one as it sees fit. + +One important property of turbo P-states is that they are not sustainable. More +precisely, there is no guarantee that any CPUs will be able to stay in any of +those states indefinitely, because the power distribution within the processor +package may change over time or the thermal envelope it was designed for might +be exceeded if a turbo P-state was used for too long. + +In turn, the P-states below the turbo threshold generally are sustainable. In +fact, if one of them is set by software, the processor is not expected to change +it to a lower one unless in a thermal stress or a power limit violation +situation (a higher P-state may still be used if it is set for another CPU in +the same package at the same time, for example). + +Some processors allow multiple cores to be in turbo P-states at the same time, +but the maximum P-state that can be set for them generally depends on the number +of cores running concurrently. The maximum turbo P-state that can be set for 3 +cores at the same time usually is lower than the analogous maximum P-state for +2 cores, which in turn usually is lower than the maximum turbo P-state that can +be set for 1 core. The one-core maximum turbo P-state is thus the maximum +supported one overall. + +The maximum supported turbo P-state, the turbo threshold (the maximum supported +non-turbo P-state) and the minimum supported P-state are specific to the +processor model and can be determined by reading the processor's model-specific +registers (MSRs). Moreover, some processors support the Configurable TDP +(Thermal Design Power) feature and, when that feature is enabled, the turbo +threshold effectively becomes a configurable value that can be set by the +platform firmware. + +Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes +the entire range of available P-states, including the whole turbo range, to the +``CPUFreq`` core and (in the passive mode) to generic scaling governors. This +generally causes turbo P-states to be set more often when ``intel_pstate`` is +used relative to ACPI-based CPU performance scaling (see `below `_ +for more information). + +Moreover, since ``intel_pstate`` always knows what the real turbo threshold is +(even if the Configurable TDP feature is enabled in the processor), its +``no_turbo`` attribute in ``sysfs`` (described `below `_) should +work as expected in all cases (that is, if set to disable turbo P-states, it +always should prevent ``intel_pstate`` from using them). + + +Processor Support +================= + +To handle a given processor ``intel_pstate`` requires a number of different +pieces of information on it to be known, including: + + * The minimum supported P-state. + + * The maximum supported `non-turbo P-state `_. + + * Whether or not turbo P-states are supported at all. + + * The maximum supported `one-core turbo P-state `_ (if turbo P-states + are supported). + + * The scaling formula to translate the driver's internal representation + of P-states into frequencies and the other way around. + +Generally, ways to obtain that information are specific to the processor model +or family. Although it often is possible to obtain all of it from the processor +itself (using model-specific registers), there are cases in which hardware +manuals need to be consulted to get to it too. + +For this reason, there is a list of supported processors in ``intel_pstate`` and +the driver initialization will fail if the detected processor is not in that +list, unless it supports the `HWP feature `_. [The interface to +obtain all of the information listed above is the same for all of the processors +supporting the HWP feature, which is why they all are supported by +``intel_pstate``.] + + +User Space Interface in ``sysfs`` +================================= + +Global Attributes +----------------- + +``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to +control its functionality at the system level. They are located in the +``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all +CPUs. + +Some of them are not present if the ``intel_pstate=per_cpu_perf_limits`` +argument is passed to the kernel in the command line. + +``max_perf_pct`` + Maximum P-state the driver is allowed to set in percent of the + maximum supported performance level (the highest supported `turbo + P-state `_). + + This attribute will not be exposed if the + ``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel + command line. + +``min_perf_pct`` + Minimum P-state the driver is allowed to set in percent of the + maximum supported performance level (the highest supported `turbo + P-state `_). + + This attribute will not be exposed if the + ``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel + command line. + +``num_pstates`` + Number of P-states supported by the processor (between 0 and 255 + inclusive) including both turbo and non-turbo P-states (see + `Turbo P-states Support`_). + + The value of this attribute is not affected by the ``no_turbo`` + setting described `below `_. + + This attribute is read-only. + +``turbo_pct`` + Ratio of the `turbo range `_ size to the size of the entire + range of supported P-states, in percent. + + This attribute is read-only. + +.. _no_turbo_attr: + +``no_turbo`` + If set (equal to 1), the driver is not allowed to set any turbo P-states + (see `Turbo P-states Support`_). If unset (equalt to 0, which is the + default), turbo P-states can be set by the driver. + [Note that ``intel_pstate`` does not support the general ``boost`` + attribute (supported by some other scaling drivers) which is replaced + by this one.] + + This attrubute does not affect the maximum supported frequency value + supplied to the ``CPUFreq`` core and exposed via the policy interface, + but it affects the maximum possible value of per-policy P-state limits + (see `Interpretation of Policy Attributes`_ below for details). + +.. _status_attr: + +``status`` + Operation mode of the driver: "active", "passive" or "off". + + "active" + The driver is functional and in the `active mode + `_. + + "passive" + The driver is functional and in the `passive mode + `_. + + "off" + The driver is not functional (it is not registered as a scaling + driver with the ``CPUFreq`` core). + + This attribute can be written to in order to change the driver's + operation mode or to unregister it. The string written to it must be + one of the possible values of it and, if successful, the write will + cause the driver to switch over to the operation mode represented by + that string - or to be unregistered in the "off" case. [Actually, + switching over from the active mode to the passive mode or the other + way around causes the driver to be unregistered and registered again + with a different set of callbacks, so all of its settings (the global + as well as the per-policy ones) are then reset to their default + values, possibly depending on the target operation mode.] + + That only is supported in some configurations, though (for example, if + the `HWP feature is enabled in the processor `_, + the operation mode of the driver cannot be changed), and if it is not + supported in the current configuration, writes to this attribute with + fail with an appropriate error. + +Interpretation of Policy Attributes +----------------------------------- + +The interpretation of some ``CPUFreq`` policy attributes described in +:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver +and it generally depends on the driver's `operation mode `_. + +First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and +``scaling_cur_freq`` attributes are produced by applying a processor-specific +multiplier to the internal P-state representation used by ``intel_pstate``. +Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq`` +attributes are capped by the frequency corresponding to the maximum P-state that +the driver is allowed to set. + +If the ``no_turbo`` `global attribute `_ is set, the driver is +not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq`` +and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency. +Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and +``scaling_min_freq`` to go down to that value if they were above it before. +However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be +restored after unsetting ``no_turbo``, unless these attributes have been written +to after ``no_turbo`` was set. + +If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq`` +and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state, +which also is the value of ``cpuinfo_max_freq`` in either case. + +Next, the following policy attributes have special meaning if +``intel_pstate`` works in the `active mode `_: + +``scaling_available_governors`` + List of P-state selection algorithms provided by ``intel_pstate``. + +``scaling_governor`` + P-state selection algorithm provided by ``intel_pstate`` currently in + use with the given policy. + +``scaling_cur_freq`` + Frequency of the average P-state of the CPU represented by the given + policy for the time interval between the last two invocations of the + driver's utilization update callback by the CPU scheduler for that CPU. + +The meaning of these attributes in the `passive mode `_ is the +same as for other scaling drivers. + +Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate`` +depends on the operation mode of the driver. Namely, it is either +"intel_pstate" (in the `active mode `_) or "intel_cpufreq" (in the +`passive mode `_). + +Coordination of P-State Limits +------------------------------ + +``intel_pstate`` allows P-state limits to be set in two ways: with the help of +the ``max_perf_pct`` and ``min_perf_pct`` `global attributes +`_ or via the ``scaling_max_freq`` and ``scaling_min_freq`` +``CPUFreq`` policy attributes. The coordination between those limits is based +on the following rules, regardless of the current operation mode of the driver: + + 1. All CPUs are affected by the global limits (that is, none of them can be + requested to run faster than the global maximum and none of them can be + requested to run slower than the global minimum). + + 2. Each individual CPU is affected by its own per-policy limits (that is, it + cannot be requested to run faster than its own per-policy maximum and it + cannot be requested to run slower than its own per-policy minimum). + + 3. The global and per-policy limits can be set independently. + +If the `HWP feature is enabled in the processor `_, the +resulting effective values are written into its registers whenever the limits +change in order to request its internal P-state selection logic to always set +P-states within these limits. Otherwise, the limits are taken into account by +scaling governors (in the `passive mode `_) and by the driver +every time before setting a new P-state for a CPU. + +Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument +is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed +at all and the only way to set the limits is by using the policy attributes. + + +Energy vs Performance Hints +--------------------------- + +If ``intel_pstate`` works in the `active mode with the HWP feature enabled +`_ in the processor, additional attributes are present +in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow +user space to help ``intel_pstate`` to adjust the processor's internal P-state +selection logic by focusing it on performance or on energy-efficiency, or +somewhere between the two extremes: + +``energy_performance_preference`` + Current value of the energy vs performance hint for the given policy + (or the CPU represented by it). + + The hint can be changed by writing to this attribute. + +``energy_performance_available_preferences`` + List of strings that can be written to the + ``energy_performance_preference`` attribute. + + They represent different energy vs performance hints and should be + self-explanatory, except that ``default`` represents whatever hint + value was set by the platform firmware. + +Strings written to the ``energy_performance_preference`` attribute are +internally translated to integer values written to the processor's +Energy-Performance Preference (EPP) knob (if supported) or its +Energy-Performance Bias (EPB) knob. + +[Note that tasks may by migrated from one CPU to another by the scheduler's +load-balancing algorithm and if different energy vs performance hints are +set for those CPUs, that may lead to undesirable outcomes. To avoid such +issues it is better to set the same energy vs performance hint for all CPUs +or to pin every task potentially sensitive to them to a specific CPU.] + +.. _acpi-cpufreq: + +``intel_pstate`` vs ``acpi-cpufreq`` +==================================== + +On the majority of systems supported by ``intel_pstate``, the ACPI tables +provided by the platform firmware contain ``_PSS`` objects returning information +that can be used for CPU performance scaling (refer to the `ACPI specification`_ +for details on the ``_PSS`` objects and the format of the information returned +by them). + +The information returned by the ACPI ``_PSS`` objects is used by the +``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate`` +the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling +interface, but the set of P-states it can use is limited by the ``_PSS`` +output. + +On those systems each ``_PSS`` object returns a list of P-states supported by +the corresponding CPU which basically is a subset of the P-states range that can +be used by ``intel_pstate`` on the same system, with one exception: the whole +`turbo range `_ is represented by one item in it (the topmost one). By +convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz +than the frequency of the highest non-turbo P-state listed by it, but the +corresponding P-state representation (following the hardware specification) +returned for it matches the maximum supported turbo P-state (or is the +special value 255 meaning essentially "go as high as you can get"). + +The list of P-states returned by ``_PSS`` is reflected by the table of +available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and +scaling governors and the minimum and maximum supported frequencies reported by +it come from that list as well. In particular, given the special representation +of the turbo range described above, this means that the maximum supported +frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency +of the highest supported non-turbo P-state listed by ``_PSS`` which, of course, +affects decisions made by the scaling governors, except for ``powersave`` and +``performance``. + +For example, if a given governor attempts to select a frequency proportional to +estimated CPU load and maps the load of 100% to the maximum supported frequency +(possibly multiplied by a constant), then it will tend to choose P-states below +the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because +in that case the turbo range corresponds to a small fraction of the frequency +band it can use (1 MHz vs 1 GHz or more). In consequence, it will only go to +the turbo range for the highest loads and the other loads above 50% that might +benefit from running at turbo frequencies will be given non-turbo P-states +instead. + +One more issue related to that may appear on systems supporting the +`Configurable TDP feature `_ allowing the platform firmware to set the +turbo threshold. Namely, if that is not coordinated with the lists of P-states +returned by ``_PSS`` properly, there may be more than one item corresponding to +a turbo P-state in those lists and there may be a problem with avoiding the +turbo range (if desirable or necessary). Usually, to avoid using turbo +P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed +by ``_PSS``, but that is not sufficient when there are other turbo P-states in +the list returned by it. + +Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the +`passive mode `_, except that the number of P-states it can set +is limited to the ones listed by the ACPI ``_PSS`` objects. + + +Kernel Command Line Options for ``intel_pstate`` +================================================ + +Several kernel command line options can be used to pass early-configuration-time +parameters to ``intel_pstate`` in order to enforce specific behavior of it. All +of them have to be prepended with the ``intel_pstate=`` prefix. + +``disable`` + Do not register ``intel_pstate`` as the scaling driver even if the + processor is supported by it. + +``passive`` + Register ``intel_pstate`` in the `passive mode `_ to + start with. + + This option implies the ``no_hwp`` one described below. + +``force`` + Register ``intel_pstate`` as the scaling driver instead of + ``acpi-cpufreq`` even if the latter is preferred on the given system. + + This may prevent some platform features (such as thermal controls and + power capping) that rely on the availability of ACPI P-states + information from functioning as expected, so it should be used with + caution. + + This option does not work with processors that are not supported by + ``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling + driver is used instead of ``acpi-cpufreq``. + +``no_hwp`` + Do not enable the `hardware-managed P-states (HWP) feature + `_ even if it is supported by the processor. + +``hwp_only`` + Register ``intel_pstate`` as the scaling driver only if the + `hardware-managed P-states (HWP) feature `_ is + supported by the processor. + +``support_acpi_ppc`` + Take ACPI ``_PPC`` performance limits into account. + + If the preferred power management profile in the FADT (Fixed ACPI + Description Table) is set to "Enterprise Server" or "Performance + Server", the ACPI ``_PPC`` limits are taken into account by default + and this option has no effect. + +``per_cpu_perf_limits`` + Use per-logical-CPU P-State limits (see `Coordination of P-state + Limits`_ for details). + + +Diagnostics and Tuning +====================== + +Trace Events +------------ + +There are two static trace events that can be used for ``intel_pstate`` +diagnostics. One of them is the ``cpu_frequency`` trace event generally used +by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific +to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if +it works in the `active mode `_. + +The following sequence of shell commands can be used to enable them and see +their output (if the kernel is generally configured to support event tracing):: + + # cd /sys/kernel/debug/tracing/ + # echo 1 > events/power/pstate_sample/enable + # echo 1 > events/power/cpu_frequency/enable + # cat trace + gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476 + cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 + +If ``intel_pstate`` works in the `passive mode `_, the +``cpu_frequency`` trace event will be triggered either by the ``schedutil`` +scaling governor (for the policies it is attached to), or by the ``CPUFreq`` +core (for the policies with other scaling governors). + +``ftrace`` +---------- + +The ``ftrace`` interface can be used for low-level diagnostics of +``intel_pstate``. For example, to check how often the function to set a +P-state is called, the ``ftrace`` filter can be set to to +:c:func:`intel_pstate_set_pstate`:: + + # cd /sys/kernel/debug/tracing/ + # cat available_filter_functions | grep -i pstate + intel_pstate_set_pstate + intel_pstate_cpu_init + ... + # echo intel_pstate_set_pstate > set_ftrace_filter + # echo function > current_tracer + # cat trace | head -15 + # tracer: function + # + # entries-in-buffer/entries-written: 80/80 #P:4 + # + # _-----=> irqs-off + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / delay + # TASK-PID CPU# |||| TIMESTAMP FUNCTION + # | | | |||| | | + Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func + gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func + -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func + +Tuning Interface in ``debugfs`` +------------------------------- + +The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of +processors in the active mode `_ is based on a `PID controller`_ +whose parameters were chosen to address a number of different use cases at the +same time. However, it still is possible to fine-tune it to a specific workload +and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is +provided for this purpose. [Note that the ``pstate_snb`` directory will be +present only if the specific P-state selection algorithm matching the interface +in it actually is in use.] + +The following files present in that directory can be used to modify the PID +controller parameters at run time: + +| ``deadband`` +| ``d_gain_pct`` +| ``i_gain_pct`` +| ``p_gain_pct`` +| ``sample_rate_ms`` +| ``setpoint`` + +Note, however, that achieving desirable results this way generally requires +expert-level understanding of the power vs performance tradeoff, so extra care +is recommended when attempting to do that. + + +.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf +.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html +.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf +.. _PID controller: https://en.wikipedia.org/wiki/PID_controller diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst index 1b90c6f00a9290b14e365c5e07eed13b57942ac0..8c7bbf2c88d23d2c690ecfbde43e6b309f796803 100644 --- a/Documentation/admin-guide/ras.rst +++ b/Documentation/admin-guide/ras.rst @@ -8,7 +8,7 @@ RAS concepts ************ Reliability, Availability and Serviceability (RAS) is a concept used on -servers meant to measure their robusteness. +servers meant to measure their robustness. Reliability is the probability that a system will produce correct outputs. @@ -42,13 +42,13 @@ Among the monitoring measures, the most usual ones include: * CPU – detect errors at instruction execution and at L1/L2/L3 caches; * Memory – add error correction logic (ECC) to detect and correct errors; -* I/O – add CRC checksums for tranfered data; +* I/O – add CRC checksums for transferred data; * Storage – RAID, journal file systems, checksums, Self-Monitoring, Analysis and Reporting Technology (SMART). By monitoring the number of occurrences of error detections, it is possible to identify if the probability of hardware errors is increasing, and, on such -case, do a preventive maintainance to replace a degrated component while +case, do a preventive maintenance to replace a degraded component while those errors are correctable. Types of errors @@ -121,7 +121,7 @@ using the ``dmidecode`` tool. For example, on a desktop machine, it shows:: On the above example, a DDR4 SO-DIMM memory module is located at the system's memory labeled as "BANK 0", as given by the *bank locator* field. Please notice that, on such system, the *total width* is equal to the -*data witdh*. It means that such memory module doesn't have error +*data width*. It means that such memory module doesn't have error detection/correction mechanisms. Unfortunately, not all systems use the same field to specify the memory @@ -145,7 +145,7 @@ bank. On this example, from an older server, ``dmidecode`` shows:: There, the DDR3 RDIMM memory module is located at the system's memory labeled as "DIMM_A1", as given by the *locator* field. Please notice that this -memory module has 64 bits of *data witdh* and 72 bits of *total width*. So, +memory module has 64 bits of *data width* and 72 bits of *total width*. So, it has 8 extra bits to be used by error detection and correction mechanisms. Such kind of memory is called Error-correcting code memory (ECC memory). @@ -186,7 +186,7 @@ Architecture (MCA)\ [#f3]_. .. [#f1] Please notice that several memory controllers allow operation on a mode called "Lock-Step", where it groups two memory modules together, doing 128-bit reads/writes. That gives 16 bits for error correction, with - significatively improves the error correction mechanism, at the expense + significantly improves the error correction mechanism, at the expense that, when an error happens, there's no way to know what memory module is to blame. So, it has to blame both memory modules. diff --git a/Documentation/admin-guide/sysrq.rst b/Documentation/admin-guide/sysrq.rst index d1712ea2d314da0f0405ec00f5ad410f1014b2db..7b9035c01a2e1c01d68336d891402f54362cdb13 100644 --- a/Documentation/admin-guide/sysrq.rst +++ b/Documentation/admin-guide/sysrq.rst @@ -212,7 +212,8 @@ I hit SysRq, but nothing seems to happen, what's wrong? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are some keyboards that produce a different keycode for SysRq than the -pre-defined value of 99 (see ``KEY_SYSRQ`` in ``include/linux/input.h``), or +pre-defined value of 99 +(see ``KEY_SYSRQ`` in ``include/uapi/linux/input-event-codes.h``), or which don't have a SysRq key at all. In these cases, run ``showkey -s`` to find an appropriate scancode sequence, and use ``setkeycodes 99`` to map this sequence to the usual SysRq code (e.g., ``setkeycodes e05b 99``). It's diff --git a/Documentation/arm/mem_alignment b/Documentation/arm/mem_alignment index c7c7a114c78c795c702f8543c57c7558466a9a4b..6335fcacbba986df418c1665410617d7b662398c 100644 --- a/Documentation/arm/mem_alignment +++ b/Documentation/arm/mem_alignment @@ -48,7 +48,7 @@ Note that not all combinations are supported - only values 0 through 5. For example, the following will turn on the warnings, but without fixing up or sending SIGBUS signals: - echo 1 > /proc/sys/debug/alignment + echo 1 > /proc/cpu/alignment You can also read the content of the same file to get statistical information on unaligned access occurrences plus the current mode of diff --git a/Documentation/arm/stm32/stm32h743-overview.txt b/Documentation/arm/stm32/stm32h743-overview.txt new file mode 100644 index 0000000000000000000000000000000000000000..3031cbae31a506b4ab2305d29f45d6550dc2d041 --- /dev/null +++ b/Documentation/arm/stm32/stm32h743-overview.txt @@ -0,0 +1,30 @@ + STM32H743 Overview + ================== + + Introduction + ------------ + The STM32H743 is a Cortex-M7 MCU aimed at various applications. + It features: + - Cortex-M7 core running up to @400MHz + - 2MB internal flash, 1MBytes internal RAM + - FMC controller to connect SDRAM, NOR and NAND memories + - Dual mode QSPI + - SD/MMC/SDIO support + - Ethernet controller + - USB OTFG FS & HS controllers + - I2C, SPI, CAN busses support + - Several 16 & 32 bits general purpose timers + - Serial Audio interface + - LCD controller + - HDMI-CEC + - SPDIFRX + - DFSDM + + Resources + --------- + Datasheet and reference manual are publicly available on ST website: + - http://www.st.com/en/microcontrollers/stm32h7x3.html?querycriteria=productId=LN2033 + + Document Author + --------------- + Alexandre Torgue diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index 61ca21ebef1a27ea8d633c56e9afef45224f776d..d1c97f9f51cc3bd3fd21ad181253bb354e7771b1 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -169,6 +169,18 @@ infrastructure: as available on the CPU where it is fetched and is not a system wide safe value. + 4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | LRCPC | [23-20] | y | + |--------------------------------------------------| + | FCMA | [19-16] | y | + |--------------------------------------------------| + | JSCVT | [15-12] | y | + x--------------------------------------------------x + Appendix I: Example --------------------------- diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 2f66683500b8e44e0ceb44bc877acccae39b35d7..10f2dddbf449475ae5bdc79945330d09cf5f6683 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -54,6 +54,7 @@ stable kernels. | ARM | Cortex-A57 | #852523 | N/A | | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | | ARM | Cortex-A72 | #853709 | N/A | +| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt index d9995f1f51b3eb9e678a557a471e4d387db49ca8..a25a99e82bb1c9811ee3e8cb111d356fa53fc979 100644 --- a/Documentation/arm64/tagged-pointers.txt +++ b/Documentation/arm64/tagged-pointers.txt @@ -11,24 +11,56 @@ in AArch64 Linux. The kernel configures the translation tables so that translations made via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of the virtual address ignored by the translation hardware. This frees up -this byte for application use, with the following caveats: +this byte for application use. - (1) The kernel requires that all user addresses passed to EL1 - are tagged with tag 0x00. This means that any syscall - parameters containing user virtual addresses *must* have - their top byte cleared before trapping to the kernel. - (2) Non-zero tags are not preserved when delivering signals. - This means that signal handlers in applications making use - of tags cannot rely on the tag information for user virtual - addresses being maintained for fields inside siginfo_t. - One exception to this rule is for signals raised in response - to watchpoint debug exceptions, where the tag information - will be preserved. +Passing tagged addresses to the kernel +-------------------------------------- - (3) Special care should be taken when using tagged pointers, - since it is likely that C compilers will not hazard two - virtual addresses differing only in the upper byte. +All interpretation of userspace memory addresses by the kernel assumes +an address tag of 0x00. + +This includes, but is not limited to, addresses found in: + + - pointer arguments to system calls, including pointers in structures + passed to system calls, + + - the stack pointer (sp), e.g. when interpreting it to deliver a + signal, + + - the frame pointer (x29) and frame records, e.g. when interpreting + them to generate a backtrace or call graph. + +Using non-zero address tags in any of these locations may result in an +error code being returned, a (fatal) signal being raised, or other modes +of failure. + +For these reasons, passing non-zero address tags to the kernel via +system calls is forbidden, and using a non-zero address tag for sp is +strongly discouraged. + +Programs maintaining a frame pointer and frame records that use non-zero +address tags may suffer impaired or inaccurate debug and profiling +visibility. + + +Preserving tags +--------------- + +Non-zero tags are not preserved when delivering signals. This means that +signal handlers in applications making use of tags cannot rely on the +tag information for user virtual addresses being maintained for fields +inside siginfo_t. One exception to this rule is for signals raised in +response to watchpoint debug exceptions, where the tag information will +be preserved. The architecture prevents the use of a tagged PC, so the upper byte will be set to a sign-extension of bit 55 on exception return. + + +Other considerations +-------------------- + +Special care should be taken when using tagged pointers, since it is +likely that C compilers will not hazard two virtual addresses differing +only in the upper byte. diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index e55103ace382a093050ab70f5e0a6b92e4faa89a..8d55b4bbb5e2ef03344f3f4f8e920ad01f7d0e04 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -1,5 +1,7 @@ 00-INDEX - This file +bfq-iosched.txt + - BFQ IO scheduler and its tunables biodoc.txt - Notes on the Generic Block Layer Rewrite in Linux 2.5 biovecs.txt diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt new file mode 100644 index 0000000000000000000000000000000000000000..05e2822a80b34d0f31649cfb486ade2b4679f9e8 --- /dev/null +++ b/Documentation/block/bfq-iosched.txt @@ -0,0 +1,546 @@ +BFQ (Budget Fair Queueing) +========================== + +BFQ is a proportional-share I/O scheduler, with some extra +low-latency capabilities. In addition to cgroups support (blkio or io +controllers), BFQ's main features are: +- BFQ guarantees a high system and application responsiveness, and a + low latency for time-sensitive applications, such as audio or video + players; +- BFQ distributes bandwidth, and not just time, among processes or + groups (switching back to time distribution when needed to keep + throughput high). + +In its default configuration, BFQ privileges latency over +throughput. So, when needed for achieving a lower latency, BFQ builds +schedules that may lead to a lower throughput. If your main or only +goal, for a given device, is to achieve the maximum-possible +throughput at all times, then do switch off all low-latency heuristics +for that device, by setting low_latency to 0. Full details in Section 3. + +On average CPUs, the current version of BFQ can handle devices +performing at most ~30K IOPS; at most ~50 KIOPS on faster CPUs. As a +reference, 30-50 KIOPS correspond to very high bandwidths with +sequential I/O (e.g., 8-12 GB/s if I/O requests are 256 KB large), and +to 120-200 MB/s with 4KB random I/O. BFQ has not yet been tested on +multi-queue devices. + +The table of contents follow. Impatients can just jump to Section 3. + +CONTENTS + +1. When may BFQ be useful? + 1-1 Personal systems + 1-2 Server systems +2. How does BFQ work? +3. What are BFQ's tunable? +4. BFQ group scheduling + 4-1 Service guarantees provided + 4-2 Interface + +1. When may BFQ be useful? +========================== + +BFQ provides the following benefits on personal and server systems. + +1-1 Personal systems +-------------------- + +Low latency for interactive applications + +Regardless of the actual background workload, BFQ guarantees that, for +interactive tasks, the storage device is virtually as responsive as if +it was idle. For example, even if one or more of the following +background workloads are being executed: +- one or more large files are being read, written or copied, +- a tree of source files is being compiled, +- one or more virtual machines are performing I/O, +- a software update is in progress, +- indexing daemons are scanning filesystems and updating their + databases, +starting an application or loading a file from within an application +takes about the same time as if the storage device was idle. As a +comparison, with CFQ, NOOP or DEADLINE, and in the same conditions, +applications experience high latencies, or even become unresponsive +until the background workload terminates (also on SSDs). + +Low latency for soft real-time applications + +Also soft real-time applications, such as audio and video +players/streamers, enjoy a low latency and a low drop rate, regardless +of the background I/O workload. As a consequence, these applications +do not suffer from almost any glitch due to the background workload. + +Higher speed for code-development tasks + +If some additional workload happens to be executed in parallel, then +BFQ executes the I/O-related components of typical code-development +tasks (compilation, checkout, merge, ...) much more quickly than CFQ, +NOOP or DEADLINE. + +High throughput + +On hard disks, BFQ achieves up to 30% higher throughput than CFQ, and +up to 150% higher throughput than DEADLINE and NOOP, with all the +sequential workloads considered in our tests. With random workloads, +and with all the workloads on flash-based devices, BFQ achieves, +instead, about the same throughput as the other schedulers. + +Strong fairness, bandwidth and delay guarantees + +BFQ distributes the device throughput, and not just the device time, +among I/O-bound applications in proportion their weights, with any +workload and regardless of the device parameters. From these bandwidth +guarantees, it is possible to compute tight per-I/O-request delay +guarantees by a simple formula. If not configured for strict service +guarantees, BFQ switches to time-based resource sharing (only) for +applications that would otherwise cause a throughput loss. + +1-2 Server systems +------------------ + +Most benefits for server systems follow from the same service +properties as above. In particular, regardless of whether additional, +possibly heavy workloads are being served, BFQ guarantees: + +. audio and video-streaming with zero or very low jitter and drop + rate; + +. fast retrieval of WEB pages and embedded objects; + +. real-time recording of data in live-dumping applications (e.g., + packet logging); + +. responsiveness in local and remote access to a server. + + +2. How does BFQ work? +===================== + +BFQ is a proportional-share I/O scheduler, whose general structure, +plus a lot of code, are borrowed from CFQ. + +- Each process doing I/O on a device is associated with a weight and a + (bfq_)queue. + +- BFQ grants exclusive access to the device, for a while, to one queue + (process) at a time, and implements this service model by + associating every queue with a budget, measured in number of + sectors. + + - After a queue is granted access to the device, the budget of the + queue is decremented, on each request dispatch, by the size of the + request. + + - The in-service queue is expired, i.e., its service is suspended, + only if one of the following events occurs: 1) the queue finishes + its budget, 2) the queue empties, 3) a "budget timeout" fires. + + - The budget timeout prevents processes doing random I/O from + holding the device for too long and dramatically reducing + throughput. + + - Actually, as in CFQ, a queue associated with a process issuing + sync requests may not be expired immediately when it empties. In + contrast, BFQ may idle the device for a short time interval, + giving the process the chance to go on being served if it issues + a new request in time. Device idling typically boosts the + throughput on rotational devices, if processes do synchronous + and sequential I/O. In addition, under BFQ, device idling is + also instrumental in guaranteeing the desired throughput + fraction to processes issuing sync requests (see the description + of the slice_idle tunable in this document, or [1, 2], for more + details). + + - With respect to idling for service guarantees, if several + processes are competing for the device at the same time, but + all processes (and groups, after the following commit) have + the same weight, then BFQ guarantees the expected throughput + distribution without ever idling the device. Throughput is + thus as high as possible in this common scenario. + + - If low-latency mode is enabled (default configuration), BFQ + executes some special heuristics to detect interactive and soft + real-time applications (e.g., video or audio players/streamers), + and to reduce their latency. The most important action taken to + achieve this goal is to give to the queues associated with these + applications more than their fair share of the device + throughput. For brevity, we call just "weight-raising" the whole + sets of actions taken by BFQ to privilege these queues. In + particular, BFQ provides a milder form of weight-raising for + interactive applications, and a stronger form for soft real-time + applications. + + - BFQ automatically deactivates idling for queues born in a burst of + queue creations. In fact, these queues are usually associated with + the processes of applications and services that benefit mostly + from a high throughput. Examples are systemd during boot, or git + grep. + + - As CFQ, BFQ merges queues performing interleaved I/O, i.e., + performing random I/O that becomes mostly sequential if + merged. Differently from CFQ, BFQ achieves this goal with a more + reactive mechanism, called Early Queue Merge (EQM). EQM is so + responsive in detecting interleaved I/O (cooperating processes), + that it enables BFQ to achieve a high throughput, by queue + merging, even for queues for which CFQ needs a different + mechanism, preemption, to get a high throughput. As such EQM is a + unified mechanism to achieve a high throughput with interleaved + I/O. + + - Queues are scheduled according to a variant of WF2Q+, named + B-WF2Q+, and implemented using an augmented rb-tree to preserve an + O(log N) overall complexity. See [2] for more details. B-WF2Q+ is + also ready for hierarchical scheduling. However, for a cleaner + logical breakdown, the code that enables and completes + hierarchical support is provided in the next commit, which focuses + exactly on this feature. + + - B-WF2Q+ guarantees a tight deviation with respect to an ideal, + perfectly fair, and smooth service. In particular, B-WF2Q+ + guarantees that each queue receives a fraction of the device + throughput proportional to its weight, even if the throughput + fluctuates, and regardless of: the device parameters, the current + workload and the budgets assigned to the queue. + + - The last, budget-independence, property (although probably + counterintuitive in the first place) is definitely beneficial, for + the following reasons: + + - First, with any proportional-share scheduler, the maximum + deviation with respect to an ideal service is proportional to + the maximum budget (slice) assigned to queues. As a consequence, + BFQ can keep this deviation tight not only because of the + accurate service of B-WF2Q+, but also because BFQ *does not* + need to assign a larger budget to a queue to let the queue + receive a higher fraction of the device throughput. + + - Second, BFQ is free to choose, for every process (queue), the + budget that best fits the needs of the process, or best + leverages the I/O pattern of the process. In particular, BFQ + updates queue budgets with a simple feedback-loop algorithm that + allows a high throughput to be achieved, while still providing + tight latency guarantees to time-sensitive applications. When + the in-service queue expires, this algorithm computes the next + budget of the queue so as to: + + - Let large budgets be eventually assigned to the queues + associated with I/O-bound applications performing sequential + I/O: in fact, the longer these applications are served once + got access to the device, the higher the throughput is. + + - Let small budgets be eventually assigned to the queues + associated with time-sensitive applications (which typically + perform sporadic and short I/O), because, the smaller the + budget assigned to a queue waiting for service is, the sooner + B-WF2Q+ will serve that queue (Subsec 3.3 in [2]). + +- If several processes are competing for the device at the same time, + but all processes and groups have the same weight, then BFQ + guarantees the expected throughput distribution without ever idling + the device. It uses preemption instead. Throughput is then much + higher in this common scenario. + +- ioprio classes are served in strict priority order, i.e., + lower-priority queues are not served as long as there are + higher-priority queues. Among queues in the same class, the + bandwidth is distributed in proportion to the weight of each + queue. A very thin extra bandwidth is however guaranteed to + the Idle class, to prevent it from starving. + + +3. What are BFQ's tunable? +========================== + +The tunables back_seek-max, back_seek_penalty, fifo_expire_async and +fifo_expire_sync below are the same as in CFQ. Their description is +just copied from that for CFQ. Some considerations in the description +of slice_idle are copied from CFQ too. + +per-process ioprio and weight +----------------------------- + +Unless the cgroups interface is used (see "4. BFQ group scheduling"), +weights can be assigned to processes only indirectly, through I/O +priorities, and according to the relation: +weight = (IOPRIO_BE_NR - ioprio) * 10. + +Beware that, if low-latency is set, then BFQ automatically raises the +weight of the queues associated with interactive and soft real-time +applications. Unset this tunable if you need/want to control weights. + +slice_idle +---------- + +This parameter specifies how long BFQ should idle for next I/O +request, when certain sync BFQ queues become empty. By default +slice_idle is a non-zero value. Idling has a double purpose: boosting +throughput and making sure that the desired throughput distribution is +respected (see the description of how BFQ works, and, if needed, the +papers referred there). + +As for throughput, idling can be very helpful on highly seeky media +like single spindle SATA/SAS disks where we can cut down on overall +number of seeks and see improved throughput. + +Setting slice_idle to 0 will remove all the idling on queues and one +should see an overall improved throughput on faster storage devices +like multiple SATA/SAS disks in hardware RAID configuration. + +So depending on storage and workload, it might be useful to set +slice_idle=0. In general for SATA/SAS disks and software RAID of +SATA/SAS disks keeping slice_idle enabled should be useful. For any +configurations where there are multiple spindles behind single LUN +(Host based hardware RAID controller or for storage arrays), setting +slice_idle=0 might end up in better throughput and acceptable +latencies. + +Idling is however necessary to have service guarantees enforced in +case of differentiated weights or differentiated I/O-request lengths. +To see why, suppose that a given BFQ queue A must get several I/O +requests served for each request served for another queue B. Idling +ensures that, if A makes a new I/O request slightly after becoming +empty, then no request of B is dispatched in the middle, and thus A +does not lose the possibility to get more than one request dispatched +before the next request of B is dispatched. Note that idling +guarantees the desired differentiated treatment of queues only in +terms of I/O-request dispatches. To guarantee that the actual service +order then corresponds to the dispatch order, the strict_guarantees +tunable must be set too. + +There is an important flipside for idling: apart from the above cases +where it is beneficial also for throughput, idling can severely impact +throughput. One important case is random workload. Because of this +issue, BFQ tends to avoid idling as much as possible, when it is not +beneficial also for throughput. As a consequence of this behavior, and +of further issues described for the strict_guarantees tunable, +short-term service guarantees may be occasionally violated. And, in +some cases, these guarantees may be more important than guaranteeing +maximum throughput. For example, in video playing/streaming, a very +low drop rate may be more important than maximum throughput. In these +cases, consider setting the strict_guarantees parameter. + +strict_guarantees +----------------- + +If this parameter is set (default: unset), then BFQ + +- always performs idling when the in-service queue becomes empty; + +- forces the device to serve one I/O request at a time, by dispatching a + new request only if there is no outstanding request. + +In the presence of differentiated weights or I/O-request sizes, both +the above conditions are needed to guarantee that every BFQ queue +receives its allotted share of the bandwidth. The first condition is +needed for the reasons explained in the description of the slice_idle +tunable. The second condition is needed because all modern storage +devices reorder internally-queued requests, which may trivially break +the service guarantees enforced by the I/O scheduler. + +Setting strict_guarantees may evidently affect throughput. + +back_seek_max +------------- + +This specifies, given in Kbytes, the maximum "distance" for backward seeking. +The distance is the amount of space from the current head location to the +sectors that are backward in terms of distance. + +This parameter allows the scheduler to anticipate requests in the "backward" +direction and consider them as being the "next" if they are within this +distance from the current head location. + +back_seek_penalty +----------------- + +This parameter is used to compute the cost of backward seeking. If the +backward distance of request is just 1/back_seek_penalty from a "front" +request, then the seeking cost of two requests is considered equivalent. + +So scheduler will not bias toward one or the other request (otherwise scheduler +will bias toward front request). Default value of back_seek_penalty is 2. + +fifo_expire_async +----------------- + +This parameter is used to set the timeout of asynchronous requests. Default +value of this is 248ms. + +fifo_expire_sync +---------------- + +This parameter is used to set the timeout of synchronous requests. Default +value of this is 124ms. In case to favor synchronous requests over asynchronous +one, this value should be decreased relative to fifo_expire_async. + +low_latency +----------- + +This parameter is used to enable/disable BFQ's low latency mode. By +default, low latency mode is enabled. If enabled, interactive and soft +real-time applications are privileged and experience a lower latency, +as explained in more detail in the description of how BFQ works. + +DISABLE this mode if you need full control on bandwidth +distribution. In fact, if it is enabled, then BFQ automatically +increases the bandwidth share of privileged applications, as the main +means to guarantee a lower latency to them. + +In addition, as already highlighted at the beginning of this document, +DISABLE this mode if your only goal is to achieve a high throughput. +In fact, privileging the I/O of some application over the rest may +entail a lower throughput. To achieve the highest-possible throughput +on a non-rotational device, setting slice_idle to 0 may be needed too +(at the cost of giving up any strong guarantee on fairness and low +latency). + +timeout_sync +------------ + +Maximum amount of device time that can be given to a task (queue) once +it has been selected for service. On devices with costly seeks, +increasing this time usually increases maximum throughput. On the +opposite end, increasing this time coarsens the granularity of the +short-term bandwidth and latency guarantees, especially if the +following parameter is set to zero. + +max_budget +---------- + +Maximum amount of service, measured in sectors, that can be provided +to a BFQ queue once it is set in service (of course within the limits +of the above timeout). According to what said in the description of +the algorithm, larger values increase the throughput in proportion to +the percentage of sequential I/O requests issued. The price of larger +values is that they coarsen the granularity of short-term bandwidth +and latency guarantees. + +The default value is 0, which enables auto-tuning: BFQ sets max_budget +to the maximum number of sectors that can be served during +timeout_sync, according to the estimated peak rate. + +weights +------- + +Read-only parameter, used to show the weights of the currently active +BFQ queues. + + +wr_ tunables +------------ + +BFQ exports a few parameters to control/tune the behavior of +low-latency heuristics. + +wr_coeff + +Factor by which the weight of a weight-raised queue is multiplied. If +the queue is deemed soft real-time, then the weight is further +multiplied by an additional, constant factor. + +wr_max_time + +Maximum duration of a weight-raising period for an interactive task +(ms). If set to zero (default value), then this value is computed +automatically, as a function of the peak rate of the device. In any +case, when the value of this parameter is read, it always reports the +current duration, regardless of whether it has been set manually or +computed automatically. + +wr_max_softrt_rate + +Maximum service rate below which a queue is deemed to be associated +with a soft real-time application, and is then weight-raised +accordingly (sectors/sec). + +wr_min_idle_time + +Minimum idle period after which interactive weight-raising may be +reactivated for a queue (in ms). + +wr_rt_max_time + +Maximum weight-raising duration for soft real-time queues (in ms). The +start time from which this duration is considered is automatically +moved forward if the queue is detected to be still soft real-time +before the current soft real-time weight-raising period finishes. + +wr_min_inter_arr_async + +Minimum period between I/O request arrivals after which weight-raising +may be reactivated for an already busy async queue (in ms). + + +4. Group scheduling with BFQ +============================ + +BFQ supports both cgroups-v1 and cgroups-v2 io controllers, namely +blkio and io. In particular, BFQ supports weight-based proportional +share. To activate cgroups support, set BFQ_GROUP_IOSCHED. + +4-1 Service guarantees provided +------------------------------- + +With BFQ, proportional share means true proportional share of the +device bandwidth, according to group weights. For example, a group +with weight 200 gets twice the bandwidth, and not just twice the time, +of a group with weight 100. + +BFQ supports hierarchies (group trees) of any depth. Bandwidth is +distributed among groups and processes in the expected way: for each +group, the children of the group share the whole bandwidth of the +group in proportion to their weights. In particular, this implies +that, for each leaf group, every process of the group receives the +same share of the whole group bandwidth, unless the ioprio of the +process is modified. + +The resource-sharing guarantee for a group may partially or totally +switch from bandwidth to time, if providing bandwidth guarantees to +the group lowers the throughput too much. This switch occurs on a +per-process basis: if a process of a leaf group causes throughput loss +if served in such a way to receive its share of the bandwidth, then +BFQ switches back to just time-based proportional share for that +process. + +4-2 Interface +------------- + +To get proportional sharing of bandwidth with BFQ for a given device, +BFQ must of course be the active scheduler for that device. + +Within each group directory, the names of the files associated with +BFQ-specific cgroup parameters and stats begin with the "bfq." +prefix. So, with cgroups-v1 or cgroups-v2, the full prefix for +BFQ-specific files is "blkio.bfq." or "io.bfq." For example, the group +parameter to set the weight of a group with BFQ is blkio.bfq.weight +or io.bfq.weight. + +Parameters to set +----------------- + +For each group, there is only the following parameter to set. + +weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the +group inside its parent. Available values: 1..10000 (default 100). The +linear mapping between ioprio and weights, described at the beginning +of the tunable section, is still valid, but all weights higher than +IOPRIO_BE_NR*10 are mapped to ioprio 0. + +Recall that, if low-latency is set, then BFQ automatically raises the +weight of the queues associated with interactive and soft real-time +applications. Unset this tunable if you need/want to control weights. + + +[1] P. Valente, A. Avanzini, "Evolution of the BFQ Storage I/O + Scheduler", Proceedings of the First Workshop on Mobile System + Technologies (MST-2015), May 2015. + http://algogroup.unimore.it/people/paolo/disk_sched/mst-2015.pdf + +[2] P. Valente and M. Andreolini, "Improving Application + Responsiveness with the BFQ Disk I/O Scheduler", Proceedings of + the 5th Annual International Systems and Storage Conference + (SYSTOR '12), June 2012. + Slightly extended version: + http://algogroup.unimore.it/people/paolo/disk_sched/bfq-v1-suite- + results.pdf diff --git a/Documentation/block/kyber-iosched.txt b/Documentation/block/kyber-iosched.txt new file mode 100644 index 0000000000000000000000000000000000000000..e94feacd7edcd7d8156ea6a5702e1fe781164bb1 --- /dev/null +++ b/Documentation/block/kyber-iosched.txt @@ -0,0 +1,14 @@ +Kyber I/O scheduler tunables +=========================== + +The only two tunables for the Kyber scheduler are the target latencies for +reads and synchronous writes. Kyber will throttle requests in order to meet +these target latencies. + +read_lat_nsec +------------- +Target latency for reads (in nanoseconds). + +write_lat_nsec +-------------- +Target latency for synchronous writes (in nanoseconds). diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index c0a3bb5a6e4eb291d077f10633001c439563ccc2..2c1e67058fd3bdf02336a71dc34ec885b9b84c2d 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -43,11 +43,6 @@ large discards are issued, setting this value lower will make Linux issue smaller discards and potentially help reduce latencies induced by large discard operations. -discard_zeroes_data (RO) ------------------------- -When read, this file will show if the discarded block are zeroed by the -device or not. If its value is '1' the blocks are zeroed otherwise not. - hw_sector_size (RO) ------------------- This is the hardware sector size of the device, in bytes. @@ -192,5 +187,11 @@ scaling back writes. Writing a value of '0' to this file disables the feature. Writing a value of '-1' to this file resets the value to the default setting. +throttle_sample_time (RW) +------------------------- +This is the time window that blk-throttle samples data, in millisecond. +blk-throttle makes decision based on the samplings. Lower time means cgroups +have more smooth throughput, but higher CPU overhead. This exists only when +CONFIG_BLK_DEV_THROTTLING_LOW is enabled. Jens Axboe , February 2009 diff --git a/Documentation/blockdev/mflash.txt b/Documentation/blockdev/mflash.txt deleted file mode 100644 index f7e05055148715f1eca44617fecaf20193c799dd..0000000000000000000000000000000000000000 --- a/Documentation/blockdev/mflash.txt +++ /dev/null @@ -1,84 +0,0 @@ -This document describes m[g]flash support in linux. - -Contents - 1. Overview - 2. Reserved area configuration - 3. Example of mflash platform driver registration - -1. Overview - -Mflash and gflash are embedded flash drive. The only difference is mflash is -MCP(Multi Chip Package) device. These two device operate exactly same way. -So the rest mflash repersents mflash and gflash altogether. - -Internally, mflash has nand flash and other hardware logics and supports -2 different operation (ATA, IO) modes. ATA mode doesn't need any new -driver and currently works well under standard IDE subsystem. Actually it's -one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have -IDE interface. - -Following are brief descriptions about IO mode. -A. IO mode based on ATA protocol and uses some custom command. (read confirm, -write confirm) -B. IO mode uses SRAM bus interface. -C. IO mode supports 4kB boot area, so host can boot from mflash. - -2. Reserved area configuration -If host boot from mflash, usually needs raw area for boot loader image. All of -the mflash's block device operation will be taken this value as start offset. -Note that boot loader's size of reserved area and kernel configuration value -must be same. - -3. Example of mflash platform driver registration -Working mflash is very straight forward. Adding platform device stuff to board -configuration file is all. Here is some pseudo example. - -static struct mg_drv_data mflash_drv_data = { - /* If you want to polling driver set to 1 */ - .use_polling = 0, - /* device attribution */ - .dev_attr = MG_BOOT_DEV -}; - -static struct resource mg_mflash_rsc[] = { - /* Base address of mflash */ - [0] = { - .start = 0x08000000, - .end = 0x08000000 + SZ_64K - 1, - .flags = IORESOURCE_MEM - }, - /* mflash interrupt pin */ - [1] = { - .start = IRQ_GPIO(84), - .end = IRQ_GPIO(84), - .flags = IORESOURCE_IRQ - }, - /* mflash reset pin */ - [2] = { - .start = 43, - .end = 43, - .name = MG_RST_PIN, - .flags = IORESOURCE_IO - }, - /* mflash reset-out pin - * If you use mflash as storage device (i.e. other than MG_BOOT_DEV), - * should assign this */ - [3] = { - .start = 51, - .end = 51, - .name = MG_RSTOUT_PIN, - .flags = IORESOURCE_IO - } -}; - -static struct platform_device mflash_dev = { - .name = MG_DEV_NAME, - .id = -1, - .dev = { - .platform_data = &mflash_drv_data, - }, - .num_resources = ARRAY_SIZE(mg_mflash_rsc), - .resource = mg_mflash_rsc -}; - -platform_device_register(&mflash_dev); diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 49d7c997fa1ee7f759b5ba319bb57be464f0bd47..dc5e2dcdbef40b344f373614220460ebe079f768 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -871,6 +871,11 @@ PAGE_SIZE multiple when read back. Amount of memory used in network transmission buffers + shmem + + Amount of cached filesystem data that is swap-backed, + such as tmpfs, shm segments, shared anonymous mmap()s + file_mapped Amount of cached filesystem data mapped with mmap() @@ -913,6 +918,18 @@ PAGE_SIZE multiple when read back. Number of major page faults incurred + workingset_refault + + Number of refaults of previously evicted pages + + workingset_activate + + Number of refaulted pages that were immediately activated + + workingset_nodereclaim + + Number of times a shadow node has been reclaimed + memory.swap.current A read-only single value file which exists on non-root diff --git a/Documentation/conf.py b/Documentation/conf.py index f2b9161583773defb6fd4f5b6b9f76aea7c21748..bacf9d337c89a067c8a9187d4cfeba6fe6582fcd 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -17,7 +17,7 @@ import os import sphinx # Get Sphinx version -major, minor, patch = map(int, sphinx.__version__.split(".")) +major, minor, patch = sphinx.version_info[:3] # If extensions (or modules to document with autodoc) are in another directory, @@ -29,7 +29,7 @@ from load_config import loadConfig # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +needs_sphinx = '1.2' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -348,6 +348,8 @@ latex_documents = [ 'The kernel development community', 'manual'), ('driver-api/index', 'driver-api.tex', 'The kernel driver API manual', 'The kernel development community', 'manual'), + ('input/index', 'linux-input.tex', 'The Linux input driver subsystem', + 'The kernel development community', 'manual'), ('kernel-documentation', 'kernel-documentation.tex', 'The Linux Kernel Documentation', 'The kernel development community', 'manual'), ('process/index', 'development-process.tex', 'Linux Kernel Development Documentation', diff --git a/Documentation/core-api/flexible-arrays.rst b/Documentation/core-api/flexible-arrays.rst new file mode 100644 index 0000000000000000000000000000000000000000..b6b85a1b518ed5c6fd38a25f74fa967c3666125c --- /dev/null +++ b/Documentation/core-api/flexible-arrays.rst @@ -0,0 +1,130 @@ + +=================================== +Using flexible arrays in the kernel +=================================== + +Large contiguous memory allocations can be unreliable in the Linux kernel. +Kernel programmers will sometimes respond to this problem by allocating +pages with :c:func:`vmalloc()`. This solution not ideal, though. On 32-bit +systems, memory from vmalloc() must be mapped into a relatively small address +space; it's easy to run out. On SMP systems, the page table changes required +by vmalloc() allocations can require expensive cross-processor interrupts on +all CPUs. And, on all systems, use of space in the vmalloc() range increases +pressure on the translation lookaside buffer (TLB), reducing the performance +of the system. + +In many cases, the need for memory from vmalloc() can be eliminated by piecing +together an array from smaller parts; the flexible array library exists to make +this task easier. + +A flexible array holds an arbitrary (within limits) number of fixed-sized +objects, accessed via an integer index. Sparse arrays are handled +reasonably well. Only single-page allocations are made, so memory +allocation failures should be relatively rare. The down sides are that the +arrays cannot be indexed directly, individual object size cannot exceed the +system page size, and putting data into a flexible array requires a copy +operation. It's also worth noting that flexible arrays do no internal +locking at all; if concurrent access to an array is possible, then the +caller must arrange for appropriate mutual exclusion. + +The creation of a flexible array is done with :c:func:`flex_array_alloc()`:: + + #include + + struct flex_array *flex_array_alloc(int element_size, + unsigned int total, + gfp_t flags); + +The individual object size is provided by ``element_size``, while total is the +maximum number of objects which can be stored in the array. The flags +argument is passed directly to the internal memory allocation calls. With +the current code, using flags to ask for high memory is likely to lead to +notably unpleasant side effects. + +It is also possible to define flexible arrays at compile time with:: + + DEFINE_FLEX_ARRAY(name, element_size, total); + +This macro will result in a definition of an array with the given name; the +element size and total will be checked for validity at compile time. + +Storing data into a flexible array is accomplished with a call to +:c:func:`flex_array_put()`:: + + int flex_array_put(struct flex_array *array, unsigned int element_nr, + void *src, gfp_t flags); + +This call will copy the data from src into the array, in the position +indicated by ``element_nr`` (which must be less than the maximum specified when +the array was created). If any memory allocations must be performed, flags +will be used. The return value is zero on success, a negative error code +otherwise. + +There might possibly be a need to store data into a flexible array while +running in some sort of atomic context; in this situation, sleeping in the +memory allocator would be a bad thing. That can be avoided by using +``GFP_ATOMIC`` for the flags value, but, often, there is a better way. The +trick is to ensure that any needed memory allocations are done before +entering atomic context, using :c:func:`flex_array_prealloc()`:: + + int flex_array_prealloc(struct flex_array *array, unsigned int start, + unsigned int nr_elements, gfp_t flags); + +This function will ensure that memory for the elements indexed in the range +defined by ``start`` and ``nr_elements`` has been allocated. Thereafter, a +``flex_array_put()`` call on an element in that range is guaranteed not to +block. + +Getting data back out of the array is done with :c:func:`flex_array_get()`:: + + void *flex_array_get(struct flex_array *fa, unsigned int element_nr); + +The return value is a pointer to the data element, or NULL if that +particular element has never been allocated. + +Note that it is possible to get back a valid pointer for an element which +has never been stored in the array. Memory for array elements is allocated +one page at a time; a single allocation could provide memory for several +adjacent elements. Flexible array elements are normally initialized to the +value ``FLEX_ARRAY_FREE`` (defined as 0x6c in ), so errors +involving that number probably result from use of unstored array entries. +Note that, if array elements are allocated with ``__GFP_ZERO``, they will be +initialized to zero and this poisoning will not happen. + +Individual elements in the array can be cleared with +:c:func:`flex_array_clear()`:: + + int flex_array_clear(struct flex_array *array, unsigned int element_nr); + +This function will set the given element to ``FLEX_ARRAY_FREE`` and return +zero. If storage for the indicated element is not allocated for the array, +``flex_array_clear()`` will return ``-EINVAL`` instead. Note that clearing an +element does not release the storage associated with it; to reduce the +allocated size of an array, call :c:func:`flex_array_shrink()`:: + + int flex_array_shrink(struct flex_array *array); + +The return value will be the number of pages of memory actually freed. +This function works by scanning the array for pages containing nothing but +``FLEX_ARRAY_FREE`` bytes, so (1) it can be expensive, and (2) it will not work +if the array's pages are allocated with ``__GFP_ZERO``. + +It is possible to remove all elements of an array with a call to +:c:func:`flex_array_free_parts()`:: + + void flex_array_free_parts(struct flex_array *array); + +This call frees all elements, but leaves the array itself in place. +Freeing the entire array is done with :c:func:`flex_array_free()`:: + + void flex_array_free(struct flex_array *array); + +As of this writing, there are no users of flexible arrays in the mainline +kernel. The functions described here are also not exported to modules; +that will probably be fixed when somebody comes up with a need for it. + + +Flexible array functions +------------------------ + +.. kernel-doc:: include/linux/flex_array.h diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst new file mode 100644 index 0000000000000000000000000000000000000000..0054bd48be849035146239cb6efc0d7dba0c0a62 --- /dev/null +++ b/Documentation/core-api/genericirq.rst @@ -0,0 +1,440 @@ +.. include:: + +========================== +Linux generic IRQ handling +========================== + +:Copyright: |copy| 2005-2010: Thomas Gleixner +:Copyright: |copy| 2005-2006: Ingo Molnar + +Introduction +============ + +The generic interrupt handling layer is designed to provide a complete +abstraction of interrupt handling for device drivers. It is able to +handle all the different types of interrupt controller hardware. Device +drivers use generic API functions to request, enable, disable and free +interrupts. The drivers do not have to know anything about interrupt +hardware details, so they can be used on different platforms without +code changes. + +This documentation is provided to developers who want to implement an +interrupt subsystem based for their architecture, with the help of the +generic IRQ handling layer. + +Rationale +========= + +The original implementation of interrupt handling in Linux uses the +:c:func:`__do_IRQ` super-handler, which is able to deal with every type of +interrupt logic. + +Originally, Russell King identified different types of handlers to build +a quite universal set for the ARM interrupt handler implementation in +Linux 2.5/2.6. He distinguished between: + +- Level type + +- Edge type + +- Simple type + +During the implementation we identified another type: + +- Fast EOI type + +In the SMP world of the :c:func:`__do_IRQ` super-handler another type was +identified: + +- Per CPU type + +This split implementation of high-level IRQ handlers allows us to +optimize the flow of the interrupt handling for each specific interrupt +type. This reduces complexity in that particular code path and allows +the optimized handling of a given type. + +The original general IRQ implementation used hw_interrupt_type +structures and their ``->ack``, ``->end`` [etc.] callbacks to differentiate +the flow control in the super-handler. This leads to a mix of flow logic +and low-level hardware logic, and it also leads to unnecessary code +duplication: for example in i386, there is an ``ioapic_level_irq`` and an +``ioapic_edge_irq`` IRQ-type which share many of the low-level details but +have different flow handling. + +A more natural abstraction is the clean separation of the 'irq flow' and +the 'chip details'. + +Analysing a couple of architecture's IRQ subsystem implementations +reveals that most of them can use a generic set of 'irq flow' methods +and only need to add the chip-level specific code. The separation is +also valuable for (sub)architectures which need specific quirks in the +IRQ flow itself but not in the chip details - and thus provides a more +transparent IRQ subsystem design. + +Each interrupt descriptor is assigned its own high-level flow handler, +which is normally one of the generic implementations. (This high-level +flow handler implementation also makes it simple to provide +demultiplexing handlers which can be found in embedded platforms on +various architectures.) + +The separation makes the generic interrupt handling layer more flexible +and extensible. For example, an (sub)architecture can use a generic +IRQ-flow implementation for 'level type' interrupts and add a +(sub)architecture specific 'edge type' implementation. + +To make the transition to the new model easier and prevent the breakage +of existing implementations, the :c:func:`__do_IRQ` super-handler is still +available. This leads to a kind of duality for the time being. Over time +the new model should be used in more and more architectures, as it +enables smaller and cleaner IRQ subsystems. It's deprecated for three +years now and about to be removed. + +Known Bugs And Assumptions +========================== + +None (knock on wood). + +Abstraction layers +================== + +There are three main levels of abstraction in the interrupt code: + +1. High-level driver API + +2. High-level IRQ flow handlers + +3. Chip-level hardware encapsulation + +Interrupt control flow +---------------------- + +Each interrupt is described by an interrupt descriptor structure +irq_desc. The interrupt is referenced by an 'unsigned int' numeric +value which selects the corresponding interrupt description structure in +the descriptor structures array. The descriptor structure contains +status information and pointers to the interrupt flow method and the +interrupt chip structure which are assigned to this interrupt. + +Whenever an interrupt triggers, the low-level architecture code calls +into the generic interrupt code by calling :c:func:`desc->handle_irq`. This +high-level IRQ handling function only uses desc->irq_data.chip +primitives referenced by the assigned chip descriptor structure. + +High-level Driver API +--------------------- + +The high-level Driver API consists of following functions: + +- :c:func:`request_irq` + +- :c:func:`free_irq` + +- :c:func:`disable_irq` + +- :c:func:`enable_irq` + +- :c:func:`disable_irq_nosync` (SMP only) + +- :c:func:`synchronize_irq` (SMP only) + +- :c:func:`irq_set_irq_type` + +- :c:func:`irq_set_irq_wake` + +- :c:func:`irq_set_handler_data` + +- :c:func:`irq_set_chip` + +- :c:func:`irq_set_chip_data` + +See the autogenerated function documentation for details. + +High-level IRQ flow handlers +---------------------------- + +The generic layer provides a set of pre-defined irq-flow methods: + +- :c:func:`handle_level_irq` + +- :c:func:`handle_edge_irq` + +- :c:func:`handle_fasteoi_irq` + +- :c:func:`handle_simple_irq` + +- :c:func:`handle_percpu_irq` + +- :c:func:`handle_edge_eoi_irq` + +- :c:func:`handle_bad_irq` + +The interrupt flow handlers (either pre-defined or architecture +specific) are assigned to specific interrupts by the architecture either +during bootup or during device initialization. + +Default flow implementations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Helper functions +^^^^^^^^^^^^^^^^ + +The helper functions call the chip primitives and are used by the +default flow implementations. The following helper functions are +implemented (simplified excerpt):: + + default_enable(struct irq_data *data) + { + desc->irq_data.chip->irq_unmask(data); + } + + default_disable(struct irq_data *data) + { + if (!delay_disable(data)) + desc->irq_data.chip->irq_mask(data); + } + + default_ack(struct irq_data *data) + { + chip->irq_ack(data); + } + + default_mask_ack(struct irq_data *data) + { + if (chip->irq_mask_ack) { + chip->irq_mask_ack(data); + } else { + chip->irq_mask(data); + chip->irq_ack(data); + } + } + + noop(struct irq_data *data)) + { + } + + + +Default flow handler implementations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Default Level IRQ flow handler +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +handle_level_irq provides a generic implementation for level-triggered +interrupts. + +The following control flow is implemented (simplified excerpt):: + + :c:func:`desc->irq_data.chip->irq_mask_ack`; + handle_irq_event(desc->action); + :c:func:`desc->irq_data.chip->irq_unmask`; + + +Default Fast EOI IRQ flow handler +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +handle_fasteoi_irq provides a generic implementation for interrupts, +which only need an EOI at the end of the handler. + +The following control flow is implemented (simplified excerpt):: + + handle_irq_event(desc->action); + :c:func:`desc->irq_data.chip->irq_eoi`; + + +Default Edge IRQ flow handler +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +handle_edge_irq provides a generic implementation for edge-triggered +interrupts. + +The following control flow is implemented (simplified excerpt):: + + if (desc->status & running) { + :c:func:`desc->irq_data.chip->irq_mask_ack`; + desc->status |= pending | masked; + return; + } + :c:func:`desc->irq_data.chip->irq_ack`; + desc->status |= running; + do { + if (desc->status & masked) + :c:func:`desc->irq_data.chip->irq_unmask`; + desc->status &= ~pending; + handle_irq_event(desc->action); + } while (status & pending); + desc->status &= ~running; + + +Default simple IRQ flow handler +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +handle_simple_irq provides a generic implementation for simple +interrupts. + +.. note:: + + The simple flow handler does not call any handler/chip primitives. + +The following control flow is implemented (simplified excerpt):: + + handle_irq_event(desc->action); + + +Default per CPU flow handler +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +handle_percpu_irq provides a generic implementation for per CPU +interrupts. + +Per CPU interrupts are only available on SMP and the handler provides a +simplified version without locking. + +The following control flow is implemented (simplified excerpt):: + + if (desc->irq_data.chip->irq_ack) + :c:func:`desc->irq_data.chip->irq_ack`; + handle_irq_event(desc->action); + if (desc->irq_data.chip->irq_eoi) + :c:func:`desc->irq_data.chip->irq_eoi`; + + +EOI Edge IRQ flow handler +^^^^^^^^^^^^^^^^^^^^^^^^^ + +handle_edge_eoi_irq provides an abnomination of the edge handler +which is solely used to tame a badly wreckaged irq controller on +powerpc/cell. + +Bad IRQ flow handler +^^^^^^^^^^^^^^^^^^^^ + +handle_bad_irq is used for spurious interrupts which have no real +handler assigned.. + +Quirks and optimizations +~~~~~~~~~~~~~~~~~~~~~~~~ + +The generic functions are intended for 'clean' architectures and chips, +which have no platform-specific IRQ handling quirks. If an architecture +needs to implement quirks on the 'flow' level then it can do so by +overriding the high-level irq-flow handler. + +Delayed interrupt disable +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This per interrupt selectable feature, which was introduced by Russell +King in the ARM interrupt implementation, does not mask an interrupt at +the hardware level when :c:func:`disable_irq` is called. The interrupt is kept +enabled and is masked in the flow handler when an interrupt event +happens. This prevents losing edge interrupts on hardware which does not +store an edge interrupt event while the interrupt is disabled at the +hardware level. When an interrupt arrives while the IRQ_DISABLED flag +is set, then the interrupt is masked at the hardware level and the +IRQ_PENDING bit is set. When the interrupt is re-enabled by +:c:func:`enable_irq` the pending bit is checked and if it is set, the interrupt +is resent either via hardware or by a software resend mechanism. (It's +necessary to enable CONFIG_HARDIRQS_SW_RESEND when you want to use +the delayed interrupt disable feature and your hardware is not capable +of retriggering an interrupt.) The delayed interrupt disable is not +configurable. + +Chip-level hardware encapsulation +--------------------------------- + +The chip-level hardware descriptor structure :c:type:`irq_chip` contains all +the direct chip relevant functions, which can be utilized by the irq flow +implementations. + +- ``irq_ack`` + +- ``irq_mask_ack`` - Optional, recommended for performance + +- ``irq_mask`` + +- ``irq_unmask`` + +- ``irq_eoi`` - Optional, required for EOI flow handlers + +- ``irq_retrigger`` - Optional + +- ``irq_set_type`` - Optional + +- ``irq_set_wake`` - Optional + +These primitives are strictly intended to mean what they say: ack means +ACK, masking means masking of an IRQ line, etc. It is up to the flow +handler(s) to use these basic units of low-level functionality. + +__do_IRQ entry point +==================== + +The original implementation :c:func:`__do_IRQ` was an alternative entry point +for all types of interrupts. It no longer exists. + +This handler turned out to be not suitable for all interrupt hardware +and was therefore reimplemented with split functionality for +edge/level/simple/percpu interrupts. This is not only a functional +optimization. It also shortens code paths for interrupts. + +Locking on SMP +============== + +The locking of chip registers is up to the architecture that defines the +chip primitives. The per-irq structure is protected via desc->lock, by +the generic layer. + +Generic interrupt chip +====================== + +To avoid copies of identical implementations of IRQ chips the core +provides a configurable generic interrupt chip implementation. +Developers should check carefully whether the generic chip fits their +needs before implementing the same functionality slightly differently +themselves. + +.. kernel-doc:: kernel/irq/generic-chip.c + :export: + +Structures +========== + +This chapter contains the autogenerated documentation of the structures +which are used in the generic IRQ layer. + +.. kernel-doc:: include/linux/irq.h + :internal: + +.. kernel-doc:: include/linux/interrupt.h + :internal: + +Public Functions Provided +========================= + +This chapter contains the autogenerated documentation of the kernel API +functions which are exported. + +.. kernel-doc:: kernel/irq/manage.c + +.. kernel-doc:: kernel/irq/chip.c + +Internal Functions Provided +=========================== + +This chapter contains the autogenerated documentation of the internal +functions. + +.. kernel-doc:: kernel/irq/irqdesc.c + +.. kernel-doc:: kernel/irq/handle.c + +.. kernel-doc:: kernel/irq/chip.c + +Credits +======= + +The following people have contributed to this document: + +1. Thomas Gleixner tglx@linutronix.de + +2. Ingo Molnar mingo@elte.hu diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 0d93d808913610c665eab7b7df7a5d2bbd848d6d..62abd36bfffbc1c1421c9212f40ac33d2f0fcf0c 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -11,11 +11,14 @@ Core utilities .. toctree:: :maxdepth: 1 + kernel-api assoc_array atomic_ops cpu_hotplug local_ops workqueue + genericirq + flexible-arrays Interfaces for kernel debugging =============================== diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst new file mode 100644 index 0000000000000000000000000000000000000000..9ec8488319dcaacf1f595c5fea88580e97955483 --- /dev/null +++ b/Documentation/core-api/kernel-api.rst @@ -0,0 +1,346 @@ +==================== +The Linux Kernel API +==================== + +Data Types +========== + +Doubly Linked Lists +------------------- + +.. kernel-doc:: include/linux/list.h + :internal: + +Basic C Library Functions +========================= + +When writing drivers, you cannot in general use routines which are from +the C Library. Some of the functions have been found generally useful +and they are listed below. The behaviour of these functions may vary +slightly from those defined by ANSI, and these deviations are noted in +the text. + +String Conversions +------------------ + +.. kernel-doc:: lib/vsprintf.c + :export: + +.. kernel-doc:: include/linux/kernel.h + :functions: kstrtol + +.. kernel-doc:: include/linux/kernel.h + :functions: kstrtoul + +.. kernel-doc:: lib/kstrtox.c + :export: + +String Manipulation +------------------- + +.. kernel-doc:: lib/string.c + :export: + +Bit Operations +-------------- + +.. kernel-doc:: arch/x86/include/asm/bitops.h + :internal: + +Basic Kernel Library Functions +============================== + +The Linux kernel provides more basic utility functions. + +Bitmap Operations +----------------- + +.. kernel-doc:: lib/bitmap.c + :export: + +.. kernel-doc:: lib/bitmap.c + :internal: + +Command-line Parsing +-------------------- + +.. kernel-doc:: lib/cmdline.c + :export: + +CRC Functions +------------- + +.. kernel-doc:: lib/crc7.c + :export: + +.. kernel-doc:: lib/crc16.c + :export: + +.. kernel-doc:: lib/crc-itu-t.c + :export: + +.. kernel-doc:: lib/crc32.c + +.. kernel-doc:: lib/crc-ccitt.c + :export: + +idr/ida Functions +----------------- + +.. kernel-doc:: include/linux/idr.h + :doc: idr sync + +.. kernel-doc:: lib/idr.c + :doc: IDA description + +.. kernel-doc:: lib/idr.c + :export: + +Memory Management in Linux +========================== + +The Slab Cache +-------------- + +.. kernel-doc:: include/linux/slab.h + :internal: + +.. kernel-doc:: mm/slab.c + :export: + +.. kernel-doc:: mm/util.c + :export: + +User Space Memory Access +------------------------ + +.. kernel-doc:: arch/x86/include/asm/uaccess_32.h + :internal: + +.. kernel-doc:: arch/x86/lib/usercopy_32.c + :export: + +More Memory Management Functions +-------------------------------- + +.. kernel-doc:: mm/readahead.c + :export: + +.. kernel-doc:: mm/filemap.c + :export: + +.. kernel-doc:: mm/memory.c + :export: + +.. kernel-doc:: mm/vmalloc.c + :export: + +.. kernel-doc:: mm/page_alloc.c + :internal: + +.. kernel-doc:: mm/mempool.c + :export: + +.. kernel-doc:: mm/dmapool.c + :export: + +.. kernel-doc:: mm/page-writeback.c + :export: + +.. kernel-doc:: mm/truncate.c + :export: + +Kernel IPC facilities +===================== + +IPC utilities +------------- + +.. kernel-doc:: ipc/util.c + :internal: + +FIFO Buffer +=========== + +kfifo interface +--------------- + +.. kernel-doc:: include/linux/kfifo.h + :internal: + +relay interface support +======================= + +Relay interface support is designed to provide an efficient mechanism +for tools and facilities to relay large amounts of data from kernel +space to user space. + +relay interface +--------------- + +.. kernel-doc:: kernel/relay.c + :export: + +.. kernel-doc:: kernel/relay.c + :internal: + +Module Support +============== + +Module Loading +-------------- + +.. kernel-doc:: kernel/kmod.c + :export: + +Inter Module support +-------------------- + +Refer to the file kernel/module.c for more information. + +Hardware Interfaces +=================== + +Interrupt Handling +------------------ + +.. kernel-doc:: kernel/irq/manage.c + :export: + +DMA Channels +------------ + +.. kernel-doc:: kernel/dma.c + :export: + +Resources Management +-------------------- + +.. kernel-doc:: kernel/resource.c + :internal: + +.. kernel-doc:: kernel/resource.c + :export: + +MTRR Handling +------------- + +.. kernel-doc:: arch/x86/kernel/cpu/mtrr/main.c + :export: + +Security Framework +================== + +.. kernel-doc:: security/security.c + :internal: + +.. kernel-doc:: security/inode.c + :export: + +Audit Interfaces +================ + +.. kernel-doc:: kernel/audit.c + :export: + +.. kernel-doc:: kernel/auditsc.c + :internal: + +.. kernel-doc:: kernel/auditfilter.c + :internal: + +Accounting Framework +==================== + +.. kernel-doc:: kernel/acct.c + :internal: + +Block Devices +============= + +.. kernel-doc:: block/blk-core.c + :export: + +.. kernel-doc:: block/blk-core.c + :internal: + +.. kernel-doc:: block/blk-map.c + :export: + +.. kernel-doc:: block/blk-sysfs.c + :internal: + +.. kernel-doc:: block/blk-settings.c + :export: + +.. kernel-doc:: block/blk-exec.c + :export: + +.. kernel-doc:: block/blk-flush.c + :export: + +.. kernel-doc:: block/blk-lib.c + :export: + +.. kernel-doc:: block/blk-tag.c + :export: + +.. kernel-doc:: block/blk-tag.c + :internal: + +.. kernel-doc:: block/blk-integrity.c + :export: + +.. kernel-doc:: kernel/trace/blktrace.c + :internal: + +.. kernel-doc:: block/genhd.c + :internal: + +.. kernel-doc:: block/genhd.c + :export: + +Char devices +============ + +.. kernel-doc:: fs/char_dev.c + :export: + +Clock Framework +=============== + +The clock framework defines programming interfaces to support software +management of the system clock tree. This framework is widely used with +System-On-Chip (SOC) platforms to support power management and various +devices which may need custom clock rates. Note that these "clocks" +don't relate to timekeeping or real time clocks (RTCs), each of which +have separate frameworks. These :c:type:`struct clk ` +instances may be used to manage for example a 96 MHz signal that is used +to shift bits into and out of peripherals or busses, or otherwise +trigger synchronous state machine transitions in system hardware. + +Power management is supported by explicit software clock gating: unused +clocks are disabled, so the system doesn't waste power changing the +state of transistors that aren't in active use. On some systems this may +be backed by hardware clock gating, where clocks are gated without being +disabled in software. Sections of chips that are powered but not clocked +may be able to retain their last state. This low power state is often +called a *retention mode*. This mode still incurs leakage currents, +especially with finer circuit geometries, but for CMOS circuits power is +mostly used by clocked state changes. + +Power-aware drivers only enable their clocks when the device they manage +is in active use. Also, system sleep states often differ according to +which clock domains are active: while a "standby" state may allow wakeup +from several active domains, a "mem" (suspend-to-RAM) state may require +a more wholesale shutdown of clocks derived from higher speed PLLs and +oscillators, limiting the number of possible wakeup event sources. A +driver's suspend method may need to be aware of system-specific clock +constraints on the target sleep state. + +Some platforms support programmable clock generators. These can be used +by external chips of various kinds, such as other CPUs, multimedia +codecs, and devices with strict requirements for interface clocking. + +.. kernel-doc:: include/linux/clk.h + :internal: diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt deleted file mode 100644 index dd62e1334f0a8b52b7a30a2d4f9daff0e90506a5..0000000000000000000000000000000000000000 --- a/Documentation/cpu-freq/boost.txt +++ /dev/null @@ -1,93 +0,0 @@ -Processor boosting control - - - information for users - - -Quick guide for the impatient: --------------------- -/sys/devices/system/cpu/cpufreq/boost -controls the boost setting for the whole system. You can read and write -that file with either "0" (boosting disabled) or "1" (boosting allowed). -Reading or writing 1 does not mean that the system is boosting at this -very moment, but only that the CPU _may_ raise the frequency at it's -discretion. --------------------- - -Introduction -------------- -Some CPUs support a functionality to raise the operating frequency of -some cores in a multi-core package if certain conditions apply, mostly -if the whole chip is not fully utilized and below it's intended thermal -budget. The decision about boost disable/enable is made either at hardware -(e.g. x86) or software (e.g ARM). -On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core", -in technical documentation "Core performance boost". In Linux we use -the term "boost" for convenience. - -Rationale for disable switch ----------------------------- - -Though the idea is to just give better performance without any user -intervention, sometimes the need arises to disable this functionality. -Most systems offer a switch in the (BIOS) firmware to disable the -functionality at all, but a more fine-grained and dynamic control would -be desirable: -1. While running benchmarks, reproducible results are important. Since - the boosting functionality depends on the load of the whole package, - single thread performance can vary. By explicitly disabling the boost - functionality at least for the benchmark's run-time the system will run - at a fixed frequency and results are reproducible again. -2. To examine the impact of the boosting functionality it is helpful - to do tests with and without boosting. -3. Boosting means overclocking the processor, though under controlled - conditions. By raising the frequency and the voltage the processor - will consume more power than without the boosting, which may be - undesirable for instance for mobile users. Disabling boosting may - save power here, though this depends on the workload. - - -User controlled switch ----------------------- - -To allow the user to toggle the boosting functionality, the cpufreq core -driver exports a sysfs knob to enable or disable it. There is a file: -/sys/devices/system/cpu/cpufreq/boost -which can either read "0" (boosting disabled) or "1" (boosting enabled). -The file is exported only when cpufreq driver supports boosting. -Explicitly changing the permissions and writing to that file anyway will -return EINVAL. - -On supported CPUs one can write either a "0" or a "1" into this file. -This will either disable the boost functionality on all cores in the -whole system (0) or will allow the software or hardware to boost at will -(1). - -Writing a "1" does not explicitly boost the system, but just allows the -CPU to boost at their discretion. Some implementations take external -factors like the chip's temperature into account, so boosting once does -not necessarily mean that it will occur every time even using the exact -same software setup. - - -AMD legacy cpb switch ---------------------- -The AMD powernow-k8 driver used to support a very similar switch to -disable or enable the "Core Performance Boost" feature of some AMD CPUs. -This switch was instantiated in each CPU's cpufreq directory -(/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb". -Though the per CPU existence hints at a more fine grained control, the -actual implementation only supported a system-global switch semantics, -which was simply reflected into each CPU's file. Writing a 0 or 1 into it -would pull the other CPUs to the same state. -For compatibility reasons this file and its behavior is still supported -on AMD CPUs, though it is now protected by a config switch -(X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created, -even with the config option set. -This functionality is considered legacy and will be removed in some future -kernel version. - -More fine grained boosting control ----------------------------------- - -Technically it is possible to switch the boosting functionality at least -on a per package basis, for some CPUs even per core. Currently the driver -does not support it, but this may be implemented in the future. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt deleted file mode 100644 index 61b3184b6c24e47c4a31c09763707e49a1d0de01..0000000000000000000000000000000000000000 --- a/Documentation/cpu-freq/governors.txt +++ /dev/null @@ -1,301 +0,0 @@ - CPU frequency and voltage scaling code in the Linux(TM) kernel - - - L i n u x C P U F r e q - - C P U F r e q G o v e r n o r s - - - information for users and developers - - - - Dominik Brodowski - some additions and corrections by Nico Golde - Rafael J. Wysocki - Viresh Kumar - - - - Clock scaling allows you to change the clock speed of the CPUs on the - fly. This is a nice method to save battery power, because the lower - the clock speed, the less power the CPU consumes. - - -Contents: ---------- -1. What is a CPUFreq Governor? - -2. Governors In the Linux Kernel -2.1 Performance -2.2 Powersave -2.3 Userspace -2.4 Ondemand -2.5 Conservative -2.6 Schedutil - -3. The Governor Interface in the CPUfreq Core - -4. References - - -1. What Is A CPUFreq Governor? -============================== - -Most cpufreq drivers (except the intel_pstate and longrun) or even most -cpu frequency scaling algorithms only allow the CPU frequency to be set -to predefined fixed values. In order to offer dynamic frequency -scaling, the cpufreq core must be able to tell these drivers of a -"target frequency". So these specific drivers will be transformed to -offer a "->target/target_index/fast_switch()" call instead of the -"->setpolicy()" call. For set_policy drivers, all stays the same, -though. - -How to decide what frequency within the CPUfreq policy should be used? -That's done using "cpufreq governors". - -Basically, it's the following flow graph: - -CPU can be set to switch independently | CPU can only be set - within specific "limits" | to specific frequencies - - "CPUfreq policy" - consists of frequency limits (policy->{min,max}) - and CPUfreq governor to be used - / \ - / \ - / the cpufreq governor decides - / (dynamically or statically) - / what target_freq to set within - / the limits of policy->{min,max} - / \ - / \ - Using the ->setpolicy call, Using the ->target/target_index/fast_switch call, - the limits and the the frequency closest - "policy" is set. to target_freq is set. - It is assured that it - is within policy->{min,max} - - -2. Governors In the Linux Kernel -================================ - -2.1 Performance ---------------- - -The CPUfreq governor "performance" sets the CPU statically to the -highest frequency within the borders of scaling_min_freq and -scaling_max_freq. - - -2.2 Powersave -------------- - -The CPUfreq governor "powersave" sets the CPU statically to the -lowest frequency within the borders of scaling_min_freq and -scaling_max_freq. - - -2.3 Userspace -------------- - -The CPUfreq governor "userspace" allows the user, or any userspace -program running with UID "root", to set the CPU to a specific frequency -by making a sysfs file "scaling_setspeed" available in the CPU-device -directory. - - -2.4 Ondemand ------------- - -The CPUfreq governor "ondemand" sets the CPU frequency depending on the -current system load. Load estimation is triggered by the scheduler -through the update_util_data->func hook; when triggered, cpufreq checks -the CPU-usage statistics over the last period and the governor sets the -CPU accordingly. The CPU must have the capability to switch the -frequency very quickly. - -Sysfs files: - -* sampling_rate: - - Measured in uS (10^-6 seconds), this is how often you want the kernel - to look at the CPU usage and to make decisions on what to do about the - frequency. Typically this is set to values of around '10000' or more. - It's default value is (cmp. with users-guide.txt): transition_latency - * 1000. Be aware that transition latency is in ns and sampling_rate - is in us, so you get the same sysfs value by default. Sampling rate - should always get adjusted considering the transition latency to set - the sampling rate 750 times as high as the transition latency in the - bash (as said, 1000 is default), do: - - $ echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) > ondemand/sampling_rate - -* sampling_rate_min: - - The sampling rate is limited by the HW transition latency: - transition_latency * 100 - - Or by kernel restrictions: - - If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. - - If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is - used, the limits depend on the CONFIG_HZ option: - HZ=1000: min=20000us (20ms) - HZ=250: min=80000us (80ms) - HZ=100: min=200000us (200ms) - - The highest value of kernel and HW latency restrictions is shown and - used as the minimum sampling rate. - -* up_threshold: - - This defines what the average CPU usage between the samplings of - 'sampling_rate' needs to be for the kernel to make a decision on - whether it should increase the frequency. For example when it is set - to its default value of '95' it means that between the checking - intervals the CPU needs to be on average more than 95% in use to then - decide that the CPU frequency needs to be increased. - -* ignore_nice_load: - - This parameter takes a value of '0' or '1'. When set to '0' (its - default), all processes are counted towards the 'cpu utilisation' - value. When set to '1', the processes that are run with a 'nice' - value will not count (and thus be ignored) in the overall usage - calculation. This is useful if you are running a CPU intensive - calculation on your laptop that you do not care how long it takes to - complete as you can 'nice' it and prevent it from taking part in the - deciding process of whether to increase your CPU frequency. - -* sampling_down_factor: - - This parameter controls the rate at which the kernel makes a decision - on when to decrease the frequency while running at top speed. When set - to 1 (the default) decisions to reevaluate load are made at the same - interval regardless of current clock speed. But when set to greater - than 1 (e.g. 100) it acts as a multiplier for the scheduling interval - for reevaluating load when the CPU is at its top speed due to high - load. This improves performance by reducing the overhead of load - evaluation and helping the CPU stay at its top speed when truly busy, - rather than shifting back and forth in speed. This tunable has no - effect on behavior at lower speeds/lower CPU loads. - -* powersave_bias: - - This parameter takes a value between 0 to 1000. It defines the - percentage (times 10) value of the target frequency that will be - shaved off of the target. For example, when set to 100 -- 10%, when - ondemand governor would have targeted 1000 MHz, it will target - 1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0 - (disabled) by default. - - When AMD frequency sensitivity powersave bias driver -- - drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter - defines the workload frequency sensitivity threshold in which a lower - frequency is chosen instead of ondemand governor's original target. - The frequency sensitivity is a hardware reported (on AMD Family 16h - Processors and above) value between 0 to 100% that tells software how - the performance of the workload running on a CPU will change when - frequency changes. A workload with sensitivity of 0% (memory/IO-bound) - will not perform any better on higher core frequency, whereas a - workload with sensitivity of 100% (CPU-bound) will perform better - higher the frequency. When the driver is loaded, this is set to 400 by - default -- for CPUs running workloads with sensitivity value below - 40%, a lower frequency is chosen. Unloading the driver or writing 0 - will disable this feature. - - -2.5 Conservative ----------------- - -The CPUfreq governor "conservative", much like the "ondemand" -governor, sets the CPU frequency depending on the current usage. It -differs in behaviour in that it gracefully increases and decreases the -CPU speed rather than jumping to max speed the moment there is any load -on the CPU. This behaviour is more suitable in a battery powered -environment. The governor is tweaked in the same manner as the -"ondemand" governor through sysfs with the addition of: - -* freq_step: - - This describes what percentage steps the cpu freq should be increased - and decreased smoothly by. By default the cpu frequency will increase - in 5% chunks of your maximum cpu frequency. You can change this value - to anywhere between 0 and 100 where '0' will effectively lock your CPU - at a speed regardless of its load whilst '100' will, in theory, make - it behave identically to the "ondemand" governor. - -* down_threshold: - - Same as the 'up_threshold' found for the "ondemand" governor but for - the opposite direction. For example when set to its default value of - '20' it means that if the CPU usage needs to be below 20% between - samples to have the frequency decreased. - -* sampling_down_factor: - - Similar functionality as in "ondemand" governor. But in - "conservative", it controls the rate at which the kernel makes a - decision on when to decrease the frequency while running in any speed. - Load for frequency increase is still evaluated every sampling rate. - - -2.6 Schedutil -------------- - -The "schedutil" governor aims at better integration with the Linux -kernel scheduler. Load estimation is achieved through the scheduler's -Per-Entity Load Tracking (PELT) mechanism, which also provides -information about the recent load [1]. This governor currently does -load based DVFS only for tasks managed by CFS. RT and DL scheduler tasks -are always run at the highest frequency. Unlike all the other -governors, the code is located under the kernel/sched/ directory. - -Sysfs files: - -* rate_limit_us: - - This contains a value in microseconds. The governor waits for - rate_limit_us time before reevaluating the load again, after it has - evaluated the load once. - -For an in-depth comparison with the other governors refer to [2]. - - -3. The Governor Interface in the CPUfreq Core -============================================= - -A new governor must register itself with the CPUfreq core using -"cpufreq_register_governor". The struct cpufreq_governor, which has to -be passed to that function, must contain the following values: - -governor->name - A unique name for this governor. -governor->owner - .THIS_MODULE for the governor module (if appropriate). - -plus a set of hooks to the functions implementing the governor's logic. - -The CPUfreq governor may call the CPU processor driver using one of -these two functions: - -int cpufreq_driver_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); - -int __cpufreq_driver_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); - -target_freq must be within policy->min and policy->max, of course. -What's the difference between these two functions? When your governor is -in a direct code path of a call to governor callbacks, like -governor->start(), the policy->rwsem is still held in the cpufreq core, -and there's no need to lock it again (in fact, this would cause a -deadlock). So use __cpufreq_driver_target only in these cases. In all -other cases (for example, when there's a "daemonized" function that -wakes up every second), use cpufreq_driver_target to take policy->rwsem -before the command is passed to the cpufreq driver. - -4. References -============= - -[1] Per-entity load tracking: https://lwn.net/Articles/531853/ -[2] Improvements in CPU frequency management: https://lwn.net/Articles/682391/ - diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt index ef1d39247b054f332fc671bb0f781146866258c3..03a7cee6ac73a4f2dd48248196994538d81f6a4e 100644 --- a/Documentation/cpu-freq/index.txt +++ b/Documentation/cpu-freq/index.txt @@ -21,8 +21,6 @@ Documents in this directory: amd-powernow.txt - AMD powernow driver specific file. -boost.txt - Frequency boosting support. - core.txt - General description of the CPUFreq core and of CPUFreq notifiers. @@ -32,17 +30,12 @@ cpufreq-nforce2.txt - nVidia nForce2 platform specific file. cpufreq-stats.txt - General description of sysfs cpufreq stats. -governors.txt - What are cpufreq governors and how to - implement them? - index.txt - File index, Mailing list and Links (this document) intel-pstate.txt - Intel pstate cpufreq driver specific file. pcc-cpufreq.txt - PCC cpufreq driver specific file. -user-guide.txt - User Guide to CPUFreq - Mailing List ------------ diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt deleted file mode 100644 index 3fdcdfd968ba1237ef61856c51463d123b9321a1..0000000000000000000000000000000000000000 --- a/Documentation/cpu-freq/intel-pstate.txt +++ /dev/null @@ -1,281 +0,0 @@ -Intel P-State driver --------------------- - -This driver provides an interface to control the P-State selection for the -SandyBridge+ Intel processors. - -The following document explains P-States: -http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf -As stated in the document, P-State doesn’t exactly mean a frequency. However, for -the sake of the relationship with cpufreq, P-State and frequency are used -interchangeably. - -Understanding the cpufreq core governors and policies are important before -discussing more details about the Intel P-State driver. Based on what callbacks -a cpufreq driver provides to the cpufreq core, it can support two types of -drivers: -- with target_index() callback: In this mode, the drivers using cpufreq core -simply provide the minimum and maximum frequency limits and an additional -interface target_index() to set the current frequency. The cpufreq subsystem -has a number of scaling governors ("performance", "powersave", "ondemand", -etc.). Depending on which governor is in use, cpufreq core will call for -transitions to a specific frequency using target_index() callback. -- setpolicy() callback: In this mode, drivers do not provide target_index() -callback, so cpufreq core can't request a transition to a specific frequency. -The driver provides minimum and maximum frequency limits and callbacks to set a -policy. The policy in cpufreq sysfs is referred to as the "scaling governor". -The cpufreq core can request the driver to operate in any of the two policies: -"performance" and "powersave". The driver decides which frequency to use based -on the above policy selection considering minimum and maximum frequency limits. - -The Intel P-State driver falls under the latter category, which implements the -setpolicy() callback. This driver decides what P-State to use based on the -requested policy from the cpufreq core. If the processor is capable of -selecting its next P-State internally, then the driver will offload this -responsibility to the processor (aka HWP: Hardware P-States). If not, the -driver implements algorithms to select the next P-State. - -Since these policies are implemented in the driver, they are not same as the -cpufreq scaling governors implementation, even if they have the same name in -the cpufreq sysfs (scaling_governors). For example the "performance" policy is -similar to cpufreq’s "performance" governor, but "powersave" is completely -different than the cpufreq "powersave" governor. The strategy here is similar -to cpufreq "ondemand", where the requested P-State is related to the system load. - -Sysfs Interface - -In addition to the frequency-controlling interfaces provided by the cpufreq -core, the driver provides its own sysfs files to control the P-State selection. -These files have been added to /sys/devices/system/cpu/intel_pstate/. -Any changes made to these files are applicable to all CPUs (even in a -multi-package system, Refer to later section on placing "Per-CPU limits"). - - max_perf_pct: Limits the maximum P-State that will be requested by - the driver. It states it as a percentage of the available performance. The - available (P-State) performance may be reduced by the no_turbo - setting described below. - - min_perf_pct: Limits the minimum P-State that will be requested by - the driver. It states it as a percentage of the max (non-turbo) - performance level. - - no_turbo: Limits the driver to selecting P-State below the turbo - frequency range. - - turbo_pct: Displays the percentage of the total performance that - is supported by hardware that is in the turbo range. This number - is independent of whether turbo has been disabled or not. - - num_pstates: Displays the number of P-States that are supported - by hardware. This number is independent of whether turbo has - been disabled or not. - -For example, if a system has these parameters: - Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State) - Max non turbo ratio: 0x17 - Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio) - -Sysfs will show : - max_perf_pct:100, which corresponds to 1 core ratio - min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio - no_turbo:0, turbo is not disabled - num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1) - turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates - -Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual -Volume 3: System Programming Guide" to understand ratios. - -There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/ -that can be used for controlling the operation mode of the driver: - - status: Three settings are possible: - "off" - The driver is not in use at this time. - "active" - The driver works as a P-state governor (default). - "passive" - The driver works as a regular cpufreq one and collaborates - with the generic cpufreq governors (it sets P-states as - requested by those governors). - The current setting is returned by reads from this attribute. Writing one - of the above strings to it changes the operation mode as indicated by that - string, if possible. If HW-managed P-states (HWP) are enabled, it is not - possible to change the driver's operation mode and attempts to write to - this attribute will fail. - -cpufreq sysfs for Intel P-State - -Since this driver registers with cpufreq, cpufreq sysfs is also presented. -There are some important differences, which need to be considered. - -scaling_cur_freq: This displays the real frequency which was used during -the last sample period instead of what is requested. Some other cpufreq driver, -like acpi-cpufreq, displays what is requested (Some changes are on the -way to fix this for acpi-cpufreq driver). The same is true for frequencies -displayed at /proc/cpuinfo. - -scaling_governor: This displays current active policy. Since each CPU has a -cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this -is not possible with Intel P-States, as there is one common policy for all -CPUs. Here, the last requested policy will be applicable to all CPUs. It is -suggested that one use the cpupower utility to change policy to all CPUs at the -same time. - -scaling_setspeed: This attribute can never be used with Intel P-State. - -scaling_max_freq/scaling_min_freq: This interface can be used similarly to -the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies -are converted to nearest possible P-State, this is prone to rounding errors. -This method is not preferred to limit performance. - -affected_cpus: Not used -related_cpus: Not used - -For contemporary Intel processors, the frequency is controlled by the -processor itself and the P-State exposed to software is related to -performance levels. The idea that frequency can be set to a single -frequency is fictional for Intel Core processors. Even if the scaling -driver selects a single P-State, the actual frequency the processor -will run at is selected by the processor itself. - -Per-CPU limits - -The kernel command line option "intel_pstate=per_cpu_perf_limits" forces -the intel_pstate driver to use per-CPU performance limits. When it is set, -the sysfs control interface described above is subject to limitations. -- The following controls are not available for both read and write - /sys/devices/system/cpu/intel_pstate/max_perf_pct - /sys/devices/system/cpu/intel_pstate/min_perf_pct -- The following controls can be used to set performance limits, as far as the -architecture of the processor permits: - /sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq - /sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq - /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -- User can still observe turbo percent and number of P-States from - /sys/devices/system/cpu/intel_pstate/turbo_pct - /sys/devices/system/cpu/intel_pstate/num_pstates -- User can read write system wide turbo status - /sys/devices/system/cpu/no_turbo - -Support of energy performance hints -It is possible to provide hints to the HWP algorithms in the processor -to be more performance centric to more energy centric. When the driver -is using HWP, two additional cpufreq sysfs attributes are presented for -each logical CPU. -These attributes are: - - energy_performance_available_preferences - - energy_performance_preference - -To get list of supported hints: -$ cat energy_performance_available_preferences - default performance balance_performance balance_power power - -The current preference can be read or changed via cpufreq sysfs -attribute "energy_performance_preference". Reading from this attribute -will display current effective setting. User can write any of the valid -preference string to this attribute. User can always restore to power-on -default by writing "default". - -Since threads can migrate to different CPUs, this is possible that the -new CPU may have different energy performance preference than the previous -one. To avoid such issues, either threads can be pinned to specific CPUs -or set the same energy performance preference value to all CPUs. - -Tuning Intel P-State driver - -When the performance can be tuned using PID (Proportional Integral -Derivative) controller, debugfs files are provided for adjusting performance. -They are presented under: -/sys/kernel/debug/pstate_snb/ - -The PID tunable parameters are: - deadband - d_gain_pct - i_gain_pct - p_gain_pct - sample_rate_ms - setpoint - -To adjust these parameters, some understanding of driver implementation is -necessary. There are some tweeks described here, but be very careful. Adjusting -them requires expert level understanding of power and performance relationship. -These limits are only useful when the "powersave" policy is active. - --To make the system more responsive to load changes, sample_rate_ms can -be adjusted (current default is 10ms). --To make the system use higher performance, even if the load is lower, setpoint -can be adjusted to a lower number. This will also lead to faster ramp up time -to reach the maximum P-State. -If there are no derivative and integral coefficients, The next P-State will be -equal to: - current P-State - ((setpoint - current cpu load) * p_gain_pct) - -For example, if the current PID parameters are (Which are defaults for the core -processors like SandyBridge): - deadband = 0 - d_gain_pct = 0 - i_gain_pct = 0 - p_gain_pct = 20 - sample_rate_ms = 10 - setpoint = 97 - -If the current P-State = 0x08 and current load = 100, this will result in the -next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State -goes up by only 1. If during next sample interval the current load doesn't -change and still 100, then P-State goes up by one again. This process will -continue as long as the load is more than the setpoint until the maximum P-State -is reached. - -For the same load at setpoint = 60, this will result in the next P-State -= 0x08 - ((60 - 100) * 0.2) = 16 -So by changing the setpoint from 97 to 60, there is an increase of the -next P-State from 9 to 16. So this will make processor execute at higher -P-State for the same CPU load. If the load continues to be more than the -setpoint during next sample intervals, then P-State will go up again till the -maximum P-State is reached. But the ramp up time to reach the maximum P-State -will be much faster when the setpoint is 60 compared to 97. - -Debugging Intel P-State driver - -Event tracing -To debug P-State transition, the Linux event tracing interface can be used. -There are two specific events, which can be enabled (Provided the kernel -configs related to event tracing are enabled). - -# cd /sys/kernel/debug/tracing/ -# echo 1 > events/power/pstate_sample/enable -# echo 1 > events/power/cpu_frequency/enable -# cat trace -gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 - scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 - freq=2474476 -cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2 - - -Using ftrace - -If function level tracing is required, the Linux ftrace interface can be used. -For example if we want to check how often a function to set a P-State is -called, we can set ftrace filter to intel_pstate_set_pstate. - -# cd /sys/kernel/debug/tracing/ -# cat available_filter_functions | grep -i pstate -intel_pstate_set_pstate -intel_pstate_cpu_init -... - -# echo intel_pstate_set_pstate > set_ftrace_filter -# echo function > current_tracer -# cat trace | head -15 -# tracer: function -# -# entries-in-buffer/entries-written: 80/80 #P:4 -# -# _-----=> irqs-off -# / _----=> need-resched -# | / _---=> hardirq/softirq -# || / _--=> preempt-depth -# ||| / delay -# TASK-PID CPU# |||| TIMESTAMP FUNCTION -# | | | |||| | | - Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func - gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func - gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func - -0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt deleted file mode 100644 index 391da64e94925c5fcfb04f0fc7c56db91c2023fa..0000000000000000000000000000000000000000 --- a/Documentation/cpu-freq/user-guide.txt +++ /dev/null @@ -1,228 +0,0 @@ - CPU frequency and voltage scaling code in the Linux(TM) kernel - - - L i n u x C P U F r e q - - U S E R G U I D E - - - Dominik Brodowski - - - - Clock scaling allows you to change the clock speed of the CPUs on the - fly. This is a nice method to save battery power, because the lower - the clock speed, the less power the CPU consumes. - - -Contents: ---------- -1. Supported Architectures and Processors -1.1 ARM and ARM64 -1.2 x86 -1.3 sparc64 -1.4 ppc -1.5 SuperH -1.6 Blackfin - -2. "Policy" / "Governor"? -2.1 Policy -2.2 Governor - -3. How to change the CPU cpufreq policy and/or speed -3.1 Preferred interface: sysfs - - - -1. Supported Architectures and Processors -========================================= - -1.1 ARM and ARM64 ------------------ - -Almost all ARM and ARM64 platforms support CPU frequency scaling. - -1.2 x86 -------- - -The following processors for the x86 architecture are supported by cpufreq: - -AMD Elan - SC400, SC410 -AMD mobile K6-2+ -AMD mobile K6-3+ -AMD mobile Duron -AMD mobile Athlon -AMD Opteron -AMD Athlon 64 -Cyrix Media GXm -Intel mobile PIII and Intel mobile PIII-M on certain chipsets -Intel Pentium 4, Intel Xeon -Intel Pentium M (Centrino) -National Semiconductors Geode GX -Transmeta Crusoe -Transmeta Efficeon -VIA Cyrix 3 / C3 -various processors on some ACPI 2.0-compatible systems [*] -And many more - -[*] Only if "ACPI Processor Performance States" are available -to the ACPI<->BIOS interface. - - -1.3 sparc64 ------------ - -The following processors for the sparc64 architecture are supported by -cpufreq: - -UltraSPARC-III - - -1.4 ppc -------- - -Several "PowerBook" and "iBook2" notebooks are supported. -The following POWER processors are supported in powernv mode: -POWER8 -POWER9 - -1.5 SuperH ----------- - -All SuperH processors supporting rate rounding through the clock -framework are supported by cpufreq. - -1.6 Blackfin ------------- - -The following Blackfin processors are supported by cpufreq: - -BF522, BF523, BF524, BF525, BF526, BF527, Rev 0.1 or higher -BF531, BF532, BF533, Rev 0.3 or higher -BF534, BF536, BF537, Rev 0.2 or higher -BF561, Rev 0.3 or higher -BF542, BF544, BF547, BF548, BF549, Rev 0.1 or higher - - -2. "Policy" / "Governor" ? -========================== - -Some CPU frequency scaling-capable processor switch between various -frequencies and operating voltages "on the fly" without any kernel or -user involvement. This guarantees very fast switching to a frequency -which is high enough to serve the user's needs, but low enough to save -power. - - -2.1 Policy ----------- - -On these systems, all you can do is select the lower and upper -frequency limit as well as whether you want more aggressive -power-saving or more instantly available processing power. - - -2.2 Governor ------------- - -On all other cpufreq implementations, these boundaries still need to -be set. Then, a "governor" must be selected. Such a "governor" decides -what speed the processor shall run within the boundaries. One such -"governor" is the "userspace" governor. This one allows the user - or -a yet-to-implement userspace program - to decide what specific speed -the processor shall run at. - - -3. How to change the CPU cpufreq policy and/or speed -==================================================== - -3.1 Preferred Interface: sysfs ------------------------------- - -The preferred interface is located in the sysfs filesystem. If you -mounted it at /sys, the cpufreq interface is located in a subdirectory -"cpufreq" within the cpu-device directory -(e.g. /sys/devices/system/cpu/cpu0/cpufreq/ for the first CPU). - -affected_cpus : List of Online CPUs that require software - coordination of frequency. - -cpuinfo_cur_freq : Current frequency of the CPU as obtained from - the hardware, in KHz. This is the frequency - the CPU actually runs at. - -cpuinfo_min_freq : this file shows the minimum operating - frequency the processor can run at(in kHz) - -cpuinfo_max_freq : this file shows the maximum operating - frequency the processor can run at(in kHz) - -cpuinfo_transition_latency The time it takes on this CPU to - switch between two frequencies in nano - seconds. If unknown or known to be - that high that the driver does not - work with the ondemand governor, -1 - (CPUFREQ_ETERNAL) will be returned. - Using this information can be useful - to choose an appropriate polling - frequency for a kernel governor or - userspace daemon. Make sure to not - switch the frequency too often - resulting in performance loss. - -related_cpus : List of Online + Offline CPUs that need software - coordination of frequency. - -scaling_available_frequencies : List of available frequencies, in KHz. - -scaling_available_governors : this file shows the CPUfreq governors - available in this kernel. You can see the - currently activated governor in - -scaling_cur_freq : Current frequency of the CPU as determined by - the governor and cpufreq core, in KHz. This is - the frequency the kernel thinks the CPU runs - at. - -scaling_driver : this file shows what cpufreq driver is - used to set the frequency on this CPU - -scaling_governor, and by "echoing" the name of another - governor you can change it. Please note - that some governors won't load - they only - work on some specific architectures or - processors. - -scaling_min_freq and -scaling_max_freq show the current "policy limits" (in - kHz). By echoing new values into these - files, you can change these limits. - NOTE: when setting a policy you need to - first set scaling_max_freq, then - scaling_min_freq. - -scaling_setspeed This can be read to get the currently programmed - value by the governor. This can be written to - change the current frequency for a group of - CPUs, represented by a policy. This is supported - currently only by the userspace governor. - -bios_limit : If the BIOS tells the OS to limit a CPU to - lower frequencies, the user can read out the - maximum available frequency from this file. - This typically can happen through (often not - intended) BIOS settings, restrictions - triggered through a service processor or other - BIOS/HW based implementations. - This does not cover thermal ACPI limitations - which can be detected through the generic - thermal driver. - -If you have selected the "userspace" governor which allows you to -set the CPU operating frequency to a specific value, you can read out -the current frequency in - -scaling_setspeed. By "echoing" a new frequency into this - you can change the speed of the CPU, - but only within the limits of - scaling_min_freq and scaling_max_freq. diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index f722f227a73bbc2cc0bbbbd6e0e65ab0d2f8b23e..127c9d8c2174ab419351882c45ef16230fb73563 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -100,7 +100,7 @@ not defined by include/asm-XXX/topology.h: For architectures that don't support books (CONFIG_SCHED_BOOK) there are no default definitions for topology_book_id() and topology_book_cpumask(). -For architectures that don't support drawes (CONFIG_SCHED_DRAWER) there are +For architectures that don't support drawers (CONFIG_SCHED_DRAWER) there are no default definitions for topology_drawer_id() and topology_drawer_cpumask(). Additionally, CPU topology information is provided under diff --git a/Documentation/crypto/api-samples.rst b/Documentation/crypto/api-samples.rst index 0a10819f6107a706c391ee5c01baa519246516fa..d021fd96a76ded1fd3a70c1c3166e7258c5127c9 100644 --- a/Documentation/crypto/api-samples.rst +++ b/Documentation/crypto/api-samples.rst @@ -155,9 +155,9 @@ Code Example For Use of Operational State Memory With SHASH char ctx[]; }; - static struct sdescinit_sdesc(struct crypto_shash *alg) + static struct sdesc init_sdesc(struct crypto_shash *alg) { - struct sdescsdesc; + struct sdesc sdesc; int size; size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); @@ -172,7 +172,7 @@ Code Example For Use of Operational State Memory With SHASH static int calc_hash(struct crypto_shashalg, const unsigned chardata, unsigned int datalen, unsigned chardigest) { - struct sdescsdesc; + struct sdesc sdesc; int ret; sdesc = init_sdesc(alg); diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt index 2b7816dea370a2f4a30404f5bbc8542f1f231561..5ad6480e3fb96308eed5a0f3f5f80130bd7af61d 100644 --- a/Documentation/crypto/asymmetric-keys.txt +++ b/Documentation/crypto/asymmetric-keys.txt @@ -311,3 +311,54 @@ Functions are provided to register and unregister parsers: Parsers may not have the same name. The names are otherwise only used for displaying in debugging messages. + + +========================= +KEYRING LINK RESTRICTIONS +========================= + +Keyrings created from userspace using add_key can be configured to check the +signature of the key being linked. + +Several restriction methods are available: + + (1) Restrict using the kernel builtin trusted keyring + + - Option string used with KEYCTL_RESTRICT_KEYRING: + - "builtin_trusted" + + The kernel builtin trusted keyring will be searched for the signing + key. The ca_keys kernel parameter also affects which keys are used for + signature verification. + + (2) Restrict using the kernel builtin and secondary trusted keyrings + + - Option string used with KEYCTL_RESTRICT_KEYRING: + - "builtin_and_secondary_trusted" + + The kernel builtin and secondary trusted keyrings will be searched for the + signing key. The ca_keys kernel parameter also affects which keys are used + for signature verification. + + (3) Restrict using a separate key or keyring + + - Option string used with KEYCTL_RESTRICT_KEYRING: + - "key_or_keyring:[:chain]" + + Whenever a key link is requested, the link will only succeed if the key + being linked is signed by one of the designated keys. This key may be + specified directly by providing a serial number for one asymmetric key, or + a group of keys may be searched for the signing key by providing the + serial number for a keyring. + + When the "chain" option is provided at the end of the string, the keys + within the destination keyring will also be searched for signing keys. + This allows for verification of certificate chains by adding each + cert in order (starting closest to the root) to one keyring. + +In all of these cases, if the signing key is found the signature of the key to +be linked will be verified using the signing key. The requested key is added +to the keyring only if the signature is successfully verified. -ENOKEY is +returned if the parent certificate could not be found, or -EKEYREJECTED is +returned if the signature check fails or the key is blacklisted. Other errors +may be returned if the signature check could not be performed. diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt index 03703afc4d302e7eeb7fb4031d494ab750233194..9ff026d22b751deb64f5f850631c0b6e7e93e4c1 100644 --- a/Documentation/debugging-via-ohci1394.txt +++ b/Documentation/debugging-via-ohci1394.txt @@ -100,8 +100,8 @@ Step-by-step instructions for using firescope with early OHCI initialization: CardBus and even some Express cards which are fully compliant to OHCI-1394 specification are available. If it requires no driver for Windows operating systems, it most likely is. Only specialized shops have cards which are not - compliant, they are based on TI PCILynx chips and require drivers for Win- - dows operating systems. + compliant, they are based on TI PCILynx chips and require drivers for Windows + operating systems. The mentioned kernel log message contains the string "physUB" if the controller implements a writable Physical Upper Bound register. This is diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt index f228604ddbcd6890c1db0ef2778422a499cbd480..cdfd0feb294e6f0be9851223b7669de3109d875a 100644 --- a/Documentation/device-mapper/cache.txt +++ b/Documentation/device-mapper/cache.txt @@ -290,7 +290,7 @@ message, which takes an arbitrary number of cblock ranges. Each cblock range's end value is "one past the end", meaning 5-10 expresses a range of values from 5 to 9. Each cblock must be expressed as a decimal value, in the future a variant message that takes cblock ranges -expressed in hexidecimal may be needed to better support efficient +expressed in hexadecimal may be needed to better support efficient invalidation of larger caches. The cache must be in passthrough mode when invalidate_cblocks is used. diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt index ff1f87bf26e8ad10e7327ecd74a87cb3a6e64058..3b3e1de21c9ccb3f07b45aa5770e25e611d4e4d5 100644 --- a/Documentation/device-mapper/dm-crypt.txt +++ b/Documentation/device-mapper/dm-crypt.txt @@ -11,14 +11,31 @@ Parameters: \ [<#opt_params> ] - Encryption cipher and an optional IV generation mode. - (In format cipher[:keycount]-chainmode-ivmode[:ivopts]). + Encryption cipher, encryption mode and Initial Vector (IV) generator. + + The cipher specifications format is: + cipher[:keycount]-chainmode-ivmode[:ivopts] Examples: - des aes-cbc-essiv:sha256 - twofish-ecb + aes-xts-plain64 + serpent-xts-plain64 + + Cipher format also supports direct specification with kernel crypt API + format (selected by capi: prefix). The IV specification is the same + as for the first format type. + This format is mainly used for specification of authenticated modes. - /proc/crypto contains supported crypto modes + The crypto API cipher specifications format is: + capi:cipher_api_spec-ivmode[:ivopts] + Examples: + capi:cbc(aes)-essiv:sha256 + capi:xts(aes)-plain64 + Examples of authenticated modes: + capi:gcm(aes)-random + capi:authenc(hmac(sha256),xts(aes))-random + capi:rfc7539(chacha20,poly1305)-random + + The /proc/crypto contains a list of curently loaded crypto modes. Key used for encryption. It is encoded either as a hexadecimal number @@ -93,6 +110,32 @@ submit_from_crypt_cpus thread because it benefits CFQ to have writes submitted using the same context. +integrity:: + The device requires additional metadata per-sector stored + in per-bio integrity structure. This metadata must by provided + by underlying dm-integrity target. + + The can be "none" if metadata is used only for persistent IV. + + For Authenticated Encryption with Additional Data (AEAD) + the is "aead". An AEAD mode additionally calculates and verifies + integrity for the encrypted device. The additional space is then + used for storing authentication tag (and persistent IV if needed). + +sector_size: + Use as the encryption unit instead of 512 bytes sectors. + This option can be in range 512 - 4096 bytes and must be power of two. + Virtual device will announce this size as a minimal IO and logical sector. + +iv_large_sectors + IV generators will use sector number counted in units + instead of default 512 bytes sectors. + + For example, if is 4096 bytes, plain64 IV for the second + sector will be 8 (without flag) and 1 if iv_large_sectors is present. + The must be multiple of (in 512 bytes units) + if this flag is specified. + Example scripts =============== LUKS (Linux Unified Key Setup) is now the preferred way to set up disk diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt new file mode 100644 index 0000000000000000000000000000000000000000..f33e3ade7a09bdaf9f6b338d5142acc512a94cb2 --- /dev/null +++ b/Documentation/device-mapper/dm-integrity.txt @@ -0,0 +1,199 @@ +The dm-integrity target emulates a block device that has additional +per-sector tags that can be used for storing integrity information. + +A general problem with storing integrity tags with every sector is that +writing the sector and the integrity tag must be atomic - i.e. in case of +crash, either both sector and integrity tag or none of them is written. + +To guarantee write atomicity, the dm-integrity target uses journal, it +writes sector data and integrity tags into a journal, commits the journal +and then copies the data and integrity tags to their respective location. + +The dm-integrity target can be used with the dm-crypt target - in this +situation the dm-crypt target creates the integrity data and passes them +to the dm-integrity target via bio_integrity_payload attached to the bio. +In this mode, the dm-crypt and dm-integrity targets provide authenticated +disk encryption - if the attacker modifies the encrypted device, an I/O +error is returned instead of random data. + +The dm-integrity target can also be used as a standalone target, in this +mode it calculates and verifies the integrity tag internally. In this +mode, the dm-integrity target can be used to detect silent data +corruption on the disk or in the I/O path. + + +When loading the target for the first time, the kernel driver will format +the device. But it will only format the device if the superblock contains +zeroes. If the superblock is neither valid nor zeroed, the dm-integrity +target can't be loaded. + +To use the target for the first time: +1. overwrite the superblock with zeroes +2. load the dm-integrity target with one-sector size, the kernel driver + will format the device +3. unload the dm-integrity target +4. read the "provided_data_sectors" value from the superblock +5. load the dm-integrity target with the the target size + "provided_data_sectors" +6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target + with the size "provided_data_sectors" + + +Target arguments: + +1. the underlying block device + +2. the number of reserved sector at the beginning of the device - the + dm-integrity won't read of write these sectors + +3. the size of the integrity tag (if "-" is used, the size is taken from + the internal-hash algorithm) + +4. mode: + D - direct writes (without journal) - in this mode, journaling is + not used and data sectors and integrity tags are written + separately. In case of crash, it is possible that the data + and integrity tag doesn't match. + J - journaled writes - data and integrity tags are written to the + journal and atomicity is guaranteed. In case of crash, + either both data and tag or none of them are written. The + journaled mode degrades write throughput twice because the + data have to be written twice. + R - recovery mode - in this mode, journal is not replayed, + checksums are not checked and writes to the device are not + allowed. This mode is useful for data recovery if the + device cannot be activated in any of the other standard + modes. + +5. the number of additional arguments + +Additional arguments: + +journal_sectors:number + The size of journal, this argument is used only if formatting the + device. If the device is already formatted, the value from the + superblock is used. + +interleave_sectors:number + The number of interleaved sectors. This values is rounded down to + a power of two. If the device is already formatted, the value from + the superblock is used. + +buffer_sectors:number + The number of sectors in one buffer. The value is rounded down to + a power of two. + + The tag area is accessed using buffers, the buffer size is + configurable. The large buffer size means that the I/O size will + be larger, but there could be less I/Os issued. + +journal_watermark:number + The journal watermark in percents. When the size of the journal + exceeds this watermark, the thread that flushes the journal will + be started. + +commit_time:number + Commit time in milliseconds. When this time passes, the journal is + written. The journal is also written immediatelly if the FLUSH + request is received. + +internal_hash:algorithm(:key) (the key is optional) + Use internal hash or crc. + When this argument is used, the dm-integrity target won't accept + integrity tags from the upper target, but it will automatically + generate and verify the integrity tags. + + You can use a crc algorithm (such as crc32), then integrity target + will protect the data against accidental corruption. + You can also use a hmac algorithm (for example + "hmac(sha256):0123456789abcdef"), in this mode it will provide + cryptographic authentication of the data without encryption. + + When this argument is not used, the integrity tags are accepted + from an upper layer target, such as dm-crypt. The upper layer + target should check the validity of the integrity tags. + +journal_crypt:algorithm(:key) (the key is optional) + Encrypt the journal using given algorithm to make sure that the + attacker can't read the journal. You can use a block cipher here + (such as "cbc(aes)") or a stream cipher (for example "chacha20", + "salsa20", "ctr(aes)" or "ecb(arc4)"). + + The journal contains history of last writes to the block device, + an attacker reading the journal could see the last sector nubmers + that were written. From the sector numbers, the attacker can infer + the size of files that were written. To protect against this + situation, you can encrypt the journal. + +journal_mac:algorithm(:key) (the key is optional) + Protect sector numbers in the journal from accidental or malicious + modification. To protect against accidental modification, use a + crc algorithm, to protect against malicious modification, use a + hmac algorithm with a key. + + This option is not needed when using internal-hash because in this + mode, the integrity of journal entries is checked when replaying + the journal. Thus, modified sector number would be detected at + this stage. + +block_size:number + The size of a data block in bytes. The larger the block size the + less overhead there is for per-block integrity metadata. + Supported values are 512, 1024, 2048 and 4096 bytes. If not + specified the default block size is 512 bytes. + +The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can +be changed when reloading the target (load an inactive table and swap the +tables with suspend and resume). The other arguments should not be changed +when reloading the target because the layout of disk data depend on them +and the reloaded target would be non-functional. + + +The layout of the formatted block device: +* reserved sectors (they are not used by this target, they can be used for + storing LUKS metadata or for other purpose), the size of the reserved + area is specified in the target arguments +* superblock (4kiB) + * magic string - identifies that the device was formatted + * version + * log2(interleave sectors) + * integrity tag size + * the number of journal sections + * provided data sectors - the number of sectors that this target + provides (i.e. the size of the device minus the size of all + metadata and padding). The user of this target should not send + bios that access data beyond the "provided data sectors" limit. + * flags - a flag is set if journal_mac is used +* journal + The journal is divided into sections, each section contains: + * metadata area (4kiB), it contains journal entries + every journal entry contains: + * logical sector (specifies where the data and tag should + be written) + * last 8 bytes of data + * integrity tag (the size is specified in the superblock) + every metadata sector ends with + * mac (8-bytes), all the macs in 8 metadata sectors form a + 64-byte value. It is used to store hmac of sector + numbers in the journal section, to protect against a + possibility that the attacker tampers with sector + numbers in the journal. + * commit id + * data area (the size is variable; it depends on how many journal + entries fit into the metadata area) + every sector in the data area contains: + * data (504 bytes of data, the last 8 bytes are stored in + the journal entry) + * commit id + To test if the whole journal section was written correctly, every + 512-byte sector of the journal ends with 8-byte commit id. If the + commit id matches on all sectors in a journal section, then it is + assumed that the section was written correctly. If the commit id + doesn't match, the section was written partially and it should not + be replayed. +* one or more runs of interleaved tags and data. Each run contains: + * tag area - it contains integrity tags. There is one tag for each + sector in the data area + * data area - it contains data sectors. The number of data sectors + in one run must be a power of two. log2 of this value is stored + in the superblock. diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index cd2cb2fc85ead940394873d941b24a7858ea3336..7e06e65586d4ae110262a4dc8c0eef62d43d9dd5 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -170,6 +170,13 @@ The target is named "raid" and it accepts the following parameters: Takeover/reshape is not possible with a raid4/5/6 journal device; it has to be deconfigured before requesting these. + [journal_mode ] + This option sets the caching mode on journaled raid4/5/6 raid sets + (see 'journal_dev ' above) to 'writethrough' or 'writeback'. + If 'writeback' is selected the journal device has to be resilient + and must not suffer from the 'write hole' problem itself (e.g. use + raid1 or raid10) to avoid a single point of failure. + <#raid_devs>: The number of devices composing the array. Each device consists of two entries. The first is the device containing the metadata (if any); the second is the one containing the @@ -254,7 +261,8 @@ recovery. Here is a fuller description of the individual fields: The current data offset to the start of the user data on each component device of a raid set (see the respective raid parameter to support out-of-place reshaping). - 'A' - active raid4/5/6 journal device. + 'A' - active write-through journal device. + 'a' - active write-back journal device. 'D' - dead journal device. '-' - no journal device. @@ -331,3 +339,7 @@ Version History 'D' on the status line. If '- -' is passed into the constructor, emit '- -' on the table line and '-' as the status line health character. 1.10.0 Add support for raid4/5/6 journal device +1.10.1 Fix data corruption on reshape request +1.11.0 Fix table line argument order + (wrong raid10_copies/raid10_format sequence) +1.11.1 Add raid4/5/6 journal write-back support via journal_mode option diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt index c246cd2730d90669787e0bb5a0f0f50ed5ad3376..bfd5b558477d95bd1c2fe42d47861d791313872e 100644 --- a/Documentation/devicetree/bindings/arm/amlogic.txt +++ b/Documentation/devicetree/bindings/arm/amlogic.txt @@ -43,8 +43,11 @@ Board compatible values: - "wetek,hub" (Meson gxbb) - "wetek,play2" (Meson gxbb) - "amlogic,p212" (Meson gxl s905x) + - "khadas,vim" (Meson gxl s905x) + - "amlogic,p230" (Meson gxl s905d) - "amlogic,p231" (Meson gxl s905d) + - "hwacom,amazetv" (Meson gxl s905x) - "amlogic,q200" (Meson gxm s912) - "amlogic,q201" (Meson gxm s912) - "nexbox,a95x" (Meson gxbb or Meson gxl s905x) diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt index 29737b9b616e46920a6df2a6c9327499eda8b69e..799af90dd75b09b4c28e85bbcf36db0604d63bd2 100644 --- a/Documentation/devicetree/bindings/arm/atmel-at91.txt +++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt @@ -217,7 +217,8 @@ memory, bridge implementations, processor and other functionality not controlled elsewhere. required properties: -- compatible: Should be "atmel,-sfr", "syscon". +- compatible: Should be "atmel,-sfr", "syscon" or + "atmel,-sfrbu", "syscon" can be "sama5d3", "sama5d4" or "sama5d2". - reg: Should contain registers location and length diff --git a/Documentation/devicetree/bindings/arm/cavium-thunder2.txt b/Documentation/devicetree/bindings/arm/cavium-thunder2.txt new file mode 100644 index 0000000000000000000000000000000000000000..dc5dd65cbce7a13a46eb4ef3e3ca172074509775 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/cavium-thunder2.txt @@ -0,0 +1,8 @@ +Cavium ThunderX2 CN99XX platform tree bindings +---------------------------------------------- + +Boards with Cavium ThunderX2 CN99XX SoC shall have the root property: + compatible = "cavium,thunderx2-cn9900", "brcm,vulcan-soc"; + +These SoC uses the "cavium,thunder2" core which will be compatible +with "brcm,vulcan". diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index 698ad1f097fa34c835478a84a7ad9bddfc86e9b6..1030f5f50207f22baa245e9df9ca69df286c128e 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -170,6 +170,7 @@ nodes to be present and contain the properties described below. "brcm,brahma-b15" "brcm,vulcan" "cavium,thunder" + "cavium,thunder2" "faraday,fa526" "intel,sa110" "intel,sa1100" diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt new file mode 100644 index 0000000000000000000000000000000000000000..d38834c67dffe910af59fa15531b9fcd2303a33b --- /dev/null +++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt @@ -0,0 +1,31 @@ +OP-TEE Device Tree Bindings + +OP-TEE is a piece of software using hardware features to provide a Trusted +Execution Environment. The security can be provided with ARM TrustZone, but +also by virtualization or a separate chip. + +We're using "linaro" as the first part of the compatible property for +the reference implementation maintained by Linaro. + +* OP-TEE based on ARM TrustZone required properties: + +- compatible : should contain "linaro,optee-tz" + +- method : The method of calling the OP-TEE Trusted OS. Permitted + values are: + + "smc" : SMC #0, with the register assignments specified + in drivers/tee/optee/optee_smc.h + + "hvc" : HVC #0, with the register assignments specified + in drivers/tee/optee/optee_smc.h + + + +Example: + firmware { + optee { + compatible = "linaro,optee-tz"; + method = "smc"; + }; + }; diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt index c9c567ae227f7a3f948ddc3a2ad66ba7d3664f05..cdb9dd705754a82928ffb90b7d67faf493104a24 100644 --- a/Documentation/devicetree/bindings/arm/fsl.txt +++ b/Documentation/devicetree/bindings/arm/fsl.txt @@ -179,6 +179,18 @@ LS1046A ARMv8 based RDB Board Required root node properties: - compatible = "fsl,ls1046a-rdb", "fsl,ls1046a"; +LS1088A SoC +Required root node properties: + - compatible = "fsl,ls1088a"; + +LS1088A ARMv8 based QDS Board +Required root node properties: + - compatible = "fsl,ls1088a-qds", "fsl,ls1088a"; + +LS1088A ARMv8 based RDB Board +Required root node properties: + - compatible = "fsl,ls1088a-rdb", "fsl,ls1088a"; + LS2080A SoC Required root node properties: - compatible = "fsl,ls2080a"; @@ -195,3 +207,14 @@ LS2080A ARMv8 based RDB Board Required root node properties: - compatible = "fsl,ls2080a-rdb", "fsl,ls2080a"; +LS2088A SoC +Required root node properties: + - compatible = "fsl,ls2088a"; + +LS2088A ARMv8 based QDS Board +Required root node properties: + - compatible = "fsl,ls2088a-qds", "fsl,ls2088a"; + +LS2088A ARMv8 based RDB Board +Required root node properties: + - compatible = "fsl,ls2088a-rdb", "fsl,ls2088a"; diff --git a/Documentation/devicetree/bindings/arm/gemini.txt b/Documentation/devicetree/bindings/arm/gemini.txt new file mode 100644 index 0000000000000000000000000000000000000000..0041eb031116af2ed8e306ffa6d30eedfc7a4aca --- /dev/null +++ b/Documentation/devicetree/bindings/arm/gemini.txt @@ -0,0 +1,86 @@ +Cortina systems Gemini platforms + +The Gemini SoC is the project name for an ARMv4 FA525-based SoC originally +produced by Storlink Semiconductor around 2005. The company was renamed +later renamed Storm Semiconductor. The chip product name is Storlink SL3516. +It was derived from earlier products from Storm named SL3316 (Centroid) and +SL3512 (Bulverde). + +Storm Semiconductor was acquired by Cortina Systems in 2008 and the SoC was +produced and used for NAS and similar usecases. In 2014 Cortina Systems was +in turn acquired by Inphi, who seem to have discontinued this product family. + +Many of the IP blocks used in the SoC comes from Faraday Technology. + +Required properties (in root node): + compatible = "cortina,gemini"; + +Required nodes: + +- soc: the SoC should be represented by a simple bus encompassing all the + onchip devices, this is referred to as the soc bus node. + +- syscon: the soc bus node must have a system controller node pointing to the + global control registers, with the compatible string + "cortina,gemini-syscon", "syscon"; + +- timer: the soc bus node must have a timer node pointing to the SoC timer + block, with the compatible string "cortina,gemini-timer" + See: clocksource/cortina,gemini-timer.txt + +- interrupt-controller: the sob bus node must have an interrupt controller + node pointing to the SoC interrupt controller block, with the compatible + string "cortina,gemini-interrupt-controller" + See interrupt-controller/cortina,gemini-interrupt-controller.txt + +Example: + +/ { + model = "Foo Gemini Machine"; + compatible = "cortina,gemini"; + #address-cells = <1>; + #size-cells = <1>; + + memory { + device_type = "memory"; + reg = <0x00000000 0x8000000>; + }; + + soc { + #address-cells = <1>; + #size-cells = <1>; + ranges; + compatible = "simple-bus"; + interrupt-parent = <&intcon>; + + syscon: syscon@40000000 { + compatible = "cortina,gemini-syscon", "syscon"; + reg = <0x40000000 0x1000>; + }; + + uart0: serial@42000000 { + compatible = "ns16550a"; + reg = <0x42000000 0x100>; + clock-frequency = <48000000>; + interrupts = <18 IRQ_TYPE_LEVEL_HIGH>; + reg-shift = <2>; + }; + + timer@43000000 { + compatible = "cortina,gemini-timer"; + reg = <0x43000000 0x1000>; + interrupt-parent = <&intcon>; + interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */ + <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */ + <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */ + syscon = <&syscon>; + }; + + intcon: interrupt-controller@48000000 { + compatible = "cortina,gemini-interrupt-controller"; + reg = <0x48000000 0x1000>; + interrupt-controller; + #interrupt-cells = <2>; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt index f1c1e21a811099ddd980a566f064d2284729cfcd..2e732152064b5ae51ae33f38dd9f90e3c9fc90a1 100644 --- a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt +++ b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt @@ -4,6 +4,14 @@ Hi3660 SoC Required root node properties: - compatible = "hisilicon,hi3660"; +Hi3798cv200 SoC +Required root node properties: + - compatible = "hisilicon,hi3798cv200"; + +Hi3798cv200 Poplar Board +Required root node properties: + - compatible = "hisilicon,hi3798cv200-poplar", "hisilicon,hi3798cv200"; + Hi4511 Board Required root node properties: - compatible = "hisilicon,hi3620-hi4511"; diff --git a/Documentation/devicetree/bindings/arm/i2se.txt b/Documentation/devicetree/bindings/arm/i2se.txt new file mode 100644 index 0000000000000000000000000000000000000000..dbd54a3aa07da1f1cd9de71b2ece851803ed4709 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/i2se.txt @@ -0,0 +1,22 @@ +I2SE Device Tree Bindings +------------------------- + +Duckbill Board +Required root node properties: + - compatible = "i2se,duckbill", "fsl,imx28"; + +Duckbill 2 Board +Required root node properties: + - compatible = "i2se,duckbill-2", "fsl,imx28"; + +Duckbill 2 485 Board +Required root node properties: + - compatible = "i2se,duckbill-2-485", "i2se,duckbill-2", "fsl,imx28"; + +Duckbill 2 EnOcean Board +Required root node properties: + - compatible = "i2se,duckbill-2-enocean", "i2se,duckbill-2", "fsl,imx28"; + +Duckbill 2 SPI Board +Required root node properties: + - compatible = "i2se,duckbill-2-spi", "i2se,duckbill-2", "fsl,imx28"; diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt index 917199f179658248baa37b7f84c20b737b3a5152..d9650c1788f4122199703abe8a89253dc6f1974f 100644 --- a/Documentation/devicetree/bindings/arm/l2c2x0.txt +++ b/Documentation/devicetree/bindings/arm/l2c2x0.txt @@ -90,6 +90,9 @@ Optional properties: - arm,standby-mode: L2 standby mode enable. Value <0> (forcibly disable), <1> (forcibly enable), property absent (OS specific behavior, preferably retain firmware settings) +- arm,early-bresp-disable : Disable the CA9 optimization Early BRESP (PL310) +- arm,full-line-zero-disable : Disable the CA9 optimization Full line of zero + write (PL310) Example: diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt index cb0054ac7121e54a747f147844b3f698c380c0e1..cd977db7630c50a6e118e60edd96256efa138ac9 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt @@ -7,6 +7,7 @@ Required Properties: - compatible: Should be one of: - "mediatek,mt2701-apmixedsys" + - "mediatek,mt6797-apmixedsys" - "mediatek,mt8135-apmixedsys" - "mediatek,mt8173-apmixedsys" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt index f6a916686f4c4a94515d703ebcca0506343a4ba9..047b11ae5f45c0a7e020a231a4e87ce656b3842e 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt @@ -7,6 +7,7 @@ Required Properties: - compatible: Should be one of: - "mediatek,mt2701-imgsys", "syscon" + - "mediatek,mt6797-imgsys", "syscon" - "mediatek,mt8173-imgsys", "syscon" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt index 1620ec2a5a3f12872d828d18fa04e02a9eafd315..58d58e2006b83324502d3d39739ab6537552649b 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt @@ -8,6 +8,7 @@ Required Properties: - compatible: Should be one of: - "mediatek,mt2701-infracfg", "syscon" + - "mediatek,mt6797-infracfg", "syscon" - "mediatek,mt8135-infracfg", "syscon" - "mediatek,mt8173-infracfg", "syscon" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt index 67dd2e473d25fbe7189d788f47edfb3f09d19dcd..70529e0b58e9a15927a552ada8106434c1627100 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt @@ -7,6 +7,7 @@ Required Properties: - compatible: Should be one of: - "mediatek,mt2701-mmsys", "syscon" + - "mediatek,mt6797-mmsys", "syscon" - "mediatek,mt8173-mmsys", "syscon" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt index 9f2fe7860114d65cb1a07ed70a6ec3242b57303f..ec93ecbb9f3c2fb72bf461d8d57275d8c57ac79a 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt @@ -7,6 +7,7 @@ Required Properties: - compatible: Should be one of: - "mediatek,mt2701-topckgen" + - "mediatek,mt6797-topckgen" - "mediatek,mt8135-topckgen" - "mediatek,mt8173-topckgen" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt index 2440f73450c365895a0ec62e6f87c21281d1b0ef..d150104f928a4f7c023a724a3e646eb11b49d83a 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt @@ -7,6 +7,7 @@ Required Properties: - compatible: Should be one of: - "mediatek,mt2701-vdecsys", "syscon" + - "mediatek,mt6797-vdecsys", "syscon" - "mediatek,mt8173-vdecsys", "syscon" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt index 5bb2866a2b50acebabf714868386aa9223186542..8a93be643647d429c39ba3c978e6e1a02f94d1c4 100644 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vencsys.txt @@ -5,7 +5,8 @@ The Mediatek vencsys controller provides various clocks to the system. Required Properties: -- compatible: Should be: +- compatible: Should be one of: + - "mediatek,mt6797-vencsys", "syscon" - "mediatek,mt8173-vencsys", "syscon" - #clock-cells: Must be 1 diff --git a/Documentation/devicetree/bindings/arm/rockchip.txt b/Documentation/devicetree/bindings/arm/rockchip.txt index cc4ace6397abda9ffa0a7aa02a0fce76eff5c6e0..c965d99e86c2b8413c5b5463d2bbc46966f53005 100644 --- a/Documentation/devicetree/bindings/arm/rockchip.txt +++ b/Documentation/devicetree/bindings/arm/rockchip.txt @@ -1,5 +1,8 @@ Rockchip platforms device tree bindings --------------------------------------- +- Asus Tinker board + Required root node properties: + - compatible = "asus,rk3288-tinker", "rockchip,rk3288"; - Kylin RK3036 board: Required root node properties: @@ -56,6 +59,17 @@ Rockchip platforms device tree bindings - compatible = "google,veyron-brain-rev0", "google,veyron-brain", "google,veyron", "rockchip,rk3288"; +- Google Gru (dev-board): + Required root node properties: + - compatible = "google,gru-rev15", "google,gru-rev14", + "google,gru-rev13", "google,gru-rev12", + "google,gru-rev11", "google,gru-rev10", + "google,gru-rev9", "google,gru-rev8", + "google,gru-rev7", "google,gru-rev6", + "google,gru-rev5", "google,gru-rev4", + "google,gru-rev3", "google,gru-rev2", + "google,gru", "rockchip,rk3399"; + - Google Jaq (Haier Chromebook 11 and more): Required root node properties: - compatible = "google,veyron-jaq-rev5", "google,veyron-jaq-rev4", @@ -70,6 +84,15 @@ Rockchip platforms device tree bindings "google,veyron-jerry-rev3", "google,veyron-jerry", "google,veyron", "rockchip,rk3288"; +- Google Kevin (Samsung Chromebook Plus): + Required root node properties: + - compatible = "google,kevin-rev15", "google,kevin-rev14", + "google,kevin-rev13", "google,kevin-rev12", + "google,kevin-rev11", "google,kevin-rev10", + "google,kevin-rev9", "google,kevin-rev8", + "google,kevin-rev7", "google,kevin-rev6", + "google,kevin", "google,gru", "rockchip,rk3399"; + - Google Mickey (Asus Chromebit CS10): Required root node properties: - compatible = "google,veyron-mickey-rev8", "google,veyron-mickey-rev7", @@ -103,6 +126,10 @@ Rockchip platforms device tree bindings Required root node properties: - compatible = "mqmaker,miqi", "rockchip,rk3288"; +- Phytec phyCORE-RK3288: Rapid Development Kit + Required root node properties: + - compatible = "phytec,rk3288-pcm-947", "phytec,rk3288-phycore-som", "rockchip,rk3288"; + - Rockchip PX3 Evaluation board: Required root node properties: - compatible = "rockchip,px3-evb", "rockchip,px3", "rockchip,rk3188"; @@ -134,6 +161,10 @@ Rockchip platforms device tree bindings Required root node properties: - compatible = "rockchip,rk3288-fennec", "rockchip,rk3288"; +- Rockchip RK3328 evb: + Required root node properties: + - compatible = "rockchip,rk3328-evb", "rockchip,rk3328"; + - Rockchip RK3399 evb: Required root node properties: - compatible = "rockchip,rk3399-evb", "rockchip,rk3399"; diff --git a/Documentation/devicetree/bindings/arm/shmobile.txt b/Documentation/devicetree/bindings/arm/shmobile.txt index c9502634316da3b184e883463973a64962fa252d..170fe0562c637eabb738978b909e930ba4fee87a 100644 --- a/Documentation/devicetree/bindings/arm/shmobile.txt +++ b/Documentation/devicetree/bindings/arm/shmobile.txt @@ -13,8 +13,12 @@ SoCs: compatible = "renesas,r8a73a4" - R-Mobile A1 (R8A77400) compatible = "renesas,r8a7740" + - RZ/G1H (R8A77420) + compatible = "renesas,r8a7742" - RZ/G1M (R8A77430) compatible = "renesas,r8a7743" + - RZ/G1N (R8A77440) + compatible = "renesas,r8a7744" - RZ/G1E (R8A77450) compatible = "renesas,r8a7745" - R-Car M1A (R8A77781) diff --git a/Documentation/devicetree/bindings/arm/sprd.txt b/Documentation/devicetree/bindings/arm/sprd.txt index 31a629dc75b8a959a3ad937a60364fa8dddccff0..3df034b13e284338df88af478101b4c4cd1542d0 100644 --- a/Documentation/devicetree/bindings/arm/sprd.txt +++ b/Documentation/devicetree/bindings/arm/sprd.txt @@ -1,11 +1,14 @@ Spreadtrum SoC Platforms Device Tree Bindings ---------------------------------------------------- -Sharkl64 is a Spreadtrum's SoC Platform which is based -on ARM 64-bit processor. +SC9836 openphone Board +Required root node properties: + - compatible = "sprd,sc9836-openphone", "sprd,sc9836"; -SC9836 openphone board with SC9836 SoC based on the -Sharkl64 Platform shall have the following properties. +SC9860 SoC +Required root node properties: + - compatible = "sprd,sc9860" +SP9860G 3GFHD Board Required root node properties: - - compatible = "sprd,sc9836-openphone", "sprd,sc9836"; + - compatible = "sprd,sp9860g-1h10", "sprd,sc9860"; diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt new file mode 100644 index 0000000000000000000000000000000000000000..078a58b0302fb5009da58ebf7c76ec7d556098cc --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra186-pmc.txt @@ -0,0 +1,34 @@ +NVIDIA Tegra Power Management Controller (PMC) + +Required properties: +- compatible: Should contain one of the following: + - "nvidia,tegra186-pmc": for Tegra186 +- reg: Must contain an (offset, length) pair of the register set for each + entry in reg-names. +- reg-names: Must include the following entries: + - "pmc" + - "wake" + - "aotag" + - "scratch" + +Optional properties: +- nvidia,invert-interrupt: If present, inverts the PMU interrupt signal. + +Example: + +SoC DTSI: + + pmc@c3600000 { + compatible = "nvidia,tegra186-pmc"; + reg = <0 0x0c360000 0 0x10000>, + <0 0x0c370000 0 0x10000>, + <0 0x0c380000 0 0x10000>, + <0 0x0c390000 0 0x10000>; + reg-names = "pmc", "wake", "aotag", "scratch"; + }; + +Board DTS: + + pmc@c360000 { + nvidia,invert-interrupt; + }; diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt index ccf0adddc8205e3f47df8ff99af43271f7792a99..a855c1bffc0f6695d5c0cd3f032019ea32d9d5e8 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt @@ -1,7 +1,13 @@ NVIDIA Tegra Flow Controller Required properties: -- compatible: Should be "nvidia,tegra-flowctrl" +- compatible: Should contain one of the following: + - "nvidia,tegra20-flowctrl": for Tegra20 + - "nvidia,tegra30-flowctrl": for Tegra30 + - "nvidia,tegra114-flowctrl": for Tegra114 + - "nvidia,tegra124-flowctrl": for Tegra124 + - "nvidia,tegra132-flowctrl", "nvidia,tegra124-flowctrl": for Tegra132 + - "nvidia,tegra210-flowctrl": for Tegra210 - reg: Should contain one register range (address and length) Example: diff --git a/Documentation/devicetree/bindings/ata/ahci-dm816.txt b/Documentation/devicetree/bindings/ata/ahci-dm816.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8c535f3541f57d4cbf7da9e938e0cebaade608c --- /dev/null +++ b/Documentation/devicetree/bindings/ata/ahci-dm816.txt @@ -0,0 +1,21 @@ +Device tree binding for the TI DM816 AHCI SATA Controller +--------------------------------------------------------- + +Required properties: + - compatible: must be "ti,dm816-ahci" + - reg: physical base address and size of the register region used by + the controller (as defined by the AHCI 1.1 standard) + - interrupts: interrupt specifier (refer to the interrupt binding) + - clocks: list of phandle and clock specifier pairs (or only + phandles for clock providers with '0' defined for + #clock-cells); two clocks must be specified: the functional + clock and an external reference clock + +Example: + + sata: sata@4a140000 { + compatible = "ti,dm816-ahci"; + reg = <0x4a140000 0x10000>; + interrupts = <16>; + clocks = <&sysclk5_ck>, <&sata_refclk>; + }; diff --git a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt new file mode 100644 index 0000000000000000000000000000000000000000..2aa24b8899236882e2c74c99657bfa0d64b091ba --- /dev/null +++ b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.txt @@ -0,0 +1,45 @@ +DT bindings for the Hitachi HD44780 Character LCD Controller + +The Hitachi HD44780 Character LCD Controller is commonly used on character LCDs +that can display one or more lines of text. It exposes an M6800 bus interface, +which can be used in either 4-bit or 8-bit mode. + +Required properties: + - compatible: Must contain "hit,hd44780", + - data-gpios: Must contain an array of either 4 or 8 GPIO specifiers, + referring to the GPIO pins connected to the data signal lines DB0-DB7 + (8-bit mode) or DB4-DB7 (4-bit mode) of the LCD Controller's bus interface, + - enable-gpios: Must contain a GPIO specifier, referring to the GPIO pin + connected to the "E" (Enable) signal line of the LCD Controller's bus + interface, + - rs-gpios: Must contain a GPIO specifier, referring to the GPIO pin + connected to the "RS" (Register Select) signal line of the LCD Controller's + bus interface, + - display-height-chars: Height of the display, in character cells, + - display-width-chars: Width of the display, in character cells. + +Optional properties: + - rw-gpios: Must contain a GPIO specifier, referring to the GPIO pin + connected to the "RW" (Read/Write) signal line of the LCD Controller's bus + interface, + - backlight-gpios: Must contain a GPIO specifier, referring to the GPIO pin + used for enabling the LCD's backlight, + - internal-buffer-width: Internal buffer width (default is 40 for displays + with 1 or 2 lines, and display-width-chars for displays with more than 2 + lines). + +Example: + + auxdisplay { + compatible = "hit,hd44780"; + + data-gpios = <&hc595 0 GPIO_ACTIVE_HIGH>, + <&hc595 1 GPIO_ACTIVE_HIGH>, + <&hc595 2 GPIO_ACTIVE_HIGH>, + <&hc595 3 GPIO_ACTIVE_HIGH>; + enable-gpios = <&hc595 4 GPIO_ACTIVE_HIGH>; + rs-gpios = <&hc595 5 GPIO_ACTIVE_HIGH>; + + display-height-chars = <2>; + display-width-chars = <16>; + }; diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt index 6ae9d82d4c378844505c4b8184b60d429ba95a9e..b5e39af4ddc0d6330dbf19ed768d6f15e1d2ba6c 100644 --- a/Documentation/devicetree/bindings/chosen.txt +++ b/Documentation/devicetree/bindings/chosen.txt @@ -52,3 +52,48 @@ This property is set (currently only on PowerPC, and only needed on book3e) by some versions of kexec-tools to tell the new kernel that it is being booted by kexec, as the booting environment may differ (e.g. a different secondary CPU release mechanism) + +linux,usable-memory-range +------------------------- + +This property (arm64 only) holds a base address and size, describing a +limited region in which memory may be considered available for use by +the kernel. Memory outside of this range is not available for use. + +This property describes a limitation: memory within this range is only +valid when also described through another mechanism that the kernel +would otherwise use to determine available memory (e.g. memory nodes +or the EFI memory map). Valid memory may be sparse within the range. +e.g. + +/ { + chosen { + linux,usable-memory-range = <0x9 0xf0000000 0x0 0x10000000>; + }; +}; + +The main usage is for crash dump kernel to identify its own usable +memory and exclude, at its boot time, any other memory areas that are +part of the panicked kernel's memory. + +While this property does not represent a real hardware, the address +and the size are expressed in #address-cells and #size-cells, +respectively, of the root node. + +linux,elfcorehdr +---------------- + +This property (currently used only on arm64) holds the memory range, +the address and the size, of the elf core header which mainly describes +the panicked kernel's memory layout as PT_LOAD segments of elf format. +e.g. + +/ { + chosen { + linux,elfcorehdr = <0x9 0xfffff000 0x0 0x800>; + }; +}; + +While this property does not represent a real hardware, the address +and the size are expressed in #address-cells and #size-cells, +respectively, of the root node. diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt index ce06435d28ed9a036cdb2283f159acd6b3ac8b4c..a09d627b5508a2f8c9905dc9c02c09387acfe9e6 100644 --- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt +++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt @@ -5,7 +5,8 @@ controllers within the SoC. Required Properties: -- compatible: should be "amlogic,gxbb-clkc" +- compatible: should be "amlogic,gxbb-clkc" for GXBB SoC, + or "amlogic,gxl-clkc" for GXL and GXM SoC. - reg: physical base address of the clock controller and length of memory mapped region. diff --git a/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt index a88f1f05fbd6394dcc451e5bacb77c1220a6f3c3..4c0807f28cfaa119a55a6f48fa061daca01f1f60 100644 --- a/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt +++ b/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt @@ -5,6 +5,7 @@ reading the gpio latch register. This node must be a subnode of the node exposing the register address of the GPIO block where the gpio latch is located. +See Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt Required properties: - compatible : shall be one of the following: @@ -16,9 +17,9 @@ Optional properties: output names ("xtal") Example: -gpio1: gpio@13800 { - compatible = "marvell,armada-3700-gpio", "syscon", "simple-mfd"; - reg = <0x13800 0x1000>; +pinctrl_nb: pinctrl-nb@13800 { + compatible = "armada3710-nb-pinctrl", "syscon", "simple-mfd"; + reg = <0x13800 0x100>, <0x13C00 0x20>; xtalclk: xtal-clk { compatible = "marvell,armada-3700-xtal-clock"; diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt index 87e9c47a89a399fc3d8ef30623697d58cd956741..53d7e50ed875ae0db438af18f9f3354c2548c705 100644 --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt @@ -6,18 +6,21 @@ from 3 to 12 output clocks. ==I2C device node== Required properties: -- compatible: shall be one of "idt,5p49v5923" , "idt,5p49v5933". +- compatible: shall be one of "idt,5p49v5923" , "idt,5p49v5933" , + "idt,5p49v5935". - reg: i2c device address, shall be 0x68 or 0x6a. - #clock-cells: from common clock binding; shall be set to 1. - clocks: from common clock binding; list of parent clock handles, - 5p49v5923: (required) either or both of XTAL or CLKIN reference clock. - - 5p49v5933: (optional) property not present (internal + - 5p49v5933 and + - 5p49v5935: (optional) property not present (internal Xtal used) or CLKIN reference clock. - clock-names: from common clock binding; clock input names, can be - 5p49v5923: (required) either or both of "xin", "clkin". - - 5p49v5933: (optional) property not present or "clkin". + - 5p49v5933 and + - 5p49v5935: (optional) property not present or "clkin". ==Mapping between clock specifier and physical pins== @@ -34,6 +37,13 @@ clock specifier, the following mapping applies: 1 -- OUT1 2 -- OUT4 +5P49V5935: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT2 + 3 -- OUT3 + 4 -- OUT4 + ==Example== /* 25MHz reference crystal */ diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt index eb985a633d595b98fe3d861e06b4efe468e25d9c..796c260c183d6af6d202752d453beb6363109c97 100644 --- a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt +++ b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt @@ -31,6 +31,12 @@ The following is a list of provided IDs and clock names on Armada 39x: 4 = dclk (SDRAM Interface Clock) 5 = refclk (Reference Clock) +The following is a list of provided IDs and clock names on 98dx3236: + 0 = tclk (Internal Bus clock) + 1 = cpuclk (CPU clock) + 2 = ddrclk (DDR clock) + 3 = mpll (MPLL Clock) + The following is a list of provided IDs and clock names on Kirkwood and Dove: 0 = tclk (Internal Bus clock) 1 = cpuclk (CPU0 clock) @@ -49,6 +55,7 @@ Required properties: "marvell,armada-380-core-clock" - For Armada 380/385 SoC core clocks "marvell,armada-390-core-clock" - For Armada 39x SoC core clocks "marvell,armada-xp-core-clock" - For Armada XP SoC core clocks + "marvell,mv98dx3236-core-clock" - For 98dx3236 family SoC core clocks "marvell,dove-core-clock" - for Dove SoC core clocks "marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180) "marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC diff --git a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt index 5142efc8099d24c927859cac7ca4d317853bfbdf..de562da2ae77db48639d1a775ca5efd489e5de04 100644 --- a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt +++ b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt @@ -119,6 +119,16 @@ ID Clock Peripheral 29 sata1lnk 30 sata1 SATA Host 1 +The following is a list of provided IDs for 98dx3236: +ID Clock Peripheral +----------------------------------- +3 ge1 Gigabit Ethernet 1 +4 ge0 Gigabit Ethernet 0 +5 pex0 PCIe Cntrl 0 +17 sdio SDHCI Host +18 usb0 USB Host 0 +22 xor0 XOR DMA 0 + The following is a list of provided IDs for Dove: ID Clock Peripheral ----------------------------------- @@ -169,6 +179,7 @@ Required properties: "marvell,armada-380-gating-clock" - for Armada 380/385 SoC clock gating "marvell,armada-390-gating-clock" - for Armada 39x SoC clock gating "marvell,armada-xp-gating-clock" - for Armada XP SoC clock gating + "marvell,mv98dx3236-gating-clock" - for 98dx3236 SoC clock gating "marvell,dove-gating-clock" - for Dove SoC clock gating "marvell,kirkwood-gating-clock" - for Kirkwood SoC clock gating - reg : shall be the register address of the Clock Gating Control register diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt index aa3526f229a721435766bb7ac3d6a82dd14feaa9..6ed469c66b32f42c4bfb62849ce5451618d5067e 100644 --- a/Documentation/devicetree/bindings/clock/qoriq-clock.txt +++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt @@ -35,6 +35,7 @@ Required properties: * "fsl,ls1021a-clockgen" * "fsl,ls1043a-clockgen" * "fsl,ls1046a-clockgen" + * "fsl,ls1088a-clockgen" * "fsl,ls2080a-clockgen" Chassis-version clock strings include: * "fsl,qoriq-clockgen-1.0": for chassis 1.0 clocks diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt deleted file mode 100644 index 4da126116cf007602bee3107c3222a537f7af856..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/clock/rockchip,rk1108-cru.txt +++ /dev/null @@ -1,59 +0,0 @@ -* Rockchip RK1108 Clock and Reset Unit - -The RK1108 clock controller generates and supplies clock to various -controllers within the SoC and also implements a reset controller for SoC -peripherals. - -Required Properties: - -- compatible: should be "rockchip,rk1108-cru" -- reg: physical base address of the controller and length of memory mapped - region. -- #clock-cells: should be 1. -- #reset-cells: should be 1. - -Optional Properties: - -- rockchip,grf: phandle to the syscon managing the "general register files" - If missing pll rates are not changeable, due to the missing pll lock status. - -Each clock is assigned an identifier and client nodes can use this identifier -to specify the clock which they consume. All available clocks are defined as -preprocessor macros in the dt-bindings/clock/rk1108-cru.h headers and can be -used in device tree sources. Similar macros exist for the reset sources in -these files. - -External clocks: - -There are several clocks that are generated outside the SoC. It is expected -that they are defined using standard clock bindings with following -clock-output-names: - - "xin24m" - crystal input - required, - - "ext_vip" - external VIP clock - optional - - "ext_i2s" - external I2S clock - optional - - "ext_gmac" - external GMAC clock - optional - - "hdmiphy" - external clock input derived from HDMI PHY - optional - - "usbphy" - external clock input derived from USB PHY - optional - -Example: Clock controller node: - - cru: cru@20200000 { - compatible = "rockchip,rk1108-cru"; - reg = <0x20200000 0x1000>; - rockchip,grf = <&grf>; - - #clock-cells = <1>; - #reset-cells = <1>; - }; - -Example: UART controller node that consumes the clock generated by the clock - controller: - - uart0: serial@10230000 { - compatible = "rockchip,rk1108-uart", "snps,dw-apb-uart"; - reg = <0x10230000 0x100>; - interrupts = ; - reg-shift = <2>; - reg-io-width = <4>; - clocks = <&cru SCLK_UART0>; - }; diff --git a/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt new file mode 100644 index 0000000000000000000000000000000000000000..161326a4f9c1feebe41d0ee38e8089c769043dd1 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/rockchip,rv1108-cru.txt @@ -0,0 +1,59 @@ +* Rockchip RV1108 Clock and Reset Unit + +The RV1108 clock controller generates and supplies clock to various +controllers within the SoC and also implements a reset controller for SoC +peripherals. + +Required Properties: + +- compatible: should be "rockchip,rv1108-cru" +- reg: physical base address of the controller and length of memory mapped + region. +- #clock-cells: should be 1. +- #reset-cells: should be 1. + +Optional Properties: + +- rockchip,grf: phandle to the syscon managing the "general register files" + If missing pll rates are not changeable, due to the missing pll lock status. + +Each clock is assigned an identifier and client nodes can use this identifier +to specify the clock which they consume. All available clocks are defined as +preprocessor macros in the dt-bindings/clock/rv1108-cru.h headers and can be +used in device tree sources. Similar macros exist for the reset sources in +these files. + +External clocks: + +There are several clocks that are generated outside the SoC. It is expected +that they are defined using standard clock bindings with following +clock-output-names: + - "xin24m" - crystal input - required, + - "ext_vip" - external VIP clock - optional + - "ext_i2s" - external I2S clock - optional + - "ext_gmac" - external GMAC clock - optional + - "hdmiphy" - external clock input derived from HDMI PHY - optional + - "usbphy" - external clock input derived from USB PHY - optional + +Example: Clock controller node: + + cru: cru@20200000 { + compatible = "rockchip,rv1108-cru"; + reg = <0x20200000 0x1000>; + rockchip,grf = <&grf>; + + #clock-cells = <1>; + #reset-cells = <1>; + }; + +Example: UART controller node that consumes the clock generated by the clock + controller: + + uart0: serial@10230000 { + compatible = "rockchip,rv1108-uart", "snps,dw-apb-uart"; + reg = <0x10230000 0x100>; + interrupts = ; + reg-shift = <2>; + reg-io-width = <4>; + clocks = <&cru SCLK_UART0>; + }; diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt index bae5668cf427836642a81ebe2e1d9b8b386bee71..e9c5a1d9834af600de55821bc0077e6235ff0bff 100644 --- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt +++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt @@ -7,9 +7,12 @@ Required properties : - "allwinner,sun8i-a23-ccu" - "allwinner,sun8i-a33-ccu" - "allwinner,sun8i-h3-ccu" + - "allwinner,sun8i-h3-r-ccu" - "allwinner,sun8i-v3s-ccu" - "allwinner,sun9i-a80-ccu" - "allwinner,sun50i-a64-ccu" + - "allwinner,sun50i-a64-r-ccu" + - "allwinner,sun50i-h5-ccu" - reg: Must contain the registers base address and length - clocks: phandle to the oscillators feeding the CCU. Two are needed: @@ -19,7 +22,10 @@ Required properties : - #clock-cells : must contain 1 - #reset-cells : must contain 1 -Example: +For the PRCM CCUs on H3/A64, one more clock is needed: +- "iosc": the SoC's internal frequency oscillator + +Example for generic CCU: ccu: clock@01c20000 { compatible = "allwinner,sun8i-h3-ccu"; reg = <0x01c20000 0x400>; @@ -28,3 +34,13 @@ ccu: clock@01c20000 { #clock-cells = <1>; #reset-cells = <1>; }; + +Example for PRCM CCU: +r_ccu: clock@01f01400 { + compatible = "allwinner,sun50i-a64-r-ccu"; + reg = <0x01f01400 0x100>; + clocks = <&osc24M>, <&osc32k>, <&iosc>; + clock-names = "hosc", "losc", "iosc"; + #clock-cells = <1>; + #reset-cells = <1>; +}; diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt b/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ba92a5e9b3617e0ceb86af03f6cf288d278ae73 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/st,stm32-crc.txt @@ -0,0 +1,16 @@ +* STMicroelectronics STM32 CRC + +Required properties: +- compatible: Should be "st,stm32f7-crc". +- reg: The address and length of the peripheral registers space +- clocks: The input clock of the CRC instance + +Optional properties: none + +Example: + +crc: crc@40023000 { + compatible = "st,stm32f7-crc"; + reg = <0x40023000 0x400>; + clocks = <&rcc 0 12>; +}; diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt index d085ef90d27c1f8b82645177169f71c3eb42d00f..f8e946471a58d7bb6ac07d22cf6a876d8c509410 100644 --- a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt +++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt @@ -202,23 +202,23 @@ Example2 : compatible = "operating-points-v2"; opp-shared; - opp@50000000 { + opp-50000000 { opp-hz = /bits/ 64 <50000000>; opp-microvolt = <800000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <800000>; }; - opp@134000000 { + opp-134000000 { opp-hz = /bits/ 64 <134000000>; opp-microvolt = <800000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <825000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <875000>; }; @@ -292,23 +292,23 @@ Example2 : compatible = "operating-points-v2"; opp-shared; - opp@50000000 { + opp-50000000 { opp-hz = /bits/ 64 <50000000>; opp-microvolt = <900000>; }; - opp@80000000 { + opp-80000000 { opp-hz = /bits/ 64 <80000000>; opp-microvolt = <900000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; opp-microvolt = <1000000>; }; - opp@134000000 { + opp-134000000 { opp-hz = /bits/ 64 <134000000>; opp-microvolt = <1000000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <1000000>; }; @@ -318,19 +318,19 @@ Example2 : compatible = "operating-points-v2"; opp-shared; - opp@50000000 { + opp-50000000 { opp-hz = /bits/ 64 <50000000>; }; - opp@80000000 { + opp-80000000 { opp-hz = /bits/ 64 <80000000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; }; - opp@400000000 { + opp-400000000 { opp-hz = /bits/ 64 <400000000>; }; }; @@ -339,19 +339,19 @@ Example2 : compatible = "operating-points-v2"; opp-shared; - opp@50000000 { + opp-50000000 { opp-hz = /bits/ 64 <50000000>; }; - opp@80000000 { + opp-80000000 { opp-hz = /bits/ 64 <80000000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; }; - opp@200000000 { + opp-200000000 { opp-hz = /bits/ 64 <200000000>; }; - opp@300000000 { + opp-300000000 { opp-hz = /bits/ 64 <300000000>; }; }; @@ -360,13 +360,13 @@ Example2 : compatible = "operating-points-v2"; opp-shared; - opp@50000000 { + opp-50000000 { opp-hz = /bits/ 64 <50000000>; }; - opp@80000000 { + opp-80000000 { opp-hz = /bits/ 64 <80000000>; }; - opp@100000000 { + opp-100000000 { opp-hz = /bits/ 64 <100000000>; }; }; diff --git a/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt b/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt index ebc1a914bda339922cd87d6f83883a0a18eaaa24..ec94468b35beca724152ab48c4f7630e3383ef2c 100644 --- a/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt +++ b/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt @@ -1,7 +1,7 @@ Device-Tree bindings for Atmel's HLCDC (High LCD Controller) DRM driver The Atmel HLCDC Display Controller is subdevice of the HLCDC MFD device. -See ../mfd/atmel-hlcdc.txt for more details. +See ../../mfd/atmel-hlcdc.txt for more details. Required properties: - compatible: value should be "atmel,hlcdc-display-controller" diff --git a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt index 7a5c0e204c8edf031c263a4129966445fdd9d0f9..e5a8b363d82972dc64133b69be7cc190f4470f56 100644 --- a/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt +++ b/Documentation/devicetree/bindings/display/imx/fsl,imx-fb.txt @@ -13,6 +13,8 @@ Required nodes: Additional, the display node has to define properties: - bits-per-pixel: Bits per pixel - fsl,pcr: LCDC PCR value + A display node may optionally define + - fsl,aus-mode: boolean to enable AUS mode (only for imx21) Optional properties: - lcd-supply: Regulator for LCD supply voltage. diff --git a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt index b82c004494681341bdef251e9323bb1df4773581..57a8d061006276730b07ff1ebf9867f52841e673 100644 --- a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt +++ b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt @@ -94,6 +94,7 @@ Required properties: * allwinner,sun6i-a31-display-backend * allwinner,sun8i-a33-display-backend - reg: base address and size of the memory-mapped region. + - interrupts: interrupt associated to this IP - clocks: phandles to the clocks feeding the frontend and backend * ahb: the backend interface clock * mod: the backend module clock @@ -265,6 +266,7 @@ fe0: display-frontend@1e00000 { be0: display-backend@1e60000 { compatible = "allwinner,sun5i-a13-display-backend"; reg = <0x01e60000 0x10000>; + interrupts = <47>; clocks = <&ahb_gates 44>, <&de_be_clk>, <&dram_gates 26>; clock-names = "ahb", "mod", diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 0fad7ed2ea191201af1a136d1fee19bd94e67ff2..74e1e8add5a1264bfc2ed8db1b26667ed8906903 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -249,6 +249,19 @@ of the following host1x client modules: See ../pinctrl/nvidia,tegra124-dpaux-padctl.txt for information regarding the DPAUX pad controller bindings. +- vic: Video Image Compositor + - compatible : "nvidia,tegra-vic" + - reg: Physical base address and length of the controller's registers. + - interrupts: The interrupt outputs from the controller. + - clocks: Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. + - clock-names: Must include the following entries: + - vic: clock input for the VIC hardware + - resets: Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. + - reset-names: Must include the following entries: + - vic + Example: / { diff --git a/Documentation/devicetree/bindings/firmware/coreboot.txt b/Documentation/devicetree/bindings/firmware/coreboot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4c955703cea87f733494d7ac84d87d1198cf0fb7 --- /dev/null +++ b/Documentation/devicetree/bindings/firmware/coreboot.txt @@ -0,0 +1,33 @@ +COREBOOT firmware information + +The device tree node to communicate the location of coreboot's memory-resident +bookkeeping structures to the kernel. Since coreboot itself cannot boot a +device-tree-based kernel (yet), this node needs to be inserted by a +second-stage bootloader (a coreboot "payload"). + +Required properties: + - compatible: Should be "coreboot" + - reg: Address and length of the following two memory regions, in order: + 1.) The coreboot table. This is a list of variable-sized descriptors + that contain various compile- and run-time generated firmware + parameters. It is identified by the magic string "LBIO" in its first + four bytes. + See coreboot's src/commonlib/include/commonlib/coreboot_tables.h for + details. + 2.) The CBMEM area. This is a downward-growing memory region used by + coreboot to dynamically allocate data structures that remain resident. + It may or may not include the coreboot table as one of its members. It + is identified by a root node descriptor with the magic number + 0xc0389481 that resides in the topmost 8 bytes of the area. + See coreboot's src/include/imd.h for details. + +Example: + firmware { + ranges; + + coreboot { + compatible = "coreboot"; + reg = <0xfdfea000 0x264>, + <0xfdfea000 0x16000>; + } + }; diff --git a/Documentation/devicetree/bindings/fpga/altera-pr-ip.txt b/Documentation/devicetree/bindings/fpga/altera-pr-ip.txt new file mode 100644 index 0000000000000000000000000000000000000000..52a294cf2730537c0b925cb16b0845b85c0195b1 --- /dev/null +++ b/Documentation/devicetree/bindings/fpga/altera-pr-ip.txt @@ -0,0 +1,12 @@ +Altera Arria10 Partial Reconfiguration IP + +Required properties: +- compatible : should contain "altr,a10-pr-ip" +- reg : base address and size for memory mapped io. + +Example: + + fpga_mgr: fpga-mgr@ff20c000 { + compatible = "altr,a10-pr-ip"; + reg = <0xff20c000 0x10>; + }; diff --git a/Documentation/devicetree/bindings/fpga/fpga-region.txt b/Documentation/devicetree/bindings/fpga/fpga-region.txt index 3b32ba15a71730838fb33bebb1365013b3c3c74d..6db8aeda461aebc0155326ae68a136fce19e2898 100644 --- a/Documentation/devicetree/bindings/fpga/fpga-region.txt +++ b/Documentation/devicetree/bindings/fpga/fpga-region.txt @@ -186,12 +186,15 @@ Optional properties: otherwise full reconfiguration is done. - external-fpga-config : boolean, set if the FPGA has already been configured prior to OS boot up. +- encrypted-fpga-config : boolean, set if the bitstream is encrypted - region-unfreeze-timeout-us : The maximum time in microseconds to wait for bridges to successfully become enabled after the region has been programmed. - region-freeze-timeout-us : The maximum time in microseconds to wait for bridges to successfully become disabled before the region has been programmed. +- config-complete-timeout-us : The maximum time in microseconds time for the + FPGA to go to operating mode after the region has been programmed. - child nodes : devices in the FPGA after programming. In the example below, when an overlay is applied targeting fpga-region0, diff --git a/Documentation/devicetree/bindings/fpga/lattice-ice40-fpga-mgr.txt b/Documentation/devicetree/bindings/fpga/lattice-ice40-fpga-mgr.txt new file mode 100644 index 0000000000000000000000000000000000000000..4dc412437b086d6470038f60480b7af03ddf7e3d --- /dev/null +++ b/Documentation/devicetree/bindings/fpga/lattice-ice40-fpga-mgr.txt @@ -0,0 +1,21 @@ +Lattice iCE40 FPGA Manager + +Required properties: +- compatible: Should contain "lattice,ice40-fpga-mgr" +- reg: SPI chip select +- spi-max-frequency: Maximum SPI frequency (>=1000000, <=25000000) +- cdone-gpios: GPIO input connected to CDONE pin +- reset-gpios: Active-low GPIO output connected to CRESET_B pin. Note + that unless the GPIO is held low during startup, the + FPGA will enter Master SPI mode and drive SCK with a + clock signal potentially jamming other devices on the + bus until the firmware is loaded. + +Example: + fpga: fpga@0 { + compatible = "lattice,ice40-fpga-mgr"; + reg = <0>; + spi-max-frequency = <1000000>; + cdone-gpios = <&gpio 24 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio 22 GPIO_ACTIVE_LOW>; + }; diff --git a/Documentation/devicetree/bindings/fpga/xilinx-slave-serial.txt b/Documentation/devicetree/bindings/fpga/xilinx-slave-serial.txt new file mode 100644 index 0000000000000000000000000000000000000000..9766f7472f512fec9652fb15d8d74f1f1f5133c7 --- /dev/null +++ b/Documentation/devicetree/bindings/fpga/xilinx-slave-serial.txt @@ -0,0 +1,44 @@ +Xilinx Slave Serial SPI FPGA Manager + +Xilinx Spartan-6 FPGAs support a method of loading the bitstream over +what is referred to as "slave serial" interface. +The slave serial link is not technically SPI, and might require extra +circuits in order to play nicely with other SPI slaves on the same bus. + +See https://www.xilinx.com/support/documentation/user_guides/ug380.pdf + +Required properties: +- compatible: should contain "xlnx,fpga-slave-serial" +- reg: spi chip select of the FPGA +- prog_b-gpios: config pin (referred to as PROGRAM_B in the manual) +- done-gpios: config status pin (referred to as DONE in the manual) + +Example for full FPGA configuration: + + fpga-region0 { + compatible = "fpga-region"; + fpga-mgr = <&fpga_mgr_spi>; + #address-cells = <0x1>; + #size-cells = <0x1>; + }; + + spi1: spi@10680 { + compatible = "marvell,armada-xp-spi", "marvell,orion-spi"; + pinctrl-0 = <&spi0_pins>; + pinctrl-names = "default"; + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + interrupts = <92>; + clocks = <&coreclk 0>; + status = "okay"; + + fpga_mgr_spi: fpga-mgr@0 { + compatible = "xlnx,fpga-slave-serial"; + spi-max-frequency = <60000000>; + spi-cpha; + reg = <0>; + done-gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>; + prog_b-gpios = <&gpio0 29 GPIO_ACTIVE_LOW>; + }; + }; diff --git a/Documentation/devicetree/bindings/gpio/cortina,gemini-gpio.txt b/Documentation/devicetree/bindings/gpio/cortina,gemini-gpio.txt deleted file mode 100644 index 5c9246c054e5bae6c0c552c52e76f84afa3eceb4..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/gpio/cortina,gemini-gpio.txt +++ /dev/null @@ -1,24 +0,0 @@ -Cortina Systems Gemini GPIO Controller - -Required properties: - -- compatible : Must be "cortina,gemini-gpio" -- reg : Should contain registers location and length -- interrupts : Should contain the interrupt line for the GPIO block -- gpio-controller : marks this as a GPIO controller -- #gpio-cells : Should be 2, see gpio/gpio.txt -- interrupt-controller : marks this as an interrupt controller -- #interrupt-cells : a standard two-cell interrupt flag, see - interrupt-controller/interrupts.txt - -Example: - -gpio@4d000000 { - compatible = "cortina,gemini-gpio"; - reg = <0x4d000000 0x100>; - interrupts = <22 IRQ_TYPE_LEVEL_HIGH>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; -}; diff --git a/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.txt b/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.txt new file mode 100644 index 0000000000000000000000000000000000000000..d04236558619d84b0b1b99351ac72803ac4b2ca9 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/faraday,ftgpio010.txt @@ -0,0 +1,27 @@ +Faraday Technology FTGPIO010 GPIO Controller + +Required properties: + +- compatible : Should be one of + "cortina,gemini-gpio", "faraday,ftgpio010" + "moxa,moxart-gpio", "faraday,ftgpio010" + "faraday,ftgpio010" +- reg : Should contain registers location and length +- interrupts : Should contain the interrupt line for the GPIO block +- gpio-controller : marks this as a GPIO controller +- #gpio-cells : Should be 2, see gpio/gpio.txt +- interrupt-controller : marks this as an interrupt controller +- #interrupt-cells : a standard two-cell interrupt flag, see + interrupt-controller/interrupts.txt + +Example: + +gpio@4d000000 { + compatible = "cortina,gemini-gpio", "faraday,ftgpio010"; + reg = <0x4d000000 0x100>; + interrupts = <22 IRQ_TYPE_LEVEL_HIGH>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-aspeed.txt b/Documentation/devicetree/bindings/gpio/gpio-aspeed.txt index 393bb2ed8a774276bf1922ce63e018ab21985ca2..c756afa88cc62359323cfe3f0cc1171a31e955f4 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-aspeed.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-aspeed.txt @@ -17,7 +17,8 @@ Required properties: Optional properties: -- interrupt-parent : The parent interrupt controller, optional if inherited +- interrupt-parent : The parent interrupt controller, optional if inherited +- clocks : A phandle to the HPLL clock node for debounce timings The gpio and interrupt properties are further described in their respective bindings documentation: diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt index a6f3bec1da7d0f46d6f0ecf22fcec20232b354dc..42c3bb2d53e88b651a7d39efe00e278c24d9c9b3 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt @@ -38,6 +38,24 @@ Required properties: - #gpio-cells: Should be two. The first cell is the pin number. The second cell is reserved for flags, unused at the moment. +Optional properties: + +In order to use the GPIO lines in PWM mode, some additional optional +properties are required. Only Armada 370 and XP support these properties. + +- compatible: Must contain "marvell,armada-370-xp-gpio" + +- reg: an additional register set is needed, for the GPIO Blink + Counter on/off registers. + +- reg-names: Must contain an entry "pwm" corresponding to the + additional register range needed for PWM operation. + +- #pwm-cells: Should be two. The first cell is the GPIO line number. The + second cell is the period in nanoseconds. + +- clocks: Must be a phandle to the clock for the GPIO controller. + Example: gpio0: gpio@d0018100 { @@ -51,3 +69,17 @@ Example: #interrupt-cells = <2>; interrupts = <16>, <17>, <18>, <19>; }; + + gpio1: gpio@18140 { + compatible = "marvell,armada-370-xp-gpio"; + reg = <0x18140 0x40>, <0x181c8 0x08>; + reg-names = "gpio", "pwm"; + ngpios = <17>; + gpio-controller; + #gpio-cells = <2>; + #pwm-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + interrupts = <87>, <88>, <89>; + clocks = <&coreclk 0>; + }; diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt index e6393571001144bba965d0d72e3d4c50a0b9da8e..7f57271df2bc96b41168d337c911511ccb726e33 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt @@ -26,6 +26,7 @@ Required properties: ti,tca6416 ti,tca6424 ti,tca9539 + ti,tca9554 onsemi,pca9654 exar,xra1202 diff --git a/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt b/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt index ada4e29733234aa0f5c0afe8cf20e2ac4b8de173..7d3bd631d0114850d3598a9a64824e8af906d6ee 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt @@ -25,7 +25,6 @@ Required Properties: - "nxp,pcf8574": For the NXP PCF8574 - "nxp,pcf8574a": For the NXP PCF8574A - "nxp,pcf8575": For the NXP PCF8575 - - "ti,tca9554": For the TI TCA9554 - reg: I2C slave address. diff --git a/Documentation/devicetree/bindings/gpio/gpio-thunderx.txt b/Documentation/devicetree/bindings/gpio/gpio-thunderx.txt new file mode 100644 index 0000000000000000000000000000000000000000..3f883ae29d116887e702ead20b26a25f9d2349d5 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-thunderx.txt @@ -0,0 +1,27 @@ +Cavium ThunderX/OCTEON-TX GPIO controller bindings + +Required Properties: +- reg: The controller bus address. +- gpio-controller: Marks the device node as a GPIO controller. +- #gpio-cells: Must be 2. + - First cell is the GPIO pin number relative to the controller. + - Second cell is a standard generic flag bitfield as described in gpio.txt. + +Optional Properties: +- compatible: "cavium,thunder-8890-gpio", unused as PCI driver binding is used. +- interrupt-controller: Marks the device node as an interrupt controller. +- #interrupt-cells: Must be present and have value of 2 if + "interrupt-controller" is present. + - First cell is the GPIO pin number relative to the controller. + - Second cell is triggering flags as defined in interrupts.txt. + +Example: + +gpio_6_0: gpio@6,0 { + compatible = "cavium,thunder-8890-gpio"; + reg = <0x3000 0 0 0 0>; /* DEVFN = 0x30 (6:0) */ + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-xra1403.txt b/Documentation/devicetree/bindings/gpio/gpio-xra1403.txt new file mode 100644 index 0000000000000000000000000000000000000000..e13cc399b363f24aae0083fc34b09bd434aa78cd --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-xra1403.txt @@ -0,0 +1,46 @@ +GPIO Driver for XRA1403 16-BIT GPIO Expander With Reset Input from EXAR + +The XRA1403 is an 16-bit GPIO expander with an SPI interface. Features available: + - Individually programmable inputs: + - Internal pull-up resistors + - Polarity inversion + - Individual interrupt enable + - Rising edge and/or Falling edge interrupt + - Input filter + - Individually programmable outputs + - Output Level Control + - Output Three-State Control + +Properties +---------- +Check documentation for SPI and GPIO controllers regarding properties needed to configure the node. + + - compatible = "exar,xra1403". + - reg - SPI id of the device. + - gpio-controller - marks the node as gpio. + - #gpio-cells - should be two where the first cell is the pin number + and the second one is used for optional parameters. + +Optional properties: +------------------- + - reset-gpios: in case available used to control the device reset line. + - interrupt-controller - marks the node as interrupt controller. + - #interrupt-cells - should be two and represents the number of cells + needed to encode interrupt source. + +Example +-------- + + gpioxra0: gpio@2 { + compatible = "exar,xra1403"; + reg = <2>; + + gpio-controller; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + reset-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; + spi-max-frequency = <1000000>; + }; diff --git a/Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt b/Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt deleted file mode 100644 index f8e8f185a3dbacbd60641cd2b70da3b0c2c1e32a..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt +++ /dev/null @@ -1,19 +0,0 @@ -MOXA ART GPIO Controller - -Required properties: - -- #gpio-cells : Should be 2, The first cell is the pin number, - the second cell is used to specify polarity: - 0 = active high - 1 = active low -- compatible : Must be "moxa,moxart-gpio" -- reg : Should contain registers location and length - -Example: - - gpio: gpio@98700000 { - gpio-controller; - #gpio-cells = <2>; - compatible = "moxa,moxart-gpio"; - reg = <0x98700000 0xC>; - }; diff --git a/Documentation/devicetree/bindings/gpio/ni,169445-nand-gpio.txt b/Documentation/devicetree/bindings/gpio/ni,169445-nand-gpio.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca2f8c745a279cfea1375dc68dceff29a63f8de5 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/ni,169445-nand-gpio.txt @@ -0,0 +1,38 @@ +Bindings for the National Instruments 169445 GPIO NAND controller + +The 169445 GPIO NAND controller has two memory mapped GPIO registers, one +for input (the ready signal) and one for output (control signals). It is +intended to be used with the GPIO NAND driver. + +Required properties: + - compatible: should be "ni,169445-nand-gpio" + - reg-names: must contain + "dat" - data register + - reg: address + size pairs describing the GPIO register sets; + order must correspond with the order of entries in reg-names + - #gpio-cells: must be set to 2. The first cell is the pin number and + the second cell is used to specify the gpio polarity: + 0 = active high + 1 = active low + - gpio-controller: Marks the device node as a gpio controller. + +Optional properties: + - no-output: disables driving output on the pins + +Examples: + gpio1: nand-gpio-out@1f300010 { + compatible = "ni,169445-nand-gpio"; + reg = <0x1f300010 0x4>; + reg-names = "dat"; + gpio-controller; + #gpio-cells = <2>; + }; + + gpio2: nand-gpio-in@1f300014 { + compatible = "ni,169445-nand-gpio"; + reg = <0x1f300014 0x4>; + reg-names = "dat"; + gpio-controller; + #gpio-cells = <2>; + no-output; + }; diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt index 476f5ea6c6276a15c52ffc87a7a1033c6d173a26..2b6243e730f690f9bbf2f0c50295b91c0699129f 100644 --- a/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt +++ b/Documentation/devicetree/bindings/gpu/arm,mali-utgard.txt @@ -35,6 +35,14 @@ Optional properties: - interrupt-names and interrupts: * pmu: Power Management Unit interrupt, if implemented in hardware + - memory-region: + Memory region to allocate from, as defined in + Documentation/devicetree/bindi/reserved-memory/reserved-memory.txt + + - operating-points-v2: + Operating Points for the GPU, as defined in + Documentation/devicetree/bindings/opp/opp.txt + Vendor-specific bindings ------------------------ diff --git a/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt index ff3db65e50def8224096107eaf05827df884ca25..b7e4c74445108dcd2c966529a85a4a930b40e831 100644 --- a/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt +++ b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt @@ -5,6 +5,7 @@ Required properties: Currently recognized values: - nvidia,gk20a - nvidia,gm20b + - nvidia,gp10b - reg: Physical base address and length of the controller's registers. Must contain two entries: - first entry for bar0 @@ -14,7 +15,8 @@ Required properties: - interrupt-names: Must include the following entries: - stall - nonstall -- vdd-supply: regulator for supply voltage. +- vdd-supply: regulator for supply voltage. Only required for GPUs not using + power domains. - clocks: Must contain an entry for each entry in clock-names. See ../clocks/clock-bindings.txt for details. - clock-names: Must include the following entries: @@ -27,6 +29,8 @@ is also required: See ../reset/reset.txt for details. - reset-names: Must include the following entries: - gpu +- power-domains: GPUs that make use of power domains can define this property + instead of vdd-supply. Currently "nvidia,gp10b" makes use of this. Optional properties: - iommus: A reference to the IOMMU. See ../iommu/iommu.txt for details. @@ -68,3 +72,22 @@ Example for GM20B: iommus = <&mc TEGRA_SWGROUP_GPU>; status = "disabled"; }; + +Example for GP10B: + + gpu@17000000 { + compatible = "nvidia,gp10b"; + reg = <0x0 0x17000000 0x0 0x1000000>, + <0x0 0x18000000 0x0 0x1000000>; + interrupts = ; + interrupt-names = "stall", "nonstall"; + clocks = <&bpmp TEGRA186_CLK_GPCCLK>, + <&bpmp TEGRA186_CLK_GPU>; + clock-names = "gpu", "pwr"; + resets = <&bpmp TEGRA186_RESET_GPU>; + reset-names = "gpu"; + power-domains = <&bpmp TEGRA186_POWER_DOMAIN_GPU>; + iommus = <&smmu TEGRA186_SID_GPU>; + status = "disabled"; + }; diff --git a/Documentation/devicetree/bindings/hwmon/ads7828.txt b/Documentation/devicetree/bindings/hwmon/ads7828.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe0cc4ad7ea9f289d650e71895145fbaeb2c7b02 --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/ads7828.txt @@ -0,0 +1,25 @@ +ads7828 properties + +Required properties: +- compatible: Should be one of + ti,ads7828 + ti,ads7830 +- reg: I2C address + +Optional properties: + +- ti,differential-input + Set to use the device in differential mode. +- vref-supply + The external reference on the device is set to this regulators output. If it + does not exists the internal reference will be used and output by the ads78xx + on the "external vref" pin. + + Example ADS7828 node: + + ads7828: ads@48 { + comatible = "ti,ads7828"; + reg = <0x48>; + vref-supply = <&vref>; + ti,differential-input; + }; diff --git a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf4460564adb20941f127008fdb0ec235dc5f7ff --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt @@ -0,0 +1,68 @@ +ASPEED AST2400/AST2500 PWM and Fan Tacho controller device driver + +The ASPEED PWM controller can support upto 8 PWM outputs. The ASPEED Fan Tacho +controller can support upto 16 Fan tachometer inputs. + +There can be upto 8 fans supported. Each fan can have one PWM output and +one/two Fan tach inputs. + +Required properties for pwm-tacho node: +- #address-cells : should be 1. + +- #size-cells : should be 1. + +- reg : address and length of the register set for the device. + +- pinctrl-names : a pinctrl state named "default" must be defined. + +- pinctrl-0 : phandle referencing pin configuration of the PWM ports. + +- compatible : should be "aspeed,ast2400-pwm-tacho" for AST2400 and + "aspeed,ast2500-pwm-tacho" for AST2500. + +- clocks : a fixed clock providing input clock frequency(PWM + and Fan Tach clock) + +fan subnode format: +=================== +Under fan subnode there can upto 8 child nodes, with each child node +representing a fan. If there are 8 fans each fan can have one PWM port and +one/two Fan tach inputs. + +Required properties for each child node: +- reg : should specify PWM source port. + integer value in the range 0 to 7 with 0 indicating PWM port A and + 7 indicating PWM port H. + +- aspeed,fan-tach-ch : should specify the Fan tach input channel. + integer value in the range 0 through 15, with 0 indicating + Fan tach channel 0 and 15 indicating Fan tach channel 15. + Atleast one Fan tach input channel is required. + +Examples: + +pwm_tacho_fixed_clk: fixedclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; +}; + +pwm_tacho: pwmtachocontroller@1e786000 { + #address-cells = <1>; + #size-cells = <1>; + reg = <0x1E786000 0x1000>; + compatible = "aspeed,ast2500-pwm-tacho"; + clocks = <&pwm_tacho_fixed_clk>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_pwm0_default &pinctrl_pwm1_default>; + + fan@0 { + reg = <0x00>; + aspeed,fan-tach-ch = /bits/ 8 <0x00>; + }; + + fan@1 { + reg = <0x01>; + aspeed,fan-tach-ch = /bits/ 8 <0x01 0x02>; + }; +}; diff --git a/Documentation/devicetree/bindings/hwmon/lm87.txt b/Documentation/devicetree/bindings/hwmon/lm87.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1b79903f2043d6e3c16876b6651bb263afbb9fd --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/lm87.txt @@ -0,0 +1,30 @@ +*LM87 hwmon sensor. + +Required properties: +- compatible: Should be + "ti,lm87" + +- reg: I2C address + +optional properties: +- has-temp3: This configures pins 18 and 19 to be used as a second + remote temperature sensing channel. By default the pins + are configured as voltage input pins in0 and in5. + +- has-in6: When set, pin 5 is configured to be used as voltage input + in6. Otherwise the pin is set as FAN1 input. + +- has-in7: When set, pin 6 is configured to be used as voltage input + in7. Otherwise the pin is set as FAN2 input. + +- vcc-supply: a Phandle for the regulator supplying power, can be + cofigured to measure 5.0V power supply. Default is 3.3V. + +Example: + +lm87@2e { + compatible = "ti,lm87"; + reg = <0x2e>; + has-temp3; + vcc-supply = <®_5v0>; +}; diff --git a/Documentation/devicetree/bindings/i2c/i2c-meson.txt b/Documentation/devicetree/bindings/i2c/i2c-meson.txt index 386357d1aab05de652675bf6ed19c0dc22b50709..611b934c7e104d4ec0b844bbca59b54c45471009 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-meson.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-meson.txt @@ -8,6 +8,8 @@ Required properties: - #address-cells: should be <1> - #size-cells: should be <0> +For details regarding the following core I2C bindings see also i2c.txt. + Optional properties: - clock-frequency: the desired I2C bus clock frequency in Hz; in absence of this property the default value is used (100 kHz). diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e98c6b3a721bfe6b92386678027a38641865ab8 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt @@ -0,0 +1,61 @@ +* Linear Technology / Analog Devices I2C bus switch + +Required Properties: + + - compatible: Must contain one of the following. + "lltc,ltc4305", "lltc,ltc4306" + - reg: The I2C address of the device. + + The following required properties are defined externally: + + - Standard I2C mux properties. See i2c-mux.txt in this directory. + - I2C child bus nodes. See i2c-mux.txt in this directory. + +Optional Properties: + + - enable-gpios: Reference to the GPIO connected to the enable input. + - i2c-mux-idle-disconnect: Boolean; if defined, forces mux to disconnect all + children in idle state. This is necessary for example, if there are several + multiplexers on the bus and the devices behind them use same I2C addresses. + - gpio-controller: Marks the device node as a GPIO Controller. + - #gpio-cells: Should be two. The first cell is the pin number and + the second cell is used to specify flags. + See ../gpio/gpio.txt for more information. + - ltc,downstream-accelerators-enable: Enables the rise time accelerators + on the downstream port. + - ltc,upstream-accelerators-enable: Enables the rise time accelerators + on the upstream port. + +Example: + + ltc4306: i2c-mux@4a { + compatible = "lltc,ltc4306"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x4a>; + + gpio-controller; + #gpio-cells = <2>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + eeprom@50 { + compatible = "at,24c02"; + reg = <0x50>; + }; + }; + + i2c@1 { + #address-cells = <1>; + #size-cells = <0>; + reg = <1>; + + eeprom@50 { + compatible = "at,24c02"; + reg = <0x50>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt b/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt index bbc5a1ed5fa188557a4c45828ea8ba5d65f46893..e18445d0980cc6ab5d2b0d6b3b10fb35e6d3cff3 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt @@ -11,6 +11,7 @@ Required properties : - "rockchip,rk3188-i2c": for rk3188 - "rockchip,rk3228-i2c": for rk3228 - "rockchip,rk3288-i2c": for rk3288 + - "rockchip,rk3328-i2c", "rockchip,rk3399-i2c": for rk3328 - "rockchip,rk3399-i2c": for rk3399 - interrupts : interrupt number - clocks: See ../clock/clock-bindings.txt diff --git a/Documentation/devicetree/bindings/iio/accel/adxl345.txt b/Documentation/devicetree/bindings/iio/accel/adxl345.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7111b02c02c46ba7635d61f2f4b0abf2ccc047c --- /dev/null +++ b/Documentation/devicetree/bindings/iio/accel/adxl345.txt @@ -0,0 +1,38 @@ +Analog Devices ADXL345 3-Axis, +/-(2g/4g/8g/16g) Digital Accelerometer + +http://www.analog.com/en/products/mems/accelerometers/adxl345.html + +Required properties: + - compatible : should be "adi,adxl345" + - reg : the I2C address or SPI chip select number of the sensor + +Required properties for SPI bus usage: + - spi-max-frequency : set maximum clock frequency, must be 5000000 + - spi-cpol and spi-cpha : must be defined for adxl345 to enable SPI mode 3 + +Optional properties: + - interrupt-parent : phandle to the parent interrupt controller as documented + in Documentation/devicetree/bindings/interrupt-controller/interrupts.txt + - interrupts: interrupt mapping for IRQ as documented in + Documentation/devicetree/bindings/interrupt-controller/interrupts.txt + +Example for a I2C device node: + + accelerometer@2a { + compatible = "adi,adxl345"; + reg = <0x53>; + interrupt-parent = <&gpio1>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; + +Example for a SPI device node: + + accelerometer@0 { + compatible = "adi,adxl345"; + reg = <0>; + spi-max-frequency = <5000000>; + spi-cpol; + spi-cpha; + interrupt-parent = <&gpio1>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; diff --git a/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt b/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt index f9e3ff2c656e538c40cdc6e9826c6f72eda70b9b..047189192aec2691f93cb28120bd4fe1a5746ef6 100644 --- a/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt +++ b/Documentation/devicetree/bindings/iio/adc/amlogic,meson-saradc.txt @@ -7,6 +7,7 @@ Required properties: - "amlogic,meson-gxm-saradc" for GXM along with the generic "amlogic,meson-saradc" - reg: the physical base address and length of the registers +- interrupts: the interrupt indicating end of sampling - clocks: phandle and clock identifier (see clock-names) - clock-names: mandatory clocks: - "clkin" for the reference clock (typically XTAL) @@ -23,6 +24,7 @@ Example: compatible = "amlogic,meson-gxl-saradc", "amlogic,meson-saradc"; #io-channel-cells = <1>; reg = <0x0 0x8680 0x0 0x34>; + interrupts = ; clocks = <&xtal>, <&clkc CLKID_SAR_ADC>, <&clkc CLKID_SANA>, diff --git a/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt b/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt new file mode 100644 index 0000000000000000000000000000000000000000..674e133b7cd7d72aeb81efb4f7cb304ce0204644 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/aspeed_adc.txt @@ -0,0 +1,20 @@ +Aspeed ADC + +This device is a 10-bit converter for 16 voltage channels. All inputs are +single ended. + +Required properties: +- compatible: Should be "aspeed,ast2400-adc" or "aspeed,ast2500-adc" +- reg: memory window mapping address and length +- clocks: Input clock used to derive the sample clock. Expected to be the + SoC's APB clock. +- #io-channel-cells: Must be set to <1> to indicate channels are selected + by index. + +Example: + adc@1e6e9000 { + compatible = "aspeed,ast2400-adc"; + reg = <0x1e6e9000 0xb0>; + clocks = <&clk_apb>; + #io-channel-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/iio/adc/cpcap-adc.txt b/Documentation/devicetree/bindings/iio/adc/cpcap-adc.txt new file mode 100644 index 0000000000000000000000000000000000000000..487ea966858e94eb8a7d6ffe9420cadd1a8bdc77 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/cpcap-adc.txt @@ -0,0 +1,18 @@ +Motorola CPCAP PMIC ADC binding + +Required properties: +- compatible: Should be "motorola,cpcap-adc" or "motorola,mapphone-cpcap-adc" +- interrupt-parent: The interrupt controller +- interrupts: The interrupt number for the ADC device +- interrupt-names: Should be "adcdone" +- #io-channel-cells: Number of cells in an IIO specifier + +Example: + +cpcap_adc: adc { + compatible = "motorola,mapphone-cpcap-adc"; + interrupt-parent = <&cpcap>; + interrupts = <8 IRQ_TYPE_NONE>; + interrupt-names = "adcdone"; + #io-channel-cells = <1>; +}; diff --git a/Documentation/devicetree/bindings/iio/adc/ltc2497.txt b/Documentation/devicetree/bindings/iio/adc/ltc2497.txt new file mode 100644 index 0000000000000000000000000000000000000000..a237ed99c0d8d5041a4105b80a8d2523e37bc4b7 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/ltc2497.txt @@ -0,0 +1,13 @@ +* Linear Technology / Analog Devices LTC2497 ADC + +Required properties: + - compatible: Must be "lltc,ltc2497" + - reg: Must contain the ADC I2C address + - vref-supply: The regulator supply for ADC reference voltage + +Example: + ltc2497: adc@76 { + compatible = "lltc,ltc2497"; + reg = <0x76>; + vref-supply = <<c2497_reg>; + }; diff --git a/Documentation/devicetree/bindings/iio/adc/max1118.txt b/Documentation/devicetree/bindings/iio/adc/max1118.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf33d0b15a6d714024512b6d2f9e881f1ea9f6b1 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/max1118.txt @@ -0,0 +1,21 @@ +* MAX1117/MAX1118/MAX1119 8-bit, dual-channel ADCs + +Required properties: + - compatible: Should be one of + * "maxim,max1117" + * "maxim,max1118" + * "maxim,max1119" + - reg: spi chip select number for the device + - (max1118 only) vref-supply: The regulator supply for ADC reference voltage + +Recommended properties: + - spi-max-frequency: Definition as per + Documentation/devicetree/bindings/spi/spi-bus.txt + +Example: +adc@0 { + compatible = "maxim,max1118"; + reg = <0>; + vref-supply = <&vdd_supply>; + spi-max-frequency = <1000000>; +}; diff --git a/Documentation/devicetree/bindings/iio/adc/max9611.txt b/Documentation/devicetree/bindings/iio/adc/max9611.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab4f43145ae578f37f030bbdeb4080a138762a0f --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/max9611.txt @@ -0,0 +1,27 @@ +* Maxim max9611/max9612 current sense amplifier with 12-bits ADC interface + +Maxim max9611/max9612 is an high-side current sense amplifier with integrated +12-bits ADC communicating over I2c bus. +The device node for this driver shall be a child of a I2c controller. + +Required properties + - compatible: Should be "maxim,max9611" or "maxim,max9612" + - reg: The 7-bits long I2c address of the device + - shunt-resistor-micro-ohms: Value, in micro Ohms, of the current sense shunt + resistor + +Example: + +&i2c4 { + csa: adc@7c { + compatible = "maxim,max9611"; + reg = <0x7c>; + + shunt-resistor-micro-ohms = <5000>; + }; +}; + +This device node describes a current sense amplifier sitting on I2c4 bus +with address 0x7c (read address is 0xf9, write address is 0xf8). +A sense resistor of 0,005 Ohm is installed between RS+ and RS- current-sensing +inputs. diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,pm8xxx-xoadc.txt b/Documentation/devicetree/bindings/iio/adc/qcom,pm8xxx-xoadc.txt index 53cd146d8096becd5f85a6bdac7f06351a60c331..3ae06127789e30654792851c79b51fe3f53863e0 100644 --- a/Documentation/devicetree/bindings/iio/adc/qcom,pm8xxx-xoadc.txt +++ b/Documentation/devicetree/bindings/iio/adc/qcom,pm8xxx-xoadc.txt @@ -19,32 +19,42 @@ Required properties: with PMIC variant but is typically something like 2.2 or 1.8V. The following required properties are standard for IO channels, see -iio-bindings.txt for more details: +iio-bindings.txt for more details, but notice that this particular +ADC has a special addressing scheme that require two cells for +identifying each ADC channel: -- #address-cells: should be set to <1> +- #address-cells: should be set to <2>, the first cell is the + prescaler (on PM8058) or premux (on PM8921) with two valid bits + so legal values are 0x00, 0x01 or 0x02. The second cell + is the main analog mux setting (0x00..0x0f). The combination + of prescaler/premux and analog mux uniquely addresses a hardware + channel on all systems. - #size-cells: should be set to <0> -- #io-channel-cells: should be set to <1> +- #io-channel-cells: should be set to <2>, again the cells are + precaler or premux followed by the analog muxing line. - interrupts: should refer to the parent PMIC interrupt controller and reference the proper ADC interrupt. Required subnodes: -The ADC channels are configured as subnodes of the ADC. Since some of -them are used for calibrating the ADC, these nodes are compulsory: +The ADC channels are configured as subnodes of the ADC. + +Since some of them are used for calibrating the ADC, these nodes are +compulsory: adc-channel@c { - reg = <0x0c>; + reg = <0x00 0x0c>; }; adc-channel@d { - reg = <0x0d>; + reg = <0x00 0x0d>; }; adc-channel@f { - reg = <0x0f>; + reg = <0x00 0x0f>; }; These three nodes are used for absolute and ratiometric calibration @@ -52,13 +62,13 @@ and only need to have these reg values: they are by hardware definition 1:1 ratio converters that sample 625, 1250 and 0 milliV and create an interpolation calibration for all other ADCs. -Optional subnodes: any channels other than channel 0x0c, 0x0d and -0x0f are optional. +Optional subnodes: any channels other than channels [0x00 0x0c], +[0x00 0x0d] and [0x00 0x0f] are optional. Required channel node properties: - reg: should contain the hardware channel number in the range - 0 .. 0x0f (4 bits). The hardware only supports 16 channels. + 0 .. 0xff (8 bits). Optional channel node properties: @@ -94,56 +104,54 @@ Example: xoadc: xoadc@197 { compatible = "qcom,pm8058-adc"; reg = <0x197>; - interrupt-parent = <&pm8058>; - interrupts = <76 1>; - #address-cells = <1>; + interrupts-extended = <&pm8058 76 IRQ_TYPE_EDGE_RISING>; + #address-cells = <2>; #size-cells = <0>; - #io-channel-cells = <1>; + #io-channel-cells = <2>; vcoin: adc-channel@0 { - reg = <0x00>; + reg = <0x00 0x00>; }; vbat: adc-channel@1 { - reg = <0x01>; + reg = <0x00 0x01>; }; dcin: adc-channel@2 { - reg = <0x02>; + reg = <0x00 0x02>; }; ichg: adc-channel@3 { - reg = <0x03>; + reg = <0x00 0x03>; }; vph_pwr: adc-channel@4 { - reg = <0x04>; + reg = <0x00 0x04>; }; usb_vbus: adc-channel@a { - reg = <0x0a>; + reg = <0x00 0x0a>; }; die_temp: adc-channel@b { - reg = <0x0b>; + reg = <0x00 0x0b>; }; ref_625mv: adc-channel@c { - reg = <0x0c>; + reg = <0x00 0x0c>; }; ref_1250mv: adc-channel@d { - reg = <0x0d>; + reg = <0x00 0x0d>; }; ref_325mv: adc-channel@e { - reg = <0x0e>; + reg = <0x00 0x0e>; }; ref_muxoff: adc-channel@f { - reg = <0x0f>; + reg = <0x00 0x0f>; }; }; - /* IIO client node */ iio-hwmon { compatible = "iio-hwmon"; - io-channels = <&xoadc 0x01>, /* Battery */ - <&xoadc 0x02>, /* DC in (charger) */ - <&xoadc 0x04>, /* VPH the main system voltage */ - <&xoadc 0x0b>, /* Die temperature */ - <&xoadc 0x0c>, /* Reference voltage 1.25V */ - <&xoadc 0x0d>, /* Reference voltage 0.625V */ - <&xoadc 0x0e>; /* Reference voltage 0.325V */ + io-channels = <&xoadc 0x00 0x01>, /* Battery */ + <&xoadc 0x00 0x02>, /* DC in (charger) */ + <&xoadc 0x00 0x04>, /* VPH the main system voltage */ + <&xoadc 0x00 0x0b>, /* Die temperature */ + <&xoadc 0x00 0x0c>, /* Reference voltage 1.25V */ + <&xoadc 0x00 0x0d>, /* Reference voltage 0.625V */ + <&xoadc 0x00 0x0e>; /* Reference voltage 0.325V */ }; diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt index 205593f56fe759706ea6fd321c08e66059ebd489..e0a9b9d6d6fdc1b84f74a81fe9516fb343be3c9e 100644 --- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt +++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt @@ -4,6 +4,7 @@ Required properties: - compatible: should be "rockchip,-saradc" or "rockchip,rk3066-tsadc" - "rockchip,saradc": for rk3188, rk3288 - "rockchip,rk3066-tsadc": for rk3036 + - "rockchip,rk3328-saradc", "rockchip,rk3399-saradc": for rk3328 - "rockchip,rk3399-saradc": for rk3399 - reg: physical base address of the controller and length of memory mapped diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt index 5dfc88ec24a49ad7a0ae3c950b0ed5adc26d7a18..e35f9f1b320042c6b89714d32d3998db58e2a4e3 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt @@ -57,6 +57,9 @@ Optional properties: - dmas: Phandle to dma channel for this ADC instance. See ../../dma/dma.txt for details. - dma-names: Must be "rx" when dmas property is being used. +- assigned-resolution-bits: Resolution (bits) to use for conversions. Must + match device available resolutions (e.g. can be 6, 8, 10 or 12 on stm32f4). + Default is maximum resolution if unset. Example: adc: adc@40012000 { @@ -84,6 +87,7 @@ Example: st,adc-channels = <8>; dmas = <&dma2 0 0 0x400 0x0>; dma-names = "rx"; + assigned-resolution-bits = <8>; }; ... other adc child nodes follow... diff --git a/Documentation/devicetree/bindings/iio/dac/ltc2632.txt b/Documentation/devicetree/bindings/iio/dac/ltc2632.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb911e5a8ab443dc6364713f20de0f0fca5ae2b1 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/dac/ltc2632.txt @@ -0,0 +1,23 @@ +Linear Technology LTC2632 DAC device driver + +Required properties: + - compatible: Has to contain one of the following: + lltc,ltc2632-l12 + lltc,ltc2632-l10 + lltc,ltc2632-l8 + lltc,ltc2632-h12 + lltc,ltc2632-h10 + lltc,ltc2632-h8 + +Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt +apply. In particular, "reg" and "spi-max-frequency" properties must be given. + +Example: + + spi_master { + dac: ltc2632@0 { + compatible = "lltc,ltc2632-l12"; + reg = <0>; /* CS0 */ + spi-max-frequency = <1000000>; + }; + }; diff --git a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.txt b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.txt new file mode 100644 index 0000000000000000000000000000000000000000..bcee71f808d00d96dac04954f7ba464282460236 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.txt @@ -0,0 +1,61 @@ +STMicroelectronics STM32 DAC + +The STM32 DAC is a 12-bit voltage output digital-to-analog converter. The DAC +may be configured in 8 or 12-bit mode. It has two output channels, each with +its own converter. +It has built-in noise and triangle waveform generator and supports external +triggers for conversions. The DAC's output buffer allows a high drive output +current. + +Contents of a stm32 dac root node: +----------------------------------- +Required properties: +- compatible: Must be "st,stm32h7-dac-core". +- reg: Offset and length of the device's register set. +- clocks: Must contain an entry for pclk (which feeds the peripheral bus + interface) +- clock-names: Must be "pclk". +- vref-supply: Phandle to the vref+ input analog reference supply. +- #address-cells = <1>; +- #size-cells = <0>; + +Optional properties: +- resets: Must contain the phandle to the reset controller. +- A pinctrl state named "default" for each DAC channel may be defined to set + DAC_OUTx pin in mode of operation for analog output on external pin. + +Contents of a stm32 dac child node: +----------------------------------- +DAC core node should contain at least one subnode, representing a +DAC instance/channel available on the machine. + +Required properties: +- compatible: Must be "st,stm32-dac". +- reg: Must be either 1 or 2, to define (single) channel in use +- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers" in + Documentation/devicetree/bindings/iio/iio-bindings.txt + +Example: + dac: dac@40007400 { + compatible = "st,stm32h7-dac-core"; + reg = <0x40007400 0x400>; + clocks = <&clk>; + clock-names = "pclk"; + vref-supply = <®_vref>; + pinctrl-names = "default"; + pinctrl-0 = <&dac_out1 &dac_out2>; + #address-cells = <1>; + #size-cells = <0>; + + dac1: dac@1 { + compatible = "st,stm32-dac"; + #io-channels-cells = <1>; + reg = <1>; + }; + + dac2: dac@2 { + compatible = "st,stm32-dac"; + #io-channels-cells = <1>; + reg = <2>; + }; + }; diff --git a/Documentation/devicetree/bindings/iio/health/max30102.txt b/Documentation/devicetree/bindings/iio/health/max30102.txt new file mode 100644 index 0000000000000000000000000000000000000000..c695e7cbeefb9d29d7d97602341e2cc3b82d8916 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/health/max30102.txt @@ -0,0 +1,30 @@ +Maxim MAX30102 heart rate and pulse oximeter sensor + +* https://datasheets.maximintegrated.com/en/ds/MAX30102.pdf + +Required properties: + - compatible: must be "maxim,max30102" + - reg: the I2C address of the sensor + - interrupt-parent: should be the phandle for the interrupt controller + - interrupts: the sole interrupt generated by the device + + Refer to interrupt-controller/interrupts.txt for generic + interrupt client node bindings. + +Optional properties: + - maxim,red-led-current-microamp: configuration for RED LED current + - maxim,ir-led-current-microamp: configuration for IR LED current + + Note that each step is approximately 200 microamps, ranging from 0 uA to + 50800 uA. + +Example: + +max30100@57 { + compatible = "maxim,max30102"; + reg = <0x57>; + maxim,red-led-current-microamp = <7000>; + maxim,ir-led-current-microamp = <7000>; + interrupt-parent = <&gpio1>; + interrupts = <16 2>; +}; diff --git a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt index a9fc11e43b45df1f490ee0b643f311e89c7dbf65..2b4514592f833c9be71e0522ca8e839082470030 100644 --- a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt +++ b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt @@ -3,14 +3,21 @@ InvenSense MPU-6050 Six-Axis (Gyro + Accelerometer) MEMS MotionTracking Device http://www.invensense.com/mems/gyro/mpu6050.html Required properties: - - compatible : should be "invensense,mpu6050" + - compatible : should be one of + "invensense,mpu6050" + "invensense,mpu6500" + "invensense,mpu9150" + "invensense,mpu9250" + "invensense,icm20608" - reg : the I2C address of the sensor - interrupt-parent : should be the phandle for the interrupt controller - interrupts : interrupt mapping for GPIO IRQ Optional properties: - mount-matrix: an optional 3x3 mounting rotation matrix - + - i2c-gate node. These devices also support an auxiliary i2c bus. This is + simple enough to be described using the i2c-gate binding. See + i2c/i2c-gate.txt for more details. Example: mpu6050@68 { @@ -28,3 +35,19 @@ Example: "0", /* y2 */ "0.984807753012208"; /* z2 */ }; + + + mpu9250@68 { + compatible = "invensense,mpu9250"; + reg = <0x68>; + interrupt-parent = <&gpio3>; + interrupts = <21 1>; + i2c-gate { + #address-cells = <1>; + #size-cells = <0>; + ax8975@c { + compatible = "ak,ak8975"; + reg = <0x0c>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt b/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt index cf81afdf7803e2d1c49c70efe2a0cf63347739fa..8305fb05ffda433f807e9097b62118995daf90aa 100644 --- a/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt +++ b/Documentation/devicetree/bindings/iio/imu/st_lsm6dsx.txt @@ -3,6 +3,8 @@ Required properties: - compatible: must be one of: "st,lsm6ds3" + "st,lsm6ds3h" + "st,lsm6dsl" "st,lsm6dsm" - reg: i2c address of the sensor / spi cs line diff --git a/Documentation/devicetree/bindings/iio/light/vl6180.txt b/Documentation/devicetree/bindings/iio/light/vl6180.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c52952715a0e279596067b14554c0462e3a2e9b --- /dev/null +++ b/Documentation/devicetree/bindings/iio/light/vl6180.txt @@ -0,0 +1,15 @@ +STMicro VL6180 - ALS, range and proximity sensor + +Link to datasheet: http://www.st.com/resource/en/datasheet/vl6180x.pdf + +Required properties: + + -compatible: should be "st,vl6180" + -reg: the I2C address of the sensor + +Example: + +vl6180@29 { + compatible = "st,vl6180"; + reg = <0x29>; +}; diff --git a/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.txt b/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.txt new file mode 100644 index 0000000000000000000000000000000000000000..d4dc7a227e2ed8840cf2ca861661dd9bedd1cdee --- /dev/null +++ b/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.txt @@ -0,0 +1,28 @@ +* Devantech SRF04 ultrasonic range finder + Bit-banging driver using two GPIOs + +Required properties: + - compatible: Should be "devantech,srf04" + + - trig-gpios: Definition of the GPIO for the triggering (output) + This GPIO is set for about 10 us by the driver to tell the + device it should initiate the measurement cycle. + + - echo-gpios: Definition of the GPIO for the echo (input) + This GPIO is set by the device as soon as an ultrasonic + burst is sent out and reset when the first echo is + received. + Thus this GPIO is set while the ultrasonic waves are doing + one round trip. + It needs to be an GPIO which is able to deliver an + interrupt because the time between two interrupts is + measured in the driver. + See Documentation/devicetree/bindings/gpio/gpio.txt for + information on how to specify a consumer gpio. + +Example: +srf04@0 { + compatible = "devantech,srf04"; + trig-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; + echo-gpios = <&gpio2 6 GPIO_ACTIVE_HIGH>; +}; diff --git a/Documentation/devicetree/bindings/input/cpcap-pwrbutton.txt b/Documentation/devicetree/bindings/input/cpcap-pwrbutton.txt new file mode 100644 index 0000000000000000000000000000000000000000..0dd0076daf71f17463e0d06e856677e1d2d1bc44 --- /dev/null +++ b/Documentation/devicetree/bindings/input/cpcap-pwrbutton.txt @@ -0,0 +1,20 @@ +Motorola CPCAP on key + +This module is part of the CPCAP. For more details about the whole +chip see Documentation/devicetree/bindings/mfd/motorola-cpcap.txt. + +This module provides a simple power button event via an Interrupt. + +Required properties: +- compatible: should be one of the following + - "motorola,cpcap-pwrbutton" +- interrupts: irq specifier for CPCAP's ON IRQ + +Example: + +&cpcap { + cpcap_pwrbutton: pwrbutton { + compatible = "motorola,cpcap-pwrbutton"; + interrupts = <23 IRQ_TYPE_NONE>; + }; +}; diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt index d0ea09ba249fc5ea484625564e81a815abd7f014..570dc10f0cd74b7da9c1ced0c2dfb73e55e452a2 100644 --- a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt +++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt @@ -24,6 +24,8 @@ Optional Properties: - debounce-delay-ms: debounce interval in milliseconds - col-scan-delay-us: delay, measured in microseconds, that is needed before we can scan keypad after activating column gpio +- drive-inactive-cols: drive inactive columns during scan, + default is to turn inactive columns into inputs. Example: matrix-keypad { diff --git a/Documentation/devicetree/bindings/input/hid-over-i2c.txt b/Documentation/devicetree/bindings/input/hid-over-i2c.txt index 488edcb264c4fea49441bb21ea7deb02a0056882..28e8bd8b7d640b34dceadae6b7fe5cf36d392527 100644 --- a/Documentation/devicetree/bindings/input/hid-over-i2c.txt +++ b/Documentation/devicetree/bindings/input/hid-over-i2c.txt @@ -17,6 +17,22 @@ Required properties: - interrupt-parent: the phandle for the interrupt controller - interrupts: interrupt line +Additional optional properties: + +Some devices may support additional optional properties to help with, e.g., +power sequencing. The following properties can be supported by one or more +device-specific compatible properties, which should be used in addition to the +"hid-over-i2c" string. + +- compatible: + * "wacom,w9013" (Wacom W9013 digitizer). Supports: + - vdd-supply + - post-power-on-delay-ms + +- vdd-supply: phandle of the regulator that provides the supply voltage. +- post-power-on-delay-ms: time required by the device after enabling its regulators + before it is ready for communication. Must be used with 'vdd-supply'. + Example: i2c-hid-dev@2c { diff --git a/Documentation/devicetree/bindings/input/pwm-beeper.txt b/Documentation/devicetree/bindings/input/pwm-beeper.txt index 529408b4431a62e44ddfb4ee0bd6795182c2d792..8fc0e48c20db2b2b31795e8f75c4b24257f9272f 100644 --- a/Documentation/devicetree/bindings/input/pwm-beeper.txt +++ b/Documentation/devicetree/bindings/input/pwm-beeper.txt @@ -8,6 +8,7 @@ Required properties: Optional properties: - amp-supply: phandle to a regulator that acts as an amplifier for the beeper +- beeper-hz: bell frequency in Hz Example: diff --git a/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt b/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt index 4ed467b1e4021601e89254d715cc579a12676883..64bb990075c31c3c51a1b1b1aabd4b8a3083fc30 100644 --- a/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt +++ b/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt @@ -7,6 +7,7 @@ PROPERTIES Value type: Definition: must be one of: "qcom,pm8058-vib" + "qcom,pm8916-vib" "qcom,pm8921-vib" - reg: diff --git a/Documentation/devicetree/bindings/input/rotary-encoder.txt b/Documentation/devicetree/bindings/input/rotary-encoder.txt index e85ce3dea4806bfbf9769d27514458603166e632..f99fe5cdeaec6b6c4d841bfa49603e84495a1b16 100644 --- a/Documentation/devicetree/bindings/input/rotary-encoder.txt +++ b/Documentation/devicetree/bindings/input/rotary-encoder.txt @@ -12,7 +12,7 @@ Optional properties: - rotary-encoder,relative-axis: register a relative axis rather than an absolute one. Relative axis will only generate +1/-1 events on the input device, hence no steps need to be passed. -- rotary-encoder,rollover: Automatic rollove when the rotary value becomes +- rotary-encoder,rollover: Automatic rollover when the rotary value becomes greater than the specified steps or smaller than 0. For absolute axis only. - rotary-encoder,steps-per-period: Number of steps (stable states) per period. The values have the following meaning: diff --git a/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt b/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt index e3f22d23fc8fa8b91db6521afe540a46daeef5a8..3c8614c451f265558175cae62cf10e4016301d0e 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/ad7879.txt @@ -35,6 +35,7 @@ Optional properties: - adi,conversion-interval: : 0 : convert one time only 1-255: 515us + val * 35us (up to 9.440ms) This property has to be a '/bits/ 8' value +- gpio-controller : Switch AUX/VBAT/GPIO pin to GPIO mode Example: @@ -51,3 +52,21 @@ Example: adi,averaging = /bits/ 8 <1>; adi,conversion-interval = /bits/ 8 <255>; }; + + ad7879@1 { + compatible = "adi,ad7879"; + spi-max-frequency = <5000000>; + reg = <1>; + spi-cpol; + spi-cpha; + gpio-controller; + interrupt-parent = <&gpio1>; + interrupts = <13 IRQ_TYPE_EDGE_FALLING>; + touchscreen-max-pressure = <4096>; + adi,resistance-plate-x = <120>; + adi,first-conversion-delay = /bits/ 8 <3>; + adi,acquisition-time = /bits/ 8 <1>; + adi,median-filter-size = /bits/ 8 <2>; + adi,averaging = /bits/ 8 <1>; + adi,conversion-interval = /bits/ 8 <255>; + }; diff --git a/Documentation/devicetree/bindings/input/ads7846.txt b/Documentation/devicetree/bindings/input/touchscreen/ads7846.txt similarity index 100% rename from Documentation/devicetree/bindings/input/ads7846.txt rename to Documentation/devicetree/bindings/input/touchscreen/ads7846.txt diff --git a/Documentation/devicetree/bindings/input/touchscreen/ar1021.txt b/Documentation/devicetree/bindings/input/touchscreen/ar1021.txt new file mode 100644 index 0000000000000000000000000000000000000000..e459e8546f34cdb4f70ce709a29d943e4964057f --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/ar1021.txt @@ -0,0 +1,16 @@ +* Microchip AR1020 and AR1021 touchscreen interface (I2C) + +Required properties: +- compatible : "microchip,ar1021-i2c" +- reg : I2C slave address +- interrupt-parent : the phandle for the interrupt controller +- interrupts : touch controller interrupt + +Example: + + touchscreen@4d { + compatible = "microchip,ar1021-i2c"; + reg = <0x4d>; + interrupt-parent = <&gpio3>; + interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt index 6db22103e2dd535952143a2bae9ba4c6790c6e56..025cf8c9324ac362eeee56b5d4f9df09db466180 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt @@ -36,7 +36,7 @@ Optional properties: control gpios - threshold: allows setting the "click"-threshold in the range - from 20 to 80. + from 0 to 80. - gain: allows setting the sensitivity in the range from 0 to 31. Note that lower values indicate higher diff --git a/Documentation/devicetree/bindings/input/touchscreen/max11801-ts.txt b/Documentation/devicetree/bindings/input/touchscreen/max11801-ts.txt new file mode 100644 index 0000000000000000000000000000000000000000..40ac0fe94df64f30db111f177493c7e296762431 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/max11801-ts.txt @@ -0,0 +1,18 @@ +* MAXI MAX11801 Resistive touch screen controller with i2c interface + +Required properties: +- compatible: must be "maxim,max11801" +- reg: i2c slave address +- interrupt-parent: the phandle for the interrupt controller +- interrupts: touch controller interrupt + +Example: + +&i2c1 { + max11801: touchscreen@48 { + compatible = "maxim,max11801"; + reg = <0x48>; + interrupt-parent = <&gpio3>; + interrupts = <31 IRQ_TYPE_EDGE_FALLING>; + }; +}; diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt index ce85ee508238f27514172ecf9559e04a8710f908..6aa625e0cb8dafec75d57d38fbc8eca06ae22bf6 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt @@ -1,7 +1,12 @@ * GSL 1680 touchscreen controller Required properties: -- compatible : "silead,gsl1680" +- compatible : Must be one of the following, depending on the model: + "silead,gsl1680" + "silead,gsl1688" + "silead,gsl3670" + "silead,gsl3675" + "silead,gsl3692" - reg : I2C slave address of the chip (0x40) - interrupt-parent : a phandle pointing to the interrupt controller serving the interrupt for this chip diff --git a/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt b/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt deleted file mode 100644 index 89abecd938cb98b55d82070ed108eb92bf914b8d..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt +++ /dev/null @@ -1,38 +0,0 @@ -sun4i resistive touchscreen controller --------------------------------------- - -Required properties: - - compatible: "allwinner,sun4i-a10-ts", "allwinner,sun5i-a13-ts" or - "allwinner,sun6i-a31-ts" - - reg: mmio address range of the chip - - interrupts: interrupt to which the chip is connected - - #thermal-sensor-cells: shall be 0 - -Optional properties: - - allwinner,ts-attached : boolean indicating that an actual touchscreen - is attached to the controller - - allwinner,tp-sensitive-adjust : integer (4 bits) - adjust sensitivity of pen down detection - between 0 (least sensitive) and 15 - (defaults to 15) - - allwinner,filter-type : integer (2 bits) - select median and averaging filter - samples used for median / averaging filter - 0: 4/2 - 1: 5/3 - 2: 8/4 - 3: 16/8 - (defaults to 1) - -Example: - - rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-a10-ts"; - reg = <0x01c25000 0x100>; - interrupts = <29>; - allwinner,ts-attached; - #thermal-sensor-cells = <0>; - /* sensitive/noisy touch panel */ - allwinner,tp-sensitive-adjust = <0>; - allwinner,filter-type = <3>; - }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,nvic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,nvic.txt new file mode 100644 index 0000000000000000000000000000000000000000..386ab37a383fa229de70a31b4398f408fb791edc --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,nvic.txt @@ -0,0 +1,36 @@ +* ARM Nested Vector Interrupt Controller (NVIC) + +The NVIC provides an interrupt controller that is tightly coupled to +Cortex-M based processor cores. The NVIC implemented on different SoCs +vary in the number of interrupts and priority bits per interrupt. + +Main node required properties: + +- compatible : should be one of: + "arm,v6m-nvic" + "arm,v7m-nvic" + "arm,v8m-nvic" +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. The type shall be a and the value shall be 2. + + The 1st cell contains the interrupt number for the interrupt type. + + The 2nd cell is the priority of the interrupt. + +- reg : Specifies base physical address(s) and size of the NVIC registers. + This is at a fixed address (0xe000e100) and size (0xc00). + +- arm,num-irq-priority-bits: The number of priority bits implemented by the + given SoC + +Example: + + intc: interrupt-controller@e000e100 { + compatible = "arm,v7m-nvic"; + #interrupt-cells = <2>; + #address-cells = <1>; + interrupt-controller; + reg = <0xe000e100 0xc00>; + arm,num-irq-priority-bits = <4>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/cortina,gemini-interrupt-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/cortina,gemini-interrupt-controller.txt deleted file mode 100644 index 97c1167fa5335164b0d2f98a0daf8c86a6c46ee0..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/cortina,gemini-interrupt-controller.txt +++ /dev/null @@ -1,22 +0,0 @@ -* Cortina Systems Gemini interrupt controller - -This interrupt controller is found on the Gemini SoCs. - -Required properties: -- compatible: must be "cortina,gemini-interrupt-controller" -- reg: The register bank for the interrupt controller. -- interrupt-controller: Identifies the node as an interrupt controller -- #interrupt-cells: The number of cells to define the interrupts. - Must be 2 as the controller can specify level or rising edge - IRQs. The bindings follows the standard binding for controllers - with two cells specified in - interrupt-controller/interrupts.txt - -Example: - -interrupt-controller@48000000 { - compatible = "cortina,gemini-interrupt-controller"; - reg = <0x48000000 0x1000>; - interrupt-controller; - #interrupt-cells = <2>; -}; diff --git a/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.txt b/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.txt new file mode 100644 index 0000000000000000000000000000000000000000..24428d47f4872c2e8114336d3559c92636b4af56 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/faraday,ftintc010.txt @@ -0,0 +1,25 @@ +* Faraday Technologt FTINTC010 interrupt controller + +This interrupt controller is a stock IP block from Faraday Technology found +in the Gemini SoCs and other designs. + +Required properties: +- compatible: must be one of + "faraday,ftintc010" + "cortina,gemini-interrupt-controller" (deprecated) +- reg: The register bank for the interrupt controller. +- interrupt-controller: Identifies the node as an interrupt controller +- #interrupt-cells: The number of cells to define the interrupts. + Must be 2 as the controller can specify level or rising edge + IRQs. The bindings follows the standard binding for controllers + with two cells specified in + interrupt-controller/interrupts.txt + +Example: + +interrupt-controller@48000000 { + compatible = "faraday,ftintc010" + reg = <0x48000000 0x1000>; + interrupt-controller; + #interrupt-cells = <2>; +}; diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt b/Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt new file mode 100644 index 0000000000000000000000000000000000000000..a7efdbc3de5be3f7d0be225bd6916be2ac69a120 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt @@ -0,0 +1,35 @@ +* Mediatek 27xx cirq + +In Mediatek SOCs, the CIRQ is a low power interrupt controller designed to +work outside MCUSYS which comprises with Cortex-Ax cores,CCI and GIC. +The external interrupts (outside MCUSYS) will feed through CIRQ and connect +to GIC in MCUSYS. When CIRQ is enabled, it will record the edge-sensitive +interrupts and generate a pulse signal to parent interrupt controller when +flush command is executed. With CIRQ, MCUSYS can be completely turned off +to improve the system power consumption without losing interrupts. + +Required properties: +- compatible: should be one of + - "mediatek,mt2701-cirq" for mt2701 CIRQ + - "mediatek,mt8135-cirq" for mt8135 CIRQ + - "mediatek,mt8173-cirq" for mt8173 CIRQ + and "mediatek,cirq" as a fallback. +- interrupt-controller : Identifies the node as an interrupt controller. +- #interrupt-cells : Use the same format as specified by GIC in arm,gic.txt. +- interrupt-parent: phandle of irq parent for cirq. The parent must + use the same interrupt-cells format as GIC. +- reg: Physical base address of the cirq registers and length of memory + mapped region. +- mediatek,ext-irq-range: Identifies external irq number range in different + SOCs. + +Example: + cirq: interrupt-controller@10204000 { + compatible = "mediatek,mt2701-cirq", + "mediatek,mtk-cirq"; + interrupt-controller; + #interrupt-cells = <3>; + interrupt-parent = <&sysirq>; + reg = <0 0x10204000 0 0x400>; + mediatek,ext-irq-start = <32 200>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt index 9d1d72c65489e1045d76291bcb8e7724c7c3cc43..a89c03bb1a81549c6029e8b4e7aeb48e3d726a4a 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt @@ -21,13 +21,16 @@ Required properties: - interrupt-parent: phandle of irq parent for sysirq. The parent must use the same interrupt-cells format as GIC. - reg: Physical base address of the intpol registers and length of memory - mapped region. + mapped region. Could be multiple bases here. Ex: mt6797 needs 2 reg, others + need 1. Example: - sysirq: interrupt-controller@10200100 { - compatible = "mediatek,mt6589-sysirq", "mediatek,mt6577-sysirq"; + sysirq: intpol-controller@10200620 { + compatible = "mediatek,mt6797-sysirq", + "mediatek,mt6577-sysirq"; interrupt-controller; #interrupt-cells = <3>; interrupt-parent = <&gic>; - reg = <0 0x10200100 0 0x1c>; + reg = <0 0x10220620 0 0x20>, + <0 0x10220690 0 0x10>; }; diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 6cdf32d037fcbedc1a25d7f283026ff89f73b361..8a6ffce12af5305e8c630724fa86f735c1884f4d 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -60,6 +60,17 @@ conditions. aliases of secure registers have to be used during SMMU configuration. +- stream-match-mask : For SMMUs supporting stream matching and using + #iommu-cells = <1>, specifies a mask of bits to ignore + when matching stream IDs (e.g. this may be programmed + into the SMRn.MASK field of every stream match register + used). For cases where it is desirable to ignore some + portion of every Stream ID (e.g. for certain MMU-500 + configurations given globally unique input IDs). This + property is not valid for SMMUs using stream indexing, + or using stream matching with #iommu-cells = <2>, and + may be ignored if present in such cases. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -109,3 +120,20 @@ conditions. master3 { iommus = <&smmu2 1 0x30>; }; + + + /* ARM MMU-500 with 10-bit stream ID input configuration */ + smmu3: iommu { + compatible = "arm,mmu-500", "arm,smmu-v2"; + ... + #iommu-cells = <1>; + /* always ignore appended 5-bit TBU number */ + stream-match-mask = 0x7c00; + }; + + bus { + /* bus whose child devices emit one unique 10-bit stream + ID each, but may master through multiple SMMU TBUs */ + iommu-map = <0 &smmu3 0 0x400>; + ... + }; diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt index 6f28969af9dcd03048ea75f82858664570531489..028268fd99eeba926424ae90ac73eab6b818f4be 100644 --- a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt +++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt @@ -6,7 +6,9 @@ perform in-band IPMI communication with their host. Required properties: -- compatible : should be "aspeed,ast2400-ibt-bmc" +- compatible : should be one of + "aspeed,ast2400-ibt-bmc" + "aspeed,ast2500-ibt-bmc" - reg: physical address and size of the registers Optional properties: diff --git a/Documentation/devicetree/bindings/leds/backlight/arcxcnn_bl.txt b/Documentation/devicetree/bindings/leds/backlight/arcxcnn_bl.txt new file mode 100644 index 0000000000000000000000000000000000000000..230abdefd6e7be20b470c3790e74c4d26d084ee8 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/backlight/arcxcnn_bl.txt @@ -0,0 +1,33 @@ +Binding for ArcticSand arc2c0608 LED driver + +Required properties: +- compatible: should be "arc,arc2c0608" +- reg: slave address + +Optional properties: +- default-brightness: brightness value on boot, value from: 0-4095 +- label: The name of the backlight device + See Documentation/devicetree/bindings/leds/common.txt +- led-sources: List of enabled channels from 0 to 5. + See Documentation/devicetree/bindings/leds/common.txt + +- arc,led-config-0: setting for register ILED_CONFIG_0 +- arc,led-config-1: setting for register ILED_CONFIG_1 +- arc,dim-freq: PWM mode frequence setting (bits [3:0] used) +- arc,comp-config: setting for register CONFIG_COMP +- arc,filter-config: setting for register FILTER_CONFIG +- arc,trim-config: setting for register IMAXTUNE + +Note: Optional properties not specified will default to values in IC EPROM + +Example: + +arc2c0608@30 { + compatible = "arc,arc2c0608"; + reg = <0x30>; + default-brightness = <500>; + label = "lcd-backlight"; + linux,default-trigger = "backlight"; + led-sources = <0 1 2 5>; +}; + diff --git a/Documentation/devicetree/bindings/leds/leds-cpcap.txt b/Documentation/devicetree/bindings/leds/leds-cpcap.txt new file mode 100644 index 0000000000000000000000000000000000000000..ebf7cdc7f70c2d545db6669d5547c5128878251a --- /dev/null +++ b/Documentation/devicetree/bindings/leds/leds-cpcap.txt @@ -0,0 +1,29 @@ +Motorola CPCAP PMIC LEDs +------------------------ + +This module is part of the CPCAP. For more details about the whole +chip see Documentation/devicetree/bindings/mfd/motorola-cpcap.txt. + +Requires node properties: +- compatible: should be one of + * "motorola,cpcap-led-mdl" (Main Display Lighting) + * "motorola,cpcap-led-kl" (Keyboard Lighting) + * "motorola,cpcap-led-adl" (Aux Display Lighting) + * "motorola,cpcap-led-red" (Red Triode) + * "motorola,cpcap-led-green" (Green Triode) + * "motorola,cpcap-led-blue" (Blue Triode) + * "motorola,cpcap-led-cf" (Camera Flash) + * "motorola,cpcap-led-bt" (Bluetooth) + * "motorola,cpcap-led-cp" (Camera Privacy LED) +- label: see Documentation/devicetree/bindings/leds/common.txt +- vdd-supply: A phandle to the regulator powering the LED + +Example: + +&cpcap { + cpcap_led_red: red-led { + compatible = "motorola,cpcap-led-red"; + label = "cpcap:red"; + vdd-supply = <&sw5>; + }; +}; diff --git a/Documentation/devicetree/bindings/leds/leds-mt6323.txt b/Documentation/devicetree/bindings/leds/leds-mt6323.txt new file mode 100644 index 0000000000000000000000000000000000000000..45bf9f7d85f37685932f558aa9807bf5cf7ac8c8 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/leds-mt6323.txt @@ -0,0 +1,60 @@ +Device Tree Bindings for LED support on MT6323 PMIC + +MT6323 LED controller is subfunction provided by MT6323 PMIC, so the LED +controllers are defined as the subnode of the function node provided by MT6323 +PMIC controller that is being defined as one kind of Muti-Function Device (MFD) +using shared bus called PMIC wrapper for each subfunction to access remote +MT6323 PMIC hardware. + +For MT6323 MFD bindings see: +Documentation/devicetree/bindings/mfd/mt6397.txt +For MediaTek PMIC wrapper bindings see: +Documentation/devicetree/bindings/soc/mediatek/pwrap.txt + +Required properties: +- compatible : Must be "mediatek,mt6323-led" +- address-cells : Must be 1 +- size-cells : Must be 0 + +Each led is represented as a child node of the mediatek,mt6323-led that +describes the initial behavior for each LED physically and currently only four +LED child nodes can be supported. + +Required properties for the LED child node: +- reg : LED channel number (0..3) + +Optional properties for the LED child node: +- label : See Documentation/devicetree/bindings/leds/common.txt +- linux,default-trigger : See Documentation/devicetree/bindings/leds/common.txt +- default-state: See Documentation/devicetree/bindings/leds/common.txt + +Example: + + mt6323: pmic { + compatible = "mediatek,mt6323"; + + ... + + mt6323led: leds { + compatible = "mediatek,mt6323-led"; + #address-cells = <1>; + #size-cells = <0>; + + led@0 { + reg = <0>; + label = "LED0"; + linux,default-trigger = "timer"; + default-state = "on"; + }; + led@1 { + reg = <1>; + label = "LED1"; + default-state = "off"; + }; + led@2 { + reg = <2>; + label = "LED2"; + default-state = "on"; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/leds/leds-pca9532.txt b/Documentation/devicetree/bindings/leds/leds-pca9532.txt index 198f3ba0e01f822dc8c148158a01551205571072..f769c52e36439205572e33d74c4c9e0f9a09b808 100644 --- a/Documentation/devicetree/bindings/leds/leds-pca9532.txt +++ b/Documentation/devicetree/bindings/leds/leds-pca9532.txt @@ -17,6 +17,8 @@ Optional sub-node properties: - label: see Documentation/devicetree/bindings/leds/common.txt - type: Output configuration, see dt-bindings/leds/leds-pca9532.h (default NONE) - linux,default-trigger: see Documentation/devicetree/bindings/leds/common.txt + - default-state: see Documentation/devicetree/bindings/leds/common.txt + This property is only valid for sub-nodes of type . Example: #include @@ -33,6 +35,14 @@ Example: label = "pca:green:power"; type = ; }; + kernel-booting { + type = ; + default-state = "on"; + }; + sys-stat { + type = ; + default-state = "keep"; // don't touch, was set by U-Boot + }; }; For more product information please see the link below: diff --git a/Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt b/Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt new file mode 100644 index 0000000000000000000000000000000000000000..752ae6b00d26a7c39f914637d37149f93efb5b36 --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/brcm,iproc-flexrm-mbox.txt @@ -0,0 +1,59 @@ +Broadcom FlexRM Ring Manager +============================ +The Broadcom FlexRM ring manager provides a set of rings which can be +used to submit work to offload engines. An SoC may have multiple FlexRM +hardware blocks. There is one device tree entry per FlexRM block. The +FlexRM driver will create a mailbox-controller instance for given FlexRM +hardware block where each mailbox channel is a separate FlexRM ring. + +Required properties: +-------------------- +- compatible: Should be "brcm,iproc-flexrm-mbox" +- reg: Specifies base physical address and size of the FlexRM + ring registers +- msi-parent: Phandles (and potential Device IDs) to MSI controllers + The FlexRM engine will send MSIs (instead of wired + interrupts) to CPU. There is one MSI for each FlexRM ring. + Refer devicetree/bindings/interrupt-controller/msi.txt +- #mbox-cells: Specifies the number of cells needed to encode a mailbox + channel. This should be 3. + + The 1st cell is the mailbox channel number. + + The 2nd cell contains MSI completion threshold. This is the + number of completion messages for which FlexRM will inject + one MSI interrupt to CPU. + + The 3nd cell contains MSI timer value representing time for + which FlexRM will wait to accumulate N completion messages + where N is the value specified by 2nd cell above. If FlexRM + does not get required number of completion messages in time + specified by this cell then it will inject one MSI interrupt + to CPU provided atleast one completion message is available. + +Optional properties: +-------------------- +- dma-coherent: Present if DMA operations made by the FlexRM engine (such + as DMA descriptor access, access to buffers pointed by DMA + descriptors and read/write pointer updates to DDR) are + cache coherent with the CPU. + +Example: +-------- +crypto_mbox: mbox@67000000 { + compatible = "brcm,iproc-flexrm-mbox"; + reg = <0x67000000 0x200000>; + msi-parent = <&gic_its 0x7f00>; + #mbox-cells = <3>; +}; + +crypto@672c0000 { + compatible = "brcm,spu2-v2-crypto"; + reg = <0x672c0000 0x1000>; + mboxes = <&crypto_mbox 0 0x1 0xffff>, + <&crypto_mbox 1 0x1 0xffff>, + <&crypto_mbox 16 0x1 0xffff>, + <&crypto_mbox 17 0x1 0xffff>, + <&crypto_mbox 30 0x1 0xffff>, + <&crypto_mbox 31 0x1 0xffff>; +}; diff --git a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt index 411ccf421584514b5d2128373115cd9443b6d3e8..0f3ee81d92c297022adcdbb1f9689f4b924ea143 100644 --- a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt +++ b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt @@ -1,9 +1,11 @@ The PDC driver manages data transfer to and from various offload engines on some Broadcom SoCs. An SoC may have multiple PDC hardware blocks. There is -one device tree entry per block. +one device tree entry per block. On some chips, the PDC functionality is +handled by the FA2 (Northstar Plus). Required properties: -- compatible : Should be "brcm,iproc-pdc-mbox". +- compatible : Should be "brcm,iproc-pdc-mbox" or "brcm,iproc-fa2-mbox" for + FA2/Northstar Plus. - reg: Should contain PDC registers location and length. - interrupts: Should contain the IRQ line for the PDC. - #mbox-cells: 1 diff --git a/Documentation/devicetree/bindings/media/atmel-isi.txt b/Documentation/devicetree/bindings/media/atmel-isi.txt index 251f008f220cff32969b974f0b8ee8dd92320bd4..332513a151cc9d45ba299e4722cb19eb8a20a827 100644 --- a/Documentation/devicetree/bindings/media/atmel-isi.txt +++ b/Documentation/devicetree/bindings/media/atmel-isi.txt @@ -1,51 +1,66 @@ -Atmel Image Sensor Interface (ISI) SoC Camera Subsystem ----------------------------------------------- - -Required properties: -- compatible: must be "atmel,at91sam9g45-isi" -- reg: physical base address and length of the registers set for the device; -- interrupts: should contain IRQ line for the ISI; -- clocks: list of clock specifiers, corresponding to entries in - the clock-names property; -- clock-names: must contain "isi_clk", which is the isi peripherial clock. - -ISI supports a single port node with parallel bus. It should contain one +Atmel Image Sensor Interface (ISI) +---------------------------------- + +Required properties for ISI: +- compatible: must be "atmel,at91sam9g45-isi". +- reg: physical base address and length of the registers set for the device. +- interrupts: should contain IRQ line for the ISI. +- clocks: list of clock specifiers, corresponding to entries in the clock-names + property; please refer to clock-bindings.txt. +- clock-names: required elements: "isi_clk". +- pinctrl-names, pinctrl-0: please refer to pinctrl-bindings.txt. + +ISI supports a single port node with parallel bus. It shall contain one 'port' child node with child 'endpoint' node. Please refer to the bindings defined in Documentation/devicetree/bindings/media/video-interfaces.txt. -Example: - isi: isi@f0034000 { - compatible = "atmel,at91sam9g45-isi"; - reg = <0xf0034000 0x4000>; - interrupts = <37 IRQ_TYPE_LEVEL_HIGH 5>; - - clocks = <&isi_clk>; - clock-names = "isi_clk"; +Endpoint node properties +------------------------ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_isi>; +- bus-width: <8> or <10> (mandatory) +- hsync-active (default: active high) +- vsync-active (default: active high) +- pclk-sample (default: sample on falling edge) +- remote-endpoint: A phandle to the bus receiver's endpoint node (mandatory). - port { - #address-cells = <1>; - #size-cells = <0>; +Example: - isi_0: endpoint { - remote-endpoint = <&ov2640_0>; - bus-width = <8>; - }; +isi: isi@f0034000 { + compatible = "atmel,at91sam9g45-isi"; + reg = <0xf0034000 0x4000>; + interrupts = <37 IRQ_TYPE_LEVEL_HIGH 5>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_isi_data_0_7>; + clocks = <&isi_clk>; + clock-names = "isi_clk"; + port { + isi_0: endpoint { + remote-endpoint = <&ov2640_0>; + bus-width = <8>; + vsync-active = <1>; + hsync-active = <1>; }; }; +}; - i2c1: i2c@f0018000 { - ov2640: camera@0x30 { - compatible = "ovti,ov2640"; - reg = <0x30>; +i2c1: i2c@f0018000 { + ov2640: camera@30 { + compatible = "ovti,ov2640"; + reg = <0x30>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_pck0_as_isi_mck &pinctrl_sensor_power &pinctrl_sensor_reset>; + resetb-gpios = <&pioE 11 GPIO_ACTIVE_LOW>; + pwdn-gpios = <&pioE 13 GPIO_ACTIVE_HIGH>; + clocks = <&pck0>; + clock-names = "xvclk"; + assigned-clocks = <&pck0>; + assigned-clock-rates = <25000000>; - port { - ov2640_0: endpoint { - remote-endpoint = <&isi_0>; - bus-width = <8>; - }; + port { + ov2640_0: endpoint { + remote-endpoint = <&isi_0>; + bus-width = <8>; }; }; }; +}; diff --git a/Documentation/devicetree/bindings/media/i2c/ov2640.txt b/Documentation/devicetree/bindings/media/i2c/ov2640.txt index c429b5bdcaa05ef7486965814b5e55820f9d2dc3..989ce6cb6ac30489df01c8008ef0181e5e7bc637 100644 --- a/Documentation/devicetree/bindings/media/i2c/ov2640.txt +++ b/Documentation/devicetree/bindings/media/i2c/ov2640.txt @@ -1,8 +1,8 @@ * Omnivision OV2640 CMOS sensor -The Omnivision OV2640 sensor support multiple resolutions output, such as -CIF, SVGA, UXGA. It also can support YUV422/420, RGB565/555 or raw RGB -output format. +The Omnivision OV2640 sensor supports multiple resolutions output, such as +CIF, SVGA, UXGA. It also can support the YUV422/420, RGB565/555 or raw RGB +output formats. Required Properties: - compatible: should be "ovti,ov2640" @@ -20,26 +20,21 @@ Documentation/devicetree/bindings/media/video-interfaces.txt. Example: i2c1: i2c@f0018000 { - ov2640: camera@0x30 { + ov2640: camera@30 { compatible = "ovti,ov2640"; reg = <0x30>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_pck1 &pinctrl_ov2640_pwdn &pinctrl_ov2640_resetb>; - - resetb-gpios = <&pioE 24 GPIO_ACTIVE_LOW>; - pwdn-gpios = <&pioE 29 GPIO_ACTIVE_HIGH>; - - clocks = <&pck1>; + pinctrl-0 = <&pinctrl_pck0_as_isi_mck &pinctrl_sensor_power &pinctrl_sensor_reset>; + resetb-gpios = <&pioE 11 GPIO_ACTIVE_LOW>; + pwdn-gpios = <&pioE 13 GPIO_ACTIVE_HIGH>; + clocks = <&pck0>; clock-names = "xvclk"; - - assigned-clocks = <&pck1>; + assigned-clocks = <&pck0>; assigned-clock-rates = <25000000>; port { ov2640_0: endpoint { remote-endpoint = <&isi_0>; - bus-width = <8>; }; }; }; diff --git a/Documentation/devicetree/bindings/media/i2c/ov5645.txt b/Documentation/devicetree/bindings/media/i2c/ov5645.txt new file mode 100644 index 0000000000000000000000000000000000000000..fd7aec9f8e247a1a507991aba3544713cd6b06f2 --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/ov5645.txt @@ -0,0 +1,54 @@ +* Omnivision 1/4-Inch 5Mp CMOS Digital Image Sensor + +The Omnivision OV5645 is a 1/4-Inch CMOS active pixel digital image sensor with +an active array size of 2592H x 1944V. It is programmable through a serial I2C +interface. + +Required Properties: +- compatible: Value should be "ovti,ov5645". +- clocks: Reference to the xclk clock. +- clock-names: Should be "xclk". +- clock-frequency: Frequency of the xclk clock. +- enable-gpios: Chip enable GPIO. Polarity is GPIO_ACTIVE_HIGH. This corresponds + to the hardware pin PWDNB which is physically active low. +- reset-gpios: Chip reset GPIO. Polarity is GPIO_ACTIVE_LOW. This corresponds to + the hardware pin RESETB. +- vdddo-supply: Chip digital IO regulator. +- vdda-supply: Chip analog regulator. +- vddd-supply: Chip digital core regulator. + +The device node must contain one 'port' child node for its digital output +video port, in accordance with the video interface bindings defined in +Documentation/devicetree/bindings/media/video-interfaces.txt. + +Example: + + &i2c1 { + ... + + ov5645: ov5645@78 { + compatible = "ovti,ov5645"; + reg = <0x78>; + + enable-gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio5 20 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&camera_rear_default>; + + clocks = <&clks 200>; + clock-names = "xclk"; + clock-frequency = <23880000>; + + vdddo-supply = <&camera_dovdd_1v8>; + vdda-supply = <&camera_avdd_2v8>; + vddd-supply = <&camera_dvdd_1v2>; + + port { + ov5645_ep: endpoint { + clock-lanes = <1>; + data-lanes = <0 2>; + remote-endpoint = <&csi0_ep>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/media/i2c/ov5647.txt b/Documentation/devicetree/bindings/media/i2c/ov5647.txt new file mode 100644 index 0000000000000000000000000000000000000000..22e44945b66108d20eb283cb00f25b6af1a47a34 --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/ov5647.txt @@ -0,0 +1,35 @@ +Omnivision OV5647 raw image sensor +--------------------------------- + +OV5647 is a raw image sensor with MIPI CSI-2 and CCP2 image data interfaces +and CCI (I2C compatible) control bus. + +Required properties: + +- compatible : "ovti,ov5647". +- reg : I2C slave address of the sensor. +- clocks : Reference to the xclk clock. + +The common video interfaces bindings (see video-interfaces.txt) should be +used to specify link to the image data receiver. The OV5647 device +node should contain one 'port' child node with an 'endpoint' subnode. + +Endpoint node mandatory properties: + +- remote-endpoint: A phandle to the bus receiver's endpoint node. + +Example: + + i2c@2000 { + ... + ov: camera@36 { + compatible = "ovti,ov5647"; + reg = <0x36>; + clocks = <&camera_clk>; + port { + camera_1: endpoint { + remote-endpoint = <&csi1_ep1>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/media/i2c/ov7670.txt b/Documentation/devicetree/bindings/media/i2c/ov7670.txt new file mode 100644 index 0000000000000000000000000000000000000000..826b6563b009ce7508d2598323f0648a0a8e3d66 --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/ov7670.txt @@ -0,0 +1,43 @@ +* Omnivision OV7670 CMOS sensor + +The Omnivision OV7670 sensor supports multiple resolutions output, such as +CIF, SVGA, UXGA. It also can support the YUV422/420, RGB565/555 or raw RGB +output formats. + +Required Properties: +- compatible: should be "ovti,ov7670" +- clocks: reference to the xclk input clock. +- clock-names: should be "xclk". + +Optional Properties: +- reset-gpios: reference to the GPIO connected to the resetb pin, if any. + Active is low. +- powerdown-gpios: reference to the GPIO connected to the pwdn pin, if any. + Active is high. + +The device node must contain one 'port' child node for its digital output +video port, in accordance with the video interface bindings defined in +Documentation/devicetree/bindings/media/video-interfaces.txt. + +Example: + + i2c1: i2c@f0018000 { + ov7670: camera@21 { + compatible = "ovti,ov7670"; + reg = <0x21>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_pck0_as_isi_mck &pinctrl_sensor_power &pinctrl_sensor_reset>; + reset-gpios = <&pioE 11 GPIO_ACTIVE_LOW>; + powerdown-gpios = <&pioE 13 GPIO_ACTIVE_HIGH>; + clocks = <&pck0>; + clock-names = "xclk"; + assigned-clocks = <&pck0>; + assigned-clock-rates = <25000000>; + + port { + ov7670_0: endpoint { + remote-endpoint = <&isi_0>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt b/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt new file mode 100644 index 0000000000000000000000000000000000000000..3813947b4d4f02bfd27ccaf126f64e0ecb933ce2 --- /dev/null +++ b/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt @@ -0,0 +1,37 @@ +* Mediatek JPEG Decoder + +Mediatek JPEG Decoder is the JPEG decode hardware present in Mediatek SoCs + +Required properties: +- compatible : must be one of the following string: + "mediatek,mt8173-jpgdec" + "mediatek,mt2701-jpgdec" +- reg : physical base address of the jpeg decoder registers and length of + memory mapped region. +- interrupts : interrupt number to the interrupt controller. +- clocks: device clocks, see + Documentation/devicetree/bindings/clock/clock-bindings.txt for details. +- clock-names: must contain "jpgdec-smi" and "jpgdec". +- power-domains: a phandle to the power domain, see + Documentation/devicetree/bindings/power/power_domain.txt for details. +- mediatek,larb: must contain the local arbiters in the current Socs, see + Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt + for details. +- iommus: should point to the respective IOMMU block with master port as + argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt + for details. + +Example: + jpegdec: jpegdec@15004000 { + compatible = "mediatek,mt2701-jpgdec"; + reg = <0 0x15004000 0 0x1000>; + interrupts = ; + clocks = <&imgsys CLK_IMG_JPGDEC_SMI>, + <&imgsys CLK_IMG_JPGDEC>; + clock-names = "jpgdec-smi", + "jpgdec"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_ISP>; + mediatek,larb = <&larb2>; + iommus = <&iommu MT2701_M4U_PORT_JPGDEC_WDMA>, + <&iommu MT2701_M4U_PORT_JPGDEC_BSDMA>; + }; diff --git a/Documentation/devicetree/bindings/media/s5p-cec.txt b/Documentation/devicetree/bindings/media/s5p-cec.txt index 925ab4d72eaab342f8f197b131c7d7ddcf6219de..4bb08d9d940be0473d937ad7c7c97d87c5d517d9 100644 --- a/Documentation/devicetree/bindings/media/s5p-cec.txt +++ b/Documentation/devicetree/bindings/media/s5p-cec.txt @@ -15,6 +15,7 @@ Required properties: - clock-names : from common clock binding: must contain "hdmicec", corresponding to entry in the clocks property. - samsung,syscon-phandle - phandle to the PMU system controller + - hdmi-phandle - phandle to the HDMI controller Example: @@ -25,6 +26,7 @@ hdmicec: cec@100B0000 { clocks = <&clock CLK_HDMI_CEC>; clock-names = "hdmicec"; samsung,syscon-phandle = <&pmu_system_controller>; + hdmi-phandle = <&hdmi>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_cec>; status = "okay"; diff --git a/Documentation/devicetree/bindings/media/s5p-mfc.txt b/Documentation/devicetree/bindings/media/s5p-mfc.txt index 2c901286d8185a0999a4ef6097bc79f905eab748..d3404b5d4d1795034342ae8640c5d9f9648591ae 100644 --- a/Documentation/devicetree/bindings/media/s5p-mfc.txt +++ b/Documentation/devicetree/bindings/media/s5p-mfc.txt @@ -28,7 +28,7 @@ Optional properties: - memory-region : from reserved memory binding: phandles to two reserved memory regions, first is for "left" mfc memory bus interfaces, second if for the "right" mfc memory bus, used when no SYSMMU - support is available + support is available; used only by MFC v5 present in Exynos4 SoCs Obsolete properties: - samsung,mfc-r, samsung,mfc-l : support removed, please use memory-region diff --git a/Documentation/devicetree/bindings/media/stih-cec.txt b/Documentation/devicetree/bindings/media/stih-cec.txt index 71c4b2f4bcef1f0a8a31c3088250efe45677226d..289a08b336513db88957263aa6777d2b4372647e 100644 --- a/Documentation/devicetree/bindings/media/stih-cec.txt +++ b/Documentation/devicetree/bindings/media/stih-cec.txt @@ -9,6 +9,7 @@ Required properties: - pinctrl-names: Contains only one value - "default" - pinctrl-0: Specifies the pin control groups used for CEC hardware. - resets: Reference to a reset controller + - hdmi-phandle: Phandle to the HDMI controller Example for STIH407: @@ -22,4 +23,5 @@ sti-cec@094a087c { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_cec0_default>; resets = <&softreset STIH407_LPM_SOFTRESET>; + hdmi-phandle = <&hdmi>; }; diff --git a/Documentation/devicetree/bindings/media/ti,da850-vpif.txt b/Documentation/devicetree/bindings/media/ti,da850-vpif.txt index 6d25d7f23d2621c13d4556a1f3e877bddac1e007..df7182a63e59aef757c58fd930279a400f7fb210 100644 --- a/Documentation/devicetree/bindings/media/ti,da850-vpif.txt +++ b/Documentation/devicetree/bindings/media/ti,da850-vpif.txt @@ -16,8 +16,10 @@ Required properties: Video Capture: VPIF has a 16-bit parallel bus input, supporting 2 8-bit channels or a -single 16-bit channel. It should contain at least one port child node -with child 'endpoint' node. Please refer to the bindings defined in +single 16-bit channel. It should contain one or two port child nodes +with child 'endpoint' node. If there are two ports then port@0 must +describe the input and port@1 output channels. Please refer to the +bindings defined in Documentation/devicetree/bindings/media/video-interfaces.txt. Example using 2 8-bit input channels, one of which is connected to an @@ -28,17 +30,24 @@ I2C-connected TVP5147 decoder: reg = <0x217000 0x1000>; interrupts = <92>; - port { - vpif_ch0: endpoint@0 { - reg = <0>; - bus-width = <8>; - remote-endpoint = <&composite>; + port@0 { + vpif_input_ch0: endpoint@0 { + reg = <0>; + bus-width = <8>; + remote-endpoint = <&composite_in>; + }; + + vpif_input_ch1: endpoint@1 { + reg = <1>; + bus-width = <8>; + data-shift = <8>; }; + }; - vpif_ch1: endpoint@1 { - reg = <1>; - bus-width = <8>; - data-shift = <8>; + port@1 { + vpif_output_ch0: endpoint { + bus-width = <8>; + remote-endpoint = <&composite_out>; }; }; }; @@ -53,13 +62,28 @@ I2C-connected TVP5147 decoder: status = "okay"; port { - composite: endpoint { + composite_in: endpoint { hsync-active = <1>; vsync-active = <1>; pclk-sample = <0>; /* VPIF channel 0 (lower 8-bits) */ - remote-endpoint = <&vpif_ch0>; + remote-endpoint = <&vpif_input_ch0>; + bus-width = <8>; + }; + }; + }; + + adv7343@2a { + compatible = "adi,adv7343"; + reg = <0x2a>; + + port { + composite_out: endpoint { + adi,dac-enable = <1 1 1>; + adi,sd-dac-enable = <1>; + + remote-endpoint = <&vpif_output_ch0>; bus-width = <8>; }; }; diff --git a/Documentation/devicetree/bindings/mfd/altera-a10sr.txt b/Documentation/devicetree/bindings/mfd/altera-a10sr.txt index ea151f295ad71de2f7c5de2efbe6270c6c651fff..c8a736554b4b3494220293e611eeaa33653da7f7 100644 --- a/Documentation/devicetree/bindings/mfd/altera-a10sr.txt +++ b/Documentation/devicetree/bindings/mfd/altera-a10sr.txt @@ -18,6 +18,7 @@ The A10SR consists of these sub-devices: Device Description ------ ---------- a10sr_gpio GPIO Controller +a10sr_rst Reset Controller Arria10 GPIO Required Properties: @@ -27,6 +28,11 @@ Required Properties: the second cell is used to specify flags. See ../gpio/gpio.txt for more information. +Arria10 Peripheral PHY Reset +Required Properties: +- compatible : Should be "altr,a10sr-reset" +- #reset-cells : Should be one. + Example: resource-manager@0 { @@ -43,4 +49,9 @@ Example: gpio-controller; #gpio-cells = <2>; }; + + a10sr_rst: reset-controller { + compatible = "altr,a10sr-reset"; + #reset-cells = <1>; + }; }; diff --git a/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt b/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt index 670831b29565369279960e9eddb2e14b4267ccdd..eec40be7f79a007005a2e835c1d73e59298a02bc 100644 --- a/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt +++ b/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt @@ -15,7 +15,7 @@ Required properties: The HLCDC IP exposes two subdevices: - a PWM chip: see ../pwm/atmel-hlcdc-pwm.txt - - a Display Controller: see ../display/atmel-hlcdc-dc.txt + - a Display Controller: see ../display/atmel/hlcdc-dc.txt Example: diff --git a/Documentation/devicetree/bindings/mfd/axp20x.txt b/Documentation/devicetree/bindings/mfd/axp20x.txt index 8f3ad9ab46372bd36631db5701589efb04d88e87..aca09af665142c21f9512136817fd2d8f856f490 100644 --- a/Documentation/devicetree/bindings/mfd/axp20x.txt +++ b/Documentation/devicetree/bindings/mfd/axp20x.txt @@ -6,12 +6,19 @@ axp202 (X-Powers) axp209 (X-Powers) axp221 (X-Powers) axp223 (X-Powers) +axp803 (X-Powers) axp809 (X-Powers) Required properties: -- compatible: "x-powers,axp152", "x-powers,axp202", "x-powers,axp209", - "x-powers,axp221", "x-powers,axp223", "x-powers,axp806", - "x-powers,axp809" +- compatible: should be one of: + * "x-powers,axp152" + * "x-powers,axp202" + * "x-powers,axp209" + * "x-powers,axp221" + * "x-powers,axp223" + * "x-powers,axp803" + * "x-powers,axp806" + * "x-powers,axp809" - reg: The I2C slave address or RSB hardware address for the AXP chip - interrupt-parent: The parent interrupt controller - interrupts: SoC NMI / GPIO interrupt connected to the PMIC's IRQ pin @@ -28,6 +35,9 @@ Optional properties: regulator to drive the OTG VBus, rather then as an input pin which signals whether the board is driving OTG VBus or not. +- x-powers,master-mode: Boolean (axp806 only). Set this when the PMIC is + wired for master mode. The default is slave mode. + - -supply: a phandle to the regulator supply node. May be omitted if inputs are unregulated, such as using the IPSOUT output from the PMIC. @@ -86,6 +96,33 @@ LDO_IO1 : LDO : ips-supply : GPIO 1 RTC_LDO : LDO : ips-supply : always on DRIVEVBUS : Enable output : drivevbus-supply : external regulator +AXP803 regulators, type, and corresponding input supply names: + +Regulator Type Supply Name Notes +--------- ---- ----------- ----- +DCDC1 : DC-DC buck : vin1-supply +DCDC2 : DC-DC buck : vin2-supply : poly-phase capable +DCDC3 : DC-DC buck : vin3-supply : poly-phase capable +DCDC4 : DC-DC buck : vin4-supply +DCDC5 : DC-DC buck : vin5-supply : poly-phase capable +DCDC6 : DC-DC buck : vin6-supply : poly-phase capable +DC1SW : On/Off Switch : : DCDC1 secondary output +ALDO1 : LDO : aldoin-supply : shared supply +ALDO2 : LDO : aldoin-supply : shared supply +ALDO3 : LDO : aldoin-supply : shared supply +DLDO1 : LDO : dldoin-supply : shared supply +DLDO2 : LDO : dldoin-supply : shared supply +DLDO3 : LDO : dldoin-supply : shared supply +DLDO4 : LDO : dldoin-supply : shared supply +ELDO1 : LDO : eldoin-supply : shared supply +ELDO2 : LDO : eldoin-supply : shared supply +ELDO3 : LDO : eldoin-supply : shared supply +FLDO1 : LDO : fldoin-supply : shared supply +FLDO2 : LDO : fldoin-supply : shared supply +LDO_IO0 : LDO : ips-supply : GPIO 0 +LDO_IO1 : LDO : ips-supply : GPIO 1 +RTC_LDO : LDO : ips-supply : always on + AXP806 regulators, type, and corresponding input supply names: Regulator Type Supply Name Notes diff --git a/Documentation/devicetree/bindings/mfd/da9062.txt b/Documentation/devicetree/bindings/mfd/da9062.txt index 38802b54d48a4aef15a33c632050549fb9eb3707..c0a418c27e9d94796cb1d3b0a1d15845432ceb93 100644 --- a/Documentation/devicetree/bindings/mfd/da9062.txt +++ b/Documentation/devicetree/bindings/mfd/da9062.txt @@ -1,22 +1,39 @@ * Dialog DA9062 Power Management Integrated Circuit (PMIC) -DA9062 consists of a large and varied group of sub-devices: +Product information for the DA9062 and DA9061 devices can be found here: +- http://www.dialog-semiconductor.com/products/da9062 +- http://www.dialog-semiconductor.com/products/da9061 + +The DA9062 PMIC consists of: Device Supply Names Description ------ ------------ ----------- da9062-regulator : : LDOs & BUCKs da9062-rtc : : Real-Time Clock +da9062-onkey : : On Key +da9062-watchdog : : Watchdog Timer +da9062-thermal : : Thermal + +The DA9061 PMIC consists of: + +Device Supply Names Description +------ ------------ ----------- +da9062-regulator : : LDOs & BUCKs +da9062-onkey : : On Key da9062-watchdog : : Watchdog Timer +da9062-thermal : : Thermal ====== Required properties: -- compatible : Should be "dlg,da9062". +- compatible : Should be + "dlg,da9062" for DA9062 + "dlg,da9061" for DA9061 - reg : Specifies the I2C slave address (this defaults to 0x58 but it can be modified to match the chip's OTP settings). - interrupt-parent : Specifies the reference to the interrupt controller for - the DA9062. + the DA9062 or DA9061. - interrupts : IRQ line information. - interrupt-controller @@ -25,8 +42,8 @@ further information on IRQ bindings. Sub-nodes: -- regulators : This node defines the settings for the LDOs and BUCKs. The - DA9062 regulators are bound using their names listed below: +- regulators : This node defines the settings for the LDOs and BUCKs. + The DA9062 regulators are bound using their names listed below: buck1 : BUCK_1 buck2 : BUCK_2 @@ -37,19 +54,29 @@ Sub-nodes: ldo3 : LDO_3 ldo4 : LDO_4 + The DA9061 regulators are bound using their names listed below: + + buck1 : BUCK_1 + buck2 : BUCK_2 + buck3 : BUCK_3 + ldo1 : LDO_1 + ldo2 : LDO_2 + ldo3 : LDO_3 + ldo4 : LDO_4 + The component follows the standard regulator framework and the bindings details of individual regulator device can be found in: Documentation/devicetree/bindings/regulator/regulator.txt - - rtc : This node defines settings required for the Real-Time Clock associated with the DA9062. There are currently no entries in this binding, however compatible = "dlg,da9062-rtc" should be added if a node is created. -- watchdog: This node defines the settings for the watchdog driver associated - with the DA9062 PMIC. The compatible = "dlg,da9062-watchdog" should be added - if a node is created. +- onkey : See ../input/da9062-onkey.txt + +- watchdog: See ../watchdog/da9062-watchdog.txt +- thermal : See ../thermal/da9062-thermal.txt Example: @@ -64,10 +91,6 @@ Example: compatible = "dlg,da9062-rtc"; }; - watchdog { - compatible = "dlg,da9062-watchdog"; - }; - regulators { DA9062_BUCK1: buck1 { regulator-name = "BUCK1"; diff --git a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt index 05485699d70e7c85cb25eb83f39b86f5be0daaca..9630ac0e4b56ef372f08a0850175be06d13cd9a7 100644 --- a/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt +++ b/Documentation/devicetree/bindings/mfd/hisilicon,hi655x.txt @@ -16,6 +16,11 @@ Required properties: - reg: Base address of PMIC on Hi6220 SoC. - interrupt-controller: Hi655x has internal IRQs (has own IRQ domain). - pmic-gpios: The GPIO used by PMIC IRQ. +- #clock-cells: From common clock binding; shall be set to 0 + +Optional properties: +- clock-output-names: From common clock binding to override the + default output clock name Example: pmic: pmic@f8000000 { @@ -24,4 +29,5 @@ Example: interrupt-controller; #interrupt-cells = <2>; pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + #clock-cells = <0>; } diff --git a/Documentation/devicetree/bindings/mfd/mt6397.txt b/Documentation/devicetree/bindings/mfd/mt6397.txt index c568d52af5afc094c107f418859ae092fcbe6b0f..522a3bbf1bac0482018970f966a06b58aa43faf5 100644 --- a/Documentation/devicetree/bindings/mfd/mt6397.txt +++ b/Documentation/devicetree/bindings/mfd/mt6397.txt @@ -6,6 +6,7 @@ MT6397/MT6323 is a multifunction device with the following sub modules: - Audio codec - GPIO - Clock +- LED It is interfaced to host controller using SPI interface by a proprietary hardware called PMIC wrapper or pwrap. MT6397/MT6323 MFD is a child device of pwrap. diff --git a/Documentation/devicetree/bindings/iio/adc/mxs-lradc.txt b/Documentation/devicetree/bindings/mfd/mxs-lradc.txt similarity index 100% rename from Documentation/devicetree/bindings/iio/adc/mxs-lradc.txt rename to Documentation/devicetree/bindings/mfd/mxs-lradc.txt diff --git a/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt b/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt index c110e118b79feeb5745b9e57c0eee7c99585289b..df664018c148edf237fcc89e0a85b68c94c299e7 100644 --- a/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt +++ b/Documentation/devicetree/bindings/mfd/samsung,exynos5433-lpass.txt @@ -5,7 +5,10 @@ Required properties: - compatible : "samsung,exynos5433-lpass" - reg : should contain the LPASS top SFR region location and size - - samsung,pmu-syscon : the phandle to the Power Management Unit node + - clock-names : should contain following required clocks: "sfr0_ctrl" + - clocks : should contain clock specifiers of all clocks, which + input names have been specified in clock-names + property, in same order. - #address-cells : should be 1 - #size-cells : should be 1 - ranges : must be present @@ -25,7 +28,8 @@ Example: audio-subsystem { compatible = "samsung,exynos5433-lpass"; reg = <0x11400000 0x100>, <0x11500000 0x08>; - samsung,pmu-syscon = <&pmu_system_controller>; + clocks = <&cmu_aud CLK_PCLK_SFR0_CTRL>; + clock-names = "sfr0_ctrl"; #address-cells = <1>; #size-cells = <1>; ranges; diff --git a/Documentation/devicetree/bindings/mfd/sun4i-gpadc.txt b/Documentation/devicetree/bindings/mfd/sun4i-gpadc.txt new file mode 100644 index 0000000000000000000000000000000000000000..badff3611a986e9d726a4e9851b840b9ac413806 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/sun4i-gpadc.txt @@ -0,0 +1,59 @@ +Allwinner SoCs' GPADC Device Tree bindings +------------------------------------------ +The Allwinner SoCs all have an ADC that can also act as a thermal sensor +and sometimes as a touchscreen controller. + +Required properties: + - compatible: "allwinner,sun8i-a33-ths", + - reg: mmio address range of the chip, + - #thermal-sensor-cells: shall be 0, + - #io-channel-cells: shall be 0, + +Example: + ths: ths@01c25000 { + compatible = "allwinner,sun8i-a33-ths"; + reg = <0x01c25000 0x100>; + #thermal-sensor-cells = <0>; + #io-channel-cells = <0>; + }; + +sun4i, sun5i and sun6i SoCs are also supported via the older binding: + +sun4i resistive touchscreen controller +-------------------------------------- + +Required properties: + - compatible: "allwinner,sun4i-a10-ts", "allwinner,sun5i-a13-ts" or + "allwinner,sun6i-a31-ts" + - reg: mmio address range of the chip + - interrupts: interrupt to which the chip is connected + - #thermal-sensor-cells: shall be 0 + +Optional properties: + - allwinner,ts-attached : boolean indicating that an actual touchscreen + is attached to the controller + - allwinner,tp-sensitive-adjust : integer (4 bits) + adjust sensitivity of pen down detection + between 0 (least sensitive) and 15 + (defaults to 15) + - allwinner,filter-type : integer (2 bits) + select median and averaging filter + samples used for median / averaging filter + 0: 4/2 + 1: 5/3 + 2: 8/4 + 3: 16/8 + (defaults to 1) + +Example: + + rtp: rtp@01c25000 { + compatible = "allwinner,sun4i-a10-ts"; + reg = <0x01c25000 0x100>; + interrupts = <29>; + allwinner,ts-attached; + #thermal-sensor-cells = <0>; + /* sensitive/noisy touch panel */ + allwinner,tp-sensitive-adjust = <0>; + allwinner,filter-type = <3>; + }; diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt new file mode 100644 index 0000000000000000000000000000000000000000..c885cf89b8ce83ce29b9d603011859f5962493a4 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt @@ -0,0 +1,243 @@ +TI LMU (Lighting Management Unit) device tree bindings + +TI LMU driver supports lighting devices below. + + Name Child nodes + ------ --------------------------------- + LM3532 Backlight + LM3631 Backlight and regulator + LM3632 Backlight and regulator + LM3633 Backlight, LED and fault monitor + LM3695 Backlight + LM3697 Backlight and fault monitor + +Required properties: + - compatible: Should be one of: + "ti,lm3532" + "ti,lm3631" + "ti,lm3632" + "ti,lm3633" + "ti,lm3695" + "ti,lm3697" + - reg: I2C slave address. + 0x11 for LM3632 + 0x29 for LM3631 + 0x36 for LM3633, LM3697 + 0x38 for LM3532 + 0x63 for LM3695 + +Optional property: + - enable-gpios: A GPIO specifier for hardware enable pin. + +Required node: + - backlight: All LMU devices have backlight child nodes. + For the properties, please refer to [1]. + +Optional nodes: + - fault-monitor: Hardware fault monitoring driver for LM3633 and LM3697. + Required properties: + - compatible: Should be one of: + "ti,lm3633-fault-monitor" + "ti,lm3697-fault-monitor" + - leds: LED properties for LM3633. Please refer to [2]. + - regulators: Regulator properties for LM3631 and LM3632. + Please refer to [3]. + +[1] ../leds/backlight/ti-lmu-backlight.txt +[2] ../leds/leds-lm3633.txt +[3] ../regulator/lm363x-regulator.txt + +lm3532@38 { + compatible = "ti,lm3532"; + reg = <0x38>; + + enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>; + + backlight { + compatible = "ti,lm3532-backlight"; + + lcd { + led-sources = <0 1 2>; + ramp-up-msec = <30>; + ramp-down-msec = <0>; + }; + }; +}; + +lm3631@29 { + compatible = "ti,lm3631"; + reg = <0x29>; + + regulators { + compatible = "ti,lm363x-regulator"; + + vboost { + regulator-name = "lcd_boost"; + regulator-min-microvolt = <4500000>; + regulator-max-microvolt = <6350000>; + regulator-always-on; + }; + + vcont { + regulator-name = "lcd_vcont"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + }; + + voref { + regulator-name = "lcd_voref"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + }; + + vpos { + regulator-name = "lcd_vpos"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + regulator-boot-on; + }; + + vneg { + regulator-name = "lcd_vneg"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + regulator-boot-on; + }; + }; + + backlight { + compatible = "ti,lm3631-backlight"; + + lcd_bl { + led-sources = <0 1>; + ramp-up-msec = <300>; + }; + }; +}; + +lm3632@11 { + compatible = "ti,lm3632"; + reg = <0x11>; + + enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>; /* PC2 */ + + regulators { + compatible = "ti,lm363x-regulator"; + + ti,lcm-en1-gpio = <&pioC 0 GPIO_ACTIVE_HIGH>; /* PC0 */ + ti,lcm-en2-gpio = <&pioC 1 GPIO_ACTIVE_HIGH>; /* PC1 */ + + vboost { + regulator-name = "lcd_boost"; + regulator-min-microvolt = <4500000>; + regulator-max-microvolt = <6400000>; + regulator-always-on; + }; + + vpos { + regulator-name = "lcd_vpos"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + }; + + vneg { + regulator-name = "lcd_vneg"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + }; + }; + + backlight { + compatible = "ti,lm3632-backlight"; + + pwms = <&pwm0 0 10000 0>; /* pwm number, period, polarity */ + pwm-names = "lmu-backlight"; + + lcd { + led-sources = <0 1>; + pwm-period = <10000>; + }; + }; +}; + +lm3633@36 { + compatible = "ti,lm3633"; + reg = <0x36>; + + enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>; + + backlight { + compatible = "ti,lm3633-backlight"; + + main { + label = "main_lcd"; + led-sources = <1 2>; + ramp-up-msec = <500>; + ramp-down-msec = <500>; + }; + + front { + label = "front_lcd"; + led-sources = <0>; + ramp-up-msec = <1000>; + ramp-down-msec = <0>; + }; + }; + + leds { + compatible = "ti,lm3633-leds"; + + chan1 { + label = "status"; + led-sources = <1>; + led-max-microamp = <6000>; + }; + + chan345 { + label = "rgb"; + led-sources = <3 4 5>; + led-max-microamp = <10000>; + }; + }; + + fault-monitor { + compatible = "ti,lm3633-fault-monitor"; + }; +}; + +lm3695@63 { + compatible = "ti,lm3695"; + reg = <0x63>; + + enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>; + + backlight { + compatible = "ti,lm3695-backlight"; + + lcd { + label = "bl"; + led-sources = <0 1>; + }; + }; +}; + +lm3697@36 { + compatible = "ti,lm3697"; + reg = <0x36>; + + enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>; + + backlight { + compatible = "ti,lm3697-backlight"; + + lcd { + led-sources = <0 1 2>; + ramp-up-msec = <200>; + ramp-down-msec = <200>; + }; + }; + + fault-monitor { + compatible = "ti,lm3697-fault-monitor"; + }; +}; diff --git a/Documentation/devicetree/bindings/mfd/wm831x.txt b/Documentation/devicetree/bindings/mfd/wm831x.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f8b7430673c922b2d7f857a8c558e8345a2e831 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/wm831x.txt @@ -0,0 +1,81 @@ +Cirrus Logic/Wolfson Microelectronics wm831x PMICs + +System PMICs with a wide range of additional features. + +Required properties: + + - compatible : One of the following chip-specific strings: + "wlf,wm8310" + "wlf,wm8311" + "wlf,wm8312" + "wlf,wm8320" + "wlf,wm8321" + "wlf,wm8325" + "wlf,wm8326" + + - reg : I2C slave address when connected using I2C, chip select number + when using SPI. + + - gpio-controller : Indicates this device is a GPIO controller. + - #gpio-cells : Must be 2. The first cell is the pin number and the + second cell is used to specify optional parameters (currently unused). + + - interrupts : The interrupt line the IRQ signal for the device is + connected to. + - interrupt-parent : The parent interrupt controller. + + - interrupt-controller : wm831x devices contain interrupt controllers and + may provide interrupt services to other devices. + - #interrupt-cells: Must be 2. The first cell is the IRQ number, and the + second cell is the flags, encoded as the trigger masks from + ../interrupt-controller/interrupts.txt + +Optional sub-nodes: + - regulators : Contains sub-nodes for each of the regulators supplied by + the device. The regulators are bound using their names listed below: + + dcdc1 : DCDC1 + dcdc2 : DCDC2 + dcdc3 : DCDC3 + dcdc4 : DCDC3 + isink1 : ISINK1 + isink2 : ISINK2 + ldo1 : LDO1 + ldo2 : LDO2 + ldo3 : LDO3 + ldo4 : LDO4 + ldo5 : LDO5 + ldo7 : LDO7 + ldo11 : LDO11 + + The bindings details of each regulator can be found in: + ../regulator/regulator.txt + +Example: + +wm8310: pmic@36 { + compatible = "wlf,wm8310"; + reg = <0x36>; + + gpio-controller; + #gpio-cells = <2>; + + interrupts = <347>; + interrupt-parent = <&gic>; + + interrupt-controller; + #interrupt-cells = <2>; + + regulators { + dcdc1: dcdc1 { + regulator-name = "DCDC1"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <600000>; + }; + ldo1: ldo1 { + regulator-name = "LDO1"; + regulator-min-microvolt = <1700000>; + regulator-max-microvolt = <1700000>; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt new file mode 100644 index 0000000000000000000000000000000000000000..d876580ae3b837a9174e454d223c6fa4df7ce94e --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt @@ -0,0 +1,23 @@ +Broadcom BCM2835 SDHOST controller + +This file documents differences between the core properties described +by mmc.txt and the properties that represent the BCM2835 controller. + +Required properties: +- compatible: Should be "brcm,bcm2835-sdhost". +- clocks: The clock feeding the SDHOST controller. + +Optional properties: +- dmas: DMA channel for read and write. + See Documentation/devicetree/bindings/dma/dma.txt for details + +Example: + +sdhost: mmc@7e202000 { + compatible = "brcm,bcm2835-sdhost"; + reg = <0x7e202000 0x100>; + interrupts = <2 24>; + clocks = <&clocks BCM2835_CLOCK_VPU>; + dmas = <&dma 13>; + dma-names = "rx-tx"; +}; diff --git a/Documentation/devicetree/bindings/mmc/cavium-mmc.txt b/Documentation/devicetree/bindings/mmc/cavium-mmc.txt new file mode 100644 index 0000000000000000000000000000000000000000..1433e6201dff3c1b4bb4a79f80b8ccf47a33fdaf --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/cavium-mmc.txt @@ -0,0 +1,57 @@ +* Cavium Octeon & ThunderX MMC controller + +The highspeed MMC host controller on Caviums SoCs provides an interface +for MMC and SD types of memory cards. + +Supported maximum speeds are the ones of the eMMC standard 4.41 as well +as the speed of SD standard 4.0. Only 3.3 Volt is supported. + +Required properties: + - compatible : should be one of: + cavium,octeon-6130-mmc + cavium,octeon-7890-mmc + cavium,thunder-8190-mmc + cavium,thunder-8390-mmc + mmc-slot + - reg : mmc controller base registers + - clocks : phandle + +Optional properties: + - for cd, bus-width and additional generic mmc parameters + please refer to mmc.txt within this directory + - cavium,cmd-clk-skew : number of coprocessor clocks before sampling command + - cavium,dat-clk-skew : number of coprocessor clocks before sampling data + +Deprecated properties: +- spi-max-frequency : use max-frequency instead +- cavium,bus-max-width : use bus-width instead +- power-gpios : use vmmc-supply instead +- cavium,octeon-6130-mmc-slot : use mmc-slot instead + +Examples: + mmc_1_4: mmc@1,4 { + compatible = "cavium,thunder-8390-mmc"; + reg = <0x0c00 0 0 0 0>; /* DEVFN = 0x0c (1:4) */ + #address-cells = <1>; + #size-cells = <0>; + clocks = <&sclk>; + + mmc-slot@0 { + compatible = "mmc-slot"; + reg = <0>; + vmmc-supply = <&mmc_supply_3v3>; + max-frequency = <42000000>; + bus-width = <4>; + cap-sd-highspeed; + }; + + mmc-slot@1 { + compatible = "mmc-slot"; + reg = <1>; + vmmc-supply = <&mmc_supply_3v3>; + max-frequency = <42000000>; + bus-width = <8>; + cap-mmc-highspeed; + non-removable; + }; + }; diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt new file mode 100644 index 0000000000000000000000000000000000000000..b878a1e305af6e143a646b3473be522c82a62e8a --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt @@ -0,0 +1,170 @@ +Marvell Xenon SDHCI Controller device tree bindings +This file documents differences between the core mmc properties +described by mmc.txt and the properties used by the Xenon implementation. + +Multiple SDHCs might be put into a single Xenon IP, to save size and cost. +Each SDHC is independent and owns independent resources, such as register sets, +clock and PHY. +Each SDHC should have an independent device tree node. + +Required Properties: +- compatible: should be one of the following + - "marvell,armada-3700-sdhci": For controllers on Armada-3700 SoC. + Must provide a second register area and marvell,pad-type. + - "marvell,armada-ap806-sdhci": For controllers on Armada AP806. + - "marvell,armada-cp110-sdhci": For controllers on Armada CP110. + +- clocks: + Array of clocks required for SDHC. + Require at least input clock for Xenon IP core. + +- clock-names: + Array of names corresponding to clocks property. + The input clock for Xenon IP core should be named as "core". + +- reg: + * For "marvell,armada-3700-sdhci", two register areas. + The first one for Xenon IP register. The second one for the Armada 3700 SoC + PHY PAD Voltage Control register. + Please follow the examples with compatible "marvell,armada-3700-sdhci" + in below. + Please also check property marvell,pad-type in below. + + * For other compatible strings, one register area for Xenon IP. + +Optional Properties: +- marvell,xenon-sdhc-id: + Indicate the corresponding bit index of current SDHC in + SDHC System Operation Control Register Bit[7:0]. + Set/clear the corresponding bit to enable/disable current SDHC. + If Xenon IP contains only one SDHC, this property is optional. + +- marvell,xenon-phy-type: + Xenon support multiple types of PHYs. + To select eMMC 5.1 PHY, set: + marvell,xenon-phy-type = "emmc 5.1 phy" + eMMC 5.1 PHY is the default choice if this property is not provided. + To select eMMC 5.0 PHY, set: + marvell,xenon-phy-type = "emmc 5.0 phy" + + All those types of PHYs can support eMMC, SD and SDIO. + Please note that this property only presents the type of PHY. + It doesn't stand for the entire SDHC type or property. + For example, "emmc 5.1 phy" doesn't mean that this Xenon SDHC only + supports eMMC 5.1. + +- marvell,xenon-phy-znr: + Set PHY ZNR value. + Only available for eMMC PHY. + Valid range = [0:0x1F]. + ZNR is set as 0xF by default if this property is not provided. + +- marvell,xenon-phy-zpr: + Set PHY ZPR value. + Only available for eMMC PHY. + Valid range = [0:0x1F]. + ZPR is set as 0xF by default if this property is not provided. + +- marvell,xenon-phy-nr-success-tun: + Set the number of required consecutive successful sampling points + used to identify a valid sampling window, in tuning process. + Valid range = [1:7]. + Set as 0x4 by default if this property is not provided. + +- marvell,xenon-phy-tun-step-divider: + Set the divider for calculating TUN_STEP. + Set as 64 by default if this property is not provided. + +- marvell,xenon-phy-slow-mode: + If this property is selected, transfers will bypass PHY. + Only available when bus frequency lower than 55MHz in SDR mode. + Disabled by default. Please only try this property if timing issues + always occur with PHY enabled in eMMC HS SDR, SD SDR12, SD SDR25, + SD Default Speed and HS mode and eMMC legacy speed mode. + +- marvell,xenon-tun-count: + Xenon SDHC SoC usually doesn't provide re-tuning counter in + Capabilities Register 3 Bit[11:8]. + This property provides the re-tuning counter. + If this property is not set, default re-tuning counter will + be set as 0x9 in driver. + +- marvell,pad-type: + Type of Armada 3700 SoC PHY PAD Voltage Controller register. + Only valid when "marvell,armada-3700-sdhci" is selected. + Two types: "sd" and "fixed-1-8v". + If "sd" is selected, SoC PHY PAD is set as 3.3V at the beginning and is + switched to 1.8V when later in higher speed mode. + If "fixed-1-8v" is selected, SoC PHY PAD is fixed 1.8V, such as for eMMC. + Please follow the examples with compatible "marvell,armada-3700-sdhci" + in below. + +Example: +- For eMMC: + + sdhci@aa0000 { + compatible = "marvell,armada-ap806-sdhci"; + reg = <0xaa0000 0x1000>; + interrupts = + clocks = <&emmc_clk>; + clock-names = "core"; + bus-width = <4>; + marvell,xenon-phy-slow-mode; + marvell,xenon-tun-count = <11>; + non-removable; + no-sd; + no-sdio; + + /* Vmmc and Vqmmc are both fixed */ + }; + +- For SD/SDIO: + + sdhci@ab0000 { + compatible = "marvell,armada-cp110-sdhci"; + reg = <0xab0000 0x1000>; + interrupts = + vqmmc-supply = <&sd_vqmmc_regulator>; + vmmc-supply = <&sd_vmmc_regulator>; + clocks = <&sdclk>; + clock-names = "core"; + bus-width = <4>; + marvell,xenon-tun-count = <9>; + }; + +- For eMMC with compatible "marvell,armada-3700-sdhci": + + sdhci@aa0000 { + compatible = "marvell,armada-3700-sdhci"; + reg = <0xaa0000 0x1000>, + ; + interrupts = + clocks = <&emmcclk>; + clock-names = "core"; + bus-width = <8>; + mmc-ddr-1_8v; + mmc-hs400-1_8v; + non-removable; + no-sd; + no-sdio; + + /* Vmmc and Vqmmc are both fixed */ + + marvell,pad-type = "fixed-1-8v"; + }; + +- For SD/SDIO with compatible "marvell,armada-3700-sdhci": + + sdhci@ab0000 { + compatible = "marvell,armada-3700-sdhci"; + reg = <0xab0000 0x1000>, + ; + interrupts = + vqmmc-supply = <&sd_regulator>; + /* Vmmc is fixed */ + clocks = <&sdclk>; + clock-names = "core"; + bus-width = <4>; + + marvell,pad-type = "sd"; + }; diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt index e25436861867f28f75a2450ecbf28d1fd2df3921..9029b45b8a22a8d5b6a1afa3184bf389a85202f4 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt @@ -18,6 +18,8 @@ Optional properties: "ext_clock" (External clock provided to the card). - post-power-on-delay-ms : Delay in ms after powering the card and de-asserting the reset-gpios (if any) +- power-off-delay-us : Delay in us after asserting the reset-gpios (if any) + during power off of the card. Example: diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt b/Documentation/devicetree/bindings/mmc/mtk-sd.txt index 0120c7f1109cb2cf830b002f9e77e774ddbdef79..4182ea36ca5b1ca2e1282f93a2d398cacb1fc357 100644 --- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt +++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt @@ -21,6 +21,15 @@ Optional properties: - assigned-clocks: PLL of the source clock - assigned-clock-parents: parent of source clock, used for HS400 mode to get 400Mhz source clock - hs400-ds-delay: HS400 DS delay setting +- mediatek,hs200-cmd-int-delay: HS200 command internal delay setting. + This field has total 32 stages. + The value is an integer from 0 to 31. +- mediatek,hs400-cmd-int-delay: HS400 command internal delay setting + This field has total 32 stages. + The value is an integer from 0 to 31. +- mediatek,hs400-cmd-resp-sel-rising: HS400 command response sample selection + If present,HS400 command responses are sampled on rising edges. + If not present,HS400 command responses are sampled on falling edges. Examples: mmc0: mmc@11230000 { @@ -38,4 +47,7 @@ mmc0: mmc@11230000 { assigned-clocks = <&topckgen CLK_TOP_MSDC50_0_SEL>; assigned-clock-parents = <&topckgen CLK_TOP_MSDCPLL_D2>; hs400-ds-delay = <0x14015>; + mediatek,hs200-cmd-int-delay = <26>; + mediatek,hs400-cmd-int-delay = <14>; + mediatek,hs400-cmd-resp-sel-rising; }; diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt index 15b8368ee1f260a4f55d4226b6fccfea40e46162..9bce57862ed6ead2e693ee5bc2bcb6ef58f4c9f5 100644 --- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt @@ -7,11 +7,13 @@ This file documents differences between the core properties described by mmc.txt and the properties used by the sdhci-tegra driver. Required properties: -- compatible : For Tegra20, must contain "nvidia,tegra20-sdhci". - For Tegra30, must contain "nvidia,tegra30-sdhci". For Tegra114, - must contain "nvidia,tegra114-sdhci". For Tegra124, must contain - "nvidia,tegra124-sdhci". Otherwise, must contain "nvidia,-sdhci", - plus one of the above, where is tegra132 or tegra210. +- compatible : should be one of: + - "nvidia,tegra20-sdhci": for Tegra20 + - "nvidia,tegra30-sdhci": for Tegra30 + - "nvidia,tegra114-sdhci": for Tegra114 + - "nvidia,tegra124-sdhci": for Tegra124 and Tegra132 + - "nvidia,tegra210-sdhci": for Tegra210 + - "nvidia,tegra186-sdhci": for Tegra186 - clocks : Must contain one entry, for the module clock. See ../clocks/clock-bindings.txt for details. - resets : Must contain an entry for each entry in reset-names. diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt index e4ba92aa035e2314255037ae5ef844ea34d3a406..c32dc5a9dbe6cf2e434e049c4149ff0c2c3e1f4f 100644 --- a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt +++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt @@ -8,6 +8,7 @@ Required properties: - compatible: should be "renesas,mmcif-", "renesas,sh-mmcif" as a fallback. Examples with are: + - "renesas,mmcif-r7s72100" for the MMCIF found in r7s72100 SoCs - "renesas,mmcif-r8a73a4" for the MMCIF found in r8a73a4 SoCs - "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs - "renesas,mmcif-r8a7778" for the MMCIF found in r8a7778 SoCs @@ -17,6 +18,13 @@ Required properties: - "renesas,mmcif-r8a7794" for the MMCIF found in r8a7794 SoCs - "renesas,mmcif-sh73a0" for the MMCIF found in sh73a0 SoCs +- interrupts: Some SoCs have only 1 shared interrupt, while others have either + 2 or 3 individual interrupts (error, int, card detect). Below is the number + of interrupts for each SoC: + 1: r8a73a4, r8a7778, r8a7790, r8a7791, r8a7793, r8a7794 + 2: r8a7740, sh73a0 + 3: r7s72100 + - clocks: reference to the functional clock - dmas: reference to the DMA channels, one per channel name listed in the diff --git a/Documentation/devicetree/bindings/mmc/samsung,s3cmci.txt b/Documentation/devicetree/bindings/mmc/samsung,s3cmci.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f68feb9f9d61f2bd0cadfe38e8b5cd6d834cbec --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/samsung,s3cmci.txt @@ -0,0 +1,42 @@ +* Samsung's S3C24XX MMC/SD/SDIO controller device tree bindings + +Samsung's S3C24XX MMC/SD/SDIO controller is used as a connectivity interface +with external MMC, SD and SDIO storage mediums. + +This file documents differences between the core mmc properties described by +mmc.txt and the properties used by the Samsung S3C24XX MMC/SD/SDIO controller +implementation. + +Required SoC Specific Properties: +- compatible: should be one of the following + - "samsung,s3c2410-sdi": for controllers compatible with s3c2410 + - "samsung,s3c2412-sdi": for controllers compatible with s3c2412 + - "samsung,s3c2440-sdi": for controllers compatible with s3c2440 +- reg: register location and length +- interrupts: mmc controller interrupt +- clocks: Should reference the controller clock +- clock-names: Should contain "sdi" + +Required Board Specific Properties: +- pinctrl-0: Should specify pin control groups used for this controller. +- pinctrl-names: Should contain only one value - "default". + +Optional Properties: +- bus-width: number of data lines (see mmc.txt) +- cd-gpios: gpio for card detection (see mmc.txt) +- wp-gpios: gpio for write protection (see mmc.txt) + +Example: + + mmc0: mmc@5a000000 { + compatible = "samsung,s3c2440-sdi"; + pinctrl-names = "default"; + pinctrl-0 = <&sdi_pins>; + reg = <0x5a000000 0x100000>; + interrupts = <0 0 21 3>; + clocks = <&clocks PCLK_SDI>; + clock-names = "sdi"; + bus-width = <4>; + cd-gpios = <&gpg 8 GPIO_ACTIVE_LOW>; + wp-gpios = <&gph 8 GPIO_ACTIVE_LOW>; + }; diff --git a/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt b/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt index c0f37cb41a9b47516f29cafa573d7886d103a2a7..fa423c2778535bb50b89f805d1a295d1babbe210 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt +++ b/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt @@ -19,6 +19,53 @@ if supported. See mmc.txt for details. - mmc-hs400-1_8v - mmc-hs400-1_2v +Some PHY delays can be configured by following properties. +PHY DLL input delays: +They are used to delay the data valid window, and align the window +to sampling clock. The delay starts from 5ns (for delay parameter equal to 0) +and it is increased by 2.5ns in each step. +- cdns,phy-input-delay-sd-highspeed: + Value of the delay in the input path for SD high-speed timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-legacy: + Value of the delay in the input path for legacy timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-sd-uhs-sdr12: + Value of the delay in the input path for SD UHS SDR12 timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-sd-uhs-sdr25: + Value of the delay in the input path for SD UHS SDR25 timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-sd-uhs-sdr50: + Value of the delay in the input path for SD UHS SDR50 timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-sd-uhs-ddr50: + Value of the delay in the input path for SD UHS DDR50 timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-mmc-highspeed: + Value of the delay in the input path for MMC high-speed timing + Valid range = [0:0x1F]. +- cdns,phy-input-delay-mmc-ddr: + Value of the delay in the input path for eMMC high-speed DDR timing + Valid range = [0:0x1F]. + +PHY DLL clock delays: +Each delay property represents the fraction of the clock period. +The approximate delay value will be +(/128)*sdmclk_clock_period. +- cdns,phy-dll-delay-sdclk: + Value of the delay introduced on the sdclk output + for all modes except HS200, HS400 and HS400_ES. + Valid range = [0:0x7F]. +- cdns,phy-dll-delay-sdclk-hsmmc: + Value of the delay introduced on the sdclk output + for HS200, HS400 and HS400_ES speed modes. + Valid range = [0:0x7F]. +- cdns,phy-dll-delay-strobe: + Value of the delay introduced on the dat_strobe input + used in HS400 / HS400_ES speed modes. + Valid range = [0:0x7F]. + Example: emmc: sdhci@5a000000 { compatible = "socionext,uniphier-sd4hc", "cdns,sd4hc"; @@ -29,4 +76,5 @@ Example: mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-hs400-1_8v; + cdns,phy-dll-delay-sdclk = <0>; }; diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt index 3e7ee99d3949ab07b5515e3afff350aebc2e0e71..f6bee57e453aa6dcb9114a188538e1c69860b4b2 100644 --- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt +++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -1,4 +1,109 @@ -Atmel NAND flash +Atmel NAND flash controller bindings + +The NAND flash controller node should be defined under the EBI bus (see +Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt). +One or several NAND devices can be defined under this NAND controller. +The NAND controller might be connected to an ECC engine. + +* NAND controller bindings: + +Required properties: +- compatible: should be one of the following + "atmel,at91rm9200-nand-controller" + "atmel,at91sam9260-nand-controller" + "atmel,at91sam9261-nand-controller" + "atmel,at91sam9g45-nand-controller" + "atmel,sama5d3-nand-controller" +- ranges: empty ranges property to forward EBI ranges definitions. +- #address-cells: should be set to 2. +- #size-cells: should be set to 1. +- atmel,nfc-io: phandle to the NFC IO block. Only required for sama5d3 + controllers. +- atmel,nfc-sram: phandle to the NFC SRAM block. Only required for sama5d3 + controllers. + +Optional properties: +- ecc-engine: phandle to the PMECC block. Only meaningful if the SoC embeds + a PMECC engine. + +* NAND device/chip bindings: + +Required properties: +- reg: describes the CS lines assigned to the NAND device. If the NAND device + exposes multiple CS lines (multi-dies chips), your reg property will + contain X tuples of 3 entries. + 1st entry: the CS line this NAND chip is connected to + 2nd entry: the base offset of the memory region assigned to this + device (always 0) + 3rd entry: the memory region size (always 0x800000) + +Optional properties: +- rb-gpios: the GPIO(s) used to check the Ready/Busy status of the NAND. +- cs-gpios: the GPIO(s) used to control the CS line. +- det-gpios: the GPIO used to detect if a Smartmedia Card is present. +- atmel,rb: an integer identifying the native Ready/Busy pin. Only meaningful + on sama5 SoCs. + +All generic properties described in +Documentation/devicetree/bindings/mtd/{common,nand}.txt also apply to the NAND +device node, and NAND partitions should be defined under the NAND node as +described in Documentation/devicetree/bindings/mtd/partition.txt. + +* ECC engine (PMECC) bindings: + +Required properties: +- compatible: should be one of the following + "atmel,at91sam9g45-pmecc" + "atmel,sama5d4-pmecc" + "atmel,sama5d2-pmecc" +- reg: should contain 2 register ranges. The first one is pointing to the PMECC + block, and the second one to the PMECC_ERRLOC block. + +Example: + + pmecc: ecc-engine@ffffc070 { + compatible = "atmel,at91sam9g45-pmecc"; + reg = <0xffffc070 0x490>, + <0xffffc500 0x100>; + }; + + ebi: ebi@10000000 { + compatible = "atmel,sama5d3-ebi"; + #address-cells = <2>; + #size-cells = <1>; + atmel,smc = <&hsmc>; + reg = <0x10000000 0x10000000 + 0x40000000 0x30000000>; + ranges = <0x0 0x0 0x10000000 0x10000000 + 0x1 0x0 0x40000000 0x10000000 + 0x2 0x0 0x50000000 0x10000000 + 0x3 0x0 0x60000000 0x10000000>; + clocks = <&mck>; + + nand_controller: nand-controller { + compatible = "atmel,sama5d3-nand-controller"; + atmel,nfc-sram = <&nfc_sram>; + atmel,nfc-io = <&nfc_io>; + ecc-engine = <&pmecc>; + #address-cells = <2>; + #size-cells = <1>; + ranges; + + nand@3 { + reg = <0x3 0x0 0x800000>; + atmel,rb = <0>; + + /* + * Put generic NAND/MTD properties and + * subnodes here. + */ + }; + }; + }; + +----------------------------------------------------------------------- + +Deprecated bindings (should not be used in new device trees): Required properties: - compatible: The possible values are: diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt index b04d03a1d49951a0fe6fbecde446fb523919f811..e593bbeb2115deb92966d593c18f002e5f269e41 100644 --- a/Documentation/devicetree/bindings/mtd/denali-nand.txt +++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt @@ -1,11 +1,11 @@ * Denali NAND controller Required properties: - - compatible : should be "denali,denali-nand-dt" + - compatible : should be one of the following: + "altr,socfpga-denali-nand" - for Altera SOCFPGA - reg : should contain registers location and length for data and reg. - reg-names: Should contain the reg names "nand_data" and "denali_reg" - interrupts : The interrupt number. - - dm-mask : DMA bit mask The device tree may optionally contain sub-nodes describing partitions of the address space. See partition.txt for more detail. @@ -15,9 +15,8 @@ Examples: nand: nand@ff900000 { #address-cells = <1>; #size-cells = <1>; - compatible = "denali,denali-nand-dt"; + compatible = "altr,socfpga-denali-nand"; reg = <0xff900000 0x100000>, <0xffb80000 0x10000>; reg-names = "nand_data", "denali_reg"; interrupts = <0 144 4>; - dma-mask = <0xffffffff>; }; diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt index af8915b41ccf4962f9aa3a232b5bd268910ae3b7..486a17d533d7a33cce0ac8814a51979ebc7bc0ca 100644 --- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt +++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt @@ -12,7 +12,7 @@ Required properties: - #address-cells, #size-cells : Must be present if the device has sub-nodes representing partitions. - gpios : Specifies the GPIO pins to control the NAND device. The order of - GPIO references is: RDY, nCE, ALE, CLE, and an optional nWP. + GPIO references is: RDY, nCE, ALE, CLE, and nWP. nCE and nWP are optional. Optional properties: - bank-width : Width (in bytes) of the device. If not present, the width @@ -36,7 +36,7 @@ gpio-nand@1,0 { #address-cells = <1>; #size-cells = <1>; gpios = <&banka 1 0>, /* RDY */ - <&banka 2 0>, /* nCE */ + <0>, /* nCE */ <&banka 3 0>, /* ALE */ <&banka 4 0>, /* CLE */ <0>; /* nWP */ diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt index 3e920ec5c4d36d48148e069ccb9de617dbee2326..9ce35af8507c1da0a208fc1823b5d1d487909114 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt @@ -40,6 +40,7 @@ Required properties: w25x80 w25x32 w25q32 + w25q64 w25q32dw w25q80bl w25q128 diff --git a/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt new file mode 100644 index 0000000000000000000000000000000000000000..ddd18c1351486d579606123aa900683848c29e65 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/stm32-quadspi.txt @@ -0,0 +1,43 @@ +* STMicroelectronics Quad Serial Peripheral Interface(QuadSPI) + +Required properties: +- compatible: should be "st,stm32f469-qspi" +- reg: the first contains the register location and length. + the second contains the memory mapping address and length +- reg-names: should contain the reg names "qspi" "qspi_mm" +- interrupts: should contain the interrupt for the device +- clocks: the phandle of the clock needed by the QSPI controller +- A pinctrl must be defined to set pins in mode of operation for QSPI transfer + +Optional properties: +- resets: must contain the phandle to the reset controller. + +A spi flash must be a child of the nor_flash node and could have some +properties. Also see jedec,spi-nor.txt. + +Required properties: +- reg: chip-Select number (QSPI controller may connect 2 nor flashes) +- spi-max-frequency: max frequency of spi bus + +Optional property: +- spi-rx-bus-width: see ../spi/spi-bus.txt for the description + +Example: + +qspi: spi@a0001000 { + compatible = "st,stm32f469-qspi"; + reg = <0xa0001000 0x1000>, <0x90000000 0x10000000>; + reg-names = "qspi", "qspi_mm"; + interrupts = <91>; + resets = <&rcc STM32F4_AHB3_RESET(QSPI)>; + clocks = <&rcc 0 STM32F4_AHB3_CLOCK(QSPI)>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_qspi0>; + + flash@0 { + reg = <0>; + spi-rx-bus-width = <4>; + spi-max-frequency = <108000000>; + ... + }; +}; diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt index 10587bdadbbe5a8e8fe703e23c194420e3701f9f..26c77d985fafe06092467c5a2ea8a0e176d0d460 100644 --- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt +++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt @@ -2,11 +2,14 @@ Required properties: - compatible: should contain one of "brcm,genet-v1", "brcm,genet-v2", - "brcm,genet-v3", "brcm,genet-v4". + "brcm,genet-v3", "brcm,genet-v4", "brcm,genet-v5". - reg: address and length of the register set for the device -- interrupts: must be two cells, the first cell is the general purpose - interrupt line, while the second cell is the interrupt for the ring - RX and TX queues operating in ring mode +- interrupts and/or interrupts-extended: must be two cells, the first cell + is the general purpose interrupt line, while the second cell is the + interrupt for the ring RX and TX queues operating in ring mode. An + optional third interrupt cell for Wake-on-LAN can be specified. + See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt + for information on the property specifics. - phy-mode: see ethernet.txt file in the same directory - #address-cells: should be 1 - #size-cells: should be 1 @@ -29,15 +32,15 @@ Optional properties: Required child nodes: -- mdio bus node: this node should always be present regarless of the PHY +- mdio bus node: this node should always be present regardless of the PHY configuration of the GENET instance MDIO bus node required properties: - compatible: should contain one of "brcm,genet-mdio-v1", "brcm,genet-mdio-v2" - "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", the version has to match the - parent node compatible property (e.g: brcm,genet-v4 pairs with - brcm,genet-mdio-v4) + "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", "brcm,genet-mdio-v5", the version + has to match the parent node compatible property (e.g: brcm,genet-v4 pairs + with brcm,genet-mdio-v4) - reg: address and length relative to the parent node base register address - #address-cells: address cell for MDIO bus addressing, should be 1 - #size-cells: size of the cells for MDIO bus addressing, should be 0 diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt index ab0bb4247d14950959044cc138e71e7c736f85b9..4648948f7c3b8f26391292b06aa01743100e8796 100644 --- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt +++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt @@ -2,8 +2,9 @@ Required properties: - compatible: should one from "brcm,genet-mdio-v1", "brcm,genet-mdio-v2", - "brcm,genet-mdio-v3", "brcm,genet-mdio-v4" or "brcm,unimac-mdio" -- reg: address and length of the regsiter set for the device, first one is the + "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", "brcm,genet-mdio-v5" or + "brcm,unimac-mdio" +- reg: address and length of the register set for the device, first one is the base register, and the second one is optional and for indirect accesses to larger than 16-bits MDIO transactions - reg-names: name(s) of the register must be "mdio" and optional "mdio_indir_rw" diff --git a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt new file mode 100644 index 0000000000000000000000000000000000000000..23aa94eab2076f142dea3a0c878aebb15d46d7fd --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt @@ -0,0 +1,24 @@ +* Holt HI-311X stand-alone CAN controller device tree bindings + +Required properties: + - compatible: Should be one of the following: + - "holt,hi3110" for HI-3110 + - reg: SPI chip select. + - clocks: The clock feeding the CAN controller. + - interrupt-parent: The parent interrupt controller. + - interrupts: Should contain IRQ line for the CAN controller. + +Optional properties: + - vdd-supply: Regulator that powers the CAN controller. + - xceiver-supply: Regulator that powers the CAN transceiver. + +Example: + can0: can@1 { + compatible = "holt,hi3110"; + reg = <1>; + clocks = <&clk32m>; + interrupt-parent = <&gpio4>; + interrupts = <13 IRQ_TYPE_EDGE_RISING>; + vdd-supply = <®5v0>; + xceiver-supply = <®5v0>; + }; diff --git a/Documentation/devicetree/bindings/net/can/ti_hecc.txt b/Documentation/devicetree/bindings/net/can/ti_hecc.txt new file mode 100644 index 0000000000000000000000000000000000000000..e0f0a7cfe3293893ae42fae3ca14571c29288a01 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/ti_hecc.txt @@ -0,0 +1,32 @@ +Texas Instruments High End CAN Controller (HECC) +================================================ + +This file provides information, what the device node +for the hecc interface contains. + +Required properties: +- compatible: "ti,am3517-hecc" +- reg: addresses and lengths of the register spaces for 'hecc', 'hecc-ram' + and 'mbx' +- reg-names :"hecc", "hecc-ram", "mbx" +- interrupts: interrupt mapping for the hecc interrupts sources +- clocks: clock phandles (see clock bindings for details) + +Optional properties: +- ti,use-hecc1int: if provided configures HECC to produce all interrupts + on HECC1INT interrupt line. By default HECC0INT interrupt + line will be used. +- xceiver-supply: regulator that powers the CAN transceiver + +Example: + +For am3517evm board: + hecc: can@5c050000 { + compatible = "ti,am3517-hecc"; + reg = <0x5c050000 0x80>, + <0x5c053000 0x180>, + <0x5c052000 0x200>; + reg-names = "hecc", "hecc-ram", "mbx"; + interrupts = <24>; + clocks = <&hecc_ck>; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/lan9303.txt b/Documentation/devicetree/bindings/net/dsa/lan9303.txt new file mode 100644 index 0000000000000000000000000000000000000000..04f2965a446768920ae90f846419d438773a633d --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/lan9303.txt @@ -0,0 +1,105 @@ +SMSC/MicroChip LAN9303 three port ethernet switch +------------------------------------------------- + +Required properties: + +- compatible: should be + - "smsc,lan9303-i2c" for I2C managed mode + or + - "smsc,lan9303-mdio" for mdio managed mode + +Optional properties: + +- reset-gpios: GPIO to be used to reset the whole device +- reset-duration: reset duration in milliseconds, defaults to 200 ms + +Subnodes: + +The integrated switch subnode should be specified according to the binding +described in dsa/dsa.txt. The CPU port of this switch is always port 0. + +Note: always use 'reg = <0/1/2>;' for the three DSA ports, even if the device is +configured to use 1/2/3 instead. This hardware configuration will be +auto-detected and mapped accordingly. + +Example: + +I2C managed mode: + + master: masterdevice@X { + status = "okay"; + + fixed-link { /* RMII fixed link to LAN9303 */ + speed = <100>; + full-duplex; + }; + }; + + switch: switch@a { + compatible = "smsc,lan9303-i2c"; + reg = <0xa>; + status = "okay"; + reset-gpios = <&gpio7 6 GPIO_ACTIVE_LOW>; + reset-duration = <200>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { /* RMII fixed link to master */ + reg = <0>; + label = "cpu"; + ethernet = <&master>; + }; + + port@1 { /* external port 1 */ + reg = <1>; + label = "lan1; + }; + + port@2 { /* external port 2 */ + reg = <2>; + label = "lan2"; + }; + }; + }; + +MDIO managed mode: + + master: masterdevice@X { + status = "okay"; + phy-handle = <&switch>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch: switch-phy@0 { + compatible = "smsc,lan9303-mdio"; + reg = <0>; + reset-gpios = <&gpio7 6 GPIO_ACTIVE_LOW>; + reset-duration = <100>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&master>; + }; + + port@1 { /* external port 1 */ + reg = <1>; + label = "lan1; + }; + + port@2 { /* external port 2 */ + reg = <2>; + label = "lan2"; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt index 7ef9dbb08957a593528a4d873d1f3af29892f5c2..1d4d0f49c9d06eb66d9957fb0661cec35ddc7af9 100644 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -26,6 +26,10 @@ Optional properties: - interrupt-controller : Indicates the switch is itself an interrupt controller. This is used for the PHY interrupts. #interrupt-cells = <2> : Controller uses two cells, number and flag +- eeprom-length : Set to the length of an EEPROM connected to the + switch. Must be set if the switch can not detect + the presence and/or size of a connected EEPROM, + otherwise optional. - mdio : Container of PHY and devices on the switches MDIO bus. - mdio? : Container of PHYs and devices on the external MDIO diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt new file mode 100644 index 0000000000000000000000000000000000000000..a9bc27b93ee326fe2ac665ffc143b0e3e345c0a0 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -0,0 +1,92 @@ +Mediatek MT7530 Ethernet switch +================================ + +Required properties: + +- compatible: Must be compatible = "mediatek,mt7530"; +- #address-cells: Must be 1. +- #size-cells: Must be 0. +- mediatek,mcm: Boolean; if defined, indicates that either MT7530 is the part + on multi-chip module belong to MT7623A has or the remotely standalone + chip as the function MT7623N reference board provided for. +- core-supply: Phandle to the regulator node necessary for the core power. +- io-supply: Phandle to the regulator node necessary for the I/O power. + See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt + for details for the regulator setup on these boards. + +If the property mediatek,mcm isn't defined, following property is required + +- reset-gpios: Should be a gpio specifier for a reset line. + +Else, following properties are required + +- resets : Phandle pointing to the system reset controller with + line index for the ethsys. +- reset-names : Should be set to "mcm". + +Required properties for the child nodes within ports container: + +- reg: Port address described must be 6 for CPU port and from 0 to 5 for + user ports. +- phy-mode: String, must be either "trgmii" or "rgmii" for port labeled + "cpu". + +See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional +required, optional properties and how the integrated switch subnodes must +be specified. + +Example: + + &mdio0 { + switch@0 { + compatible = "mediatek,mt7530"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + core-supply = <&mt6323_vpa_reg>; + io-supply = <&mt6323_vemc3v3_reg>; + reset-gpios = <&pio 33 0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "wan"; + }; + + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "trgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/faraday,ftmac.txt b/Documentation/devicetree/bindings/net/faraday,ftmac.txt new file mode 100644 index 0000000000000000000000000000000000000000..be4f55e23bf778297f6e813f1889f205219038fe --- /dev/null +++ b/Documentation/devicetree/bindings/net/faraday,ftmac.txt @@ -0,0 +1,24 @@ +Faraday Ethernet Controller + +Required properties: + +- compatible : Must contain "faraday,ftmac", as well as one of + the SoC specific identifiers: + "andestech,atmac100" + "moxa,moxart-mac" +- reg : Should contain register location and length +- interrupts : Should contain the mac interrupt number + +Example: + + mac0: mac@90900000 { + compatible = "moxa,moxart-mac"; + reg = <0x90900000 0x100>; + interrupts = <25 0>; + }; + + mac1: mac@92000000 { + compatible = "moxa,moxart-mac"; + reg = <0x92000000 0x100>; + interrupts = <27 0>; + }; diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index a1e3693cca1601e47096dc2edc2a9580b8b0569a..6f55bdd52f8a99be3c0741cf6411a00554c85778 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -15,6 +15,10 @@ Optional properties: - phy-reset-active-high : If present then the reset sequence using the GPIO specified in the "phy-reset-gpios" property is reversed (H=reset state, L=operation state). +- phy-reset-post-delay : Post reset delay in milliseconds. If present then + a delay of phy-reset-post-delay milliseconds will be observed after the + phy-reset-gpios has been toggled. Can be omitted thus no delay is + observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. - phy-supply : regulator that powers the Ethernet PHY. - phy-handle : phandle to the PHY device connected to this device. - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. diff --git a/Documentation/devicetree/bindings/net/ftgmac100.txt b/Documentation/devicetree/bindings/net/ftgmac100.txt new file mode 100644 index 0000000000000000000000000000000000000000..c1ce1680246f0134f08a8b1957d29d0f49f655ca --- /dev/null +++ b/Documentation/devicetree/bindings/net/ftgmac100.txt @@ -0,0 +1,35 @@ +* Faraday Technology FTGMAC100 gigabit ethernet controller + +Required properties: +- compatible: "faraday,ftgmac100" + + Must also contain one of these if used as part of an Aspeed AST2400 + or 2500 family SoC as they have some subtle tweaks to the + implementation: + + - "aspeed,ast2400-mac" + - "aspeed,ast2500-mac" + +- reg: Address and length of the register set for the device +- interrupts: Should contain ethernet controller interrupt + +Optional properties: +- phy-mode: See ethernet.txt file in the same directory. If the property is + absent, "rgmii" is assumed. Supported values are "rgmii*" and "rmii" for + aspeed parts. Other (unknown) parts will accept any value. +- use-ncsi: Use the NC-SI stack instead of an MDIO PHY. Currently assumes + rmii (100bT) but kept as a separate property in case NC-SI grows support + for a gigabit link. +- no-hw-checksum: Used to disable HW checksum support. Here for backward + compatibility as the driver now should have correct defaults based on + the SoC. + +Example: + + mac0: ethernet@1e660000 { + compatible = "aspeed,ast2500-mac", "faraday,ftgmac100"; + reg = <0x1e660000 0x180>; + interrupts = <2>; + status = "okay"; + use-ncsi; + }; diff --git a/Documentation/devicetree/bindings/net/ieee802154/ca8210.txt b/Documentation/devicetree/bindings/net/ieee802154/ca8210.txt new file mode 100644 index 0000000000000000000000000000000000000000..a1046e636fa14b494963f6770ffa578359dcd224 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ieee802154/ca8210.txt @@ -0,0 +1,28 @@ +* CA8210 IEEE 802.15.4 * + +Required properties: + - compatible: Should be "cascoda,ca8210" + - reg: Controlling chip select + - spi-max-frequency: Maximum clock speed, should be *less than* + 4000000 + - spi-cpol: Requires inverted clock polarity + - reset-gpio: GPIO attached to reset + - irq-gpio: GPIO attached to IRQ +Optional properties: + - extclock-enable: Include for the ca8210 to route its 16MHz clock + to an output + - extclock-freq: Frequency in Hz of the external clock + - extclock-gpio: GPIO of the ca8210 to output the clock on + +Example: + ca8210@0 { + compatible = "cascoda,ca8210"; + reg = <0>; + spi-max-frequency = <3000000>; + spi-cpol; + reset-gpio = <&gpio1 1 GPIO_ACTIVE_HIGH>; + irq-gpio = <&gpio1 2 GPIO_ACTIVE_HIGH>; + extclock-enable; + extclock-freq = 16000000; + extclock-gpio = 2; + }; diff --git a/Documentation/devicetree/bindings/net/marvell,prestera.txt b/Documentation/devicetree/bindings/net/marvell,prestera.txt index 5fbab29718e8ee087ffd2a3d6dc13e973d1bf48d..c329608fa887b1fad655f7c778ca539b4c5d36c6 100644 --- a/Documentation/devicetree/bindings/net/marvell,prestera.txt +++ b/Documentation/devicetree/bindings/net/marvell,prestera.txt @@ -32,19 +32,16 @@ DFX Server bindings ------------------- Required properties: -- compatible: must be "marvell,dfx-server" +- compatible: must be "marvell,dfx-server", "simple-bus" +- ranges: describes the address mapping of a memory-mapped bus. - reg: address and length of the register set for the device. Example: -dfx-registers { - compatible = "simple-bus"; +dfx-server { + compatible = "marvell,dfx-server", "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges = <0 MBUS_ID(0x08, 0x00) 0 0x100000>; - - dfx: dfx@0 { - compatible = "marvell,dfx-server"; - reg = <0 0x100000>; - }; + reg = ; }; diff --git a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt index 9417e54c26c0eba82004047d06fda4f3dd4d02b8..ccdabdcc8618c24e6d6801845aee62c54306466c 100644 --- a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt +++ b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt @@ -7,17 +7,20 @@ interface. Required properties: - compatible: "marvell,orion-mdio" -- reg: address and length of the SMI register +- reg: address and length of the MDIO registers. When an interrupt is + not present, the length is the size of the SMI register (4 bytes) + otherwise it must be 0x84 bytes to cover the interrupt control + registers. Optional properties: - interrupts: interrupt line number for the SMI error/done interrupt -- clocks: Phandle to the clock control device and gate bit +- clocks: phandle for up to three required clocks for the MDIO instance The child nodes of the MDIO driver are the individual PHY devices connected to this MDIO bus. They must have a "reg" property given the PHY address on the MDIO bus. -Example at the SoC level: +Example at the SoC level without an interrupt property: mdio { #address-cells = <1>; @@ -26,6 +29,16 @@ mdio { reg = <0xd0072004 0x4>; }; +Example with an interrupt property: + +mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "marvell,orion-mdio"; + reg = <0xd0072004 0x84>; + interrupts = <30>; +}; + And at the board level: mdio { diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt index 4754364df4c66adfc53e2546e31e0d124070dca1..6b4956beff8c42c3214906c83d94072fca8e9083 100644 --- a/Documentation/devicetree/bindings/net/marvell-pp2.txt +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -1,17 +1,28 @@ -* Marvell Armada 375 Ethernet Controller (PPv2) +* Marvell Armada 375 Ethernet Controller (PPv2.1) + Marvell Armada 7K/8K Ethernet Controller (PPv2.2) Required properties: -- compatible: should be "marvell,armada-375-pp2" +- compatible: should be one of: + "marvell,armada-375-pp2" + "marvell,armada-7k-pp2" - reg: addresses and length of the register sets for the device. - Must contain the following register sets: + For "marvell,armada-375-pp2", must contain the following register + sets: - common controller registers - LMS registers - In addition, at least one port register set is required. -- clocks: a pointer to the reference clocks for this device, consequently: - - main controller clock - - GOP clock -- clock-names: names of used clocks, must be "pp_clk" and "gop_clk". + - one register area per Ethernet port + For "marvell,armada-7k-pp2", must contain the following register + sets: + - packet processor registers + - networking interfaces registers + +- clocks: pointers to the reference clocks for this device, consequently: + - main controller clock (for both armada-375-pp2 and armada-7k-pp2) + - GOP clock (for both armada-375-pp2 and armada-7k-pp2) + - MG clock (only for armada-7k-pp2) +- clock-names: names of used clocks, must be "pp_clk", "gop_clk" and + "mg_clk" (the latter only for armada-7k-pp2). The ethernet ports are represented by subnodes. At least one port is required. @@ -19,8 +30,10 @@ required. Required properties (port): - interrupts: interrupt for the port -- port-id: should be '0' or '1' for ethernet ports, and '2' for the - loopback port +- port-id: ID of the port from the MAC point of view +- gop-port-id: only for marvell,armada-7k-pp2, ID of the port from the + GOP (Group Of Ports) point of view. This ID is used to index the + per-port registers in the second register area. - phy-mode: See ethernet.txt file in the same directory Optional properties (port): @@ -29,7 +42,7 @@ Optional properties (port): - phy: a phandle to a phy node defining the PHY address (as the reg property, a single integer). -Example: +Example for marvell,armada-375-pp2: ethernet@f0000 { compatible = "marvell,armada-375-pp2"; @@ -57,3 +70,30 @@ ethernet@f0000 { phy-mode = "gmii"; }; }; + +Example for marvell,armada-7k-pp2: + +cpm_ethernet: ethernet@0 { + compatible = "marvell,armada-7k-pp22"; + reg = <0x0 0x100000>, <0x129000 0xb000>; + clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>, <&cpm_syscon0 1 5>; + clock-names = "pp_clk", "gop_clk", "gp_clk"; + + eth0: eth0 { + interrupts = ; + port-id = <0>; + gop-port-id = <0>; + }; + + eth1: eth1 { + interrupts = ; + port-id = <1>; + gop-port-id = <2>; + }; + + eth2: eth2 { + interrupts = ; + port-id = <2>; + gop-port-id = <3>; + }; +}; diff --git a/Documentation/devicetree/bindings/net/mdio.txt b/Documentation/devicetree/bindings/net/mdio.txt new file mode 100644 index 0000000000000000000000000000000000000000..96a53f89aa6e2fa7f9a0d4c6c3b18d03a3b75994 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mdio.txt @@ -0,0 +1,37 @@ +Common MDIO bus properties. + +These are generic properties that can apply to any MDIO bus. + +Optional properties: +- reset-gpios: One GPIO that control the RESET lines of all PHYs on that MDIO + bus. +- reset-delay-us: RESET pulse width in microseconds. + +A list of child nodes, one per device on the bus is expected. These +should follow the generic phy.txt, or a device specific binding document. + +The 'reset-delay-us' indicates the RESET signal pulse width in microseconds and +applies to all PHY devices. It must therefore be appropriately determined based +on all PHY requirements (maximum value of all per-PHY RESET pulse widths). + +Example : +This example shows these optional properties, plus other properties +required for the TI Davinci MDIO driver. + + davinci_mdio: ethernet@0x5c030000 { + compatible = "ti,davinci_mdio"; + reg = <0x5c030000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + + reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>; + reset-delay-us = <2>; + + ethphy0: ethernet-phy@1 { + reg = <1>; + }; + + ethphy1: ethernet-phy@3 { + reg = <3>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/moxa,moxart-mac.txt b/Documentation/devicetree/bindings/net/moxa,moxart-mac.txt deleted file mode 100644 index 583418b2c127ebe74f2f8e904ea2aa8258f458d3..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/net/moxa,moxart-mac.txt +++ /dev/null @@ -1,21 +0,0 @@ -MOXA ART Ethernet Controller - -Required properties: - -- compatible : Must be "moxa,moxart-mac" -- reg : Should contain register location and length -- interrupts : Should contain the mac interrupt number - -Example: - - mac0: mac@90900000 { - compatible = "moxa,moxart-mac"; - reg = <0x90900000 0x100>; - interrupts = <25 0>; - }; - - mac1: mac@92000000 { - compatible = "moxa,moxart-mac"; - reg = <0x92000000 0x100>; - interrupts = <27 0>; - }; diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt index 32b35a07abe4224c298e601eb09ad50f1c0c0b2f..c627bbb3009e5a3f54472d58c2f5ef8f37f871fa 100644 --- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt +++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt @@ -5,8 +5,8 @@ Required properties: - spi-max-frequency: Maximum SPI frequency (<= 2000000). - interrupt-parent: phandle of parent interrupt handler. - interrupts: A single interrupt specifier. -- ti,enable-gpios: Two GPIO entries used for 'EN' and 'EN2' pins on the - TRF7970A. +- ti,enable-gpios: One or two GPIO entries used for 'EN' and 'EN2' pins on the + TRF7970A. EN2 is optional. - vin-supply: Regulator for supply voltage to VIN pin Optional SoC Specific Properties: @@ -21,6 +21,8 @@ Optional SoC Specific Properties: - t5t-rmb-extra-byte-quirk: Specify that the trf7970a has the erratum where an extra byte is returned by Read Multiple Block commands issued to Type 5 tags. +- vdd-io-supply: Regulator specifying voltage for vdd-io +- clock-frequency: Set to specify that the input frequency to the trf7970a is 13560000Hz or 27120000Hz Example (for ARM-based BeagleBone with TRF7970A on SPI1): @@ -39,10 +41,12 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1): <&gpio2 5 GPIO_ACTIVE_LOW>; vin-supply = <&ldo3_reg>; vin-voltage-override = <5000000>; + vdd-io-supply = <&ldo2_reg>; autosuspend-delay = <30000>; irq-status-read-quirk; en2-rf-quirk; t5t-rmb-extra-byte-quirk; + clock-frequency = <27120000>; status = "okay"; }; }; diff --git a/Documentation/devicetree/bindings/net/nokia-bluetooth.txt b/Documentation/devicetree/bindings/net/nokia-bluetooth.txt new file mode 100644 index 0000000000000000000000000000000000000000..42be7dc9a70ba7cb0f653270e4941dc85046b6e8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nokia-bluetooth.txt @@ -0,0 +1,51 @@ +Nokia Bluetooth Chips +--------------------- + +Nokia phones often come with UART connected bluetooth chips from different +vendors and modified device API. Those devices speak a protocol named H4+ +(also known as h4p) by Nokia, which is similar to the H4 protocol from the +Bluetooth standard. In addition to the H4 protocol it specifies two more +UART status lines for wakeup of UART transceivers to improve power management +and a few new packet types used to negotiate uart speed. + +Required properties: + + - compatible: should contain "nokia,h4p-bluetooth" as well as one of the following: + * "brcm,bcm2048-nokia" + * "ti,wl1271-bluetooth-nokia" + - reset-gpios: GPIO specifier, used to reset the BT module (active low) + - bluetooth-wakeup-gpios: GPIO specifier, used to wakeup the BT module (active high) + - host-wakeup-gpios: GPIO specifier, used to wakeup the host processor (active high) + - clock-names: should be "sysclk" + - clocks: should contain a clock specifier for every name in clock-names + +Optional properties: + + - None + +Example: + +/ { + /* controlled (enabled/disabled) directly by BT module */ + bluetooth_clk: vctcxo { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <38400000>; + }; +}; + +&uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&uart2_pins>; + + bluetooth { + compatible = "ti,wl1271-bluetooth-nokia", "nokia,h4p-bluetooth"; + + reset-gpios = <&gpio1 26 GPIO_ACTIVE_LOW>; /* gpio26 */ + host-wakeup-gpios = <&gpio4 5 GPIO_ACTIVE_HIGH>; /* gpio101 */ + bluetooth-wakeup-gpios = <&gpio2 5 GPIO_ACTIVE_HIGH>; /* gpio37 */ + + clocks = <&bluetooth_clk>; + clock-names = "sysclk"; + }; +}; diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt index d3bfc2b30fb5ecc07493b4c5510d5cdc32b3ff3a..c3a7be6615c547a5001c67996e18458b7f8e5b5d 100644 --- a/Documentation/devicetree/bindings/net/stmmac.txt +++ b/Documentation/devicetree/bindings/net/stmmac.txt @@ -7,9 +7,12 @@ Required properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - interrupts: Should contain the STMMAC interrupts -- interrupt-names: Should contain the interrupt names "macirq" - "eth_wake_irq" if this interrupt is supported in the "interrupts" - property +- interrupt-names: Should contain a list of interrupt names corresponding to + the interrupts in the interrupts property, if available. + Valid interrupt names are: + - "macirq" (combined signal for various interrupt events) + - "eth_wake_irq" (the interrupt to manage the remote wake-up packet detection) + - "eth_lpi" (the interrupt that occurs when Tx or Rx enters/exits LPI state) - phy-mode: See ethernet.txt file in the same directory. - snps,reset-gpio gpio number for phy reset. - snps,reset-active-low boolean flag to indicate if phy reset is active low. @@ -28,9 +31,9 @@ Optional properties: clocks may be specified in derived bindings. - clock-names: One name for each entry in the clocks property, the first one should be "stmmaceth" and the second one should be "pclk". -- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is - available this clock is used for programming the Timestamp Addend Register. - If not passed then the system clock will be used and this is fine on some +- ptp_ref: this is the PTP reference clock; in case of the PTP is available + this clock is used for programming the Timestamp Addend Register. If not + passed then the system clock will be used and this is fine on some platforms. - tx-fifo-depth: See ethernet.txt file in the same directory - rx-fifo-depth: See ethernet.txt file in the same directory @@ -72,7 +75,45 @@ Optional properties: - snps,mb: mixed-burst - snps,rb: rebuild INCRx Burst - mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus. - +- Multiple RX Queues parameters: below the list of all the parameters to + configure the multiple RX queues: + - snps,rx-queues-to-use: number of RX queues to be used in the driver + - Choose one of these RX scheduling algorithms: + - snps,rx-sched-sp: Strict priority + - snps,rx-sched-wsp: Weighted Strict priority + - For each RX queue + - Choose one of these modes: + - snps,dcb-algorithm: Queue to be enabled as DCB + - snps,avb-algorithm: Queue to be enabled as AVB + - snps,map-to-dma-channel: Channel to map + - Specifiy specific packet routing: + - snps,route-avcp: AV Untagged Control packets + - snps,route-ptp: PTP Packets + - snps,route-dcbcp: DCB Control Packets + - snps,route-up: Untagged Packets + - snps,route-multi-broad: Multicast & Broadcast Packets + - snps,priority: RX queue priority (Range: 0x0 to 0xF) +- Multiple TX Queues parameters: below the list of all the parameters to + configure the multiple TX queues: + - snps,tx-queues-to-use: number of TX queues to be used in the driver + - Choose one of these TX scheduling algorithms: + - snps,tx-sched-wrr: Weighted Round Robin + - snps,tx-sched-wfq: Weighted Fair Queuing + - snps,tx-sched-dwrr: Deficit Weighted Round Robin + - snps,tx-sched-sp: Strict priority + - For each TX queue + - snps,weight: TX queue weight (if using a DCB weight algorithm) + - Choose one of these modes: + - snps,dcb-algorithm: TX queue will be working in DCB + - snps,avb-algorithm: TX queue will be working in AVB + [Attention] Queue 0 is reserved for legacy traffic + and so no AVB is available in this queue. + - Configure Credit Base Shaper (if AVB Mode selected): + - snps,send_slope: enable Low Power Interface + - snps,idle_slope: unlock on WoL + - snps,high_credit: max write outstanding req. limit + - snps,low_credit: max read outstanding req. limit + - snps,priority: TX queue priority (Range: 0x0 to 0xF) Examples: stmmac_axi_setup: stmmac-axi-config { @@ -81,12 +122,41 @@ Examples: snps,blen = <256 128 64 32 0 0 0>; }; + mtl_rx_setup: rx-queues-config { + snps,rx-queues-to-use = <1>; + snps,rx-sched-sp; + queue0 { + snps,dcb-algorithm; + snps,map-to-dma-channel = <0x0>; + snps,priority = <0x0>; + }; + }; + + mtl_tx_setup: tx-queues-config { + snps,tx-queues-to-use = <2>; + snps,tx-sched-wrr; + queue0 { + snps,weight = <0x10>; + snps,dcb-algorithm; + snps,priority = <0x0>; + }; + + queue1 { + snps,avb-algorithm; + snps,send_slope = <0x1000>; + snps,idle_slope = <0x1000>; + snps,high_credit = <0x3E800>; + snps,low_credit = <0xFFC18000>; + snps,priority = <0x1>; + }; + }; + gmac0: ethernet@e0800000 { compatible = "st,spear600-gmac"; reg = <0xe0800000 0x8000>; interrupt-parent = <&vic1>; - interrupts = <24 23>; - interrupt-names = "macirq", "eth_wake_irq"; + interrupts = <24 23 22>; + interrupt-names = "macirq", "eth_wake_irq", "eth_lpi"; mac-address = [000000000000]; /* Filled in by U-Boot */ max-frame-size = <3800>; phy-mode = "gmii"; @@ -104,4 +174,6 @@ Examples: phy1: ethernet-phy@0 { }; }; + snps,mtl-rx-config = <&mtl_rx_setup>; + snps,mtl-tx-config = <&mtl_tx_setup>; }; diff --git a/Documentation/devicetree/bindings/net/ti,wilink-st.txt b/Documentation/devicetree/bindings/net/ti,wilink-st.txt new file mode 100644 index 0000000000000000000000000000000000000000..cbad73a84ac43250865fd6e47dc47a388e082342 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ti,wilink-st.txt @@ -0,0 +1,35 @@ +TI WiLink 7/8 (wl12xx/wl18xx) Shared Transport BT/FM/GPS devices + +TI WiLink devices have a UART interface for providing Bluetooth, FM radio, +and GPS over what's called "shared transport". The shared transport is +standard BT HCI protocol with additional channels for the other functions. + +These devices also have a separate WiFi interface as described in +wireless/ti,wlcore.txt. + +This bindings follows the UART slave device binding in +../serial/slave-device.txt. + +Required properties: + - compatible: should be one of the following: + "ti,wl1271-st" + "ti,wl1273-st" + "ti,wl1831-st" + "ti,wl1835-st" + "ti,wl1837-st" + +Optional properties: + - enable-gpios : GPIO signal controlling enabling of BT. Active high. + - vio-supply : Vio input supply (1.8V) + - vbat-supply : Vbat input supply (2.9-4.8V) + +Example: + +&serial0 { + compatible = "ns16550a"; + ... + bluetooth { + compatible = "ti,wl1835-st"; + enable-gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>; + }; +}; diff --git a/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt b/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt index d543ed3f5363251d0c22b5ddac8a5b078605de93..ef06d061913cfc1aa2d488073b53f9800fd33cd9 100644 --- a/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt +++ b/Documentation/devicetree/bindings/nvmem/allwinner,sunxi-sid.txt @@ -1,7 +1,11 @@ Allwinner sunxi-sid Required properties: -- compatible: "allwinner,sun4i-a10-sid" or "allwinner,sun7i-a20-sid" +- compatible: Should be one of the following: + "allwinner,sun4i-a10-sid" + "allwinner,sun7i-a20-sid" + "allwinner,sun8i-h3-sid" + - reg: Should contain registers location and length = Data cells = diff --git a/Documentation/devicetree/bindings/nvmem/imx-iim.txt b/Documentation/devicetree/bindings/nvmem/imx-iim.txt new file mode 100644 index 0000000000000000000000000000000000000000..1978c5bcd96d1ff6f6c83520e9ad536349112b07 --- /dev/null +++ b/Documentation/devicetree/bindings/nvmem/imx-iim.txt @@ -0,0 +1,22 @@ +Freescale i.MX IC Identification Module (IIM) device tree bindings + +This binding represents the IC Identification Module (IIM) found on +i.MX25, i.MX27, i.MX31, i.MX35, i.MX51 and i.MX53 SoCs. + +Required properties: +- compatible: should be one of + "fsl,imx25-iim", "fsl,imx27-iim", + "fsl,imx31-iim", "fsl,imx35-iim", + "fsl,imx51-iim", "fsl,imx53-iim", +- reg: Should contain the register base and length. +- interrupts: Should contain the interrupt for the IIM +- clocks: Should contain a phandle pointing to the gated peripheral clock. + +Example: + + iim: iim@63f98000 { + compatible = "fsl,imx53-iim", "fsl,imx27-iim"; + reg = <0x63f98000 0x4000>; + interrupts = <69>; + clocks = <&clks IMX5_CLK_IIM_GATE>; + }; diff --git a/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt b/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt index 966a72ecc6bd8586a528b15119dd453b4d88d13b..70d791b03ea13cf52936d6542289a52c991c38a1 100644 --- a/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt +++ b/Documentation/devicetree/bindings/nvmem/imx-ocotp.txt @@ -9,14 +9,19 @@ Required properties: "fsl,imx6sl-ocotp" (i.MX6SL), or "fsl,imx6sx-ocotp" (i.MX6SX), "fsl,imx6ul-ocotp" (i.MX6UL), + "fsl,imx7d-ocotp" (i.MX7D/S), followed by "syscon". - reg: Should contain the register base and length. - clocks: Should contain a phandle pointing to the gated peripheral clock. +Optional properties: +- read-only: disable write access + Example: ocotp: ocotp@021bc000 { compatible = "fsl,imx6q-ocotp", "syscon"; reg = <0x021bc000 0x4000>; clocks = <&clks IMX6QDL_CLK_IIM>; + read-only; }; diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt index 1392c705cecae84c1bdbd601e27d91d78dc60980..b2480dd38c1135747c6bcceb5153df94a6a36326 100644 --- a/Documentation/devicetree/bindings/pci/designware-pcie.txt +++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt @@ -6,30 +6,40 @@ Required properties: - reg-names: Must be "config" for the PCIe configuration space. (The old way of getting the configuration address space from "ranges" is deprecated and should be avoided.) +- num-lanes: number of lanes to use +RC mode: - #address-cells: set to <3> - #size-cells: set to <2> - device_type: set to "pci" - ranges: ranges for the PCI memory and I/O regions - #interrupt-cells: set to <1> -- interrupt-map-mask and interrupt-map: standard PCI properties - to define the mapping of the PCIe interface to interrupt +- interrupt-map-mask and interrupt-map: standard PCI + properties to define the mapping of the PCIe interface to interrupt numbers. -- num-lanes: number of lanes to use +EP mode: +- num-ib-windows: number of inbound address translation + windows +- num-ob-windows: number of outbound address translation + windows Optional properties: -- num-viewport: number of view ports configured in hardware. If a platform - does not specify it, the driver assumes 2. - num-lanes: number of lanes to use (this property should be specified unless the link is brought already up in BIOS) - reset-gpio: gpio pin number of power good signal -- bus-range: PCI bus numbers covered (it is recommended for new devicetrees to - specify this property, to keep backwards compatibility a range of 0x00-0xff - is assumed if not present) - clocks: Must contain an entry for each entry in clock-names. See ../clocks/clock-bindings.txt for details. - clock-names: Must include the following entries: - "pcie" - "pcie_bus" +RC mode: +- num-viewport: number of view ports configured in + hardware. If a platform does not specify it, the driver assumes 2. +- bus-range: PCI bus numbers covered (it is recommended + for new devicetrees to specify this property, to keep backwards + compatibility a range of 0x00-0xff is assumed if not present) +EP mode: +- max-functions: maximum number of functions that can be + configured Example configuration: diff --git a/Documentation/devicetree/bindings/pci/faraday,ftpci100.txt b/Documentation/devicetree/bindings/pci/faraday,ftpci100.txt new file mode 100644 index 0000000000000000000000000000000000000000..35d4a979bb7b2012fc653ba23c622a76db42451e --- /dev/null +++ b/Documentation/devicetree/bindings/pci/faraday,ftpci100.txt @@ -0,0 +1,129 @@ +Faraday Technology FTPCI100 PCI Host Bridge + +This PCI bridge is found inside that Cortina Systems Gemini SoC platform and +is a generic IP block from Faraday Technology. It exists in two variants: +plain and dual PCI. The plain version embeds a cascading interrupt controller +into the host bridge. The dual version routes the interrupts to the host +chips interrupt controller. + +The host controller appear on the PCI bus with vendor ID 0x159b (Faraday +Technology) and product ID 0x4321. + +Mandatory properties: + +- compatible: ranging from specific to generic, should be one of + "cortina,gemini-pci", "faraday,ftpci100" + "cortina,gemini-pci-dual", "faraday,ftpci100-dual" + "faraday,ftpci100" + "faraday,ftpci100-dual" +- reg: memory base and size for the host bridge +- #address-cells: set to <3> +- #size-cells: set to <2> +- #interrupt-cells: set to <1> +- bus-range: set to <0x00 0xff> +- device_type, set to "pci" +- ranges: see pci.txt +- interrupt-map-mask: see pci.txt +- interrupt-map: see pci.txt +- dma-ranges: three ranges for the inbound memory region. The ranges must + be aligned to a 1MB boundary, and may be 1MB, 2MB, 4MB, 8MB, 16MB, 32MB, 64MB, + 128MB, 256MB, 512MB, 1GB or 2GB in size. The memory should be marked as + pre-fetchable. + +Mandatory subnodes: +- For "faraday,ftpci100" a node representing the interrupt-controller inside the + host bridge is mandatory. It has the following mandatory properties: + - interrupt: see interrupt-controller/interrupts.txt + - interrupt-parent: see interrupt-controller/interrupts.txt + - interrupt-controller: see interrupt-controller/interrupts.txt + - #address-cells: set to <0> + - #interrupt-cells: set to <1> + +I/O space considerations: + +The plain variant has 128MiB of non-prefetchable memory space, whereas the +"dual" variant has 64MiB. Take this into account when describing the ranges. + +Interrupt map considerations: + +The "dual" variant will get INT A, B, C, D from the system interrupt controller +and should point to respective interrupt in that controller in its +interrupt-map. + +The code which is the only documentation of how the Faraday PCI (the non-dual +variant) interrupts assigns the default interrupt mapping/swizzling has +typically been like this, doing the swizzling on the interrupt controller side +rather than in the interconnect: + +interrupt-map-mask = <0xf800 0 0 7>; +interrupt-map = + <0x4800 0 0 1 &pci_intc 0>, /* Slot 9 */ + <0x4800 0 0 2 &pci_intc 1>, + <0x4800 0 0 3 &pci_intc 2>, + <0x4800 0 0 4 &pci_intc 3>, + <0x5000 0 0 1 &pci_intc 1>, /* Slot 10 */ + <0x5000 0 0 2 &pci_intc 2>, + <0x5000 0 0 3 &pci_intc 3>, + <0x5000 0 0 4 &pci_intc 0>, + <0x5800 0 0 1 &pci_intc 2>, /* Slot 11 */ + <0x5800 0 0 2 &pci_intc 3>, + <0x5800 0 0 3 &pci_intc 0>, + <0x5800 0 0 4 &pci_intc 1>, + <0x6000 0 0 1 &pci_intc 3>, /* Slot 12 */ + <0x6000 0 0 2 &pci_intc 0>, + <0x6000 0 0 3 &pci_intc 1>, + <0x6000 0 0 4 &pci_intc 2>; + +Example: + +pci@50000000 { + compatible = "cortina,gemini-pci", "faraday,ftpci100"; + reg = <0x50000000 0x100>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>, /* PCI A */ + <26 IRQ_TYPE_LEVEL_HIGH>, /* PCI B */ + <27 IRQ_TYPE_LEVEL_HIGH>, /* PCI C */ + <28 IRQ_TYPE_LEVEL_HIGH>; /* PCI D */ + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + + bus-range = <0x00 0xff>; + ranges = /* 1MiB I/O space 0x50000000-0x500fffff */ + <0x01000000 0 0 0x50000000 0 0x00100000>, + /* 128MiB non-prefetchable memory 0x58000000-0x5fffffff */ + <0x02000000 0 0x58000000 0x58000000 0 0x08000000>; + + /* DMA ranges */ + dma-ranges = + /* 128MiB at 0x00000000-0x07ffffff */ + <0x02000000 0 0x00000000 0x00000000 0 0x08000000>, + /* 64MiB at 0x00000000-0x03ffffff */ + <0x02000000 0 0x00000000 0x00000000 0 0x04000000>, + /* 64MiB at 0x00000000-0x03ffffff */ + <0x02000000 0 0x00000000 0x00000000 0 0x04000000>; + + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = + <0x4800 0 0 1 &pci_intc 0>, /* Slot 9 */ + <0x4800 0 0 2 &pci_intc 1>, + <0x4800 0 0 3 &pci_intc 2>, + <0x4800 0 0 4 &pci_intc 3>, + <0x5000 0 0 1 &pci_intc 1>, /* Slot 10 */ + <0x5000 0 0 2 &pci_intc 2>, + <0x5000 0 0 3 &pci_intc 3>, + <0x5000 0 0 4 &pci_intc 0>, + <0x5800 0 0 1 &pci_intc 2>, /* Slot 11 */ + <0x5800 0 0 2 &pci_intc 3>, + <0x5800 0 0 3 &pci_intc 0>, + <0x5800 0 0 4 &pci_intc 1>, + <0x6000 0 0 1 &pci_intc 3>, /* Slot 12 */ + <0x6000 0 0 2 &pci_intc 0>, + <0x6000 0 0 3 &pci_intc 0>, + <0x6000 0 0 4 &pci_intc 0>; + pci_intc: interrupt-controller { + interrupt-parent = <&intcon>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; +}; diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt index 83aeb1f5a645ce2fe494e14dedce27dc7085155f..e3d5680875b123326c7cc5527c8f85d7219fe691 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt @@ -4,7 +4,11 @@ This PCIe host controller is based on the Synopsis Designware PCIe IP and thus inherits all the common properties defined in designware-pcie.txt. Required properties: -- compatible: "fsl,imx6q-pcie", "fsl,imx6sx-pcie", "fsl,imx6qp-pcie" +- compatible: + - "fsl,imx6q-pcie" + - "fsl,imx6sx-pcie", + - "fsl,imx6qp-pcie" + - "fsl,imx7d-pcie" - reg: base address and length of the PCIe controller - interrupts: A list of interrupt outputs of the controller. Must contain an entry for each entry in the interrupt-names property. @@ -34,6 +38,14 @@ Additional required properties for imx6sx-pcie: - clock names: Must include the following additional entries: - "pcie_inbound_axi" +Additional required properties for imx7d-pcie: +- power-domains: Must be set to a phandle pointing to PCIE_PHY power domain +- resets: Must contain phandles to PCIe-related reset lines exposed by SRC + IP block +- reset-names: Must contain the following entires: + - "pciephy" + - "apps" + Example: pcie@0x01000000 { diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt index b7fa3b97986d5fff391bf4ff1a01bdd25ce096e9..a339dbb154933282ee06aaeec0571e0d95a72b42 100644 --- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt +++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt @@ -44,13 +44,19 @@ Hip05 Example (note that Hip06 is the same except compatible): }; HiSilicon Hip06/Hip07 PCIe host bridge DT (almost-ECAM) description. + +Some BIOSes place the host controller in a mode where it is ECAM +compliant for all devices other than the root complex. In such cases, +the host controller should be described as below. + The properties and their meanings are identical to those described in host-generic-pci.txt except as listed below. Properties of the host controller node that differ from host-generic-pci.txt: -- compatible : Must be "hisilicon,pcie-almost-ecam" +- compatible : Must be "hisilicon,hip06-pcie-ecam", or + "hisilicon,hip07-pcie-ecam" - reg : Two entries: First the ECAM configuration space for any other bus underneath the root bus. Second, the base @@ -59,7 +65,7 @@ host-generic-pci.txt: Example: pcie0: pcie@a0090000 { - compatible = "hisilicon,pcie-almost-ecam"; + compatible = "hisilicon,hip06-pcie-ecam"; reg = <0 0xb0000000 0 0x2000000>, /* ECAM configuration space */ <0 0xa0090000 0 0x10000>; /* host bridge registers */ bus-range = <0 31>; diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt b/Documentation/devicetree/bindings/pci/ti-pci.txt index 60e25161f35181bfd026b48519f43666d33f091e..6a07c96227e0a0a9bd96530c2833df195657e7dc 100644 --- a/Documentation/devicetree/bindings/pci/ti-pci.txt +++ b/Documentation/devicetree/bindings/pci/ti-pci.txt @@ -1,17 +1,22 @@ TI PCI Controllers PCIe Designware Controller - - compatible: Should be "ti,dra7-pcie"" - - reg : Two register ranges as listed in the reg-names property - - reg-names : The first entry must be "ti-conf" for the TI specific registers - The second entry must be "rc-dbics" for the designware pcie - registers - The third entry must be "config" for the PCIe configuration space + - compatible: Should be "ti,dra7-pcie" for RC + Should be "ti,dra7-pcie-ep" for EP - phys : list of PHY specifiers (used by generic PHY framework) - phy-names : must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the number of PHYs as specified in *phys* property. - ti,hwmods : Name of the hwmod associated to the pcie, "pcie", where is the instance number of the pcie from the HW spec. + - num-lanes as specified in ../designware-pcie.txt + +HOST MODE +========= + - reg : Two register ranges as listed in the reg-names property + - reg-names : The first entry must be "ti-conf" for the TI specific registers + The second entry must be "rc-dbics" for the DesignWare PCIe + registers + The third entry must be "config" for the PCIe configuration space - interrupts : Two interrupt entries must be specified. The first one is for main interrupt line and the second for MSI interrupt line. - #address-cells, @@ -19,13 +24,36 @@ PCIe Designware Controller #interrupt-cells, device_type, ranges, - num-lanes, interrupt-map-mask, interrupt-map : as specified in ../designware-pcie.txt +DEVICE MODE +=========== + - reg : Four register ranges as listed in the reg-names property + - reg-names : "ti-conf" for the TI specific registers + "ep_dbics" for the standard configuration registers as + they are locally accessed within the DIF CS space + "ep_dbics2" for the standard configuration registers as + they are locally accessed within the DIF CS2 space + "addr_space" used to map remote RC address space + - interrupts : one interrupt entries must be specified for main interrupt. + - num-ib-windows : number of inbound address translation windows + - num-ob-windows : number of outbound address translation windows + - ti,syscon-unaligned-access: phandle to the syscon DT node. The 1st argument + should contain the register offset within syscon + and the 2nd argument should contain the bit field + for setting the bit to enable unaligned + access. + Optional Property: - gpios : Should be added if a gpio line is required to drive PERST# line +NOTE: Two DT nodes may be added for each PCI controller; one for host +mode and another for device mode. So in order for PCI to +work in host mode, EP mode DT node should be disabled and in order to PCI to +work in EP mode, host mode DT node should be disabled. Host mode and EP +mode are mutually exclusive. + Example: axi { compatible = "simple-bus"; diff --git a/Documentation/devicetree/bindings/phy/phy-mt65xx-usb.txt b/Documentation/devicetree/bindings/phy/phy-mt65xx-usb.txt index 33a2b1ee3f3eb20e09c8cefe400292468b3f6e43..0acc5a99fb79227262ed6ef1f8d8774fefcb914a 100644 --- a/Documentation/devicetree/bindings/phy/phy-mt65xx-usb.txt +++ b/Documentation/devicetree/bindings/phy/phy-mt65xx-usb.txt @@ -6,12 +6,11 @@ This binding describes a usb3.0 phy for mt65xx platforms of Medaitek SoC. Required properties (controller (parent) node): - compatible : should be one of "mediatek,mt2701-u3phy" + "mediatek,mt2712-u3phy" "mediatek,mt8173-u3phy" - - reg : offset and length of register for phy, exclude port's - register. - - clocks : a list of phandle + clock-specifier pairs, one for each - entry in clock-names - - clock-names : must contain + - clocks : (deprecated, use port's clocks instead) a list of phandle + + clock-specifier pairs, one for each entry in clock-names + - clock-names : (deprecated, use port's one instead) must contain "u3phya_ref": for reference clock of usb3.0 analog phy. Required nodes : a sub-node is required for each port the controller @@ -19,8 +18,19 @@ Required nodes : a sub-node is required for each port the controller 'reg' property is used inside these nodes to describe the controller's topology. +Optional properties (controller (parent) node): + - reg : offset and length of register shared by multiple ports, + exclude port's private register. It is needed on mt2701 + and mt8173, but not on mt2712. + Required properties (port (child) node): - reg : address and length of the register set for the port. +- clocks : a list of phandle + clock-specifier pairs, one for each + entry in clock-names +- clock-names : must contain + "ref": 48M reference clock for HighSpeed analog phy; and 26M + reference clock for SuperSpeed analog phy, sometimes is + 24M, 25M or 27M, depended on platform. - #phy-cells : should be 1 (See second example) cell after port phandle is phy type from: - PHY_TYPE_USB2 @@ -31,21 +41,31 @@ Example: u3phy: usb-phy@11290000 { compatible = "mediatek,mt8173-u3phy"; reg = <0 0x11290000 0 0x800>; - clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>; - clock-names = "u3phya_ref"; #address-cells = <2>; #size-cells = <2>; ranges; status = "okay"; - phy_port0: port@11290800 { - reg = <0 0x11290800 0 0x800>; + u2port0: usb-phy@11290800 { + reg = <0 0x11290800 0 0x100>; + clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>; + clock-names = "ref"; #phy-cells = <1>; status = "okay"; }; - phy_port1: port@11291000 { - reg = <0 0x11291000 0 0x800>; + u3port0: usb-phy@11290900 { + reg = <0 0x11290800 0 0x700>; + clocks = <&clk26m>; + clock-names = "ref"; + #phy-cells = <1>; + status = "okay"; + }; + + u2port1: usb-phy@11291000 { + reg = <0 0x11291000 0 0x100>; + clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>; + clock-names = "ref"; #phy-cells = <1>; status = "okay"; }; @@ -64,7 +84,54 @@ Example: usb30: usb@11270000 { ... - phys = <&phy_port0 PHY_TYPE_USB3>; - phy-names = "usb3-0"; + phys = <&u2port0 PHY_TYPE_USB2>, <&u3port0 PHY_TYPE_USB3>; + phy-names = "usb2-0", "usb3-0"; ... }; + + +Layout differences of banks between mt8173/mt2701 and mt2712 +------------------------------------------------------------- +mt8173 and mt2701: +port offset bank +shared 0x0000 SPLLC + 0x0100 FMREG +u2 port0 0x0800 U2PHY_COM +u3 port0 0x0900 U3PHYD + 0x0a00 U3PHYD_BANK2 + 0x0b00 U3PHYA + 0x0c00 U3PHYA_DA +u2 port1 0x1000 U2PHY_COM +u3 port1 0x1100 U3PHYD + 0x1200 U3PHYD_BANK2 + 0x1300 U3PHYA + 0x1400 U3PHYA_DA +u2 port2 0x1800 U2PHY_COM + ... + +mt2712: +port offset bank +u2 port0 0x0000 MISC + 0x0100 FMREG + 0x0300 U2PHY_COM +u3 port0 0x0700 SPLLC + 0x0800 CHIP + 0x0900 U3PHYD + 0x0a00 U3PHYD_BANK2 + 0x0b00 U3PHYA + 0x0c00 U3PHYA_DA +u2 port1 0x1000 MISC + 0x1100 FMREG + 0x1300 U2PHY_COM +u3 port1 0x1700 SPLLC + 0x1800 CHIP + 0x1900 U3PHYD + 0x1a00 U3PHYD_BANK2 + 0x1b00 U3PHYA + 0x1c00 U3PHYA_DA +u2 port2 0x2000 MISC + ... + + SPLLC shared by u3 ports and FMREG shared by u2 ports on +mt8173/mt2701 are put back into each port; a new bank MISC for +u2 ports and CHIP for u3 ports are added on mt2712. diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt index 3c29c77a7018da4e52b87920965d41013dbd9ef2..e71a8d23f4a8c396b8377b9ec210bd10edbb9753 100644 --- a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt +++ b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt @@ -2,6 +2,7 @@ ROCKCHIP USB2.0 PHY WITH INNO IP BLOCK Required properties (phy (parent) node): - compatible : should be one of the listed compatibles: + * "rockchip,rk3328-usb2phy" * "rockchip,rk3366-usb2phy" * "rockchip,rk3399-usb2phy" - reg : the address offset of grf for usb-phy configuration. @@ -11,6 +12,11 @@ Required properties (phy (parent) node): Optional properties: - clocks : phandle + phy specifier pair, for the input clock of phy. - clock-names : input clock name of phy, must be "phyclk". + - assigned-clocks : phandle of usb 480m clock. + - assigned-clock-parents : parent of usb 480m clock, select between + usb-phy output 480m and xin24m. + Refer to clk/clock-bindings.txt for generic clock + consumer properties. Required nodes : a sub-node is required for each port the phy provides. The sub-node name is used to identify host or otg port, diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt new file mode 100644 index 0000000000000000000000000000000000000000..e11c563a65ecac285bcc058e80774190c8b3e71b --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt @@ -0,0 +1,106 @@ +Qualcomm QMP PHY controller +=========================== + +QMP phy controller supports physical layer functionality for a number of +controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB. + +Required properties: + - compatible: compatible list, contains: + "qcom,msm8996-qmp-pcie-phy" for 14nm PCIe phy on msm8996, + "qcom,msm8996-qmp-usb3-phy" for 14nm USB3 phy on msm8996. + + - reg: offset and length of register set for PHY's common serdes block. + + - #clock-cells: must be 1 + - Phy pll outputs a bunch of clocks for Tx, Rx and Pipe + interface (for pipe based PHYs). These clock are then gate-controlled + by gcc. + - #address-cells: must be 1 + - #size-cells: must be 1 + - ranges: must be present + + - clocks: a list of phandles and clock-specifier pairs, + one for each entry in clock-names. + - clock-names: "cfg_ahb" for phy config clock, + "aux" for phy aux clock, + "ref" for 19.2 MHz ref clk, + For "qcom,msm8996-qmp-pcie-phy" must contain: + "aux", "cfg_ahb", "ref". + For "qcom,msm8996-qmp-usb3-phy" must contain: + "aux", "cfg_ahb", "ref". + + - resets: a list of phandles and reset controller specifier pairs, + one for each entry in reset-names. + - reset-names: "phy" for reset of phy block, + "common" for phy common block reset, + "cfg" for phy's ahb cfg block reset (Optional). + For "qcom,msm8996-qmp-pcie-phy" must contain: + "phy", "common", "cfg". + For "qcom,msm8996-qmp-usb3-phy" must contain + "phy", "common". + + - vdda-phy-supply: Phandle to a regulator supply to PHY core block. + - vdda-pll-supply: Phandle to 1.8V regulator supply to PHY refclk pll block. + +Optional properties: + - vddp-ref-clk-supply: Phandle to a regulator supply to any specific refclk + pll block. + +Required nodes: + - Each device node of QMP phy is required to have as many child nodes as + the number of lanes the PHY has. + +Required properties for child node: + - reg: list of offset and length pairs of register sets for PHY blocks - + tx, rx and pcs. + + - #phy-cells: must be 0 + + - clocks: a list of phandles and clock-specifier pairs, + one for each entry in clock-names. + - clock-names: Must contain following for pcie and usb qmp phys: + "pipe" for pipe clock specific to each lane. + + - resets: a list of phandles and reset controller specifier pairs, + one for each entry in reset-names. + - reset-names: Must contain following for pcie qmp phys: + "lane" for reset specific to each lane. + +Example: + phy@34000 { + compatible = "qcom,msm8996-qmp-pcie-phy"; + reg = <0x34000 0x488>; + #clock-cells = <1>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + clocks = <&gcc GCC_PCIE_PHY_AUX_CLK>, + <&gcc GCC_PCIE_PHY_CFG_AHB_CLK>, + <&gcc GCC_PCIE_CLKREF_CLK>; + clock-names = "aux", "cfg_ahb", "ref"; + + vdda-phy-supply = <&pm8994_l28>; + vdda-pll-supply = <&pm8994_l12>; + + resets = <&gcc GCC_PCIE_PHY_BCR>, + <&gcc GCC_PCIE_PHY_COM_BCR>, + <&gcc GCC_PCIE_PHY_COM_NOCSR_BCR>; + reset-names = "phy", "common", "cfg"; + + pciephy_0: lane@35000 { + reg = <0x35000 0x130>, + <0x35200 0x200>, + <0x35400 0x1dc>; + #phy-cells = <0>; + + clocks = <&gcc GCC_PCIE_0_PIPE_CLK>; + clock-names = "pipe0"; + resets = <&gcc GCC_PCIE_0_PHY_BCR>; + reset-names = "lane0"; + }; + + pciephy_1: lane@36000 { + ... + ... + }; diff --git a/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt new file mode 100644 index 0000000000000000000000000000000000000000..aa0fcb05acb3e4d309224f21fff291c71a6286e9 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt @@ -0,0 +1,43 @@ +Qualcomm QUSB2 phy controller +============================= + +QUSB2 controller supports LS/FS/HS usb connectivity on Qualcomm chipsets. + +Required properties: + - compatible: compatible list, contains "qcom,msm8996-qusb2-phy". + - reg: offset and length of the PHY register set. + - #phy-cells: must be 0. + + - clocks: a list of phandles and clock-specifier pairs, + one for each entry in clock-names. + - clock-names: must be "cfg_ahb" for phy config clock, + "ref" for 19.2 MHz ref clk, + "iface" for phy interface clock (Optional). + + - vdda-pll-supply: Phandle to 1.8V regulator supply to PHY refclk pll block. + - vdda-phy-dpdm-supply: Phandle to 3.1V regulator supply to Dp/Dm port signals. + + - resets: Phandle to reset to phy block. + +Optional properties: + - nvmem-cells: Phandle to nvmem cell that contains 'HS Tx trim' + tuning parameter value for qusb2 phy. + + - qcom,tcsr-syscon: Phandle to TCSR syscon register region. + +Example: + hsusb_phy: phy@7411000 { + compatible = "qcom,msm8996-qusb2-phy"; + reg = <0x7411000 0x180>; + #phy-cells = <0>; + + clocks = <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>, + <&gcc GCC_RX1_USB2_CLKREF_CLK>, + clock-names = "cfg_ahb", "ref"; + + vdda-pll-supply = <&pm8994_l12>; + vdda-phy-dpdm-supply = <&pm8994_l24>; + + resets = <&gcc GCC_QUSB2PHY_PRIM_BCR>; + nvmem-cells = <&qusb2p_hstx_trim>; + }; diff --git a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt index 57dc388e2fa2fe7086b2abe186b46ef2703d7112..4ed569046daf919e487d0147b47720cdd44817aa 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt +++ b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt @@ -30,6 +30,7 @@ Optional Properties: - reset-names: Only allow the following entries: - phy-reset - resets: Must contain an entry for each entry in reset-names. +- vbus-supply: power-supply phandle for vbus power source Example: diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt index e423342581855deebdc2bb4c9effa3e502540061..005bc22938fffb76d73fcee9b8cb29731a1e4584 100644 --- a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt +++ b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt @@ -15,6 +15,7 @@ Required properties: - reg : a list of offset + length pairs - reg-names : * "phy_ctrl" + * "pmu0" for H3, V3s and A64 * "pmu1" * "pmu2" for sun4i, sun6i or sun7i - #phy-cells : from the generic phy bindings, must be 1 diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt index 2fd688c8dbdb5ad1de269e2df6ca6261df231148..b532244736727716b2a4fe069b4ee31608f3dd05 100644 --- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt @@ -23,7 +23,8 @@ Required properties: "allwinner,sun8i-h3-pinctrl" "allwinner,sun8i-h3-r-pinctrl" "allwinner,sun50i-a64-pinctrl" - "allwinner,sun50i-h5-r-pinctrl" + "allwinner,sun50i-a64-r-pinctrl" + "allwinner,sun50i-h5-pinctrl" "nextthing,gr8-pinctrl" - reg: Should contain the register physical address and length for the diff --git a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt index 9a8a45d9d8abc284c3108adb4ef6d1355208c415..590e60378be30dd8ff6838b6a7923dfc2a761b09 100644 --- a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt @@ -4,7 +4,7 @@ The AT91 Pinmux Controller, enables the IC to share one PAD to several functional blocks. The sharing is done by multiplexing the PAD input/output signals. For each PAD there are up to 8 muxing options (called periph modes). Since different modules require -different PAD settings (like pull up, keeper, etc) the contoller controls +different PAD settings (like pull up, keeper, etc) the controller controls also the PAD settings parameters. Please refer to pinctrl-bindings.txt in this directory for details of the diff --git a/Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt new file mode 100644 index 0000000000000000000000000000000000000000..47284f85ec804fd038a9bd931cbcb867fa5fe793 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt @@ -0,0 +1,85 @@ +Axis ARTPEC-6 Pin Controller + +Required properties: +- compatible: "axis,artpec6-pinctrl". +- reg: Should contain the register physical address and length for the pin + controller. + +A pinctrl node should contain at least one subnode representing the pinctrl +groups available on the machine. Each subnode will list the mux function +required and what pin group it will use. Each subnode will also configure the +drive strength and bias pullup of the pin group. If either of these options is +not set, its actual value will be unspecified. + + +Required subnode-properties: +- function: Function to mux. +- groups: Name of the pin group to use for the function above. + + Available functions and groups (function: group0, group1...): + gpio: cpuclkoutgrp0, udlclkoutgrp0, i2c1grp0, i2c2grp0, + i2c3grp0, i2s0grp0, i2s1grp0, i2srefclkgrp0, spi0grp0, + spi1grp0, pciedebuggrp0, uart0grp0, uart0grp1, uart1grp0, + uart2grp0, uart2grp1, uart3grp0, uart4grp0, uart5grp0 + cpuclkout: cpuclkoutgrp0 + udlclkout: udlclkoutgrp0 + i2c1: i2c1grp0 + i2c2: i2c2grp0 + i2c3: i2c3grp0 + i2s0: i2s0grp0 + i2s1: i2s1grp0 + i2srefclk: i2srefclkgrp0 + spi0: spi0grp0 + spi1: spi1grp0 + pciedebug: pciedebuggrp0 + uart0: uart0grp0, uart0grp1 + uart1: uart1grp0 + uart2: uart2grp0, uart2grp1 + uart3: uart3grp0 + uart4: uart4grp0 + uart5: uart5grp0 + nand: nandgrp0 + sdio0: sdio0grp0 + sdio1: sdio1grp0 + ethernet: ethernetgrp0 + + +Optional subnode-properties (see pinctrl-bindings.txt): +- drive-strength: 4, 6, 8, 9 mA. For SD and NAND pins, this is for 3.3V VCCQ3. +- bias-pull-up +- bias-disable + +Examples: +pinctrl@f801d000 { + compatible = "axis,artpec6-pinctrl"; + reg = <0xf801d000 0x400>; + + pinctrl_uart0: uart0grp { + function = "uart0"; + groups = "uart0grp0"; + drive-strength = <4>; + bias-pull-up; + }; + pinctrl_uart3: uart3grp { + function = "uart3"; + groups = "uart3grp0"; + }; +}; +uart0: uart@f8036000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0xf8036000 0x1000>; + interrupts = <0 104 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&pll2div24>, <&apb_pclk>; + clock-names = "uart_clk", "apb_pclk"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart0>; +}; +uart3: uart@f8039000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0xf8039000 0x1000>; + interrupts = <0 128 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&pll2div24>, <&apb_pclk>; + clock-names = "uart_clk", "apb_pclk"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart3>; +}; diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt new file mode 100644 index 0000000000000000000000000000000000000000..f64060908d5ae173e39ec3440f9bbdd3350065ef --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt @@ -0,0 +1,183 @@ +* Marvell Armada 37xx SoC pin and gpio controller + +Each Armada 37xx SoC come with two pin and gpio controller one for the +south bridge and the other for the north bridge. + +Inside this set of register the gpio latch allows exposing some +configuration of the SoC and especially the clock frequency of the +xtal. Hence, this node is a represent as syscon allowing sharing the +register between multiple hardware block. + +GPIO and pin controller: +------------------------ + +Main node: + +Refer to pinctrl-bindings.txt in this directory for details of the +common pinctrl bindings used by client devices, including the meaning +of the phrase "pin configuration node". + +Required properties for pinctrl driver: + +- compatible: "marvell,armada3710-sb-pinctrl", "syscon, "simple-mfd" + for the south bridge + "marvell,armada3710-nb-pinctrl", "syscon, "simple-mfd" + for the north bridge +- reg: The first set of register are for pinctrl/gpio and the second + set for the interrupt controller +- interrupts: list of the interrupt use by the gpio + +Available groups and functions for the North bridge: + +group: jtag + - pins 20-24 + - functions jtag, gpio + +group sdio0 + - pins 8-10 + - functions sdio, gpio + +group emmc_nb + - pins 27-35 + - functions emmc, gpio + +group pwm0 + - pin 11 (GPIO1-11) + - functions pwm, gpio + +group pwm1 + - pin 12 + - functions pwm, gpio + +group pwm2 + - pin 13 + - functions pwm, gpio + +group pwm3 + - pin 14 + - functions pwm, gpio + +group pmic1 + - pin 17 + - functions pmic, gpio + +group pmic0 + - pin 16 + - functions pmic, gpio + +group i2c2 + - pins 2-3 + - functions i2c, gpio + +group i2c1 + - pins 0-1 + - functions i2c, gpio + +group spi_cs1 + - pin 17 + - functions spi, gpio + +group spi_cs2 + - pin 18 + - functions spi, gpio + +group spi_cs3 + - pin 19 + - functions spi, gpio + +group onewire + - pin 4 + - functions onewire, gpio + +group uart1 + - pins 25-26 + - functions uart, gpio + +group spi_quad + - pins 15-16 + - functions spi, gpio + +group uart_2 + - pins 9-10 + - functions uart, gpio + +Available groups and functions for the South bridge: + +group usb32_drvvbus0 + - pin 36 + - functions drvbus, gpio + +group usb2_drvvbus1 + - pin 37 + - functions drvbus, gpio + +group sdio_sb + - pins 60-64 + - functions sdio, gpio + +group rgmii + - pins 42-55 + - functions mii, gpio + +group pcie1 + - pins 39-40 + - functions pcie, gpio + +group ptp + - pins 56-58 + - functions ptp, gpio + +group ptp_clk + - pin 57 + - functions ptp, mii + +group ptp_trig + - pin 58 + - functions ptp, mii + +group mii_col + - pin 59 + - functions mii, mii_err + +GPIO subnode: + +Please refer to gpio.txt in this directory for details of gpio-ranges property +and the common GPIO bindings used by client devices. + +Required properties for gpio driver under the gpio subnode: +- interrupts: List of interrupt specifier for the controllers interrupt. +- gpio-controller: Marks the device node as a gpio controller. +- #gpio-cells: Should be 2. The first cell is the GPIO number and the + second cell specifies GPIO flags, as defined in + . Only the GPIO_ACTIVE_HIGH and + GPIO_ACTIVE_LOW flags are supported. +- gpio-ranges: Range of pins managed by the GPIO controller. + +Xtal Clock bindings for Marvell Armada 37xx SoCs +------------------------------------------------ + +see Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt + + +Example: +pinctrl_sb: pinctrl-sb@18800 { + compatible = "marvell,armada3710-sb-pinctrl", "syscon", "simple-mfd"; + reg = <0x18800 0x100>, <0x18C00 0x20>; + gpio { + #gpio-cells = <2>; + gpio-ranges = <&pinctrl_sb 0 0 29>; + gpio-controller; + interrupts = + , + , + , + , + ; + }; + + rgmii_pins: mii-pins { + groups = "rgmii"; + function = "mii"; + }; + +}; diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt index b98e6f030da84e0582b443124e305ed7fd6b0634..ca01710ee29afde12272a904907f49273403388d 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt @@ -34,13 +34,28 @@ Documentation/devicetree/bindings/mfd/syscon.txt Subnode Format ============== -The required properties of child nodes are (as defined in pinctrl-bindings): -- function -- groups +The required properties of pinmux child nodes are: +- function: the mux function to select +- groups : the list of groups to select with this function -Each function has only one associated pin group. Each group is named by its -function. The following values for the function and groups properties are -supported: +Required properties of pinconf child nodes are: +- groups: A list of groups to select (either this or "pins" must be + specified) +- pins : A list of ball names as strings, eg "D14" (either this or "groups" + must be specified) + +Optional properties of pinconf child nodes are: +- bias-disable : disable any pin bias +- bias-pull-down: pull down the pin +- drive-strength: sink or source at most X mA + +Definitions are as specified in +Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt, with any +further limitations as described above. + +For pinmux, each mux function has only one associated pin group. Each group is +named by its function. The following values for the function and groups +properties are supported: aspeed,ast2400-pinctrl, aspeed,g4-pinctrl: @@ -90,6 +105,11 @@ syscon: scu@1e6e2000 { function = "I2C3"; groups = "I2C3"; }; + + pinctrl_gpioh0_unbiased_default: gpioh0 { + pins = "A8"; + bias-disable; + }; }; }; @@ -110,6 +130,11 @@ ahb { function = "I2C3"; groups = "I2C3"; }; + + pinctrl_gpioh0_unbiased_default: gpioh0 { + pins = "A18"; + bias-disable; + }; }; }; @@ -143,6 +168,3 @@ ahb { }; }; }; - -Please refer to pinctrl-bindings.txt in this directory for details of the -common pinctrl bindings used by client devices. diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt index bf3f7b014724676471a83ef06d3f75a6fb3e54fc..f01d154090dab1b3a8a58fee6a599459392aa1f8 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt @@ -162,8 +162,8 @@ state_2_node_a { pins = "mfio29", "mfio30"; }; -Optionally an altenative binding can be used if more suitable depending on the -pin controller hardware. For hardaware where there is a large number of identical +Optionally an alternative binding can be used if more suitable depending on the +pin controller hardware. For hardware where there is a large number of identical pin controller instances, naming each pin and function can easily become unmaintainable. This is especially the case if the same controller is used for different pins and functions depending on the SoC revision and packaging. @@ -198,6 +198,28 @@ registers, and must not be a virtual index of pin instances. The reason for this is to avoid mapping of the index in the dts files and the pin controller driver as it can change. +For hardware where pin multiplexing configurations have to be specified for +each single pin the number of required sub-nodes containing "pin" and +"function" properties can quickly escalate and become hard to write and +maintain. + +For cases like this, the pin controller driver may use the pinmux helper +property, where the pin identifier is packed with mux configuration settings +in a single integer. + +The pinmux property accepts an array of integers, each of them describing +a single pin multiplexing configuration. + +pincontroller { + state_0_node_a { + pinmux = , , ...; + }; +}; + +Each individual pin controller driver bindings documentation shall specify +how those values (pin IDs and pin multiplexing configuration) are defined and +assembled together. + == Generic pin configuration node content == Many data items that are represented in a pin configuration node are common @@ -210,12 +232,15 @@ structure of the DT nodes that contain these properties. Supported generic properties are: pins - the list of pins that properties in the node - apply to (either this or "group" has to be + apply to (either this, "group" or "pinmux" has to be specified) group - the group to apply the properties to, if the driver supports configuration of whole groups rather than - individual pins (either this or "pins" has to be - specified) + individual pins (either this, "pins" or "pinmux" has + to be specified) +pinmux - the list of numeric pin ids and their mux settings + that properties in the node apply to (either this, + "pins" or "groups" have to be specified) bias-disable - disable any pin bias bias-high-impedance - high impedance mode ("third-state", "floating") bias-bus-hold - latch weakly @@ -258,6 +283,12 @@ state_2_node_a { bias-pull-up; }; }; +state_3_node_a { + mux { + pinmux = , ; + input-enable; + }; +}; Some of the generic properties take arguments. For those that do, the arguments are described below. @@ -266,6 +297,11 @@ arguments are described below. binding for the hardware defines: - Whether the entries are integers or strings, and their meaning. +- pinmux takes a list of pin IDs and mux settings as required argument. The + specific bindings for the hardware defines: + - How pin IDs and mux settings are defined and assembled together in a single + integer. + - bias-pull-up, -down and -pin-default take as optional argument on hardware supporting it the pull strength in Ohm. bias-disable will disable the pull. diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt index 4722bc61a1a29c62a909ed581b9727d94cf51f82..ee01ab58224d5e3907b3a115dcf6d0d8ed7c595f 100644 --- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt @@ -19,11 +19,18 @@ The pins are grouped into up to 5 individual pin banks which need to be defined as gpio sub-nodes of the pinmux controller. Required properties for iomux controller: - - compatible: one of "rockchip,rk1108-pinctrl", "rockchip,rk2928-pinctrl" - "rockchip,rk3066a-pinctrl", "rockchip,rk3066b-pinctrl" - "rockchip,rk3188-pinctrl", "rockchip,rk3228-pinctrl" - "rockchip,rk3288-pinctrl", "rockchip,rk3368-pinctrl" - "rockchip,rk3399-pinctrl" + - compatible: should be + "rockchip,rv1108-pinctrl": for Rockchip RV1108 + "rockchip,rk2928-pinctrl": for Rockchip RK2928 + "rockchip,rk3066a-pinctrl": for Rockchip RK3066a + "rockchip,rk3066b-pinctrl": for Rockchip RK3066b + "rockchip,rk3188-pinctrl": for Rockchip RK3188 + "rockchip,rk3228-pinctrl": for Rockchip RK3228 + "rockchip,rk3288-pinctrl": for Rockchip RK3288 + "rockchip,rk3328-pinctrl": for Rockchip RK3328 + "rockchip,rk3368-pinctrl": for Rockchip RK3368 + "rockchip,rk3399-pinctrl": for Rockchip RK3399 + - rockchip,grf: phandle referencing a syscon providing the "general register files" diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt index eac20aa33907a647377571995cb3f863dc8df27d..d907a74f8dc0a5ee98d87118232fb20580c0fba3 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt @@ -9,6 +9,7 @@ Pin controller node: Required properies: - compatible: value should be one of the following: "st,stm32f429-pinctrl" + "st,stm32f469-pinctrl" "st,stm32f746-pinctrl" "st,stm32h743-pinctrl" - #address-cells: The value of this property must be 1 @@ -38,8 +39,6 @@ Optional properties: - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node which includes IRQ mux selection register, and the offset of the IRQ mux selection register. - - ngpios: Number of gpios in a bank (to use if bank gpio numbers is less - than 16). - gpio-ranges: Define a dedicated mapping between a pin-controller and a gpio controller. Format is <&phandle a b c> with: -(phandle): phandle of pin-controller. diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt b/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt index 65cc0345747d73f540d6c5a4e8b18acc1618953b..6c1498958d48c0221ff2d7731051f5f82ea5fe85 100644 --- a/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt +++ b/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt @@ -1,22 +1,42 @@ Freescale i.MX General Power Controller ======================================= -The i.MX6Q General Power Control (GPC) block contains DVFS load tracking -counters and Power Gating Control (PGC) for the CPU and PU (GPU/VPU) power -domains. +The i.MX6 General Power Control (GPC) block contains DVFS load tracking +counters and Power Gating Control (PGC). Required properties: -- compatible: Should be "fsl,imx6q-gpc" or "fsl,imx6sl-gpc" +- compatible: Should be one of the following: + - fsl,imx6q-gpc + - fsl,imx6qp-gpc + - fsl,imx6sl-gpc - reg: should be register base and length as documented in the datasheet -- interrupts: Should contain GPC interrupt request 1 -- pu-supply: Link to the LDO regulator powering the PU power domain -- clocks: Clock phandles to devices in the PU power domain that need - to be enabled during domain power-up for reset propagation. -- #power-domain-cells: Should be 1, see below: +- interrupts: Should contain one interrupt specifier for the GPC interrupt +- clocks: Must contain an entry for each entry in clock-names. + See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details. +- clock-names: Must include the following entries: + - ipg -The gpc node is a power-controller as documented by the generic power domain -bindings in Documentation/devicetree/bindings/power/power_domain.txt. +The power domains are generic power domain providers as documented in +Documentation/devicetree/bindings/power/power_domain.txt. They are described as +subnodes of the power gating controller 'pgc' node of the GPC and should +contain the following: + +Required properties: +- reg: Must contain the DOMAIN_INDEX of this power domain + The following DOMAIN_INDEX values are valid for i.MX6Q: + ARM_DOMAIN 0 + PU_DOMAIN 1 + The following additional DOMAIN_INDEX value is valid for i.MX6SL: + DISPLAY_DOMAIN 2 + +- #power-domain-cells: Should be 0 + +Optional properties: +- clocks: a number of phandles to clocks that need to be enabled during domain + power-up sequencing to ensure reset propagation into devices located inside + this power domain +- power-supply: a phandle to the regulator powering this domain Example: @@ -25,14 +45,30 @@ Example: reg = <0x020dc000 0x4000>; interrupts = <0 89 IRQ_TYPE_LEVEL_HIGH>, <0 90 IRQ_TYPE_LEVEL_HIGH>; - pu-supply = <®_pu>; - clocks = <&clks IMX6QDL_CLK_GPU3D_CORE>, - <&clks IMX6QDL_CLK_GPU3D_SHADER>, - <&clks IMX6QDL_CLK_GPU2D_CORE>, - <&clks IMX6QDL_CLK_GPU2D_AXI>, - <&clks IMX6QDL_CLK_OPENVG_AXI>, - <&clks IMX6QDL_CLK_VPU_AXI>; - #power-domain-cells = <1>; + clocks = <&clks IMX6QDL_CLK_IPG>; + clock-names = "ipg"; + + pgc { + #address-cells = <1>; + #size-cells = <0>; + + power-domain@0 { + reg = <0>; + #power-domain-cells = <0>; + }; + + pd_pu: power-domain@1 { + reg = <1>; + #power-domain-cells = <0>; + power-supply = <®_pu>; + clocks = <&clks IMX6QDL_CLK_GPU3D_CORE>, + <&clks IMX6QDL_CLK_GPU3D_SHADER>, + <&clks IMX6QDL_CLK_GPU2D_CORE>, + <&clks IMX6QDL_CLK_GPU2D_AXI>, + <&clks IMX6QDL_CLK_OPENVG_AXI>, + <&clks IMX6QDL_CLK_VPU_AXI>; + }; + }; }; @@ -40,20 +76,13 @@ Specifying power domain for IP modules ====================================== IP cores belonging to a power domain should contain a 'power-domains' property -that is a phandle pointing to the gpc device node and a DOMAIN_INDEX specifying -the power domain the device belongs to. +that is a phandle pointing to the power domain the device belongs to. Example of a device that is part of the PU power domain: vpu: vpu@02040000 { reg = <0x02040000 0x3c000>; /* ... */ - power-domains = <&gpc 1>; + power-domains = <&pd_pu>; /* ... */ }; - -The following DOMAIN_INDEX values are valid for i.MX6Q: -ARM_DOMAIN 0 -PU_DOMAIN 1 -The following additional DOMAIN_INDEX value is valid for i.MX6SL: -DISPLAY_DOMAIN 2 diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt new file mode 100644 index 0000000000000000000000000000000000000000..02f45c65fd87627007228b58dc6d7679ec3e5507 --- /dev/null +++ b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.txt @@ -0,0 +1,71 @@ +Freescale i.MX General Power Controller v2 +========================================== + +The i.MX7S/D General Power Control (GPC) block contains Power Gating +Control (PGC) for various power domains. + +Required properties: + +- compatible: Should be "fsl,imx7d-gpc" + +- reg: should be register base and length as documented in the + datasheet + +- interrupts: Should contain GPC interrupt request 1 + +Power domains contained within GPC node are generic power domain +providers, documented in +Documentation/devicetree/bindings/power/power_domain.txt, which are +described as subnodes of the power gating controller 'pgc' node, +which, in turn, is expected to contain the following: + +Required properties: + +- reg: Power domain index. Valid values are defined in + include/dt-bindings/power/imx7-power.h + +- #power-domain-cells: Should be 0 + +Optional properties: + +- power-supply: Power supply used to power the domain + +Example: + + gpc: gpc@303a0000 { + compatible = "fsl,imx7d-gpc"; + reg = <0x303a0000 0x1000>; + interrupt-controller; + interrupts = ; + #interrupt-cells = <3>; + interrupt-parent = <&intc>; + + pgc { + #address-cells = <1>; + #size-cells = <0>; + + pgc_pcie_phy: power-domain@3 { + #power-domain-cells = <0>; + + reg = ; + power-supply = <®_1p0d>; + }; + }; + }; + + +Specifying power domain for IP modules +====================================== + +IP cores belonging to a power domain should contain a 'power-domains' +property that is a phandle for PGC node representing the domain. + +Example of a device that is part of the PCIE_PHY power domain: + + pcie: pcie@33800000 { + reg = <0x33800000 0x4000>, + <0x4ff00000 0x80000>; + /* ... */ + power-domains = <&pgc_pcie_phy>; + /* ... */ + }; diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 723e1ad937da24edefa5c48b03ccb1f345c9919f..14bd9e945ff6453f3290d628b083a13e645e9967 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -31,7 +31,9 @@ Optional properties: - domain-idle-states : A phandle of an idle-state that shall be soaked into a generic domain power state. The idle state definitions are - compatible with domain-idle-state specified in [1]. + compatible with domain-idle-state specified in [1]. phandles + that are not compatible with domain-idle-state will be + ignored. The domain-idle-state property reflects the idle state of this PM domain and not the idle states of the devices or sub-domains in the PM domain. Devices and sub-domains have their own idle-states independent of the parent @@ -79,7 +81,7 @@ Example 3: child: power-controller@12341000 { compatible = "foo,power-controller"; reg = <0x12341000 0x1000>; - power-domains = <&parent 0>; + power-domains = <&parent>; #power-domain-cells = <0>; domain-idle-states = <&DOMAIN_PWR_DN>; }; diff --git a/Documentation/devicetree/bindings/power/reset/gemini-poweroff.txt b/Documentation/devicetree/bindings/power/reset/gemini-poweroff.txt new file mode 100644 index 0000000000000000000000000000000000000000..7fec3e100214c618bd2b5d1ba3a0bb01bc9df04e --- /dev/null +++ b/Documentation/devicetree/bindings/power/reset/gemini-poweroff.txt @@ -0,0 +1,17 @@ +* Device-Tree bindings for Cortina Systems Gemini Poweroff + +This is a special IP block in the Cortina Gemini SoC that only +deals with different ways to power the system down. + +Required properties: +- compatible: should be "cortina,gemini-power-controller" +- reg: should contain the physical memory base and size +- interrupts: should contain the power management interrupt + +Example: + +power-controller@4b000000 { + compatible = "cortina,gemini-power-controller"; + reg = <0x4b000000 0x100>; + interrupts = <26 IRQ_TYPE_EDGE_FALLING>; +}; diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt index 1e2546f8b08a4aab989bbe8c04e76d6fd1df63e4..022ed1f3bc808351752ae1130dd226e7c7e7cb38 100644 --- a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt +++ b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt @@ -3,13 +3,20 @@ Generic SYSCON mapped register poweroff driver This is a generic poweroff driver using syscon to map the poweroff register. The poweroff is generally performed with a write to the poweroff register defined by the register map pointed by syscon reference plus the offset -with the mask defined in the poweroff node. +with the value and mask defined in the poweroff node. Required properties: - compatible: should contain "syscon-poweroff" - regmap: this is phandle to the register map node - offset: offset in the register map for the poweroff register (in bytes) -- mask: the poweroff value written to the poweroff register (32 bit access) +- value: the poweroff value written to the poweroff register (32 bit access) + +Optional properties: +- mask: update only the register bits defined by the mask (32 bit) + +Legacy usage: +If a node doesn't contain a value property but contains a mask property, the +mask property is used as the value. Default will be little endian mode, 32 bit access only. diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt index d23dc002a87e94840c1e68952f184eb4e7167933..d3a5a93a65cd255ff8f5bc3cafcc2fd967d99147 100644 --- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt +++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt @@ -33,6 +33,7 @@ Required properties: - compatible: should be one of: - "rockchip,rk3188-io-voltage-domain" for rk3188 - "rockchip,rk3288-io-voltage-domain" for rk3288 + - "rockchip,rk3328-io-voltage-domain" for rk3328 - "rockchip,rk3368-io-voltage-domain" for rk3368 - "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains - "rockchip,rk3399-io-voltage-domain" for rk3399 diff --git a/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt b/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt new file mode 100644 index 0000000000000000000000000000000000000000..c24886676a6058522a04a2f0c73f4c7f0e77c712 --- /dev/null +++ b/Documentation/devicetree/bindings/power/supply/axp20x_battery.txt @@ -0,0 +1,20 @@ +AXP20x and AXP22x battery power supply + +Required Properties: + - compatible, one of: + "x-powers,axp209-battery-power-supply" + "x-powers,axp221-battery-power-supply" + +This node is a subnode of the axp20x/axp22x PMIC. + +The AXP20X and AXP22X can read the battery voltage, charge and discharge +currents of the battery by reading ADC channels from the AXP20X/AXP22X +ADC. + +Example: + +&axp209 { + battery_power_supply: battery-power-supply { + compatible = "x-powers,axp209-battery-power-supply"; + } +}; diff --git a/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt b/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt new file mode 100644 index 0000000000000000000000000000000000000000..80bd873c3b1de4c2db3612f0fa0fe302fca3c1fd --- /dev/null +++ b/Documentation/devicetree/bindings/power/supply/cpcap-charger.txt @@ -0,0 +1,37 @@ +Motorola CPCAP PMIC battery charger binding + +Required properties: +- compatible: Shall be "motorola,mapphone-cpcap-charger" +- interrupts: Interrupt specifier for each name in interrupt-names +- interrupt-names: Should contain the following entries: + "chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn", + "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb" +- io-channels: IIO ADC channel specifier for each name in io-channel-names +- io-channel-names: Should contain the following entries: + "battdetb", "battp", "vbus", "chg_isense", "batti" + +Optional properties: +- mode-gpios: Optionally CPCAP charger can have a companion wireless + charge controller that is controlled with two GPIOs + that are active low. + +Example: + +cpcap_charger: charger { + compatible = "motorola,mapphone-cpcap-charger"; + interrupts-extended = < + &cpcap 13 0 &cpcap 12 0 &cpcap 29 0 &cpcap 28 0 + &cpcap 22 0 &cpcap 20 0 &cpcap 19 0 &cpcap 54 0 + >; + interrupt-names = + "chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn", + "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb"; + mode-gpios = <&gpio3 29 GPIO_ACTIVE_LOW + &gpio3 23 GPIO_ACTIVE_LOW>; + io-channels = <&cpcap_adc 0 &cpcap_adc 1 + &cpcap_adc 2 &cpcap_adc 5 + &cpcap_adc 6>; + io-channel-names = "battdetb", "battp", + "vbus", "chg_isense", + "batti"; +}; diff --git a/Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt b/Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt new file mode 100644 index 0000000000000000000000000000000000000000..5485633b1faa4798909892c4a9d8c0b2de362c5c --- /dev/null +++ b/Documentation/devicetree/bindings/power/supply/lego_ev3_battery.txt @@ -0,0 +1,21 @@ +LEGO MINDSTORMS EV3 Battery +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +LEGO MINDSTORMS EV3 has some built-in capability for monitoring the battery. +It uses 6 AA batteries or a special Li-ion rechargeable battery pack that is +detected by a key switch in the battery compartment. + +Required properties: + - compatible: Must be "lego,ev3-battery" + - io-channels: phandles to analog inputs for reading voltage and current + - io-channel-names: Must be "voltage", "current" + - rechargeable-gpios: phandle to the rechargeable battery indication gpio + +Example: + + battery { + compatible = "lego,ev3-battery"; + io-channels = <&adc 4>, <&adc 3>; + io-channel-names = "voltage", "current"; + rechargeable-gpios = <&gpio 136 GPIO_ACTIVE_LOW>; + }; diff --git a/Documentation/devicetree/bindings/power/supply/ltc2941.txt b/Documentation/devicetree/bindings/power/supply/ltc2941.txt index ea42ae12d92451579e0af57ee2bdfb886e60ad0a..a9d7aa60558b739ca312f86826b72cb247b111c0 100644 --- a/Documentation/devicetree/bindings/power/supply/ltc2941.txt +++ b/Documentation/devicetree/bindings/power/supply/ltc2941.txt @@ -6,8 +6,8 @@ temperature monitoring, and uses a slightly different conversion formula for the charge counter. Required properties: -- compatible: Should contain "ltc2941" or "ltc2943" which also indicates the - type of I2C chip attached. +- compatible: Should contain "lltc,ltc2941" or "lltc,ltc2943" which also + indicates the type of I2C chip attached. - reg: The 7-bit I2C address. - lltc,resistor-sense: The sense resistor value in milli-ohms. Can be a 32-bit negative value when the battery has been connected to the wrong end of the @@ -20,7 +20,7 @@ Required properties: Example from the Topic Miami Florida board: fuelgauge: ltc2943@64 { - compatible = "ltc2943"; + compatible = "lltc,ltc2943"; reg = <0x64>; lltc,resistor-sense = <15>; lltc,prescaler-exponent = <5>; /* 2^(2*5) = 1024 */ diff --git a/Documentation/devicetree/bindings/power/supply/max8925_batter.txt b/Documentation/devicetree/bindings/power/supply/max8925_battery.txt similarity index 100% rename from Documentation/devicetree/bindings/power/supply/max8925_batter.txt rename to Documentation/devicetree/bindings/power/supply/max8925_battery.txt diff --git a/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt new file mode 100644 index 0000000000000000000000000000000000000000..5af426e13334def9d1e7738e264953cae46fcb46 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/ibm,powerpc-cpu-features.txt @@ -0,0 +1,248 @@ +*** NOTE *** +This document is copied from OPAL firmware +(skiboot/doc/device-tree/ibm,powerpc-cpu-features/binding.txt) + +There is more complete overview and documentation of features in that +source tree. All patches and modifications should go there. +************ + +ibm,powerpc-cpu-features binding +================================ + +This device tree binding describes CPU features available to software, with +enablement, privilege, and compatibility metadata. + +More general description of design and implementation of this binding is +found in design.txt, which also points to documentation of specific features. + + +/cpus/ibm,powerpc-cpu-features node binding +------------------------------------------- + +Node: ibm,powerpc-cpu-features + +Description: Container of CPU feature nodes. + +The node name must be "ibm,powerpc-cpu-features". + +It is implemented as a child of the node "/cpus", but this must not be +assumed by parsers. + +The node is optional but should be provided by new OPAL firmware. + +Properties: + +- compatible + Usage: required + Value type: string + Definition: "ibm,powerpc-cpu-features" + + This compatibility refers to backwards compatibility of the overall + design with parsers that behave according to these guidelines. This can + be extended in a backward compatible manner which would not warrant a + revision of the compatible property. + +- isa + Usage: required + Value type: + Definition: + + isa that the CPU is currently running in. This provides instruction set + compatibility, less the individual feature nodes. For example, an ISA v3.0 + implementation that lacks the "transactional-memory" cpufeature node + should not use transactional memory facilities. + + Value corresponds to the "Power ISA Version" multiplied by 1000. + For example, <3000> corresponds to Version 3.0, <2070> to Version 2.07. + The minor digit is available for revisions. + +- display-name + Usage: optional + Value type: string + Definition: + + A human readable name for the CPU. + +/cpus/ibm,powerpc-cpu-features/example-feature node bindings +---------------------------------------------------------------- + +Each child node of cpu-features represents a CPU feature / capability. + +Node: A string describing an architected CPU feature, e.g., "floating-point". + +Description: A feature or capability supported by the CPUs. + +The name of the node is a human readable string that forms the interface +used to describe features to software. Features are currently documented +in the code where they are implemented in skiboot/core/cpufeatures.c + +Presence of the node indicates the feature is available. + +Properties: + +- isa + Usage: required + Value type: + Definition: + + First level of the Power ISA that the feature appears in. + Software should filter out features when constraining the + environment to a particular ISA version. + + Value is defined similarly to /cpus/features/isa + +- usable-privilege + Usage: required + Value type: bit mask + Definition: + Bit numbers are LSB0 + bit 0 - PR (problem state / user mode) + bit 1 - OS (privileged state) + bit 2 - HV (hypervisor state) + All other bits reserved and should be zero. + + This property describes the privilege levels and/or software components + that can use the feature. + + If bit 0 is set, then the hwcap-bit-nr property will exist. + + +- hv-support + Usage: optional + Value type: bit mask + Definition: + Bit numbers are LSB0 + bit 0 - HFSCR + All other bits reserved and should be zero. + + This property describes the HV privilege support required to enable the + feature to lesser privilege levels. If the property does not exist then no + support is required. + + If no bits are set, the hypervisor must have explicit/custom support for + this feature. + + If the HFSCR bit is set, then the hfscr-bit-nr property will exist and + the feature may be enabled by setting this bit in the HFSCR register. + + +- os-support + Usage: optional + Value type: bit mask + Definition: + Bit numbers are LSB0 + bit 0 - FSCR + All other bits reserved and should be zero. + + This property describes the OS privilege support required to enable the + feature to lesser privilege levels. If the property does not exist then no + support is required. + + If no bits are set, the operating system must have explicit/custom support + for this feature. + + If the FSCR bit is set, then the fscr-bit-nr property will exist and + the feature may be enabled by setting this bit in the FSCR register. + + +- hfscr-bit-nr + Usage: optional + Value type: + Definition: HFSCR bit position (LSB0) + + This property exists when the hv-support property HFSCR bit is set. This + property describes the bit number in the HFSCR register that the + hypervisor must set in order to enable this feature. + + This property also exists if an HFSCR bit corresponds with this feature. + This makes CPU feature parsing slightly simpler. + + +- fscr-bit-nr + Usage: optional + Value type: + Definition: FSCR bit position (LSB0) + + This property exists when the os-support property FSCR bit is set. This + property describes the bit number in the FSCR register that the + operating system must set in order to enable this feature. + + This property also exists if an FSCR bit corresponds with this feature. + This makes CPU feature parsing slightly simpler. + + +- hwcap-bit-nr + Usage: optional + Value type: + Definition: Linux ELF AUX vector bit position (LSB0) + + This property may exist when the usable-privilege property value has PR bit set. + This property describes the bit number that should be set in the ELF AUX + hardware capability vectors in order to advertise this feature to userspace. + Bits 0-31 correspond to bits 0-31 in AT_HWCAP vector. Bits 32-63 correspond + to 0-31 in AT_HWCAP2 vector, and so on. Missing AT_HWCAPx vectors implies + that the feature is not enabled or can not be advertised. Operating systems + may provide a number of unassigned hardware capability bits to allow for new + features to be advertised. + + Some properties representing features created before this binding are + advertised to userspace without a one-to-one hwcap bit number may not specify + this bit. Operating system will handle those bits specifically. All new + features usable by userspace will have a hwcap-bit-nr property. + + +- dependencies + Usage: optional + Value type: + Definition: + + If this property exists then it is a list of phandles to cpu feature + nodes that must be enabled for this feature to be enabled. + + +Example +------- + + /cpus/ibm,powerpc-cpu-features { + compatible = "ibm,powerpc-cpu-features"; + + isa = <3020>; + + darn { + isa = <3000>; + usable-privilege = <1 | 2 | 4>; + hwcap-bit-nr = ; + }; + + scv { + isa = <3000>; + usable-privilege = <1 | 2>; + os-support = <0>; + hwcap-bit-nr = ; + }; + + stop { + isa = <3000>; + usable-privilege = <2 | 4>; + hv-support = <0>; + os-support = <0>; + }; + + vsx2 (hypothetical) { + isa = <3010>; + usable-privilege = <1 | 2 | 4>; + hv-support = <0>; + os-support = <0>; + hwcap-bit-nr = ; + }; + + vsx2-newinsns { + isa = <3020>; + usable-privilege = <1 | 2 | 4>; + os-support = <1>; + fscr-bit-nr = ; + hwcap-bit-nr = ; + dependencies = <&vsx2>; + }; + + }; diff --git a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt index 02331b904d4e73780da1999f807068915545ebb9..c8c831d7b0d1bb9d90cfc9b3022b57b9e6a2f74d 100644 --- a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt +++ b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt @@ -4,6 +4,7 @@ Required properties: - compatible: should be one of: - "atmel,at91sam9rl-pwm" - "atmel,sama5d3-pwm" + - "atmel,sama5d2-pwm" - reg: physical base address and length of the controller's registers - #pwm-cells: Should be 3. See pwm.txt in this directory for a description of the cells format. diff --git a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt index b4e73778dda3a2e211ec210869d4d78807066122..c57e11b8d9375b85b882407a3477f7ff2ac58553 100644 --- a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt +++ b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt @@ -19,6 +19,19 @@ Required properties: - reset-names: Must include the following entries: - pwm +Optional properties: +============================ +In some of the interface like PWM based regulator device, it is required +to configure the pins differently in different states, especially in suspend +state of the system. The configuration of pin is provided via the pinctrl +DT node as detailed in the pinctrl DT binding document + Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt + +The PWM node will have following optional properties. +pinctrl-names: Pin state names. Must be "default" and "sleep". +pinctrl-0: phandle for the default/active state of pin configurations. +pinctrl-1: phandle for the sleep state of pin configurations. + Example: pwm: pwm@7000a000 { @@ -29,3 +42,35 @@ Example: resets = <&tegra_car 17>; reset-names = "pwm"; }; + + +Example with the pin configuration for suspend and resume: +========================================================= +Suppose pin PE7 (On Tegra210) interfaced with the regulator device and +it requires PWM output to be tristated when system enters suspend. +Following will be DT binding to achieve this: + +#include + + pinmux@700008d4 { + pwm_active_state: pwm_active_state { + pe7 { + nvidia,pins = "pe7"; + nvidia,tristate = ; + }; + }; + + pwm_sleep_state: pwm_sleep_state { + pe7 { + nvidia,pins = "pe7"; + nvidia,tristate = ; + }; + }; + }; + + pwm@7000a000 { + /* Mandatory PWM properties */ + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&pwm_active_state>; + pinctrl-1 = <&pwm_sleep_state>; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt new file mode 100644 index 0000000000000000000000000000000000000000..54c59b0560ad701f9542da665f845c3bb1add186 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt @@ -0,0 +1,34 @@ +MediaTek PWM controller + +Required properties: + - compatible: should be "mediatek,-pwm": + - "mediatek,mt7623-pwm": found on mt7623 SoC. + - reg: physical base address and length of the controller's registers. + - #pwm-cells: must be 2. See pwm.txt in this directory for a description of + the cell format. + - clocks: phandle and clock specifier of the PWM reference clock. + - clock-names: must contain the following: + - "top": the top clock generator + - "main": clock used by the PWM core + - "pwm1-5": the five per PWM clocks + - pinctrl-names: Must contain a "default" entry. + - pinctrl-0: One property must exist for each entry in pinctrl-names. + See pinctrl/pinctrl-bindings.txt for details of the property values. + +Example: + pwm0: pwm@11006000 { + compatible = "mediatek,mt7623-pwm"; + reg = <0 0x11006000 0 0x1000>; + #pwm-cells = <2>; + clocks = <&topckgen CLK_TOP_PWM_SEL>, + <&pericfg CLK_PERI_PWM>, + <&pericfg CLK_PERI_PWM1>, + <&pericfg CLK_PERI_PWM2>, + <&pericfg CLK_PERI_PWM3>, + <&pericfg CLK_PERI_PWM4>, + <&pericfg CLK_PERI_PWM5>; + clock-names = "top", "main", "pwm1", "pwm2", + "pwm3", "pwm4", "pwm5"; + pinctrl-names = "default"; + pinctrl-0 = <&pwm0_pins>; + }; diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.txt b/Documentation/devicetree/bindings/regulator/anatop-regulator.txt index 1d58c8cfdbc01d8fffad426774dd730a987af2d1..a3106c72fbeaa1a862f85b49693490cd74457963 100644 --- a/Documentation/devicetree/bindings/regulator/anatop-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/anatop-regulator.txt @@ -2,6 +2,7 @@ Anatop Voltage regulators Required properties: - compatible: Must be "fsl,anatop-regulator" +- regulator-name: A string used as a descriptive name for regulator outputs - anatop-reg-offset: Anatop MFD register offset - anatop-vol-bit-shift: Bit shift for the register - anatop-vol-bit-width: Number of bits used in the register diff --git a/Documentation/devicetree/bindings/regulator/lm363x-regulator.txt b/Documentation/devicetree/bindings/regulator/lm363x-regulator.txt index 8f14df9d120555adcfca9008238d7653cc7a5675..cc5a6151d85ff40c966a88098236b9fee9a6ea74 100644 --- a/Documentation/devicetree/bindings/regulator/lm363x-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/lm363x-regulator.txt @@ -8,8 +8,8 @@ Required property: Optional properties: LM3632 has external enable pins for two LDOs. - - ti,lcm-en1-gpio: A GPIO specifier for Vpos control pin. - - ti,lcm-en2-gpio: A GPIO specifier for Vneg control pin. + - enable-gpios: Two GPIO specifiers for Vpos and Vneg control pins. + The first entry is Vpos, the second is Vneg enable pin. Child nodes: LM3631 @@ -30,5 +30,79 @@ Child nodes: Examples: Please refer to ti-lmu dt-bindings [2]. +lm3631@29 { + compatible = "ti,lm3631"; + reg = <0x29>; + + regulators { + compatible = "ti,lm363x-regulator"; + + vboost { + regulator-name = "lcd_boost"; + regulator-min-microvolt = <4500000>; + regulator-max-microvolt = <6350000>; + regulator-always-on; + }; + + vcont { + regulator-name = "lcd_vcont"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + }; + + voref { + regulator-name = "lcd_voref"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + }; + + vpos { + regulator-name = "lcd_vpos"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + regulator-boot-on; + }; + + vneg { + regulator-name = "lcd_vneg"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + regulator-boot-on; + }; + }; +}; + +lm3632@11 { + compatible = "ti,lm3632"; + reg = <0x11>; + + regulators { + compatible = "ti,lm363x-regulator"; + + /* GPIO1_16 for Vpos, GPIO1_28 is for Vneg */ + enable-gpios = <&gpio1 16 GPIO_ACTIVE_HIGH>, + <&gpio1 28 GPIO_ACTIVE_HIGH>; + + vboost { + regulator-name = "lcd_boost"; + regulator-min-microvolt = <4500000>; + regulator-max-microvolt = <6400000>; + regulator-always-on; + }; + + vpos { + regulator-name = "lcd_vpos"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + }; + + vneg { + regulator-name = "lcd_vneg"; + regulator-min-microvolt = <4000000>; + regulator-max-microvolt = <6000000>; + }; + }; +}; + [1] ../regulator/regulator.txt [2] ../mfd/ti-lmu.txt diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.txt b/Documentation/devicetree/bindings/regulator/pfuze100.txt index 9b40db88f637bd9ed7f06c5eb809e7b16ddd660d..444c47831a408c6b24d908f707117d7d3788cfde 100644 --- a/Documentation/devicetree/bindings/regulator/pfuze100.txt +++ b/Documentation/devicetree/bindings/regulator/pfuze100.txt @@ -13,7 +13,7 @@ Required child node: --PFUZE100 sw1ab,sw1c,sw2,sw3a,sw3b,sw4,swbst,vsnvs,vrefddr,vgen1~vgen6 --PFUZE200 - sw1ab,sw2,sw3a,sw3b,swbst,vsnvs,vrefddr,vgen1~vgen6 + sw1ab,sw2,sw3a,sw3b,swbst,vsnvs,vrefddr,vgen1~vgen6,coin --PFUZE3000 sw1a,sw1b,sw2,sw3,swbst,vsnvs,vrefddr,vldo1,vldo2,vccsd,v33,vldo3,vldo4 @@ -205,6 +205,12 @@ Example 2: PFUZE200 regulator-max-microvolt = <3300000>; regulator-always-on; }; + + coin_reg: coin { + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; }; }; diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt index 6ab5aef619d9fe73c1db7e62fa8c10c5c03d33f9..d18edb075e1c746debc19bf4f9bf7325aef0ecef 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.txt +++ b/Documentation/devicetree/bindings/regulator/regulator.txt @@ -21,6 +21,9 @@ Optional properties: design requires. This property describes the total system ramp time required due to the combination of internal ramping of the regulator itself, and board design issues such as trace capacitance and load on the supply. +- regulator-settling-time-us: Settling time, in microseconds, for voltage + change if regulator have the constant time for any level voltage change. + This is useful when regulator have exponential voltage change. - regulator-soft-start: Enable soft start so that voltage ramps slowly - regulator-state-mem sub-root node for Suspend-to-RAM mode : suspend to memory, the device goes to sleep, but all data stored in memory, diff --git a/Documentation/devicetree/bindings/regulator/tps65132-regulator.txt b/Documentation/devicetree/bindings/regulator/tps65132-regulator.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a3505520c692b5fd9e3f5ff1f6a88d732954b43 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/tps65132-regulator.txt @@ -0,0 +1,46 @@ +TPS65132 regulators + +Required properties: +- compatible: "ti,tps65132" +- reg: I2C slave address + +Optional Subnode: +Device supports two regulators OUTP and OUTN. A sub node within the + device node describe the properties of these regulators. The sub-node + names must be as follows: + -For regulator outp, the sub node name should be "outp". + -For regulator outn, the sub node name should be "outn". + +-enable-gpios:(active high, output) Regulators are controlled by the input pins. + If it is connected to GPIO through host system then provide the + gpio number as per gpio.txt. +-active-discharge-gpios: (active high, output) Some configurations use delay mechanisms + on the enable pin, to keep the regulator enabled for some time after + the enable signal goes low. This GPIO is used to actively discharge + the delay mechanism. Requires specification of ti,active-discharge-time-us +-ti,active-discharge-time-us: how long the active discharge gpio should be + asserted for during active discharge, in microseconds. + +Each regulator is defined using the standard binding for regulators. + +Example: + + tps65132@3e { + compatible = "ti,tps65132"; + reg = <0x3e>; + + outp { + regulator-name = "outp"; + regulator-boot-on; + regulator-always-on; + enable-gpios = <&gpio 23 0>; + }; + + outn { + regulator-name = "outn"; + regulator-boot-on; + regulator-always-on; + regulator-active-discharge = <0>; + enable-gpios = <&gpio 40 0>; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/vctrl.txt b/Documentation/devicetree/bindings/regulator/vctrl.txt new file mode 100644 index 0000000000000000000000000000000000000000..601328d7fdbb725647d48e0e10d6b19dc5ca6254 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/vctrl.txt @@ -0,0 +1,49 @@ +Bindings for Voltage controlled regulators +========================================== + +Required properties: +-------------------- +- compatible : must be "vctrl-regulator". +- regulator-min-microvolt : smallest voltage consumers may set +- regulator-max-microvolt : largest voltage consumers may set +- ctrl-supply : The regulator supplying the control voltage. +- ctrl-voltage-range : an array of two integer values describing the range + (min/max) of the control voltage. The values specify + the control voltage needed to generate the corresponding + regulator-min/max-microvolt output voltage. + +Optional properties: +-------------------- +- ovp-threshold-percent : overvoltage protection (OVP) threshold of the + regulator in percent. Some regulators have an OVP + circuitry which shuts down the regulator when the + actual output voltage deviates beyond a certain + margin from the expected value for a given control + voltage. On larger voltage decreases this can occur + undesiredly since the output voltage does not adjust + inmediately to changes in the control voltage. To + avoid this situation the vctrl driver breaks down + larger voltage decreases into multiple steps, where + each step is within the OVP threshold. +- min-slew-down-rate : Describes how slowly the regulator voltage will decay + down in the worst case (lightest expected load). + Specified in uV / us (like main regulator ramp rate). + This value is required when ovp-threshold-percent is + specified. + +Example: + + vctrl-reg { + compatible = "vctrl-regulator"; + regulator-name = "vctrl_reg"; + + ctrl-supply = <&ctrl_reg>; + + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1500000>; + + ctrl-voltage-range = <200000 500000>; + + min-slew-down-rate = <225>; + ovp-threshold-percent = <16>; + }; diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e1afc3d8480d45c9b75be4f0fdf2007e47a3d65 --- /dev/null +++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt @@ -0,0 +1,47 @@ +Freescale i.MX7 System Reset Controller +====================================== + +Please also refer to reset.txt in this directory for common reset +controller binding usage. + +Required properties: +- compatible: Should be "fsl,imx7-src", "syscon" +- reg: should be register base and length as documented in the + datasheet +- interrupts: Should contain SRC interrupt +- #reset-cells: 1, see below + +example: + +src: reset-controller@30390000 { + compatible = "fsl,imx7d-src", "syscon"; + reg = <0x30390000 0x2000>; + interrupts = ; + #reset-cells = <1>; +}; + + +Specifying reset lines connected to IP modules +============================================== + +The system reset controller can be used to reset various set of +peripherals. Device nodes that need access to reset lines should +specify them as a reset phandle in their corresponding node as +specified in reset.txt. + +Example: + + pcie: pcie@33800000 { + + ... + + resets = <&src IMX7_RESET_PCIEPHY>, + <&src IMX7_RESET_PCIE_CTRL_APPS_EN>; + reset-names = "pciephy", "apps"; + + ... + }; + + +For list of all valid reset indicies see + diff --git a/Documentation/devicetree/bindings/rng/amlogic,meson-rng.txt b/Documentation/devicetree/bindings/rng/amlogic,meson-rng.txt index 202f2d09a23f2cb2d817c1733b88c1b93fb67690..4d403645ac9b3dcd513c5f55a7217542a1eb1d37 100644 --- a/Documentation/devicetree/bindings/rng/amlogic,meson-rng.txt +++ b/Documentation/devicetree/bindings/rng/amlogic,meson-rng.txt @@ -6,9 +6,16 @@ Required properties: - compatible : should be "amlogic,meson-rng" - reg : Specifies base physical address and size of the registers. +Optional properties: + +- clocks : phandle to the following named clocks +- clock-names: Name of core clock, must be "core" + Example: rng { - compatible = "amlogic,meson-rng"; - reg = <0x0 0xc8834000 0x0 0x4>; + compatible = "amlogic,meson-rng"; + reg = <0x0 0xc8834000 0x0 0x4>; + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; }; diff --git a/Documentation/devicetree/bindings/rng/mtk-rng.txt b/Documentation/devicetree/bindings/rng/mtk-rng.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6d62a2abd39440ae680d10feb5e7340c5364558 --- /dev/null +++ b/Documentation/devicetree/bindings/rng/mtk-rng.txt @@ -0,0 +1,18 @@ +Device-Tree bindings for Mediatek random number generator +found in Mediatek SoC family + +Required properties: +- compatible : Should be "mediatek,mt7623-rng" +- clocks : list of clock specifiers, corresponding to + entries in clock-names property; +- clock-names : Should contain "rng" entries; +- reg : Specifies base physical address and size of the registers + +Example: + +rng: rng@1020f000 { + compatible = "mediatek,mt7623-rng"; + reg = <0 0x1020f000 0 0x1000>; + clocks = <&infracfg CLK_INFRA_TRNG>; + clock-names = "rng"; +}; diff --git a/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt b/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt new file mode 100644 index 0000000000000000000000000000000000000000..45750ff3112d26448506fdf1aa3f239cfe9f3f3c --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/cpcap-rtc.txt @@ -0,0 +1,18 @@ +Motorola CPCAP PMIC RTC +----------------------- + +This module is part of the CPCAP. For more details about the whole +chip see Documentation/devicetree/bindings/mfd/motorola-cpcap.txt. + +Requires node properties: +- compatible: should contain "motorola,cpcap-rtc" +- interrupts: An interrupt specifier for alarm and 1 Hz irq + +Example: + +&cpcap { + cpcap_rtc: rtc { + compatible = "motorola,cpcap-rtc"; + interrupts = <39 IRQ_TYPE_NONE>, <26 IRQ_TYPE_NONE>; + }; +}; diff --git a/Documentation/devicetree/bindings/rtc/rtc-sh.txt b/Documentation/devicetree/bindings/rtc/rtc-sh.txt new file mode 100644 index 0000000000000000000000000000000000000000..7676c7d2887403733965bac7c234e0665a2216c7 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/rtc-sh.txt @@ -0,0 +1,28 @@ +* Real Time Clock for Renesas SH and ARM SoCs + +Required properties: +- compatible: Should be "renesas,r7s72100-rtc" and "renesas,sh-rtc" as a + fallback. +- reg: physical base address and length of memory mapped region. +- interrupts: 3 interrupts for alarm, period, and carry. +- interrupt-names: The interrupts should be labeled as "alarm", "period", and + "carry". +- clocks: The functional clock source for the RTC controller must be listed + first (if exists). Additionally, potential clock counting sources are to be + listed. +- clock-names: The functional clock must be labeled as "fck". Other clocks + may be named in accordance to the SoC hardware manuals. + + +Example: +rtc: rtc@fcff1000 { + compatible = "renesas,r7s72100-rtc", "renesas,sh-rtc"; + reg = <0xfcff1000 0x2e>; + interrupts = ; + interrupt-names = "alarm", "period", "carry"; + clocks = <&mstp6_clks R7S72100_CLK_RTC>, <&rtc_x1_clk>, + <&rtc_x3_clk>, <&extal_clk>; + clock-names = "fck", "rtc_x1", "rtc_x3", "extal"; +}; diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.txt b/Documentation/devicetree/bindings/serial/sprd-uart.txt index 2aff0f22c9fa31837b9f49cd6d4213d14186f9e4..cab40f0f6f497ab7bc0b3ab9640bcc0563a8cddb 100644 --- a/Documentation/devicetree/bindings/serial/sprd-uart.txt +++ b/Documentation/devicetree/bindings/serial/sprd-uart.txt @@ -1,7 +1,19 @@ * Spreadtrum serial UART Required properties: -- compatible: must be "sprd,sc9836-uart" +- compatible: must be one of: + * "sprd,sc9836-uart" + * "sprd,sc9860-uart", "sprd,sc9836-uart" + - reg: offset and length of the register set for the device - interrupts: exactly one interrupt specifier - clocks: phandles to input clocks. + +Example: + uart0: serial@0 { + compatible = "sprd,sc9860-uart", + "sprd,sc9836-uart"; + reg = <0x0 0x100>; + interrupts = ; + clocks = <&ext_26m>; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt index 349f79fd7076a63b3ad373e0d473f7e00f484c14..626e1afa64a6eb729f89e311ec67716167b45408 100644 --- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt @@ -13,8 +13,17 @@ Required properties: - #gpio-cells : Should be two. The first cell is the pin number and the second cell is used to specify optional parameters (currently unused). - gpio-controller : Marks the port as GPIO controller. +Optional properties: +- fsl,cpm1-gpio-irq-mask : For banks having interrupt capability (like port C + on CPM1), this item tells which ports have an associated interrupt (ports are + listed in the same order as in PCINT register) +- interrupts : This property provides the list of interrupt for each GPIO having + one as described by the fsl,cpm1-gpio-irq-mask property. There should be as + many interrupts as number of ones in the mask property. The first interrupt in + the list corresponds to the most significant bit of the mask. +- interrupt-parent : Parent for the above interrupt property. -Example of three SOC GPIO banks defined as gpio-controller nodes: +Example of four SOC GPIO banks defined as gpio-controller nodes: CPM1_PIO_A: gpio-controller@950 { #gpio-cells = <2>; @@ -30,6 +39,16 @@ Example of three SOC GPIO banks defined as gpio-controller nodes: gpio-controller; }; + CPM1_PIO_C: gpio-controller@960 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-c"; + reg = <0x960 0x10>; + fsl,cpm1-gpio-irq-mask = <0x0fff>; + interrupts = <1 2 6 9 10 11 14 15 23 24 26 31>; + interrupt-parent = <&CPM_PIC>; + gpio-controller; + }; + CPM1_PIO_E: gpio-controller@ac8 { #gpio-cells = <2>; compatible = "fsl,cpm1-pario-bank-e"; diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.txt b/Documentation/devicetree/bindings/soc/rockchip/grf.txt index a0685c2092184d225cbb25982c8af525f48259ce..cc9f05d3cbc1a0b08a2a278364332b8114b027a3 100644 --- a/Documentation/devicetree/bindings/soc/rockchip/grf.txt +++ b/Documentation/devicetree/bindings/soc/rockchip/grf.txt @@ -8,6 +8,8 @@ From RK3368 SoCs, the GRF is divided into two sections, - SGRF, used for general secure system, - PMUGRF, used for always on system +On RK3328 SoCs, the GRF adds a section for USB2PHYGRF, + Required Properties: - compatible: GRF should be one of the following: @@ -16,6 +18,7 @@ Required Properties: - "rockchip,rk3188-grf", "syscon": for rk3188 - "rockchip,rk3228-grf", "syscon": for rk3228 - "rockchip,rk3288-grf", "syscon": for rk3288 + - "rockchip,rk3328-grf", "syscon": for rk3328 - "rockchip,rk3368-grf", "syscon": for rk3368 - "rockchip,rk3399-grf", "syscon": for rk3399 - compatible: PMUGRF should be one of the following: @@ -23,6 +26,8 @@ Required Properties: - "rockchip,rk3399-pmugrf", "syscon": for rk3399 - compatible: SGRF should be one of the following - "rockchip,rk3288-sgrf", "syscon": for rk3288 +- compatible: USB2PHYGRF should be one of the followings + - "rockchip,rk3328-usb2phy-grf", "syscon": for rk3328 - reg: physical base address of the controller and length of memory mapped region. diff --git a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt new file mode 100644 index 0000000000000000000000000000000000000000..c705db07d8206d74835c729cf6253d97a7227df0 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt @@ -0,0 +1,57 @@ +Texas Instruments TI-SCI Generic Power Domain +--------------------------------------------- + +Some TI SoCs contain a system controller (like the PMMC, etc...) that is +responsible for controlling the state of the IPs that are present. +Communication between the host processor running an OS and the system +controller happens through a protocol known as TI-SCI [1]. + +[1] Documentation/devicetree/bindings/arm/keystone/ti,sci.txt + +PM Domain Node +============== +The PM domain node represents the global PM domain managed by the PMMC, which +in this case is the implementation as documented by the generic PM domain +bindings in Documentation/devicetree/bindings/power/power_domain.txt. Because +this relies on the TI SCI protocol to communicate with the PMMC it must be a +child of the pmmc node. + +Required Properties: +-------------------- +- compatible: should be "ti,sci-pm-domain" +- #power-domain-cells: Must be 1 so that an id can be provided in each + device node. + +Example (K2G): +------------- + pmmc: pmmc { + compatible = "ti,k2g-sci"; + ... + + k2g_pds: power-controller { + compatible = "ti,sci-pm-domain"; + #power-domain-cells = <1>; + }; + }; + +PM Domain Consumers +=================== +Hardware blocks belonging to a PM domain should contain a "power-domains" +property that is a phandle pointing to the corresponding PM domain node +along with an index representing the device id to be passed to the PMMC +for device control. + +Required Properties: +-------------------- +- power-domains: phandle pointing to the corresponding PM domain node + and an ID representing the device. + +See dt-bindings/genpd/k2g.h for the list of valid identifiers for k2g. + +Example (K2G): +-------------------- + uart0: serial@02530c00 { + compatible = "ns16550a"; + ... + power-domains = <&k2g_pds K2G_DEV_UART0>; + }; diff --git a/Documentation/devicetree/bindings/sound/cs35l35.txt b/Documentation/devicetree/bindings/sound/cs35l35.txt new file mode 100644 index 0000000000000000000000000000000000000000..016b768bc722b6ca3e16600eb08ddb5a8b711a6d --- /dev/null +++ b/Documentation/devicetree/bindings/sound/cs35l35.txt @@ -0,0 +1,180 @@ +CS35L35 Boosted Speaker Amplifier + +Required properties: + + - compatible : "cirrus,cs35l35" + + - reg : the I2C address of the device for I2C + + - VA-supply, VP-supply : power supplies for the device, + as covered in + Documentation/devicetree/bindings/regulator/regulator.txt. + + - interrupt-parent : Specifies the phandle of the interrupt controller to + which the IRQs from CS35L35 are delivered to. + - interrupts : IRQ line info CS35L35. + (See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt + for further information relating to interrupt properties) + +Optional properties: + - reset-gpios : gpio used to reset the amplifier + + - cirrus,stereo-config : Boolean to determine if there are 2 AMPs for a + Stereo configuration + + - cirrus,audio-channel : Set Location of Audio Signal on Serial Port + 0 = Data Packet received on Left I2S Channel + 1 = Data Packet received on Right I2S Channel + + - cirrus,advisory-channel : Set Location of Advisory Signal on Serial Port + 0 = Data Packet received on Left I2S Channel + 1 = Data Packet received on Right I2S Channel + + - cirrus,shared-boost : Boolean to enable ClassH tracking of Advisory Signal + if 2 Devices share Boost BST_CTL + + - cirrus,external-boost : Boolean to specify the device is using an external + boost supply, note that sharing a boost from another cs35l35 would constitute + using an external supply for the slave device + + - cirrus,sp-drv-strength : Value for setting the Serial Port drive strength + Table 3-10 of the datasheet lists drive-strength specifications + 0 = 1x (Default) + 1 = .5x + - cirrus,sp-drv-unused : Determines how unused slots should be driven on the + Serial Port. + 0 - Hi-Z + 2 - Drive 0's (Default) + 3 - Drive 1's + + - cirrus,bst-pdn-fet-on : Boolean to determine if the Boost PDN control + powers down with a rectification FET On or Off. If VSPK is supplied + externally then FET is off. + + - cirrus,boost-ctl-millivolt : Boost Voltage Value. Configures the boost + converter's output voltage in mV. The range is from 2600mV to 9000mV with + increments of 100mV. + (Default) VP + + - cirrus,boost-peak-milliamp : Boost-converter peak current limit in mA. + Configures the peak current by monitoring the current through the boost FET. + Range starts at 1680mA and goes to a maximum of 4480mA with increments of + 110mA. + (Default) 2.46 Amps + + - cirrus,amp-gain-zc : Boolean to determine if to use Amplifier gain-change + zero-cross + +Optional H/G Algorithm sub-node: + + The cs35l35 node can have a single "cirrus,classh-internal-algo" sub-node + that will disable automatic control of the internal H/G Algorithm. + + It is strongly recommended that the Datasheet be referenced when adjusting + or using these Class H Algorithm controls over the internal Algorithm. + Serious damage can occur to the Device and surrounding components. + + - cirrus,classh-internal-algo : Sub-node for the Internal Class H Algorithm + See Section 4.3 Internal Class H Algorithm in the Datasheet. + If not used, the device manages the ClassH Algorithm internally. + +Optional properties for the "cirrus,classh-internal-algo" Sub-node + + Section 7.29 Class H Control + - cirrus,classh-bst-overide : Boolean + - cirrus,classh-bst-max-limit + - cirrus,classh-mem-depth + + Section 7.30 Class H Headroom Control + - cirrus,classh-headroom + + Section 7.31 Class H Release Rate + - cirrus,classh-release-rate + + Section 7.32 Class H Weak FET Drive Control + - cirrus,classh-wk-fet-disable + - cirrus,classh-wk-fet-delay + - cirrus,classh-wk-fet-thld + + Section 7.34 Class H VP Control + - cirrus,classh-vpch-auto + - cirrus,classh-vpch-rate + - cirrus,classh-vpch-man + +Optional Monitor Signal Format sub-node: + + The cs35l35 node can have a single "cirrus,monitor-signal-format" sub-node + for adjusting the Depth, Location and Frame of the Monitoring Signals + for Algorithms. + + See Sections 4.8.2 through 4.8.4 Serial-Port Control in the Datasheet + + -cirrus,monitor-signal-format : Sub-node for the Monitor Signaling Formating + on the I2S Port. Each of the 3 8 bit values in the array contain the settings + for depth, location, and frame. + + If not used, the defaults for the 6 monitor signals is used. + + Sections 7.44 - 7.53 lists values for the depth, location, and frame + for each monitoring signal. + + - cirrus,imon : 4 8 bit values to set the depth, location, frame and ADC + scale of the IMON monitor signal. + + - cirrus,vmon : 3 8 bit values to set the depth, location, and frame + of the VMON monitor signal. + + - cirrus,vpmon : 3 8 bit values to set the depth, location, and frame + of the VPMON monitor signal. + + - cirrus,vbstmon : 3 8 bit values to set the depth, location, and frame + of the VBSTMON monitor signal + + - cirrus,vpbrstat : 3 8 bit values to set the depth, location, and frame + of the VPBRSTAT monitor signal + + - cirrus,zerofill : 3 8 bit values to set the depth, location, and frame\ + of the ZEROFILL packet in the monitor signal + +Example: + +cs35l35: cs35l35@20 { + compatible = "cirrus,cs35l35"; + reg = <0x20>; + VA-supply = <&dummy_vreg>; + VP-supply = <&dummy_vreg>; + reset-gpios = <&axi_gpio 54 0>; + interrupt-parent = <&gpio8>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; + cirrus,boost-ctl-millivolt = <9000>; + + cirrus,stereo-config; + cirrus,audio-channel = <0x00>; + cirrus,advisory-channel = <0x01>; + cirrus,shared-boost; + + cirrus,classh-internal-algo { + cirrus,classh-bst-overide; + cirrus,classh-bst-max-limit = <0x01>; + cirrus,classh-mem-depth = <0x01>; + cirrus,classh-release-rate = <0x08>; + cirrus,classh-headroom-millivolt = <0x0B>; + cirrus,classh-wk-fet-disable = <0x01>; + cirrus,classh-wk-fet-delay = <0x04>; + cirrus,classh-wk-fet-thld = <0x01>; + cirrus,classh-vpch-auto = <0x01>; + cirrus,classh-vpch-rate = <0x02>; + cirrus,classh-vpch-man = <0x05>; + }; + + /* Depth, Location, Frame */ + cirrus,monitor-signal-format { + cirrus,imon = /bits/ 8 <0x03 0x00 0x01>; + cirrus,vmon = /bits/ 8 <0x03 0x00 0x00>; + cirrus,vpmon = /bits/ 8 <0x03 0x04 0x00>; + cirrus,vbstmon = /bits/ 8 <0x03 0x04 0x01>; + cirrus,vpbrstat = /bits/ 8 <0x00 0x04 0x00>; + cirrus,zerofill = /bits/ 8 <0x00 0x00 0x00>; + }; + +}; diff --git a/Documentation/devicetree/bindings/sound/dioo,dio2125.txt b/Documentation/devicetree/bindings/sound/dioo,dio2125.txt new file mode 100644 index 0000000000000000000000000000000000000000..63dbfe0f11d0cfb139f5fa03030ab3a809b6167b --- /dev/null +++ b/Documentation/devicetree/bindings/sound/dioo,dio2125.txt @@ -0,0 +1,12 @@ +DIO2125 Audio Driver + +Required properties: +- compatible : "dioo,dio2125" +- enable-gpios : the gpio connected to the enable pin of the dio2125 + +Example: + +amp: analog-amplifier { + compatible = "dioo,dio2125"; + enable-gpios = <&gpio GPIOH_3 0>; +}; diff --git a/Documentation/devicetree/bindings/sound/everest,es7134.txt b/Documentation/devicetree/bindings/sound/everest,es7134.txt new file mode 100644 index 0000000000000000000000000000000000000000..5495a3cb8b7bc2b2e6fe7925038caa9b86040ede --- /dev/null +++ b/Documentation/devicetree/bindings/sound/everest,es7134.txt @@ -0,0 +1,10 @@ +ES7134 i2s DA converter + +Required properties: +- compatible : "everest,es7134" or "everest,es7144" + +Example: + +i2s_codec: external-codec { + compatible = "everest,es7134"; +}; diff --git a/Documentation/devicetree/bindings/sound/fsl,ssi.txt b/Documentation/devicetree/bindings/sound/fsl,ssi.txt index 5b76be45d18bfecce403bf4b1f09730bb74a3c0c..d415888e131659d4b757ea8abba98c3ac4a49b2b 100644 --- a/Documentation/devicetree/bindings/sound/fsl,ssi.txt +++ b/Documentation/devicetree/bindings/sound/fsl,ssi.txt @@ -20,24 +20,8 @@ Required properties: have. - interrupt-parent: The phandle for the interrupt controller that services interrupts for this device. -- fsl,playback-dma: Phandle to a node for the DMA channel to use for - playback of audio. This is typically dictated by SOC - design. See the notes below. -- fsl,capture-dma: Phandle to a node for the DMA channel to use for - capture (recording) of audio. This is typically dictated - by SOC design. See the notes below. - fsl,fifo-depth: The number of elements in the transmit and receive FIFOs. This number is the maximum allowed value for SFCSR[TFWM0]. -- fsl,ssi-asynchronous: - If specified, the SSI is to be programmed in asynchronous - mode. In this mode, pins SRCK, STCK, SRFS, and STFS must - all be connected to valid signals. In synchronous mode, - SRCK and SRFS are ignored. Asynchronous mode allows - playback and capture to use different sample sizes and - sample rates. Some drivers may require that SRCK and STCK - be connected together, and SRFS and STFS be connected - together. This would still allow different sample sizes, - but not different sample rates. - clocks: "ipg" - Required clock for the SSI unit "baud" - Required clock for SSI master mode. Otherwise this clock is not used @@ -61,6 +45,24 @@ Optional properties: - fsl,mode: The operating mode for the AC97 interface only. "ac97-slave" - AC97 mode, SSI is clock slave "ac97-master" - AC97 mode, SSI is clock master +- fsl,ssi-asynchronous: + If specified, the SSI is to be programmed in asynchronous + mode. In this mode, pins SRCK, STCK, SRFS, and STFS must + all be connected to valid signals. In synchronous mode, + SRCK and SRFS are ignored. Asynchronous mode allows + playback and capture to use different sample sizes and + sample rates. Some drivers may require that SRCK and STCK + be connected together, and SRFS and STFS be connected + together. This would still allow different sample sizes, + but not different sample rates. +- fsl,playback-dma: Phandle to a node for the DMA channel to use for + playback of audio. This is typically dictated by SOC + design. See the notes below. + Only used on Power Architecture. +- fsl,capture-dma: Phandle to a node for the DMA channel to use for + capture (recording) of audio. This is typically dictated + by SOC design. See the notes below. + Only used on Power Architecture. Child 'codec' node required properties: - compatible: Compatible list, contains the name of the codec diff --git a/Documentation/devicetree/bindings/sound/hisilicon,hi6210-i2s.txt b/Documentation/devicetree/bindings/sound/hisilicon,hi6210-i2s.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a296784eb37489dbad5ca68fb2cf9217e69080d --- /dev/null +++ b/Documentation/devicetree/bindings/sound/hisilicon,hi6210-i2s.txt @@ -0,0 +1,42 @@ +* Hisilicon 6210 i2s controller + +Required properties: + +- compatible: should be one of the following: + - "hisilicon,hi6210-i2s" +- reg: physical base address of the i2s controller unit and length of + memory mapped region. +- interrupts: should contain the i2s interrupt. +- clocks: a list of phandle + clock-specifier pairs, one for each entry + in clock-names. +- clock-names: should contain following: + - "dacodec" + - "i2s-base" +- dmas: DMA specifiers for tx dma. See the DMA client binding, + Documentation/devicetree/bindings/dma/dma.txt +- dma-names: should be "tx" and "rx" +- hisilicon,sysctrl-syscon: phandle to sysctrl syscon +- #sound-dai-cells: Should be set to 1 (for multi-dai) + - The dai cell indexes reference the following interfaces: + 0: S2 interface + (Currently that is the only one available, but more may be + supported in the future) + +Example for the hi6210 i2s controller: + +i2s0: i2s@f7118000{ + compatible = "hisilicon,hi6210-i2s"; + reg = <0x0 0xf7118000 0x0 0x8000>; /* i2s unit */ + interrupts = ; /* 155 "DigACodec_intr"-32 */ + clocks = <&sys_ctrl HI6220_DACODEC_PCLK>, + <&sys_ctrl HI6220_BBPPLL0_DIV>; + clock-names = "dacodec", "i2s-base"; + dmas = <&dma0 15 &dma0 14>; + dma-names = "rx", "tx"; + hisilicon,sysctrl-syscon = <&sys_ctrl>; + #sound-dai-cells = <1>; +}; + +Then when referencing the i2s controller: + sound-dai = <&i2s0 0>; /* index 0 => S2 interface */ + diff --git a/Documentation/devicetree/bindings/sound/max98925.txt b/Documentation/devicetree/bindings/sound/max98925.txt deleted file mode 100644 index 27be63e2aa0de08c7160d8e37960d3816800a7ab..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/sound/max98925.txt +++ /dev/null @@ -1,22 +0,0 @@ -max98925 audio CODEC - -This device supports I2C. - -Required properties: - - - compatible : "maxim,max98925" - - - vmon-slot-no : slot number used to send voltage information - - - imon-slot-no : slot number used to send current information - - - reg : the I2C address of the device for I2C - -Example: - -codec: max98925@1a { - compatible = "maxim,max98925"; - vmon-slot-no = <0>; - imon-slot-no = <2>; - reg = <0x1a>; -}; diff --git a/Documentation/devicetree/bindings/sound/max98926.txt b/Documentation/devicetree/bindings/sound/max98926.txt deleted file mode 100644 index 0b7f4e4d5f9a5c18b53398fe2291cc84e5e2b134..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/sound/max98926.txt +++ /dev/null @@ -1,32 +0,0 @@ -max98926 audio CODEC - -This device supports I2C. - -Required properties: - - - compatible : "maxim,max98926" - - - vmon-slot-no : slot number used to send voltage information - or in inteleave mode this will be used as - interleave slot. - - - imon-slot-no : slot number used to send current information - - - interleave-mode : When using two MAX98926 in a system it is - possible to create ADC data that that will - overflow the frame size. Digital Audio Interleave - mode provides a means to output VMON and IMON data - from two devices on a single DOUT line when running - smaller frames sizes such as 32 BCLKS per LRCLK or - 48 BCLKS per LRCLK. - - - reg : the I2C address of the device for I2C - -Example: - -codec: max98926@1a { - compatible = "maxim,max98926"; - vmon-slot-no = <0>; - imon-slot-no = <2>; - reg = <0x1a>; -}; diff --git a/Documentation/devicetree/bindings/sound/max9892x.txt b/Documentation/devicetree/bindings/sound/max9892x.txt new file mode 100644 index 0000000000000000000000000000000000000000..f6171591ddc61daa71f2fb58e84aff5c2744297e --- /dev/null +++ b/Documentation/devicetree/bindings/sound/max9892x.txt @@ -0,0 +1,41 @@ +Maxim Integrated MAX98925/MAX98926/MAX98927 Speaker Amplifier + +This device supports I2C. + +Required properties: + + - compatible : should be one of the following + - "maxim,max98925" + - "maxim,max98926" + - "maxim,max98927" + + - vmon-slot-no : slot number used to send voltage information + or in inteleave mode this will be used as + interleave slot. + MAX98925/MAX98926 slot range : 0 ~ 30, Default : 0 + MAX98927 slot range : 0 ~ 15, Default : 0 + + - imon-slot-no : slot number used to send current information + MAX98925/MAX98926 slot range : 0 ~ 30, Default : 0 + MAX98927 slot range : 0 ~ 15, Default : 0 + + - interleave-mode : When using two MAX9892X in a system it is + possible to create ADC data that that will + overflow the frame size. Digital Audio Interleave + mode provides a means to output VMON and IMON data + from two devices on a single DOUT line when running + smaller frames sizes such as 32 BCLKS per LRCLK or + 48 BCLKS per LRCLK. + Range : 0 (off), 1 (on), Default : 0 + + - reg : the I2C address of the device for I2C + +Example: + +codec: max98927@3a { + compatible = "maxim,max98927"; + vmon-slot-no = <0>; + imon-slot-no = <1>; + interleave-mode = <0>; + reg = <0x3a>; +}; diff --git a/Documentation/devicetree/bindings/sound/mt2701-wm8960.txt b/Documentation/devicetree/bindings/sound/mt2701-wm8960.txt new file mode 100644 index 0000000000000000000000000000000000000000..809b609ea9d0b56e0761305f916bcf8c860e5f90 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/mt2701-wm8960.txt @@ -0,0 +1,24 @@ +MT2701 with WM8960 CODEC + +Required properties: +- compatible: "mediatek,mt2701-wm8960-machine" +- mediatek,platform: the phandle of MT2701 ASoC platform +- audio-routing: a list of the connections between audio +- mediatek,audio-codec: the phandles of wm8960 codec +- pinctrl-names: Should contain only one value - "default" +- pinctrl-0: Should specify pin control groups used for this controller. + +Example: + + sound:sound { + compatible = "mediatek,mt2701-wm8960-machine"; + mediatek,platform = <&afe>; + audio-routing = + "Headphone", "HP_L", + "Headphone", "HP_R", + "LINPUT1", "AMIC", + "RINPUT1", "AMIC"; + mediatek,audio-codec = <&wm8960>; + pinctrl-names = "default"; + pinctrl-0 = <&aud_pins_default>; + }; diff --git a/Documentation/devicetree/bindings/sound/nau8824.txt b/Documentation/devicetree/bindings/sound/nau8824.txt new file mode 100644 index 0000000000000000000000000000000000000000..e0058b97e49a48240dee993beca7ae27ea994f8e --- /dev/null +++ b/Documentation/devicetree/bindings/sound/nau8824.txt @@ -0,0 +1,88 @@ +Nuvoton NAU8824 audio codec + +This device supports I2C only. + +Required properties: + - compatible : Must be "nuvoton,nau8824" + + - reg : the I2C address of the device. This is either 0x1a (CSB=0) or 0x1b (CSB=1). + +Optional properties: + - nuvoton,jkdet-polarity: JKDET pin polarity. 0 - active high, 1 - active low. + + - nuvoton,vref-impedance: VREF Impedance selection + 0 - Open + 1 - 25 kOhm + 2 - 125 kOhm + 3 - 2.5 kOhm + + - nuvoton,micbias-voltage: Micbias voltage level. + 0 - VDDA + 1 - VDDA + 2 - VDDA * 1.1 + 3 - VDDA * 1.2 + 4 - VDDA * 1.3 + 5 - VDDA * 1.4 + 6 - VDDA * 1.53 + 7 - VDDA * 1.53 + + - nuvoton,sar-threshold-num: Number of buttons supported + - nuvoton,sar-threshold: Impedance threshold for each button. Array that contains up to 8 buttons configuration. SAR value is calculated as + SAR = 255 * MICBIAS / SAR_VOLTAGE * R / (2000 + R) + where MICBIAS is configured by 'nuvoton,micbias-voltage', SAR_VOLTAGE is configured by 'nuvoton,sar-voltage', R - button impedance. + Refer datasheet section 10.2 for more information about threshold calculation. + + - nuvoton,sar-hysteresis: Button impedance measurement hysteresis. + + - nuvoton,sar-voltage: Reference voltage for button impedance measurement. + 0 - VDDA + 1 - VDDA + 2 - VDDA * 1.1 + 3 - VDDA * 1.2 + 4 - VDDA * 1.3 + 5 - VDDA * 1.4 + 6 - VDDA * 1.53 + 7 - VDDA * 1.53 + + - nuvoton,sar-compare-time: SAR compare time + 0 - 500 ns + 1 - 1 us + 2 - 2 us + 3 - 4 us + + - nuvoton,sar-sampling-time: SAR sampling time + 0 - 2 us + 1 - 4 us + 2 - 8 us + 3 - 16 us + + - nuvoton,short-key-debounce: Button short key press debounce time. + 0 - 30 ms + 1 - 50 ms + 2 - 100 ms + + - nuvoton,jack-eject-debounce: Jack ejection debounce time. + 0 - 0 ms + 1 - 1 ms + 2 - 10 ms + + +Example: + + headset: nau8824@1a { + compatible = "nuvoton,nau8824"; + reg = <0x1a>; + interrupt-parent = <&gpio>; + interrupts = ; + nuvoton,vref-impedance = <2>; + nuvoton,micbias-voltage = <6>; + // Setup 4 buttons impedance according to Android specification + nuvoton,sar-threshold-num = <4>; + nuvoton,sar-threshold = <0xc 0x1e 0x38 0x60>; + nuvoton,sar-hysteresis = <0>; + nuvoton,sar-voltage = <6>; + nuvoton,sar-compare-time = <1>; + nuvoton,sar-sampling-time = <1>; + nuvoton,short-key-debounce = <0>; + nuvoton,jack-eject-debounce = <1>; + }; diff --git a/Documentation/devicetree/bindings/sound/rockchip-i2s.txt b/Documentation/devicetree/bindings/sound/rockchip-i2s.txt index a6600f6dea64dd7c22188e05bc1a38e3ff4a0df0..206aba1b34bb5a3f44e528d874a7520e5882c3fc 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-i2s.txt +++ b/Documentation/devicetree/bindings/sound/rockchip-i2s.txt @@ -9,6 +9,7 @@ Required properties: - "rockchip,rk3066-i2s": for rk3066 - "rockchip,rk3188-i2s", "rockchip,rk3066-i2s": for rk3188 - "rockchip,rk3288-i2s", "rockchip,rk3066-i2s": for rk3288 + - "rockchip,rk3368-i2s", "rockchip,rk3066-i2s": for rk3368 - "rockchip,rk3399-i2s", "rockchip,rk3066-i2s": for rk3399 - reg: physical base address of the controller and length of memory mapped region. diff --git a/Documentation/devicetree/bindings/sound/samsung,odroid.txt b/Documentation/devicetree/bindings/sound/samsung,odroid.txt new file mode 100644 index 0000000000000000000000000000000000000000..c1ac70cb0afb470d079d202a83fcbdc2ed227ce6 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/samsung,odroid.txt @@ -0,0 +1,57 @@ +Samsung Exynos Odroid XU3/XU4 audio complex with MAX98090 codec + +Required properties: + + - compatible - "samsung,odroidxu3-audio" - for Odroid XU3 board, + "samsung,odroidxu4-audio" - for Odroid XU4 board + - model - the user-visible name of this sound complex + - 'cpu' subnode with a 'sound-dai' property containing the phandle of the I2S + controller + - 'codec' subnode with a 'sound-dai' property containing list of phandles + to the CODEC nodes, first entry must be corresponding to the MAX98090 + CODEC and the second entry must be the phandle of the HDMI IP block node + - clocks - should contain entries matching clock names in the clock-names + property + - clock-names - should contain following entries: + - "epll" - indicating the EPLL output clock + - "i2s_rclk" - indicating the RCLK (root) clock of the I2S0 controller + - samsung,audio-widgets - this property specifies off-codec audio elements + like headphones or speakers, for details see widgets.txt + - samsung,audio-routing - a list of the connections between audio + components; each entry is a pair of strings, the first being the + connection's sink, the second being the connection's source; + valid names for sources and sinks are the MAX98090's pins (as + documented in its binding), and the jacks on the board + + For Odroid X2: + "Headphone Jack", "Mic Jack", "DMIC" + + For Odroid U3, XU3: + "Headphone Jack", "Speakers" + + For Odroid XU4: + no entries + +Example: + +sound { + compatible = "samsung,odroidxu3-audio"; + samsung,cpu-dai = <&i2s0>; + samsung,codec-dai = <&max98090>; + model = "Odroid-XU3"; + samsung,audio-routing = + "Headphone Jack", "HPL", + "Headphone Jack", "HPR", + "IN1", "Mic Jack", + "Mic Jack", "MICBIAS"; + + clocks = <&clock CLK_FOUT_EPLL>, <&i2s0 CLK_I2S_RCLK_SRC>; + clock-names = "epll", "sclk_i2s"; + + cpu { + sound-dai = <&i2s0 0>; + }; + codec { + sound-dai = <&hdmi>, <&max98090>; + }; +}; diff --git a/Documentation/devicetree/bindings/sound/sgtl5000.txt b/Documentation/devicetree/bindings/sound/sgtl5000.txt index 5666da7b86059f39c411a6acebf5b8dc758342ac..7a73a9d62015ecc600af3c3f1c69f001b180b410 100644 --- a/Documentation/devicetree/bindings/sound/sgtl5000.txt +++ b/Documentation/devicetree/bindings/sound/sgtl5000.txt @@ -26,6 +26,15 @@ Optional properties: If this node is not mentioned or the value is unknown, then the value is set to 1.25V. +- lrclk-strength: the LRCLK pad strength. Possible values are: +0, 1, 2 and 3 as per the table below: + +VDDIO 1.8V 2.5V 3.3V +0 = Disable +1 = 1.66 mA 2.87 mA 4.02 mA +2 = 3.33 mA 5.74 mA 8.03 mA +3 = 4.99 mA 8.61 mA 12.05 mA + Example: codec: sgtl5000@0a { diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt new file mode 100644 index 0000000000000000000000000000000000000000..c59a3d779e067b9a56feba52962498dfb2e241fc --- /dev/null +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt @@ -0,0 +1,89 @@ +STMicroelectronics STM32 Serial Audio Interface (SAI). + +The SAI interface (Serial Audio Interface) offers a wide set of audio protocols +as I2S standards, LSB or MSB-justified, PCM/DSP, TDM, and AC'97. +The SAI contains two independent audio sub-blocks. Each sub-block has +its own clock generator and I/O lines controller. + +Required properties: + - compatible: Should be "st,stm32f4-sai" + - reg: Base address and size of SAI common register set. + - clocks: Must contain phandle and clock specifier pairs for each entry + in clock-names. + - clock-names: Must contain "x8k" and "x11k" + "x8k": SAI parent clock for sampling rates multiple of 8kHz. + "x11k": SAI parent clock for sampling rates multiple of 11.025kHz. + - interrupts: cpu DAI interrupt line shared by SAI sub-blocks + +Optional properties: + - resets: Reference to a reset controller asserting the SAI + +SAI subnodes: +Two subnodes corresponding to SAI sub-block instances A et B can be defined. +Subnode can be omitted for unsused sub-block. + +SAI subnodes required properties: + - compatible: Should be "st,stm32-sai-sub-a" or "st,stm32-sai-sub-b" + for SAI sub-block A or B respectively. + - reg: Base address and size of SAI sub-block register set. + - clocks: Must contain one phandle and clock specifier pair + for sai_ck which feeds the internal clock generator. + - clock-names: Must contain "sai_ck". + - dmas: see Documentation/devicetree/bindings/dma/stm32-dma.txt + - dma-names: identifier string for each DMA request line + "tx": if sai sub-block is configured as playback DAI + "rx": if sai sub-block is configured as capture DAI + - pinctrl-names: should contain only value "default" + - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/pinctrl-stm32.txt + +Example: +sound_card { + compatible = "audio-graph-card"; + dais = <&sai1b_port>; +}; + +sai1: sai1@40015800 { + compatible = "st,stm32f4-sai"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + reg = <0x40015800 0x4>; + clocks = <&rcc 1 CLK_SAIQ_PDIV>, <&rcc 1 CLK_I2SQ_PDIV>; + clock-names = "x8k", "x11k"; + interrupts = <87>; + + sai1b: audio-controller@40015824 { + #sound-dai-cells = <0>; + compatible = "st,stm32-sai-sub-b"; + reg = <0x40015824 0x1C>; + clocks = <&rcc 1 CLK_SAI2>; + clock-names = "sai_ck"; + dmas = <&dma2 5 0 0x400 0x0>; + dma-names = "tx"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_sai1b>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + sai1b_port: port@0 { + reg = <0>; + cpu_endpoint: endpoint { + remote-endpoint = <&codec_endpoint>; + audio-graph-card,format = "i2s"; + audio-graph-card,bitclock-master = <&codec_endpoint>; + audio-graph-card,frame-master = <&codec_endpoint>; + }; + }; + }; + }; +}; + +audio-codec { + codec_port: port { + codec_endpoint: endpoint { + remote-endpoint = <&cpu_endpoint>; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/sound/tas2552.txt b/Documentation/devicetree/bindings/sound/tas2552.txt index c49992c0b62acb97f9d6bb11ad41812747242f84..2d71eb05c1d384c72e166d02a46fc2d695cea382 100644 --- a/Documentation/devicetree/bindings/sound/tas2552.txt +++ b/Documentation/devicetree/bindings/sound/tas2552.txt @@ -5,7 +5,8 @@ The tas2552 serial control bus communicates through I2C protocols Required properties: - compatible - One of: "ti,tas2552" - TAS2552 - - reg - I2C slave address + - reg - I2C slave address: it can be 0x40 if ADDR pin is 0 + or 0x41 if ADDR pin is 1. - supply-*: Required supply regulators are: "vbat" battery voltage "iovdd" I/O Voltage @@ -14,17 +15,20 @@ Required properties: Optional properties: - enable-gpio - gpio pin to enable/disable the device -tas2552 can receive it's reference clock via MCLK, BCLK, IVCLKIN pin or use the +tas2552 can receive its reference clock via MCLK, BCLK, IVCLKIN pin or use the internal 1.8MHz. This CLKIN is used by the PLL. In addition to PLL, the PDM reference clock is also selectable: PLL, IVCLKIN, BCLK or MCLK. For system integration the dt-bindings/sound/tas2552.h header file provides -defined values to selct and configure the PLL and PDM reference clocks. +defined values to select and configure the PLL and PDM reference clocks. Example: tas2552: tas2552@41 { compatible = "ti,tas2552"; reg = <0x41>; + vbat-supply = <®_vbat>; + iovdd-supply = <®_iovdd>; + avdd-supply = <®_avdd>; enable-gpio = <&gpio4 2 GPIO_ACTIVE_HIGH>; }; diff --git a/Documentation/devicetree/bindings/sound/wm8903.txt b/Documentation/devicetree/bindings/sound/wm8903.txt index 94ec32c194bb1fc8f3dd4b6bbe10069008e4d673..afc51caf113705e4da112635385c205f290442ac 100644 --- a/Documentation/devicetree/bindings/sound/wm8903.txt +++ b/Documentation/devicetree/bindings/sound/wm8903.txt @@ -28,6 +28,14 @@ Optional properties: performed. If any entry has the value 0xffffffff, that GPIO's configuration will not be modified. + - AVDD-supply : Analog power supply regulator on the AVDD pin. + + - CPVDD-supply : Charge pump supply regulator on the CPVDD pin. + + - DBVDD-supply : Digital buffer supply regulator for the DBVDD pin. + + - DCVDD-supply : Digital core supply regulator for the DCVDD pin. + Pins on the device (for linking into audio routes): * IN1L @@ -54,6 +62,11 @@ codec: wm8903@1a { reg = <0x1a>; interrupts = < 347 >; + AVDD-supply = <&fooreg_a>; + CPVDD-supply = <&fooreg_b>; + DBVDD-supply = <&fooreg_c>; + DCVDC-supply = <&fooreg_d>; + gpio-controller; #gpio-cells = <2>; diff --git a/Documentation/devicetree/bindings/sound/zte,tdm.txt b/Documentation/devicetree/bindings/sound/zte,tdm.txt new file mode 100644 index 0000000000000000000000000000000000000000..2a07ca655264c377a2d8a658b10f594d6206b34a --- /dev/null +++ b/Documentation/devicetree/bindings/sound/zte,tdm.txt @@ -0,0 +1,30 @@ +ZTE TDM DAI driver + +Required properties: + +- compatible : should be one of the following. + * zte,zx296718-tdm +- reg : physical base address of the controller and length of memory mapped + region. +- clocks : Pairs of phandle and specifier referencing the controller's clocks. +- clock-names: "wclk" for the wclk. + "pclk" for the pclk. +-#clock-cells: should be 1. +- zte,tdm-dma-sysctrl : Reference to the sysctrl controller controlling + the dma. includes: + phandle of sysctrl. + register offset in sysctrl for control dma. + mask of the register that be written to sysctrl. + +Example: + + tdm: tdm@1487000 { + compatible = "zte,zx296718-tdm"; + reg = <0x01487000 0x1000>; + clocks = <&audiocrm AUDIO_TDM_WCLK>, <&audiocrm AUDIO_TDM_PCLK>; + clock-names = "wclk", "pclk"; + #clock-cells = <1>; + pinctrl-names = "default"; + pinctrl-0 = <&tdm_global_pin>; + zte,tdm-dma-sysctrl = <&sysctrl 0x10c 4>; + }; diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt index 8bc95e2fc47fadf7eb75d215494f48af45107922..31b5b21598ff5755d3e12a50aac2abf51d6653f1 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt @@ -23,6 +23,12 @@ See the clock consumer binding, Obsolete properties: - fsl,spi-num-chipselects : Contains the number of the chipselect +Optional properties: +- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register +controlling the SPI_READY handling. Note that to enable the DRCTL consideration, +the SPI_READY mode-flag needs to be set too. +Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). + Example: ecspi@70010000 { @@ -35,4 +41,5 @@ ecspi@70010000 { <&gpio3 25 0>; /* GPIO3_25 */ dmas = <&sdma 3 7 1>, <&sdma 4 7 2>; dma-names = "rx", "tx"; + fsl,spi-rdy-drctl = <1>; }; diff --git a/Documentation/devicetree/bindings/spi/spi-bcm63xx-hsspi.txt b/Documentation/devicetree/bindings/spi/spi-bcm63xx-hsspi.txt new file mode 100644 index 0000000000000000000000000000000000000000..37b29ee138600fe265b4caa1166e209a2714b3fa --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-bcm63xx-hsspi.txt @@ -0,0 +1,33 @@ +Binding for Broadcom BCM6328 High Speed SPI controller + +Required properties: +- compatible: must contain of "brcm,bcm6328-hsspi". +- reg: Base address and size of the controllers memory area. +- interrupts: Interrupt for the SPI block. +- clocks: phandles of the SPI clock and the PLL clock. +- clock-names: must be "hsspi", "pll". +- #address-cells: <1>, as required by generic SPI binding. +- #size-cells: <0>, also as required by generic SPI binding. + +Optional properties: +- num-cs: some controllers have less than 8 cs signals. Defaults to 8 + if absent. + +Child nodes as per the generic SPI binding. + +Example: + + spi@10001000 { + compatible = "brcm,bcm6328-hsspi"; + reg = <0x10001000 0x600>; + + interrupts = <29>; + + clocks = <&clkctl 9>, <&hsspi_pll>; + clock-names = "hsspi", "pll"; + + num-cs = <2>; + + #address-cells = <1>; + #size-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/spi/spi-bcm63xx.txt b/Documentation/devicetree/bindings/spi/spi-bcm63xx.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c16f66926137eb56817dd58ce360e585fece179 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-bcm63xx.txt @@ -0,0 +1,33 @@ +Binding for Broadcom BCM6348/BCM6358 SPI controller + +Required properties: +- compatible: must contain one of "brcm,bcm6348-spi", "brcm,bcm6358-spi". +- reg: Base address and size of the controllers memory area. +- interrupts: Interrupt for the SPI block. +- clocks: phandle of the SPI clock. +- clock-names: has to be "spi". +- #address-cells: <1>, as required by generic SPI binding. +- #size-cells: <0>, also as required by generic SPI binding. + +Optional properties: +- num-cs: some controllers have less than 8 cs signals. Defaults to 8 + if absent. + +Child nodes as per the generic SPI binding. + +Example: + + spi@10000800 { + compatible = "brcm,bcm6368-spi", "brcm,bcm6358-spi"; + reg = <0x10000800 0x70c>; + + interrupts = <1>; + + clocks = <&clkctl 9>; + clock-names = "spi"; + + num-cs = <5>; + + #address-cells = <1>; + #size-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt index 4d1673ca8cf802d4b9e13fb53538baa3d5372a48..7638b4968ddb4ad8adabbf36ab540751b6eec343 100644 --- a/Documentation/devicetree/bindings/spi/spi_pl022.txt +++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt @@ -30,7 +30,10 @@ contain the following properties. 0: SPI 1: Texas Instruments Synchronous Serial Frame Format 2: Microwire (Half Duplex) -- pl022,com-mode : polling, interrupt or dma +- pl022,com-mode : specifies the transfer mode: + 0: interrupt mode + 1: polling mode (default mode if property not present) + 2: DMA mode - pl022,rx-level-trig : Rx FIFO watermark level - pl022,tx-level-trig : Tx FIFO watermark level - pl022,ctrl-len : Microwire interface: Control length @@ -56,9 +59,7 @@ Example: spi-max-frequency = <12000000>; spi-cpol; spi-cpha; - pl022,hierarchy = <0>; pl022,interface = <0>; - pl022,slave-tx-disable; pl022,com-mode = <0x2>; pl022,rx-level-trig = <0>; pl022,tx-level-trig = <0>; @@ -67,4 +68,3 @@ Example: pl022,duplex = <0>; }; }; - diff --git a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt b/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt deleted file mode 100644 index c59e27c632c1778883bbadfbff151290f2413c13..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/staging/ion/hi6220-ion.txt +++ /dev/null @@ -1,31 +0,0 @@ -Hi6220 SoC ION -=================================================================== -Required properties: -- compatible : "hisilicon,hi6220-ion" -- list of the ION heaps - - heap name : maybe heap_sys_user@0 - - heap id : id should be unique in the system. - - heap base : base ddr address of the heap,0 means that - it is dynamic. - - heap size : memory size and 0 means it is dynamic. - - heap type : the heap type of the heap, please also - see the define in ion.h(drivers/staging/android/uapi/ion.h) -------------------------------------------------------------------- -Example: - hi6220-ion { - compatible = "hisilicon,hi6220-ion"; - heap_sys_user@0 { - heap-name = "sys_user"; - heap-id = <0x0>; - heap-base = <0x0>; - heap-size = <0x0>; - heap-type = "ion_system"; - }; - heap_sys_contig@0 { - heap-name = "sys_contig"; - heap-id = <0x1>; - heap-base = <0x0>; - heap-size = <0x0>; - heap-type = "ion_system_contig"; - }; - }; diff --git a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt index 474531d2b2c5fc746d625a5725e9a09f3ec19b8a..da8c5b73ad105a5fa5bf1510f5ec1f2d1ab462f9 100644 --- a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt @@ -3,15 +3,39 @@ Binding for Thermal Sensor driver for BCM2835 SoCs. Required parameters: ------------------- -compatible: should be one of: "brcm,bcm2835-thermal", - "brcm,bcm2836-thermal" or "brcm,bcm2837-thermal" -reg: Address range of the thermal registers. -clocks: Phandle of the clock used by the thermal sensor. +compatible: should be one of: "brcm,bcm2835-thermal", + "brcm,bcm2836-thermal" or "brcm,bcm2837-thermal" +reg: Address range of the thermal registers. +clocks: Phandle of the clock used by the thermal sensor. +#thermal-sensor-cells: should be 0 (see thermal.txt) Example: +thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <0>; + polling-delay = <1000>; + + thermal-sensors = <&thermal>; + + trips { + cpu-crit { + temperature = <80000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + coefficients = <(-538) 407000>; + + cooling-maps { + }; + }; +}; + thermal: thermal@7e212000 { compatible = "brcm,bcm2835-thermal"; reg = <0x7e212000 0x8>; clocks = <&clocks BCM2835_CLOCK_TSENS>; + #thermal-sensor-cells = <0>; }; diff --git a/Documentation/devicetree/bindings/thermal/brcm,ns-thermal b/Documentation/devicetree/bindings/thermal/brcm,ns-thermal new file mode 100644 index 0000000000000000000000000000000000000000..68e047170039ecc08625fb1902d939aeb32bd58e --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/brcm,ns-thermal @@ -0,0 +1,37 @@ +* Broadcom Northstar Thermal + +This binding describes thermal sensor that is part of Northstar's DMU (Device +Management Unit). + +Required properties: +- compatible : Must be "brcm,ns-thermal" +- reg : iomem address range of PVTMON registers +- #thermal-sensor-cells : Should be <0> + +Example: + +thermal: thermal@1800c2c0 { + compatible = "brcm,ns-thermal"; + reg = <0x1800c2c0 0x10>; + #thermal-sensor-cells = <0>; +}; + +thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <0>; + polling-delay = <1000>; + coefficients = <(-556) 418000>; + thermal-sensors = <&thermal>; + + trips { + cpu-crit { + temperature = <125000>; + hysteresis = <0>; + type = "critical"; + }; + }; + + cooling-maps { + }; + }; +}; diff --git a/Documentation/devicetree/bindings/thermal/da9062-thermal.txt b/Documentation/devicetree/bindings/thermal/da9062-thermal.txt new file mode 100644 index 0000000000000000000000000000000000000000..e241bb5a5584d2c5c0b4ff068926794264920f45 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/da9062-thermal.txt @@ -0,0 +1,36 @@ +* Dialog DA9062/61 TJUNC Thermal Module + +This module is part of the DA9061/DA9062. For more details about entire +DA9062 and DA9061 chips see Documentation/devicetree/bindings/mfd/da9062.txt + +Junction temperature thermal module uses an interrupt signal to identify +high THERMAL_TRIP_HOT temperatures for the PMIC device. + +Required properties: + +- compatible: should be one of the following valid compatible string lines: + "dlg,da9061-thermal", "dlg,da9062-thermal" + "dlg,da9062-thermal" + +Optional properties: + +- polling-delay-passive : Specify the polling period, measured in + milliseconds, between thermal zone device update checks. + +Example: DA9062 + + pmic0: da9062@58 { + thermal { + compatible = "dlg,da9062-thermal"; + polling-delay-passive = <3000>; + }; + }; + +Example: DA9061 using a fall-back compatible for the DA9062 onkey driver + + pmic0: da9061@58 { + thermal { + compatible = "dlg,da9061-thermal", "dlg,da9062-thermal"; + polling-delay-passive = <3000>; + }; + }; diff --git a/Documentation/devicetree/bindings/timer/cortina,gemini-timer.txt b/Documentation/devicetree/bindings/timer/cortina,gemini-timer.txt deleted file mode 100644 index 16ea1d3b2e9e22f8c0bb1b2a2d28a0b765259c89..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/timer/cortina,gemini-timer.txt +++ /dev/null @@ -1,22 +0,0 @@ -Cortina Systems Gemini timer - -This timer is embedded in the Cortina Systems Gemini SoCs. - -Required properties: - -- compatible : Must be "cortina,gemini-timer" -- reg : Should contain registers location and length -- interrupts : Should contain the three timer interrupts with - flags for rising edge -- syscon : a phandle to the global Gemini system controller - -Example: - -timer@43000000 { - compatible = "cortina,gemini-timer"; - reg = <0x43000000 0x1000>; - interrupts = <14 IRQ_TYPE_EDGE_RISING>, /* Timer 1 */ - <15 IRQ_TYPE_EDGE_RISING>, /* Timer 2 */ - <16 IRQ_TYPE_EDGE_RISING>; /* Timer 3 */ - syscon = <&syscon>; -}; diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt b/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt new file mode 100644 index 0000000000000000000000000000000000000000..b73ca6cd07f80c5ff3296d0ee71e865b6d31688a --- /dev/null +++ b/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt @@ -0,0 +1,33 @@ +Faraday Technology timer + +This timer is a generic IP block from Faraday Technology, embedded in the +Cortina Systems Gemini SoCs and other designs. + +Required properties: + +- compatible : Must be one of + "faraday,fttmr010" + "cortina,gemini-timer" +- reg : Should contain registers location and length +- interrupts : Should contain the three timer interrupts usually with + flags for falling edge + +Optionally required properties: + +- clocks : a clock to provide the tick rate for "faraday,fttmr010" +- clock-names : should be "EXTCLK" and "PCLK" for the external tick timer + and peripheral clock respectively, for "faraday,fttmr010" +- syscon : a phandle to the global Gemini system controller if the compatible + type is "cortina,gemini-timer" + +Example: + +timer@43000000 { + compatible = "faraday,fttmr010"; + reg = <0x43000000 0x1000>; + interrupts = <14 IRQ_TYPE_EDGE_FALLING>, /* Timer 1 */ + <15 IRQ_TYPE_EDGE_FALLING>, /* Timer 2 */ + <16 IRQ_TYPE_EDGE_FALLING>; /* Timer 3 */ + clocks = <&extclk>, <&pclk>; + clock-names = "EXTCLK", "PCLK"; +}; diff --git a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt index a41b184d553843679323b36024c7d2653ca26f4a..16a5f4577a61886be16103aedf1a38539d4248b3 100644 --- a/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt +++ b/Documentation/devicetree/bindings/timer/rockchip,rk-timer.txt @@ -1,9 +1,15 @@ Rockchip rk timer Required properties: -- compatible: shall be one of: - "rockchip,rk3288-timer" - for rk3066, rk3036, rk3188, rk322x, rk3288, rk3368 - "rockchip,rk3399-timer" - for rk3399 +- compatible: should be: + "rockchip,rk3036-timer", "rockchip,rk3288-timer": for Rockchip RK3036 + "rockchip,rk3066-timer", "rockchip,rk3288-timer": for Rockchip RK3066 + "rockchip,rk3188-timer", "rockchip,rk3288-timer": for Rockchip RK3188 + "rockchip,rk3228-timer", "rockchip,rk3288-timer": for Rockchip RK3228 + "rockchip,rk3229-timer", "rockchip,rk3288-timer": for Rockchip RK3229 + "rockchip,rk3288-timer": for Rockchip RK3288 + "rockchip,rk3368-timer", "rockchip,rk3288-timer": for Rockchip RK3368 + "rockchip,rk3399-timer": for Rockchip RK3399 - reg: base address of the timer register starting with TIMERS CONTROL register - interrupts: should contain the interrupts for Timer0 - clocks : must contain an entry for each entry in clock-names diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/trivial-devices.txt similarity index 99% rename from Documentation/devicetree/bindings/i2c/trivial-devices.txt rename to Documentation/devicetree/bindings/trivial-devices.txt index ad10fbe61562506015c93e3e6fe52dba751665a3..3e0a34c88e07759bdd076e98d43a4c988e1de4cc 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/trivial-devices.txt @@ -160,6 +160,7 @@ sii,s35390a 2-wire CMOS real-time clock silabs,si7020 Relative Humidity and Temperature Sensors skyworks,sky81452 Skyworks SKY81452: Six-Channel White LED Driver with Touch Panel Bias Supply st,24c256 i2c serial eeprom (24cxx) +st,m41t0 Serial real-time clock (RTC) st,m41t00 Serial real-time clock (RTC) st,m41t62 Serial real-time clock (RTC) with alarm st,m41t80 M41T80 - SERIAL ACCESS RTC WITH ALARMS diff --git a/Documentation/devicetree/bindings/usb/da8xx-usb.txt b/Documentation/devicetree/bindings/usb/da8xx-usb.txt index ccb844aba7d4b4d241affaba6d216c4494e0971c..717c5f6562373decc6eb91e7963d445b59739f59 100644 --- a/Documentation/devicetree/bindings/usb/da8xx-usb.txt +++ b/Documentation/devicetree/bindings/usb/da8xx-usb.txt @@ -18,10 +18,26 @@ Required properties: - phy-names: Should be "usb-phy" + - dmas: specifies the dma channels + + - dma-names: specifies the names of the channels. Use "rxN" for receive + and "txN" for transmit endpoints. N specifies the endpoint number. + Optional properties: ~~~~~~~~~~~~~~~~~~~~ - vbus-supply: Phandle to a regulator providing the USB bus power. +DMA +~~~ +- compatible: ti,da830-cppi41 +- reg: offset and length of the following register spaces: CPPI DMA Controller, + CPPI DMA Scheduler, Queue Manager +- reg-names: "controller", "scheduler", "queuemgr" +- #dma-cells: should be set to 2. The first number represents the + channel number (0 … 3 for endpoints 1 … 4). + The second number is 0 for RX and 1 for TX transfers. +- #dma-channels: should be set to 4 representing the 4 endpoints. + Example: usb_phy: usb-phy { compatible = "ti,da830-usb-phy"; @@ -30,7 +46,10 @@ Example: }; usb0: usb@200000 { compatible = "ti,da830-musb"; - reg = <0x00200000 0x10000>; + reg = <0x00200000 0x1000>; + ranges; + #address-cells = <1>; + #size-cells = <1>; interrupts = <58>; interrupt-names = "mc"; @@ -39,5 +58,25 @@ Example: phys = <&usb_phy 0>; phy-names = "usb-phy"; + dmas = <&cppi41dma 0 0 &cppi41dma 1 0 + &cppi41dma 2 0 &cppi41dma 3 0 + &cppi41dma 0 1 &cppi41dma 1 1 + &cppi41dma 2 1 &cppi41dma 3 1>; + dma-names = + "rx1", "rx2", "rx3", "rx4", + "tx1", "tx2", "tx3", "tx4"; + status = "okay"; + + cppi41dma: dma-controller@201000 { + compatible = "ti,da830-cppi41"; + reg = <0x201000 0x1000 + 0x202000 0x1000 + 0x204000 0x4000>; + reg-names = "controller", "scheduler", "queuemgr"; + interrupts = <58>; + #dma-cells = <2>; + #dma-channels = <4>; + }; + }; diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt index 6c7c2bce6d0c3c2b59c8726da4a40a9a1a4a345d..fcf199b64d3d3f2b703c42001d7cf7dfff5ca7bf 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.txt +++ b/Documentation/devicetree/bindings/usb/dwc2.txt @@ -10,10 +10,15 @@ Required properties: - "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc; - "lantiq,arx100-usb": The DWC2 USB controller instance in Lantiq ARX SoCs; - "lantiq,xrx200-usb": The DWC2 USB controller instance in Lantiq XRX SoCs; + - "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 SoCs; - "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b SoCs; - "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 SoCs; - "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX SoCs; - snps,dwc2: A generic DWC2 USB controller with default parameters. + - "st,stm32f4x9-fsotg": The DWC2 USB FS/HS controller instance in STM32F4x9 SoCs + configured in FS mode; + - "st,stm32f4x9-hsotg": The DWC2 USB HS controller instance in STM32F4x9 SoCs + configured in HS mode; - reg : Should contain 1 register range (address and length) - interrupts : Should contain 1 interrupt - clocks: clock provider specifier diff --git a/Documentation/devicetree/bindings/usb/ehci-orion.txt b/Documentation/devicetree/bindings/usb/ehci-orion.txt index 17c3bc858b86dd2bbcdaba8c2b175c1fa0dc34d6..2855bae79fda60b2aa4e2ae7a6a7c54115e6cd60 100644 --- a/Documentation/devicetree/bindings/usb/ehci-orion.txt +++ b/Documentation/devicetree/bindings/usb/ehci-orion.txt @@ -1,7 +1,9 @@ * EHCI controller, Orion Marvell variants Required properties: -- compatible: must be "marvell,orion-ehci" +- compatible: must be one of the following + "marvell,orion-ehci" + "marvell,armada-3700-ehci" - reg: physical base address of the controller and length of memory mapped region. - interrupts: The EHCI interrupt diff --git a/Documentation/devicetree/bindings/usb/generic.txt b/Documentation/devicetree/bindings/usb/generic.txt index bfadeb1c3bab6212993a3efc2a01750409e9a798..0a74ab8dfdc2f369979232e4fc90b4c660cf6b3a 100644 --- a/Documentation/devicetree/bindings/usb/generic.txt +++ b/Documentation/devicetree/bindings/usb/generic.txt @@ -22,6 +22,7 @@ Optional properties: property is used if any real OTG features(HNP/SRP/ADP) is enabled, if ADP is required, otg-rev should be 0x0200 or above. + - companion: phandle of a companion - hnp-disable: tells OTG controllers we want to disable OTG HNP, normally HNP is the basic function of real OTG except you want it to be a srp-capable only B device. diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index aed180d8e5850fce7b6d1e2569b766831e764cdd..c03d201403661926164f1f9ba45ecd381a77b544 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -51,6 +51,7 @@ brcm Broadcom Corporation buffalo Buffalo, Inc. calxeda Calxeda capella Capella Microsystems, Inc +cascoda Cascoda, Ltd. cavium Cavium, Inc. cdns Cadence Design Systems Inc. ceva Ceva, Inc. @@ -79,6 +80,7 @@ denx Denx Software Engineering devantech Devantech, Ltd. digi Digi International Inc. digilent Diglent, Inc. +dioo Dioo Microcircuit Co., Ltd dlg Dialog Semiconductor dlink D-Link Corporation dmo Data Modul AG @@ -102,6 +104,7 @@ ettus NI Ettus Research eukrea Eukréa Electromatique everest Everest Semiconductor Co. Ltd. everspin Everspin Technologies, Inc. +exar Exar Corporation excito Excito ezchip EZchip Semiconductor faraday Faraday Technology Corporation @@ -136,6 +139,7 @@ holt Holt Integrated Circuits, Inc. honeywell Honeywell hp Hewlett Packard holtek Holtek Semiconductor, Inc. +hwacom HwaCom Systems Inc. i2se I2SE GmbH ibm International Business Machines (IBM) idt Integrated Device Technologies, Inc. @@ -159,6 +163,7 @@ jedec JEDEC Solid State Technology Association karo Ka-Ro electronics GmbH keithkoep Keith & Koep GmbH keymile Keymile GmbH +khadas Khadas kinetic Kinetic Technologies kosagi Sutajio Ko-Usagi PTE Ltd. kyo Kyocera Corporation @@ -168,6 +173,7 @@ lego LEGO Systems A/S lenovo Lenovo Group Ltd. lg LG Corporation licheepi Lichee Pi +linaro Linaro Limited linux Linux-specific binding lltc Linear Technology Corporation lsi LSI Corp. (LSI Logic) @@ -191,6 +197,7 @@ minix MINIX Technology Ltd. miramems MiraMEMS Sensing Technology Co., Ltd. mitsubishi Mitsubishi Electric Corporation mosaixtech Mosaix Technologies, Inc. +motorola Motorola, Inc. moxa Moxa mpl MPL AG mqmaker mqmaker Inc. @@ -213,6 +220,7 @@ newhaven Newhaven Display International ni National Instruments nintendo Nintendo nokia Nokia +nordic Nordic Semiconductor nuvoton Nuvoton Technology Corporation nvd New Vision Display nvidia NVIDIA @@ -259,6 +267,7 @@ richtek Richtek Technology Corporation ricoh Ricoh Co. Ltd. rikomagic Rikomagic Tech Corp. Ltd rockchip Fuzhou Rockchip Electronics Co., Ltd +rohm ROHM Semiconductor Co., Ltd samsung Samsung Semiconductor samtec Samtec/Softing company sandisk Sandisk Corporation @@ -266,6 +275,7 @@ sbs Smart Battery System schindler Schindler seagate Seagate Technology PLC semtech Semtech Corporation +sensirion Sensirion AG sgx SGX Sensortech sharp Sharp Corporation si-en Si-En Technology Ltd. diff --git a/Documentation/devicetree/bindings/watchdog/cortina,gemini-watchdog.txt b/Documentation/devicetree/bindings/watchdog/cortina,gemini-watchdog.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc4b865d178b4f386ef9fdb9bb4f81e14816d214 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/cortina,gemini-watchdog.txt @@ -0,0 +1,17 @@ +Cortina Systems Gemini SoC Watchdog + +Required properties: +- compatible : must be "cortina,gemini-watchdog" +- reg : shall contain base register location and length +- interrupts : shall contain the interrupt for the watchdog + +Optional properties: +- timeout-sec : the default watchdog timeout in seconds. + +Example: + +watchdog@41000000 { + compatible = "cortina,gemini-watchdog"; + reg = <0x41000000 0x1000>; + interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; +}; diff --git a/Documentation/driver-api/80211/cfg80211.rst b/Documentation/driver-api/80211/cfg80211.rst index eca534ab617259a63bdf0d63f6a14319dedc7c0d..8ffac57e1f5b7ddd0b56547258f604d06816e9bd 100644 --- a/Documentation/driver-api/80211/cfg80211.rst +++ b/Documentation/driver-api/80211/cfg80211.rst @@ -2,6 +2,9 @@ cfg80211 subsystem ================== +.. kernel-doc:: include/net/cfg80211.h + :doc: Introduction + Device registration =================== @@ -179,6 +182,12 @@ Actions and configuration .. kernel-doc:: include/net/cfg80211.h :functions: cfg80211_ibss_joined +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_connect_resp_params + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_connect_done + .. kernel-doc:: include/net/cfg80211.h :functions: cfg80211_connect_result diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst index 935b9b8d456c8af606324f57c81a5ef4750ac6c6..472e7a664d13ce6ae6d09cc36ccdde60089de99a 100644 --- a/Documentation/driver-api/basics.rst +++ b/Documentation/driver-api/basics.rst @@ -7,6 +7,12 @@ Driver Entry and Exit points .. kernel-doc:: include/linux/init.h :internal: +Driver device table +------------------- + +.. kernel-doc:: include/linux/mod_devicetable.h + :internal: + Atomic and pointer manipulation ------------------------------- diff --git a/Documentation/driver-api/firmware/index.rst b/Documentation/driver-api/firmware/index.rst index 1abe0179303108008b90e9dba1b9c06e0d28fd86..29da39ec4b8a4c5ab60145ee4b3c72684e828ee6 100644 --- a/Documentation/driver-api/firmware/index.rst +++ b/Documentation/driver-api/firmware/index.rst @@ -7,6 +7,7 @@ Linux Firmware API introduction core request_firmware + other_interfaces .. only:: subproject and html diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst new file mode 100644 index 0000000000000000000000000000000000000000..36c47b1e98242ce16bbfea4050ef2e8a2341fed9 --- /dev/null +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -0,0 +1,15 @@ +Other Firmware Interfaces +========================= + +DMI Interfaces +-------------- + +.. kernel-doc:: drivers/firmware/dmi_scan.c + :export: + +EDD Interfaces +-------------- + +.. kernel-doc:: drivers/firmware/edd.c + :internal: + diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index 60db00d1532baede53509ce01eea748793d5264c..8058a87c1c74e1ba36c585dfb34bf4970841d412 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst @@ -26,7 +26,8 @@ available subsections can be seen below. regulator iio/index input - usb + usb/index + pci spi i2c hsi @@ -36,6 +37,7 @@ available subsections can be seen below. 80211/index uio-howto firmware/index + misc_devices .. only:: subproject and html diff --git a/Documentation/driver-api/misc_devices.rst b/Documentation/driver-api/misc_devices.rst new file mode 100644 index 0000000000000000000000000000000000000000..c7ee7b02ba889e5d6a6a72a15dc17974ba4303d9 --- /dev/null +++ b/Documentation/driver-api/misc_devices.rst @@ -0,0 +1,5 @@ +Miscellaneous Devices +===================== + +.. kernel-doc:: drivers/char/misc.c + :export: diff --git a/Documentation/driver-api/pci.rst b/Documentation/driver-api/pci.rst new file mode 100644 index 0000000000000000000000000000000000000000..01a6c8b7d3a7b59dcee9a11d828968e464af9c1c --- /dev/null +++ b/Documentation/driver-api/pci.rst @@ -0,0 +1,50 @@ +PCI Support Library +------------------- + +.. kernel-doc:: drivers/pci/pci.c + :export: + +.. kernel-doc:: drivers/pci/pci-driver.c + :export: + +.. kernel-doc:: drivers/pci/remove.c + :export: + +.. kernel-doc:: drivers/pci/search.c + :export: + +.. kernel-doc:: drivers/pci/msi.c + :export: + +.. kernel-doc:: drivers/pci/bus.c + :export: + +.. kernel-doc:: drivers/pci/access.c + :export: + +.. kernel-doc:: drivers/pci/irq.c + :export: + +.. kernel-doc:: drivers/pci/htirq.c + :export: + +.. kernel-doc:: drivers/pci/probe.c + :export: + +.. kernel-doc:: drivers/pci/slot.c + :export: + +.. kernel-doc:: drivers/pci/rom.c + :export: + +.. kernel-doc:: drivers/pci/iov.c + :export: + +.. kernel-doc:: drivers/pci/pci-sysfs.c + :internal: + +PCI Hotplug Support Library +--------------------------- + +.. kernel-doc:: drivers/pci/hotplug/pci_hotplug_core.c + :export: diff --git a/Documentation/driver-api/usb.rst b/Documentation/driver-api/usb.rst deleted file mode 100644 index 851cc40b66b523d8778e968bc0a6a9f459ac2b9a..0000000000000000000000000000000000000000 --- a/Documentation/driver-api/usb.rst +++ /dev/null @@ -1,748 +0,0 @@ -=========================== -The Linux-USB Host Side API -=========================== - -Introduction to USB on Linux -============================ - -A Universal Serial Bus (USB) is used to connect a host, such as a PC or -workstation, to a number of peripheral devices. USB uses a tree -structure, with the host as the root (the system's master), hubs as -interior nodes, and peripherals as leaves (and slaves). Modern PCs -support several such trees of USB devices, usually -a few USB 3.0 (5 GBit/s) or USB 3.1 (10 GBit/s) and some legacy -USB 2.0 (480 MBit/s) busses just in case. - -That master/slave asymmetry was designed-in for a number of reasons, one -being ease of use. It is not physically possible to mistake upstream and -downstream or it does not matter with a type C plug (or they are built into the -peripheral). Also, the host software doesn't need to deal with -distributed auto-configuration since the pre-designated master node -manages all that. - -Kernel developers added USB support to Linux early in the 2.2 kernel -series and have been developing it further since then. Besides support -for each new generation of USB, various host controllers gained support, -new drivers for peripherals have been added and advanced features for latency -measurement and improved power management introduced. - -Linux can run inside USB devices as well as on the hosts that control -the devices. But USB device drivers running inside those peripherals -don't do the same things as the ones running inside hosts, so they've -been given a different name: *gadget drivers*. This document does not -cover gadget drivers. - -USB Host-Side API Model -======================= - -Host-side drivers for USB devices talk to the "usbcore" APIs. There are -two. One is intended for *general-purpose* drivers (exposed through -driver frameworks), and the other is for drivers that are *part of the -core*. Such core drivers include the *hub* driver (which manages trees -of USB devices) and several different kinds of *host controller -drivers*, which control individual busses. - -The device model seen by USB drivers is relatively complex. - -- USB supports four kinds of data transfers (control, bulk, interrupt, - and isochronous). Two of them (control and bulk) use bandwidth as - it's available, while the other two (interrupt and isochronous) are - scheduled to provide guaranteed bandwidth. - -- The device description model includes one or more "configurations" - per device, only one of which is active at a time. Devices are supposed - to be capable of operating at lower than their top - speeds and may provide a BOS descriptor showing the lowest speed they - remain fully operational at. - -- From USB 3.0 on configurations have one or more "functions", which - provide a common functionality and are grouped together for purposes - of power management. - -- Configurations or functions have one or more "interfaces", each of which may have - "alternate settings". Interfaces may be standardized by USB "Class" - specifications, or may be specific to a vendor or device. - - USB device drivers actually bind to interfaces, not devices. Think of - them as "interface drivers", though you may not see many devices - where the distinction is important. *Most USB devices are simple, - with only one function, one configuration, one interface, and one alternate - setting.* - -- Interfaces have one or more "endpoints", each of which supports one - type and direction of data transfer such as "bulk out" or "interrupt - in". The entire configuration may have up to sixteen endpoints in - each direction, allocated as needed among all the interfaces. - -- Data transfer on USB is packetized; each endpoint has a maximum - packet size. Drivers must often be aware of conventions such as - flagging the end of bulk transfers using "short" (including zero - length) packets. - -- The Linux USB API supports synchronous calls for control and bulk - messages. It also supports asynchronous calls for all kinds of data - transfer, using request structures called "URBs" (USB Request - Blocks). - -Accordingly, the USB Core API exposed to device drivers covers quite a -lot of territory. You'll probably need to consult the USB 3.0 -specification, available online from www.usb.org at no cost, as well as -class or device specifications. - -The only host-side drivers that actually touch hardware (reading/writing -registers, handling IRQs, and so on) are the HCDs. In theory, all HCDs -provide the same functionality through the same API. In practice, that's -becoming more true, but there are still differences -that crop up especially with fault handling on the less common controllers. -Different controllers don't -necessarily report the same aspects of failures, and recovery from -faults (including software-induced ones like unlinking an URB) isn't yet -fully consistent. Device driver authors should make a point of doing -disconnect testing (while the device is active) with each different host -controller driver, to make sure drivers don't have bugs of their own as -well as to make sure they aren't relying on some HCD-specific behavior. - -USB-Standard Types -================== - -In ```` you will find the USB data types defined in -chapter 9 of the USB specification. These data types are used throughout -USB, and in APIs including this host side API, gadget APIs, and usbfs. - -.. kernel-doc:: include/linux/usb/ch9.h - :internal: - -Host-Side Data Types and Macros -=============================== - -The host side API exposes several layers to drivers, some of which are -more necessary than others. These support lifecycle models for host side -drivers and devices, and support passing buffers through usbcore to some -HCD that performs the I/O for the device driver. - -.. kernel-doc:: include/linux/usb.h - :internal: - -USB Core APIs -============= - -There are two basic I/O models in the USB API. The most elemental one is -asynchronous: drivers submit requests in the form of an URB, and the -URB's completion callback handles the next step. All USB transfer types -support that model, although there are special cases for control URBs -(which always have setup and status stages, but may not have a data -stage) and isochronous URBs (which allow large packets and include -per-packet fault reports). Built on top of that is synchronous API -support, where a driver calls a routine that allocates one or more URBs, -submits them, and waits until they complete. There are synchronous -wrappers for single-buffer control and bulk transfers (which are awkward -to use in some driver disconnect scenarios), and for scatterlist based -streaming i/o (bulk or interrupt). - -USB drivers need to provide buffers that can be used for DMA, although -they don't necessarily need to provide the DMA mapping themselves. There -are APIs to use used when allocating DMA buffers, which can prevent use -of bounce buffers on some systems. In some cases, drivers may be able to -rely on 64bit DMA to eliminate another kind of bounce buffer. - -.. kernel-doc:: drivers/usb/core/urb.c - :export: - -.. kernel-doc:: drivers/usb/core/message.c - :export: - -.. kernel-doc:: drivers/usb/core/file.c - :export: - -.. kernel-doc:: drivers/usb/core/driver.c - :export: - -.. kernel-doc:: drivers/usb/core/usb.c - :export: - -.. kernel-doc:: drivers/usb/core/hub.c - :export: - -Host Controller APIs -==================== - -These APIs are only for use by host controller drivers, most of which -implement standard register interfaces such as XHCI, EHCI, OHCI, or UHCI. UHCI -was one of the first interfaces, designed by Intel and also used by VIA; -it doesn't do much in hardware. OHCI was designed later, to have the -hardware do more work (bigger transfers, tracking protocol state, and so -on). EHCI was designed with USB 2.0; its design has features that -resemble OHCI (hardware does much more work) as well as UHCI (some parts -of ISO support, TD list processing). XHCI was designed with USB 3.0. It -continues to shift support for functionality into hardware. - -There are host controllers other than the "big three", although most PCI -based controllers (and a few non-PCI based ones) use one of those -interfaces. Not all host controllers use DMA; some use PIO, and there is -also a simulator and a virtual host controller to pipe USB over the network. - -The same basic APIs are available to drivers for all those controllers. -For historical reasons they are in two layers: :c:type:`struct -usb_bus ` is a rather thin layer that became available -in the 2.2 kernels, while :c:type:`struct usb_hcd ` -is a more featureful layer -that lets HCDs share common code, to shrink driver size and -significantly reduce hcd-specific behaviors. - -.. kernel-doc:: drivers/usb/core/hcd.c - :export: - -.. kernel-doc:: drivers/usb/core/hcd-pci.c - :export: - -.. kernel-doc:: drivers/usb/core/buffer.c - :internal: - -The USB Filesystem (usbfs) -========================== - -This chapter presents the Linux *usbfs*. You may prefer to avoid writing -new kernel code for your USB driver; that's the problem that usbfs set -out to solve. User mode device drivers are usually packaged as -applications or libraries, and may use usbfs through some programming -library that wraps it. Such libraries include -`libusb `__ for C/C++, and -`jUSB `__ for Java. - - **Note** - - This particular documentation is incomplete, especially with respect - to the asynchronous mode. As of kernel 2.5.66 the code and this - (new) documentation need to be cross-reviewed. - -Configure usbfs into Linux kernels by enabling the *USB filesystem* -option (CONFIG_USB_DEVICEFS), and you get basic support for user mode -USB device drivers. Until relatively recently it was often (confusingly) -called *usbdevfs* although it wasn't solving what *devfs* was. Every USB -device will appear in usbfs, regardless of whether or not it has a -kernel driver. - -What files are in "usbfs"? --------------------------- - -Conventionally mounted at ``/proc/bus/usb``, usbfs features include: - -- ``/proc/bus/usb/devices`` ... a text file showing each of the USB - devices on known to the kernel, and their configuration descriptors. - You can also poll() this to learn about new devices. - -- ``/proc/bus/usb/BBB/DDD`` ... magic files exposing the each device's - configuration descriptors, and supporting a series of ioctls for - making device requests, including I/O to devices. (Purely for access - by programs.) - -Each bus is given a number (BBB) based on when it was enumerated; within -each bus, each device is given a similar number (DDD). Those BBB/DDD -paths are not "stable" identifiers; expect them to change even if you -always leave the devices plugged in to the same hub port. *Don't even -think of saving these in application configuration files.* Stable -identifiers are available, for user mode applications that want to use -them. HID and networking devices expose these stable IDs, so that for -example you can be sure that you told the right UPS to power down its -second server. "usbfs" doesn't (yet) expose those IDs. - -Mounting and Access Control ---------------------------- - -There are a number of mount options for usbfs, which will be of most -interest to you if you need to override the default access control -policy. That policy is that only root may read or write device files -(``/proc/bus/BBB/DDD``) although anyone may read the ``devices`` or -``drivers`` files. I/O requests to the device also need the -CAP_SYS_RAWIO capability, - -The significance of that is that by default, all user mode device -drivers need super-user privileges. You can change modes or ownership in -a driver setup when the device hotplugs, or maye just start the driver -right then, as a privileged server (or some activity within one). That's -the most secure approach for multi-user systems, but for single user -systems ("trusted" by that user) it's more convenient just to grant -everyone all access (using the *devmode=0666* option) so the driver can -start whenever it's needed. - -The mount options for usbfs, usable in /etc/fstab or in command line -invocations of *mount*, are: - -*busgid*\ =NNNNN - Controls the GID used for the /proc/bus/usb/BBB directories. - (Default: 0) - -*busmode*\ =MMM - Controls the file mode used for the /proc/bus/usb/BBB directories. - (Default: 0555) - -*busuid*\ =NNNNN - Controls the UID used for the /proc/bus/usb/BBB directories. - (Default: 0) - -*devgid*\ =NNNNN - Controls the GID used for the /proc/bus/usb/BBB/DDD files. (Default: - 0) - -*devmode*\ =MMM - Controls the file mode used for the /proc/bus/usb/BBB/DDD files. - (Default: 0644) - -*devuid*\ =NNNNN - Controls the UID used for the /proc/bus/usb/BBB/DDD files. (Default: - 0) - -*listgid*\ =NNNNN - Controls the GID used for the /proc/bus/usb/devices and drivers - files. (Default: 0) - -*listmode*\ =MMM - Controls the file mode used for the /proc/bus/usb/devices and - drivers files. (Default: 0444) - -*listuid*\ =NNNNN - Controls the UID used for the /proc/bus/usb/devices and drivers - files. (Default: 0) - -Note that many Linux distributions hard-wire the mount options for usbfs -in their init scripts, such as ``/etc/rc.d/rc.sysinit``, rather than -making it easy to set this per-system policy in ``/etc/fstab``. - -/proc/bus/usb/devices ---------------------- - -This file is handy for status viewing tools in user mode, which can scan -the text format and ignore most of it. More detailed device status -(including class and vendor status) is available from device-specific -files. For information about the current format of this file, see the -``Documentation/usb/proc_usb_info.txt`` file in your Linux kernel -sources. - -This file, in combination with the poll() system call, can also be used -to detect when devices are added or removed: - -:: - - int fd; - struct pollfd pfd; - - fd = open("/proc/bus/usb/devices", O_RDONLY); - pfd = { fd, POLLIN, 0 }; - for (;;) { - /* The first time through, this call will return immediately. */ - poll(&pfd, 1, -1); - - /* To see what's changed, compare the file's previous and current - contents or scan the filesystem. (Scanning is more precise.) */ - } - -Note that this behavior is intended to be used for informational and -debug purposes. It would be more appropriate to use programs such as -udev or HAL to initialize a device or start a user-mode helper program, -for instance. - -/proc/bus/usb/BBB/DDD ---------------------- - -Use these files in one of these basic ways: - -*They can be read,* producing first the device descriptor (18 bytes) and -then the descriptors for the current configuration. See the USB 2.0 spec -for details about those binary data formats. You'll need to convert most -multibyte values from little endian format to your native host byte -order, although a few of the fields in the device descriptor (both of -the BCD-encoded fields, and the vendor and product IDs) will be -byteswapped for you. Note that configuration descriptors include -descriptors for interfaces, altsettings, endpoints, and maybe additional -class descriptors. - -*Perform USB operations* using *ioctl()* requests to make endpoint I/O -requests (synchronously or asynchronously) or manage the device. These -requests need the CAP_SYS_RAWIO capability, as well as filesystem -access permissions. Only one ioctl request can be made on one of these -device files at a time. This means that if you are synchronously reading -an endpoint from one thread, you won't be able to write to a different -endpoint from another thread until the read completes. This works for -*half duplex* protocols, but otherwise you'd use asynchronous i/o -requests. - -Life Cycle of User Mode Drivers -------------------------------- - -Such a driver first needs to find a device file for a device it knows -how to handle. Maybe it was told about it because a ``/sbin/hotplug`` -event handling agent chose that driver to handle the new device. Or -maybe it's an application that scans all the /proc/bus/usb device files, -and ignores most devices. In either case, it should :c:func:`read()` -all the descriptors from the device file, and check them against what it -knows how to handle. It might just reject everything except a particular -vendor and product ID, or need a more complex policy. - -Never assume there will only be one such device on the system at a time! -If your code can't handle more than one device at a time, at least -detect when there's more than one, and have your users choose which -device to use. - -Once your user mode driver knows what device to use, it interacts with -it in either of two styles. The simple style is to make only control -requests; some devices don't need more complex interactions than those. -(An example might be software using vendor-specific control requests for -some initialization or configuration tasks, with a kernel driver for the -rest.) - -More likely, you need a more complex style driver: one using non-control -endpoints, reading or writing data and claiming exclusive use of an -interface. *Bulk* transfers are easiest to use, but only their sibling -*interrupt* transfers work with low speed devices. Both interrupt and -*isochronous* transfers offer service guarantees because their bandwidth -is reserved. Such "periodic" transfers are awkward to use through usbfs, -unless you're using the asynchronous calls. However, interrupt transfers -can also be used in a synchronous "one shot" style. - -Your user-mode driver should never need to worry about cleaning up -request state when the device is disconnected, although it should close -its open file descriptors as soon as it starts seeing the ENODEV errors. - -The ioctl() Requests --------------------- - -To use these ioctls, you need to include the following headers in your -userspace program: - -:: - - #include - #include - #include - -The standard USB device model requests, from "Chapter 9" of the USB 2.0 -specification, are automatically included from the ```` -header. - -Unless noted otherwise, the ioctl requests described here will update -the modification time on the usbfs file to which they are applied -(unless they fail). A return of zero indicates success; otherwise, a -standard USB error code is returned. (These are documented in -``Documentation/usb/error-codes.txt`` in your kernel sources.) - -Each of these files multiplexes access to several I/O streams, one per -endpoint. Each device has one control endpoint (endpoint zero) which -supports a limited RPC style RPC access. Devices are configured by -hub_wq (in the kernel) setting a device-wide *configuration* that -affects things like power consumption and basic functionality. The -endpoints are part of USB *interfaces*, which may have *altsettings* -affecting things like which endpoints are available. Many devices only -have a single configuration and interface, so drivers for them will -ignore configurations and altsettings. - -Management/Status Requests -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A number of usbfs requests don't deal very directly with device I/O. -They mostly relate to device management and status. These are all -synchronous requests. - -USBDEVFS_CLAIMINTERFACE - This is used to force usbfs to claim a specific interface, which has - not previously been claimed by usbfs or any other kernel driver. The - ioctl parameter is an integer holding the number of the interface - (bInterfaceNumber from descriptor). - - Note that if your driver doesn't claim an interface before trying to - use one of its endpoints, and no other driver has bound to it, then - the interface is automatically claimed by usbfs. - - This claim will be released by a RELEASEINTERFACE ioctl, or by - closing the file descriptor. File modification time is not updated - by this request. - -USBDEVFS_CONNECTINFO - Says whether the device is lowspeed. The ioctl parameter points to a - structure like this: - - :: - - struct usbdevfs_connectinfo { - unsigned int devnum; - unsigned char slow; - }; - - File modification time is not updated by this request. - - *You can't tell whether a "not slow" device is connected at high - speed (480 MBit/sec) or just full speed (12 MBit/sec).* You should - know the devnum value already, it's the DDD value of the device file - name. - -USBDEVFS_GETDRIVER - Returns the name of the kernel driver bound to a given interface (a - string). Parameter is a pointer to this structure, which is - modified: - - :: - - struct usbdevfs_getdriver { - unsigned int interface; - char driver[USBDEVFS_MAXDRIVERNAME + 1]; - }; - - File modification time is not updated by this request. - -USBDEVFS_IOCTL - Passes a request from userspace through to a kernel driver that has - an ioctl entry in the *struct usb_driver* it registered. - - :: - - struct usbdevfs_ioctl { - int ifno; - int ioctl_code; - void *data; - }; - - /* user mode call looks like this. - * 'request' becomes the driver->ioctl() 'code' parameter. - * the size of 'param' is encoded in 'request', and that data - * is copied to or from the driver->ioctl() 'buf' parameter. - */ - static int - usbdev_ioctl (int fd, int ifno, unsigned request, void *param) - { - struct usbdevfs_ioctl wrapper; - - wrapper.ifno = ifno; - wrapper.ioctl_code = request; - wrapper.data = param; - - return ioctl (fd, USBDEVFS_IOCTL, &wrapper); - } - - File modification time is not updated by this request. - - This request lets kernel drivers talk to user mode code through - filesystem operations even when they don't create a character or - block special device. It's also been used to do things like ask - devices what device special file should be used. Two pre-defined - ioctls are used to disconnect and reconnect kernel drivers, so that - user mode code can completely manage binding and configuration of - devices. - -USBDEVFS_RELEASEINTERFACE - This is used to release the claim usbfs made on interface, either - implicitly or because of a USBDEVFS_CLAIMINTERFACE call, before the - file descriptor is closed. The ioctl parameter is an integer holding - the number of the interface (bInterfaceNumber from descriptor); File - modification time is not updated by this request. - - **Warning** - - *No security check is made to ensure that the task which made - the claim is the one which is releasing it. This means that user - mode driver may interfere other ones.* - -USBDEVFS_RESETEP - Resets the data toggle value for an endpoint (bulk or interrupt) to - DATA0. The ioctl parameter is an integer endpoint number (1 to 15, - as identified in the endpoint descriptor), with USB_DIR_IN added - if the device's endpoint sends data to the host. - - **Warning** - - *Avoid using this request. It should probably be removed.* Using - it typically means the device and driver will lose toggle - synchronization. If you really lost synchronization, you likely - need to completely handshake with the device, using a request - like CLEAR_HALT or SET_INTERFACE. - -USBDEVFS_DROP_PRIVILEGES - This is used to relinquish the ability to do certain operations - which are considered to be privileged on a usbfs file descriptor. - This includes claiming arbitrary interfaces, resetting a device on - which there are currently claimed interfaces from other users, and - issuing USBDEVFS_IOCTL calls. The ioctl parameter is a 32 bit mask - of interfaces the user is allowed to claim on this file descriptor. - You may issue this ioctl more than one time to narrow said mask. - -Synchronous I/O Support -~~~~~~~~~~~~~~~~~~~~~~~ - -Synchronous requests involve the kernel blocking until the user mode -request completes, either by finishing successfully or by reporting an -error. In most cases this is the simplest way to use usbfs, although as -noted above it does prevent performing I/O to more than one endpoint at -a time. - -USBDEVFS_BULK - Issues a bulk read or write request to the device. The ioctl - parameter is a pointer to this structure: - - :: - - struct usbdevfs_bulktransfer { - unsigned int ep; - unsigned int len; - unsigned int timeout; /* in milliseconds */ - void *data; - }; - - The "ep" value identifies a bulk endpoint number (1 to 15, as - identified in an endpoint descriptor), masked with USB_DIR_IN when - referring to an endpoint which sends data to the host from the - device. The length of the data buffer is identified by "len"; Recent - kernels support requests up to about 128KBytes. *FIXME say how read - length is returned, and how short reads are handled.*. - -USBDEVFS_CLEAR_HALT - Clears endpoint halt (stall) and resets the endpoint toggle. This is - only meaningful for bulk or interrupt endpoints. The ioctl parameter - is an integer endpoint number (1 to 15, as identified in an endpoint - descriptor), masked with USB_DIR_IN when referring to an endpoint - which sends data to the host from the device. - - Use this on bulk or interrupt endpoints which have stalled, - returning *-EPIPE* status to a data transfer request. Do not issue - the control request directly, since that could invalidate the host's - record of the data toggle. - -USBDEVFS_CONTROL - Issues a control request to the device. The ioctl parameter points - to a structure like this: - - :: - - struct usbdevfs_ctrltransfer { - __u8 bRequestType; - __u8 bRequest; - __u16 wValue; - __u16 wIndex; - __u16 wLength; - __u32 timeout; /* in milliseconds */ - void *data; - }; - - The first eight bytes of this structure are the contents of the - SETUP packet to be sent to the device; see the USB 2.0 specification - for details. The bRequestType value is composed by combining a - USB_TYPE_\* value, a USB_DIR_\* value, and a USB_RECIP_\* - value (from **). If wLength is nonzero, it describes - the length of the data buffer, which is either written to the device - (USB_DIR_OUT) or read from the device (USB_DIR_IN). - - At this writing, you can't transfer more than 4 KBytes of data to or - from a device; usbfs has a limit, and some host controller drivers - have a limit. (That's not usually a problem.) *Also* there's no way - to say it's not OK to get a short read back from the device. - -USBDEVFS_RESET - Does a USB level device reset. The ioctl parameter is ignored. After - the reset, this rebinds all device interfaces. File modification - time is not updated by this request. - - **Warning** - - *Avoid using this call* until some usbcore bugs get fixed, since - it does not fully synchronize device, interface, and driver (not - just usbfs) state. - -USBDEVFS_SETINTERFACE - Sets the alternate setting for an interface. The ioctl parameter is - a pointer to a structure like this: - - :: - - struct usbdevfs_setinterface { - unsigned int interface; - unsigned int altsetting; - }; - - File modification time is not updated by this request. - - Those struct members are from some interface descriptor applying to - the current configuration. The interface number is the - bInterfaceNumber value, and the altsetting number is the - bAlternateSetting value. (This resets each endpoint in the - interface.) - -USBDEVFS_SETCONFIGURATION - Issues the :c:func:`usb_set_configuration()` call for the - device. The parameter is an integer holding the number of a - configuration (bConfigurationValue from descriptor). File - modification time is not updated by this request. - - **Warning** - - *Avoid using this call* until some usbcore bugs get fixed, since - it does not fully synchronize device, interface, and driver (not - just usbfs) state. - -Asynchronous I/O Support -~~~~~~~~~~~~~~~~~~~~~~~~ - -As mentioned above, there are situations where it may be important to -initiate concurrent operations from user mode code. This is particularly -important for periodic transfers (interrupt and isochronous), but it can -be used for other kinds of USB requests too. In such cases, the -asynchronous requests described here are essential. Rather than -submitting one request and having the kernel block until it completes, -the blocking is separate. - -These requests are packaged into a structure that resembles the URB used -by kernel device drivers. (No POSIX Async I/O support here, sorry.) It -identifies the endpoint type (USBDEVFS_URB_TYPE_\*), endpoint -(number, masked with USB_DIR_IN as appropriate), buffer and length, -and a user "context" value serving to uniquely identify each request. -(It's usually a pointer to per-request data.) Flags can modify requests -(not as many as supported for kernel drivers). - -Each request can specify a realtime signal number (between SIGRTMIN and -SIGRTMAX, inclusive) to request a signal be sent when the request -completes. - -When usbfs returns these urbs, the status value is updated, and the -buffer may have been modified. Except for isochronous transfers, the -actual_length is updated to say how many bytes were transferred; if the -USBDEVFS_URB_DISABLE_SPD flag is set ("short packets are not OK"), if -fewer bytes were read than were requested then you get an error report. - -:: - - struct usbdevfs_iso_packet_desc { - unsigned int length; - unsigned int actual_length; - unsigned int status; - }; - - struct usbdevfs_urb { - unsigned char type; - unsigned char endpoint; - int status; - unsigned int flags; - void *buffer; - int buffer_length; - int actual_length; - int start_frame; - int number_of_packets; - int error_count; - unsigned int signr; - void *usercontext; - struct usbdevfs_iso_packet_desc iso_frame_desc[]; - }; - -For these asynchronous requests, the file modification time reflects -when the request was initiated. This contrasts with their use with the -synchronous requests, where it reflects when requests complete. - -USBDEVFS_DISCARDURB - *TBS* File modification time is not updated by this request. - -USBDEVFS_DISCSIGNAL - *TBS* File modification time is not updated by this request. - -USBDEVFS_REAPURB - *TBS* File modification time is not updated by this request. - -USBDEVFS_REAPURBNDELAY - *TBS* File modification time is not updated by this request. - -USBDEVFS_SUBMITURB - *TBS* diff --git a/Documentation/driver-api/usb/URB.rst b/Documentation/driver-api/usb/URB.rst new file mode 100644 index 0000000000000000000000000000000000000000..61a54da9fce94f4d952fdf67c8a311db810cfac0 --- /dev/null +++ b/Documentation/driver-api/usb/URB.rst @@ -0,0 +1,290 @@ +.. _usb-urb: + +USB Request Block (URB) +~~~~~~~~~~~~~~~~~~~~~~~ + +:Revised: 2000-Dec-05 +:Again: 2002-Jul-06 +:Again: 2005-Sep-19 +:Again: 2017-Mar-29 + + +.. note:: + + The USB subsystem now has a substantial section at :ref:`usb-hostside-api` + section, generated from the current source code. + This particular documentation file isn't complete and may not be + updated to the last version; don't rely on it except for a quick + overview. + +Basic concept or 'What is an URB?' +================================== + +The basic idea of the new driver is message passing, the message itself is +called USB Request Block, or URB for short. + +- An URB consists of all relevant information to execute any USB transaction + and deliver the data and status back. + +- Execution of an URB is inherently an asynchronous operation, i.e. the + :c:func:`usb_submit_urb` call returns immediately after it has successfully + queued the requested action. + +- Transfers for one URB can be canceled with :c:func:`usb_unlink_urb` + at any time. + +- Each URB has a completion handler, which is called after the action + has been successfully completed or canceled. The URB also contains a + context-pointer for passing information to the completion handler. + +- Each endpoint for a device logically supports a queue of requests. + You can fill that queue, so that the USB hardware can still transfer + data to an endpoint while your driver handles completion of another. + This maximizes use of USB bandwidth, and supports seamless streaming + of data to (or from) devices when using periodic transfer modes. + + +The URB structure +================= + +Some of the fields in struct :c:type:`urb` are:: + + struct urb + { + // (IN) device and pipe specify the endpoint queue + struct usb_device *dev; // pointer to associated USB device + unsigned int pipe; // endpoint information + + unsigned int transfer_flags; // URB_ISO_ASAP, URB_SHORT_NOT_OK, etc. + + // (IN) all urbs need completion routines + void *context; // context for completion routine + usb_complete_t complete; // pointer to completion routine + + // (OUT) status after each completion + int status; // returned status + + // (IN) buffer used for data transfers + void *transfer_buffer; // associated data buffer + u32 transfer_buffer_length; // data buffer length + int number_of_packets; // size of iso_frame_desc + + // (OUT) sometimes only part of CTRL/BULK/INTR transfer_buffer is used + u32 actual_length; // actual data buffer length + + // (IN) setup stage for CTRL (pass a struct usb_ctrlrequest) + unsigned char *setup_packet; // setup packet (control only) + + // Only for PERIODIC transfers (ISO, INTERRUPT) + // (IN/OUT) start_frame is set unless URB_ISO_ASAP isn't set + int start_frame; // start frame + int interval; // polling interval + + // ISO only: packets are only "best effort"; each can have errors + int error_count; // number of errors + struct usb_iso_packet_descriptor iso_frame_desc[0]; + }; + +Your driver must create the "pipe" value using values from the appropriate +endpoint descriptor in an interface that it's claimed. + + +How to get an URB? +================== + +URBs are allocated by calling :c:func:`usb_alloc_urb`:: + + struct urb *usb_alloc_urb(int isoframes, int mem_flags) + +Return value is a pointer to the allocated URB, 0 if allocation failed. +The parameter isoframes specifies the number of isochronous transfer frames +you want to schedule. For CTRL/BULK/INT, use 0. The mem_flags parameter +holds standard memory allocation flags, letting you control (among other +things) whether the underlying code may block or not. + +To free an URB, use :c:func:`usb_free_urb`:: + + void usb_free_urb(struct urb *urb) + +You may free an urb that you've submitted, but which hasn't yet been +returned to you in a completion callback. It will automatically be +deallocated when it is no longer in use. + + +What has to be filled in? +========================= + +Depending on the type of transaction, there are some inline functions +defined in ``linux/usb.h`` to simplify the initialization, such as +:c:func:`usb_fill_control_urb`, :c:func:`usb_fill_bulk_urb` and +:c:func:`usb_fill_int_urb`. In general, they need the usb device pointer, +the pipe (usual format from usb.h), the transfer buffer, the desired transfer +length, the completion handler, and its context. Take a look at the some +existing drivers to see how they're used. + +Flags: + +- For ISO there are two startup behaviors: Specified start_frame or ASAP. +- For ASAP set ``URB_ISO_ASAP`` in transfer_flags. + +If short packets should NOT be tolerated, set ``URB_SHORT_NOT_OK`` in +transfer_flags. + + +How to submit an URB? +===================== + +Just call :c:func:`usb_submit_urb`:: + + int usb_submit_urb(struct urb *urb, int mem_flags) + +The ``mem_flags`` parameter, such as ``GFP_ATOMIC``, controls memory +allocation, such as whether the lower levels may block when memory is tight. + +It immediately returns, either with status 0 (request queued) or some +error code, usually caused by the following: + +- Out of memory (``-ENOMEM``) +- Unplugged device (``-ENODEV``) +- Stalled endpoint (``-EPIPE``) +- Too many queued ISO transfers (``-EAGAIN``) +- Too many requested ISO frames (``-EFBIG``) +- Invalid INT interval (``-EINVAL``) +- More than one packet for INT (``-EINVAL``) + +After submission, ``urb->status`` is ``-EINPROGRESS``; however, you should +never look at that value except in your completion callback. + +For isochronous endpoints, your completion handlers should (re)submit +URBs to the same endpoint with the ``URB_ISO_ASAP`` flag, using +multi-buffering, to get seamless ISO streaming. + + +How to cancel an already running URB? +===================================== + +There are two ways to cancel an URB you've submitted but which hasn't +been returned to your driver yet. For an asynchronous cancel, call +:c:func:`usb_unlink_urb`:: + + int usb_unlink_urb(struct urb *urb) + +It removes the urb from the internal list and frees all allocated +HW descriptors. The status is changed to reflect unlinking. Note +that the URB will not normally have finished when :c:func:`usb_unlink_urb` +returns; you must still wait for the completion handler to be called. + +To cancel an URB synchronously, call :c:func:`usb_kill_urb`:: + + void usb_kill_urb(struct urb *urb) + +It does everything :c:func:`usb_unlink_urb` does, and in addition it waits +until after the URB has been returned and the completion handler +has finished. It also marks the URB as temporarily unusable, so +that if the completion handler or anyone else tries to resubmit it +they will get a ``-EPERM`` error. Thus you can be sure that when +:c:func:`usb_kill_urb` returns, the URB is totally idle. + +There is a lifetime issue to consider. An URB may complete at any +time, and the completion handler may free the URB. If this happens +while :c:func:`usb_unlink_urb` or :c:func:`usb_kill_urb` is running, it will +cause a memory-access violation. The driver is responsible for avoiding this, +which often means some sort of lock will be needed to prevent the URB +from being deallocated while it is still in use. + +On the other hand, since usb_unlink_urb may end up calling the +completion handler, the handler must not take any lock that is held +when usb_unlink_urb is invoked. The general solution to this problem +is to increment the URB's reference count while holding the lock, then +drop the lock and call usb_unlink_urb or usb_kill_urb, and then +decrement the URB's reference count. You increment the reference +count by calling :c:func`usb_get_urb`:: + + struct urb *usb_get_urb(struct urb *urb) + +(ignore the return value; it is the same as the argument) and +decrement the reference count by calling :c:func:`usb_free_urb`. Of course, +none of this is necessary if there's no danger of the URB being freed +by the completion handler. + + +What about the completion handler? +================================== + +The handler is of the following type:: + + typedef void (*usb_complete_t)(struct urb *) + +I.e., it gets the URB that caused the completion call. In the completion +handler, you should have a look at ``urb->status`` to detect any USB errors. +Since the context parameter is included in the URB, you can pass +information to the completion handler. + +Note that even when an error (or unlink) is reported, data may have been +transferred. That's because USB transfers are packetized; it might take +sixteen packets to transfer your 1KByte buffer, and ten of them might +have transferred successfully before the completion was called. + + +.. warning:: + + NEVER SLEEP IN A COMPLETION HANDLER. + + These are often called in atomic context. + +In the current kernel, completion handlers run with local interrupts +disabled, but in the future this will be changed, so don't assume that +local IRQs are always disabled inside completion handlers. + +How to do isochronous (ISO) transfers? +====================================== + +Besides the fields present on a bulk transfer, for ISO, you also +also have to set ``urb->interval`` to say how often to make transfers; it's +often one per frame (which is once every microframe for highspeed devices). +The actual interval used will be a power of two that's no bigger than what +you specify. You can use the :c:func:`usb_fill_int_urb` macro to fill +most ISO transfer fields. + +For ISO transfers you also have to fill a :c:type:`usb_iso_packet_descriptor` +structure, allocated at the end of the URB by :c:func:`usb_alloc_urb`, for +each packet you want to schedule. + +The :c:func:`usb_submit_urb` call modifies ``urb->interval`` to the implemented +interval value that is less than or equal to the requested interval value. If +``URB_ISO_ASAP`` scheduling is used, ``urb->start_frame`` is also updated. + +For each entry you have to specify the data offset for this frame (base is +transfer_buffer), and the length you want to write/expect to read. +After completion, actual_length contains the actual transferred length and +status contains the resulting status for the ISO transfer for this frame. +It is allowed to specify a varying length from frame to frame (e.g. for +audio synchronisation/adaptive transfer rates). You can also use the length +0 to omit one or more frames (striping). + +For scheduling you can choose your own start frame or ``URB_ISO_ASAP``. As +explained earlier, if you always keep at least one URB queued and your +completion keeps (re)submitting a later URB, you'll get smooth ISO streaming +(if usb bandwidth utilization allows). + +If you specify your own start frame, make sure it's several frames in advance +of the current frame. You might want this model if you're synchronizing +ISO data with some other event stream. + + +How to start interrupt (INT) transfers? +======================================= + +Interrupt transfers, like isochronous transfers, are periodic, and happen +in intervals that are powers of two (1, 2, 4 etc) units. Units are frames +for full and low speed devices, and microframes for high speed ones. +You can use the :c:func:`usb_fill_int_urb` macro to fill INT transfer fields. + +The :c:func:`usb_submit_urb` call modifies ``urb->interval`` to the implemented +interval value that is less than or equal to the requested interval value. + +In Linux 2.6, unlike earlier versions, interrupt URBs are not automagically +restarted when they complete. They end when the completion handler is +called, just like other URBs. If you want an interrupt URB to be restarted, +your completion handler must resubmit it. +s diff --git a/Documentation/driver-api/usb/anchors.rst b/Documentation/driver-api/usb/anchors.rst new file mode 100644 index 0000000000000000000000000000000000000000..4b248e691bd6e76c838d84e3c778f6e06415c15e --- /dev/null +++ b/Documentation/driver-api/usb/anchors.rst @@ -0,0 +1,83 @@ +USB Anchors +~~~~~~~~~~~ + +What is anchor? +=============== + +A USB driver needs to support some callbacks requiring +a driver to cease all IO to an interface. To do so, a +driver has to keep track of the URBs it has submitted +to know they've all completed or to call usb_kill_urb +for them. The anchor is a data structure takes care of +keeping track of URBs and provides methods to deal with +multiple URBs. + +Allocation and Initialisation +============================= + +There's no API to allocate an anchor. It is simply declared +as struct usb_anchor. :c:func:`init_usb_anchor` must be called to +initialise the data structure. + +Deallocation +============ + +Once it has no more URBs associated with it, the anchor can be +freed with normal memory management operations. + +Association and disassociation of URBs with anchors +=================================================== + +An association of URBs to an anchor is made by an explicit +call to :c:func:`usb_anchor_urb`. The association is maintained until +an URB is finished by (successful) completion. Thus disassociation +is automatic. A function is provided to forcibly finish (kill) +all URBs associated with an anchor. +Furthermore, disassociation can be made with :c:func:`usb_unanchor_urb` + +Operations on multitudes of URBs +================================ + +:c:func:`usb_kill_anchored_urbs` +-------------------------------- + +This function kills all URBs associated with an anchor. The URBs +are called in the reverse temporal order they were submitted. +This way no data can be reordered. + +:c:func:`usb_unlink_anchored_urbs` +---------------------------------- + + +This function unlinks all URBs associated with an anchor. The URBs +are processed in the reverse temporal order they were submitted. +This is similar to :c:func:`usb_kill_anchored_urbs`, but it will not sleep. +Therefore no guarantee is made that the URBs have been unlinked when +the call returns. They may be unlinked later but will be unlinked in +finite time. + +:c:func:`usb_scuttle_anchored_urbs` +----------------------------------- + +All URBs of an anchor are unanchored en masse. + +:c:func:`usb_wait_anchor_empty_timeout` +--------------------------------------- + +This function waits for all URBs associated with an anchor to finish +or a timeout, whichever comes first. Its return value will tell you +whether the timeout was reached. + +:c:func:`usb_anchor_empty` +-------------------------- + +Returns true if no URBs are associated with an anchor. Locking +is the caller's responsibility. + +:c:func:`usb_get_from_anchor` +----------------------------- + +Returns the oldest anchored URB of an anchor. The URB is unanchored +and returned with a reference. As you may mix URBs to several +destinations in one anchor you have no guarantee the chronologically +first submitted URB is returned. diff --git a/Documentation/driver-api/usb/bulk-streams.rst b/Documentation/driver-api/usb/bulk-streams.rst new file mode 100644 index 0000000000000000000000000000000000000000..99b515babdeb2b41bc8053a562e77522b2403789 --- /dev/null +++ b/Documentation/driver-api/usb/bulk-streams.rst @@ -0,0 +1,83 @@ +USB bulk streams +~~~~~~~~~~~~~~~~ + +Background +========== + +Bulk endpoint streams were added in the USB 3.0 specification. Streams allow a +device driver to overload a bulk endpoint so that multiple transfers can be +queued at once. + +Streams are defined in sections 4.4.6.4 and 8.12.1.4 of the Universal Serial Bus +3.0 specification at http://www.usb.org/developers/docs/ The USB Attached SCSI +Protocol, which uses streams to queue multiple SCSI commands, can be found on +the T10 website (http://t10.org/). + + +Device-side implications +======================== + +Once a buffer has been queued to a stream ring, the device is notified (through +an out-of-band mechanism on another endpoint) that data is ready for that stream +ID. The device then tells the host which "stream" it wants to start. The host +can also initiate a transfer on a stream without the device asking, but the +device can refuse that transfer. Devices can switch between streams at any +time. + + +Driver implications +=================== + +:: + + int usb_alloc_streams(struct usb_interface *interface, + struct usb_host_endpoint **eps, unsigned int num_eps, + unsigned int num_streams, gfp_t mem_flags); + +Device drivers will call this API to request that the host controller driver +allocate memory so the driver can use up to num_streams stream IDs. They must +pass an array of usb_host_endpoints that need to be setup with similar stream +IDs. This is to ensure that a UASP driver will be able to use the same stream +ID for the bulk IN and OUT endpoints used in a Bi-directional command sequence. + +The return value is an error condition (if one of the endpoints doesn't support +streams, or the xHCI driver ran out of memory), or the number of streams the +host controller allocated for this endpoint. The xHCI host controller hardware +declares how many stream IDs it can support, and each bulk endpoint on a +SuperSpeed device will say how many stream IDs it can handle. Therefore, +drivers should be able to deal with being allocated less stream IDs than they +requested. + +Do NOT call this function if you have URBs enqueued for any of the endpoints +passed in as arguments. Do not call this function to request less than two +streams. + +Drivers will only be allowed to call this API once for the same endpoint +without calling usb_free_streams(). This is a simplification for the xHCI host +controller driver, and may change in the future. + + +Picking new Stream IDs to use +============================= + +Stream ID 0 is reserved, and should not be used to communicate with devices. If +usb_alloc_streams() returns with a value of N, you may use streams 1 though N. +To queue an URB for a specific stream, set the urb->stream_id value. If the +endpoint does not support streams, an error will be returned. + +Note that new API to choose the next stream ID will have to be added if the xHCI +driver supports secondary stream IDs. + + +Clean up +======== + +If a driver wishes to stop using streams to communicate with the device, it +should call:: + + void usb_free_streams(struct usb_interface *interface, + struct usb_host_endpoint **eps, unsigned int num_eps, + gfp_t mem_flags); + +All stream IDs will be deallocated when the driver releases the interface, to +ensure that drivers that don't support streams will be able to use the endpoint. diff --git a/Documentation/driver-api/usb/callbacks.rst b/Documentation/driver-api/usb/callbacks.rst new file mode 100644 index 0000000000000000000000000000000000000000..2b80cf54bcc3118b956a8292aeae8e3af80acf7a --- /dev/null +++ b/Documentation/driver-api/usb/callbacks.rst @@ -0,0 +1,157 @@ +USB core callbacks +~~~~~~~~~~~~~~~~~~ + +What callbacks will usbcore do? +=============================== + +Usbcore will call into a driver through callbacks defined in the driver +structure and through the completion handler of URBs a driver submits. +Only the former are in the scope of this document. These two kinds of +callbacks are completely independent of each other. Information on the +completion callback can be found in :ref:`usb-urb`. + +The callbacks defined in the driver structure are: + +1. Hotplugging callbacks: + + - @probe: + Called to see if the driver is willing to manage a particular + interface on a device. + + - @disconnect: + Called when the interface is no longer accessible, usually + because its device has been (or is being) disconnected or the + driver module is being unloaded. + +2. Odd backdoor through usbfs: + + - @ioctl: + Used for drivers that want to talk to userspace through + the "usbfs" filesystem. This lets devices provide ways to + expose information to user space regardless of where they + do (or don't) show up otherwise in the filesystem. + +3. Power management (PM) callbacks: + + - @suspend: + Called when the device is going to be suspended. + + - @resume: + Called when the device is being resumed. + + - @reset_resume: + Called when the suspended device has been reset instead + of being resumed. + +4. Device level operations: + + - @pre_reset: + Called when the device is about to be reset. + + - @post_reset: + Called after the device has been reset + +The ioctl interface (2) should be used only if you have a very good +reason. Sysfs is preferred these days. The PM callbacks are covered +separately in :ref:`usb-power-management`. + +Calling conventions +=================== + +All callbacks are mutually exclusive. There's no need for locking +against other USB callbacks. All callbacks are called from a task +context. You may sleep. However, it is important that all sleeps have a +small fixed upper limit in time. In particular you must not call out to +user space and await results. + +Hotplugging callbacks +===================== + +These callbacks are intended to associate and disassociate a driver with +an interface. A driver's bond to an interface is exclusive. + +The probe() callback +-------------------- + +:: + + int (*probe) (struct usb_interface *intf, + const struct usb_device_id *id); + +Accept or decline an interface. If you accept the device return 0, +otherwise -ENODEV or -ENXIO. Other error codes should be used only if a +genuine error occurred during initialisation which prevented a driver +from accepting a device that would else have been accepted. +You are strongly encouraged to use usbcore's facility, +usb_set_intfdata(), to associate a data structure with an interface, so +that you know which internal state and identity you associate with a +particular interface. The device will not be suspended and you may do IO +to the interface you are called for and endpoint 0 of the device. Device +initialisation that doesn't take too long is a good idea here. + +The disconnect() callback +------------------------- + +:: + + void (*disconnect) (struct usb_interface *intf); + +This callback is a signal to break any connection with an interface. +You are not allowed any IO to a device after returning from this +callback. You also may not do any other operation that may interfere +with another driver bound the interface, eg. a power management +operation. +If you are called due to a physical disconnection, all your URBs will be +killed by usbcore. Note that in this case disconnect will be called some +time after the physical disconnection. Thus your driver must be prepared +to deal with failing IO even prior to the callback. + +Device level callbacks +====================== + +pre_reset +--------- + +:: + + int (*pre_reset)(struct usb_interface *intf); + +A driver or user space is triggering a reset on the device which +contains the interface passed as an argument. Cease IO, wait for all +outstanding URBs to complete, and save any device state you need to +restore. No more URBs may be submitted until the post_reset method +is called. + +If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you +are in atomic context. + +post_reset +---------- + +:: + + int (*post_reset)(struct usb_interface *intf); + +The reset has completed. Restore any saved device state and begin +using the device again. + +If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you +are in atomic context. + +Call sequences +============== + +No callbacks other than probe will be invoked for an interface +that isn't bound to your driver. + +Probe will never be called for an interface bound to a driver. +Hence following a successful probe, disconnect will be called +before there is another probe for the same interface. + +Once your driver is bound to an interface, disconnect can be +called at any time except in between pre_reset and post_reset. +pre_reset is always followed by post_reset, even if the reset +failed or the device has been unplugged. + +suspend is always followed by one of: resume, reset_resume, or +disconnect. diff --git a/Documentation/driver-api/usb/dma.rst b/Documentation/driver-api/usb/dma.rst new file mode 100644 index 0000000000000000000000000000000000000000..59d5aee89e37e9dfd5a3b3019035fb3d1af2e5ed --- /dev/null +++ b/Documentation/driver-api/usb/dma.rst @@ -0,0 +1,136 @@ +USB DMA +~~~~~~~ + +In Linux 2.5 kernels (and later), USB device drivers have additional control +over how DMA may be used to perform I/O operations. The APIs are detailed +in the kernel usb programming guide (kerneldoc, from the source code). + +API overview +============ + +The big picture is that USB drivers can continue to ignore most DMA issues, +though they still must provide DMA-ready buffers (see +``Documentation/DMA-API-HOWTO.txt``). That's how they've worked through +the 2.4 (and earlier) kernels, or they can now be DMA-aware. + +DMA-aware usb drivers: + +- New calls enable DMA-aware drivers, letting them allocate dma buffers and + manage dma mappings for existing dma-ready buffers (see below). + +- URBs have an additional "transfer_dma" field, as well as a transfer_flags + bit saying if it's valid. (Control requests also have "setup_dma", but + drivers must not use it.) + +- "usbcore" will map this DMA address, if a DMA-aware driver didn't do + it first and set ``URB_NO_TRANSFER_DMA_MAP``. HCDs + don't manage dma mappings for URBs. + +- There's a new "generic DMA API", parts of which are usable by USB device + drivers. Never use dma_set_mask() on any USB interface or device; that + would potentially break all devices sharing that bus. + +Eliminating copies +================== + +It's good to avoid making CPUs copy data needlessly. The costs can add up, +and effects like cache-trashing can impose subtle penalties. + +- If you're doing lots of small data transfers from the same buffer all + the time, that can really burn up resources on systems which use an + IOMMU to manage the DMA mappings. It can cost MUCH more to set up and + tear down the IOMMU mappings with each request than perform the I/O! + + For those specific cases, USB has primitives to allocate less expensive + memory. They work like kmalloc and kfree versions that give you the right + kind of addresses to store in urb->transfer_buffer and urb->transfer_dma. + You'd also set ``URB_NO_TRANSFER_DMA_MAP`` in urb->transfer_flags:: + + void *usb_alloc_coherent (struct usb_device *dev, size_t size, + int mem_flags, dma_addr_t *dma); + + void usb_free_coherent (struct usb_device *dev, size_t size, + void *addr, dma_addr_t dma); + + Most drivers should **NOT** be using these primitives; they don't need + to use this type of memory ("dma-coherent"), and memory returned from + :c:func:`kmalloc` will work just fine. + + The memory buffer returned is "dma-coherent"; sometimes you might need to + force a consistent memory access ordering by using memory barriers. It's + not using a streaming DMA mapping, so it's good for small transfers on + systems where the I/O would otherwise thrash an IOMMU mapping. (See + ``Documentation/DMA-API-HOWTO.txt`` for definitions of "coherent" and + "streaming" DMA mappings.) + + Asking for 1/Nth of a page (as well as asking for N pages) is reasonably + space-efficient. + + On most systems the memory returned will be uncached, because the + semantics of dma-coherent memory require either bypassing CPU caches + or using cache hardware with bus-snooping support. While x86 hardware + has such bus-snooping, many other systems use software to flush cache + lines to prevent DMA conflicts. + +- Devices on some EHCI controllers could handle DMA to/from high memory. + + Unfortunately, the current Linux DMA infrastructure doesn't have a sane + way to expose these capabilities ... and in any case, HIGHMEM is mostly a + design wart specific to x86_32. So your best bet is to ensure you never + pass a highmem buffer into a USB driver. That's easy; it's the default + behavior. Just don't override it; e.g. with ``NETIF_F_HIGHDMA``. + + This may force your callers to do some bounce buffering, copying from + high memory to "normal" DMA memory. If you can come up with a good way + to fix this issue (for x86_32 machines with over 1 GByte of memory), + feel free to submit patches. + +Working with existing buffers +============================= + +Existing buffers aren't usable for DMA without first being mapped into the +DMA address space of the device. However, most buffers passed to your +driver can safely be used with such DMA mapping. (See the first section +of Documentation/DMA-API-HOWTO.txt, titled "What memory is DMA-able?") + +- When you're using scatterlists, you can map everything at once. On some + systems, this kicks in an IOMMU and turns the scatterlists into single + DMA transactions:: + + int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int nents); + + void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents); + + void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe, + struct scatterlist *sg, int n_hw_ents); + + It's probably easier to use the new ``usb_sg_*()`` calls, which do the DMA + mapping and apply other tweaks to make scatterlist i/o be fast. + +- Some drivers may prefer to work with the model that they're mapping large + buffers, synchronizing their safe re-use. (If there's no re-use, then let + usbcore do the map/unmap.) Large periodic transfers make good examples + here, since it's cheaper to just synchronize the buffer than to unmap it + each time an urb completes and then re-map it on during resubmission. + + These calls all work with initialized urbs: ``urb->dev``, ``urb->pipe``, + ``urb->transfer_buffer``, and ``urb->transfer_buffer_length`` must all be + valid when these calls are used (``urb->setup_packet`` must be valid too + if urb is a control request):: + + struct urb *usb_buffer_map (struct urb *urb); + + void usb_buffer_dmasync (struct urb *urb); + + void usb_buffer_unmap (struct urb *urb); + + The calls manage ``urb->transfer_dma`` for you, and set + ``URB_NO_TRANSFER_DMA_MAP`` so that usbcore won't map or unmap the buffer. + They cannot be used for setup_packet buffers in control requests. + +Note that several of those interfaces are currently commented out, since +they don't have current users. See the source code. Other than the dmasync +calls (where the underlying DMA primitives have changed), most of them can +easily be commented back in if you want to use them. diff --git a/Documentation/driver-api/usb/error-codes.rst b/Documentation/driver-api/usb/error-codes.rst new file mode 100644 index 0000000000000000000000000000000000000000..a3e84bfac77649bc62a73a3575cb56f3e1645c5d --- /dev/null +++ b/Documentation/driver-api/usb/error-codes.rst @@ -0,0 +1,207 @@ +.. _usb-error-codes: + +USB Error codes +~~~~~~~~~~~~~~~ + +:Revised: 2004-Oct-21 + +This is the documentation of (hopefully) all possible error codes (and +their interpretation) that can be returned from usbcore. + +Some of them are returned by the Host Controller Drivers (HCDs), which +device drivers only see through usbcore. As a rule, all the HCDs should +behave the same except for transfer speed dependent behaviors and the +way certain faults are reported. + + +Error codes returned by :c:func:`usb_submit_urb` +================================================ + +Non-USB-specific: + + +=============== =============================================== +0 URB submission went fine + +``-ENOMEM`` no memory for allocation of internal structures +=============== =============================================== + +USB-specific: + +======================= ======================================================= +``-EBUSY`` The URB is already active. + +``-ENODEV`` specified USB-device or bus doesn't exist + +``-ENOENT`` specified interface or endpoint does not exist or + is not enabled + +``-ENXIO`` host controller driver does not support queuing of + this type of urb. (treat as a host controller bug.) + +``-EINVAL`` a) Invalid transfer type specified (or not supported) + b) Invalid or unsupported periodic transfer interval + c) ISO: attempted to change transfer interval + d) ISO: ``number_of_packets`` is < 0 + e) various other cases + +``-EXDEV`` ISO: ``URB_ISO_ASAP`` wasn't specified and all the + frames the URB would be scheduled in have already + expired. + +``-EFBIG`` Host controller driver can't schedule that many ISO + frames. + +``-EPIPE`` The pipe type specified in the URB doesn't match the + endpoint's actual type. + +``-EMSGSIZE`` (a) endpoint maxpacket size is zero; it is not usable + in the current interface altsetting. + (b) ISO packet is larger than the endpoint maxpacket. + (c) requested data transfer length is invalid: negative + or too large for the host controller. + +``-ENOSPC`` This request would overcommit the usb bandwidth reserved + for periodic transfers (interrupt, isochronous). + +``-ESHUTDOWN`` The device or host controller has been disabled due to + some problem that could not be worked around. + +``-EPERM`` Submission failed because ``urb->reject`` was set. + +``-EHOSTUNREACH`` URB was rejected because the device is suspended. + +``-ENOEXEC`` A control URB doesn't contain a Setup packet. +======================= ======================================================= + +Error codes returned by ``in urb->status`` or in ``iso_frame_desc[n].status`` (for ISO) +======================================================================================= + +USB device drivers may only test urb status values in completion handlers. +This is because otherwise there would be a race between HCDs updating +these values on one CPU, and device drivers testing them on another CPU. + +A transfer's actual_length may be positive even when an error has been +reported. That's because transfers often involve several packets, so that +one or more packets could finish before an error stops further endpoint I/O. + +For isochronous URBs, the urb status value is non-zero only if the URB is +unlinked, the device is removed, the host controller is disabled, or the total +transferred length is less than the requested length and the +``URB_SHORT_NOT_OK`` flag is set. Completion handlers for isochronous URBs +should only see ``urb->status`` set to zero, ``-ENOENT``, ``-ECONNRESET``, +``-ESHUTDOWN``, or ``-EREMOTEIO``. Individual frame descriptor status fields +may report more status codes. + + +=============================== =============================================== +0 Transfer completed successfully + +``-ENOENT`` URB was synchronously unlinked by + :c:func:`usb_unlink_urb` + +``-EINPROGRESS`` URB still pending, no results yet + (That is, if drivers see this it's a bug.) + +``-EPROTO`` [#f1]_, [#f2]_ a) bitstuff error + b) no response packet received within the + prescribed bus turn-around time + c) unknown USB error + +``-EILSEQ`` [#f1]_, [#f2]_ a) CRC mismatch + b) no response packet received within the + prescribed bus turn-around time + c) unknown USB error + + Note that often the controller hardware does + not distinguish among cases a), b), and c), so + a driver cannot tell whether there was a + protocol error, a failure to respond (often + caused by device disconnect), or some other + fault. + +``-ETIME`` [#f2]_ No response packet received within the + prescribed bus turn-around time. This error + may instead be reported as + ``-EPROTO`` or ``-EILSEQ``. + +``-ETIMEDOUT`` Synchronous USB message functions use this code + to indicate timeout expired before the transfer + completed, and no other error was reported + by HC. + +``-EPIPE`` [#f2]_ Endpoint stalled. For non-control endpoints, + reset this status with + :c:func:`usb_clear_halt`. + +``-ECOMM`` During an IN transfer, the host controller + received data from an endpoint faster than it + could be written to system memory + +``-ENOSR`` During an OUT transfer, the host controller + could not retrieve data from system memory fast + enough to keep up with the USB data rate + +``-EOVERFLOW`` [#f1]_ The amount of data returned by the endpoint was + greater than either the max packet size of the + endpoint or the remaining buffer size. + "Babble". + +``-EREMOTEIO`` The data read from the endpoint did not fill + the specified buffer, and ``URB_SHORT_NOT_OK`` + was set in ``urb->transfer_flags``. + +``-ENODEV`` Device was removed. Often preceded by a burst + of other errors, since the hub driver doesn't + detect device removal events immediately. + +``-EXDEV`` ISO transfer only partially completed + (only set in ``iso_frame_desc[n].status``, + not ``urb->status``) + +``-EINVAL`` ISO madness, if this happens: Log off and + go home + +``-ECONNRESET`` URB was asynchronously unlinked by + :c:func:`usb_unlink_urb` + +``-ESHUTDOWN`` The device or host controller has been + disabled due to some problem that could not + be worked around, such as a physical + disconnect. +=============================== =============================================== + + +.. [#f1] + + Error codes like ``-EPROTO``, ``-EILSEQ`` and ``-EOVERFLOW`` normally + indicate hardware problems such as bad devices (including firmware) + or cables. + +.. [#f2] + + This is also one of several codes that different kinds of host + controller use to indicate a transfer has failed because of device + disconnect. In the interval before the hub driver starts disconnect + processing, devices may receive such fault reports for every request. + + + +Error codes returned by usbcore-functions +========================================= + +.. note:: expect also other submit and transfer status codes + +:c:func:`usb_register`: + +======================= =================================== +``-EINVAL`` error during registering new driver +======================= =================================== + +``usb_get_*/usb_set_*()``, +:c:func:`usb_control_msg`, +:c:func:`usb_bulk_msg()`: + +======================= ============================================== +``-ETIMEDOUT`` Timeout expired before the transfer completed. +======================= ============================================== diff --git a/Documentation/driver-api/usb/gadget.rst b/Documentation/driver-api/usb/gadget.rst new file mode 100644 index 0000000000000000000000000000000000000000..3e8a3809c0b82afb50551499b9ce090ce92f26b5 --- /dev/null +++ b/Documentation/driver-api/usb/gadget.rst @@ -0,0 +1,510 @@ +======================== +USB Gadget API for Linux +======================== + +:Author: David Brownell +:Date: 20 August 2004 + +Introduction +============ + +This document presents a Linux-USB "Gadget" kernel mode API, for use +within peripherals and other USB devices that embed Linux. It provides +an overview of the API structure, and shows how that fits into a system +development project. This is the first such API released on Linux to +address a number of important problems, including: + +- Supports USB 2.0, for high speed devices which can stream data at + several dozen megabytes per second. + +- Handles devices with dozens of endpoints just as well as ones with + just two fixed-function ones. Gadget drivers can be written so + they're easy to port to new hardware. + +- Flexible enough to expose more complex USB device capabilities such + as multiple configurations, multiple interfaces, composite devices, + and alternate interface settings. + +- USB "On-The-Go" (OTG) support, in conjunction with updates to the + Linux-USB host side. + +- Sharing data structures and API models with the Linux-USB host side + API. This helps the OTG support, and looks forward to more-symmetric + frameworks (where the same I/O model is used by both host and device + side drivers). + +- Minimalist, so it's easier to support new device controller hardware. + I/O processing doesn't imply large demands for memory or CPU + resources. + +Most Linux developers will not be able to use this API, since they have +USB ``host`` hardware in a PC, workstation, or server. Linux users with +embedded systems are more likely to have USB peripheral hardware. To +distinguish drivers running inside such hardware from the more familiar +Linux "USB device drivers", which are host side proxies for the real USB +devices, a different term is used: the drivers inside the peripherals +are "USB gadget drivers". In USB protocol interactions, the device +driver is the master (or "client driver") and the gadget driver is the +slave (or "function driver"). + +The gadget API resembles the host side Linux-USB API in that both use +queues of request objects to package I/O buffers, and those requests may +be submitted or canceled. They share common definitions for the standard +USB *Chapter 9* messages, structures, and constants. Also, both APIs +bind and unbind drivers to devices. The APIs differ in detail, since the +host side's current URB framework exposes a number of implementation +details and assumptions that are inappropriate for a gadget API. While +the model for control transfers and configuration management is +necessarily different (one side is a hardware-neutral master, the other +is a hardware-aware slave), the endpoint I/0 API used here should also +be usable for an overhead-reduced host side API. + +Structure of Gadget Drivers +=========================== + +A system running inside a USB peripheral normally has at least three +layers inside the kernel to handle USB protocol processing, and may have +additional layers in user space code. The ``gadget`` API is used by the +middle layer to interact with the lowest level (which directly handles +hardware). + +In Linux, from the bottom up, these layers are: + +*USB Controller Driver* + This is the lowest software level. It is the only layer that talks + to hardware, through registers, fifos, dma, irqs, and the like. The + ```` API abstracts the peripheral controller + endpoint hardware. That hardware is exposed through endpoint + objects, which accept streams of IN/OUT buffers, and through + callbacks that interact with gadget drivers. Since normal USB + devices only have one upstream port, they only have one of these + drivers. The controller driver can support any number of different + gadget drivers, but only one of them can be used at a time. + + Examples of such controller hardware include the PCI-based NetChip + 2280 USB 2.0 high speed controller, the SA-11x0 or PXA-25x UDC + (found within many PDAs), and a variety of other products. + +*Gadget Driver* + The lower boundary of this driver implements hardware-neutral USB + functions, using calls to the controller driver. Because such + hardware varies widely in capabilities and restrictions, and is used + in embedded environments where space is at a premium, the gadget + driver is often configured at compile time to work with endpoints + supported by one particular controller. Gadget drivers may be + portable to several different controllers, using conditional + compilation. (Recent kernels substantially simplify the work + involved in supporting new hardware, by *autoconfiguring* endpoints + automatically for many bulk-oriented drivers.) Gadget driver + responsibilities include: + + - handling setup requests (ep0 protocol responses) possibly + including class-specific functionality + + - returning configuration and string descriptors + + - (re)setting configurations and interface altsettings, including + enabling and configuring endpoints + + - handling life cycle events, such as managing bindings to + hardware, USB suspend/resume, remote wakeup, and disconnection + from the USB host. + + - managing IN and OUT transfers on all currently enabled endpoints + + Such drivers may be modules of proprietary code, although that + approach is discouraged in the Linux community. + +*Upper Level* + Most gadget drivers have an upper boundary that connects to some + Linux driver or framework in Linux. Through that boundary flows the + data which the gadget driver produces and/or consumes through + protocol transfers over USB. Examples include: + + - user mode code, using generic (gadgetfs) or application specific + files in ``/dev`` + + - networking subsystem (for network gadgets, like the CDC Ethernet + Model gadget driver) + + - data capture drivers, perhaps video4Linux or a scanner driver; or + test and measurement hardware. + + - input subsystem (for HID gadgets) + + - sound subsystem (for audio gadgets) + + - file system (for PTP gadgets) + + - block i/o subsystem (for usb-storage gadgets) + + - ... and more + +*Additional Layers* + Other layers may exist. These could include kernel layers, such as + network protocol stacks, as well as user mode applications building + on standard POSIX system call APIs such as ``open()``, ``close()``, + ``read()`` and ``write()``. On newer systems, POSIX Async I/O calls may + be an option. Such user mode code will not necessarily be subject to + the GNU General Public License (GPL). + +OTG-capable systems will also need to include a standard Linux-USB host +side stack, with ``usbcore``, one or more *Host Controller Drivers* +(HCDs), *USB Device Drivers* to support the OTG "Targeted Peripheral +List", and so forth. There will also be an *OTG Controller Driver*, +which is visible to gadget and device driver developers only indirectly. +That helps the host and device side USB controllers implement the two +new OTG protocols (HNP and SRP). Roles switch (host to peripheral, or +vice versa) using HNP during USB suspend processing, and SRP can be +viewed as a more battery-friendly kind of device wakeup protocol. + +Over time, reusable utilities are evolving to help make some gadget +driver tasks simpler. For example, building configuration descriptors +from vectors of descriptors for the configurations interfaces and +endpoints is now automated, and many drivers now use autoconfiguration +to choose hardware endpoints and initialize their descriptors. A +potential example of particular interest is code implementing standard +USB-IF protocols for HID, networking, storage, or audio classes. Some +developers are interested in KDB or KGDB hooks, to let target hardware +be remotely debugged. Most such USB protocol code doesn't need to be +hardware-specific, any more than network protocols like X11, HTTP, or +NFS are. Such gadget-side interface drivers should eventually be +combined, to implement composite devices. + +Kernel Mode Gadget API +====================== + +Gadget drivers declare themselves through a struct +:c:type:`usb_gadget_driver`, which is responsible for most parts of enumeration +for a struct :c:type:`usb_gadget`. The response to a set_configuration usually +involves enabling one or more of the struct :c:type:`usb_ep` objects exposed by +the gadget, and submitting one or more struct :c:type:`usb_request` buffers to +transfer data. Understand those four data types, and their operations, +and you will understand how this API works. + +.. Note:: + + Other than the "Chapter 9" data types, most of the significant data + types and functions are described here. + + However, some relevant information is likely omitted from what you + are reading. One example of such information is endpoint + autoconfiguration. You'll have to read the header file, and use + example source code (such as that for "Gadget Zero"), to fully + understand the API. + + The part of the API implementing some basic driver capabilities is + specific to the version of the Linux kernel that's in use. The 2.6 + and upper kernel versions include a *driver model* framework that has + no analogue on earlier kernels; so those parts of the gadget API are + not fully portable. (They are implemented on 2.4 kernels, but in a + different way.) The driver model state is another part of this API that is + ignored by the kerneldoc tools. + +The core API does not expose every possible hardware feature, only the +most widely available ones. There are significant hardware features, +such as device-to-device DMA (without temporary storage in a memory +buffer) that would be added using hardware-specific APIs. + +This API allows drivers to use conditional compilation to handle +endpoint capabilities of different hardware, but doesn't require that. +Hardware tends to have arbitrary restrictions, relating to transfer +types, addressing, packet sizes, buffering, and availability. As a rule, +such differences only matter for "endpoint zero" logic that handles +device configuration and management. The API supports limited run-time +detection of capabilities, through naming conventions for endpoints. +Many drivers will be able to at least partially autoconfigure +themselves. In particular, driver init sections will often have endpoint +autoconfiguration logic that scans the hardware's list of endpoints to +find ones matching the driver requirements (relying on those +conventions), to eliminate some of the most common reasons for +conditional compilation. + +Like the Linux-USB host side API, this API exposes the "chunky" nature +of USB messages: I/O requests are in terms of one or more "packets", and +packet boundaries are visible to drivers. Compared to RS-232 serial +protocols, USB resembles synchronous protocols like HDLC (N bytes per +frame, multipoint addressing, host as the primary station and devices as +secondary stations) more than asynchronous ones (tty style: 8 data bits +per frame, no parity, one stop bit). So for example the controller +drivers won't buffer two single byte writes into a single two-byte USB +IN packet, although gadget drivers may do so when they implement +protocols where packet boundaries (and "short packets") are not +significant. + +Driver Life Cycle +----------------- + +Gadget drivers make endpoint I/O requests to hardware without needing to +know many details of the hardware, but driver setup/configuration code +needs to handle some differences. Use the API like this: + +1. Register a driver for the particular device side usb controller + hardware, such as the net2280 on PCI (USB 2.0), sa11x0 or pxa25x as + found in Linux PDAs, and so on. At this point the device is logically + in the USB ch9 initial state (``attached``), drawing no power and not + usable (since it does not yet support enumeration). Any host should + not see the device, since it's not activated the data line pullup + used by the host to detect a device, even if VBUS power is available. + +2. Register a gadget driver that implements some higher level device + function. That will then bind() to a :c:type:`usb_gadget`, which activates + the data line pullup sometime after detecting VBUS. + +3. The hardware driver can now start enumerating. The steps it handles + are to accept USB ``power`` and ``set_address`` requests. Other steps are + handled by the gadget driver. If the gadget driver module is unloaded + before the host starts to enumerate, steps before step 7 are skipped. + +4. The gadget driver's ``setup()`` call returns usb descriptors, based both + on what the bus interface hardware provides and on the functionality + being implemented. That can involve alternate settings or + configurations, unless the hardware prevents such operation. For OTG + devices, each configuration descriptor includes an OTG descriptor. + +5. The gadget driver handles the last step of enumeration, when the USB + host issues a ``set_configuration`` call. It enables all endpoints used + in that configuration, with all interfaces in their default settings. + That involves using a list of the hardware's endpoints, enabling each + endpoint according to its descriptor. It may also involve using + ``usb_gadget_vbus_draw`` to let more power be drawn from VBUS, as + allowed by that configuration. For OTG devices, setting a + configuration may also involve reporting HNP capabilities through a + user interface. + +6. Do real work and perform data transfers, possibly involving changes + to interface settings or switching to new configurations, until the + device is disconnect()ed from the host. Queue any number of transfer + requests to each endpoint. It may be suspended and resumed several + times before being disconnected. On disconnect, the drivers go back + to step 3 (above). + +7. When the gadget driver module is being unloaded, the driver unbind() + callback is issued. That lets the controller driver be unloaded. + +Drivers will normally be arranged so that just loading the gadget driver +module (or statically linking it into a Linux kernel) allows the +peripheral device to be enumerated, but some drivers will defer +enumeration until some higher level component (like a user mode daemon) +enables it. Note that at this lowest level there are no policies about +how ep0 configuration logic is implemented, except that it should obey +USB specifications. Such issues are in the domain of gadget drivers, +including knowing about implementation constraints imposed by some USB +controllers or understanding that composite devices might happen to be +built by integrating reusable components. + +Note that the lifecycle above can be slightly different for OTG devices. +Other than providing an additional OTG descriptor in each configuration, +only the HNP-related differences are particularly visible to driver +code. They involve reporting requirements during the ``SET_CONFIGURATION`` +request, and the option to invoke HNP during some suspend callbacks. +Also, SRP changes the semantics of ``usb_gadget_wakeup`` slightly. + +USB 2.0 Chapter 9 Types and Constants +------------------------------------- + +Gadget drivers rely on common USB structures and constants defined in +the :ref:`linux/usb/ch9.h ` header file, which is standard in +Linux 2.6+ kernels. These are the same types and constants used by host side +drivers (and usbcore). + +Core Objects and Methods +------------------------ + +These are declared in ````, and are used by gadget +drivers to interact with USB peripheral controller drivers. + +.. kernel-doc:: include/linux/usb/gadget.h + :internal: + +Optional Utilities +------------------ + +The core API is sufficient for writing a USB Gadget Driver, but some +optional utilities are provided to simplify common tasks. These +utilities include endpoint autoconfiguration. + +.. kernel-doc:: drivers/usb/gadget/usbstring.c + :export: + +.. kernel-doc:: drivers/usb/gadget/config.c + :export: + +Composite Device Framework +-------------------------- + +The core API is sufficient for writing drivers for composite USB devices +(with more than one function in a given configuration), and also +multi-configuration devices (also more than one function, but not +necessarily sharing a given configuration). There is however an optional +framework which makes it easier to reuse and combine functions. + +Devices using this framework provide a struct :c:type:`usb_composite_driver`, +which in turn provides one or more struct :c:type:`usb_configuration` +instances. Each such configuration includes at least one struct +:c:type:`usb_function`, which packages a user visible role such as "network +link" or "mass storage device". Management functions may also exist, +such as "Device Firmware Upgrade". + +.. kernel-doc:: include/linux/usb/composite.h + :internal: + +.. kernel-doc:: drivers/usb/gadget/composite.c + :export: + +Composite Device Functions +-------------------------- + +At this writing, a few of the current gadget drivers have been converted +to this framework. Near-term plans include converting all of them, +except for ``gadgetfs``. + +Peripheral Controller Drivers +============================= + +The first hardware supporting this API was the NetChip 2280 controller, +which supports USB 2.0 high speed and is based on PCI. This is the +``net2280`` driver module. The driver supports Linux kernel versions 2.4 +and 2.6; contact NetChip Technologies for development boards and product +information. + +Other hardware working in the ``gadget`` framework includes: Intel's PXA +25x and IXP42x series processors (``pxa2xx_udc``), Toshiba TC86c001 +"Goku-S" (``goku_udc``), Renesas SH7705/7727 (``sh_udc``), MediaQ 11xx +(``mq11xx_udc``), Hynix HMS30C7202 (``h7202_udc``), National 9303/4 +(``n9604_udc``), Texas Instruments OMAP (``omap_udc``), Sharp LH7A40x +(``lh7a40x_udc``), and more. Most of those are full speed controllers. + +At this writing, there are people at work on drivers in this framework +for several other USB device controllers, with plans to make many of +them be widely available. + +A partial USB simulator, the ``dummy_hcd`` driver, is available. It can +act like a net2280, a pxa25x, or an sa11x0 in terms of available +endpoints and device speeds; and it simulates control, bulk, and to some +extent interrupt transfers. That lets you develop some parts of a gadget +driver on a normal PC, without any special hardware, and perhaps with +the assistance of tools such as GDB running with User Mode Linux. At +least one person has expressed interest in adapting that approach, +hooking it up to a simulator for a microcontroller. Such simulators can +help debug subsystems where the runtime hardware is unfriendly to +software development, or is not yet available. + +Support for other controllers is expected to be developed and +contributed over time, as this driver framework evolves. + +Gadget Drivers +============== + +In addition to *Gadget Zero* (used primarily for testing and development +with drivers for usb controller hardware), other gadget drivers exist. + +There's an ``ethernet`` gadget driver, which implements one of the most +useful *Communications Device Class* (CDC) models. One of the standards +for cable modem interoperability even specifies the use of this ethernet +model as one of two mandatory options. Gadgets using this code look to a +USB host as if they're an Ethernet adapter. It provides access to a +network where the gadget's CPU is one host, which could easily be +bridging, routing, or firewalling access to other networks. Since some +hardware can't fully implement the CDC Ethernet requirements, this +driver also implements a "good parts only" subset of CDC Ethernet. (That +subset doesn't advertise itself as CDC Ethernet, to avoid creating +problems.) + +Support for Microsoft's ``RNDIS`` protocol has been contributed by +Pengutronix and Auerswald GmbH. This is like CDC Ethernet, but it runs +on more slightly USB hardware (but less than the CDC subset). However, +its main claim to fame is being able to connect directly to recent +versions of Windows, using drivers that Microsoft bundles and supports, +making it much simpler to network with Windows. + +There is also support for user mode gadget drivers, using ``gadgetfs``. +This provides a *User Mode API* that presents each endpoint as a single +file descriptor. I/O is done using normal ``read()`` and ``read()`` calls. +Familiar tools like GDB and pthreads can be used to develop and debug +user mode drivers, so that once a robust controller driver is available +many applications for it won't require new kernel mode software. Linux +2.6 *Async I/O (AIO)* support is available, so that user mode software +can stream data with only slightly more overhead than a kernel driver. + +There's a USB Mass Storage class driver, which provides a different +solution for interoperability with systems such as MS-Windows and MacOS. +That *Mass Storage* driver uses a file or block device as backing store +for a drive, like the ``loop`` driver. The USB host uses the BBB, CB, or +CBI versions of the mass storage class specification, using transparent +SCSI commands to access the data from the backing store. + +There's a "serial line" driver, useful for TTY style operation over USB. +The latest version of that driver supports CDC ACM style operation, like +a USB modem, and so on most hardware it can interoperate easily with +MS-Windows. One interesting use of that driver is in boot firmware (like +a BIOS), which can sometimes use that model with very small systems +without real serial lines. + +Support for other kinds of gadget is expected to be developed and +contributed over time, as this driver framework evolves. + +USB On-The-GO (OTG) +=================== + +USB OTG support on Linux 2.6 was initially developed by Texas +Instruments for `OMAP `__ 16xx and 17xx series +processors. Other OTG systems should work in similar ways, but the +hardware level details could be very different. + +Systems need specialized hardware support to implement OTG, notably +including a special *Mini-AB* jack and associated transceiver to support +*Dual-Role* operation: they can act either as a host, using the standard +Linux-USB host side driver stack, or as a peripheral, using this +``gadget`` framework. To do that, the system software relies on small +additions to those programming interfaces, and on a new internal +component (here called an "OTG Controller") affecting which driver stack +connects to the OTG port. In each role, the system can re-use the +existing pool of hardware-neutral drivers, layered on top of the +controller driver interfaces (:c:type:`usb_bus` or :c:type:`usb_gadget`). +Such drivers need at most minor changes, and most of the calls added to +support OTG can also benefit non-OTG products. + +- Gadget drivers test the ``is_otg`` flag, and use it to determine + whether or not to include an OTG descriptor in each of their + configurations. + +- Gadget drivers may need changes to support the two new OTG protocols, + exposed in new gadget attributes such as ``b_hnp_enable`` flag. HNP + support should be reported through a user interface (two LEDs could + suffice), and is triggered in some cases when the host suspends the + peripheral. SRP support can be user-initiated just like remote + wakeup, probably by pressing the same button. + +- On the host side, USB device drivers need to be taught to trigger HNP + at appropriate moments, using ``usb_suspend_device()``. That also + conserves battery power, which is useful even for non-OTG + configurations. + +- Also on the host side, a driver must support the OTG "Targeted + Peripheral List". That's just a whitelist, used to reject peripherals + not supported with a given Linux OTG host. *This whitelist is + product-specific; each product must modify* ``otg_whitelist.h`` *to + match its interoperability specification.* + + Non-OTG Linux hosts, like PCs and workstations, normally have some + solution for adding drivers, so that peripherals that aren't + recognized can eventually be supported. That approach is unreasonable + for consumer products that may never have their firmware upgraded, + and where it's usually unrealistic to expect traditional + PC/workstation/server kinds of support model to work. For example, + it's often impractical to change device firmware once the product has + been distributed, so driver bugs can't normally be fixed if they're + found after shipment. + +Additional changes are needed below those hardware-neutral :c:type:`usb_bus` +and :c:type:`usb_gadget` driver interfaces; those aren't discussed here in any +detail. Those affect the hardware-specific code for each USB Host or +Peripheral controller, and how the HCD initializes (since OTG can be +active only on a single port). They also involve what may be called an +*OTG Controller Driver*, managing the OTG transceiver and the OTG state +machine logic as well as much of the root hub behavior for the OTG port. +The OTG controller driver needs to activate and deactivate USB +controllers depending on the relevant device role. Some related changes +were needed inside usbcore, so that it can identify OTG-capable devices +and respond appropriately to HNP or SRP protocols. diff --git a/Documentation/driver-api/usb/hotplug.rst b/Documentation/driver-api/usb/hotplug.rst new file mode 100644 index 0000000000000000000000000000000000000000..79663e653ca1cca947eadfd8c5349a62576ab3fb --- /dev/null +++ b/Documentation/driver-api/usb/hotplug.rst @@ -0,0 +1,154 @@ +USB hotplugging +~~~~~~~~~~~~~~~ + +Linux Hotplugging +================= + + +In hotpluggable busses like USB (and Cardbus PCI), end-users plug devices +into the bus with power on. In most cases, users expect the devices to become +immediately usable. That means the system must do many things, including: + + - Find a driver that can handle the device. That may involve + loading a kernel module; newer drivers can use module-init-tools + to publish their device (and class) support to user utilities. + + - Bind a driver to that device. Bus frameworks do that using a + device driver's probe() routine. + + - Tell other subsystems to configure the new device. Print + queues may need to be enabled, networks brought up, disk + partitions mounted, and so on. In some cases these will + be driver-specific actions. + +This involves a mix of kernel mode and user mode actions. Making devices +be immediately usable means that any user mode actions can't wait for an +administrator to do them: the kernel must trigger them, either passively +(triggering some monitoring daemon to invoke a helper program) or +actively (calling such a user mode helper program directly). + +Those triggered actions must support a system's administrative policies; +such programs are called "policy agents" here. Typically they involve +shell scripts that dispatch to more familiar administration tools. + +Because some of those actions rely on information about drivers (metadata) +that is currently available only when the drivers are dynamically linked, +you get the best hotplugging when you configure a highly modular system. + +Kernel Hotplug Helper (``/sbin/hotplug``) +========================================= + +There is a kernel parameter: ``/proc/sys/kernel/hotplug``, which normally +holds the pathname ``/sbin/hotplug``. That parameter names a program +which the kernel may invoke at various times. + +The /sbin/hotplug program can be invoked by any subsystem as part of its +reaction to a configuration change, from a thread in that subsystem. +Only one parameter is required: the name of a subsystem being notified of +some kernel event. That name is used as the first key for further event +dispatch; any other argument and environment parameters are specified by +the subsystem making that invocation. + +Hotplug software and other resources is available at: + + http://linux-hotplug.sourceforge.net + +Mailing list information is also available at that site. + + +USB Policy Agent +================ + +The USB subsystem currently invokes ``/sbin/hotplug`` when USB devices +are added or removed from system. The invocation is done by the kernel +hub workqueue [hub_wq], or else as part of root hub initialization +(done by init, modprobe, kapmd, etc). Its single command line parameter +is the string "usb", and it passes these environment variables: + +========== ============================================ +ACTION ``add``, ``remove`` +PRODUCT USB vendor, product, and version codes (hex) +TYPE device class codes (decimal) +INTERFACE interface 0 class codes (decimal) +========== ============================================ + +If "usbdevfs" is configured, DEVICE and DEVFS are also passed. DEVICE is +the pathname of the device, and is useful for devices with multiple and/or +alternate interfaces that complicate driver selection. By design, USB +hotplugging is independent of ``usbdevfs``: you can do most essential parts +of USB device setup without using that filesystem, and without running a +user mode daemon to detect changes in system configuration. + +Currently available policy agent implementations can load drivers for +modules, and can invoke driver-specific setup scripts. The newest ones +leverage USB module-init-tools support. Later agents might unload drivers. + + +USB Modutils Support +==================== + +Current versions of module-init-tools will create a ``modules.usbmap`` file +which contains the entries from each driver's ``MODULE_DEVICE_TABLE``. Such +files can be used by various user mode policy agents to make sure all the +right driver modules get loaded, either at boot time or later. + +See ``linux/usb.h`` for full information about such table entries; or look +at existing drivers. Each table entry describes one or more criteria to +be used when matching a driver to a device or class of devices. The +specific criteria are identified by bits set in "match_flags", paired +with field values. You can construct the criteria directly, or with +macros such as these, and use driver_info to store more information:: + + USB_DEVICE (vendorId, productId) + ... matching devices with specified vendor and product ids + USB_DEVICE_VER (vendorId, productId, lo, hi) + ... like USB_DEVICE with lo <= productversion <= hi + USB_INTERFACE_INFO (class, subclass, protocol) + ... matching specified interface class info + USB_DEVICE_INFO (class, subclass, protocol) + ... matching specified device class info + +A short example, for a driver that supports several specific USB devices +and their quirks, might have a MODULE_DEVICE_TABLE like this:: + + static const struct usb_device_id mydriver_id_table[] = { + { USB_DEVICE (0x9999, 0xaaaa), driver_info: QUIRK_X }, + { USB_DEVICE (0xbbbb, 0x8888), driver_info: QUIRK_Y|QUIRK_Z }, + ... + { } /* end with an all-zeroes entry */ + }; + MODULE_DEVICE_TABLE(usb, mydriver_id_table); + +Most USB device drivers should pass these tables to the USB subsystem as +well as to the module management subsystem. Not all, though: some driver +frameworks connect using interfaces layered over USB, and so they won't +need such a struct :c:type:`usb_driver`. + +Drivers that connect directly to the USB subsystem should be declared +something like this:: + + static struct usb_driver mydriver = { + .name = "mydriver", + .id_table = mydriver_id_table, + .probe = my_probe, + .disconnect = my_disconnect, + + /* + if using the usb chardev framework: + .minor = MY_USB_MINOR_START, + .fops = my_file_ops, + if exposing any operations through usbdevfs: + .ioctl = my_ioctl, + */ + }; + +When the USB subsystem knows about a driver's device ID table, it's used when +choosing drivers to probe(). The thread doing new device processing checks +drivers' device ID entries from the ``MODULE_DEVICE_TABLE`` against interface +and device descriptors for the device. It will only call ``probe()`` if there +is a match, and the third argument to ``probe()`` will be the entry that +matched. + +If you don't provide an ``id_table`` for your driver, then your driver may get +probed for each new device; the third parameter to ``probe()`` will be +``NULL``. diff --git a/Documentation/driver-api/usb/index.rst b/Documentation/driver-api/usb/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1bf64edc8c8a5c9bff3655d154eaa32584843530 --- /dev/null +++ b/Documentation/driver-api/usb/index.rst @@ -0,0 +1,26 @@ +============= +Linux USB API +============= + +.. toctree:: + + usb + gadget + anchors + bulk-streams + callbacks + dma + URB + power-management + hotplug + persist + error-codes + writing_usb_driver + writing_musb_glue_layer + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/driver-api/usb/persist.rst b/Documentation/driver-api/usb/persist.rst new file mode 100644 index 0000000000000000000000000000000000000000..08cafc6292c1d279da4f65b1f3061748d463602f --- /dev/null +++ b/Documentation/driver-api/usb/persist.rst @@ -0,0 +1,171 @@ +.. _usb-persist: + +USB device persistence during system suspend +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:Author: Alan Stern +:Date: September 2, 2006 (Updated February 25, 2008) + + +What is the problem? +==================== + +According to the USB specification, when a USB bus is suspended the +bus must continue to supply suspend current (around 1-5 mA). This +is so that devices can maintain their internal state and hubs can +detect connect-change events (devices being plugged in or unplugged). +The technical term is "power session". + +If a USB device's power session is interrupted then the system is +required to behave as though the device has been unplugged. It's a +conservative approach; in the absence of suspend current the computer +has no way to know what has actually happened. Perhaps the same +device is still attached or perhaps it was removed and a different +device plugged into the port. The system must assume the worst. + +By default, Linux behaves according to the spec. If a USB host +controller loses power during a system suspend, then when the system +wakes up all the devices attached to that controller are treated as +though they had disconnected. This is always safe and it is the +"officially correct" thing to do. + +For many sorts of devices this behavior doesn't matter in the least. +If the kernel wants to believe that your USB keyboard was unplugged +while the system was asleep and a new keyboard was plugged in when the +system woke up, who cares? It'll still work the same when you type on +it. + +Unfortunately problems _can_ arise, particularly with mass-storage +devices. The effect is exactly the same as if the device really had +been unplugged while the system was suspended. If you had a mounted +filesystem on the device, you're out of luck -- everything in that +filesystem is now inaccessible. This is especially annoying if your +root filesystem was located on the device, since your system will +instantly crash. + +Loss of power isn't the only mechanism to worry about. Anything that +interrupts a power session will have the same effect. For example, +even though suspend current may have been maintained while the system +was asleep, on many systems during the initial stages of wakeup the +firmware (i.e., the BIOS) resets the motherboard's USB host +controllers. Result: all the power sessions are destroyed and again +it's as though you had unplugged all the USB devices. Yes, it's +entirely the BIOS's fault, but that doesn't do _you_ any good unless +you can convince the BIOS supplier to fix the problem (lots of luck!). + +On many systems the USB host controllers will get reset after a +suspend-to-RAM. On almost all systems, no suspend current is +available during hibernation (also known as swsusp or suspend-to-disk). +You can check the kernel log after resuming to see if either of these +has happened; look for lines saying "root hub lost power or was reset". + +In practice, people are forced to unmount any filesystems on a USB +device before suspending. If the root filesystem is on a USB device, +the system can't be suspended at all. (All right, it _can_ be +suspended -- but it will crash as soon as it wakes up, which isn't +much better.) + + +What is the solution? +===================== + +The kernel includes a feature called USB-persist. It tries to work +around these issues by allowing the core USB device data structures to +persist across a power-session disruption. + +It works like this. If the kernel sees that a USB host controller is +not in the expected state during resume (i.e., if the controller was +reset or otherwise had lost power) then it applies a persistence check +to each of the USB devices below that controller for which the +"persist" attribute is set. It doesn't try to resume the device; that +can't work once the power session is gone. Instead it issues a USB +port reset and then re-enumerates the device. (This is exactly the +same thing that happens whenever a USB device is reset.) If the +re-enumeration shows that the device now attached to that port has the +same descriptors as before, including the Vendor and Product IDs, then +the kernel continues to use the same device structure. In effect, the +kernel treats the device as though it had merely been reset instead of +unplugged. + +The same thing happens if the host controller is in the expected state +but a USB device was unplugged and then replugged, or if a USB device +fails to carry out a normal resume. + +If no device is now attached to the port, or if the descriptors are +different from what the kernel remembers, then the treatment is what +you would expect. The kernel destroys the old device structure and +behaves as though the old device had been unplugged and a new device +plugged in. + +The end result is that the USB device remains available and usable. +Filesystem mounts and memory mappings are unaffected, and the world is +now a good and happy place. + +Note that the "USB-persist" feature will be applied only to those +devices for which it is enabled. You can enable the feature by doing +(as root):: + + echo 1 >/sys/bus/usb/devices/.../power/persist + +where the "..." should be filled in the with the device's ID. Disable +the feature by writing 0 instead of 1. For hubs the feature is +automatically and permanently enabled and the power/persist file +doesn't even exist, so you only have to worry about setting it for +devices where it really matters. + + +Is this the best solution? +========================== + +Perhaps not. Arguably, keeping track of mounted filesystems and +memory mappings across device disconnects should be handled by a +centralized Logical Volume Manager. Such a solution would allow you +to plug in a USB flash device, create a persistent volume associated +with it, unplug the flash device, plug it back in later, and still +have the same persistent volume associated with the device. As such +it would be more far-reaching than USB-persist. + +On the other hand, writing a persistent volume manager would be a big +job and using it would require significant input from the user. This +solution is much quicker and easier -- and it exists now, a giant +point in its favor! + +Furthermore, the USB-persist feature applies to _all_ USB devices, not +just mass-storage devices. It might turn out to be equally useful for +other device types, such as network interfaces. + + +WARNING: USB-persist can be dangerous!! +======================================= + +When recovering an interrupted power session the kernel does its best +to make sure the USB device hasn't been changed; that is, the same +device is still plugged into the port as before. But the checks +aren't guaranteed to be 100% accurate. + +If you replace one USB device with another of the same type (same +manufacturer, same IDs, and so on) there's an excellent chance the +kernel won't detect the change. The serial number string and other +descriptors are compared with the kernel's stored values, but this +might not help since manufacturers frequently omit serial numbers +entirely in their devices. + +Furthermore it's quite possible to leave a USB device exactly the same +while changing its media. If you replace the flash memory card in a +USB card reader while the system is asleep, the kernel will have no +way to know you did it. The kernel will assume that nothing has +happened and will continue to use the partition tables, inodes, and +memory mappings for the old card. + +If the kernel gets fooled in this way, it's almost certain to cause +data corruption and to crash your system. You'll have no one to blame +but yourself. + +For those devices with avoid_reset_quirk attribute being set, persist +maybe fail because they may morph after reset. + +YOU HAVE BEEN WARNED! USE AT YOUR OWN RISK! + +That having been said, most of the time there shouldn't be any trouble +at all. The USB-persist feature can be extremely useful. Make the +most of it. diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst new file mode 100644 index 0000000000000000000000000000000000000000..79beb807996b7a3a17e08b5f1d6e31d4176d3fc2 --- /dev/null +++ b/Documentation/driver-api/usb/power-management.rst @@ -0,0 +1,794 @@ +.. _usb-power-management: + +Power Management for USB +~~~~~~~~~~~~~~~~~~~~~~~~ + +:Author: Alan Stern +:Date: Last-updated: February 2014 + +.. + Contents: + --------- + * What is Power Management? + * What is Remote Wakeup? + * When is a USB device idle? + * Forms of dynamic PM + * The user interface for dynamic PM + * Changing the default idle-delay time + * Warnings + * The driver interface for Power Management + * The driver interface for autosuspend and autoresume + * Other parts of the driver interface + * Mutual exclusion + * Interaction between dynamic PM and system PM + * xHCI hardware link PM + * USB Port Power Control + * User Interface for Port Power Control + * Suggested Userspace Port Power Policy + + +What is Power Management? +------------------------- + +Power Management (PM) is the practice of saving energy by suspending +parts of a computer system when they aren't being used. While a +component is ``suspended`` it is in a nonfunctional low-power state; it +might even be turned off completely. A suspended component can be +``resumed`` (returned to a functional full-power state) when the kernel +needs to use it. (There also are forms of PM in which components are +placed in a less functional but still usable state instead of being +suspended; an example would be reducing the CPU's clock rate. This +document will not discuss those other forms.) + +When the parts being suspended include the CPU and most of the rest of +the system, we speak of it as a "system suspend". When a particular +device is turned off while the system as a whole remains running, we +call it a "dynamic suspend" (also known as a "runtime suspend" or +"selective suspend"). This document concentrates mostly on how +dynamic PM is implemented in the USB subsystem, although system PM is +covered to some extent (see ``Documentation/power/*.txt`` for more +information about system PM). + +System PM support is present only if the kernel was built with +``CONFIG_SUSPEND`` or ``CONFIG_HIBERNATION`` enabled. Dynamic PM support + +for USB is present whenever +the kernel was built with ``CONFIG_PM`` enabled. + +[Historically, dynamic PM support for USB was present only if the +kernel had been built with ``CONFIG_USB_SUSPEND`` enabled (which depended on +``CONFIG_PM_RUNTIME``). Starting with the 3.10 kernel release, dynamic PM +support for USB was present whenever the kernel was built with +``CONFIG_PM_RUNTIME`` enabled. The ``CONFIG_USB_SUSPEND`` option had been +eliminated.] + + +What is Remote Wakeup? +---------------------- + +When a device has been suspended, it generally doesn't resume until +the computer tells it to. Likewise, if the entire computer has been +suspended, it generally doesn't resume until the user tells it to, say +by pressing a power button or opening the cover. + +However some devices have the capability of resuming by themselves, or +asking the kernel to resume them, or even telling the entire computer +to resume. This capability goes by several names such as "Wake On +LAN"; we will refer to it generically as "remote wakeup". When a +device is enabled for remote wakeup and it is suspended, it may resume +itself (or send a request to be resumed) in response to some external +event. Examples include a suspended keyboard resuming when a key is +pressed, or a suspended USB hub resuming when a device is plugged in. + + +When is a USB device idle? +-------------------------- + +A device is idle whenever the kernel thinks it's not busy doing +anything important and thus is a candidate for being suspended. The +exact definition depends on the device's driver; drivers are allowed +to declare that a device isn't idle even when there's no actual +communication taking place. (For example, a hub isn't considered idle +unless all the devices plugged into that hub are already suspended.) +In addition, a device isn't considered idle so long as a program keeps +its usbfs file open, whether or not any I/O is going on. + +If a USB device has no driver, its usbfs file isn't open, and it isn't +being accessed through sysfs, then it definitely is idle. + + +Forms of dynamic PM +------------------- + +Dynamic suspends occur when the kernel decides to suspend an idle +device. This is called ``autosuspend`` for short. In general, a device +won't be autosuspended unless it has been idle for some minimum period +of time, the so-called idle-delay time. + +Of course, nothing the kernel does on its own initiative should +prevent the computer or its devices from working properly. If a +device has been autosuspended and a program tries to use it, the +kernel will automatically resume the device (autoresume). For the +same reason, an autosuspended device will usually have remote wakeup +enabled, if the device supports remote wakeup. + +It is worth mentioning that many USB drivers don't support +autosuspend. In fact, at the time of this writing (Linux 2.6.23) the +only drivers which do support it are the hub driver, kaweth, asix, +usblp, usblcd, and usb-skeleton (which doesn't count). If a +non-supporting driver is bound to a device, the device won't be +autosuspended. In effect, the kernel pretends the device is never +idle. + +We can categorize power management events in two broad classes: +external and internal. External events are those triggered by some +agent outside the USB stack: system suspend/resume (triggered by +userspace), manual dynamic resume (also triggered by userspace), and +remote wakeup (triggered by the device). Internal events are those +triggered within the USB stack: autosuspend and autoresume. Note that +all dynamic suspend events are internal; external agents are not +allowed to issue dynamic suspends. + + +The user interface for dynamic PM +--------------------------------- + +The user interface for controlling dynamic PM is located in the ``power/`` +subdirectory of each USB device's sysfs directory, that is, in +``/sys/bus/usb/devices/.../power/`` where "..." is the device's ID. The +relevant attribute files are: wakeup, control, and +``autosuspend_delay_ms``. (There may also be a file named ``level``; this +file was deprecated as of the 2.6.35 kernel and replaced by the +``control`` file. In 2.6.38 the ``autosuspend`` file will be deprecated +and replaced by the ``autosuspend_delay_ms`` file. The only difference +is that the newer file expresses the delay in milliseconds whereas the +older file uses seconds. Confusingly, both files are present in 2.6.37 +but only ``autosuspend`` works.) + + ``power/wakeup`` + + This file is empty if the device does not support + remote wakeup. Otherwise the file contains either the + word ``enabled`` or the word ``disabled``, and you can + write those words to the file. The setting determines + whether or not remote wakeup will be enabled when the + device is next suspended. (If the setting is changed + while the device is suspended, the change won't take + effect until the following suspend.) + + ``power/control`` + + This file contains one of two words: ``on`` or ``auto``. + You can write those words to the file to change the + device's setting. + + - ``on`` means that the device should be resumed and + autosuspend is not allowed. (Of course, system + suspends are still allowed.) + + - ``auto`` is the normal state in which the kernel is + allowed to autosuspend and autoresume the device. + + (In kernels up to 2.6.32, you could also specify + ``suspend``, meaning that the device should remain + suspended and autoresume was not allowed. This + setting is no longer supported.) + + ``power/autosuspend_delay_ms`` + + This file contains an integer value, which is the + number of milliseconds the device should remain idle + before the kernel will autosuspend it (the idle-delay + time). The default is 2000. 0 means to autosuspend + as soon as the device becomes idle, and negative + values mean never to autosuspend. You can write a + number to the file to change the autosuspend + idle-delay time. + +Writing ``-1`` to ``power/autosuspend_delay_ms`` and writing ``on`` to +``power/control`` do essentially the same thing -- they both prevent the +device from being autosuspended. Yes, this is a redundancy in the +API. + +(In 2.6.21 writing ``0`` to ``power/autosuspend`` would prevent the device +from being autosuspended; the behavior was changed in 2.6.22. The +``power/autosuspend`` attribute did not exist prior to 2.6.21, and the +``power/level`` attribute did not exist prior to 2.6.22. ``power/control`` +was added in 2.6.34, and ``power/autosuspend_delay_ms`` was added in +2.6.37 but did not become functional until 2.6.38.) + + +Changing the default idle-delay time +------------------------------------ + +The default autosuspend idle-delay time (in seconds) is controlled by +a module parameter in usbcore. You can specify the value when usbcore +is loaded. For example, to set it to 5 seconds instead of 2 you would +do:: + + modprobe usbcore autosuspend=5 + +Equivalently, you could add to a configuration file in /etc/modprobe.d +a line saying:: + + options usbcore autosuspend=5 + +Some distributions load the usbcore module very early during the boot +process, by means of a program or script running from an initramfs +image. To alter the parameter value you would have to rebuild that +image. + +If usbcore is compiled into the kernel rather than built as a loadable +module, you can add:: + + usbcore.autosuspend=5 + +to the kernel's boot command line. + +Finally, the parameter value can be changed while the system is +running. If you do:: + + echo 5 >/sys/module/usbcore/parameters/autosuspend + +then each new USB device will have its autosuspend idle-delay +initialized to 5. (The idle-delay values for already existing devices +will not be affected.) + +Setting the initial default idle-delay to -1 will prevent any +autosuspend of any USB device. This has the benefit of allowing you +then to enable autosuspend for selected devices. + + +Warnings +-------- + +The USB specification states that all USB devices must support power +management. Nevertheless, the sad fact is that many devices do not +support it very well. You can suspend them all right, but when you +try to resume them they disconnect themselves from the USB bus or +they stop working entirely. This seems to be especially prevalent +among printers and scanners, but plenty of other types of device have +the same deficiency. + +For this reason, by default the kernel disables autosuspend (the +``power/control`` attribute is initialized to ``on``) for all devices other +than hubs. Hubs, at least, appear to be reasonably well-behaved in +this regard. + +(In 2.6.21 and 2.6.22 this wasn't the case. Autosuspend was enabled +by default for almost all USB devices. A number of people experienced +problems as a result.) + +This means that non-hub devices won't be autosuspended unless the user +or a program explicitly enables it. As of this writing there aren't +any widespread programs which will do this; we hope that in the near +future device managers such as HAL will take on this added +responsibility. In the meantime you can always carry out the +necessary operations by hand or add them to a udev script. You can +also change the idle-delay time; 2 seconds is not the best choice for +every device. + +If a driver knows that its device has proper suspend/resume support, +it can enable autosuspend all by itself. For example, the video +driver for a laptop's webcam might do this (in recent kernels they +do), since these devices are rarely used and so should normally be +autosuspended. + +Sometimes it turns out that even when a device does work okay with +autosuspend there are still problems. For example, the usbhid driver, +which manages keyboards and mice, has autosuspend support. Tests with +a number of keyboards show that typing on a suspended keyboard, while +causing the keyboard to do a remote wakeup all right, will nonetheless +frequently result in lost keystrokes. Tests with mice show that some +of them will issue a remote-wakeup request in response to button +presses but not to motion, and some in response to neither. + +The kernel will not prevent you from enabling autosuspend on devices +that can't handle it. It is even possible in theory to damage a +device by suspending it at the wrong time. (Highly unlikely, but +possible.) Take care. + + +The driver interface for Power Management +----------------------------------------- + +The requirements for a USB driver to support external power management +are pretty modest; the driver need only define:: + + .suspend + .resume + .reset_resume + +methods in its :c:type:`usb_driver` structure, and the ``reset_resume`` method +is optional. The methods' jobs are quite simple: + + - The ``suspend`` method is called to warn the driver that the + device is going to be suspended. If the driver returns a + negative error code, the suspend will be aborted. Normally + the driver will return 0, in which case it must cancel all + outstanding URBs (:c:func:`usb_kill_urb`) and not submit any more. + + - The ``resume`` method is called to tell the driver that the + device has been resumed and the driver can return to normal + operation. URBs may once more be submitted. + + - The ``reset_resume`` method is called to tell the driver that + the device has been resumed and it also has been reset. + The driver should redo any necessary device initialization, + since the device has probably lost most or all of its state + (although the interfaces will be in the same altsettings as + before the suspend). + +If the device is disconnected or powered down while it is suspended, +the ``disconnect`` method will be called instead of the ``resume`` or +``reset_resume`` method. This is also quite likely to happen when +waking up from hibernation, as many systems do not maintain suspend +current to the USB host controllers during hibernation. (It's +possible to work around the hibernation-forces-disconnect problem by +using the USB Persist facility.) + +The ``reset_resume`` method is used by the USB Persist facility (see +:ref:`usb-persist`) and it can also be used under certain +circumstances when ``CONFIG_USB_PERSIST`` is not enabled. Currently, if a +device is reset during a resume and the driver does not have a +``reset_resume`` method, the driver won't receive any notification about +the resume. Later kernels will call the driver's ``disconnect`` method; +2.6.23 doesn't do this. + +USB drivers are bound to interfaces, so their ``suspend`` and ``resume`` +methods get called when the interfaces are suspended or resumed. In +principle one might want to suspend some interfaces on a device (i.e., +force the drivers for those interface to stop all activity) without +suspending the other interfaces. The USB core doesn't allow this; all +interfaces are suspended when the device itself is suspended and all +interfaces are resumed when the device is resumed. It isn't possible +to suspend or resume some but not all of a device's interfaces. The +closest you can come is to unbind the interfaces' drivers. + + +The driver interface for autosuspend and autoresume +--------------------------------------------------- + +To support autosuspend and autoresume, a driver should implement all +three of the methods listed above. In addition, a driver indicates +that it supports autosuspend by setting the ``.supports_autosuspend`` flag +in its usb_driver structure. It is then responsible for informing the +USB core whenever one of its interfaces becomes busy or idle. The +driver does so by calling these six functions:: + + int usb_autopm_get_interface(struct usb_interface *intf); + void usb_autopm_put_interface(struct usb_interface *intf); + int usb_autopm_get_interface_async(struct usb_interface *intf); + void usb_autopm_put_interface_async(struct usb_interface *intf); + void usb_autopm_get_interface_no_resume(struct usb_interface *intf); + void usb_autopm_put_interface_no_suspend(struct usb_interface *intf); + +The functions work by maintaining a usage counter in the +usb_interface's embedded device structure. When the counter is > 0 +then the interface is deemed to be busy, and the kernel will not +autosuspend the interface's device. When the usage counter is = 0 +then the interface is considered to be idle, and the kernel may +autosuspend the device. + +Drivers need not be concerned about balancing changes to the usage +counter; the USB core will undo any remaining "get"s when a driver +is unbound from its interface. As a corollary, drivers must not call +any of the ``usb_autopm_*`` functions after their ``disconnect`` +routine has returned. + +Drivers using the async routines are responsible for their own +synchronization and mutual exclusion. + + :c:func:`usb_autopm_get_interface` increments the usage counter and + does an autoresume if the device is suspended. If the + autoresume fails, the counter is decremented back. + + :c:func:`usb_autopm_put_interface` decrements the usage counter and + attempts an autosuspend if the new value is = 0. + + :c:func:`usb_autopm_get_interface_async` and + :c:func:`usb_autopm_put_interface_async` do almost the same things as + their non-async counterparts. The big difference is that they + use a workqueue to do the resume or suspend part of their + jobs. As a result they can be called in an atomic context, + such as an URB's completion handler, but when they return the + device will generally not yet be in the desired state. + + :c:func:`usb_autopm_get_interface_no_resume` and + :c:func:`usb_autopm_put_interface_no_suspend` merely increment or + decrement the usage counter; they do not attempt to carry out + an autoresume or an autosuspend. Hence they can be called in + an atomic context. + +The simplest usage pattern is that a driver calls +:c:func:`usb_autopm_get_interface` in its open routine and +:c:func:`usb_autopm_put_interface` in its close or release routine. But other +patterns are possible. + +The autosuspend attempts mentioned above will often fail for one +reason or another. For example, the ``power/control`` attribute might be +set to ``on``, or another interface in the same device might not be +idle. This is perfectly normal. If the reason for failure was that +the device hasn't been idle for long enough, a timer is scheduled to +carry out the operation automatically when the autosuspend idle-delay +has expired. + +Autoresume attempts also can fail, although failure would mean that +the device is no longer present or operating properly. Unlike +autosuspend, there's no idle-delay for an autoresume. + + +Other parts of the driver interface +----------------------------------- + +Drivers can enable autosuspend for their devices by calling:: + + usb_enable_autosuspend(struct usb_device *udev); + +in their :c:func:`probe` routine, if they know that the device is capable of +suspending and resuming correctly. This is exactly equivalent to +writing ``auto`` to the device's ``power/control`` attribute. Likewise, +drivers can disable autosuspend by calling:: + + usb_disable_autosuspend(struct usb_device *udev); + +This is exactly the same as writing ``on`` to the ``power/control`` attribute. + +Sometimes a driver needs to make sure that remote wakeup is enabled +during autosuspend. For example, there's not much point +autosuspending a keyboard if the user can't cause the keyboard to do a +remote wakeup by typing on it. If the driver sets +``intf->needs_remote_wakeup`` to 1, the kernel won't autosuspend the +device if remote wakeup isn't available. (If the device is already +autosuspended, though, setting this flag won't cause the kernel to +autoresume it. Normally a driver would set this flag in its ``probe`` +method, at which time the device is guaranteed not to be +autosuspended.) + +If a driver does its I/O asynchronously in interrupt context, it +should call :c:func:`usb_autopm_get_interface_async` before starting output and +:c:func:`usb_autopm_put_interface_async` when the output queue drains. When +it receives an input event, it should call:: + + usb_mark_last_busy(struct usb_device *udev); + +in the event handler. This tells the PM core that the device was just +busy and therefore the next autosuspend idle-delay expiration should +be pushed back. Many of the usb_autopm_* routines also make this call, +so drivers need to worry only when interrupt-driven input arrives. + +Asynchronous operation is always subject to races. For example, a +driver may call the :c:func:`usb_autopm_get_interface_async` routine at a time +when the core has just finished deciding the device has been idle for +long enough but not yet gotten around to calling the driver's ``suspend`` +method. The ``suspend`` method must be responsible for synchronizing with +the I/O request routine and the URB completion handler; it should +cause autosuspends to fail with -EBUSY if the driver needs to use the +device. + +External suspend calls should never be allowed to fail in this way, +only autosuspend calls. The driver can tell them apart by applying +the :c:func:`PMSG_IS_AUTO` macro to the message argument to the ``suspend`` +method; it will return True for internal PM events (autosuspend) and +False for external PM events. + + +Mutual exclusion +---------------- + +For external events -- but not necessarily for autosuspend or +autoresume -- the device semaphore (udev->dev.sem) will be held when a +``suspend`` or ``resume`` method is called. This implies that external +suspend/resume events are mutually exclusive with calls to ``probe``, +``disconnect``, ``pre_reset``, and ``post_reset``; the USB core guarantees that +this is true of autosuspend/autoresume events as well. + +If a driver wants to block all suspend/resume calls during some +critical section, the best way is to lock the device and call +:c:func:`usb_autopm_get_interface` (and do the reverse at the end of the +critical section). Holding the device semaphore will block all +external PM calls, and the :c:func:`usb_autopm_get_interface` will prevent any +internal PM calls, even if it fails. (Exercise: Why?) + + +Interaction between dynamic PM and system PM +-------------------------------------------- + +Dynamic power management and system power management can interact in +a couple of ways. + +Firstly, a device may already be autosuspended when a system suspend +occurs. Since system suspends are supposed to be as transparent as +possible, the device should remain suspended following the system +resume. But this theory may not work out well in practice; over time +the kernel's behavior in this regard has changed. As of 2.6.37 the +policy is to resume all devices during a system resume and let them +handle their own runtime suspends afterward. + +Secondly, a dynamic power-management event may occur as a system +suspend is underway. The window for this is short, since system +suspends don't take long (a few seconds usually), but it can happen. +For example, a suspended device may send a remote-wakeup signal while +the system is suspending. The remote wakeup may succeed, which would +cause the system suspend to abort. If the remote wakeup doesn't +succeed, it may still remain active and thus cause the system to +resume as soon as the system suspend is complete. Or the remote +wakeup may fail and get lost. Which outcome occurs depends on timing +and on the hardware and firmware design. + + +xHCI hardware link PM +--------------------- + +xHCI host controller provides hardware link power management to usb2.0 +(xHCI 1.0 feature) and usb3.0 devices which support link PM. By +enabling hardware LPM, the host can automatically put the device into +lower power state(L1 for usb2.0 devices, or U1/U2 for usb3.0 devices), +which state device can enter and resume very quickly. + +The user interface for controlling hardware LPM is located in the +``power/`` subdirectory of each USB device's sysfs directory, that is, in +``/sys/bus/usb/devices/.../power/`` where "..." is the device's ID. The +relevant attribute files are ``usb2_hardware_lpm`` and ``usb3_hardware_lpm``. + + ``power/usb2_hardware_lpm`` + + When a USB2 device which support LPM is plugged to a + xHCI host root hub which support software LPM, the + host will run a software LPM test for it; if the device + enters L1 state and resume successfully and the host + supports USB2 hardware LPM, this file will show up and + driver will enable hardware LPM for the device. You + can write y/Y/1 or n/N/0 to the file to enable/disable + USB2 hardware LPM manually. This is for test purpose mainly. + + ``power/usb3_hardware_lpm_u1`` + ``power/usb3_hardware_lpm_u2`` + + When a USB 3.0 lpm-capable device is plugged in to a + xHCI host which supports link PM, it will check if U1 + and U2 exit latencies have been set in the BOS + descriptor; if the check is passed and the host + supports USB3 hardware LPM, USB3 hardware LPM will be + enabled for the device and these files will be created. + The files hold a string value (enable or disable) + indicating whether or not USB3 hardware LPM U1 or U2 + is enabled for the device. + +USB Port Power Control +---------------------- + +In addition to suspending endpoint devices and enabling hardware +controlled link power management, the USB subsystem also has the +capability to disable power to ports under some conditions. Power is +controlled through ``Set/ClearPortFeature(PORT_POWER)`` requests to a hub. +In the case of a root or platform-internal hub the host controller +driver translates ``PORT_POWER`` requests into platform firmware (ACPI) +method calls to set the port power state. For more background see the +Linux Plumbers Conference 2012 slides [#f1]_ and video [#f2]_: + +Upon receiving a ``ClearPortFeature(PORT_POWER)`` request a USB port is +logically off, and may trigger the actual loss of VBUS to the port [#f3]_. +VBUS may be maintained in the case where a hub gangs multiple ports into +a shared power well causing power to remain until all ports in the gang +are turned off. VBUS may also be maintained by hub ports configured for +a charging application. In any event a logically off port will lose +connection with its device, not respond to hotplug events, and not +respond to remote wakeup events. + +.. warning:: + + turning off a port may result in the inability to hot add a device. + Please see "User Interface for Port Power Control" for details. + +As far as the effect on the device itself it is similar to what a device +goes through during system suspend, i.e. the power session is lost. Any +USB device or driver that misbehaves with system suspend will be +similarly affected by a port power cycle event. For this reason the +implementation shares the same device recovery path (and honors the same +quirks) as the system resume path for the hub. + +.. [#f1] + + http://dl.dropbox.com/u/96820575/sarah-sharp-lpt-port-power-off2-mini.pdf + +.. [#f2] + + http://linuxplumbers.ubicast.tv/videos/usb-port-power-off-kerneluserspace-api/ + +.. [#f3] + + USB 3.1 Section 10.12 + + wakeup note: if a device is configured to send wakeup events the port + power control implementation will block poweroff attempts on that + port. + + +User Interface for Port Power Control +------------------------------------- + +The port power control mechanism uses the PM runtime system. Poweroff is +requested by clearing the ``power/pm_qos_no_power_off`` flag of the port device +(defaults to 1). If the port is disconnected it will immediately receive a +``ClearPortFeature(PORT_POWER)`` request. Otherwise, it will honor the pm +runtime rules and require the attached child device and all descendants to be +suspended. This mechanism is dependent on the hub advertising port power +switching in its hub descriptor (wHubCharacteristics logical power switching +mode field). + +Note, some interface devices/drivers do not support autosuspend. Userspace may +need to unbind the interface drivers before the :c:type:`usb_device` will +suspend. An unbound interface device is suspended by default. When unbinding, +be careful to unbind interface drivers, not the driver of the parent usb +device. Also, leave hub interface drivers bound. If the driver for the usb +device (not interface) is unbound the kernel is no longer able to resume the +device. If a hub interface driver is unbound, control of its child ports is +lost and all attached child-devices will disconnect. A good rule of thumb is +that if the 'driver/module' link for a device points to +``/sys/module/usbcore`` then unbinding it will interfere with port power +control. + +Example of the relevant files for port power control. Note, in this example +these files are relative to a usb hub device (prefix):: + + prefix=/sys/devices/pci0000:00/0000:00:14.0/usb3/3-1 + + attached child device + + hub port device + | + hub interface device + | | + v v v + $prefix/3-1:1.0/3-1-port1/device + + $prefix/3-1:1.0/3-1-port1/power/pm_qos_no_power_off + $prefix/3-1:1.0/3-1-port1/device/power/control + $prefix/3-1:1.0/3-1-port1/device/3-1.1:/driver/unbind + $prefix/3-1:1.0/3-1-port1/device/3-1.1:/driver/unbind + ... + $prefix/3-1:1.0/3-1-port1/device/3-1.1:/driver/unbind + +In addition to these files some ports may have a 'peer' link to a port on +another hub. The expectation is that all superspeed ports have a +hi-speed peer:: + + $prefix/3-1:1.0/3-1-port1/peer -> ../../../../usb2/2-1/2-1:1.0/2-1-port1 + ../../../../usb2/2-1/2-1:1.0/2-1-port1/peer -> ../../../../usb3/3-1/3-1:1.0/3-1-port1 + +Distinct from 'companion ports', or 'ehci/xhci shared switchover ports' +peer ports are simply the hi-speed and superspeed interface pins that +are combined into a single usb3 connector. Peer ports share the same +ancestor XHCI device. + +While a superspeed port is powered off a device may downgrade its +connection and attempt to connect to the hi-speed pins. The +implementation takes steps to prevent this: + +1. Port suspend is sequenced to guarantee that hi-speed ports are powered-off + before their superspeed peer is permitted to power-off. The implication is + that the setting ``pm_qos_no_power_off`` to zero on a superspeed port may + not cause the port to power-off until its highspeed peer has gone to its + runtime suspend state. Userspace must take care to order the suspensions + if it wants to guarantee that a superspeed port will power-off. + +2. Port resume is sequenced to force a superspeed port to power-on prior to its + highspeed peer. + +3. Port resume always triggers an attached child device to resume. After a + power session is lost the device may have been removed, or need reset. + Resuming the child device when the parent port regains power resolves those + states and clamps the maximum port power cycle frequency at the rate the + child device can suspend (autosuspend-delay) and resume (reset-resume + latency). + +Sysfs files relevant for port power control: + + ``/power/pm_qos_no_power_off``: + This writable flag controls the state of an idle port. + Once all children and descendants have suspended the + port may suspend/poweroff provided that + pm_qos_no_power_off is '0'. If pm_qos_no_power_off is + '1' the port will remain active/powered regardless of + the stats of descendants. Defaults to 1. + + ``/power/runtime_status``: + This file reflects whether the port is 'active' (power is on) + or 'suspended' (logically off). There is no indication to + userspace whether VBUS is still supplied. + + ``/connect_type``: + An advisory read-only flag to userspace indicating the + location and connection type of the port. It returns + one of four values 'hotplug', 'hardwired', 'not used', + and 'unknown'. All values, besides unknown, are set by + platform firmware. + + ``hotplug`` indicates an externally connectable/visible + port on the platform. Typically userspace would choose + to keep such a port powered to handle new device + connection events. + + ``hardwired`` refers to a port that is not visible but + connectable. Examples are internal ports for USB + bluetooth that can be disconnected via an external + switch or a port with a hardwired USB camera. It is + expected to be safe to allow these ports to suspend + provided pm_qos_no_power_off is coordinated with any + switch that gates connections. Userspace must arrange + for the device to be connected prior to the port + powering off, or to activate the port prior to enabling + connection via a switch. + + ``not used`` refers to an internal port that is expected + to never have a device connected to it. These may be + empty internal ports, or ports that are not physically + exposed on a platform. Considered safe to be + powered-off at all times. + + ``unknown`` means platform firmware does not provide + information for this port. Most commonly refers to + external hub ports which should be considered 'hotplug' + for policy decisions. + + .. note:: + + - since we are relying on the BIOS to get this ACPI + information correct, the USB port descriptions may + be missing or wrong. + + - Take care in clearing ``pm_qos_no_power_off``. Once + power is off this port will + not respond to new connect events. + + Once a child device is attached additional constraints are + applied before the port is allowed to poweroff. + + ``/power/control``: + Must be ``auto``, and the port will not + power down until ``/power/runtime_status`` + reflects the 'suspended' state. Default + value is controlled by child device driver. + + ``/power/persist``: + This defaults to ``1`` for most devices and indicates if + kernel can persist the device's configuration across a + power session loss (suspend / port-power event). When + this value is ``0`` (quirky devices), port poweroff is + disabled. + + ``/driver/unbind``: + Wakeup capable devices will block port poweroff. At + this time the only mechanism to clear the usb-internal + wakeup-capability for an interface device is to unbind + its driver. + +Summary of poweroff pre-requisite settings relative to a port device:: + + echo 0 > power/pm_qos_no_power_off + echo 0 > peer/power/pm_qos_no_power_off # if it exists + echo auto > power/control # this is the default value + echo auto > /power/control + echo 1 > /power/persist # this is the default value + +Suggested Userspace Port Power Policy +------------------------------------- + +As noted above userspace needs to be careful and deliberate about what +ports are enabled for poweroff. + +The default configuration is that all ports start with +``power/pm_qos_no_power_off`` set to ``1`` causing ports to always remain +active. + +Given confidence in the platform firmware's description of the ports +(ACPI _PLD record for a port populates 'connect_type') userspace can +clear pm_qos_no_power_off for all 'not used' ports. The same can be +done for 'hardwired' ports provided poweroff is coordinated with any +connection switch for the port. + +A more aggressive userspace policy is to enable USB port power off for +all ports (set ``/power/pm_qos_no_power_off`` to ``0``) when +some external factor indicates the user has stopped interacting with the +system. For example, a distro may want to enable power off all USB +ports when the screen blanks, and re-power them when the screen becomes +active. Smart phones and tablets may want to power off USB ports when +the user pushes the power button. diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst new file mode 100644 index 0000000000000000000000000000000000000000..dba0f876b36f4dc1f52c412f82896972b36d208a --- /dev/null +++ b/Documentation/driver-api/usb/usb.rst @@ -0,0 +1,1047 @@ +.. _usb-hostside-api: + +=========================== +The Linux-USB Host Side API +=========================== + +Introduction to USB on Linux +============================ + +A Universal Serial Bus (USB) is used to connect a host, such as a PC or +workstation, to a number of peripheral devices. USB uses a tree +structure, with the host as the root (the system's master), hubs as +interior nodes, and peripherals as leaves (and slaves). Modern PCs +support several such trees of USB devices, usually +a few USB 3.0 (5 GBit/s) or USB 3.1 (10 GBit/s) and some legacy +USB 2.0 (480 MBit/s) busses just in case. + +That master/slave asymmetry was designed-in for a number of reasons, one +being ease of use. It is not physically possible to mistake upstream and +downstream or it does not matter with a type C plug (or they are built into the +peripheral). Also, the host software doesn't need to deal with +distributed auto-configuration since the pre-designated master node +manages all that. + +Kernel developers added USB support to Linux early in the 2.2 kernel +series and have been developing it further since then. Besides support +for each new generation of USB, various host controllers gained support, +new drivers for peripherals have been added and advanced features for latency +measurement and improved power management introduced. + +Linux can run inside USB devices as well as on the hosts that control +the devices. But USB device drivers running inside those peripherals +don't do the same things as the ones running inside hosts, so they've +been given a different name: *gadget drivers*. This document does not +cover gadget drivers. + +USB Host-Side API Model +======================= + +Host-side drivers for USB devices talk to the "usbcore" APIs. There are +two. One is intended for *general-purpose* drivers (exposed through +driver frameworks), and the other is for drivers that are *part of the +core*. Such core drivers include the *hub* driver (which manages trees +of USB devices) and several different kinds of *host controller +drivers*, which control individual busses. + +The device model seen by USB drivers is relatively complex. + +- USB supports four kinds of data transfers (control, bulk, interrupt, + and isochronous). Two of them (control and bulk) use bandwidth as + it's available, while the other two (interrupt and isochronous) are + scheduled to provide guaranteed bandwidth. + +- The device description model includes one or more "configurations" + per device, only one of which is active at a time. Devices are supposed + to be capable of operating at lower than their top + speeds and may provide a BOS descriptor showing the lowest speed they + remain fully operational at. + +- From USB 3.0 on configurations have one or more "functions", which + provide a common functionality and are grouped together for purposes + of power management. + +- Configurations or functions have one or more "interfaces", each of which may have + "alternate settings". Interfaces may be standardized by USB "Class" + specifications, or may be specific to a vendor or device. + + USB device drivers actually bind to interfaces, not devices. Think of + them as "interface drivers", though you may not see many devices + where the distinction is important. *Most USB devices are simple, + with only one function, one configuration, one interface, and one alternate + setting.* + +- Interfaces have one or more "endpoints", each of which supports one + type and direction of data transfer such as "bulk out" or "interrupt + in". The entire configuration may have up to sixteen endpoints in + each direction, allocated as needed among all the interfaces. + +- Data transfer on USB is packetized; each endpoint has a maximum + packet size. Drivers must often be aware of conventions such as + flagging the end of bulk transfers using "short" (including zero + length) packets. + +- The Linux USB API supports synchronous calls for control and bulk + messages. It also supports asynchronous calls for all kinds of data + transfer, using request structures called "URBs" (USB Request + Blocks). + +Accordingly, the USB Core API exposed to device drivers covers quite a +lot of territory. You'll probably need to consult the USB 3.0 +specification, available online from www.usb.org at no cost, as well as +class or device specifications. + +The only host-side drivers that actually touch hardware (reading/writing +registers, handling IRQs, and so on) are the HCDs. In theory, all HCDs +provide the same functionality through the same API. In practice, that's +becoming more true, but there are still differences +that crop up especially with fault handling on the less common controllers. +Different controllers don't +necessarily report the same aspects of failures, and recovery from +faults (including software-induced ones like unlinking an URB) isn't yet +fully consistent. Device driver authors should make a point of doing +disconnect testing (while the device is active) with each different host +controller driver, to make sure drivers don't have bugs of their own as +well as to make sure they aren't relying on some HCD-specific behavior. + +.. _usb_chapter9: + +USB-Standard Types +================== + +In ```` you will find the USB data types defined in +chapter 9 of the USB specification. These data types are used throughout +USB, and in APIs including this host side API, gadget APIs, usb character +devices and debugfs interfaces. + +.. kernel-doc:: include/linux/usb/ch9.h + :internal: + +.. _usb_header: + +Host-Side Data Types and Macros +=============================== + +The host side API exposes several layers to drivers, some of which are +more necessary than others. These support lifecycle models for host side +drivers and devices, and support passing buffers through usbcore to some +HCD that performs the I/O for the device driver. + +.. kernel-doc:: include/linux/usb.h + :internal: + +USB Core APIs +============= + +There are two basic I/O models in the USB API. The most elemental one is +asynchronous: drivers submit requests in the form of an URB, and the +URB's completion callback handles the next step. All USB transfer types +support that model, although there are special cases for control URBs +(which always have setup and status stages, but may not have a data +stage) and isochronous URBs (which allow large packets and include +per-packet fault reports). Built on top of that is synchronous API +support, where a driver calls a routine that allocates one or more URBs, +submits them, and waits until they complete. There are synchronous +wrappers for single-buffer control and bulk transfers (which are awkward +to use in some driver disconnect scenarios), and for scatterlist based +streaming i/o (bulk or interrupt). + +USB drivers need to provide buffers that can be used for DMA, although +they don't necessarily need to provide the DMA mapping themselves. There +are APIs to use used when allocating DMA buffers, which can prevent use +of bounce buffers on some systems. In some cases, drivers may be able to +rely on 64bit DMA to eliminate another kind of bounce buffer. + +.. kernel-doc:: drivers/usb/core/urb.c + :export: + +.. kernel-doc:: drivers/usb/core/message.c + :export: + +.. kernel-doc:: drivers/usb/core/file.c + :export: + +.. kernel-doc:: drivers/usb/core/driver.c + :export: + +.. kernel-doc:: drivers/usb/core/usb.c + :export: + +.. kernel-doc:: drivers/usb/core/hub.c + :export: + +Host Controller APIs +==================== + +These APIs are only for use by host controller drivers, most of which +implement standard register interfaces such as XHCI, EHCI, OHCI, or UHCI. UHCI +was one of the first interfaces, designed by Intel and also used by VIA; +it doesn't do much in hardware. OHCI was designed later, to have the +hardware do more work (bigger transfers, tracking protocol state, and so +on). EHCI was designed with USB 2.0; its design has features that +resemble OHCI (hardware does much more work) as well as UHCI (some parts +of ISO support, TD list processing). XHCI was designed with USB 3.0. It +continues to shift support for functionality into hardware. + +There are host controllers other than the "big three", although most PCI +based controllers (and a few non-PCI based ones) use one of those +interfaces. Not all host controllers use DMA; some use PIO, and there is +also a simulator and a virtual host controller to pipe USB over the network. + +The same basic APIs are available to drivers for all those controllers. +For historical reasons they are in two layers: :c:type:`struct +usb_bus ` is a rather thin layer that became available +in the 2.2 kernels, while :c:type:`struct usb_hcd ` +is a more featureful layer +that lets HCDs share common code, to shrink driver size and +significantly reduce hcd-specific behaviors. + +.. kernel-doc:: drivers/usb/core/hcd.c + :export: + +.. kernel-doc:: drivers/usb/core/hcd-pci.c + :export: + +.. kernel-doc:: drivers/usb/core/buffer.c + :internal: + +The USB character device nodes +============================== + +This chapter presents the Linux character device nodes. You may prefer +to avoid writing new kernel code for your USB driver. User mode device +drivers are usually packaged as applications or libraries, and may use +character devices through some programming library that wraps it. +Such libraries include: + + - `libusb `__ for C/C++, and + - `jUSB `__ for Java. + +Some old information about it can be seen at the "USB Device Filesystem" +section of the USB Guide. The latest copy of the USB Guide can be found +at http://www.linux-usb.org/ + +.. note:: + + - They were used to be implemented via *usbfs*, but this is not part of + the sysfs debug interface. + + - This particular documentation is incomplete, especially with respect + to the asynchronous mode. As of kernel 2.5.66 the code and this + (new) documentation need to be cross-reviewed. + +What files are in "devtmpfs"? +----------------------------- + +Conventionally mounted at ``/dev/bus/usb/``, usbfs features include: + +- ``/dev/bus/usb/BBB/DDD`` ... magic files exposing the each device's + configuration descriptors, and supporting a series of ioctls for + making device requests, including I/O to devices. (Purely for access + by programs.) + +Each bus is given a number (``BBB``) based on when it was enumerated; within +each bus, each device is given a similar number (``DDD``). Those ``BBB/DDD`` +paths are not "stable" identifiers; expect them to change even if you +always leave the devices plugged in to the same hub port. *Don't even +think of saving these in application configuration files.* Stable +identifiers are available, for user mode applications that want to use +them. HID and networking devices expose these stable IDs, so that for +example you can be sure that you told the right UPS to power down its +second server. Pleast note that it doesn't (yet) expose those IDs. + +/dev/bus/usb/BBB/DDD +-------------------- + +Use these files in one of these basic ways: + +- *They can be read,* producing first the device descriptor (18 bytes) and + then the descriptors for the current configuration. See the USB 2.0 spec + for details about those binary data formats. You'll need to convert most + multibyte values from little endian format to your native host byte + order, although a few of the fields in the device descriptor (both of + the BCD-encoded fields, and the vendor and product IDs) will be + byteswapped for you. Note that configuration descriptors include + descriptors for interfaces, altsettings, endpoints, and maybe additional + class descriptors. + +- *Perform USB operations* using *ioctl()* requests to make endpoint I/O + requests (synchronously or asynchronously) or manage the device. These + requests need the ``CAP_SYS_RAWIO`` capability, as well as filesystem + access permissions. Only one ioctl request can be made on one of these + device files at a time. This means that if you are synchronously reading + an endpoint from one thread, you won't be able to write to a different + endpoint from another thread until the read completes. This works for + *half duplex* protocols, but otherwise you'd use asynchronous i/o + requests. + +Each connected USB device has one file. The ``BBB`` indicates the bus +number. The ``DDD`` indicates the device address on that bus. Both +of these numbers are assigned sequentially, and can be reused, so +you can't rely on them for stable access to devices. For example, +it's relatively common for devices to re-enumerate while they are +still connected (perhaps someone jostled their power supply, hub, +or USB cable), so a device might be ``002/027`` when you first connect +it and ``002/048`` sometime later. + +These files can be read as binary data. The binary data consists +of first the device descriptor, then the descriptors for each +configuration of the device. Multi-byte fields in the device descriptor +are converted to host endianness by the kernel. The configuration +descriptors are in bus endian format! The configuration descriptor +are wTotalLength bytes apart. If a device returns less configuration +descriptor data than indicated by wTotalLength there will be a hole in +the file for the missing bytes. This information is also shown +in text form by the ``/sys/kernel/debug/usb/devices`` file, described later. + +These files may also be used to write user-level drivers for the USB +devices. You would open the ``/dev/bus/usb/BBB/DDD`` file read/write, +read its descriptors to make sure it's the device you expect, and then +bind to an interface (or perhaps several) using an ioctl call. You +would issue more ioctls to the device to communicate to it using +control, bulk, or other kinds of USB transfers. The IOCTLs are +listed in the ```` file, and at this writing the +source code (``linux/drivers/usb/core/devio.c``) is the primary reference +for how to access devices through those files. + +Note that since by default these ``BBB/DDD`` files are writable only by +root, only root can write such user mode drivers. You can selectively +grant read/write permissions to other users by using ``chmod``. Also, +usbfs mount options such as ``devmode=0666`` may be helpful. + + +Life Cycle of User Mode Drivers +------------------------------- + +Such a driver first needs to find a device file for a device it knows +how to handle. Maybe it was told about it because a ``/sbin/hotplug`` +event handling agent chose that driver to handle the new device. Or +maybe it's an application that scans all the ``/dev/bus/usb`` device files, +and ignores most devices. In either case, it should :c:func:`read()` +all the descriptors from the device file, and check them against what it +knows how to handle. It might just reject everything except a particular +vendor and product ID, or need a more complex policy. + +Never assume there will only be one such device on the system at a time! +If your code can't handle more than one device at a time, at least +detect when there's more than one, and have your users choose which +device to use. + +Once your user mode driver knows what device to use, it interacts with +it in either of two styles. The simple style is to make only control +requests; some devices don't need more complex interactions than those. +(An example might be software using vendor-specific control requests for +some initialization or configuration tasks, with a kernel driver for the +rest.) + +More likely, you need a more complex style driver: one using non-control +endpoints, reading or writing data and claiming exclusive use of an +interface. *Bulk* transfers are easiest to use, but only their sibling +*interrupt* transfers work with low speed devices. Both interrupt and +*isochronous* transfers offer service guarantees because their bandwidth +is reserved. Such "periodic" transfers are awkward to use through usbfs, +unless you're using the asynchronous calls. However, interrupt transfers +can also be used in a synchronous "one shot" style. + +Your user-mode driver should never need to worry about cleaning up +request state when the device is disconnected, although it should close +its open file descriptors as soon as it starts seeing the ENODEV errors. + +The ioctl() Requests +-------------------- + +To use these ioctls, you need to include the following headers in your +userspace program:: + + #include + #include + #include + +The standard USB device model requests, from "Chapter 9" of the USB 2.0 +specification, are automatically included from the ```` +header. + +Unless noted otherwise, the ioctl requests described here will update +the modification time on the usbfs file to which they are applied +(unless they fail). A return of zero indicates success; otherwise, a +standard USB error code is returned (These are documented in +:ref:`usb-error-codes`). + +Each of these files multiplexes access to several I/O streams, one per +endpoint. Each device has one control endpoint (endpoint zero) which +supports a limited RPC style RPC access. Devices are configured by +hub_wq (in the kernel) setting a device-wide *configuration* that +affects things like power consumption and basic functionality. The +endpoints are part of USB *interfaces*, which may have *altsettings* +affecting things like which endpoints are available. Many devices only +have a single configuration and interface, so drivers for them will +ignore configurations and altsettings. + +Management/Status Requests +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A number of usbfs requests don't deal very directly with device I/O. +They mostly relate to device management and status. These are all +synchronous requests. + +USBDEVFS_CLAIMINTERFACE + This is used to force usbfs to claim a specific interface, which has + not previously been claimed by usbfs or any other kernel driver. The + ioctl parameter is an integer holding the number of the interface + (bInterfaceNumber from descriptor). + + Note that if your driver doesn't claim an interface before trying to + use one of its endpoints, and no other driver has bound to it, then + the interface is automatically claimed by usbfs. + + This claim will be released by a RELEASEINTERFACE ioctl, or by + closing the file descriptor. File modification time is not updated + by this request. + +USBDEVFS_CONNECTINFO + Says whether the device is lowspeed. The ioctl parameter points to a + structure like this:: + + struct usbdevfs_connectinfo { + unsigned int devnum; + unsigned char slow; + }; + + File modification time is not updated by this request. + + *You can't tell whether a "not slow" device is connected at high + speed (480 MBit/sec) or just full speed (12 MBit/sec).* You should + know the devnum value already, it's the DDD value of the device file + name. + +USBDEVFS_GETDRIVER + Returns the name of the kernel driver bound to a given interface (a + string). Parameter is a pointer to this structure, which is + modified:: + + struct usbdevfs_getdriver { + unsigned int interface; + char driver[USBDEVFS_MAXDRIVERNAME + 1]; + }; + + File modification time is not updated by this request. + +USBDEVFS_IOCTL + Passes a request from userspace through to a kernel driver that has + an ioctl entry in the *struct usb_driver* it registered:: + + struct usbdevfs_ioctl { + int ifno; + int ioctl_code; + void *data; + }; + + /* user mode call looks like this. + * 'request' becomes the driver->ioctl() 'code' parameter. + * the size of 'param' is encoded in 'request', and that data + * is copied to or from the driver->ioctl() 'buf' parameter. + */ + static int + usbdev_ioctl (int fd, int ifno, unsigned request, void *param) + { + struct usbdevfs_ioctl wrapper; + + wrapper.ifno = ifno; + wrapper.ioctl_code = request; + wrapper.data = param; + + return ioctl (fd, USBDEVFS_IOCTL, &wrapper); + } + + File modification time is not updated by this request. + + This request lets kernel drivers talk to user mode code through + filesystem operations even when they don't create a character or + block special device. It's also been used to do things like ask + devices what device special file should be used. Two pre-defined + ioctls are used to disconnect and reconnect kernel drivers, so that + user mode code can completely manage binding and configuration of + devices. + +USBDEVFS_RELEASEINTERFACE + This is used to release the claim usbfs made on interface, either + implicitly or because of a USBDEVFS_CLAIMINTERFACE call, before the + file descriptor is closed. The ioctl parameter is an integer holding + the number of the interface (bInterfaceNumber from descriptor); File + modification time is not updated by this request. + + .. warning:: + + *No security check is made to ensure that the task which made + the claim is the one which is releasing it. This means that user + mode driver may interfere other ones.* + +USBDEVFS_RESETEP + Resets the data toggle value for an endpoint (bulk or interrupt) to + DATA0. The ioctl parameter is an integer endpoint number (1 to 15, + as identified in the endpoint descriptor), with USB_DIR_IN added + if the device's endpoint sends data to the host. + + .. Warning:: + + *Avoid using this request. It should probably be removed.* Using + it typically means the device and driver will lose toggle + synchronization. If you really lost synchronization, you likely + need to completely handshake with the device, using a request + like CLEAR_HALT or SET_INTERFACE. + +USBDEVFS_DROP_PRIVILEGES + This is used to relinquish the ability to do certain operations + which are considered to be privileged on a usbfs file descriptor. + This includes claiming arbitrary interfaces, resetting a device on + which there are currently claimed interfaces from other users, and + issuing USBDEVFS_IOCTL calls. The ioctl parameter is a 32 bit mask + of interfaces the user is allowed to claim on this file descriptor. + You may issue this ioctl more than one time to narrow said mask. + +Synchronous I/O Support +~~~~~~~~~~~~~~~~~~~~~~~ + +Synchronous requests involve the kernel blocking until the user mode +request completes, either by finishing successfully or by reporting an +error. In most cases this is the simplest way to use usbfs, although as +noted above it does prevent performing I/O to more than one endpoint at +a time. + +USBDEVFS_BULK + Issues a bulk read or write request to the device. The ioctl + parameter is a pointer to this structure:: + + struct usbdevfs_bulktransfer { + unsigned int ep; + unsigned int len; + unsigned int timeout; /* in milliseconds */ + void *data; + }; + + The ``ep`` value identifies a bulk endpoint number (1 to 15, as + identified in an endpoint descriptor), masked with USB_DIR_IN when + referring to an endpoint which sends data to the host from the + device. The length of the data buffer is identified by ``len``; Recent + kernels support requests up to about 128KBytes. *FIXME say how read + length is returned, and how short reads are handled.*. + +USBDEVFS_CLEAR_HALT + Clears endpoint halt (stall) and resets the endpoint toggle. This is + only meaningful for bulk or interrupt endpoints. The ioctl parameter + is an integer endpoint number (1 to 15, as identified in an endpoint + descriptor), masked with USB_DIR_IN when referring to an endpoint + which sends data to the host from the device. + + Use this on bulk or interrupt endpoints which have stalled, + returning ``-EPIPE`` status to a data transfer request. Do not issue + the control request directly, since that could invalidate the host's + record of the data toggle. + +USBDEVFS_CONTROL + Issues a control request to the device. The ioctl parameter points + to a structure like this:: + + struct usbdevfs_ctrltransfer { + __u8 bRequestType; + __u8 bRequest; + __u16 wValue; + __u16 wIndex; + __u16 wLength; + __u32 timeout; /* in milliseconds */ + void *data; + }; + + The first eight bytes of this structure are the contents of the + SETUP packet to be sent to the device; see the USB 2.0 specification + for details. The bRequestType value is composed by combining a + ``USB_TYPE_*`` value, a ``USB_DIR_*`` value, and a ``USB_RECIP_*`` + value (from ``linux/usb.h``). If wLength is nonzero, it describes + the length of the data buffer, which is either written to the device + (USB_DIR_OUT) or read from the device (USB_DIR_IN). + + At this writing, you can't transfer more than 4 KBytes of data to or + from a device; usbfs has a limit, and some host controller drivers + have a limit. (That's not usually a problem.) *Also* there's no way + to say it's not OK to get a short read back from the device. + +USBDEVFS_RESET + Does a USB level device reset. The ioctl parameter is ignored. After + the reset, this rebinds all device interfaces. File modification + time is not updated by this request. + +.. warning:: + + *Avoid using this call* until some usbcore bugs get fixed, since + it does not fully synchronize device, interface, and driver (not + just usbfs) state. + +USBDEVFS_SETINTERFACE + Sets the alternate setting for an interface. The ioctl parameter is + a pointer to a structure like this:: + + struct usbdevfs_setinterface { + unsigned int interface; + unsigned int altsetting; + }; + + File modification time is not updated by this request. + + Those struct members are from some interface descriptor applying to + the current configuration. The interface number is the + bInterfaceNumber value, and the altsetting number is the + bAlternateSetting value. (This resets each endpoint in the + interface.) + +USBDEVFS_SETCONFIGURATION + Issues the :c:func:`usb_set_configuration()` call for the + device. The parameter is an integer holding the number of a + configuration (bConfigurationValue from descriptor). File + modification time is not updated by this request. + +.. warning:: + + *Avoid using this call* until some usbcore bugs get fixed, since + it does not fully synchronize device, interface, and driver (not + just usbfs) state. + +Asynchronous I/O Support +~~~~~~~~~~~~~~~~~~~~~~~~ + +As mentioned above, there are situations where it may be important to +initiate concurrent operations from user mode code. This is particularly +important for periodic transfers (interrupt and isochronous), but it can +be used for other kinds of USB requests too. In such cases, the +asynchronous requests described here are essential. Rather than +submitting one request and having the kernel block until it completes, +the blocking is separate. + +These requests are packaged into a structure that resembles the URB used +by kernel device drivers. (No POSIX Async I/O support here, sorry.) It +identifies the endpoint type (``USBDEVFS_URB_TYPE_*``), endpoint +(number, masked with USB_DIR_IN as appropriate), buffer and length, +and a user "context" value serving to uniquely identify each request. +(It's usually a pointer to per-request data.) Flags can modify requests +(not as many as supported for kernel drivers). + +Each request can specify a realtime signal number (between SIGRTMIN and +SIGRTMAX, inclusive) to request a signal be sent when the request +completes. + +When usbfs returns these urbs, the status value is updated, and the +buffer may have been modified. Except for isochronous transfers, the +actual_length is updated to say how many bytes were transferred; if the +USBDEVFS_URB_DISABLE_SPD flag is set ("short packets are not OK"), if +fewer bytes were read than were requested then you get an error report:: + + struct usbdevfs_iso_packet_desc { + unsigned int length; + unsigned int actual_length; + unsigned int status; + }; + + struct usbdevfs_urb { + unsigned char type; + unsigned char endpoint; + int status; + unsigned int flags; + void *buffer; + int buffer_length; + int actual_length; + int start_frame; + int number_of_packets; + int error_count; + unsigned int signr; + void *usercontext; + struct usbdevfs_iso_packet_desc iso_frame_desc[]; + }; + +For these asynchronous requests, the file modification time reflects +when the request was initiated. This contrasts with their use with the +synchronous requests, where it reflects when requests complete. + +USBDEVFS_DISCARDURB + *TBS* File modification time is not updated by this request. + +USBDEVFS_DISCSIGNAL + *TBS* File modification time is not updated by this request. + +USBDEVFS_REAPURB + *TBS* File modification time is not updated by this request. + +USBDEVFS_REAPURBNDELAY + *TBS* File modification time is not updated by this request. + +USBDEVFS_SUBMITURB + *TBS* + +The USB devices +=============== + +The USB devices are now exported via debugfs: + +- ``/sys/kernel/debug/usb/devices`` ... a text file showing each of the USB + devices on known to the kernel, and their configuration descriptors. + You can also poll() this to learn about new devices. + +/sys/kernel/debug/usb/devices +----------------------------- + +This file is handy for status viewing tools in user mode, which can scan +the text format and ignore most of it. More detailed device status +(including class and vendor status) is available from device-specific +files. For information about the current format of this file, see the +``Documentation/usb/proc_usb_info.txt`` file in your Linux kernel +sources. + +This file, in combination with the poll() system call, can also be used +to detect when devices are added or removed:: + + int fd; + struct pollfd pfd; + + fd = open("/sys/kernel/debug/usb/devices", O_RDONLY); + pfd = { fd, POLLIN, 0 }; + for (;;) { + /* The first time through, this call will return immediately. */ + poll(&pfd, 1, -1); + + /* To see what's changed, compare the file's previous and current + contents or scan the filesystem. (Scanning is more precise.) */ + } + +Note that this behavior is intended to be used for informational and +debug purposes. It would be more appropriate to use programs such as +udev or HAL to initialize a device or start a user-mode helper program, +for instance. + +In this file, each device's output has multiple lines of ASCII output. + +I made it ASCII instead of binary on purpose, so that someone +can obtain some useful data from it without the use of an +auxiliary program. However, with an auxiliary program, the numbers +in the first 4 columns of each ``T:`` line (topology info: +Lev, Prnt, Port, Cnt) can be used to build a USB topology diagram. + +Each line is tagged with a one-character ID for that line:: + + T = Topology (etc.) + B = Bandwidth (applies only to USB host controllers, which are + virtualized as root hubs) + D = Device descriptor info. + P = Product ID info. (from Device descriptor, but they won't fit + together on one line) + S = String descriptors. + C = Configuration descriptor info. (* = active configuration) + I = Interface descriptor info. + E = Endpoint descriptor info. + +/sys/kernel/debug/usb/devices output format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Legend:: + d = decimal number (may have leading spaces or 0's) + x = hexadecimal number (may have leading spaces or 0's) + s = string + + + +Topology info +^^^^^^^^^^^^^ + +:: + + T: Bus=dd Lev=dd Prnt=dd Port=dd Cnt=dd Dev#=ddd Spd=dddd MxCh=dd + | | | | | | | | |__MaxChildren + | | | | | | | |__Device Speed in Mbps + | | | | | | |__DeviceNumber + | | | | | |__Count of devices at this level + | | | | |__Connector/Port on Parent for this device + | | | |__Parent DeviceNumber + | | |__Level in topology for this bus + | |__Bus number + |__Topology info tag + +Speed may be: + + ======= ====================================================== + 1.5 Mbit/s for low speed USB + 12 Mbit/s for full speed USB + 480 Mbit/s for high speed USB (added for USB 2.0); + also used for Wireless USB, which has no fixed speed + 5000 Mbit/s for SuperSpeed USB (added for USB 3.0) + ======= ====================================================== + +For reasons lost in the mists of time, the Port number is always +too low by 1. For example, a device plugged into port 4 will +show up with ``Port=03``. + +Bandwidth info +^^^^^^^^^^^^^^ + +:: + + B: Alloc=ddd/ddd us (xx%), #Int=ddd, #Iso=ddd + | | | |__Number of isochronous requests + | | |__Number of interrupt requests + | |__Total Bandwidth allocated to this bus + |__Bandwidth info tag + +Bandwidth allocation is an approximation of how much of one frame +(millisecond) is in use. It reflects only periodic transfers, which +are the only transfers that reserve bandwidth. Control and bulk +transfers use all other bandwidth, including reserved bandwidth that +is not used for transfers (such as for short packets). + +The percentage is how much of the "reserved" bandwidth is scheduled by +those transfers. For a low or full speed bus (loosely, "USB 1.1"), +90% of the bus bandwidth is reserved. For a high speed bus (loosely, +"USB 2.0") 80% is reserved. + + +Device descriptor info & Product ID info +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + D: Ver=x.xx Cls=xx(s) Sub=xx Prot=xx MxPS=dd #Cfgs=dd + P: Vendor=xxxx ProdID=xxxx Rev=xx.xx + +where:: + + D: Ver=x.xx Cls=xx(sssss) Sub=xx Prot=xx MxPS=dd #Cfgs=dd + | | | | | | |__NumberConfigurations + | | | | | |__MaxPacketSize of Default Endpoint + | | | | |__DeviceProtocol + | | | |__DeviceSubClass + | | |__DeviceClass + | |__Device USB version + |__Device info tag #1 + +where:: + + P: Vendor=xxxx ProdID=xxxx Rev=xx.xx + | | | |__Product revision number + | | |__Product ID code + | |__Vendor ID code + |__Device info tag #2 + + +String descriptor info +^^^^^^^^^^^^^^^^^^^^^^ +:: + + S: Manufacturer=ssss + | |__Manufacturer of this device as read from the device. + | For USB host controller drivers (virtual root hubs) this may + | be omitted, or (for newer drivers) will identify the kernel + | version and the driver which provides this hub emulation. + |__String info tag + + S: Product=ssss + | |__Product description of this device as read from the device. + | For older USB host controller drivers (virtual root hubs) this + | indicates the driver; for newer ones, it's a product (and vendor) + | description that often comes from the kernel's PCI ID database. + |__String info tag + + S: SerialNumber=ssss + | |__Serial Number of this device as read from the device. + | For USB host controller drivers (virtual root hubs) this is + | some unique ID, normally a bus ID (address or slot name) that + | can't be shared with any other device. + |__String info tag + + + +Configuration descriptor info +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + C:* #Ifs=dd Cfg#=dd Atr=xx MPwr=dddmA + | | | | | |__MaxPower in mA + | | | | |__Attributes + | | | |__ConfiguratioNumber + | | |__NumberOfInterfaces + | |__ "*" indicates the active configuration (others are " ") + |__Config info tag + +USB devices may have multiple configurations, each of which act +rather differently. For example, a bus-powered configuration +might be much less capable than one that is self-powered. Only +one device configuration can be active at a time; most devices +have only one configuration. + +Each configuration consists of one or more interfaces. Each +interface serves a distinct "function", which is typically bound +to a different USB device driver. One common example is a USB +speaker with an audio interface for playback, and a HID interface +for use with software volume control. + +Interface descriptor info (can be multiple per Config) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + I:* If#=dd Alt=dd #EPs=dd Cls=xx(sssss) Sub=xx Prot=xx Driver=ssss + | | | | | | | | |__Driver name + | | | | | | | | or "(none)" + | | | | | | | |__InterfaceProtocol + | | | | | | |__InterfaceSubClass + | | | | | |__InterfaceClass + | | | | |__NumberOfEndpoints + | | | |__AlternateSettingNumber + | | |__InterfaceNumber + | |__ "*" indicates the active altsetting (others are " ") + |__Interface info tag + +A given interface may have one or more "alternate" settings. +For example, default settings may not use more than a small +amount of periodic bandwidth. To use significant fractions +of bus bandwidth, drivers must select a non-default altsetting. + +Only one setting for an interface may be active at a time, and +only one driver may bind to an interface at a time. Most devices +have only one alternate setting per interface. + + +Endpoint descriptor info (can be multiple per Interface) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + E: Ad=xx(s) Atr=xx(ssss) MxPS=dddd Ivl=dddss + | | | | |__Interval (max) between transfers + | | | |__EndpointMaxPacketSize + | | |__Attributes(EndpointType) + | |__EndpointAddress(I=In,O=Out) + |__Endpoint info tag + +The interval is nonzero for all periodic (interrupt or isochronous) +endpoints. For high speed endpoints the transfer interval may be +measured in microseconds rather than milliseconds. + +For high speed periodic endpoints, the ``EndpointMaxPacketSize`` reflects +the per-microframe data transfer size. For "high bandwidth" +endpoints, that can reflect two or three packets (for up to +3KBytes every 125 usec) per endpoint. + +With the Linux-USB stack, periodic bandwidth reservations use the +transfer intervals and sizes provided by URBs, which can be less +than those found in endpoint descriptor. + +Usage examples +~~~~~~~~~~~~~~ + +If a user or script is interested only in Topology info, for +example, use something like ``grep ^T: /sys/kernel/debug/usb/devices`` +for only the Topology lines. A command like +``grep -i ^[tdp]: /sys/kernel/debug/usb/devices`` can be used to list +only the lines that begin with the characters in square brackets, +where the valid characters are TDPCIE. With a slightly more able +script, it can display any selected lines (for example, only T, D, +and P lines) and change their output format. (The ``procusb`` +Perl script is the beginning of this idea. It will list only +selected lines [selected from TBDPSCIE] or "All" lines from +``/sys/kernel/debug/usb/devices``.) + +The Topology lines can be used to generate a graphic/pictorial +of the USB devices on a system's root hub. (See more below +on how to do this.) + +The Interface lines can be used to determine what driver is +being used for each device, and which altsetting it activated. + +The Configuration lines could be used to list maximum power +(in milliamps) that a system's USB devices are using. +For example, ``grep ^C: /sys/kernel/debug/usb/devices``. + + +Here's an example, from a system which has a UHCI root hub, +an external hub connected to the root hub, and a mouse and +a serial converter connected to the external hub. + +:: + + T: Bus=00 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2 + B: Alloc= 28/900 us ( 3%), #Int= 2, #Iso= 0 + D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0000 ProdID=0000 Rev= 0.00 + S: Product=USB UHCI Root Hub + S: SerialNumber=dce0 + C:* #Ifs= 1 Cfg#= 1 Atr=40 MxPwr= 0mA + I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub + E: Ad=81(I) Atr=03(Int.) MxPS= 8 Ivl=255ms + + T: Bus=00 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 4 + D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0451 ProdID=1446 Rev= 1.00 + C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub + E: Ad=81(I) Atr=03(Int.) MxPS= 1 Ivl=255ms + + T: Bus=00 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=1.5 MxCh= 0 + D: Ver= 1.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=04b4 ProdID=0001 Rev= 0.00 + C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=mouse + E: Ad=81(I) Atr=03(Int.) MxPS= 3 Ivl= 10ms + + T: Bus=00 Lev=02 Prnt=02 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 + D: Ver= 1.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0565 ProdID=0001 Rev= 1.08 + S: Manufacturer=Peracom Networks, Inc. + S: Product=Peracom USB to Serial Converter + C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 3 Cls=00(>ifc ) Sub=00 Prot=00 Driver=serial + E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl= 16ms + E: Ad=01(O) Atr=02(Bulk) MxPS= 16 Ivl= 16ms + E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl= 8ms + + +Selecting only the ``T:`` and ``I:`` lines from this (for example, by using +``procusb ti``), we have + +:: + + T: Bus=00 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2 + T: Bus=00 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 4 + I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub + T: Bus=00 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=1.5 MxCh= 0 + I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=mouse + T: Bus=00 Lev=02 Prnt=02 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 + I: If#= 0 Alt= 0 #EPs= 3 Cls=00(>ifc ) Sub=00 Prot=00 Driver=serial + + +Physically this looks like (or could be converted to):: + + +------------------+ + | PC/root_hub (12)| Dev# = 1 + +------------------+ (nn) is Mbps. + Level 0 | CN.0 | CN.1 | [CN = connector/port #] + +------------------+ + / + / + +-----------------------+ + Level 1 | Dev#2: 4-port hub (12)| + +-----------------------+ + |CN.0 |CN.1 |CN.2 |CN.3 | + +-----------------------+ + \ \____________________ + \_____ \ + \ \ + +--------------------+ +--------------------+ + Level 2 | Dev# 3: mouse (1.5)| | Dev# 4: serial (12)| + +--------------------+ +--------------------+ + + + +Or, in a more tree-like structure (ports [Connectors] without +connections could be omitted):: + + PC: Dev# 1, root hub, 2 ports, 12 Mbps + |_ CN.0: Dev# 2, hub, 4 ports, 12 Mbps + |_ CN.0: Dev #3, mouse, 1.5 Mbps + |_ CN.1: + |_ CN.2: Dev #4, serial, 12 Mbps + |_ CN.3: + |_ CN.1: diff --git a/Documentation/driver-api/usb/writing_musb_glue_layer.rst b/Documentation/driver-api/usb/writing_musb_glue_layer.rst new file mode 100644 index 0000000000000000000000000000000000000000..e90e8fa95600fe0eb2a2260fb0c6ac1723762652 --- /dev/null +++ b/Documentation/driver-api/usb/writing_musb_glue_layer.rst @@ -0,0 +1,723 @@ +========================= +Writing a MUSB Glue Layer +========================= + +:Author: Apelete Seketeli + +Introduction +============ + +The Linux MUSB subsystem is part of the larger Linux USB subsystem. It +provides support for embedded USB Device Controllers (UDC) that do not +use Universal Host Controller Interface (UHCI) or Open Host Controller +Interface (OHCI). + +Instead, these embedded UDC rely on the USB On-the-Go (OTG) +specification which they implement at least partially. The silicon +reference design used in most cases is the Multipoint USB Highspeed +Dual-Role Controller (MUSB HDRC) found in the Mentor Graphics Inventra™ +design. + +As a self-taught exercise I have written an MUSB glue layer for the +Ingenic JZ4740 SoC, modelled after the many MUSB glue layers in the +kernel source tree. This layer can be found at +``drivers/usb/musb/jz4740.c``. In this documentation I will walk through the +basics of the ``jz4740.c`` glue layer, explaining the different pieces and +what needs to be done in order to write your own device glue layer. + +.. _musb-basics: + +Linux MUSB Basics +================= + +To get started on the topic, please read USB On-the-Go Basics (see +Resources) which provides an introduction of USB OTG operation at the +hardware level. A couple of wiki pages by Texas Instruments and Analog +Devices also provide an overview of the Linux kernel MUSB configuration, +albeit focused on some specific devices provided by these companies. +Finally, getting acquainted with the USB specification at USB home page +may come in handy, with practical instance provided through the Writing +USB Device Drivers documentation (again, see Resources). + +Linux USB stack is a layered architecture in which the MUSB controller +hardware sits at the lowest. The MUSB controller driver abstract the +MUSB controller hardware to the Linux USB stack:: + + ------------------------ + | | <------- drivers/usb/gadget + | Linux USB Core Stack | <------- drivers/usb/host + | | <------- drivers/usb/core + ------------------------ + ⬍ + -------------------------- + | | <------ drivers/usb/musb/musb_gadget.c + | MUSB Controller driver | <------ drivers/usb/musb/musb_host.c + | | <------ drivers/usb/musb/musb_core.c + -------------------------- + ⬍ + --------------------------------- + | MUSB Platform Specific Driver | + | | <-- drivers/usb/musb/jz4740.c + | aka "Glue Layer" | + --------------------------------- + ⬍ + --------------------------------- + | MUSB Controller Hardware | + --------------------------------- + +As outlined above, the glue layer is actually the platform specific code +sitting in between the controller driver and the controller hardware. + +Just like a Linux USB driver needs to register itself with the Linux USB +subsystem, the MUSB glue layer needs first to register itself with the +MUSB controller driver. This will allow the controller driver to know +about which device the glue layer supports and which functions to call +when a supported device is detected or released; remember we are talking +about an embedded controller chip here, so no insertion or removal at +run-time. + +All of this information is passed to the MUSB controller driver through +a :c:type:`platform_driver` structure defined in the glue layer as:: + + static struct platform_driver jz4740_driver = { + .probe = jz4740_probe, + .remove = jz4740_remove, + .driver = { + .name = "musb-jz4740", + }, + }; + +The probe and remove function pointers are called when a matching device +is detected and, respectively, released. The name string describes the +device supported by this glue layer. In the current case it matches a +platform_device structure declared in ``arch/mips/jz4740/platform.c``. Note +that we are not using device tree bindings here. + +In order to register itself to the controller driver, the glue layer +goes through a few steps, basically allocating the controller hardware +resources and initialising a couple of circuits. To do so, it needs to +keep track of the information used throughout these steps. This is done +by defining a private ``jz4740_glue`` structure:: + + struct jz4740_glue { + struct device *dev; + struct platform_device *musb; + struct clk *clk; + }; + + +The dev and musb members are both device structure variables. The first +one holds generic information about the device, since it's the basic +device structure, and the latter holds information more closely related +to the subsystem the device is registered to. The clk variable keeps +information related to the device clock operation. + +Let's go through the steps of the probe function that leads the glue +layer to register itself to the controller driver. + +.. note:: + + For the sake of readability each function will be split in logical + parts, each part being shown as if it was independent from the others. + +.. code-block:: c + :emphasize-lines: 8,12,18 + + static int jz4740_probe(struct platform_device *pdev) + { + struct platform_device *musb; + struct jz4740_glue *glue; + struct clk *clk; + int ret; + + glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL); + if (!glue) + return -ENOMEM; + + musb = platform_device_alloc("musb-hdrc", PLATFORM_DEVID_AUTO); + if (!musb) { + dev_err(&pdev->dev, "failed to allocate musb device\n"); + return -ENOMEM; + } + + clk = devm_clk_get(&pdev->dev, "udc"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + ret = PTR_ERR(clk); + goto err_platform_device_put; + } + + ret = clk_prepare_enable(clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable clock\n"); + goto err_platform_device_put; + } + + musb->dev.parent = &pdev->dev; + + glue->dev = &pdev->dev; + glue->musb = musb; + glue->clk = clk; + + return 0; + + err_platform_device_put: + platform_device_put(musb); + return ret; + } + +The first few lines of the probe function allocate and assign the glue, +musb and clk variables. The ``GFP_KERNEL`` flag (line 8) allows the +allocation process to sleep and wait for memory, thus being usable in a +locking situation. The ``PLATFORM_DEVID_AUTO`` flag (line 12) allows +automatic allocation and management of device IDs in order to avoid +device namespace collisions with explicit IDs. With :c:func:`devm_clk_get` +(line 18) the glue layer allocates the clock -- the ``devm_`` prefix +indicates that :c:func:`clk_get` is managed: it automatically frees the +allocated clock resource data when the device is released -- and enable +it. + + + +Then comes the registration steps: + +.. code-block:: c + :emphasize-lines: 3,5,7,9,16 + + static int jz4740_probe(struct platform_device *pdev) + { + struct musb_hdrc_platform_data *pdata = &jz4740_musb_platform_data; + + pdata->platform_ops = &jz4740_musb_ops; + + platform_set_drvdata(pdev, glue); + + ret = platform_device_add_resources(musb, pdev->resource, + pdev->num_resources); + if (ret) { + dev_err(&pdev->dev, "failed to add resources\n"); + goto err_clk_disable; + } + + ret = platform_device_add_data(musb, pdata, sizeof(*pdata)); + if (ret) { + dev_err(&pdev->dev, "failed to add platform_data\n"); + goto err_clk_disable; + } + + return 0; + + err_clk_disable: + clk_disable_unprepare(clk); + err_platform_device_put: + platform_device_put(musb); + return ret; + } + +The first step is to pass the device data privately held by the glue +layer on to the controller driver through :c:func:`platform_set_drvdata` +(line 7). Next is passing on the device resources information, also privately +held at that point, through :c:func:`platform_device_add_resources` (line 9). + +Finally comes passing on the platform specific data to the controller +driver (line 16). Platform data will be discussed in +:ref:`musb-dev-platform-data`, but here we are looking at the +``platform_ops`` function pointer (line 5) in ``musb_hdrc_platform_data`` +structure (line 3). This function pointer allows the MUSB controller +driver to know which function to call for device operation:: + + static const struct musb_platform_ops jz4740_musb_ops = { + .init = jz4740_musb_init, + .exit = jz4740_musb_exit, + }; + +Here we have the minimal case where only init and exit functions are +called by the controller driver when needed. Fact is the JZ4740 MUSB +controller is a basic controller, lacking some features found in other +controllers, otherwise we may also have pointers to a few other +functions like a power management function or a function to switch +between OTG and non-OTG modes, for instance. + +At that point of the registration process, the controller driver +actually calls the init function: + + .. code-block:: c + :emphasize-lines: 12,14 + + static int jz4740_musb_init(struct musb *musb) + { + musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); + if (!musb->xceiv) { + pr_err("HS UDC: no transceiver configured\n"); + return -ENODEV; + } + + /* Silicon does not implement ConfigData register. + * Set dyn_fifo to avoid reading EP config from hardware. + */ + musb->dyn_fifo = true; + + musb->isr = jz4740_musb_interrupt; + + return 0; + } + +The goal of ``jz4740_musb_init()`` is to get hold of the transceiver +driver data of the MUSB controller hardware and pass it on to the MUSB +controller driver, as usual. The transceiver is the circuitry inside the +controller hardware responsible for sending/receiving the USB data. +Since it is an implementation of the physical layer of the OSI model, +the transceiver is also referred to as PHY. + +Getting hold of the ``MUSB PHY`` driver data is done with ``usb_get_phy()`` +which returns a pointer to the structure containing the driver instance +data. The next couple of instructions (line 12 and 14) are used as a +quirk and to setup IRQ handling respectively. Quirks and IRQ handling +will be discussed later in :ref:`musb-dev-quirks` and +:ref:`musb-handling-irqs`\ :: + + static int jz4740_musb_exit(struct musb *musb) + { + usb_put_phy(musb->xceiv); + + return 0; + } + +Acting as the counterpart of init, the exit function releases the MUSB +PHY driver when the controller hardware itself is about to be released. + +Again, note that init and exit are fairly simple in this case due to the +basic set of features of the JZ4740 controller hardware. When writing an +musb glue layer for a more complex controller hardware, you might need +to take care of more processing in those two functions. + +Returning from the init function, the MUSB controller driver jumps back +into the probe function:: + + static int jz4740_probe(struct platform_device *pdev) + { + ret = platform_device_add(musb); + if (ret) { + dev_err(&pdev->dev, "failed to register musb device\n"); + goto err_clk_disable; + } + + return 0; + + err_clk_disable: + clk_disable_unprepare(clk); + err_platform_device_put: + platform_device_put(musb); + return ret; + } + +This is the last part of the device registration process where the glue +layer adds the controller hardware device to Linux kernel device +hierarchy: at this stage, all known information about the device is +passed on to the Linux USB core stack: + + .. code-block:: c + :emphasize-lines: 5,6 + + static int jz4740_remove(struct platform_device *pdev) + { + struct jz4740_glue *glue = platform_get_drvdata(pdev); + + platform_device_unregister(glue->musb); + clk_disable_unprepare(glue->clk); + + return 0; + } + +Acting as the counterpart of probe, the remove function unregister the +MUSB controller hardware (line 5) and disable the clock (line 6), +allowing it to be gated. + +.. _musb-handling-irqs: + +Handling IRQs +============= + +Additionally to the MUSB controller hardware basic setup and +registration, the glue layer is also responsible for handling the IRQs: + + .. code-block:: c + :emphasize-lines: 7,9-11,14,24 + + static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) + { + unsigned long flags; + irqreturn_t retval = IRQ_NONE; + struct musb *musb = __hci; + + spin_lock_irqsave(&musb->lock, flags); + + musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB); + musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX); + musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX); + + /* + * The controller is gadget only, the state of the host mode IRQ bits is + * undefined. Mask them to make sure that the musb driver core will + * never see them set + */ + musb->int_usb &= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME | + MUSB_INTR_RESET | MUSB_INTR_SOF; + + if (musb->int_usb || musb->int_tx || musb->int_rx) + retval = musb_interrupt(musb); + + spin_unlock_irqrestore(&musb->lock, flags); + + return retval; + } + +Here the glue layer mostly has to read the relevant hardware registers +and pass their values on to the controller driver which will handle the +actual event that triggered the IRQ. + +The interrupt handler critical section is protected by the +:c:func:`spin_lock_irqsave` and counterpart :c:func:`spin_unlock_irqrestore` +functions (line 7 and 24 respectively), which prevent the interrupt +handler code to be run by two different threads at the same time. + +Then the relevant interrupt registers are read (line 9 to 11): + +- ``MUSB_INTRUSB``: indicates which USB interrupts are currently active, + +- ``MUSB_INTRTX``: indicates which of the interrupts for TX endpoints are + currently active, + +- ``MUSB_INTRRX``: indicates which of the interrupts for TX endpoints are + currently active. + +Note that :c:func:`musb_readb` is used to read 8-bit registers at most, while +:c:func:`musb_readw` allows us to read at most 16-bit registers. There are +other functions that can be used depending on the size of your device +registers. See ``musb_io.h`` for more information. + +Instruction on line 18 is another quirk specific to the JZ4740 USB +device controller, which will be discussed later in :ref:`musb-dev-quirks`. + +The glue layer still needs to register the IRQ handler though. Remember +the instruction on line 14 of the init function:: + + static int jz4740_musb_init(struct musb *musb) + { + musb->isr = jz4740_musb_interrupt; + + return 0; + } + +This instruction sets a pointer to the glue layer IRQ handler function, +in order for the controller hardware to call the handler back when an +IRQ comes from the controller hardware. The interrupt handler is now +implemented and registered. + +.. _musb-dev-platform-data: + +Device Platform Data +==================== + +In order to write an MUSB glue layer, you need to have some data +describing the hardware capabilities of your controller hardware, which +is called the platform data. + +Platform data is specific to your hardware, though it may cover a broad +range of devices, and is generally found somewhere in the ``arch/`` +directory, depending on your device architecture. + +For instance, platform data for the JZ4740 SoC is found in +``arch/mips/jz4740/platform.c``. In the ``platform.c`` file each device of the +JZ4740 SoC is described through a set of structures. + +Here is the part of ``arch/mips/jz4740/platform.c`` that covers the USB +Device Controller (UDC): + + .. code-block:: c + :emphasize-lines: 2,7,14-17,21,22,25,26,28,29 + + /* USB Device Controller */ + struct platform_device jz4740_udc_xceiv_device = { + .name = "usb_phy_gen_xceiv", + .id = 0, + }; + + static struct resource jz4740_udc_resources[] = { + [0] = { + .start = JZ4740_UDC_BASE_ADDR, + .end = JZ4740_UDC_BASE_ADDR + 0x10000 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = JZ4740_IRQ_UDC, + .end = JZ4740_IRQ_UDC, + .flags = IORESOURCE_IRQ, + .name = "mc", + }, + }; + + struct platform_device jz4740_udc_device = { + .name = "musb-jz4740", + .id = -1, + .dev = { + .dma_mask = &jz4740_udc_device.dev.coherent_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, + .num_resources = ARRAY_SIZE(jz4740_udc_resources), + .resource = jz4740_udc_resources, + }; + +The ``jz4740_udc_xceiv_device`` platform device structure (line 2) +describes the UDC transceiver with a name and id number. + +At the time of this writing, note that ``usb_phy_gen_xceiv`` is the +specific name to be used for all transceivers that are either built-in +with reference USB IP or autonomous and doesn't require any PHY +programming. You will need to set ``CONFIG_NOP_USB_XCEIV=y`` in the +kernel configuration to make use of the corresponding transceiver +driver. The id field could be set to -1 (equivalent to +``PLATFORM_DEVID_NONE``), -2 (equivalent to ``PLATFORM_DEVID_AUTO``) or +start with 0 for the first device of this kind if we want a specific id +number. + +The ``jz4740_udc_resources`` resource structure (line 7) defines the UDC +registers base addresses. + +The first array (line 9 to 11) defines the UDC registers base memory +addresses: start points to the first register memory address, end points +to the last register memory address and the flags member defines the +type of resource we are dealing with. So ``IORESOURCE_MEM`` is used to +define the registers memory addresses. The second array (line 14 to 17) +defines the UDC IRQ registers addresses. Since there is only one IRQ +register available for the JZ4740 UDC, start and end point at the same +address. The ``IORESOURCE_IRQ`` flag tells that we are dealing with IRQ +resources, and the name ``mc`` is in fact hard-coded in the MUSB core in +order for the controller driver to retrieve this IRQ resource by +querying it by its name. + +Finally, the ``jz4740_udc_device`` platform device structure (line 21) +describes the UDC itself. + +The ``musb-jz4740`` name (line 22) defines the MUSB driver that is used +for this device; remember this is in fact the name that we used in the +``jz4740_driver`` platform driver structure in :ref:`musb-basics`. +The id field (line 23) is set to -1 (equivalent to ``PLATFORM_DEVID_NONE``) +since we do not need an id for the device: the MUSB controller driver was +already set to allocate an automatic id in :ref:`musb-basics`. In the dev field +we care for DMA related information here. The ``dma_mask`` field (line 25) +defines the width of the DMA mask that is going to be used, and +``coherent_dma_mask`` (line 26) has the same purpose but for the +``alloc_coherent`` DMA mappings: in both cases we are using a 32 bits mask. +Then the resource field (line 29) is simply a pointer to the resource +structure defined before, while the ``num_resources`` field (line 28) keeps +track of the number of arrays defined in the resource structure (in this +case there were two resource arrays defined before). + +With this quick overview of the UDC platform data at the ``arch/`` level now +done, let's get back to the MUSB glue layer specific platform data in +``drivers/usb/musb/jz4740.c``: + + .. code-block:: c + :emphasize-lines: 3,5,7-9,11 + + static struct musb_hdrc_config jz4740_musb_config = { + /* Silicon does not implement USB OTG. */ + .multipoint = 0, + /* Max EPs scanned, driver will decide which EP can be used. */ + .num_eps = 4, + /* RAMbits needed to configure EPs from table */ + .ram_bits = 9, + .fifo_cfg = jz4740_musb_fifo_cfg, + .fifo_cfg_size = ARRAY_SIZE(jz4740_musb_fifo_cfg), + }; + + static struct musb_hdrc_platform_data jz4740_musb_platform_data = { + .mode = MUSB_PERIPHERAL, + .config = &jz4740_musb_config, + }; + +First the glue layer configures some aspects of the controller driver +operation related to the controller hardware specifics. This is done +through the ``jz4740_musb_config`` :c:type:`musb_hdrc_config` structure. + +Defining the OTG capability of the controller hardware, the multipoint +member (line 3) is set to 0 (equivalent to false) since the JZ4740 UDC +is not OTG compatible. Then ``num_eps`` (line 5) defines the number of USB +endpoints of the controller hardware, including endpoint 0: here we have +3 endpoints + endpoint 0. Next is ``ram_bits`` (line 7) which is the width +of the RAM address bus for the MUSB controller hardware. This +information is needed when the controller driver cannot automatically +configure endpoints by reading the relevant controller hardware +registers. This issue will be discussed when we get to device quirks in +:ref:`musb-dev-quirks`. Last two fields (line 8 and 9) are also +about device quirks: ``fifo_cfg`` points to the USB endpoints configuration +table and ``fifo_cfg_size`` keeps track of the size of the number of +entries in that configuration table. More on that later in +:ref:`musb-dev-quirks`. + +Then this configuration is embedded inside ``jz4740_musb_platform_data`` +:c:type:`musb_hdrc_platform_data` structure (line 11): config is a pointer to +the configuration structure itself, and mode tells the controller driver +if the controller hardware may be used as ``MUSB_HOST`` only, +``MUSB_PERIPHERAL`` only or ``MUSB_OTG`` which is a dual mode. + +Remember that ``jz4740_musb_platform_data`` is then used to convey +platform data information as we have seen in the probe function in +:ref:`musb-basics`. + +.. _musb-dev-quirks: + +Device Quirks +============= + +Completing the platform data specific to your device, you may also need +to write some code in the glue layer to work around some device specific +limitations. These quirks may be due to some hardware bugs, or simply be +the result of an incomplete implementation of the USB On-the-Go +specification. + +The JZ4740 UDC exhibits such quirks, some of which we will discuss here +for the sake of insight even though these might not be found in the +controller hardware you are working on. + +Let's get back to the init function first: + + .. code-block:: c + :emphasize-lines: 12 + + static int jz4740_musb_init(struct musb *musb) + { + musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2); + if (!musb->xceiv) { + pr_err("HS UDC: no transceiver configured\n"); + return -ENODEV; + } + + /* Silicon does not implement ConfigData register. + * Set dyn_fifo to avoid reading EP config from hardware. + */ + musb->dyn_fifo = true; + + musb->isr = jz4740_musb_interrupt; + + return 0; + } + +Instruction on line 12 helps the MUSB controller driver to work around +the fact that the controller hardware is missing registers that are used +for USB endpoints configuration. + +Without these registers, the controller driver is unable to read the +endpoints configuration from the hardware, so we use line 12 instruction +to bypass reading the configuration from silicon, and rely on a +hard-coded table that describes the endpoints configuration instead:: + + static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = { + { .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, }, + { .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, }, + { .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, }, + }; + +Looking at the configuration table above, we see that each endpoints is +described by three fields: ``hw_ep_num`` is the endpoint number, style is +its direction (either ``FIFO_TX`` for the controller driver to send packets +in the controller hardware, or ``FIFO_RX`` to receive packets from +hardware), and maxpacket defines the maximum size of each data packet +that can be transmitted over that endpoint. Reading from the table, the +controller driver knows that endpoint 1 can be used to send and receive +USB data packets of 512 bytes at once (this is in fact a bulk in/out +endpoint), and endpoint 2 can be used to send data packets of 64 bytes +at once (this is in fact an interrupt endpoint). + +Note that there is no information about endpoint 0 here: that one is +implemented by default in every silicon design, with a predefined +configuration according to the USB specification. For more examples of +endpoint configuration tables, see ``musb_core.c``. + +Let's now get back to the interrupt handler function: + + .. code-block:: c + :emphasize-lines: 18-19 + + static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci) + { + unsigned long flags; + irqreturn_t retval = IRQ_NONE; + struct musb *musb = __hci; + + spin_lock_irqsave(&musb->lock, flags); + + musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB); + musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX); + musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX); + + /* + * The controller is gadget only, the state of the host mode IRQ bits is + * undefined. Mask them to make sure that the musb driver core will + * never see them set + */ + musb->int_usb &= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME | + MUSB_INTR_RESET | MUSB_INTR_SOF; + + if (musb->int_usb || musb->int_tx || musb->int_rx) + retval = musb_interrupt(musb); + + spin_unlock_irqrestore(&musb->lock, flags); + + return retval; + } + +Instruction on line 18 above is a way for the controller driver to work +around the fact that some interrupt bits used for USB host mode +operation are missing in the ``MUSB_INTRUSB`` register, thus left in an +undefined hardware state, since this MUSB controller hardware is used in +peripheral mode only. As a consequence, the glue layer masks these +missing bits out to avoid parasite interrupts by doing a logical AND +operation between the value read from ``MUSB_INTRUSB`` and the bits that +are actually implemented in the register. + +These are only a couple of the quirks found in the JZ4740 USB device +controller. Some others were directly addressed in the MUSB core since +the fixes were generic enough to provide a better handling of the issues +for others controller hardware eventually. + +Conclusion +========== + +Writing a Linux MUSB glue layer should be a more accessible task, as +this documentation tries to show the ins and outs of this exercise. + +The JZ4740 USB device controller being fairly simple, I hope its glue +layer serves as a good example for the curious mind. Used with the +current MUSB glue layers, this documentation should provide enough +guidance to get started; should anything gets out of hand, the linux-usb +mailing list archive is another helpful resource to browse through. + +Acknowledgements +================ + +Many thanks to Lars-Peter Clausen and Maarten ter Huurne for answering +my questions while I was writing the JZ4740 glue layer and for helping +me out getting the code in good shape. + +I would also like to thank the Qi-Hardware community at large for its +cheerful guidance and support. + +Resources +========= + +USB Home Page: http://www.usb.org + +linux-usb Mailing List Archives: http://marc.info/?l=linux-usb + +USB On-the-Go Basics: +http://www.maximintegrated.com/app-notes/index.mvp/id/1822 + +:ref:`Writing USB Device Drivers ` + +Texas Instruments USB Configuration Wiki Page: +http://processors.wiki.ti.com/index.php/Usbgeneralpage + +Analog Devices Blackfin MUSB Configuration: +http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:musb diff --git a/Documentation/driver-api/usb/writing_usb_driver.rst b/Documentation/driver-api/usb/writing_usb_driver.rst new file mode 100644 index 0000000000000000000000000000000000000000..69f077dcdb785c7265064010eafeb9887feca2b4 --- /dev/null +++ b/Documentation/driver-api/usb/writing_usb_driver.rst @@ -0,0 +1,326 @@ +.. _writing-usb-driver: + +========================== +Writing USB Device Drivers +========================== + +:Author: Greg Kroah-Hartman + +Introduction +============ + +The Linux USB subsystem has grown from supporting only two different +types of devices in the 2.2.7 kernel (mice and keyboards), to over 20 +different types of devices in the 2.4 kernel. Linux currently supports +almost all USB class devices (standard types of devices like keyboards, +mice, modems, printers and speakers) and an ever-growing number of +vendor-specific devices (such as USB to serial converters, digital +cameras, Ethernet devices and MP3 players). For a full list of the +different USB devices currently supported, see Resources. + +The remaining kinds of USB devices that do not have support on Linux are +almost all vendor-specific devices. Each vendor decides to implement a +custom protocol to talk to their device, so a custom driver usually +needs to be created. Some vendors are open with their USB protocols and +help with the creation of Linux drivers, while others do not publish +them, and developers are forced to reverse-engineer. See Resources for +some links to handy reverse-engineering tools. + +Because each different protocol causes a new driver to be created, I +have written a generic USB driver skeleton, modelled after the +pci-skeleton.c file in the kernel source tree upon which many PCI +network drivers have been based. This USB skeleton can be found at +drivers/usb/usb-skeleton.c in the kernel source tree. In this article I +will walk through the basics of the skeleton driver, explaining the +different pieces and what needs to be done to customize it to your +specific device. + +Linux USB Basics +================ + +If you are going to write a Linux USB driver, please become familiar +with the USB protocol specification. It can be found, along with many +other useful documents, at the USB home page (see Resources). An +excellent introduction to the Linux USB subsystem can be found at the +USB Working Devices List (see Resources). It explains how the Linux USB +subsystem is structured and introduces the reader to the concept of USB +urbs (USB Request Blocks), which are essential to USB drivers. + +The first thing a Linux USB driver needs to do is register itself with +the Linux USB subsystem, giving it some information about which devices +the driver supports and which functions to call when a device supported +by the driver is inserted or removed from the system. All of this +information is passed to the USB subsystem in the :c:type:`usb_driver` +structure. The skeleton driver declares a :c:type:`usb_driver` as:: + + static struct usb_driver skel_driver = { + .name = "skeleton", + .probe = skel_probe, + .disconnect = skel_disconnect, + .fops = &skel_fops, + .minor = USB_SKEL_MINOR_BASE, + .id_table = skel_table, + }; + + +The variable name is a string that describes the driver. It is used in +informational messages printed to the system log. The probe and +disconnect function pointers are called when a device that matches the +information provided in the ``id_table`` variable is either seen or +removed. + +The fops and minor variables are optional. Most USB drivers hook into +another kernel subsystem, such as the SCSI, network or TTY subsystem. +These types of drivers register themselves with the other kernel +subsystem, and any user-space interactions are provided through that +interface. But for drivers that do not have a matching kernel subsystem, +such as MP3 players or scanners, a method of interacting with user space +is needed. The USB subsystem provides a way to register a minor device +number and a set of :c:type:`file_operations` function pointers that enable +this user-space interaction. The skeleton driver needs this kind of +interface, so it provides a minor starting number and a pointer to its +:c:type:`file_operations` functions. + +The USB driver is then registered with a call to :c:func:`usb_register`, +usually in the driver's init function, as shown here:: + + static int __init usb_skel_init(void) + { + int result; + + /* register this driver with the USB subsystem */ + result = usb_register(&skel_driver); + if (result < 0) { + err("usb_register failed for the "__FILE__ "driver." + "Error number %d", result); + return -1; + } + + return 0; + } + module_init(usb_skel_init); + + +When the driver is unloaded from the system, it needs to deregister +itself with the USB subsystem. This is done with the :c:func:`usb_deregister` +function:: + + static void __exit usb_skel_exit(void) + { + /* deregister this driver with the USB subsystem */ + usb_deregister(&skel_driver); + } + module_exit(usb_skel_exit); + + +To enable the linux-hotplug system to load the driver automatically when +the device is plugged in, you need to create a ``MODULE_DEVICE_TABLE``. +The following code tells the hotplug scripts that this module supports a +single device with a specific vendor and product ID:: + + /* table of devices that work with this driver */ + static struct usb_device_id skel_table [] = { + { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) }, + { } /* Terminating entry */ + }; + MODULE_DEVICE_TABLE (usb, skel_table); + + +There are other macros that can be used in describing a struct +:c:type:`usb_device_id` for drivers that support a whole class of USB +drivers. See :ref:`usb.h ` for more information on this. + +Device operation +================ + +When a device is plugged into the USB bus that matches the device ID +pattern that your driver registered with the USB core, the probe +function is called. The :c:type:`usb_device` structure, interface number and +the interface ID are passed to the function:: + + static int skel_probe(struct usb_interface *interface, + const struct usb_device_id *id) + + +The driver now needs to verify that this device is actually one that it +can accept. If so, it returns 0. If not, or if any error occurs during +initialization, an errorcode (such as ``-ENOMEM`` or ``-ENODEV``) is +returned from the probe function. + +In the skeleton driver, we determine what end points are marked as +bulk-in and bulk-out. We create buffers to hold the data that will be +sent and received from the device, and a USB urb to write data to the +device is initialized. + +Conversely, when the device is removed from the USB bus, the disconnect +function is called with the device pointer. The driver needs to clean +any private data that has been allocated at this time and to shut down +any pending urbs that are in the USB system. + +Now that the device is plugged into the system and the driver is bound +to the device, any of the functions in the :c:type:`file_operations` structure +that were passed to the USB subsystem will be called from a user program +trying to talk to the device. The first function called will be open, as +the program tries to open the device for I/O. We increment our private +usage count and save a pointer to our internal structure in the file +structure. This is done so that future calls to file operations will +enable the driver to determine which device the user is addressing. All +of this is done with the following code:: + + /* increment our usage count for the module */ + ++skel->open_count; + + /* save our object in the file's private structure */ + file->private_data = dev; + + +After the open function is called, the read and write functions are +called to receive and send data to the device. In the ``skel_write`` +function, we receive a pointer to some data that the user wants to send +to the device and the size of the data. The function determines how much +data it can send to the device based on the size of the write urb it has +created (this size depends on the size of the bulk out end point that +the device has). Then it copies the data from user space to kernel +space, points the urb to the data and submits the urb to the USB +subsystem. This can be seen in the following code:: + + /* we can only write as much as 1 urb will hold */ + bytes_written = (count > skel->bulk_out_size) ? skel->bulk_out_size : count; + + /* copy the data from user space into our urb */ + copy_from_user(skel->write_urb->transfer_buffer, buffer, bytes_written); + + /* set up our urb */ + usb_fill_bulk_urb(skel->write_urb, + skel->dev, + usb_sndbulkpipe(skel->dev, skel->bulk_out_endpointAddr), + skel->write_urb->transfer_buffer, + bytes_written, + skel_write_bulk_callback, + skel); + + /* send the data out the bulk port */ + result = usb_submit_urb(skel->write_urb); + if (result) { + err("Failed submitting write urb, error %d", result); + } + + +When the write urb is filled up with the proper information using the +:c:func:`usb_fill_bulk_urb` function, we point the urb's completion callback +to call our own ``skel_write_bulk_callback`` function. This function is +called when the urb is finished by the USB subsystem. The callback +function is called in interrupt context, so caution must be taken not to +do very much processing at that time. Our implementation of +``skel_write_bulk_callback`` merely reports if the urb was completed +successfully or not and then returns. + +The read function works a bit differently from the write function in +that we do not use an urb to transfer data from the device to the +driver. Instead we call the :c:func:`usb_bulk_msg` function, which can be used +to send or receive data from a device without having to create urbs and +handle urb completion callback functions. We call the :c:func:`usb_bulk_msg` +function, giving it a buffer into which to place any data received from +the device and a timeout value. If the timeout period expires without +receiving any data from the device, the function will fail and return an +error message. This can be shown with the following code:: + + /* do an immediate bulk read to get data from the device */ + retval = usb_bulk_msg (skel->dev, + usb_rcvbulkpipe (skel->dev, + skel->bulk_in_endpointAddr), + skel->bulk_in_buffer, + skel->bulk_in_size, + &count, HZ*10); + /* if the read was successful, copy the data to user space */ + if (!retval) { + if (copy_to_user (buffer, skel->bulk_in_buffer, count)) + retval = -EFAULT; + else + retval = count; + } + + +The :c:func:`usb_bulk_msg` function can be very useful for doing single reads +or writes to a device; however, if you need to read or write constantly to +a device, it is recommended to set up your own urbs and submit them to +the USB subsystem. + +When the user program releases the file handle that it has been using to +talk to the device, the release function in the driver is called. In +this function we decrement our private usage count and wait for possible +pending writes:: + + /* decrement our usage count for the device */ + --skel->open_count; + + +One of the more difficult problems that USB drivers must be able to +handle smoothly is the fact that the USB device may be removed from the +system at any point in time, even if a program is currently talking to +it. It needs to be able to shut down any current reads and writes and +notify the user-space programs that the device is no longer there. The +following code (function ``skel_delete``) is an example of how to do +this:: + + static inline void skel_delete (struct usb_skel *dev) + { + kfree (dev->bulk_in_buffer); + if (dev->bulk_out_buffer != NULL) + usb_free_coherent (dev->udev, dev->bulk_out_size, + dev->bulk_out_buffer, + dev->write_urb->transfer_dma); + usb_free_urb (dev->write_urb); + kfree (dev); + } + + +If a program currently has an open handle to the device, we reset the +flag ``device_present``. For every read, write, release and other +functions that expect a device to be present, the driver first checks +this flag to see if the device is still present. If not, it releases +that the device has disappeared, and a ``-ENODEV`` error is returned to the +user-space program. When the release function is eventually called, it +determines if there is no device and if not, it does the cleanup that +the ``skel_disconnect`` function normally does if there are no open files +on the device (see Listing 5). + +Isochronous Data +================ + +This usb-skeleton driver does not have any examples of interrupt or +isochronous data being sent to or from the device. Interrupt data is +sent almost exactly as bulk data is, with a few minor exceptions. +Isochronous data works differently with continuous streams of data being +sent to or from the device. The audio and video camera drivers are very +good examples of drivers that handle isochronous data and will be useful +if you also need to do this. + +Conclusion +========== + +Writing Linux USB device drivers is not a difficult task as the +usb-skeleton driver shows. This driver, combined with the other current +USB drivers, should provide enough examples to help a beginning author +create a working driver in a minimal amount of time. The linux-usb-devel +mailing list archives also contain a lot of helpful information. + +Resources +========= + +The Linux USB Project: +http://www.linux-usb.org/ + +Linux Hotplug Project: +http://linux-hotplug.sourceforge.net/ + +Linux USB Working Devices List: +http://www.qbik.ch/usb/devices/ + +linux-usb-devel Mailing List Archives: +http://marc.theaimsgroup.com/?l=linux-usb-devel + +Programming Guide for Linux USB Device Drivers: +http://usb.cs.tum.edu/usbdoc + +USB Home Page: http://www.usb.org diff --git a/Documentation/driver-api/vme.rst b/Documentation/driver-api/vme.rst index 89776fb3c8bd04c8fec2e922b4ff29e5f137b068..def139c13410863ccb556af9b3425fadb6acc9c5 100644 --- a/Documentation/driver-api/vme.rst +++ b/Documentation/driver-api/vme.rst @@ -6,36 +6,15 @@ Driver registration As with other subsystems within the Linux kernel, VME device drivers register with the VME subsystem, typically called from the devices init routine. This is -achieved via a call to the following function: +achieved via a call to :c:func:`vme_register_driver`. -.. code-block:: c - - int vme_register_driver (struct vme_driver *driver, unsigned int ndevs); +A pointer to a structure of type :c:type:`struct vme_driver ` must +be provided to the registration function. Along with the maximum number of +devices your driver is able to support. -If driver registration is successful this function returns zero, if an error -occurred a negative error code will be returned. - -A pointer to a structure of type 'vme_driver' must be provided to the -registration function. Along with ndevs, which is the number of devices your -driver is able to support. The structure is as follows: - -.. code-block:: c - - struct vme_driver { - struct list_head node; - const char *name; - int (*match)(struct vme_dev *); - int (*probe)(struct vme_dev *); - int (*remove)(struct vme_dev *); - void (*shutdown)(void); - struct device_driver driver; - struct list_head devices; - unsigned int ndev; - }; - -At the minimum, the '.name', '.match' and '.probe' elements of this structure -should be correctly set. The '.name' element is a pointer to a string holding -the device driver's name. +At the minimum, the '.name', '.match' and '.probe' elements of +:c:type:`struct vme_driver ` should be correctly set. The '.name' +element is a pointer to a string holding the device driver's name. The '.match' function allows control over which VME devices should be registered with the driver. The match function should return 1 if a device should be @@ -54,29 +33,16 @@ the number of devices probed to one: } The '.probe' element should contain a pointer to the probe routine. The -probe routine is passed a 'struct vme_dev' pointer as an argument. The -'struct vme_dev' structure looks like the following: - -.. code-block:: c - - struct vme_dev { - int num; - struct vme_bridge *bridge; - struct device dev; - struct list_head drv_list; - struct list_head bridge_list; - }; +probe routine is passed a :c:type:`struct vme_dev ` pointer as an +argument. Here, the 'num' field refers to the sequential device ID for this specific driver. The bridge number (or bus number) can be accessed using dev->bridge->num. -A function is also provided to unregister the driver from the VME core and is -usually called from the device driver's exit routine: - -.. code-block:: c - - void vme_unregister_driver (struct vme_driver *driver); +A function is also provided to unregister the driver from the VME core called +:c:func:`vme_unregister_driver` and should usually be called from the device +driver's exit routine. Resource management @@ -90,47 +56,29 @@ driver is called. The probe routine is passed a pointer to the devices device structure. This pointer should be saved, it will be required for requesting VME resources. -The driver can request ownership of one or more master windows, slave windows -and/or dma channels. Rather than allowing the device driver to request a -specific window or DMA channel (which may be used by a different driver) this -driver allows a resource to be assigned based on the required attributes of the -driver in question: - -.. code-block:: c - - struct vme_resource * vme_master_request(struct vme_dev *dev, - u32 aspace, u32 cycle, u32 width); - - struct vme_resource * vme_slave_request(struct vme_dev *dev, u32 aspace, - u32 cycle); - - struct vme_resource *vme_dma_request(struct vme_dev *dev, u32 route); - -For slave windows these attributes are split into the VME address spaces that -need to be accessed in 'aspace' and VME bus cycle types required in 'cycle'. -Master windows add a further set of attributes in 'width' specifying the -required data transfer widths. These attributes are defined as bitmasks and as -such any combination of the attributes can be requested for a single window, -the core will assign a window that meets the requirements, returning a pointer -of type vme_resource that should be used to identify the allocated resource -when it is used. For DMA controllers, the request function requires the -potential direction of any transfers to be provided in the route attributes. -This is typically VME-to-MEM and/or MEM-to-VME, though some hardware can -support VME-to-VME and MEM-to-MEM transfers as well as test pattern generation. -If an unallocated window fitting the requirements can not be found a NULL -pointer will be returned. +The driver can request ownership of one or more master windows +(:c:func:`vme_master_request`), slave windows (:c:func:`vme_slave_request`) +and/or dma channels (:c:func:`vme_dma_request`). Rather than allowing the device +driver to request a specific window or DMA channel (which may be used by a +different driver) the API allows a resource to be assigned based on the required +attributes of the driver in question. For slave windows these attributes are +split into the VME address spaces that need to be accessed in 'aspace' and VME +bus cycle types required in 'cycle'. Master windows add a further set of +attributes in 'width' specifying the required data transfer widths. These +attributes are defined as bitmasks and as such any combination of the +attributes can be requested for a single window, the core will assign a window +that meets the requirements, returning a pointer of type vme_resource that +should be used to identify the allocated resource when it is used. For DMA +controllers, the request function requires the potential direction of any +transfers to be provided in the route attributes. This is typically VME-to-MEM +and/or MEM-to-VME, though some hardware can support VME-to-VME and MEM-to-MEM +transfers as well as test pattern generation. If an unallocated window fitting +the requirements can not be found a NULL pointer will be returned. Functions are also provided to free window allocations once they are no longer -required. These functions should be passed the pointer to the resource provided -during resource allocation: - -.. code-block:: c - - void vme_master_free(struct vme_resource *res); - - void vme_slave_free(struct vme_resource *res); - - void vme_dma_free(struct vme_resource *res); +required. These functions (:c:func:`vme_master_free`, :c:func:`vme_slave_free` +and :c:func:`vme_dma_free`) should be passed the pointer to the resource +provided during resource allocation. Master windows @@ -144,61 +92,22 @@ the underlying chipset. A window must be configured before it can be used. Master window configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Once a master window has been assigned the following functions can be used to -configure it and retrieve the current settings: - -.. code-block:: c - - int vme_master_set (struct vme_resource *res, int enabled, - unsigned long long base, unsigned long long size, u32 aspace, - u32 cycle, u32 width); - - int vme_master_get (struct vme_resource *res, int *enabled, - unsigned long long *base, unsigned long long *size, u32 *aspace, - u32 *cycle, u32 *width); - -The address spaces, transfer widths and cycle types are the same as described +Once a master window has been assigned :c:func:`vme_master_set` can be used to +configure it and :c:func:`vme_master_get` to retrieve the current settings. The +address spaces, transfer widths and cycle types are the same as described under resource management, however some of the options are mutually exclusive. For example, only one address space may be specified. -These functions return 0 on success or an error code should the call fail. - Master window access ~~~~~~~~~~~~~~~~~~~~ -The following functions can be used to read from and write to configured master -windows. These functions return the number of bytes copied: - -.. code-block:: c - - ssize_t vme_master_read(struct vme_resource *res, void *buf, - size_t count, loff_t offset); - - ssize_t vme_master_write(struct vme_resource *res, void *buf, - size_t count, loff_t offset); - -In addition to simple reads and writes, a function is provided to do a -read-modify-write transaction. This function returns the original value of the -VME bus location : - -.. code-block:: c - - unsigned int vme_master_rmw (struct vme_resource *res, - unsigned int mask, unsigned int compare, unsigned int swap, - loff_t offset); - -This functions by reading the offset, applying the mask. If the bits selected in -the mask match with the values of the corresponding bits in the compare field, -the value of swap is written the specified offset. - -Parts of a VME window can be mapped into user space memory using the following -function: +The function :c:func:`vme_master_read` can be used to read from and +:c:func:`vme_master_write` used to write to configured master windows. -.. code-block:: c - - int vme_master_mmap(struct vme_resource *resource, - struct vm_area_struct *vma) +In addition to simple reads and writes, :c:func:`vme_master_rmw` is provided to +do a read-modify-write transaction. Parts of a VME window can also be mapped +into user space memory using :c:func:`vme_master_mmap`. Slave windows @@ -213,41 +122,23 @@ it can be used. Slave window configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Once a slave window has been assigned the following functions can be used to -configure it and retrieve the current settings: - -.. code-block:: c - - int vme_slave_set (struct vme_resource *res, int enabled, - unsigned long long base, unsigned long long size, - dma_addr_t mem, u32 aspace, u32 cycle); - - int vme_slave_get (struct vme_resource *res, int *enabled, - unsigned long long *base, unsigned long long *size, - dma_addr_t *mem, u32 *aspace, u32 *cycle); +Once a slave window has been assigned :c:func:`vme_slave_set` can be used to +configure it and :c:func:`vme_slave_get` to retrieve the current settings. The address spaces, transfer widths and cycle types are the same as described under resource management, however some of the options are mutually exclusive. For example, only one address space may be specified. -These functions return 0 on success or an error code should the call fail. - Slave window buffer allocation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Functions are provided to allow the user to allocate and free a contiguous -buffers which will be accessible by the VME bridge. These functions do not have -to be used, other methods can be used to allocate a buffer, though care must be -taken to ensure that they are contiguous and accessible by the VME bridge: - -.. code-block:: c - - void * vme_alloc_consistent(struct vme_resource *res, size_t size, - dma_addr_t *mem); - - void vme_free_consistent(struct vme_resource *res, size_t size, - void *virt, dma_addr_t mem); +Functions are provided to allow the user to allocate +(:c:func:`vme_alloc_consistent`) and free (:c:func:`vme_free_consistent`) +contiguous buffers which will be accessible by the VME bridge. These functions +do not have to be used, other methods can be used to allocate a buffer, though +care must be taken to ensure that they are contiguous and accessible by the VME +bridge. Slave window access @@ -269,29 +160,18 @@ executed, reused and destroyed. List Management ~~~~~~~~~~~~~~~ -The following functions are provided to create and destroy DMA lists. Execution -of a list will not automatically destroy the list, thus enabling a list to be -reused for repetitive tasks: - -.. code-block:: c - - struct vme_dma_list *vme_new_dma_list(struct vme_resource *res); - - int vme_dma_list_free(struct vme_dma_list *list); +The function :c:func:`vme_new_dma_list` is provided to create and +:c:func:`vme_dma_list_free` to destroy DMA lists. Execution of a list will not +automatically destroy the list, thus enabling a list to be reused for repetitive +tasks. List Population ~~~~~~~~~~~~~~~ -An item can be added to a list using the following function ( the source and +An item can be added to a list using :c:func:`vme_dma_list_add` (the source and destination attributes need to be created before calling this function, this is -covered under "Transfer Attributes"): - -.. code-block:: c - - int vme_dma_list_add(struct vme_dma_list *list, - struct vme_dma_attr *src, struct vme_dma_attr *dest, - size_t count); +covered under "Transfer Attributes"). .. note:: @@ -310,41 +190,19 @@ an item to a list. This is due to the diverse attributes required for each type of source and destination. There are functions to create attributes for PCI, VME and pattern sources and destinations (where appropriate): -Pattern source: - -.. code-block:: c - - struct vme_dma_attr *vme_dma_pattern_attribute(u32 pattern, u32 type); - -PCI source or destination: - -.. code-block:: c - - struct vme_dma_attr *vme_dma_pci_attribute(dma_addr_t mem); - -VME source or destination: + - PCI source or destination: :c:func:`vme_dma_pci_attribute` + - VME source or destination: :c:func:`vme_dma_vme_attribute` + - Pattern source: :c:func:`vme_dma_pattern_attribute` -.. code-block:: c - - struct vme_dma_attr *vme_dma_vme_attribute(unsigned long long base, - u32 aspace, u32 cycle, u32 width); - -The following function should be used to free an attribute: - -.. code-block:: c - - void vme_dma_free_attribute(struct vme_dma_attr *attr); +The function :c:func:`vme_dma_free_attribute` should be used to free an +attribute. List Execution ~~~~~~~~~~~~~~ -The following function queues a list for execution. The function will return -once the list has been executed: - -.. code-block:: c - - int vme_dma_list_exec(struct vme_dma_list *list); +The function :c:func:`vme_dma_list_exec` queues a list for execution and will +return once the list has been executed. Interrupts @@ -358,20 +216,13 @@ specific VME level and status IDs. Attaching Interrupt Handlers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The following functions can be used to attach and free a specific VME level and -status ID combination. Any given combination can only be assigned a single -callback function. A void pointer parameter is provided, the value of which is -passed to the callback function, the use of this pointer is user undefined: - -.. code-block:: c - - int vme_irq_request(struct vme_dev *dev, int level, int statid, - void (*callback)(int, int, void *), void *priv); - - void vme_irq_free(struct vme_dev *dev, int level, int statid); - -The callback parameters are as follows. Care must be taken in writing a callback -function, callback functions run in interrupt context: +The function :c:func:`vme_irq_request` can be used to attach and +:c:func:`vme_irq_free` to free a specific VME level and status ID combination. +Any given combination can only be assigned a single callback function. A void +pointer parameter is provided, the value of which is passed to the callback +function, the use of this pointer is user undefined. The callback parameters are +as follows. Care must be taken in writing a callback function, callback +functions run in interrupt context: .. code-block:: c @@ -381,12 +232,8 @@ function, callback functions run in interrupt context: Interrupt Generation ~~~~~~~~~~~~~~~~~~~~ -The following function can be used to generate a VME interrupt at a given VME -level and VME status ID: - -.. code-block:: c - - int vme_irq_generate(struct vme_dev *dev, int level, int statid); +The function :c:func:`vme_irq_generate` can be used to generate a VME interrupt +at a given VME level and VME status ID. Location monitors @@ -399,54 +246,29 @@ monitor. Location Monitor Management ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The following functions are provided to request the use of a block of location -monitors and to free them after they are no longer required: - -.. code-block:: c - - struct vme_resource * vme_lm_request(struct vme_dev *dev); - - void vme_lm_free(struct vme_resource * res); - -Each block may provide a number of location monitors, monitoring adjacent -locations. The following function can be used to determine how many locations -are provided: - -.. code-block:: c - - int vme_lm_count(struct vme_resource * res); +The function :c:func:`vme_lm_request` is provided to request the use of a block +of location monitors and :c:func:`vme_lm_free` to free them after they are no +longer required. Each block may provide a number of location monitors, +monitoring adjacent locations. The function :c:func:`vme_lm_count` can be used +to determine how many locations are provided. Location Monitor Configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Once a bank of location monitors has been allocated, the following functions -are provided to configure the location and mode of the location monitor: - -.. code-block:: c - - int vme_lm_set(struct vme_resource *res, unsigned long long base, - u32 aspace, u32 cycle); - - int vme_lm_get(struct vme_resource *res, unsigned long long *base, - u32 *aspace, u32 *cycle); +Once a bank of location monitors has been allocated, the function +:c:func:`vme_lm_set` is provided to configure the location and mode of the +location monitor. The function :c:func:`vme_lm_get` can be used to retrieve +existing settings. Location Monitor Use ~~~~~~~~~~~~~~~~~~~~ -The following functions allow a callback to be attached and detached from each -location monitor location. Each location monitor can monitor a number of -adjacent locations: - -.. code-block:: c - - int vme_lm_attach(struct vme_resource *res, int num, - void (*callback)(void *)); - - int vme_lm_detach(struct vme_resource *res, int num); - -The callback function is declared as follows. +The function :c:func:`vme_lm_attach` enables a callback to be attached and +:c:func:`vme_lm_detach` allows on to be detached from each location monitor +location. Each location monitor can monitor a number of adjacent locations. The +callback function is declared as follows. .. code-block:: c @@ -456,19 +278,20 @@ The callback function is declared as follows. Slot Detection -------------- -This function returns the slot ID of the provided bridge. - -.. code-block:: c - - int vme_slot_num(struct vme_dev *dev); +The function :c:func:`vme_slot_num` returns the slot ID of the provided bridge. Bus Detection ------------- -This function returns the bus ID of the provided bridge. +The function :c:func:`vme_bus_num` returns the bus ID of the provided bridge. -.. code-block:: c - int vme_bus_num(struct vme_dev *dev); +VME API +------- + +.. kernel-doc:: include/linux/vme.h + :internal: +.. kernel-doc:: drivers/vme/vme.c + :export: diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index bf34d5b3a7330e5c26f9c6eebe977ed853804959..e72587fe477d5f1ad958e9810f1e5c46cbccc786 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -342,8 +342,10 @@ PER-CPU MEM devm_free_percpu() PCI - pcim_enable_device() : after success, all PCI ops become managed - pcim_pin_device() : keep PCI device enabled after release + devm_pci_remap_cfgspace() : ioremap PCI configuration space + devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource + pcim_enable_device() : after success, all PCI ops become managed + pcim_pin_device() : keep PCI device enabled after release PHY devm_usb_get_phy() diff --git a/Documentation/early-userspace/README b/Documentation/early-userspace/README index 93e63a9af30b7b0ec63f1cdf3904d92e5731596c..2c00b072a4c8ca382f84af09ebea29150645380d 100644 --- a/Documentation/early-userspace/README +++ b/Documentation/early-userspace/README @@ -86,7 +86,7 @@ early userspace useful. The klibc distribution is currently maintained separately from the kernel. You can obtain somewhat infrequent snapshots of klibc from -ftp://ftp.kernel.org/pub/linux/libs/klibc/ +https://www.kernel.org/pub/linux/libs/klibc/ For active users, you are better off using the klibc git repository, at http://git.kernel.org/?p=libs/klibc/klibc.git diff --git a/Documentation/extcon/porting-android-switch-class b/Documentation/extcon/porting-android-switch-class deleted file mode 100644 index 49c81caef84dc421fd8194cda8d092d5a4de7302..0000000000000000000000000000000000000000 --- a/Documentation/extcon/porting-android-switch-class +++ /dev/null @@ -1,123 +0,0 @@ - - Staging/Android Switch Class Porting Guide - (linux/drivers/staging/android/switch) - (c) Copyright 2012 Samsung Electronics - -AUTHORS -MyungJoo Ham - -/***************************************************************** - * CHAPTER 1. * - * PORTING SWITCH CLASS DEVICE DRIVERS * - *****************************************************************/ - -****** STEP 1. Basic Functionality - No extcon extended feature, but switch features only. - -- struct switch_dev (fed to switch_dev_register/unregister) - @name: no change - @dev: no change - @index: drop (not used in switch device driver side anyway) - @state: no change - If you have used @state with magic numbers, keep it - at this step. - @print_name: no change but type change (switch_dev->extcon_dev) - @print_state: no change but type change (switch_dev->extcon_dev) - -- switch_dev_register(sdev, dev) - => extcon_dev_register(edev) - : type change (sdev->edev) - : remove second param('dev'). if edev has parent device, should store - 'dev' to 'edev.dev.parent' before registering extcon device -- switch_dev_unregister(sdev) - => extcon_dev_unregister(edev) - : no change but type change (sdev->edev) -- switch_get_state(sdev) - => extcon_get_state(edev) - : no change but type change (sdev->edev) and (return: int->u32) -- switch_set_state(sdev, state) - => extcon_set_state(edev, state) - : no change but type change (sdev->edev) and (state: int->u32) - -With this changes, the ex-switch extcon class device works as it once -worked as switch class device. However, it will now have additional -interfaces (both ABI and in-kernel API) and different ABI locations. -However, if CONFIG_ANDROID is enabled without CONFIG_ANDROID_SWITCH, -/sys/class/switch/* will be symbolically linked to /sys/class/extcon/ -so that they are still compatible with legacy userspace processes. - -****** STEP 2. Multistate (no more magic numbers in state value) - Extcon's extended features for switch device drivers with - complex features usually required magic numbers in state - value of switch_dev. With extcon, such magic numbers that - support multiple cables are no more required or supported. - - 1. Define cable names at edev->supported_cable. - 2. (Recommended) remove print_state callback. - 3. Use extcon_get_cable_state_(edev, index) or - extcon_get_cable_state(edev, cable_name) instead of - extcon_get_state(edev) if you intend to get a state of a specific - cable. Same for set_state. This way, you can remove the usage of - magic numbers in state value. - 4. Use extcon_update_state() if you are updating specific bits of - the state value. - -Example: a switch device driver w/ magic numbers for two cables. - "0x00": no cables connected. - "0x01": cable 1 connected - "0x02": cable 2 connected - "0x03": cable 1 and 2 connected - 1. edev->supported_cable = {"1", "2", NULL}; - 2. edev->print_state = NULL; - 3. extcon_get_cable_state_(edev, 0) shows cable 1's state. - extcon_get_cable_state(edev, "1") shows cable 1's state. - extcon_set_cable_state_(edev, 1) sets cable 2's state. - extcon_set_cable_state(edev, "2") sets cable 2's state - 4. extcon_update_state(edev, 0x01, 0) sets the least bit's 0. - -****** STEP 3. Notify other device drivers - - You can notify others of the cable attach/detach events with -notifier chains. - - At the side of other device drivers (the extcon device itself -does not need to get notified of its own events), there are two -methods to register notifier_block for cable events: -(a) for a specific cable or (b) for every cable. - - (a) extcon_register_interest(obj, extcon_name, cable_name, nb) - Example: want to get news of "MAX8997_MUIC"'s "USB" cable - - obj = kzalloc(sizeof(struct extcon_specific_cable_nb), - GFP_KERNEL); - nb->notifier_call = the_callback_to_handle_usb; - - extcon_register_intereset(obj, "MAX8997_MUIC", "USB", nb); - - (b) extcon_register_notifier(edev, nb) - Call nb for any changes in edev. - - Please note that in order to properly behave with method (a), -the extcon device driver should support multistate feature (STEP 2). - -****** STEP 4. Inter-cable relation (mutually exclusive) - - You can provide inter-cable mutually exclusiveness information -for an extcon device. When cables A and B are declared to be mutually -exclusive, the two cables cannot be in ATTACHED state simulteneously. - - -/***************************************************************** - * CHAPTER 2. * - * PORTING USERSPACE w/ SWITCH CLASS DEVICE SUPPORT * - *****************************************************************/ - -****** ABI Location - - If "CONFIG_ANDROID" is enabled, /sys/class/switch/* are created -as symbolic links to /sys/class/extcon/*. - - The two files of switch class, name and state, are provided with -extcon, too. When the multistate support (STEP 2 of CHAPTER 1.) is -not enabled or print_state callback is supplied, the output of -state ABI is same with switch class. diff --git a/Documentation/features/core/BPF-JIT/arch-support.txt b/Documentation/features/core/BPF-JIT/arch-support.txt index c1b4f917238f7124aaf78778f4df8c1c3ebdf2e5..5575d2d09625ac7ce97029099ee5ee938f179bd4 100644 --- a/Documentation/features/core/BPF-JIT/arch-support.txt +++ b/Documentation/features/core/BPF-JIT/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt index 6d930fcbe519105831326012a6a383a2cd1d51a0..abb5f271a792e96566a36496055afffd5df078c8 100644 --- a/Documentation/features/core/generic-idle-thread/arch-support.txt +++ b/Documentation/features/core/generic-idle-thread/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | ok | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt index 136868b636e678b6ec6d8e4ed308f729a85a0d4a..dbdaffcc51107c0c0cd513774fcfb41b626a66a4 100644 --- a/Documentation/features/core/jump-labels/arch-support.txt +++ b/Documentation/features/core/jump-labels/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt index 728061d763b1150f09222848cec80ff1012d96c1..5e97a89420ef09ceaa6c8fb7d89c7ca771d955ef 100644 --- a/Documentation/features/core/tracehook/arch-support.txt +++ b/Documentation/features/core/tracehook/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | ok | | c6x: | ok | | cris: | TODO | diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index 703f5784bc90d9647d015c086330b59a48b4631d..76bbd7fe27b35bec5839e6bcac0337c550fc76b2 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt index 38dea8eeba0a3db202c2def713eacc3164f2cee7..830dbe801aafbfce012fb433b631fcd60a819b78 100644 --- a/Documentation/features/debug/gcov-profile-all/arch-support.txt +++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt index 862e15d6f79ec75b4c99c8b7c0d70f1f9a7136eb..0217bf6e942d2b83ddb24f14d2ce6aa079ada9ac 100644 --- a/Documentation/features/debug/kgdb/arch-support.txt +++ b/Documentation/features/debug/kgdb/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | ok | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt index 40f44d041fb4764386fb6f90f12397bf805c7a7d..1e84be3c142e01ea0c23a092924c950793a278a7 100644 --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | @@ -27,7 +26,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | TODO | + | powerpc: | ok | | s390: | TODO | | score: | TODO | | sh: | TODO | diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt index a44bfff6940b9a8a2b3d6a6abef7ec262562c658..529f66eda679aaa4f16bd3dc1d090c2a1d1338e5 100644 --- a/Documentation/features/debug/kprobes/arch-support.txt +++ b/Documentation/features/debug/kprobes/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | TODO | - | avr32: | ok | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt index d87c1ce24204fafbdf9cd42dfdd7eb04bd4d784e..43353242e43968ac3622cd2de1eb82a72c8fb558 100644 --- a/Documentation/features/debug/kretprobes/arch-support.txt +++ b/Documentation/features/debug/kretprobes/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt index b8999d8544cac4bbaa6bd4c7480a3073017f65e5..f559f1ba5416da682fed3c947bdb08bd8090f0d8 100644 --- a/Documentation/features/debug/optprobes/arch-support.txt +++ b/Documentation/features/debug/optprobes/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index 0fa423313409c9bbbc505283f9f114ef41df47bd..d7acd7bd36197025afc332efb7e53065dd138f13 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt index d605c3fc38fd5cd920230b8057e16a2f180eec1b..53ed42b0e7e59ba14ca7b52ee7023357e006a9be 100644 --- a/Documentation/features/debug/uprobes/arch-support.txt +++ b/Documentation/features/debug/uprobes/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt index 44cc1ff3f603a957a6b552bb911e08803c882caa..149443936de9fda2607aca200796c64719a2d81e 100644 --- a/Documentation/features/debug/user-ret-profiler/arch-support.txt +++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/io/dma-api-debug/arch-support.txt b/Documentation/features/io/dma-api-debug/arch-support.txt index ffa522a9bdfdcc34b017bc9a3e70be8fe2d87082..6be920643be667f312b62e5335ec8f849ddf01d9 100644 --- a/Documentation/features/io/dma-api-debug/arch-support.txt +++ b/Documentation/features/io/dma-api-debug/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | ok | | cris: | TODO | diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt index 83d2cf989ea3494dc825fc9e4da3b1ed09491646..0eb08e1e32b808c0f81a27df0470b2fdd9e1a14c 100644 --- a/Documentation/features/io/dma-contiguous/arch-support.txt +++ b/Documentation/features/io/dma-contiguous/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/io/sg-chain/arch-support.txt b/Documentation/features/io/sg-chain/arch-support.txt index 6ca98f9911bbb31693ff342039d5c84772e37d91..514ad3468aa597b36a030b46941f68dc7d161a68 100644 --- a/Documentation/features/io/sg-chain/arch-support.txt +++ b/Documentation/features/io/sg-chain/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/lib/strncasecmp/arch-support.txt b/Documentation/features/lib/strncasecmp/arch-support.txt index 12b1c9358e57150827bf1d8f7e7a4b86ef373ed0..532c6f0fc15c0c2291765ae9bf6da0c921c82140 100644 --- a/Documentation/features/lib/strncasecmp/arch-support.txt +++ b/Documentation/features/lib/strncasecmp/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt index d9c310889bc1a529f66313921568ce42e57ff4b1..f3eec26c8cf80c589e768fdc078fa365f1839cba 100644 --- a/Documentation/features/locking/cmpxchg-local/arch-support.txt +++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt index cf90635bdcbb23124b8c199103d71c11bcadec65..9756abc680a7e5de130945a5bf4ccac5c271ef7a 100644 --- a/Documentation/features/locking/lockdep/arch-support.txt +++ b/Documentation/features/locking/lockdep/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | ok | | blackfin: | ok | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt index 68c3a5ddd9b9cf9ad4f17e3e88c530186edf9333..62f4ee5c156c41dffd9306c4e6dd5ccece93029e 100644 --- a/Documentation/features/locking/queued-rwlocks/arch-support.txt +++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt index e973b1a9572f86da2cce2e0c586bd376ebf61668..321b32f6e63c5070c69c6b4d9e5e372b2cd544a3 100644 --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/locking/rwsem-optimized/arch-support.txt b/Documentation/features/locking/rwsem-optimized/arch-support.txt index ac93d7ab66c4f3134c13da5b04224dfe99141162..79bfa4d6e41ffa18f361f9d14455a5a8dab93e18 100644 --- a/Documentation/features/locking/rwsem-optimized/arch-support.txt +++ b/Documentation/features/locking/rwsem-optimized/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt index 4660bf222db17099e221fc37f2d002ab7b45fd8b..00f1606bbf45709355356fbc326ddebb6324f811 100644 --- a/Documentation/features/perf/kprobes-event/arch-support.txt +++ b/Documentation/features/perf/kprobes-event/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt index f179b1fb26ef49734ff84bf1deee48bfe8915fdd..7d516eacf7b929cbb375c69293ba08e93c686c8a 100644 --- a/Documentation/features/perf/perf-regs/arch-support.txt +++ b/Documentation/features/perf/perf-regs/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt index 85777c5c6353712c51b6ae58ad322af7ebcd9d74..f974b8df5d825621997e604bac797fbef5254333 100644 --- a/Documentation/features/perf/perf-stackdump/arch-support.txt +++ b/Documentation/features/perf/perf-stackdump/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt index ac7cd6b1502b5a029f7aa3f31d5ad17217b69fe6..1d3c0f66915252f939fef5c2ba23f89a123c175a 100644 --- a/Documentation/features/sched/numa-balancing/arch-support.txt +++ b/Documentation/features/sched/numa-balancing/arch-support.txt @@ -10,7 +10,6 @@ | arc: | .. | | arm: | .. | | arm64: | .. | - | avr32: | .. | | blackfin: | .. | | c6x: | .. | | cris: | .. | diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt index 4f66ec13395112ae9be41a7250f0152963995a1c..a32d5b20767916f4df047feee74a01af0ab26e17 100644 --- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt +++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt index 8acb439a4a1767e0eab512a4fec5e786f5200255..caee8f64d1bca0e4bf26d8971d955deca45d03ee 100644 --- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt +++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt index ff670b2207f15051202d9808ff5464dc31e0db2f..1cd87f6cd07da9a64b908ff62a4d4ad82a49159c 100644 --- a/Documentation/features/time/clockevents/arch-support.txt +++ b/Documentation/features/time/clockevents/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | ok | | blackfin: | ok | | c6x: | ok | | cris: | ok | diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index a1e3eea7003f785e561626615c2a11d468297f6d..e6d7c7b2253c8b4980b5a2bbb9f7363ca13f96d1 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt index 4199ffecc0ff06bde4b26fa5f09d1fd0a9a08601..15c6071788aec5ba0a7e5db6b5d682db9199f891 100644 --- a/Documentation/features/time/irq-time-acct/arch-support.txt +++ b/Documentation/features/time/irq-time-acct/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/time/modern-timekeeping/arch-support.txt b/Documentation/features/time/modern-timekeeping/arch-support.txt index 17f68a02e84db5f1d441359f7b17efd97ae0f2d1..baee7611ba3d6a33cba69f73d107ed7d67008f94 100644 --- a/Documentation/features/time/modern-timekeeping/arch-support.txt +++ b/Documentation/features/time/modern-timekeeping/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | TODO | | arm64: | ok | - | avr32: | ok | | blackfin: | TODO | | c6x: | ok | | cris: | TODO | diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt index cf3c3e383d156d90e85dfcf2b7a23da3465b879f..9129530cb73c76b6ac1a2aa94c2987c1eb007205 100644 --- a/Documentation/features/time/virt-cpuacct/arch-support.txt +++ b/Documentation/features/time/virt-cpuacct/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt index ec4dd28e12979fedb5cfe26c9f2780936650505a..f6829af3255f3545e0f111b280bfc7b5797e99b3 100644 --- a/Documentation/features/vm/ELF-ASLR/arch-support.txt +++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt index 991974275a3ef789975502dd248d026133db138c..1a09ea99d486c7cb0f1ebcdbb232f6c46afbb279 100644 --- a/Documentation/features/vm/PG_uncached/arch-support.txt +++ b/Documentation/features/vm/PG_uncached/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt index 523f8307b9cd574ee22961d4fd27e618f5fe5b05..d170e6236503afe80577e5ebc73fe5e975d72ad0 100644 --- a/Documentation/features/vm/THP/arch-support.txt +++ b/Documentation/features/vm/THP/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | .. | | blackfin: | .. | | c6x: | .. | | cris: | .. | diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt index 261b92e2fb1ae1e1f41c7ee481f87a6b9cb48198..abfab4080a9136d24a477e022b887cd8500a39e8 100644 --- a/Documentation/features/vm/TLB/arch-support.txt +++ b/Documentation/features/vm/TLB/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | TODO | - | avr32: | .. | | blackfin: | TODO | | c6x: | .. | | cris: | .. | diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index df1d1f3c9af290aa6ffaa1584f71ba6025e54c4e..f81f09b22b08cf8f8c69477752b4a5f93d062af2 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -10,7 +10,6 @@ | arc: | TODO | | arm: | TODO | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index 90c53749fde78107d7d2f66bc6ec7434a40341a9..0cc3e11c42e2e5f5ab31e3256c36a1ef740ed81f 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | TODO | | arm64: | TODO | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/features/vm/numa-memblock/arch-support.txt b/Documentation/features/vm/numa-memblock/arch-support.txt index e7c252a0c53123bb22fbf7d8bd217bf7e3b762b0..9a3fdac42ce1258ff2c604634cfecea9069a210c 100644 --- a/Documentation/features/vm/numa-memblock/arch-support.txt +++ b/Documentation/features/vm/numa-memblock/arch-support.txt @@ -10,7 +10,6 @@ | arc: | .. | | arm: | .. | | arm64: | .. | - | avr32: | .. | | blackfin: | .. | | c6x: | .. | | cris: | .. | diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index 3de5434c857c8909deac9f8c1770c56723c31ff8..dfaa39e664fffe944f2c348c361e45bf4e62e22f 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt @@ -10,7 +10,6 @@ | arc: | ok | | arm: | ok | | arm64: | ok | - | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | | cris: | TODO | diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt index 78043d5a8fc35f01c343ced6f956aadee03e1026..843ce91a2e40504beaedf9c159c36d271700071b 100644 --- a/Documentation/filesystems/bfs.txt +++ b/Documentation/filesystems/bfs.txt @@ -54,4 +54,4 @@ The first 4 bytes should be 0x1badface. If you have any patches, questions or suggestions regarding this BFS implementation please contact the author: -Tigran Aivazian +Tigran Aivazian diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 3698ed3146e3011b11ed4211a06a7badc2f254cb..5a8f7f4d2bca227516b0e21fae0945b4554d4485 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -25,7 +25,7 @@ Note: More extensive information for getting started with ext4 can be or - ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ + https://www.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/ or grab the latest git repository from: diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt index 1e6564545edf067a1d9e2be42d78c149bead0730..22dc0dd6889cbfbaff1ad2a5af43f6f4f6303fdb 100644 --- a/Documentation/filesystems/nfs/nfs-rdma.txt +++ b/Documentation/filesystems/nfs/nfs-rdma.txt @@ -110,10 +110,10 @@ Installation - Install a Linux kernel with NFS/RDMA The NFS/RDMA client and server are both included in the mainline Linux - kernel version 2.6.25 and later. This and other versions of the 2.6 Linux + kernel version 2.6.25 and later. This and other versions of the Linux kernel can be found at: - ftp://ftp.kernel.org/pub/linux/kernel/v2.6/ + https://www.kernel.org/pub/linux/kernel/ Download the sources and place them in an appropriate location. diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt index 8de578a98222784ebab5e930586a019dd6cd67aa..80dc0bdc302a6ae86db68657efddc7aba8fec498 100644 --- a/Documentation/filesystems/nfs/pnfs.txt +++ b/Documentation/filesystems/nfs/pnfs.txt @@ -64,46 +64,9 @@ table which are called by the nfs-client pnfs-core to implement the different layout types. Files-layout-driver code is in: fs/nfs/filelayout/.. directory -Objects-layout-driver code is in: fs/nfs/objlayout/.. directory Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory -objects-layout setup --------------------- - -As part of the full STD implementation the objlayoutdriver.ko needs, at times, -to automatically login to yet undiscovered iscsi/osd devices. For this the -driver makes up-calles to a user-mode script called *osd_login* - -The path_name of the script to use is by default: - /sbin/osd_login. -This name can be overridden by the Kernel module parameter: - objlayoutdriver.osd_login_prog - -If Kernel does not find the osd_login_prog path it will zero it out -and will not attempt farther logins. An admin can then write new value -to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it. - -The /sbin/osd_login is part of the nfs-utils package, and should usually -be installed on distributions that support this Kernel version. - -The API to the login script is as follows: - Usage: $0 -u -o -s - Options: - -u target uri e.g. iscsi://: - (always exists) - (More protocols can be defined in the future. - The client does not interpret this string it is - passed unchanged as received from the Server) - -o osdname of the requested target OSD - (Might be empty) - (A string which denotes the OSD name, there is a - limit of 64 chars on this string) - -s systemid of the requested target OSD - (Might be empty) - (This string, if not empty is always an hex - representation of the 20 bytes osd_system_id) - blocks-layout setup ------------------- diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 634d03e20c2d9154c926b7db51061c69a6d32cd9..c9e884b52698020f88fdab12588687c10f16004e 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -21,12 +21,19 @@ from accessing the corresponding object from the original filesystem. This is most obvious from the 'st_dev' field returned by stat(2). While directories will report an st_dev from the overlay-filesystem, -all non-directory objects will report an st_dev from the lower or +non-directory objects may report an st_dev from the lower filesystem or upper filesystem that is providing the object. Similarly st_ino will only be unique when combined with st_dev, and both of these can change over the lifetime of a non-directory object. Many applications and tools ignore these values and will not be affected. +In the special case of all overlay layers on the same underlying +filesystem, all objects will report an st_dev from the overlay +filesystem and st_ino from the underlying filesystem. This will +make the overlay mount more compliant with filesystem scanners and +overlay objects will be distinguishable from the corresponding +objects in the original filesystem. + Upper and Lower --------------- diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index c94b4675d021ffd374de22d7d83df61dbb6c34dd..4cddbce85ac9ad175743f5f4384a32937bf87888 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -44,6 +44,7 @@ Table of Contents 3.8 /proc//fdinfo/ - Information about opened file 3.9 /proc//map_files - Information about memory mapped files 3.10 /proc//timerslack_ns - Task timerslack value + 3.11 /proc//patch_state - Livepatch patch operation state 4 Configuring procfs 4.1 Mount options @@ -412,6 +413,7 @@ Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 892 kB Anonymous: 0 kB +LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB Shared_Hugetlb: 0 kB @@ -441,6 +443,11 @@ accessed. "Anonymous" shows the amount of memory that does not belong to any file. Even a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE and a page is modified, the file page is replaced by a private anonymous copy. +"LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE). +The memory isn't freed immediately with madvise(). It's freed in memory +pressure if the memory is clean. Please note that the printed value might +be lower than the real value due to optimizations used in the current +implementation. If this is not desirable please file a bug report. "AnonHugePages" shows the ammount of memory backed by transparent hugepage. "ShmemPmdMapped" shows the ammount of shared (shmem/tmpfs) memory backed by huge pages. @@ -1887,6 +1894,23 @@ Valid values are from 0 - ULLONG_MAX An application setting the value must have PTRACE_MODE_ATTACH_FSCREDS level permissions on the task specified to change its timerslack_ns value. +3.11 /proc//patch_state - Livepatch patch operation state +----------------------------------------------------------------- +When CONFIG_LIVEPATCH is enabled, this file displays the value of the +patch state for the task. + +A value of '-1' indicates that no patch is in transition. + +A value of '0' indicates that a patch is in transition and the task is +unpatched. If the patch is being enabled, then the task hasn't been +patched yet. If the patch is being disabled, then the task has already +been unpatched. + +A value of '1' indicates that a patch is in transition and the task is +patched. If the patch is being enabled, then the task has already been +patched. If the patch is being disabled, then the task hasn't been +unpatched yet. + ------------------------------------------------------------------------------ Configuring procfs diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 6ea1ceda6f5235ea18458b7aa70f1fe51866fcf5..06f1d64c6f702fa2507b748b6feb5a6bddf20975 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -113,9 +113,18 @@ Supporting PCI access on new platforms -------------------------------------- In order to support PCI resource mapping as described above, Linux platform -code must define HAVE_PCI_MMAP and provide a pci_mmap_page_range function. -Platforms are free to only support subsets of the mmap functionality, but -useful return codes should be provided. +code should ideally define ARCH_GENERIC_PCI_MMAP_RESOURCE and use the generic +implementation of that functionality. To support the historical interface of +mmap() through files in /proc/bus/pci, platforms may also set HAVE_PCI_MMAP. + +Alternatively, platforms which set HAVE_PCI_MMAP may provide their own +implementation of pci_mmap_page_range() instead of defining +ARCH_GENERIC_PCI_MMAP_RESOURCE. + +Platforms which support write-combining maps of PCI resources must define +arch_can_pci_mmap_wc() which shall evaluate to non-zero at runtime when +write-combining is permitted. Platforms which support maps of I/O resources +define arch_can_pci_mmap_io() similarly. Legacy resources are protected by the HAVE_PCI_LEGACY define. Platforms wishing to support legacy functionality should define it and provide diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 94dd27ef4a766ca5414146fab1b76ea5aacb67de..f42b90687d406b73b4a24b2d92c9116540cc8ca1 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -694,8 +694,7 @@ struct address_space_operations { write_end: After a successful write_begin, and data copy, write_end must be called. len is the original len passed to write_begin, and copied - is the amount that was able to be copied (copied == len is always true - if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag). + is the amount that was able to be copied. The filesystem must take care of unlocking the page and releasing it refcount, and updating i_size. diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index 05676fdacfe3c56f1a9afdaf54795e9167a9c287..912568baabb9629083151fc4ed30578c066684cd 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -70,6 +70,12 @@ instead of -ENOENT if no GPIO has been assigned to the requested function: unsigned int index, enum gpiod_flags flags) +Note that gpio_get*_optional() functions (and their managed variants), unlike +the rest of gpiolib API, also return NULL when gpiolib support is disabled. +This is helpful to driver authors, since they do not need to special case +-ENOSYS return codes. System integrators should however be careful to enable +gpiolib on systems that need it. + For a function using multiple GPIOs all of those can be obtained with one call: struct gpio_descs *gpiod_get_array(struct device *dev, diff --git a/Documentation/gpu/introduction.rst b/Documentation/gpu/introduction.rst index 05a82bdfbca4d195ca6b5cd675554d13804ce8fc..fccbe375244d8c10767f83b4d6da4ac09366022f 100644 --- a/Documentation/gpu/introduction.rst +++ b/Documentation/gpu/introduction.rst @@ -85,3 +85,14 @@ This means that there's a blackout-period of about one month where feature work can't be merged. The recommended way to deal with that is having a -next tree that's always open, but making sure to not feed it into linux-next during the blackout period. As an example, drm-misc works like that. + +Code of Conduct +--------------- + +As a freedesktop.org project, dri-devel, and the DRM community, follows the +Contributor Covenant, found at: https://www.freedesktop.org/wiki/CodeOfConduct + +Please conduct yourself in a respectful and civilised manner when +interacting with community members on mailing lists, IRC, or bug +trackers. The community represents the project as a whole, and abusive +or bullying behaviour is not tolerated by the project. diff --git a/Documentation/hid/hidraw.txt b/Documentation/hid/hidraw.txt index 029e6cb9a7e8ea78326c36b3a5fe42f8302c6e31..c8436e354f44b3ac7220f6766e1eef85fb8304b3 100644 --- a/Documentation/hid/hidraw.txt +++ b/Documentation/hid/hidraw.txt @@ -81,7 +81,7 @@ of: BUS_HIL BUS_BLUETOOTH BUS_VIRTUAL -which are defined in linux/input.h. +which are defined in uapi/linux/input.h. HIDIOCGRAWNAME(len): Get Raw Name This ioctl returns a string containing the vendor and product strings of diff --git a/Documentation/hwmon/aspeed-pwm-tacho b/Documentation/hwmon/aspeed-pwm-tacho new file mode 100644 index 0000000000000000000000000000000000000000..7cfb3497746069c4a90cba474e9aca4d27c5ebba --- /dev/null +++ b/Documentation/hwmon/aspeed-pwm-tacho @@ -0,0 +1,22 @@ +Kernel driver aspeed-pwm-tacho +============================== + +Supported chips: + ASPEED AST2400/2500 + +Authors: + + +Description: +------------ +This driver implements support for ASPEED AST2400/2500 PWM and Fan Tacho +controller. The PWM controller supports upto 8 PWM outputs. The Fan tacho +controller supports up to 16 tachometer inputs. + +The driver provides the following sensor accesses in sysfs: + +fanX_input ro provide current fan rotation value in RPM as reported + by the fan to the device. + +pwmX rw get or set PWM fan control value. This is an integer + value between 0(off) and 255(full speed). diff --git a/Documentation/hwmon/tc654 b/Documentation/hwmon/tc654 index 91a2843f5f98811347e434519e4771502dd95c16..47636a8077b4cbf5bb80dda17a3bbfc6bfae722b 100644 --- a/Documentation/hwmon/tc654 +++ b/Documentation/hwmon/tc654 @@ -2,7 +2,7 @@ Kernel driver tc654 =================== Supported chips: - * Microship TC654 and TC655 + * Microchip TC654 and TC655 Prefix: 'tc654' Datasheet: http://ww1.microchip.com/downloads/en/DeviceDoc/20001734C.pdf diff --git a/Documentation/index.rst b/Documentation/index.rst index f6e641a54bbcab5c6dd8e4d8f724c580deaff19e..bc67dbf76eb041c5dbd66813d873ce74420a0da5 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -24,6 +24,18 @@ trying to get it to work optimally on a given system. admin-guide/index +Application-developer documentation +----------------------------------- + +The user-space API manual gathers together documents describing aspects of +the kernel interface as seen by application developers. + +.. toctree:: + :maxdepth: 2 + + userspace-api/index + + Introduction to kernel development ---------------------------------- @@ -55,6 +67,7 @@ needed). driver-api/index core-api/index media/index + input/index gpu/index security/index sound/index @@ -76,6 +89,14 @@ Chinese translations translations/zh_CN/index +Japanese translations +--------------------- + +.. toctree:: + :maxdepth: 1 + + translations/ja_JP/index + Indices and tables ================== diff --git a/Documentation/infiniband/opa_vnic.txt b/Documentation/infiniband/opa_vnic.txt new file mode 100644 index 0000000000000000000000000000000000000000..282e17be798a9cb4985b37f01e65252af0dc0362 --- /dev/null +++ b/Documentation/infiniband/opa_vnic.txt @@ -0,0 +1,153 @@ +Intel Omni-Path (OPA) Virtual Network Interface Controller (VNIC) feature +supports Ethernet functionality over Omni-Path fabric by encapsulating +the Ethernet packets between HFI nodes. + +Architecture +============= +The patterns of exchanges of Omni-Path encapsulated Ethernet packets +involves one or more virtual Ethernet switches overlaid on the Omni-Path +fabric topology. A subset of HFI nodes on the Omni-Path fabric are +permitted to exchange encapsulated Ethernet packets across a particular +virtual Ethernet switch. The virtual Ethernet switches are logical +abstractions achieved by configuring the HFI nodes on the fabric for +header generation and processing. In the simplest configuration all HFI +nodes across the fabric exchange encapsulated Ethernet packets over a +single virtual Ethernet switch. A virtual Ethernet switch, is effectively +an independent Ethernet network. The configuration is performed by an +Ethernet Manager (EM) which is part of the trusted Fabric Manager (FM) +application. HFI nodes can have multiple VNICs each connected to a +different virtual Ethernet switch. The below diagram presents a case +of two virtual Ethernet switches with two HFI nodes. + + +-------------------+ + | Subnet/ | + | Ethernet | + | Manager | + +-------------------+ + / / + / / + / / + / / ++-----------------------------+ +------------------------------+ +| Virtual Ethernet Switch | | Virtual Ethernet Switch | +| +---------+ +---------+ | | +---------+ +---------+ | +| | VPORT | | VPORT | | | | VPORT | | VPORT | | ++--+---------+----+---------+-+ +-+---------+----+---------+---+ + | \ / | + | \ / | + | \/ | + | / \ | + | / \ | + +-----------+------------+ +-----------+------------+ + | VNIC | VNIC | | VNIC | VNIC | + +-----------+------------+ +-----------+------------+ + | HFI | | HFI | + +------------------------+ +------------------------+ + + +The Omni-Path encapsulated Ethernet packet format is as described below. + +Bits Field +------------------------------------ +Quad Word 0: +0-19 SLID (lower 20 bits) +20-30 Length (in Quad Words) +31 BECN bit +32-51 DLID (lower 20 bits) +52-56 SC (Service Class) +57-59 RC (Routing Control) +60 FECN bit +61-62 L2 (=10, 16B format) +63 LT (=1, Link Transfer Head Flit) + +Quad Word 1: +0-7 L4 type (=0x78 ETHERNET) +8-11 SLID[23:20] +12-15 DLID[23:20] +16-31 PKEY +32-47 Entropy +48-63 Reserved + +Quad Word 2: +0-15 Reserved +16-31 L4 header +32-63 Ethernet Packet + +Quad Words 3 to N-1: +0-63 Ethernet packet (pad extended) + +Quad Word N (last): +0-23 Ethernet packet (pad extended) +24-55 ICRC +56-61 Tail +62-63 LT (=01, Link Transfer Tail Flit) + +Ethernet packet is padded on the transmit side to ensure that the VNIC OPA +packet is quad word aligned. The 'Tail' field contains the number of bytes +padded. On the receive side the 'Tail' field is read and the padding is +removed (along with ICRC, Tail and OPA header) before passing packet up +the network stack. + +The L4 header field contains the virtual Ethernet switch id the VNIC port +belongs to. On the receive side, this field is used to de-multiplex the +received VNIC packets to different VNIC ports. + +Driver Design +============== +Intel OPA VNIC software design is presented in the below diagram. +OPA VNIC functionality has a HW dependent component and a HW +independent component. + +The support has been added for IB device to allocate and free the RDMA +netdev devices. The RDMA netdev supports interfacing with the network +stack thus creating standard network interfaces. OPA_VNIC is an RDMA +netdev device type. + +The HW dependent VNIC functionality is part of the HFI1 driver. It +implements the verbs to allocate and free the OPA_VNIC RDMA netdev. +It involves HW resource allocation/management for VNIC functionality. +It interfaces with the network stack and implements the required +net_device_ops functions. It expects Omni-Path encapsulated Ethernet +packets in the transmit path and provides HW access to them. It strips +the Omni-Path header from the received packets before passing them up +the network stack. It also implements the RDMA netdev control operations. + +The OPA VNIC module implements the HW independent VNIC functionality. +It consists of two parts. The VNIC Ethernet Management Agent (VEMA) +registers itself with IB core as an IB client and interfaces with the +IB MAD stack. It exchanges the management information with the Ethernet +Manager (EM) and the VNIC netdev. The VNIC netdev part allocates and frees +the OPA_VNIC RDMA netdev devices. It overrides the net_device_ops functions +set by HW dependent VNIC driver where required to accommodate any control +operation. It also handles the encapsulation of Ethernet packets with an +Omni-Path header in the transmit path. For each VNIC interface, the +information required for encapsulation is configured by the EM via VEMA MAD +interface. It also passes any control information to the HW dependent driver +by invoking the RDMA netdev control operations. + + +-------------------+ +----------------------+ + | | | Linux | + | IB MAD | | Network | + | | | Stack | + +-------------------+ +----------------------+ + | | | + | | | + +----------------------------+ | + | | | + | OPA VNIC Module | | + | (OPA VNIC RDMA Netdev | | + | & EMA functions) | | + | | | + +----------------------------+ | + | | + | | + +------------------+ | + | IB core | | + +------------------+ | + | | + | | + +--------------------------------------------+ + | | + | HFI1 Driver with VNIC support | + | | + +--------------------------------------------+ diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt deleted file mode 100644 index 8d1341ccde6498bebc534bcd7c1ef4560560a96f..0000000000000000000000000000000000000000 --- a/Documentation/input/alps.txt +++ /dev/null @@ -1,378 +0,0 @@ -ALPS Touchpad Protocol ----------------------- - -Introduction ------------- -Currently the ALPS touchpad driver supports seven protocol versions in use by -ALPS touchpads, called versions 1, 2, 3, 4, 5, 6 and 7. - -Since roughly mid-2010 several new ALPS touchpads have been released and -integrated into a variety of laptops and netbooks. These new touchpads -have enough behavior differences that the alps_model_data definition -table, describing the properties of the different versions, is no longer -adequate. The design choices were to re-define the alps_model_data -table, with the risk of regression testing existing devices, or isolate -the new devices outside of the alps_model_data table. The latter design -choice was made. The new touchpad signatures are named: "Rushmore", -"Pinnacle", and "Dolphin", which you will see in the alps.c code. -For the purposes of this document, this group of ALPS touchpads will -generically be called "new ALPS touchpads". - -We experimented with probing the ACPI interface _HID (Hardware ID)/_CID -(Compatibility ID) definition as a way to uniquely identify the -different ALPS variants but there did not appear to be a 1:1 mapping. -In fact, it appeared to be an m:n mapping between the _HID and actual -hardware type. - -Detection ---------- - -All ALPS touchpads should respond to the "E6 report" command sequence: -E8-E6-E6-E6-E9. An ALPS touchpad should respond with either 00-00-0A or -00-00-64 if no buttons are pressed. The bits 0-2 of the first byte will be 1s -if some buttons are pressed. - -If the E6 report is successful, the touchpad model is identified using the "E7 -report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is -matched against known models in the alps_model_data_array. - -For older touchpads supporting protocol versions 3 and 4, the E7 report -model signature is always 73-02-64. To differentiate between these -versions, the response from the "Enter Command Mode" sequence must be -inspected as described below. - -The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but -seem to be better differentiated by the EC Command Mode response. - -Command Mode ------------- - -Protocol versions 3 and 4 have a command mode that is used to read and write -one-byte device registers in a 16-bit address space. The command sequence -EC-EC-EC-E9 places the device in command mode, and the device will respond -with 88-07 followed by a third byte. This third byte can be used to determine -whether the devices uses the version 3 or 4 protocol. - -To exit command mode, PSMOUSE_CMD_SETSTREAM (EA) is sent to the touchpad. - -While in command mode, register addresses can be set by first sending a -specific command, either EC for v3 devices or F5 for v4 devices. Then the -address is sent one nibble at a time, where each nibble is encoded as a -command with optional data. This encoding differs slightly between the v3 and -v4 protocols. - -Once an address has been set, the addressed register can be read by sending -PSMOUSE_CMD_GETINFO (E9). The first two bytes of the response contains the -address of the register being read, and the third contains the value of the -register. Registers are written by writing the value one nibble at a time -using the same encoding used for addresses. - -For the new ALPS touchpads, the EC command is used to enter command -mode. The response in the new ALPS touchpads is significantly different, -and more important in determining the behavior. This code has been -separated from the original alps_model_data table and put in the -alps_identify function. For example, there seem to be two hardware init -sequences for the "Dolphin" touchpads as determined by the second byte -of the EC response. - -Packet Format -------------- - -In the following tables, the following notation is used. - - CAPITALS = stick, miniscules = touchpad - -?'s can have different meanings on different models, such as wheel rotation, -extra buttons, stick buttons on a dualpoint, etc. - -PS/2 packet format ------------------- - - byte 0: 0 0 YSGN XSGN 1 M R L - byte 1: X7 X6 X5 X4 X3 X2 X1 X0 - byte 2: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 - -Note that the device never signals overflow condition. - -For protocol version 2 devices when the trackpoint is used, and no fingers -are on the touchpad, the M R L bits signal the combined status of both the -pointingstick and touchpad buttons. - -ALPS Absolute Mode - Protocol Version 1 --------------------------------------- - - byte 0: 1 0 0 0 1 x9 x8 x7 - byte 1: 0 x6 x5 x4 x3 x2 x1 x0 - byte 2: 0 ? ? l r ? fin ges - byte 3: 0 ? ? ? ? y9 y8 y7 - byte 4: 0 y6 y5 y4 y3 y2 y1 y0 - byte 5: 0 z6 z5 z4 z3 z2 z1 z0 - -ALPS Absolute Mode - Protocol Version 2 ---------------------------------------- - - byte 0: 1 ? ? ? 1 PSM PSR PSL - byte 1: 0 x6 x5 x4 x3 x2 x1 x0 - byte 2: 0 x10 x9 x8 x7 ? fin ges - byte 3: 0 y9 y8 y7 1 M R L - byte 4: 0 y6 y5 y4 y3 y2 y1 y0 - byte 5: 0 z6 z5 z4 z3 z2 z1 z0 - -Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for -the DualPoint Stick. The M, R and L bits signal the combined status of both -the pointingstick and touchpad buttons, except for Dell dualpoint devices -where the pointingstick buttons get reported separately in the PSM, PSR -and PSL bits. - -Dualpoint device -- interleaved packet format ---------------------------------------------- - - byte 0: 1 1 0 0 1 1 1 1 - byte 1: 0 x6 x5 x4 x3 x2 x1 x0 - byte 2: 0 x10 x9 x8 x7 0 fin ges - byte 3: 0 0 YSGN XSGN 1 1 1 1 - byte 4: X7 X6 X5 X4 X3 X2 X1 X0 - byte 5: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 - byte 6: 0 y9 y8 y7 1 m r l - byte 7: 0 y6 y5 y4 y3 y2 y1 y0 - byte 8: 0 z6 z5 z4 z3 z2 z1 z0 - -Devices which use the interleaving format normally send standard PS/2 mouse -packets for the DualPoint Stick + ALPS Absolute Mode packets for the -touchpad, switching to the interleaved packet format when both the stick and -the touchpad are used at the same time. - -ALPS Absolute Mode - Protocol Version 3 ---------------------------------------- - -ALPS protocol version 3 has three different packet formats. The first two are -associated with touchpad events, and the third is associated with trackstick -events. - -The first type is the touchpad position packet. - - byte 0: 1 ? x1 x0 1 1 1 1 - byte 1: 0 x10 x9 x8 x7 x6 x5 x4 - byte 2: 0 y10 y9 y8 y7 y6 y5 y4 - byte 3: 0 M R L 1 m r l - byte 4: 0 mt x3 x2 y3 y2 y1 y0 - byte 5: 0 z6 z5 z4 z3 z2 z1 z0 - -Note that for some devices the trackstick buttons are reported in this packet, -and on others it is reported in the trackstick packets. - -The second packet type contains bitmaps representing the x and y axes. In the -bitmaps a given bit is set if there is a finger covering that position on the -given axis. Thus the bitmap packet can be used for low-resolution multi-touch -data, although finger tracking is not possible. This packet also encodes the -number of contacts (f1 and f0 in the table below). - - byte 0: 1 1 x1 x0 1 1 1 1 - byte 1: 0 x8 x7 x6 x5 x4 x3 x2 - byte 2: 0 y7 y6 y5 y4 y3 y2 y1 - byte 3: 0 y10 y9 y8 1 1 1 1 - byte 4: 0 x14 x13 x12 x11 x10 x9 y0 - byte 5: 0 1 ? ? ? ? f1 f0 - -This packet only appears after a position packet with the mt bit set, and -usually only appears when there are two or more contacts (although -occasionally it's seen with only a single contact). - -The final v3 packet type is the trackstick packet. - - byte 0: 1 1 x7 y7 1 1 1 1 - byte 1: 0 x6 x5 x4 x3 x2 x1 x0 - byte 2: 0 y6 y5 y4 y3 y2 y1 y0 - byte 3: 0 1 0 0 1 0 0 0 - byte 4: 0 z4 z3 z2 z1 z0 ? ? - byte 5: 0 0 1 1 1 1 1 1 - -ALPS Absolute Mode - Protocol Version 4 ---------------------------------------- - -Protocol version 4 has an 8-byte packet format. - - byte 0: 1 ? x1 x0 1 1 1 1 - byte 1: 0 x10 x9 x8 x7 x6 x5 x4 - byte 2: 0 y10 y9 y8 y7 y6 y5 y4 - byte 3: 0 1 x3 x2 y3 y2 y1 y0 - byte 4: 0 ? ? ? 1 ? r l - byte 5: 0 z6 z5 z4 z3 z2 z1 z0 - byte 6: bitmap data (described below) - byte 7: bitmap data (described below) - -The last two bytes represent a partial bitmap packet, with 3 full packets -required to construct a complete bitmap packet. Once assembled, the 6-byte -bitmap packet has the following format: - - byte 0: 0 1 x7 x6 x5 x4 x3 x2 - byte 1: 0 x1 x0 y4 y3 y2 y1 y0 - byte 2: 0 0 ? x14 x13 x12 x11 x10 - byte 3: 0 x9 x8 y9 y8 y7 y6 y5 - byte 4: 0 0 0 0 0 0 0 0 - byte 5: 0 0 0 0 0 0 0 y10 - -There are several things worth noting here. - - 1) In the bitmap data, bit 6 of byte 0 serves as a sync byte to - identify the first fragment of a bitmap packet. - - 2) The bitmaps represent the same data as in the v3 bitmap packets, although - the packet layout is different. - - 3) There doesn't seem to be a count of the contact points anywhere in the v4 - protocol packets. Deriving a count of contact points must be done by - analyzing the bitmaps. - - 4) There is a 3 to 1 ratio of position packets to bitmap packets. Therefore - MT position can only be updated for every third ST position update, and - the count of contact points can only be updated every third packet as - well. - -So far no v4 devices with tracksticks have been encountered. - -ALPS Absolute Mode - Protocol Version 5 ---------------------------------------- -This is basically Protocol Version 3 but with different logic for packet -decode. It uses the same alps_process_touchpad_packet_v3 call with a -specialized decode_fields function pointer to correctly interpret the -packets. This appears to only be used by the Dolphin devices. - -For single-touch, the 6-byte packet format is: - - byte 0: 1 1 0 0 1 0 0 0 - byte 1: 0 x6 x5 x4 x3 x2 x1 x0 - byte 2: 0 y6 y5 y4 y3 y2 y1 y0 - byte 3: 0 M R L 1 m r l - byte 4: y10 y9 y8 y7 x10 x9 x8 x7 - byte 5: 0 z6 z5 z4 z3 z2 z1 z0 - -For mt, the format is: - - byte 0: 1 1 1 n3 1 n2 n1 x24 - byte 1: 1 y7 y6 y5 y4 y3 y2 y1 - byte 2: ? x2 x1 y12 y11 y10 y9 y8 - byte 3: 0 x23 x22 x21 x20 x19 x18 x17 - byte 4: 0 x9 x8 x7 x6 x5 x4 x3 - byte 5: 0 x16 x15 x14 x13 x12 x11 x10 - -ALPS Absolute Mode - Protocol Version 6 ---------------------------------------- - -For trackstick packet, the format is: - - byte 0: 1 1 1 1 1 1 1 1 - byte 1: 0 X6 X5 X4 X3 X2 X1 X0 - byte 2: 0 Y6 Y5 Y4 Y3 Y2 Y1 Y0 - byte 3: ? Y7 X7 ? ? M R L - byte 4: Z7 Z6 Z5 Z4 Z3 Z2 Z1 Z0 - byte 5: 0 1 1 1 1 1 1 1 - -For touchpad packet, the format is: - - byte 0: 1 1 1 1 1 1 1 1 - byte 1: 0 0 0 0 x3 x2 x1 x0 - byte 2: 0 0 0 0 y3 y2 y1 y0 - byte 3: ? x7 x6 x5 x4 ? r l - byte 4: ? y7 y6 y5 y4 ? ? ? - byte 5: z7 z6 z5 z4 z3 z2 z1 z0 - -(v6 touchpad does not have middle button) - -ALPS Absolute Mode - Protocol Version 7 ---------------------------------------- - -For trackstick packet, the format is: - - byte 0: 0 1 0 0 1 0 0 0 - byte 1: 1 1 * * 1 M R L - byte 2: X7 1 X5 X4 X3 X2 X1 X0 - byte 3: Z6 1 Y6 X6 1 Y2 Y1 Y0 - byte 4: Y7 0 Y5 Y4 Y3 1 1 0 - byte 5: T&P 0 Z5 Z4 Z3 Z2 Z1 Z0 - -For touchpad packet, the format is: - - packet-fmt b7 b6 b5 b4 b3 b2 b1 b0 - byte 0: TWO & MULTI L 1 R M 1 Y0-2 Y0-1 Y0-0 - byte 0: NEW L 1 X1-5 1 1 Y0-2 Y0-1 Y0-0 - byte 1: Y0-10 Y0-9 Y0-8 Y0-7 Y0-6 Y0-5 Y0-4 Y0-3 - byte 2: X0-11 1 X0-10 X0-9 X0-8 X0-7 X0-6 X0-5 - byte 3: X1-11 1 X0-4 X0-3 1 X0-2 X0-1 X0-0 - byte 4: TWO X1-10 TWO X1-9 X1-8 X1-7 X1-6 X1-5 X1-4 - byte 4: MULTI X1-10 TWO X1-9 X1-8 X1-7 X1-6 Y1-5 1 - byte 4: NEW X1-10 TWO X1-9 X1-8 X1-7 X1-6 0 0 - byte 5: TWO & NEW Y1-10 0 Y1-9 Y1-8 Y1-7 Y1-6 Y1-5 Y1-4 - byte 5: MULTI Y1-10 0 Y1-9 Y1-8 Y1-7 Y1-6 F-1 F-0 - - L: Left button - R / M: Non-clickpads: Right / Middle button - Clickpads: When > 2 fingers are down, and some fingers - are in the button area, then the 2 coordinates reported - are for fingers outside the button area and these report - extra fingers being present in the right / left button - area. Note these fingers are not added to the F field! - so if a TWO packet is received and R = 1 then there are - 3 fingers down, etc. - TWO: 1: Two touches present, byte 0/4/5 are in TWO fmt - 0: If byte 4 bit 0 is 1, then byte 0/4/5 are in MULTI fmt - otherwise byte 0 bit 4 must be set and byte 0/4/5 are - in NEW fmt - F: Number of fingers - 3, 0 means 3 fingers, 1 means 4 ... - - -ALPS Absolute Mode - Protocol Version 8 ---------------------------------------- - -Spoken by SS4 (73 03 14) and SS5 (73 03 28) hardware. - -The packet type is given by the APD field, bits 4-5 of byte 3. - -Touchpad packet (APD = 0x2): - - b7 b6 b5 b4 b3 b2 b1 b0 - byte 0: SWM SWR SWL 1 1 0 0 X7 - byte 1: 0 X6 X5 X4 X3 X2 X1 X0 - byte 2: 0 Y6 Y5 Y4 Y3 Y2 Y1 Y0 - byte 3: 0 T&P 1 0 1 0 0 Y7 - byte 4: 0 Z6 Z5 Z4 Z3 Z2 Z1 Z0 - byte 5: 0 0 0 0 0 0 0 0 - -SWM, SWR, SWL: Middle, Right, and Left button states - -Touchpad 1 Finger packet (APD = 0x0): - - b7 b6 b5 b4 b3 b2 b1 b0 - byte 0: SWM SWR SWL 1 1 X2 X1 X0 - byte 1: X9 X8 X7 1 X6 X5 X4 X3 - byte 2: 0 X11 X10 LFB Y3 Y2 Y1 Y0 - byte 3: Y5 Y4 0 0 1 TAPF2 TAPF1 TAPF0 - byte 4: Zv7 Y11 Y10 1 Y9 Y8 Y7 Y6 - byte 5: Zv6 Zv5 Zv4 0 Zv3 Zv2 Zv1 Zv0 - -TAPF: ??? -LFB: ??? - -Touchpad 2 Finger packet (APD = 0x1): - - b7 b6 b5 b4 b3 b2 b1 b0 - byte 0: SWM SWR SWL 1 1 AX6 AX5 AX4 - byte 1: AX11 AX10 AX9 AX8 AX7 AZ1 AY4 AZ0 - byte 2: AY11 AY10 AY9 CONT AY8 AY7 AY6 AY5 - byte 3: 0 0 0 1 1 BX6 BX5 BX4 - byte 4: BX11 BX10 BX9 BX8 BX7 BZ1 BY4 BZ0 - byte 5: BY11 BY10 BY9 0 BY8 BY7 BY5 BY5 - -CONT: A 3-or-4 Finger packet is to follow - -Touchpad 3-or-4 Finger packet (APD = 0x3): - - b7 b6 b5 b4 b3 b2 b1 b0 - byte 0: SWM SWR SWL 1 1 AX6 AX5 AX4 - byte 1: AX11 AX10 AX9 AX8 AX7 AZ1 AY4 AZ0 - byte 2: AY11 AY10 AY9 OVF AY8 AY7 AY6 AY5 - byte 3: 0 0 1 1 1 BX6 BX5 BX4 - byte 4: BX11 BX10 BX9 BX8 BX7 BZ1 BY4 BZ0 - byte 5: BY11 BY10 BY9 0 BY8 BY7 BY5 BY5 - -OVF: 5th finger detected diff --git a/Documentation/input/amijoy.txt b/Documentation/input/amijoy.txt deleted file mode 100644 index 7dc4f175943cff2854dd1d54ff93c03be98b2b56..0000000000000000000000000000000000000000 --- a/Documentation/input/amijoy.txt +++ /dev/null @@ -1,184 +0,0 @@ -Amiga 4-joystick parport extension -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Parallel port pins: - - (2) - Up1 (6) - Up2 - (3) - Down1 (7) - Down2 - (4) - Left1 (8) - Left2 - (5) - Right1 (9) - Right2 -(13) - Fire1 (11) - Fire2 -(18) - Gnd1 (18) - Gnd2 - -Amiga digital joystick pinout -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -(1) - Up -(2) - Down -(3) - Left -(4) - Right -(5) - n/c -(6) - Fire button -(7) - +5V (50mA) -(8) - Gnd -(9) - Thumb button - -Amiga mouse pinout -~~~~~~~~~~~~~~~~~~ -(1) - V-pulse -(2) - H-pulse -(3) - VQ-pulse -(4) - HQ-pulse -(5) - Middle button -(6) - Left button -(7) - +5V (50mA) -(8) - Gnd -(9) - Right button - -Amiga analog joystick pinout -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -(1) - Top button -(2) - Top2 button -(3) - Trigger button -(4) - Thumb button -(5) - Analog X -(6) - n/c -(7) - +5V (50mA) -(8) - Gnd -(9) - Analog Y - -Amiga lightpen pinout -~~~~~~~~~~~~~~~~~~~~~ -(1) - n/c -(2) - n/c -(3) - n/c -(4) - n/c -(5) - Touch button -(6) - /Beamtrigger -(7) - +5V (50mA) -(8) - Gnd -(9) - Stylus button - -------------------------------------------------------------------------------- - -NAME rev ADDR type chip Description -JOY0DAT 00A R Denise Joystick-mouse 0 data (left vert, horiz) -JOY1DAT 00C R Denise Joystick-mouse 1 data (right vert,horiz) - - These addresses each read a 16 bit register. These in turn - are loaded from the MDAT serial stream and are clocked in on - the rising edge of SCLK. MLD output is used to parallel load - the external parallel-to-serial converter.This in turn is - loaded with the 4 quadrature inputs from each of two game - controller ports (8 total) plus 8 miscellaneous control bits - which are new for LISA and can be read in upper 8 bits of - LISAID. - Register bits are as follows: - Mouse counter usage (pins 1,3 =Yclock, pins 2,4 =Xclock) - - BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -JOY0DAT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 -JOY1DAT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 - - 0=LEFT CONTROLLER PAIR, 1=RIGHT CONTROLLER PAIR. - (4 counters total). The bit usage for both left and right - addresses is shown below. Each 6 bit counter (Y7-Y2,X7-X2) is - clocked by 2 of the signals input from the mouse serial - stream. Starting with first bit received: - - +-------------------+-----------------------------------------+ - | Serial | Bit Name | Description | - +--------+----------+-----------------------------------------+ - | 0 | M0H | JOY0DAT Horizontal Clock | - | 1 | M0HQ | JOY0DAT Horizontal Clock (quadrature) | - | 2 | M0V | JOY0DAT Vertical Clock | - | 3 | M0VQ | JOY0DAT Vertical Clock (quadrature) | - | 4 | M1V | JOY1DAT Horizontal Clock | - | 5 | M1VQ | JOY1DAT Horizontal Clock (quadrature) | - | 6 | M1V | JOY1DAT Vertical Clock | - | 7 | M1VQ | JOY1DAT Vertical Clock (quadrature) | - +--------+----------+-----------------------------------------+ - - Bits 1 and 0 of each counter (Y1-Y0,X1-X0) may be - read to determine the state of the related input signal pair. - This allows these pins to double as joystick switch inputs. - Joystick switch closures can be deciphered as follows: - - +------------+------+---------------------------------+ - | Directions | Pin# | Counter bits | - +------------+------+---------------------------------+ - | Forward | 1 | Y1 xor Y0 (BIT#09 xor BIT#08) | - | Left | 3 | Y1 | - | Back | 2 | X1 xor X0 (BIT#01 xor BIT#00) | - | Right | 4 | X1 | - +------------+------+---------------------------------+ - -------------------------------------------------------------------------------- - -NAME rev ADDR type chip Description -JOYTEST 036 W Denise Write to all 4 joystick-mouse counters at once. - - Mouse counter write test data: - BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 - JOYxDAT Y7 Y6 Y5 Y4 Y3 Y2 xx xx X7 X6 X5 X4 X3 X2 xx xx - JOYxDAT Y7 Y6 Y5 Y4 Y3 Y2 xx xx X7 X6 X5 X4 X3 X2 xx xx - -------------------------------------------------------------------------------- - -NAME rev ADDR type chip Description -POT0DAT h 012 R Paula Pot counter data left pair (vert, horiz) -POT1DAT h 014 R Paula Pot counter data right pair (vert,horiz) - - These addresses each read a pair of 8 bit pot counters. - (4 counters total). The bit assignment for both - addresses is shown below. The counters are stopped by signals - from 2 controller connectors (left-right) with 2 pins each. - - BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 - RIGHT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 - LEFT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 - - +--------------------------+-------+ - | CONNECTORS | PAULA | - +-------+------+-----+-----+-------+ - | Loc. | Dir. | Sym | pin | pin | - +-------+------+-----+-----+-------+ - | RIGHT | Y | RX | 9 | 33 | - | RIGHT | X | RX | 5 | 32 | - | LEFT | Y | LY | 9 | 36 | - | LEFT | X | LX | 5 | 35 | - +-------+------+-----+-----+-------+ - - With normal (NTSC or PAL) horiz. line rate, the pots will - give a full scale (FF) reading with about 500kohms in one - frame time. With proportionally faster horiz line times, - the counters will count proportionally faster. - This should be noted when doing variable beam displays. - -------------------------------------------------------------------------------- - -NAME rev ADDR type chip Description -POTGO 034 W Paula Pot port (4 bit) bi-direction and data, and pot counter start. - -------------------------------------------------------------------------------- - -NAME rev ADDR type chip Description -POTINP 016 R Paula Pot pin data read - - This register controls a 4 bit bi-direction I/O port - that shares the same 4 pins as the 4 pot counters above. - - +-------+----------+---------------------------------------------+ - | BIT# | FUNCTION | DESCRIPTION | - +-------+----------+---------------------------------------------+ - | 15 | OUTRY | Output enable for Paula pin 33 | - | 14 | DATRY | I/O data Paula pin 33 | - | 13 | OUTRX | Output enable for Paula pin 32 | - | 12 | DATRX | I/O data Paula pin 32 | - | 11 | OUTLY | Out put enable for Paula pin 36 | - | 10 | DATLY | I/O data Paula pin 36 | - | 09 | OUTLX | Output enable for Paula pin 35 | - | 08 | DATLX | I/O data Paula pin 35 | - | 07-01 | X | Not used | - | 00 | START | Start pots (dump capacitors,start counters) | - +-------+----------+---------------------------------------------+ - -------------------------------------------------------------------------------- diff --git a/Documentation/input/appletouch.txt b/Documentation/input/appletouch.txt deleted file mode 100644 index b13de3f89108df1e7b9cd585c97aa81b458261d9..0000000000000000000000000000000000000000 --- a/Documentation/input/appletouch.txt +++ /dev/null @@ -1,85 +0,0 @@ -Apple Touchpad Driver (appletouch) ----------------------------------- - Copyright (C) 2005 Stelian Pop - -appletouch is a Linux kernel driver for the USB touchpad found on post -February 2005 and October 2005 Apple Aluminium Powerbooks. - -This driver is derived from Johannes Berg's appletrackpad driver[1], but it has -been improved in some areas: - * appletouch is a full kernel driver, no userspace program is necessary - * appletouch can be interfaced with the synaptics X11 driver, in order - to have touchpad acceleration, scrolling, etc. - -Credits go to Johannes Berg for reverse-engineering the touchpad protocol, -Frank Arnold for further improvements, and Alex Harper for some additional -information about the inner workings of the touchpad sensors. Michael -Hanselmann added support for the October 2005 models. - -Usage: ------- - -In order to use the touchpad in the basic mode, compile the driver and load -the module. A new input device will be detected and you will be able to read -the mouse data from /dev/input/mice (using gpm, or X11). - -In X11, you can configure the touchpad to use the synaptics X11 driver, which -will give additional functionalities, like acceleration, scrolling, 2 finger -tap for middle button mouse emulation, 3 finger tap for right button mouse -emulation, etc. In order to do this, make sure you're using a recent version of -the synaptics driver (tested with 0.14.2, available from [2]), and configure a -new input device in your X11 configuration file (take a look below for an -example). For additional configuration, see the synaptics driver documentation. - - Section "InputDevice" - Identifier "Synaptics Touchpad" - Driver "synaptics" - Option "SendCoreEvents" "true" - Option "Device" "/dev/input/mice" - Option "Protocol" "auto-dev" - Option "LeftEdge" "0" - Option "RightEdge" "850" - Option "TopEdge" "0" - Option "BottomEdge" "645" - Option "MinSpeed" "0.4" - Option "MaxSpeed" "1" - Option "AccelFactor" "0.02" - Option "FingerLow" "0" - Option "FingerHigh" "30" - Option "MaxTapMove" "20" - Option "MaxTapTime" "100" - Option "HorizScrollDelta" "0" - Option "VertScrollDelta" "30" - Option "SHMConfig" "on" - EndSection - - Section "ServerLayout" - ... - InputDevice "Mouse" - InputDevice "Synaptics Touchpad" - ... - EndSection - -Fuzz problems: --------------- - -The touchpad sensors are very sensitive to heat, and will generate a lot of -noise when the temperature changes. This is especially true when you power-on -the laptop for the first time. - -The appletouch driver tries to handle this noise and auto adapt itself, but it -is not perfect. If finger movements are not recognized anymore, try reloading -the driver. - -You can activate debugging using the 'debug' module parameter. A value of 0 -deactivates any debugging, 1 activates tracing of invalid samples, 2 activates -full tracing (each sample is being traced): - modprobe appletouch debug=1 - or - echo "1" > /sys/module/appletouch/parameters/debug - -Links: ------- - -[1]: http://johannes.sipsolutions.net/PowerBook/touchpad/ -[2]: http://web.archive.org/web/*/http://web.telia.com/~u89404340/touchpad/index.html diff --git a/Documentation/input/atarikbd.txt b/Documentation/input/atarikbd.txt deleted file mode 100644 index f3a3ba8847ba7749c0d881220f50a56c217d8c31..0000000000000000000000000000000000000000 --- a/Documentation/input/atarikbd.txt +++ /dev/null @@ -1,709 +0,0 @@ -Intelligent Keyboard (ikbd) Protocol - - -1. Introduction - -The Atari Corp. Intelligent Keyboard (ikbd) is a general purpose keyboard -controller that is flexible enough that it can be used in a variety of -products without modification. The keyboard, with its microcontroller, -provides a convenient connection point for a mouse and switch-type joysticks. -The ikbd processor also maintains a time-of-day clock with one second -resolution. -The ikbd has been designed to be general enough that it can be used with a -variety of new computer products. Product variations in a number of -keyswitches, mouse resolution, etc. can be accommodated. -The ikbd communicates with the main processor over a high speed bi-directional -serial interface. It can function in a variety of modes to facilitate -different applications of the keyboard, joysticks, or mouse. Limited use of -the controller is possible in applications in which only a unidirectional -communications medium is available by carefully designing the default modes. - -3. Keyboard - -The keyboard always returns key make/break scan codes. The ikbd generates -keyboard scan codes for each key press and release. The key scan make (key -closure) codes start at 1, and are defined in Appendix A. For example, the -ISO key position in the scan code table should exist even if no keyswitch -exists in that position on a particular keyboard. The break code for each key -is obtained by ORing 0x80 with the make code. - -The special codes 0xF6 through 0xFF are reserved for use as follows: - 0xF6 status report - 0xF7 absolute mouse position record - 0xF8-0xFB relative mouse position records (lsbs determined by - mouse button states) - 0xFC time-of-day - 0xFD joystick report (both sticks) - 0xFE joystick 0 event - 0xFF joystick 1 event - -The two shift keys return different scan codes in this mode. The ENTER key -and the RETurn key are also distinct. - -4. Mouse - -The mouse port should be capable of supporting a mouse with resolution of -approximately 200 counts (phase changes or 'clicks') per inch of travel. The -mouse should be scanned at a rate that will permit accurate tracking at -velocities up to 10 inches per second. -The ikbd can report mouse motion in three distinctly different ways. It can -report relative motion, absolute motion in a coordinate system maintained -within the ikbd, or by converting mouse motion into keyboard cursor control -key equivalents. -The mouse buttons can be treated as part of the mouse or as additional -keyboard keys. - -4.1 Relative Position Reporting - -In relative position mode, the ikbd will return relative mouse position -records whenever a mouse event occurs. A mouse event consists of a mouse -button being pressed or released, or motion in either axis exceeding a -settable threshold of motion. Regardless of the threshold, all bits of -resolution are returned to the host computer. -Note that the ikbd may return mouse relative position reports with -significantly more than the threshold delta x or y. This may happen since no -relative mouse motion events will be generated: (a) while the keyboard has -been 'paused' ( the event will be stored until keyboard communications is -resumed) (b) while any event is being transmitted. - -The relative mouse position record is a three byte record of the form -(regardless of keyboard mode): - %111110xy ; mouse position record flag - ; where y is the right button state - ; and x is the left button state - X ; delta x as twos complement integer - Y ; delta y as twos complement integer - -Note that the value of the button state bits should be valid even if the -MOUSE BUTTON ACTION has set the buttons to act like part of the keyboard. -If the accumulated motion before the report packet is generated exceeds the -+127...-128 range, the motion is broken into multiple packets. -Note that the sign of the delta y reported is a function of the Y origin -selected. - -4.2 Absolute Position reporting - -The ikbd can also maintain absolute mouse position. Commands exist for -resetting the mouse position, setting X/Y scaling, and interrogating the -current mouse position. - -4.3 Mouse Cursor Key Mode - -The ikbd can translate mouse motion into the equivalent cursor keystrokes. -The number of mouse clicks per keystroke is independently programmable in -each axis. The ikbd internally maintains mouse motion information to the -highest resolution available, and merely generates a pair of cursor key events -for each multiple of the scale factor. -Mouse motion produces the cursor key make code immediately followed by the -break code for the appropriate cursor key. The mouse buttons produce scan -codes above those normally assigned for the largest envisioned keyboard (i.e. -LEFT=0x74 & RIGHT=0x75). - -5. Joystick - -5.1 Joystick Event Reporting - -In this mode, the ikbd generates a record whenever the joystick position is -changed (i.e. for each opening or closing of a joystick switch or trigger). - -The joystick event record is two bytes of the form: - %1111111x ; Joystick event marker - ; where x is Joystick 0 or 1 - %x000yyyy ; where yyyy is the stick position - ; and x is the trigger - -5.2 Joystick Interrogation - -The current state of the joystick ports may be interrogated at any time in -this mode by sending an 'Interrogate Joystick' command to the ikbd. - -The ikbd response to joystick interrogation is a three byte report of the form - 0xFD ; joystick report header - %x000yyyy ; Joystick 0 - %x000yyyy ; Joystick 1 - ; where x is the trigger - ; and yyy is the stick position - -5.3 Joystick Monitoring - -A mode is available that devotes nearly all of the keyboard communications -time to reporting the state of the joystick ports at a user specifiable rate. -It remains in this mode until reset or commanded into another mode. The PAUSE -command in this mode not only stop the output but also temporarily stops -scanning the joysticks (samples are not queued). - -5.4 Fire Button Monitoring - -A mode is provided to permit monitoring a single input bit at a high rate. In -this mode the ikbd monitors the state of the Joystick 1 fire button at the -maximum rate permitted by the serial communication channel. The data is packed -8 bits per byte for transmission to the host. The ikbd remains in this mode -until reset or commanded into another mode. The PAUSE command in this mode not -only stops the output but also temporarily stops scanning the button (samples -are not queued). - -5.5 Joystick Key Code Mode - -The ikbd may be commanded to translate the use of either joystick into the -equivalent cursor control keystroke(s). The ikbd provides a single breakpoint -velocity joystick cursor. -Joystick events produce the make code, immediately followed by the break code -for the appropriate cursor motion keys. The trigger or fire buttons of the -joysticks produce pseudo key scan codes above those used by the largest key -matrix envisioned (i.e. JOYSTICK0=0x74, JOYSTICK1=0x75). - -6. Time-of-Day Clock - -The ikbd also maintains a time-of-day clock for the system. Commands are -available to set and interrogate the timer-of-day clock. Time-keeping is -maintained down to a resolution of one second. - -7. Status Inquiries - -The current state of ikbd modes and parameters may be found by sending status -inquiry commands that correspond to the ikbd set commands. - -8. Power-Up Mode - -The keyboard controller will perform a simple self-test on power-up to detect -major controller faults (ROM checksum and RAM test) and such things as stuck -keys. Any keys down at power-up are presumed to be stuck, and their BREAK -(sic) code is returned (which without the preceding MAKE code is a flag for a -keyboard error). If the controller self-test completes without error, the code -0xF0 is returned. (This code will be used to indicate the version/release of -the ikbd controller. The first release of the ikbd is version 0xF0, should -there be a second release it will be 0xF1, and so on.) -The ikbd defaults to a mouse position reporting with threshold of 1 unit in -either axis and the Y=0 origin at the top of the screen, and joystick event -reporting mode for joystick 1, with both buttons being logically assigned to -the mouse. After any joystick command, the ikbd assumes that joysticks are -connected to both Joystick0 and Joystick1. Any mouse command (except MOUSE -DISABLE) then causes port 0 to again be scanned as if it were a mouse, and -both buttons are logically connected to it. If a mouse disable command is -received while port 0 is presumed to be a mouse, the button is logically -assigned to Joystick1 (until the mouse is reenabled by another mouse command). - -9. ikbd Command Set - -This section contains a list of commands that can be sent to the ikbd. Command -codes (such as 0x00) which are not specified should perform no operation -(NOPs). - -9.1 RESET - - 0x80 - 0x01 - -N.B. The RESET command is the only two byte command understood by the ikbd. -Any byte following an 0x80 command byte other than 0x01 is ignored (and causes -the 0x80 to be ignored). -A reset may also be caused by sending a break lasting at least 200mS to the -ikbd. -Executing the RESET command returns the keyboard to its default (power-up) -mode and parameter settings. It does not affect the time-of-day clock. -The RESET command or function causes the ikbd to perform a simple self-test. -If the test is successful, the ikbd will send the code of 0xF0 within 300mS -of receipt of the RESET command (or the end of the break, or power-up). The -ikbd will then scan the key matrix for any stuck (closed) keys. Any keys found -closed will cause the break scan code to be generated (the break code arriving -without being preceded by the make code is a flag for a key matrix error). - -9.2. SET MOUSE BUTTON ACTION - - 0x07 - %00000mss ; mouse button action - ; (m is presumed = 1 when in MOUSE KEYCODE mode) - ; mss=0xy, mouse button press or release causes mouse - ; position report - ; where y=1, mouse key press causes absolute report - ; and x=1, mouse key release causes absolute report - ; mss=100, mouse buttons act like keys - -This command sets how the ikbd should treat the buttons on the mouse. The -default mouse button action mode is %00000000, the buttons are treated as part -of the mouse logically. -When buttons act like keys, LEFT=0x74 & RIGHT=0x75. - -9.3 SET RELATIVE MOUSE POSITION REPORTING - - 0x08 - -Set relative mouse position reporting. (DEFAULT) Mouse position packets are -generated asynchronously by the ikbd whenever motion exceeds the setable -threshold in either axis (see SET MOUSE THRESHOLD). Depending upon the mouse -key mode, mouse position reports may also be generated when either mouse -button is pressed or released. Otherwise the mouse buttons behave as if they -were keyboard keys. - -9.4 SET ABSOLUTE MOUSE POSITIONING - - 0x09 - XMSB ; X maximum (in scaled mouse clicks) - XLSB - YMSB ; Y maximum (in scaled mouse clicks) - YLSB - -Set absolute mouse position maintenance. Resets the ikbd maintained X and Y -coordinates. -In this mode, the value of the internally maintained coordinates does NOT wrap -between 0 and large positive numbers. Excess motion below 0 is ignored. The -command sets the maximum positive value that can be attained in the scaled -coordinate system. Motion beyond that value is also ignored. - -9.5 SET MOUSE KEYCODE MOSE - - 0x0A - deltax ; distance in X clicks to return (LEFT) or (RIGHT) - deltay ; distance in Y clicks to return (UP) or (DOWN) - -Set mouse monitoring routines to return cursor motion keycodes instead of -either RELATIVE or ABSOLUTE motion records. The ikbd returns the appropriate -cursor keycode after mouse travel exceeding the user specified deltas in -either axis. When the keyboard is in key scan code mode, mouse motion will -cause the make code immediately followed by the break code. Note that this -command is not affected by the mouse motion origin. - -9..6 SET MOUSE THRESHOLD - - 0x0B - X ; x threshold in mouse ticks (positive integers) - Y ; y threshold in mouse ticks (positive integers) - -This command sets the threshold before a mouse event is generated. Note that -it does NOT affect the resolution of the data returned to the host. This -command is valid only in RELATIVE MOUSE POSITIONING mode. The thresholds -default to 1 at RESET (or power-up). - -9.7 SET MOUSE SCALE - - 0x0C - X ; horizontal mouse ticks per internal X - Y ; vertical mouse ticks per internal Y - -This command sets the scale factor for the ABSOLUTE MOUSE POSITIONING mode. -In this mode, the specified number of mouse phase changes ('clicks') must -occur before the internally maintained coordinate is changed by one -(independently scaled for each axis). Remember that the mouse position -information is available only by interrogating the ikbd in the ABSOLUTE MOUSE -POSITIONING mode unless the ikbd has been commanded to report on button press -or release (see SET MOSE BUTTON ACTION). - -9.8 INTERROGATE MOUSE POSITION - - 0x0D - Returns: - 0xF7 ; absolute mouse position header - BUTTONS - 0000dcba ; where a is right button down since last interrogation - ; b is right button up since last - ; c is left button down since last - ; d is left button up since last - XMSB ; X coordinate - XLSB - YMSB ; Y coordinate - YLSB - -The INTERROGATE MOUSE POSITION command is valid when in the ABSOLUTE MOUSE -POSITIONING mode, regardless of the setting of the MOUSE BUTTON ACTION. - -9.9 LOAD MOUSE POSITION - - 0x0E - 0x00 ; filler - XMSB ; X coordinate - XLSB ; (in scaled coordinate system) - YMSB ; Y coordinate - YLSB - -This command allows the user to preset the internally maintained absolute -mouse position. - -9.10 SET Y=0 AT BOTTOM - - 0x0F - -This command makes the origin of the Y axis to be at the bottom of the -logical coordinate system internal to the ikbd for all relative or absolute -mouse motion. This causes mouse motion toward the user to be negative in sign -and away from the user to be positive. - -9.11 SET Y=0 AT TOP - - 0x10 - -Makes the origin of the Y axis to be at the top of the logical coordinate -system within the ikbd for all relative or absolute mouse motion. (DEFAULT) -This causes mouse motion toward the user to be positive in sign and away from -the user to be negative. - -9.12 RESUME - - 0x11 - -Resume sending data to the host. Since any command received by the ikbd after -its output has been paused also causes an implicit RESUME this command can be -thought of as a NO OPERATION command. If this command is received by the ikbd -and it is not PAUSED, it is simply ignored. - -9.13 DISABLE MOUSE - - 0x12 - -All mouse event reporting is disabled (and scanning may be internally -disabled). Any valid mouse mode command resumes mouse motion monitoring. (The -valid mouse mode commands are SET RELATIVE MOUSE POSITION REPORTING, SET -ABSOLUTE MOUSE POSITIONING, and SET MOUSE KEYCODE MODE. ) -N.B. If the mouse buttons have been commanded to act like keyboard keys, this -command DOES affect their actions. - -9.14 PAUSE OUTPUT - - 0x13 - -Stop sending data to the host until another valid command is received. Key -matrix activity is still monitored and scan codes or ASCII characters enqueued -(up to the maximum supported by the microcontroller) to be sent when the host -allows the output to be resumed. If in the JOYSTICK EVENT REPORTING mode, -joystick events are also queued. -Mouse motion should be accumulated while the output is paused. If the ikbd is -in RELATIVE MOUSE POSITIONING REPORTING mode, motion is accumulated beyond the -normal threshold limits to produce the minimum number of packets necessary for -transmission when output is resumed. Pressing or releasing either mouse button -causes any accumulated motion to be immediately queued as packets, if the -mouse is in RELATIVE MOUSE POSITION REPORTING mode. -Because of the limitations of the microcontroller memory this command should -be used sparingly, and the output should not be shut of for more than -milliseconds at a time. -The output is stopped only at the end of the current 'even'. If the PAUSE -OUTPUT command is received in the middle of a multiple byte report, the packet -will still be transmitted to conclusion and then the PAUSE will take effect. -When the ikbd is in either the JOYSTICK MONITORING mode or the FIRE BUTTON -MONITORING mode, the PAUSE OUTPUT command also temporarily stops the -monitoring process (i.e. the samples are not enqueued for transmission). - -0.15 SET JOYSTICK EVENT REPORTING - - 0x14 - -Enter JOYSTICK EVENT REPORTING mode (DEFAULT). Each opening or closure of a -joystick switch or trigger causes a joystick event record to be generated. - -9.16 SET JOYSTICK INTERROGATION MODE - - 0x15 - -Disables JOYSTICK EVENT REPORTING. Host must send individual JOYSTICK -INTERROGATE commands to sense joystick state. - -9.17 JOYSTICK INTERROGATE - - 0x16 - -Return a record indicating the current state of the joysticks. This command -is valid in either the JOYSTICK EVENT REPORTING mode or the JOYSTICK -INTERROGATION MODE. - -9.18 SET JOYSTICK MONITORING - - 0x17 - rate ; time between samples in hundredths of a second - Returns: (in packets of two as long as in mode) - %000000xy ; where y is JOYSTICK1 Fire button - ; and x is JOYSTICK0 Fire button - %nnnnmmmm ; where m is JOYSTICK1 state - ; and n is JOYSTICK0 state - -Sets the ikbd to do nothing but monitor the serial command line, maintain the -time-of-day clock, and monitor the joystick. The rate sets the interval -between joystick samples. -N.B. The user should not set the rate higher than the serial communications -channel will allow the 2 bytes packets to be transmitted. - -9.19 SET FIRE BUTTON MONITORING - - 0x18 - Returns: (as long as in mode) - %bbbbbbbb ; state of the JOYSTICK1 fire button packed - ; 8 bits per byte, the first sample if the MSB - -Set the ikbd to do nothing but monitor the serial command line, maintain the -time-of-day clock, and monitor the fire button on Joystick 1. The fire button -is scanned at a rate that causes 8 samples to be made in the time it takes for -the previous byte to be sent to the host (i.e. scan rate = 8/10 * baud rate). -The sample interval should be as constant as possible. - -9.20 SET JOYSTICK KEYCODE MODE - - 0x19 - RX ; length of time (in tenths of seconds) until - ; horizontal velocity breakpoint is reached - RY ; length of time (in tenths of seconds) until - ; vertical velocity breakpoint is reached - TX ; length (in tenths of seconds) of joystick closure - ; until horizontal cursor key is generated before RX - ; has elapsed - TY ; length (in tenths of seconds) of joystick closure - ; until vertical cursor key is generated before RY - ; has elapsed - VX ; length (in tenths of seconds) of joystick closure - ; until horizontal cursor keystrokes are generated - ; after RX has elapsed - VY ; length (in tenths of seconds) of joystick closure - ; until vertical cursor keystrokes are generated - ; after RY has elapsed - -In this mode, joystick 0 is scanned in a way that simulates cursor keystrokes. -On initial closure, a keystroke pair (make/break) is generated. Then up to Rn -tenths of seconds later, keystroke pairs are generated every Tn tenths of -seconds. After the Rn breakpoint is reached, keystroke pairs are generated -every Vn tenths of seconds. This provides a velocity (auto-repeat) breakpoint -feature. -Note that by setting RX and/or Ry to zero, the velocity feature can be -disabled. The values of TX and TY then become meaningless, and the generation -of cursor 'keystrokes' is set by VX and VY. - -9.21 DISABLE JOYSTICKS - - 0x1A - -Disable the generation of any joystick events (and scanning may be internally -disabled). Any valid joystick mode command resumes joystick monitoring. (The -joystick mode commands are SET JOYSTICK EVENT REPORTING, SET JOYSTICK -INTERROGATION MODE, SET JOYSTICK MONITORING, SET FIRE BUTTON MONITORING, and -SET JOYSTICK KEYCODE MODE.) - -9.22 TIME-OF-DAY CLOCK SET - - 0x1B - YY ; year (2 least significant digits) - MM ; month - DD ; day - hh ; hour - mm ; minute - ss ; second - -All time-of-day data should be sent to the ikbd in packed BCD format. -Any digit that is not a valid BCD digit should be treated as a 'don't care' -and not alter that particular field of the date or time. This permits setting -only some subfields of the time-of-day clock. - -9.23 INTERROGATE TIME-OF-DAT CLOCK - - 0x1C - Returns: - 0xFC ; time-of-day event header - YY ; year (2 least significant digits) - MM ; month - DD ; day - hh ; hour - mm ; minute - ss ; second - - All time-of-day is sent in packed BCD format. - -9.24 MEMORY LOAD - - 0x20 - ADRMSB ; address in controller - ADRLSB ; memory to be loaded - NUM ; number of bytes (0-128) - { data } - -This command permits the host to load arbitrary values into the ikbd -controller memory. The time between data bytes must be less than 20ms. - -9.25 MEMORY READ - - 0x21 - ADRMSB ; address in controller - ADRLSB ; memory to be read - Returns: - 0xF6 ; status header - 0x20 ; memory access - { data } ; 6 data bytes starting at ADR - -This command permits the host to read from the ikbd controller memory. - -9.26 CONTROLLER EXECUTE - - 0x22 - ADRMSB ; address of subroutine in - ADRLSB ; controller memory to be called - -This command allows the host to command the execution of a subroutine in the -ikbd controller memory. - -9.27 STATUS INQUIRIES - - Status commands are formed by inclusively ORing 0x80 with the - relevant SET command. - - Example: - 0x88 (or 0x89 or 0x8A) ; request mouse mode - Returns: - 0xF6 ; status response header - mode ; 0x08 is RELATIVE - ; 0x09 is ABSOLUTE - ; 0x0A is KEYCODE - param1 ; 0 is RELATIVE - ; XMSB maximum if ABSOLUTE - ; DELTA X is KEYCODE - param2 ; 0 is RELATIVE - ; YMSB maximum if ABSOLUTE - ; DELTA Y is KEYCODE - param3 ; 0 if RELATIVE - ; or KEYCODE - ; YMSB is ABSOLUTE - param4 ; 0 if RELATIVE - ; or KEYCODE - ; YLSB is ABSOLUTE - 0 ; pad - 0 - -The STATUS INQUIRY commands request the ikbd to return either the current mode -or the parameters associated with a given command. All status reports are -padded to form 8 byte long return packets. The responses to the status -requests are designed so that the host may store them away (after stripping -off the status report header byte) and later send them back as commands to -ikbd to restore its state. The 0 pad bytes will be treated as NOPs by the -ikbd. - - Valid STATUS INQUIRY commands are: - - 0x87 mouse button action - 0x88 mouse mode - 0x89 - 0x8A - 0x8B mnouse threshold - 0x8C mouse scale - 0x8F mouse vertical coordinates - 0x90 ( returns 0x0F Y=0 at bottom - 0x10 Y=0 at top ) - 0x92 mouse enable/disable - ( returns 0x00 enabled) - 0x12 disabled ) - 0x94 joystick mode - 0x95 - 0x96 - 0x9A joystick enable/disable - ( returns 0x00 enabled - 0x1A disabled ) - -It is the (host) programmer's responsibility to have only one unanswered -inquiry in process at a time. -STATUS INQUIRY commands are not valid if the ikbd is in JOYSTICK MONITORING -mode or FIRE BUTTON MONITORING mode. - - -10. SCAN CODES - -The key scan codes returned by the ikbd are chosen to simplify the -implementation of GSX. - -GSX Standard Keyboard Mapping. - -Hex Keytop -01 Esc -02 1 -03 2 -04 3 -05 4 -06 5 -07 6 -08 7 -09 8 -0A 9 -0B 0 -0C - -0D == -0E BS -0F TAB -10 Q -11 W -12 E -13 R -14 T -15 Y -16 U -17 I -18 O -19 P -1A [ -1B ] -1C RET -1D CTRL -1E A -1F S -20 D -21 F -22 G -23 H -24 J -25 K -26 L -27 ; -28 ' -29 ` -2A (LEFT) SHIFT -2B \ -2C Z -2D X -2E C -2F V -30 B -31 N -32 M -33 , -34 . -35 / -36 (RIGHT) SHIFT -37 { NOT USED } -38 ALT -39 SPACE BAR -3A CAPS LOCK -3B F1 -3C F2 -3D F3 -3E F4 -3F F5 -40 F6 -41 F7 -42 F8 -43 F9 -44 F10 -45 { NOT USED } -46 { NOT USED } -47 HOME -48 UP ARROW -49 { NOT USED } -4A KEYPAD - -4B LEFT ARROW -4C { NOT USED } -4D RIGHT ARROW -4E KEYPAD + -4F { NOT USED } -50 DOWN ARROW -51 { NOT USED } -52 INSERT -53 DEL -54 { NOT USED } -5F { NOT USED } -60 ISO KEY -61 UNDO -62 HELP -63 KEYPAD ( -64 KEYPAD / -65 KEYPAD * -66 KEYPAD * -67 KEYPAD 7 -68 KEYPAD 8 -69 KEYPAD 9 -6A KEYPAD 4 -6B KEYPAD 5 -6C KEYPAD 6 -6D KEYPAD 1 -6E KEYPAD 2 -6F KEYPAD 3 -70 KEYPAD 0 -71 KEYPAD . -72 KEYPAD ENTER diff --git a/Documentation/input/bcm5974.txt b/Documentation/input/bcm5974.txt deleted file mode 100644 index 74d3876d6f348d5f05eb39d8d78026ce9ffb5894..0000000000000000000000000000000000000000 --- a/Documentation/input/bcm5974.txt +++ /dev/null @@ -1,65 +0,0 @@ -BCM5974 Driver (bcm5974) ------------------------- - Copyright (C) 2008-2009 Henrik Rydberg - -The USB initialization and package decoding was made by Scott Shawcroft as -part of the touchd user-space driver project: - Copyright (C) 2008 Scott Shawcroft (scott.shawcroft@gmail.com) - -The BCM5974 driver is based on the appletouch driver: - Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com) - Copyright (C) 2005 Johannes Berg (johannes@sipsolutions.net) - Copyright (C) 2005 Stelian Pop (stelian@popies.net) - Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de) - Copyright (C) 2005 Peter Osterlund (petero2@telia.com) - Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch) - Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) - -This driver adds support for the multi-touch trackpad on the new Apple -Macbook Air and Macbook Pro laptops. It replaces the appletouch driver on -those computers, and integrates well with the synaptics driver of the Xorg -system. - -Known to work on Macbook Air, Macbook Pro Penryn and the new unibody -Macbook 5 and Macbook Pro 5. - -Usage ------ - -The driver loads automatically for the supported usb device ids, and -becomes available both as an event device (/dev/input/event*) and as a -mouse via the mousedev driver (/dev/input/mice). - -USB Race --------- - -The Apple multi-touch trackpads report both mouse and keyboard events via -different interfaces of the same usb device. This creates a race condition -with the HID driver, which, if not told otherwise, will find the standard -HID mouse and keyboard, and claim the whole device. To remedy, the usb -product id must be listed in the mouse_ignore list of the hid driver. - -Debug output ------------- - -To ease the development for new hardware version, verbose packet output can -be switched on with the debug kernel module parameter. The range [1-9] -yields different levels of verbosity. Example (as root): - -echo -n 9 > /sys/module/bcm5974/parameters/debug - -tail -f /var/log/debug - -echo -n 0 > /sys/module/bcm5974/parameters/debug - -Trivia ------- - -The driver was developed at the ubuntu forums in June 2008 [1], and now has -a more permanent home at bitmath.org [2]. - -Links ------ - -[1] http://ubuntuforums.org/showthread.php?t=840040 -[2] http://bitmath.org/code/ diff --git a/Documentation/input/cd32.txt b/Documentation/input/cd32.txt deleted file mode 100644 index a003d9b41eca058eeaa040e423267e3d441899ee..0000000000000000000000000000000000000000 --- a/Documentation/input/cd32.txt +++ /dev/null @@ -1,19 +0,0 @@ -I have written a small patch that let's me use my Amiga CD32 -joypad connected to the parallel port. Thought I'd share it with you so -you can add it to the list of supported joysticks (hopefully someone will -find it useful). - -It needs the following wiring: - -CD32 pad | Parallel port ----------------------------- -1 (Up) | 2 (D0) -2 (Down) | 3 (D1) -3 (Left) | 4 (D2) -4 (Right) | 5 (D3) -5 (Fire3) | 14 (AUTOFD) -6 (Fire1) | 17 (SELIN) -7 (+5V) | 1 (STROBE) -8 (Gnd) | 18 (Gnd) -9 (Fire2) | 7 (D5) - diff --git a/Documentation/input/cma3000_d0x.txt b/Documentation/input/cma3000_d0x.txt deleted file mode 100644 index 29d088db4afd6f804d2061b4d16e54c1ea769ed7..0000000000000000000000000000000000000000 --- a/Documentation/input/cma3000_d0x.txt +++ /dev/null @@ -1,115 +0,0 @@ -Kernel driver for CMA3000-D0x -============================ - -Supported chips: -* VTI CMA3000-D0x -Datasheet: - CMA3000-D0X Product Family Specification 8281000A.02.pdf - - -Author: Hemanth V - - -Description ------------ -CMA3000 Tri-axis accelerometer supports Motion detect, Measurement and -Free fall modes. - -Motion Detect Mode: Its the low power mode where interrupts are generated only -when motion exceeds the defined thresholds. - -Measurement Mode: This mode is used to read the acceleration data on X,Y,Z -axis and supports 400, 100, 40 Hz sample frequency. - -Free fall Mode: This mode is intended to save system resources. - -Threshold values: Chip supports defining threshold values for above modes -which includes time and g value. Refer product specifications for more details. - -CMA3000 chip supports mutually exclusive I2C and SPI interfaces for -communication, currently the driver supports I2C based communication only. -Initial configuration for bus mode is set in non volatile memory and can later -be modified through bus interface command. - -Driver reports acceleration data through input subsystem. It generates ABS_MISC -event with value 1 when free fall is detected. - -Platform data need to be configured for initial default values. - -Platform Data -------------- -fuzz_x: Noise on X Axis - -fuzz_y: Noise on Y Axis - -fuzz_z: Noise on Z Axis - -g_range: G range in milli g i.e 2000 or 8000 - -mode: Default Operating mode - -mdthr: Motion detect g range threshold value - -mdfftmr: Motion detect and free fall time threshold value - -ffthr: Free fall g range threshold value - -Input Interface --------------- -Input driver version is 1.0.0 -Input device ID: bus 0x18 vendor 0x0 product 0x0 version 0x0 -Input device name: "cma3000-accelerometer" -Supported events: - Event type 0 (Sync) - Event type 3 (Absolute) - Event code 0 (X) - Value 47 - Min -8000 - Max 8000 - Fuzz 200 - Event code 1 (Y) - Value -28 - Min -8000 - Max 8000 - Fuzz 200 - Event code 2 (Z) - Value 905 - Min -8000 - Max 8000 - Fuzz 200 - Event code 40 (Misc) - Value 0 - Min 0 - Max 1 - Event type 4 (Misc) - - -Register/Platform parameters Description ----------------------------------------- - -mode: - 0: power down mode - 1: 100 Hz Measurement mode - 2: 400 Hz Measurement mode - 3: 40 Hz Measurement mode - 4: Motion Detect mode (default) - 5: 100 Hz Free fall mode - 6: 40 Hz Free fall mode - 7: Power off mode - -grange: - 2000: 2000 mg or 2G Range - 8000: 8000 mg or 8G Range - -mdthr: - X: X * 71mg (8G Range) - X: X * 18mg (2G Range) - -mdfftmr: - X: (X & 0x70) * 100 ms (MDTMR) - (X & 0x0F) * 2.5 ms (FFTMR 400 Hz) - (X & 0x0F) * 10 ms (FFTMR 100 Hz) - -ffthr: - X: (X >> 2) * 18mg (2G Range) - X: (X & 0x0F) * 71 mg (8G Range) diff --git a/Documentation/input/conf.py b/Documentation/input/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..d2352fdc92ede64ce47afccd0bac6ee152d836b9 --- /dev/null +++ b/Documentation/input/conf.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8; mode: python -*- + +project = "The Linux input driver subsystem" + +tags.add("subproject") + +latex_documents = [ + ('index', 'linux-input.tex', project, + 'The kernel development community', 'manual'), +] diff --git a/Documentation/input/cs461x.txt b/Documentation/input/cs461x.txt deleted file mode 100644 index 202e9dbacec39a96990c5722c4b1ab832b939b61..0000000000000000000000000000000000000000 --- a/Documentation/input/cs461x.txt +++ /dev/null @@ -1,45 +0,0 @@ -Preface. - -This is a new low-level driver to support analog joystick attached to -Crystal SoundFusion CS4610/CS4612/CS4615. This code is based upon -Vortex/Solo drivers as an example of decoration style, and ALSA -0.5.8a kernel drivers as an chipset documentation and samples. - -This version does not have cooked mode support; the basic code -is present here, but have not tested completely. The button analysis -is completed in this mode, but the axis movement is not. - -Raw mode works fine with analog joystick front-end driver and cs461x -driver as a backend. I've tested this driver with CS4610, 4-axis and -4-button joystick; I mean the jstest utility. Also I've tried to -play in xracer game using joystick, and the result is better than -keyboard only mode. - -The sensitivity and calibrate quality have not been tested; the two -reasons are performed: the same hardware cannot work under Win95 (blue -screen in VJOYD); I have no documentation on my chip; and the existing -behavior in my case was not raised the requirement of joystick calibration. -So the driver have no code to perform hardware related calibration. - -The patch contains minor changes of Config.in and Makefile files. All -needed code have been moved to one separate file cs461x.c like ns558.c -This driver have the basic support for PCI devices only; there is no -ISA or PnP ISA cards supported. AFAIK the ns558 have support for Crystal -ISA and PnP ISA series. - -The driver works with ALSA drivers simultaneously. For example, the xracer -uses joystick as input device and PCM device as sound output in one time. -There are no sound or input collisions detected. The source code have -comments about them; but I've found the joystick can be initialized -separately of ALSA modules. So, you can use only one joystick driver -without ALSA drivers. The ALSA drivers are not needed to compile or -run this driver. - -There are no debug information print have been placed in source, and no -specific options required to work this driver. The found chipset parameters -are printed via printk(KERN_INFO "..."), see the /var/log/messages to -inspect cs461x: prefixed messages to determine possible card detection -errors. - -Regards, -Viktor diff --git a/Documentation/input/devices/alps.rst b/Documentation/input/devices/alps.rst new file mode 100644 index 0000000000000000000000000000000000000000..6779148e428cb830ed1a40ddd5c8d5ca1689e0d3 --- /dev/null +++ b/Documentation/input/devices/alps.rst @@ -0,0 +1,387 @@ +---------------------- +ALPS Touchpad Protocol +---------------------- + +Introduction +------------ +Currently the ALPS touchpad driver supports seven protocol versions in use by +ALPS touchpads, called versions 1, 2, 3, 4, 5, 6, 7 and 8. + +Since roughly mid-2010 several new ALPS touchpads have been released and +integrated into a variety of laptops and netbooks. These new touchpads +have enough behavior differences that the alps_model_data definition +table, describing the properties of the different versions, is no longer +adequate. The design choices were to re-define the alps_model_data +table, with the risk of regression testing existing devices, or isolate +the new devices outside of the alps_model_data table. The latter design +choice was made. The new touchpad signatures are named: "Rushmore", +"Pinnacle", and "Dolphin", which you will see in the alps.c code. +For the purposes of this document, this group of ALPS touchpads will +generically be called "new ALPS touchpads". + +We experimented with probing the ACPI interface _HID (Hardware ID)/_CID +(Compatibility ID) definition as a way to uniquely identify the +different ALPS variants but there did not appear to be a 1:1 mapping. +In fact, it appeared to be an m:n mapping between the _HID and actual +hardware type. + +Detection +--------- + +All ALPS touchpads should respond to the "E6 report" command sequence: +E8-E6-E6-E6-E9. An ALPS touchpad should respond with either 00-00-0A or +00-00-64 if no buttons are pressed. The bits 0-2 of the first byte will be 1s +if some buttons are pressed. + +If the E6 report is successful, the touchpad model is identified using the "E7 +report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is +matched against known models in the alps_model_data_array. + +For older touchpads supporting protocol versions 3 and 4, the E7 report +model signature is always 73-02-64. To differentiate between these +versions, the response from the "Enter Command Mode" sequence must be +inspected as described below. + +The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but +seem to be better differentiated by the EC Command Mode response. + +Command Mode +------------ + +Protocol versions 3 and 4 have a command mode that is used to read and write +one-byte device registers in a 16-bit address space. The command sequence +EC-EC-EC-E9 places the device in command mode, and the device will respond +with 88-07 followed by a third byte. This third byte can be used to determine +whether the devices uses the version 3 or 4 protocol. + +To exit command mode, PSMOUSE_CMD_SETSTREAM (EA) is sent to the touchpad. + +While in command mode, register addresses can be set by first sending a +specific command, either EC for v3 devices or F5 for v4 devices. Then the +address is sent one nibble at a time, where each nibble is encoded as a +command with optional data. This encoding differs slightly between the v3 and +v4 protocols. + +Once an address has been set, the addressed register can be read by sending +PSMOUSE_CMD_GETINFO (E9). The first two bytes of the response contains the +address of the register being read, and the third contains the value of the +register. Registers are written by writing the value one nibble at a time +using the same encoding used for addresses. + +For the new ALPS touchpads, the EC command is used to enter command +mode. The response in the new ALPS touchpads is significantly different, +and more important in determining the behavior. This code has been +separated from the original alps_model_data table and put in the +alps_identify function. For example, there seem to be two hardware init +sequences for the "Dolphin" touchpads as determined by the second byte +of the EC response. + +Packet Format +------------- + +In the following tables, the following notation is used:: + + CAPITALS = stick, miniscules = touchpad + +?'s can have different meanings on different models, such as wheel rotation, +extra buttons, stick buttons on a dualpoint, etc. + +PS/2 packet format +------------------ + +:: + + byte 0: 0 0 YSGN XSGN 1 M R L + byte 1: X7 X6 X5 X4 X3 X2 X1 X0 + byte 2: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + +Note that the device never signals overflow condition. + +For protocol version 2 devices when the trackpoint is used, and no fingers +are on the touchpad, the M R L bits signal the combined status of both the +pointingstick and touchpad buttons. + +ALPS Absolute Mode - Protocol Version 1 +--------------------------------------- + +:: + + byte 0: 1 0 0 0 1 x9 x8 x7 + byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + byte 2: 0 ? ? l r ? fin ges + byte 3: 0 ? ? ? ? y9 y8 y7 + byte 4: 0 y6 y5 y4 y3 y2 y1 y0 + byte 5: 0 z6 z5 z4 z3 z2 z1 z0 + +ALPS Absolute Mode - Protocol Version 2 +--------------------------------------- + +:: + + byte 0: 1 ? ? ? 1 PSM PSR PSL + byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + byte 2: 0 x10 x9 x8 x7 ? fin ges + byte 3: 0 y9 y8 y7 1 M R L + byte 4: 0 y6 y5 y4 y3 y2 y1 y0 + byte 5: 0 z6 z5 z4 z3 z2 z1 z0 + +Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for +the DualPoint Stick. The M, R and L bits signal the combined status of both +the pointingstick and touchpad buttons, except for Dell dualpoint devices +where the pointingstick buttons get reported separately in the PSM, PSR +and PSL bits. + +Dualpoint device -- interleaved packet format +--------------------------------------------- + +:: + + byte 0: 1 1 0 0 1 1 1 1 + byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + byte 2: 0 x10 x9 x8 x7 0 fin ges + byte 3: 0 0 YSGN XSGN 1 1 1 1 + byte 4: X7 X6 X5 X4 X3 X2 X1 X0 + byte 5: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + byte 6: 0 y9 y8 y7 1 m r l + byte 7: 0 y6 y5 y4 y3 y2 y1 y0 + byte 8: 0 z6 z5 z4 z3 z2 z1 z0 + +Devices which use the interleaving format normally send standard PS/2 mouse +packets for the DualPoint Stick + ALPS Absolute Mode packets for the +touchpad, switching to the interleaved packet format when both the stick and +the touchpad are used at the same time. + +ALPS Absolute Mode - Protocol Version 3 +--------------------------------------- + +ALPS protocol version 3 has three different packet formats. The first two are +associated with touchpad events, and the third is associated with trackstick +events. + +The first type is the touchpad position packet:: + + byte 0: 1 ? x1 x0 1 1 1 1 + byte 1: 0 x10 x9 x8 x7 x6 x5 x4 + byte 2: 0 y10 y9 y8 y7 y6 y5 y4 + byte 3: 0 M R L 1 m r l + byte 4: 0 mt x3 x2 y3 y2 y1 y0 + byte 5: 0 z6 z5 z4 z3 z2 z1 z0 + +Note that for some devices the trackstick buttons are reported in this packet, +and on others it is reported in the trackstick packets. + +The second packet type contains bitmaps representing the x and y axes. In the +bitmaps a given bit is set if there is a finger covering that position on the +given axis. Thus the bitmap packet can be used for low-resolution multi-touch +data, although finger tracking is not possible. This packet also encodes the +number of contacts (f1 and f0 in the table below):: + + byte 0: 1 1 x1 x0 1 1 1 1 + byte 1: 0 x8 x7 x6 x5 x4 x3 x2 + byte 2: 0 y7 y6 y5 y4 y3 y2 y1 + byte 3: 0 y10 y9 y8 1 1 1 1 + byte 4: 0 x14 x13 x12 x11 x10 x9 y0 + byte 5: 0 1 ? ? ? ? f1 f0 + +This packet only appears after a position packet with the mt bit set, and +usually only appears when there are two or more contacts (although +occasionally it's seen with only a single contact). + +The final v3 packet type is the trackstick packet:: + + byte 0: 1 1 x7 y7 1 1 1 1 + byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + byte 2: 0 y6 y5 y4 y3 y2 y1 y0 + byte 3: 0 1 0 0 1 0 0 0 + byte 4: 0 z4 z3 z2 z1 z0 ? ? + byte 5: 0 0 1 1 1 1 1 1 + +ALPS Absolute Mode - Protocol Version 4 +--------------------------------------- + +Protocol version 4 has an 8-byte packet format:: + + byte 0: 1 ? x1 x0 1 1 1 1 + byte 1: 0 x10 x9 x8 x7 x6 x5 x4 + byte 2: 0 y10 y9 y8 y7 y6 y5 y4 + byte 3: 0 1 x3 x2 y3 y2 y1 y0 + byte 4: 0 ? ? ? 1 ? r l + byte 5: 0 z6 z5 z4 z3 z2 z1 z0 + byte 6: bitmap data (described below) + byte 7: bitmap data (described below) + +The last two bytes represent a partial bitmap packet, with 3 full packets +required to construct a complete bitmap packet. Once assembled, the 6-byte +bitmap packet has the following format:: + + byte 0: 0 1 x7 x6 x5 x4 x3 x2 + byte 1: 0 x1 x0 y4 y3 y2 y1 y0 + byte 2: 0 0 ? x14 x13 x12 x11 x10 + byte 3: 0 x9 x8 y9 y8 y7 y6 y5 + byte 4: 0 0 0 0 0 0 0 0 + byte 5: 0 0 0 0 0 0 0 y10 + +There are several things worth noting here. + + 1) In the bitmap data, bit 6 of byte 0 serves as a sync byte to + identify the first fragment of a bitmap packet. + + 2) The bitmaps represent the same data as in the v3 bitmap packets, although + the packet layout is different. + + 3) There doesn't seem to be a count of the contact points anywhere in the v4 + protocol packets. Deriving a count of contact points must be done by + analyzing the bitmaps. + + 4) There is a 3 to 1 ratio of position packets to bitmap packets. Therefore + MT position can only be updated for every third ST position update, and + the count of contact points can only be updated every third packet as + well. + +So far no v4 devices with tracksticks have been encountered. + +ALPS Absolute Mode - Protocol Version 5 +--------------------------------------- +This is basically Protocol Version 3 but with different logic for packet +decode. It uses the same alps_process_touchpad_packet_v3 call with a +specialized decode_fields function pointer to correctly interpret the +packets. This appears to only be used by the Dolphin devices. + +For single-touch, the 6-byte packet format is:: + + byte 0: 1 1 0 0 1 0 0 0 + byte 1: 0 x6 x5 x4 x3 x2 x1 x0 + byte 2: 0 y6 y5 y4 y3 y2 y1 y0 + byte 3: 0 M R L 1 m r l + byte 4: y10 y9 y8 y7 x10 x9 x8 x7 + byte 5: 0 z6 z5 z4 z3 z2 z1 z0 + +For mt, the format is:: + + byte 0: 1 1 1 n3 1 n2 n1 x24 + byte 1: 1 y7 y6 y5 y4 y3 y2 y1 + byte 2: ? x2 x1 y12 y11 y10 y9 y8 + byte 3: 0 x23 x22 x21 x20 x19 x18 x17 + byte 4: 0 x9 x8 x7 x6 x5 x4 x3 + byte 5: 0 x16 x15 x14 x13 x12 x11 x10 + +ALPS Absolute Mode - Protocol Version 6 +--------------------------------------- + +For trackstick packet, the format is:: + + byte 0: 1 1 1 1 1 1 1 1 + byte 1: 0 X6 X5 X4 X3 X2 X1 X0 + byte 2: 0 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + byte 3: ? Y7 X7 ? ? M R L + byte 4: Z7 Z6 Z5 Z4 Z3 Z2 Z1 Z0 + byte 5: 0 1 1 1 1 1 1 1 + +For touchpad packet, the format is:: + + byte 0: 1 1 1 1 1 1 1 1 + byte 1: 0 0 0 0 x3 x2 x1 x0 + byte 2: 0 0 0 0 y3 y2 y1 y0 + byte 3: ? x7 x6 x5 x4 ? r l + byte 4: ? y7 y6 y5 y4 ? ? ? + byte 5: z7 z6 z5 z4 z3 z2 z1 z0 + +(v6 touchpad does not have middle button) + +ALPS Absolute Mode - Protocol Version 7 +--------------------------------------- + +For trackstick packet, the format is:: + + byte 0: 0 1 0 0 1 0 0 0 + byte 1: 1 1 * * 1 M R L + byte 2: X7 1 X5 X4 X3 X2 X1 X0 + byte 3: Z6 1 Y6 X6 1 Y2 Y1 Y0 + byte 4: Y7 0 Y5 Y4 Y3 1 1 0 + byte 5: T&P 0 Z5 Z4 Z3 Z2 Z1 Z0 + +For touchpad packet, the format is:: + + packet-fmt b7 b6 b5 b4 b3 b2 b1 b0 + byte 0: TWO & MULTI L 1 R M 1 Y0-2 Y0-1 Y0-0 + byte 0: NEW L 1 X1-5 1 1 Y0-2 Y0-1 Y0-0 + byte 1: Y0-10 Y0-9 Y0-8 Y0-7 Y0-6 Y0-5 Y0-4 Y0-3 + byte 2: X0-11 1 X0-10 X0-9 X0-8 X0-7 X0-6 X0-5 + byte 3: X1-11 1 X0-4 X0-3 1 X0-2 X0-1 X0-0 + byte 4: TWO X1-10 TWO X1-9 X1-8 X1-7 X1-6 X1-5 X1-4 + byte 4: MULTI X1-10 TWO X1-9 X1-8 X1-7 X1-6 Y1-5 1 + byte 4: NEW X1-10 TWO X1-9 X1-8 X1-7 X1-6 0 0 + byte 5: TWO & NEW Y1-10 0 Y1-9 Y1-8 Y1-7 Y1-6 Y1-5 Y1-4 + byte 5: MULTI Y1-10 0 Y1-9 Y1-8 Y1-7 Y1-6 F-1 F-0 + + L: Left button + R / M: Non-clickpads: Right / Middle button + Clickpads: When > 2 fingers are down, and some fingers + are in the button area, then the 2 coordinates reported + are for fingers outside the button area and these report + extra fingers being present in the right / left button + area. Note these fingers are not added to the F field! + so if a TWO packet is received and R = 1 then there are + 3 fingers down, etc. + TWO: 1: Two touches present, byte 0/4/5 are in TWO fmt + 0: If byte 4 bit 0 is 1, then byte 0/4/5 are in MULTI fmt + otherwise byte 0 bit 4 must be set and byte 0/4/5 are + in NEW fmt + F: Number of fingers - 3, 0 means 3 fingers, 1 means 4 ... + + +ALPS Absolute Mode - Protocol Version 8 +--------------------------------------- + +Spoken by SS4 (73 03 14) and SS5 (73 03 28) hardware. + +The packet type is given by the APD field, bits 4-5 of byte 3. + +Touchpad packet (APD = 0x2):: + + b7 b6 b5 b4 b3 b2 b1 b0 + byte 0: SWM SWR SWL 1 1 0 0 X7 + byte 1: 0 X6 X5 X4 X3 X2 X1 X0 + byte 2: 0 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + byte 3: 0 T&P 1 0 1 0 0 Y7 + byte 4: 0 Z6 Z5 Z4 Z3 Z2 Z1 Z0 + byte 5: 0 0 0 0 0 0 0 0 + +SWM, SWR, SWL: Middle, Right, and Left button states + +Touchpad 1 Finger packet (APD = 0x0):: + + b7 b6 b5 b4 b3 b2 b1 b0 + byte 0: SWM SWR SWL 1 1 X2 X1 X0 + byte 1: X9 X8 X7 1 X6 X5 X4 X3 + byte 2: 0 X11 X10 LFB Y3 Y2 Y1 Y0 + byte 3: Y5 Y4 0 0 1 TAPF2 TAPF1 TAPF0 + byte 4: Zv7 Y11 Y10 1 Y9 Y8 Y7 Y6 + byte 5: Zv6 Zv5 Zv4 0 Zv3 Zv2 Zv1 Zv0 + +TAPF: ??? +LFB: ??? + +Touchpad 2 Finger packet (APD = 0x1):: + + b7 b6 b5 b4 b3 b2 b1 b0 + byte 0: SWM SWR SWL 1 1 AX6 AX5 AX4 + byte 1: AX11 AX10 AX9 AX8 AX7 AZ1 AY4 AZ0 + byte 2: AY11 AY10 AY9 CONT AY8 AY7 AY6 AY5 + byte 3: 0 0 0 1 1 BX6 BX5 BX4 + byte 4: BX11 BX10 BX9 BX8 BX7 BZ1 BY4 BZ0 + byte 5: BY11 BY10 BY9 0 BY8 BY7 BY5 BY5 + +CONT: A 3-or-4 Finger packet is to follow + +Touchpad 3-or-4 Finger packet (APD = 0x3):: + + b7 b6 b5 b4 b3 b2 b1 b0 + byte 0: SWM SWR SWL 1 1 AX6 AX5 AX4 + byte 1: AX11 AX10 AX9 AX8 AX7 AZ1 AY4 AZ0 + byte 2: AY11 AY10 AY9 OVF AY8 AY7 AY6 AY5 + byte 3: 0 0 1 1 1 BX6 BX5 BX4 + byte 4: BX11 BX10 BX9 BX8 BX7 BZ1 BY4 BZ0 + byte 5: BY11 BY10 BY9 0 BY8 BY7 BY5 BY5 + +OVF: 5th finger detected diff --git a/Documentation/input/devices/amijoy.rst b/Documentation/input/devices/amijoy.rst new file mode 100644 index 0000000000000000000000000000000000000000..8df7b11cd98d76e3b3d5828fc4275ddc7aa9ffbe --- /dev/null +++ b/Documentation/input/devices/amijoy.rst @@ -0,0 +1,263 @@ +~~~~~~~~~~~~~~~~~~~~~~~~~ +Amiga joystick extensions +~~~~~~~~~~~~~~~~~~~~~~~~~ + + +Amiga 4-joystick parport extension +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Parallel port pins: + + +===== ======== ==== ========== +Pin Meaning Pin Meaning +===== ======== ==== ========== + 2 Up1 6 Up2 + 3 Down1 7 Down2 + 4 Left1 8 Left2 + 5 Right1 9 Right2 +13 Fire1 11 Fire2 +18 Gnd1 18 Gnd2 +===== ======== ==== ========== + +Amiga digital joystick pinout +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +=== ============ +Pin Meaning +=== ============ +1 Up +2 Down +3 Left +4 Right +5 n/c +6 Fire button +7 +5V (50mA) +8 Gnd +9 Thumb button +=== ============ + +Amiga mouse pinout +~~~~~~~~~~~~~~~~~~ + +=== ============ +Pin Meaning +=== ============ +1 V-pulse +2 H-pulse +3 VQ-pulse +4 HQ-pulse +5 Middle button +6 Left button +7 +5V (50mA) +8 Gnd +9 Right button +=== ============ + +Amiga analog joystick pinout +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +=== ============== +Pin Meaning +=== ============== +1 Top button +2 Top2 button +3 Trigger button +4 Thumb button +5 Analog X +6 n/c +7 +5V (50mA) +8 Gnd +9 Analog Y +=== ============== + +Amiga lightpen pinout +~~~~~~~~~~~~~~~~~~~~~ + +=== ============= +Pin Meaning +=== ============= +1 n/c +2 n/c +3 n/c +4 n/c +5 Touch button +6 /Beamtrigger +7 +5V (50mA) +8 Gnd +9 Stylus button +=== ============= + +------------------------------------------------------------------------------- + +======== === ==== ==== ====== ======================================== +NAME rev ADDR type chip Description +======== === ==== ==== ====== ======================================== +JOY0DAT 00A R Denise Joystick-mouse 0 data (left vert, horiz) +JOY1DAT 00C R Denise Joystick-mouse 1 data (right vert,horiz) +======== === ==== ==== ====== ======================================== + + These addresses each read a 16 bit register. These in turn + are loaded from the MDAT serial stream and are clocked in on + the rising edge of SCLK. MLD output is used to parallel load + the external parallel-to-serial converter.This in turn is + loaded with the 4 quadrature inputs from each of two game + controller ports (8 total) plus 8 miscellaneous control bits + which are new for LISA and can be read in upper 8 bits of + LISAID. + + Register bits are as follows: + + - Mouse counter usage (pins 1,3 =Yclock, pins 2,4 =Xclock) + +======== === === === === === === === === ====== === === === === === === === + BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +======== === === === === === === === === ====== === === === === === === === +JOY0DAT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 +JOY1DAT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 +======== === === === === === === === === ====== === === === === === === === + + 0=LEFT CONTROLLER PAIR, 1=RIGHT CONTROLLER PAIR. + (4 counters total). The bit usage for both left and right + addresses is shown below. Each 6 bit counter (Y7-Y2,X7-X2) is + clocked by 2 of the signals input from the mouse serial + stream. Starting with first bit received: + + +-------------------+-----------------------------------------+ + | Serial | Bit Name | Description | + +========+==========+=========================================+ + | 0 | M0H | JOY0DAT Horizontal Clock | + +--------+----------+-----------------------------------------+ + | 1 | M0HQ | JOY0DAT Horizontal Clock (quadrature) | + +--------+----------+-----------------------------------------+ + | 2 | M0V | JOY0DAT Vertical Clock | + +--------+----------+-----------------------------------------+ + | 3 | M0VQ | JOY0DAT Vertical Clock (quadrature) | + +--------+----------+-----------------------------------------+ + | 4 | M1V | JOY1DAT Horizontal Clock | + +--------+----------+-----------------------------------------+ + | 5 | M1VQ | JOY1DAT Horizontal Clock (quadrature) | + +--------+----------+-----------------------------------------+ + | 6 | M1V | JOY1DAT Vertical Clock | + +--------+----------+-----------------------------------------+ + | 7 | M1VQ | JOY1DAT Vertical Clock (quadrature) | + +--------+----------+-----------------------------------------+ + + Bits 1 and 0 of each counter (Y1-Y0,X1-X0) may be + read to determine the state of the related input signal pair. + This allows these pins to double as joystick switch inputs. + Joystick switch closures can be deciphered as follows: + + +------------+------+---------------------------------+ + | Directions | Pin# | Counter bits | + +============+======+=================================+ + | Forward | 1 | Y1 xor Y0 (BIT#09 xor BIT#08) | + +------------+------+---------------------------------+ + | Left | 3 | Y1 | + +------------+------+---------------------------------+ + | Back | 2 | X1 xor X0 (BIT#01 xor BIT#00) | + +------------+------+---------------------------------+ + | Right | 4 | X1 | + +------------+------+---------------------------------+ + +------------------------------------------------------------------------------- + +======== === ==== ==== ====== ================================================= +NAME rev ADDR type chip Description +======== === ==== ==== ====== ================================================= +JOYTEST 036 W Denise Write to all 4 joystick-mouse counters at once. +======== === ==== ==== ====== ================================================= + + Mouse counter write test data: + +========= === === === === === === === === ====== === === === === === === === + BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +========= === === === === === === === === ====== === === === === === === === + JOYxDAT Y7 Y6 Y5 Y4 Y3 Y2 xx xx X7 X6 X5 X4 X3 X2 xx xx + JOYxDAT Y7 Y6 Y5 Y4 Y3 Y2 xx xx X7 X6 X5 X4 X3 X2 xx xx +========= === === === === === === === === ====== === === === === === === === + +------------------------------------------------------------------------------- + +======= === ==== ==== ====== ======================================== +NAME rev ADDR type chip Description +======= === ==== ==== ====== ======================================== +POT0DAT h 012 R Paula Pot counter data left pair (vert, horiz) +POT1DAT h 014 R Paula Pot counter data right pair (vert,horiz) +======= === ==== ==== ====== ======================================== + + These addresses each read a pair of 8 bit pot counters. + (4 counters total). The bit assignment for both + addresses is shown below. The counters are stopped by signals + from 2 controller connectors (left-right) with 2 pins each. + +====== === === === === === === === === ====== === === === === === === === + BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +====== === === === === === === === === ====== === === === === === === === + RIGHT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 + LEFT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0 +====== === === === === === === === === ====== === === === === === === === + + +--------------------------+-------+ + | CONNECTORS | PAULA | + +-------+------+-----+-----+-------+ + | Loc. | Dir. | Sym | pin | pin | + +=======+======+=====+=====+=======+ + | RIGHT | Y | RX | 9 | 33 | + +-------+------+-----+-----+-------+ + | RIGHT | X | RX | 5 | 32 | + +-------+------+-----+-----+-------+ + | LEFT | Y | LY | 9 | 36 | + +-------+------+-----+-----+-------+ + | LEFT | X | LX | 5 | 35 | + +-------+------+-----+-----+-------+ + + With normal (NTSC or PAL) horiz. line rate, the pots will + give a full scale (FF) reading with about 500kohms in one + frame time. With proportionally faster horiz line times, + the counters will count proportionally faster. + This should be noted when doing variable beam displays. + +------------------------------------------------------------------------------- + +====== === ==== ==== ====== ================================================ +NAME rev ADDR type chip Description +====== === ==== ==== ====== ================================================ +POTGO 034 W Paula Pot port (4 bit) bi-direction and data, and pot + counter start. +====== === ==== ==== ====== ================================================ + +------------------------------------------------------------------------------- + +====== === ==== ==== ====== ================================================ +NAME rev ADDR type chip Description +====== === ==== ==== ====== ================================================ +POTINP 016 R Paula Pot pin data read +====== === ==== ==== ====== ================================================ + + This register controls a 4 bit bi-direction I/O port + that shares the same 4 pins as the 4 pot counters above. + + +-------+----------+---------------------------------------------+ + | BIT# | FUNCTION | DESCRIPTION | + +=======+==========+=============================================+ + | 15 | OUTRY | Output enable for Paula pin 33 | + +-------+----------+---------------------------------------------+ + | 14 | DATRY | I/O data Paula pin 33 | + +-------+----------+---------------------------------------------+ + | 13 | OUTRX | Output enable for Paula pin 32 | + +-------+----------+---------------------------------------------+ + | 12 | DATRX | I/O data Paula pin 32 | + +-------+----------+---------------------------------------------+ + | 11 | OUTLY | Out put enable for Paula pin 36 | + +-------+----------+---------------------------------------------+ + | 10 | DATLY | I/O data Paula pin 36 | + +-------+----------+---------------------------------------------+ + | 09 | OUTLX | Output enable for Paula pin 35 | + +-------+----------+---------------------------------------------+ + | 08 | DATLX | I/O data Paula pin 35 | + +-------+----------+---------------------------------------------+ + | 07-01 | X | Not used | + +-------+----------+---------------------------------------------+ + | 00 | START | Start pots (dump capacitors,start counters) | + +-------+----------+---------------------------------------------+ diff --git a/Documentation/input/devices/appletouch.rst b/Documentation/input/devices/appletouch.rst new file mode 100644 index 0000000000000000000000000000000000000000..c94470e66533e91ce8de0b52c2dfe28fe04aac58 --- /dev/null +++ b/Documentation/input/devices/appletouch.rst @@ -0,0 +1,94 @@ +.. include:: + +---------------------------------- +Apple Touchpad Driver (appletouch) +---------------------------------- + +:Copyright: |copy| 2005 Stelian Pop + +appletouch is a Linux kernel driver for the USB touchpad found on post +February 2005 and October 2005 Apple Aluminium Powerbooks. + +This driver is derived from Johannes Berg's appletrackpad driver [#f1]_, +but it has been improved in some areas: + + * appletouch is a full kernel driver, no userspace program is necessary + * appletouch can be interfaced with the synaptics X11 driver, in order + to have touchpad acceleration, scrolling, etc. + +Credits go to Johannes Berg for reverse-engineering the touchpad protocol, +Frank Arnold for further improvements, and Alex Harper for some additional +information about the inner workings of the touchpad sensors. Michael +Hanselmann added support for the October 2005 models. + +Usage +----- + +In order to use the touchpad in the basic mode, compile the driver and load +the module. A new input device will be detected and you will be able to read +the mouse data from /dev/input/mice (using gpm, or X11). + +In X11, you can configure the touchpad to use the synaptics X11 driver, which +will give additional functionalities, like acceleration, scrolling, 2 finger +tap for middle button mouse emulation, 3 finger tap for right button mouse +emulation, etc. In order to do this, make sure you're using a recent version of +the synaptics driver (tested with 0.14.2, available from [#f2]_), and configure +a new input device in your X11 configuration file (take a look below for an +example). For additional configuration, see the synaptics driver documentation:: + + Section "InputDevice" + Identifier "Synaptics Touchpad" + Driver "synaptics" + Option "SendCoreEvents" "true" + Option "Device" "/dev/input/mice" + Option "Protocol" "auto-dev" + Option "LeftEdge" "0" + Option "RightEdge" "850" + Option "TopEdge" "0" + Option "BottomEdge" "645" + Option "MinSpeed" "0.4" + Option "MaxSpeed" "1" + Option "AccelFactor" "0.02" + Option "FingerLow" "0" + Option "FingerHigh" "30" + Option "MaxTapMove" "20" + Option "MaxTapTime" "100" + Option "HorizScrollDelta" "0" + Option "VertScrollDelta" "30" + Option "SHMConfig" "on" + EndSection + + Section "ServerLayout" + ... + InputDevice "Mouse" + InputDevice "Synaptics Touchpad" + ... + EndSection + +Fuzz problems +------------- + +The touchpad sensors are very sensitive to heat, and will generate a lot of +noise when the temperature changes. This is especially true when you power-on +the laptop for the first time. + +The appletouch driver tries to handle this noise and auto adapt itself, but it +is not perfect. If finger movements are not recognized anymore, try reloading +the driver. + +You can activate debugging using the 'debug' module parameter. A value of 0 +deactivates any debugging, 1 activates tracing of invalid samples, 2 activates +full tracing (each sample is being traced):: + + modprobe appletouch debug=1 + +or:: + + echo "1" > /sys/module/appletouch/parameters/debug + + +.. Links: + +.. [#f1] http://johannes.sipsolutions.net/PowerBook/touchpad/ + +.. [#f2] ``_ diff --git a/Documentation/input/devices/atarikbd.rst b/Documentation/input/devices/atarikbd.rst new file mode 100644 index 0000000000000000000000000000000000000000..745e7a1ff122b3d8dec0e39b3fc1eb235fba20dd --- /dev/null +++ b/Documentation/input/devices/atarikbd.rst @@ -0,0 +1,820 @@ +==================================== +Intelligent Keyboard (ikbd) Protocol +==================================== + + +Introduction +============ + +The Atari Corp. Intelligent Keyboard (ikbd) is a general purpose keyboard +controller that is flexible enough that it can be used in a variety of +products without modification. The keyboard, with its microcontroller, +provides a convenient connection point for a mouse and switch-type joysticks. +The ikbd processor also maintains a time-of-day clock with one second +resolution. +The ikbd has been designed to be general enough that it can be used with a +variety of new computer products. Product variations in a number of +keyswitches, mouse resolution, etc. can be accommodated. +The ikbd communicates with the main processor over a high speed bi-directional +serial interface. It can function in a variety of modes to facilitate +different applications of the keyboard, joysticks, or mouse. Limited use of +the controller is possible in applications in which only a unidirectional +communications medium is available by carefully designing the default modes. + +Keyboard +======== + +The keyboard always returns key make/break scan codes. The ikbd generates +keyboard scan codes for each key press and release. The key scan make (key +closure) codes start at 1, and are defined in Appendix A. For example, the +ISO key position in the scan code table should exist even if no keyswitch +exists in that position on a particular keyboard. The break code for each key +is obtained by ORing 0x80 with the make code. + +The special codes 0xF6 through 0xFF are reserved for use as follows: + +=================== ==================================================== + Code Command +=================== ==================================================== + 0xF6 status report + 0xF7 absolute mouse position record + 0xF8-0xFB relative mouse position records (lsbs determined by + mouse button states) + 0xFC time-of-day + 0xFD joystick report (both sticks) + 0xFE joystick 0 event + 0xFF joystick 1 event +=================== ==================================================== + +The two shift keys return different scan codes in this mode. The ENTER key +and the RETurn key are also distinct. + +Mouse +===== + +The mouse port should be capable of supporting a mouse with resolution of +approximately 200 counts (phase changes or 'clicks') per inch of travel. The +mouse should be scanned at a rate that will permit accurate tracking at +velocities up to 10 inches per second. +The ikbd can report mouse motion in three distinctly different ways. It can +report relative motion, absolute motion in a coordinate system maintained +within the ikbd, or by converting mouse motion into keyboard cursor control +key equivalents. +The mouse buttons can be treated as part of the mouse or as additional +keyboard keys. + +Relative Position Reporting +--------------------------- + +In relative position mode, the ikbd will return relative mouse position +records whenever a mouse event occurs. A mouse event consists of a mouse +button being pressed or released, or motion in either axis exceeding a +settable threshold of motion. Regardless of the threshold, all bits of +resolution are returned to the host computer. +Note that the ikbd may return mouse relative position reports with +significantly more than the threshold delta x or y. This may happen since no +relative mouse motion events will be generated: (a) while the keyboard has +been 'paused' ( the event will be stored until keyboard communications is +resumed) (b) while any event is being transmitted. + +The relative mouse position record is a three byte record of the form +(regardless of keyboard mode):: + + %111110xy ; mouse position record flag + ; where y is the right button state + ; and x is the left button state + X ; delta x as twos complement integer + Y ; delta y as twos complement integer + +Note that the value of the button state bits should be valid even if the +MOUSE BUTTON ACTION has set the buttons to act like part of the keyboard. +If the accumulated motion before the report packet is generated exceeds the ++127...-128 range, the motion is broken into multiple packets. +Note that the sign of the delta y reported is a function of the Y origin +selected. + +Absolute Position reporting +--------------------------- + +The ikbd can also maintain absolute mouse position. Commands exist for +resetting the mouse position, setting X/Y scaling, and interrogating the +current mouse position. + +Mouse Cursor Key Mode +--------------------- + +The ikbd can translate mouse motion into the equivalent cursor keystrokes. +The number of mouse clicks per keystroke is independently programmable in +each axis. The ikbd internally maintains mouse motion information to the +highest resolution available, and merely generates a pair of cursor key events +for each multiple of the scale factor. +Mouse motion produces the cursor key make code immediately followed by the +break code for the appropriate cursor key. The mouse buttons produce scan +codes above those normally assigned for the largest envisioned keyboard (i.e. +LEFT=0x74 & RIGHT=0x75). + +Joystick +======== + +Joystick Event Reporting +------------------------ + +In this mode, the ikbd generates a record whenever the joystick position is +changed (i.e. for each opening or closing of a joystick switch or trigger). + +The joystick event record is two bytes of the form:: + + %1111111x ; Joystick event marker + ; where x is Joystick 0 or 1 + %x000yyyy ; where yyyy is the stick position + ; and x is the trigger + +Joystick Interrogation +---------------------- + +The current state of the joystick ports may be interrogated at any time in +this mode by sending an 'Interrogate Joystick' command to the ikbd. + +The ikbd response to joystick interrogation is a three byte report of the form:: + + 0xFD ; joystick report header + %x000yyyy ; Joystick 0 + %x000yyyy ; Joystick 1 + ; where x is the trigger + ; and yyy is the stick position + +Joystick Monitoring +------------------- + +A mode is available that devotes nearly all of the keyboard communications +time to reporting the state of the joystick ports at a user specifiable rate. +It remains in this mode until reset or commanded into another mode. The PAUSE +command in this mode not only stop the output but also temporarily stops +scanning the joysticks (samples are not queued). + +Fire Button Monitoring +---------------------- + +A mode is provided to permit monitoring a single input bit at a high rate. In +this mode the ikbd monitors the state of the Joystick 1 fire button at the +maximum rate permitted by the serial communication channel. The data is packed +8 bits per byte for transmission to the host. The ikbd remains in this mode +until reset or commanded into another mode. The PAUSE command in this mode not +only stops the output but also temporarily stops scanning the button (samples +are not queued). + +Joystick Key Code Mode +---------------------- + +The ikbd may be commanded to translate the use of either joystick into the +equivalent cursor control keystroke(s). The ikbd provides a single breakpoint +velocity joystick cursor. +Joystick events produce the make code, immediately followed by the break code +for the appropriate cursor motion keys. The trigger or fire buttons of the +joysticks produce pseudo key scan codes above those used by the largest key +matrix envisioned (i.e. JOYSTICK0=0x74, JOYSTICK1=0x75). + +Time-of-Day Clock +================= + +The ikbd also maintains a time-of-day clock for the system. Commands are +available to set and interrogate the timer-of-day clock. Time-keeping is +maintained down to a resolution of one second. + +Status Inquiries +================ + +The current state of ikbd modes and parameters may be found by sending status +inquiry commands that correspond to the ikbd set commands. + +Power-Up Mode +============= + +The keyboard controller will perform a simple self-test on power-up to detect +major controller faults (ROM checksum and RAM test) and such things as stuck +keys. Any keys down at power-up are presumed to be stuck, and their BREAK +(sic) code is returned (which without the preceding MAKE code is a flag for a +keyboard error). If the controller self-test completes without error, the code +0xF0 is returned. (This code will be used to indicate the version/release of +the ikbd controller. The first release of the ikbd is version 0xF0, should +there be a second release it will be 0xF1, and so on.) +The ikbd defaults to a mouse position reporting with threshold of 1 unit in +either axis and the Y=0 origin at the top of the screen, and joystick event +reporting mode for joystick 1, with both buttons being logically assigned to +the mouse. After any joystick command, the ikbd assumes that joysticks are +connected to both Joystick0 and Joystick1. Any mouse command (except MOUSE +DISABLE) then causes port 0 to again be scanned as if it were a mouse, and +both buttons are logically connected to it. If a mouse disable command is +received while port 0 is presumed to be a mouse, the button is logically +assigned to Joystick1 (until the mouse is reenabled by another mouse command). + +ikbd Command Set +================ + +This section contains a list of commands that can be sent to the ikbd. Command +codes (such as 0x00) which are not specified should perform no operation +(NOPs). + +RESET +----- + +:: + + 0x80 + 0x01 + +N.B. The RESET command is the only two byte command understood by the ikbd. +Any byte following an 0x80 command byte other than 0x01 is ignored (and causes +the 0x80 to be ignored). +A reset may also be caused by sending a break lasting at least 200mS to the +ikbd. +Executing the RESET command returns the keyboard to its default (power-up) +mode and parameter settings. It does not affect the time-of-day clock. +The RESET command or function causes the ikbd to perform a simple self-test. +If the test is successful, the ikbd will send the code of 0xF0 within 300mS +of receipt of the RESET command (or the end of the break, or power-up). The +ikbd will then scan the key matrix for any stuck (closed) keys. Any keys found +closed will cause the break scan code to be generated (the break code arriving +without being preceded by the make code is a flag for a key matrix error). + +SET MOUSE BUTTON ACTION +----------------------- + +:: + + 0x07 + %00000mss ; mouse button action + ; (m is presumed = 1 when in MOUSE KEYCODE mode) + ; mss=0xy, mouse button press or release causes mouse + ; position report + ; where y=1, mouse key press causes absolute report + ; and x=1, mouse key release causes absolute report + ; mss=100, mouse buttons act like keys + +This command sets how the ikbd should treat the buttons on the mouse. The +default mouse button action mode is %00000000, the buttons are treated as part +of the mouse logically. +When buttons act like keys, LEFT=0x74 & RIGHT=0x75. + +SET RELATIVE MOUSE POSITION REPORTING +------------------------------------- + +:: + + 0x08 + +Set relative mouse position reporting. (DEFAULT) Mouse position packets are +generated asynchronously by the ikbd whenever motion exceeds the setable +threshold in either axis (see SET MOUSE THRESHOLD). Depending upon the mouse +key mode, mouse position reports may also be generated when either mouse +button is pressed or released. Otherwise the mouse buttons behave as if they +were keyboard keys. + +SET ABSOLUTE MOUSE POSITIONING +------------------------------ + +:: + + 0x09 + XMSB ; X maximum (in scaled mouse clicks) + XLSB + YMSB ; Y maximum (in scaled mouse clicks) + YLSB + +Set absolute mouse position maintenance. Resets the ikbd maintained X and Y +coordinates. +In this mode, the value of the internally maintained coordinates does NOT wrap +between 0 and large positive numbers. Excess motion below 0 is ignored. The +command sets the maximum positive value that can be attained in the scaled +coordinate system. Motion beyond that value is also ignored. + +SET MOUSE KEYCODE MOSE +---------------------- + +:: + + 0x0A + deltax ; distance in X clicks to return (LEFT) or (RIGHT) + deltay ; distance in Y clicks to return (UP) or (DOWN) + +Set mouse monitoring routines to return cursor motion keycodes instead of +either RELATIVE or ABSOLUTE motion records. The ikbd returns the appropriate +cursor keycode after mouse travel exceeding the user specified deltas in +either axis. When the keyboard is in key scan code mode, mouse motion will +cause the make code immediately followed by the break code. Note that this +command is not affected by the mouse motion origin. + +SET MOUSE THRESHOLD +------------------- + +:: + + 0x0B + X ; x threshold in mouse ticks (positive integers) + Y ; y threshold in mouse ticks (positive integers) + +This command sets the threshold before a mouse event is generated. Note that +it does NOT affect the resolution of the data returned to the host. This +command is valid only in RELATIVE MOUSE POSITIONING mode. The thresholds +default to 1 at RESET (or power-up). + +SET MOUSE SCALE +--------------- + +:: + + 0x0C + X ; horizontal mouse ticks per internal X + Y ; vertical mouse ticks per internal Y + +This command sets the scale factor for the ABSOLUTE MOUSE POSITIONING mode. +In this mode, the specified number of mouse phase changes ('clicks') must +occur before the internally maintained coordinate is changed by one +(independently scaled for each axis). Remember that the mouse position +information is available only by interrogating the ikbd in the ABSOLUTE MOUSE +POSITIONING mode unless the ikbd has been commanded to report on button press +or release (see SET MOSE BUTTON ACTION). + +INTERROGATE MOUSE POSITION +-------------------------- + +:: + + 0x0D + Returns: + 0xF7 ; absolute mouse position header + BUTTONS + 0000dcba ; where a is right button down since last interrogation + ; b is right button up since last + ; c is left button down since last + ; d is left button up since last + XMSB ; X coordinate + XLSB + YMSB ; Y coordinate + YLSB + +The INTERROGATE MOUSE POSITION command is valid when in the ABSOLUTE MOUSE +POSITIONING mode, regardless of the setting of the MOUSE BUTTON ACTION. + +LOAD MOUSE POSITION +------------------- + +:: + + 0x0E + 0x00 ; filler + XMSB ; X coordinate + XLSB ; (in scaled coordinate system) + YMSB ; Y coordinate + YLSB + +This command allows the user to preset the internally maintained absolute +mouse position. + +SET Y=0 AT BOTTOM +----------------- + +:: + + 0x0F + +This command makes the origin of the Y axis to be at the bottom of the +logical coordinate system internal to the ikbd for all relative or absolute +mouse motion. This causes mouse motion toward the user to be negative in sign +and away from the user to be positive. + +SET Y=0 AT TOP +-------------- + +:: + + 0x10 + +Makes the origin of the Y axis to be at the top of the logical coordinate +system within the ikbd for all relative or absolute mouse motion. (DEFAULT) +This causes mouse motion toward the user to be positive in sign and away from +the user to be negative. + +RESUME +------ + +:: + + 0x11 + +Resume sending data to the host. Since any command received by the ikbd after +its output has been paused also causes an implicit RESUME this command can be +thought of as a NO OPERATION command. If this command is received by the ikbd +and it is not PAUSED, it is simply ignored. + +DISABLE MOUSE +------------- + +:: + + 0x12 + +All mouse event reporting is disabled (and scanning may be internally +disabled). Any valid mouse mode command resumes mouse motion monitoring. (The +valid mouse mode commands are SET RELATIVE MOUSE POSITION REPORTING, SET +ABSOLUTE MOUSE POSITIONING, and SET MOUSE KEYCODE MODE. ) +N.B. If the mouse buttons have been commanded to act like keyboard keys, this +command DOES affect their actions. + +PAUSE OUTPUT +------------ + +:: + + 0x13 + +Stop sending data to the host until another valid command is received. Key +matrix activity is still monitored and scan codes or ASCII characters enqueued +(up to the maximum supported by the microcontroller) to be sent when the host +allows the output to be resumed. If in the JOYSTICK EVENT REPORTING mode, +joystick events are also queued. +Mouse motion should be accumulated while the output is paused. If the ikbd is +in RELATIVE MOUSE POSITIONING REPORTING mode, motion is accumulated beyond the +normal threshold limits to produce the minimum number of packets necessary for +transmission when output is resumed. Pressing or releasing either mouse button +causes any accumulated motion to be immediately queued as packets, if the +mouse is in RELATIVE MOUSE POSITION REPORTING mode. +Because of the limitations of the microcontroller memory this command should +be used sparingly, and the output should not be shut of for more than +milliseconds at a time. +The output is stopped only at the end of the current 'even'. If the PAUSE +OUTPUT command is received in the middle of a multiple byte report, the packet +will still be transmitted to conclusion and then the PAUSE will take effect. +When the ikbd is in either the JOYSTICK MONITORING mode or the FIRE BUTTON +MONITORING mode, the PAUSE OUTPUT command also temporarily stops the +monitoring process (i.e. the samples are not enqueued for transmission). + +SET JOYSTICK EVENT REPORTING +---------------------------- + +:: + + 0x14 + +Enter JOYSTICK EVENT REPORTING mode (DEFAULT). Each opening or closure of a +joystick switch or trigger causes a joystick event record to be generated. + +SET JOYSTICK INTERROGATION MODE +------------------------------- + +:: + + 0x15 + +Disables JOYSTICK EVENT REPORTING. Host must send individual JOYSTICK +INTERROGATE commands to sense joystick state. + +JOYSTICK INTERROGATE +-------------------- + +:: + + 0x16 + +Return a record indicating the current state of the joysticks. This command +is valid in either the JOYSTICK EVENT REPORTING mode or the JOYSTICK +INTERROGATION MODE. + +SET JOYSTICK MONITORING +----------------------- + +:: + + 0x17 + rate ; time between samples in hundredths of a second + Returns: (in packets of two as long as in mode) + %000000xy ; where y is JOYSTICK1 Fire button + ; and x is JOYSTICK0 Fire button + %nnnnmmmm ; where m is JOYSTICK1 state + ; and n is JOYSTICK0 state + +Sets the ikbd to do nothing but monitor the serial command line, maintain the +time-of-day clock, and monitor the joystick. The rate sets the interval +between joystick samples. +N.B. The user should not set the rate higher than the serial communications +channel will allow the 2 bytes packets to be transmitted. + +SET FIRE BUTTON MONITORING +-------------------------- + +:: + + 0x18 + Returns: (as long as in mode) + %bbbbbbbb ; state of the JOYSTICK1 fire button packed + ; 8 bits per byte, the first sample if the MSB + +Set the ikbd to do nothing but monitor the serial command line, maintain the +time-of-day clock, and monitor the fire button on Joystick 1. The fire button +is scanned at a rate that causes 8 samples to be made in the time it takes for +the previous byte to be sent to the host (i.e. scan rate = 8/10 * baud rate). +The sample interval should be as constant as possible. + +SET JOYSTICK KEYCODE MODE +------------------------- + +:: + + 0x19 + RX ; length of time (in tenths of seconds) until + ; horizontal velocity breakpoint is reached + RY ; length of time (in tenths of seconds) until + ; vertical velocity breakpoint is reached + TX ; length (in tenths of seconds) of joystick closure + ; until horizontal cursor key is generated before RX + ; has elapsed + TY ; length (in tenths of seconds) of joystick closure + ; until vertical cursor key is generated before RY + ; has elapsed + VX ; length (in tenths of seconds) of joystick closure + ; until horizontal cursor keystrokes are generated + ; after RX has elapsed + VY ; length (in tenths of seconds) of joystick closure + ; until vertical cursor keystrokes are generated + ; after RY has elapsed + +In this mode, joystick 0 is scanned in a way that simulates cursor keystrokes. +On initial closure, a keystroke pair (make/break) is generated. Then up to Rn +tenths of seconds later, keystroke pairs are generated every Tn tenths of +seconds. After the Rn breakpoint is reached, keystroke pairs are generated +every Vn tenths of seconds. This provides a velocity (auto-repeat) breakpoint +feature. +Note that by setting RX and/or Ry to zero, the velocity feature can be +disabled. The values of TX and TY then become meaningless, and the generation +of cursor 'keystrokes' is set by VX and VY. + +DISABLE JOYSTICKS +----------------- + +:: + + 0x1A + +Disable the generation of any joystick events (and scanning may be internally +disabled). Any valid joystick mode command resumes joystick monitoring. (The +joystick mode commands are SET JOYSTICK EVENT REPORTING, SET JOYSTICK +INTERROGATION MODE, SET JOYSTICK MONITORING, SET FIRE BUTTON MONITORING, and +SET JOYSTICK KEYCODE MODE.) + +TIME-OF-DAY CLOCK SET +--------------------- + +:: + + 0x1B + YY ; year (2 least significant digits) + MM ; month + DD ; day + hh ; hour + mm ; minute + ss ; second + +All time-of-day data should be sent to the ikbd in packed BCD format. +Any digit that is not a valid BCD digit should be treated as a 'don't care' +and not alter that particular field of the date or time. This permits setting +only some subfields of the time-of-day clock. + +INTERROGATE TIME-OF-DAT CLOCK +----------------------------- + +:: + + 0x1C + Returns: + 0xFC ; time-of-day event header + YY ; year (2 least significant digits) + MM ; month + DD ; day + hh ; hour + mm ; minute + ss ; second + + All time-of-day is sent in packed BCD format. + +MEMORY LOAD +----------- + +:: + + 0x20 + ADRMSB ; address in controller + ADRLSB ; memory to be loaded + NUM ; number of bytes (0-128) + { data } + +This command permits the host to load arbitrary values into the ikbd +controller memory. The time between data bytes must be less than 20ms. + +MEMORY READ +----------- + +:: + + 0x21 + ADRMSB ; address in controller + ADRLSB ; memory to be read + Returns: + 0xF6 ; status header + 0x20 ; memory access + { data } ; 6 data bytes starting at ADR + +This command permits the host to read from the ikbd controller memory. + +CONTROLLER EXECUTE +------------------ + +:: + + 0x22 + ADRMSB ; address of subroutine in + ADRLSB ; controller memory to be called + +This command allows the host to command the execution of a subroutine in the +ikbd controller memory. + +STATUS INQUIRIES +---------------- + +:: + + Status commands are formed by inclusively ORing 0x80 with the + relevant SET command. + + Example: + 0x88 (or 0x89 or 0x8A) ; request mouse mode + Returns: + 0xF6 ; status response header + mode ; 0x08 is RELATIVE + ; 0x09 is ABSOLUTE + ; 0x0A is KEYCODE + param1 ; 0 is RELATIVE + ; XMSB maximum if ABSOLUTE + ; DELTA X is KEYCODE + param2 ; 0 is RELATIVE + ; YMSB maximum if ABSOLUTE + ; DELTA Y is KEYCODE + param3 ; 0 if RELATIVE + ; or KEYCODE + ; YMSB is ABSOLUTE + param4 ; 0 if RELATIVE + ; or KEYCODE + ; YLSB is ABSOLUTE + 0 ; pad + 0 + +The STATUS INQUIRY commands request the ikbd to return either the current mode +or the parameters associated with a given command. All status reports are +padded to form 8 byte long return packets. The responses to the status +requests are designed so that the host may store them away (after stripping +off the status report header byte) and later send them back as commands to +ikbd to restore its state. The 0 pad bytes will be treated as NOPs by the +ikbd. + + Valid STATUS INQUIRY commands are:: + + 0x87 mouse button action + 0x88 mouse mode + 0x89 + 0x8A + 0x8B mnouse threshold + 0x8C mouse scale + 0x8F mouse vertical coordinates + 0x90 ( returns 0x0F Y=0 at bottom + 0x10 Y=0 at top ) + 0x92 mouse enable/disable + ( returns 0x00 enabled) + 0x12 disabled ) + 0x94 joystick mode + 0x95 + 0x96 + 0x9A joystick enable/disable + ( returns 0x00 enabled + 0x1A disabled ) + +It is the (host) programmer's responsibility to have only one unanswered +inquiry in process at a time. +STATUS INQUIRY commands are not valid if the ikbd is in JOYSTICK MONITORING +mode or FIRE BUTTON MONITORING mode. + + +SCAN CODES +========== + +The key scan codes returned by the ikbd are chosen to simplify the +implementation of GSX. + +GSX Standard Keyboard Mapping + +======= ============ +Hex Keytop +======= ============ +01 Esc +02 1 +03 2 +04 3 +05 4 +06 5 +07 6 +08 7 +09 8 +0A 9 +0B 0 +0C \- +0D \= +0E BS +0F TAB +10 Q +11 W +12 E +13 R +14 T +15 Y +16 U +17 I +18 O +19 P +1A [ +1B ] +1C RET +1D CTRL +1E A +1F S +20 D +21 F +22 G +23 H +24 J +25 K +26 L +27 ; +28 ' +29 \` +2A (LEFT) SHIFT +2B \\ +2C Z +2D X +2E C +2F V +30 B +31 N +32 M +33 , +34 . +35 / +36 (RIGHT) SHIFT +37 { NOT USED } +38 ALT +39 SPACE BAR +3A CAPS LOCK +3B F1 +3C F2 +3D F3 +3E F4 +3F F5 +40 F6 +41 F7 +42 F8 +43 F9 +44 F10 +45 { NOT USED } +46 { NOT USED } +47 HOME +48 UP ARROW +49 { NOT USED } +4A KEYPAD - +4B LEFT ARROW +4C { NOT USED } +4D RIGHT ARROW +4E KEYPAD + +4F { NOT USED } +50 DOWN ARROW +51 { NOT USED } +52 INSERT +53 DEL +54 { NOT USED } +5F { NOT USED } +60 ISO KEY +61 UNDO +62 HELP +63 KEYPAD ( +64 KEYPAD / +65 KEYPAD * +66 KEYPAD * +67 KEYPAD 7 +68 KEYPAD 8 +69 KEYPAD 9 +6A KEYPAD 4 +6B KEYPAD 5 +6C KEYPAD 6 +6D KEYPAD 1 +6E KEYPAD 2 +6F KEYPAD 3 +70 KEYPAD 0 +71 KEYPAD . +72 KEYPAD ENTER +======= ============ diff --git a/Documentation/input/devices/bcm5974.rst b/Documentation/input/devices/bcm5974.rst new file mode 100644 index 0000000000000000000000000000000000000000..4aca199b0aa66a302e3bcb7f49cdbfc3d57106d3 --- /dev/null +++ b/Documentation/input/devices/bcm5974.rst @@ -0,0 +1,70 @@ +.. include:: + +------------------------ +BCM5974 Driver (bcm5974) +------------------------ + +:Copyright: |copy| 2008-2009 Henrik Rydberg + +The USB initialization and package decoding was made by Scott Shawcroft as +part of the touchd user-space driver project: + +:Copyright: |copy| 2008 Scott Shawcroft (scott.shawcroft@gmail.com) + +The BCM5974 driver is based on the appletouch driver: + +:Copyright: |copy| 2001-2004 Greg Kroah-Hartman (greg@kroah.com) +:Copyright: |copy| 2005 Johannes Berg (johannes@sipsolutions.net) +:Copyright: |copy| 2005 Stelian Pop (stelian@popies.net) +:Copyright: |copy| 2005 Frank Arnold (frank@scirocco-5v-turbo.de) +:Copyright: |copy| 2005 Peter Osterlund (petero2@telia.com) +:Copyright: |copy| 2005 Michael Hanselmann (linux-kernel@hansmi.ch) +:Copyright: |copy| 2006 Nicolas Boichat (nicolas@boichat.ch) + +This driver adds support for the multi-touch trackpad on the new Apple +Macbook Air and Macbook Pro laptops. It replaces the appletouch driver on +those computers, and integrates well with the synaptics driver of the Xorg +system. + +Known to work on Macbook Air, Macbook Pro Penryn and the new unibody +Macbook 5 and Macbook Pro 5. + +Usage +----- + +The driver loads automatically for the supported usb device ids, and +becomes available both as an event device (/dev/input/event*) and as a +mouse via the mousedev driver (/dev/input/mice). + +USB Race +-------- + +The Apple multi-touch trackpads report both mouse and keyboard events via +different interfaces of the same usb device. This creates a race condition +with the HID driver, which, if not told otherwise, will find the standard +HID mouse and keyboard, and claim the whole device. To remedy, the usb +product id must be listed in the mouse_ignore list of the hid driver. + +Debug output +------------ + +To ease the development for new hardware version, verbose packet output can +be switched on with the debug kernel module parameter. The range [1-9] +yields different levels of verbosity. Example (as root):: + + echo -n 9 > /sys/module/bcm5974/parameters/debug + + tail -f /var/log/debug + + echo -n 0 > /sys/module/bcm5974/parameters/debug + +Trivia +------ + +The driver was developed at the ubuntu forums in June 2008 [#f1]_, and now has +a more permanent home at bitmath.org [#f2]_. + +.. Links + +.. [#f1] http://ubuntuforums.org/showthread.php?t=840040 +.. [#f2] http://bitmath.org/code/ diff --git a/Documentation/input/devices/cma3000_d0x.rst b/Documentation/input/devices/cma3000_d0x.rst new file mode 100644 index 0000000000000000000000000000000000000000..8bc8e61487b04aa98c502b3153a0c5c1cce81e30 --- /dev/null +++ b/Documentation/input/devices/cma3000_d0x.rst @@ -0,0 +1,139 @@ +CMA3000-D0x Accelerometer +========================= + +Supported chips: +* VTI CMA3000-D0x + +Datasheet: + CMA3000-D0X Product Family Specification 8281000A.02.pdf + + +:Author: Hemanth V + + +Description +----------- + +CMA3000 Tri-axis accelerometer supports Motion detect, Measurement and +Free fall modes. + +Motion Detect Mode: + Its the low power mode where interrupts are generated only + when motion exceeds the defined thresholds. + +Measurement Mode: + This mode is used to read the acceleration data on X,Y,Z + axis and supports 400, 100, 40 Hz sample frequency. + +Free fall Mode: + This mode is intended to save system resources. + +Threshold values: + Chip supports defining threshold values for above modes + which includes time and g value. Refer product specifications for + more details. + +CMA3000 chip supports mutually exclusive I2C and SPI interfaces for +communication, currently the driver supports I2C based communication only. +Initial configuration for bus mode is set in non volatile memory and can later +be modified through bus interface command. + +Driver reports acceleration data through input subsystem. It generates ABS_MISC +event with value 1 when free fall is detected. + +Platform data need to be configured for initial default values. + +Platform Data +------------- + +fuzz_x: + Noise on X Axis + +fuzz_y: + Noise on Y Axis + +fuzz_z: + Noise on Z Axis + +g_range: + G range in milli g i.e 2000 or 8000 + +mode: + Default Operating mode + +mdthr: + Motion detect g range threshold value + +mdfftmr: + Motion detect and free fall time threshold value + +ffthr: + Free fall g range threshold value + +Input Interface +--------------- + +Input driver version is 1.0.0 +Input device ID: bus 0x18 vendor 0x0 product 0x0 version 0x0 +Input device name: "cma3000-accelerometer" + +Supported events:: + + Event type 0 (Sync) + Event type 3 (Absolute) + Event code 0 (X) + Value 47 + Min -8000 + Max 8000 + Fuzz 200 + Event code 1 (Y) + Value -28 + Min -8000 + Max 8000 + Fuzz 200 + Event code 2 (Z) + Value 905 + Min -8000 + Max 8000 + Fuzz 200 + Event code 40 (Misc) + Value 0 + Min 0 + Max 1 + Event type 4 (Misc) + + +Register/Platform parameters Description +---------------------------------------- + +mode:: + + 0: power down mode + 1: 100 Hz Measurement mode + 2: 400 Hz Measurement mode + 3: 40 Hz Measurement mode + 4: Motion Detect mode (default) + 5: 100 Hz Free fall mode + 6: 40 Hz Free fall mode + 7: Power off mode + +grange:: + + 2000: 2000 mg or 2G Range + 8000: 8000 mg or 8G Range + +mdthr:: + + X: X * 71mg (8G Range) + X: X * 18mg (2G Range) + +mdfftmr:: + + X: (X & 0x70) * 100 ms (MDTMR) + (X & 0x0F) * 2.5 ms (FFTMR 400 Hz) + (X & 0x0F) * 10 ms (FFTMR 100 Hz) + +ffthr:: + + X: (X >> 2) * 18mg (2G Range) + X: (X & 0x0F) * 71 mg (8G Range) diff --git a/Documentation/input/devices/cs461x.rst b/Documentation/input/devices/cs461x.rst new file mode 100644 index 0000000000000000000000000000000000000000..b1e6d508ad26ec54dd519032650fb3b8bd7cd3f6 --- /dev/null +++ b/Documentation/input/devices/cs461x.rst @@ -0,0 +1,43 @@ +Crystal SoundFusion CS4610/CS4612/CS461 joystick +================================================ + +This is a new low-level driver to support analog joystick attached to +Crystal SoundFusion CS4610/CS4612/CS4615. This code is based upon +Vortex/Solo drivers as an example of decoration style, and ALSA +0.5.8a kernel drivers as an chipset documentation and samples. + +This version does not have cooked mode support; the basic code +is present here, but have not tested completely. The button analysis +is completed in this mode, but the axis movement is not. + +Raw mode works fine with analog joystick front-end driver and cs461x +driver as a backend. I've tested this driver with CS4610, 4-axis and +4-button joystick; I mean the jstest utility. Also I've tried to +play in xracer game using joystick, and the result is better than +keyboard only mode. + +The sensitivity and calibrate quality have not been tested; the two +reasons are performed: the same hardware cannot work under Win95 (blue +screen in VJOYD); I have no documentation on my chip; and the existing +behavior in my case was not raised the requirement of joystick calibration. +So the driver have no code to perform hardware related calibration. + +This driver have the basic support for PCI devices only; there is no +ISA or PnP ISA cards supported. + +The driver works with ALSA drivers simultaneously. For example, the xracer +uses joystick as input device and PCM device as sound output in one time. +There are no sound or input collisions detected. The source code have +comments about them; but I've found the joystick can be initialized +separately of ALSA modules. So, you can use only one joystick driver +without ALSA drivers. The ALSA drivers are not needed to compile or +run this driver. + +There are no debug information print have been placed in source, and no +specific options required to work this driver. The found chipset parameters +are printed via printk(KERN_INFO "..."), see the /var/log/messages to +inspect cs461x: prefixed messages to determine possible card detection +errors. + +Regards, +Viktor diff --git a/Documentation/input/devices/edt-ft5x06.rst b/Documentation/input/devices/edt-ft5x06.rst new file mode 100644 index 0000000000000000000000000000000000000000..1ccc94b192b7edacd8a42ff81de923b4a35a27e7 --- /dev/null +++ b/Documentation/input/devices/edt-ft5x06.rst @@ -0,0 +1,54 @@ +EDT ft5x06 based Polytouch devices +---------------------------------- + +The edt-ft5x06 driver is useful for the EDT "Polytouch" family of capacitive +touch screens. Note that it is *not* suitable for other devices based on the +focaltec ft5x06 devices, since they contain vendor-specific firmware. In +particular this driver is not suitable for the Nook tablet. + +It has been tested with the following devices: + * EP0350M06 + * EP0430M06 + * EP0570M06 + * EP0700M06 + +The driver allows configuration of the touch screen via a set of sysfs files: + +/sys/class/input/eventX/device/device/threshold: + allows setting the "click"-threshold in the range from 0 to 80. + +/sys/class/input/eventX/device/device/gain: + allows setting the sensitivity in the range from 0 to 31. Note that + lower values indicate higher sensitivity. + +/sys/class/input/eventX/device/device/offset: + allows setting the edge compensation in the range from 0 to 31. + +/sys/class/input/eventX/device/device/report_rate: + allows setting the report rate in the range from 3 to 14. + + +For debugging purposes the driver provides a few files in the debug +filesystem (if available in the kernel). In /sys/kernel/debug/edt_ft5x06 +you'll find the following files: + +num_x, num_y: + (readonly) contains the number of sensor fields in X- and + Y-direction. + +mode: + allows switching the sensor between "factory mode" and "operation + mode" by writing "1" or "0" to it. In factory mode (1) it is + possible to get the raw data from the sensor. Note that in factory + mode regular events don't get delivered and the options described + above are unavailable. + +raw_data: + contains num_x * num_y big endian 16 bit values describing the raw + values for each sensor field. Note that each read() call on this + files triggers a new readout. It is recommended to provide a buffer + big enough to contain num_x * num_y * 2 bytes. + +Note that reading raw_data gives a I/O error when the device is not in factory +mode. The same happens when reading/writing to the parameter files when the +device is not in regular operation mode. diff --git a/Documentation/input/devices/elantech.rst b/Documentation/input/devices/elantech.rst new file mode 100644 index 0000000000000000000000000000000000000000..c3374a7ce7af84b3b9adbd1c9f5ce20dbbd665b1 --- /dev/null +++ b/Documentation/input/devices/elantech.rst @@ -0,0 +1,841 @@ +Elantech Touchpad Driver +======================== + + Copyright (C) 2007-2008 Arjan Opmeer + + Extra information for hardware version 1 found and + provided by Steve Havelka + + Version 2 (EeePC) hardware support based on patches + received from Woody at Xandros and forwarded to me + by user StewieGriffin at the eeeuser.com forum + +.. Contents + + 1. Introduction + 2. Extra knobs + 3. Differentiating hardware versions + 4. Hardware version 1 + 4.1 Registers + 4.2 Native relative mode 4 byte packet format + 4.3 Native absolute mode 4 byte packet format + 5. Hardware version 2 + 5.1 Registers + 5.2 Native absolute mode 6 byte packet format + 5.2.1 Parity checking and packet re-synchronization + 5.2.2 One/Three finger touch + 5.2.3 Two finger touch + 6. Hardware version 3 + 6.1 Registers + 6.2 Native absolute mode 6 byte packet format + 6.2.1 One/Three finger touch + 6.2.2 Two finger touch + 7. Hardware version 4 + 7.1 Registers + 7.2 Native absolute mode 6 byte packet format + 7.2.1 Status packet + 7.2.2 Head packet + 7.2.3 Motion packet + 8. Trackpoint (for Hardware version 3 and 4) + 8.1 Registers + 8.2 Native relative mode 6 byte packet format + 8.2.1 Status Packet + + + +Introduction +~~~~~~~~~~~~ + +Currently the Linux Elantech touchpad driver is aware of four different +hardware versions unimaginatively called version 1,version 2, version 3 +and version 4. Version 1 is found in "older" laptops and uses 4 bytes per +packet. Version 2 seems to be introduced with the EeePC and uses 6 bytes +per packet, and provides additional features such as position of two fingers, +and width of the touch. Hardware version 3 uses 6 bytes per packet (and +for 2 fingers the concatenation of two 6 bytes packets) and allows tracking +of up to 3 fingers. Hardware version 4 uses 6 bytes per packet, and can +combine a status packet with multiple head or motion packets. Hardware version +4 allows tracking up to 5 fingers. + +Some Hardware version 3 and version 4 also have a trackpoint which uses a +separate packet format. It is also 6 bytes per packet. + +The driver tries to support both hardware versions and should be compatible +with the Xorg Synaptics touchpad driver and its graphical configuration +utilities. + +Note that a mouse button is also associated with either the touchpad or the +trackpoint when a trackpoint is available. Disabling the Touchpad in xorg +(TouchPadOff=0) will also disable the buttons associated with the touchpad. + +Additionally the operation of the touchpad can be altered by adjusting the +contents of some of its internal registers. These registers are represented +by the driver as sysfs entries under /sys/bus/serio/drivers/psmouse/serio? +that can be read from and written to. + +Currently only the registers for hardware version 1 are somewhat understood. +Hardware version 2 seems to use some of the same registers but it is not +known whether the bits in the registers represent the same thing or might +have changed their meaning. + +On top of that, some register settings have effect only when the touchpad is +in relative mode and not in absolute mode. As the Linux Elantech touchpad +driver always puts the hardware into absolute mode not all information +mentioned below can be used immediately. But because there is no freely +available Elantech documentation the information is provided here anyway for +completeness sake. + + +Extra knobs +~~~~~~~~~~~ + +Currently the Linux Elantech touchpad driver provides three extra knobs under +/sys/bus/serio/drivers/psmouse/serio? for the user. + +* debug + + Turn different levels of debugging ON or OFF. + + By echoing "0" to this file all debugging will be turned OFF. + + Currently a value of "1" will turn on some basic debugging and a value of + "2" will turn on packet debugging. For hardware version 1 the default is + OFF. For version 2 the default is "1". + + Turning packet debugging on will make the driver dump every packet + received to the syslog before processing it. Be warned that this can + generate quite a lot of data! + +* paritycheck + + Turns parity checking ON or OFF. + + By echoing "0" to this file parity checking will be turned OFF. Any + non-zero value will turn it ON. For hardware version 1 the default is ON. + For version 2 the default it is OFF. + + Hardware version 1 provides basic data integrity verification by + calculating a parity bit for the last 3 bytes of each packet. The driver + can check these bits and reject any packet that appears corrupted. Using + this knob you can bypass that check. + + Hardware version 2 does not provide the same parity bits. Only some basic + data consistency checking can be done. For now checking is disabled by + default. Currently even turning it on will do nothing. + +* crc_enabled + + Sets crc_enabled to 0/1. The name "crc_enabled" is the official name of + this integrity check, even though it is not an actual cyclic redundancy + check. + + Depending on the state of crc_enabled, certain basic data integrity + verification is done by the driver on hardware version 3 and 4. The + driver will reject any packet that appears corrupted. Using this knob, + The state of crc_enabled can be altered with this knob. + + Reading the crc_enabled value will show the active value. Echoing + "0" or "1" to this file will set the state to "0" or "1". + +Differentiating hardware versions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To detect the hardware version, read the version number as param[0].param[1].param[2]:: + + 4 bytes version: (after the arrow is the name given in the Dell-provided driver) + 02.00.22 => EF013 + 02.06.00 => EF019 + +In the wild, there appear to be more versions, such as 00.01.64, 01.00.21, +02.00.00, 02.00.04, 02.00.06:: + + 6 bytes: + 02.00.30 => EF113 + 02.08.00 => EF023 + 02.08.XX => EF123 + 02.0B.00 => EF215 + 04.01.XX => Scroll_EF051 + 04.02.XX => EF051 + +In the wild, there appear to be more versions, such as 04.03.01, 04.04.11. There +appears to be almost no difference, except for EF113, which does not report +pressure/width and has different data consistency checks. + +Probably all the versions with param[0] <= 01 can be considered as +4 bytes/firmware 1. The versions < 02.08.00, with the exception of 02.00.30, as +4 bytes/firmware 2. Everything >= 02.08.00 can be considered as 6 bytes. + + +Hardware version 1 +~~~~~~~~~~~~~~~~~~ + +Registers +--------- + +By echoing a hexadecimal value to a register it contents can be altered. + +For example:: + + echo -n 0x16 > reg_10 + +* reg_10:: + + bit 7 6 5 4 3 2 1 0 + B C T D L A S E + + E: 1 = enable smart edges unconditionally + S: 1 = enable smart edges only when dragging + A: 1 = absolute mode (needs 4 byte packets, see reg_11) + L: 1 = enable drag lock (see reg_22) + D: 1 = disable dynamic resolution + T: 1 = disable tapping + C: 1 = enable corner tap + B: 1 = swap left and right button + +* reg_11:: + + bit 7 6 5 4 3 2 1 0 + 1 0 0 H V 1 F P + + P: 1 = enable parity checking for relative mode + F: 1 = enable native 4 byte packet mode + V: 1 = enable vertical scroll area + H: 1 = enable horizontal scroll area + +* reg_20:: + + single finger width? + +* reg_21:: + + scroll area width (small: 0x40 ... wide: 0xff) + +* reg_22:: + + drag lock time out (short: 0x14 ... long: 0xfe; + 0xff = tap again to release) + +* reg_23:: + + tap make timeout? + +* reg_24:: + + tap release timeout? + +* reg_25:: + + smart edge cursor speed (0x02 = slow, 0x03 = medium, 0x04 = fast) + +* reg_26:: + + smart edge activation area width? + + +Native relative mode 4 byte packet format +----------------------------------------- + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + c c p2 p1 1 M R L + + L, R, M = 1 when Left, Right, Middle mouse button pressed + some models have M as byte 3 odd parity bit + when parity checking is enabled (reg_11, P = 1): + p1..p2 = byte 1 and 2 odd parity bit + c = 1 when corner tap detected + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + dx7 dx6 dx5 dx4 dx3 dx2 dx1 dx0 + + dx7..dx0 = x movement; positive = right, negative = left + byte 1 = 0xf0 when corner tap detected + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + dy7 dy6 dy5 dy4 dy3 dy2 dy1 dy0 + + dy7..dy0 = y movement; positive = up, negative = down + +byte 3:: + + parity checking enabled (reg_11, P = 1): + + bit 7 6 5 4 3 2 1 0 + w h n1 n0 ds3 ds2 ds1 ds0 + + normally: + ds3..ds0 = scroll wheel amount and direction + positive = down or left + negative = up or right + when corner tap detected: + ds0 = 1 when top right corner tapped + ds1 = 1 when bottom right corner tapped + ds2 = 1 when bottom left corner tapped + ds3 = 1 when top left corner tapped + n1..n0 = number of fingers on touchpad + only models with firmware 2.x report this, models with + firmware 1.x seem to map one, two and three finger taps + directly to L, M and R mouse buttons + h = 1 when horizontal scroll action + w = 1 when wide finger touch? + + otherwise (reg_11, P = 0): + + bit 7 6 5 4 3 2 1 0 + ds7 ds6 ds5 ds4 ds3 ds2 ds1 ds0 + + ds7..ds0 = vertical scroll amount and direction + negative = up + positive = down + + +Native absolute mode 4 byte packet format +----------------------------------------- + +EF013 and EF019 have a special behaviour (due to a bug in the firmware?), and +when 1 finger is touching, the first 2 position reports must be discarded. +This counting is reset whenever a different number of fingers is reported. + +byte 0:: + + firmware version 1.x: + + bit 7 6 5 4 3 2 1 0 + D U p1 p2 1 p3 R L + + L, R = 1 when Left, Right mouse button pressed + p1..p3 = byte 1..3 odd parity bit + D, U = 1 when rocker switch pressed Up, Down + + firmware version 2.x: + + bit 7 6 5 4 3 2 1 0 + n1 n0 p2 p1 1 p3 R L + + L, R = 1 when Left, Right mouse button pressed + p1..p3 = byte 1..3 odd parity bit + n1..n0 = number of fingers on touchpad + +byte 1:: + + firmware version 1.x: + + bit 7 6 5 4 3 2 1 0 + f 0 th tw x9 x8 y9 y8 + + tw = 1 when two finger touch + th = 1 when three finger touch + f = 1 when finger touch + + firmware version 2.x: + + bit 7 6 5 4 3 2 1 0 + . . . . x9 x8 y9 y8 + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + + x9..x0 = absolute x value (horizontal) + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + y9..y0 = absolute y value (vertical) + + +Hardware version 2 +~~~~~~~~~~~~~~~~~~ + + +Registers +--------- + +By echoing a hexadecimal value to a register it contents can be altered. + +For example:: + + echo -n 0x56 > reg_10 + +* reg_10:: + + bit 7 6 5 4 3 2 1 0 + 0 1 0 1 0 1 D 0 + + D: 1 = enable drag and drop + +* reg_11:: + + bit 7 6 5 4 3 2 1 0 + 1 0 0 0 S 0 1 0 + + S: 1 = enable vertical scroll + +* reg_21:: + + unknown (0x00) + +* reg_22:: + + drag and drop release time out (short: 0x70 ... long 0x7e; + 0x7f = never i.e. tap again to release) + + +Native absolute mode 6 byte packet format +----------------------------------------- + +Parity checking and packet re-synchronization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There is no parity checking, however some consistency checks can be performed. + +For instance for EF113:: + + SA1= packet[0]; + A1 = packet[1]; + B1 = packet[2]; + SB1= packet[3]; + C1 = packet[4]; + D1 = packet[5]; + if( (((SA1 & 0x3C) != 0x3C) && ((SA1 & 0xC0) != 0x80)) || // check Byte 1 + (((SA1 & 0x0C) != 0x0C) && ((SA1 & 0xC0) == 0x80)) || // check Byte 1 (one finger pressed) + (((SA1 & 0xC0) != 0x80) && (( A1 & 0xF0) != 0x00)) || // check Byte 2 + (((SB1 & 0x3E) != 0x38) && ((SA1 & 0xC0) != 0x80)) || // check Byte 4 + (((SB1 & 0x0E) != 0x08) && ((SA1 & 0xC0) == 0x80)) || // check Byte 4 (one finger pressed) + (((SA1 & 0xC0) != 0x80) && (( C1 & 0xF0) != 0x00)) ) // check Byte 5 + // error detected + +For all the other ones, there are just a few constant bits:: + + if( ((packet[0] & 0x0C) != 0x04) || + ((packet[3] & 0x0f) != 0x02) ) + // error detected + + +In case an error is detected, all the packets are shifted by one (and packet[0] is discarded). + +One/Three finger touch +^^^^^^^^^^^^^^^^^^^^^^ + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + n1 n0 w3 w2 . . R L + + L, R = 1 when Left, Right mouse button pressed + n1..n0 = number of fingers on touchpad + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + p7 p6 p5 p4 x11 x10 x9 x8 + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + + x11..x0 = absolute x value (horizontal) + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + n4 vf w1 w0 . . . b2 + + n4 = set if more than 3 fingers (only in 3 fingers mode) + vf = a kind of flag ? (only on EF123, 0 when finger is over one + of the buttons, 1 otherwise) + w3..w0 = width of the finger touch (not EF113) + b2 (on EF113 only, 0 otherwise), b2.R.L indicates one button pressed: + 0 = none + 1 = Left + 2 = Right + 3 = Middle (Left and Right) + 4 = Forward + 5 = Back + 6 = Another one + 7 = Another one + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + p3 p1 p2 p0 y11 y10 y9 y8 + + p7..p0 = pressure (not EF113) + +byte 5:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + y11..y0 = absolute y value (vertical) + + +Two finger touch +^^^^^^^^^^^^^^^^ + +Note that the two pairs of coordinates are not exactly the coordinates of the +two fingers, but only the pair of the lower-left and upper-right coordinates. +So the actual fingers might be situated on the other diagonal of the square +defined by these two points. + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + n1 n0 ay8 ax8 . . R L + + L, R = 1 when Left, Right mouse button pressed + n1..n0 = number of fingers on touchpad + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + ax7 ax6 ax5 ax4 ax3 ax2 ax1 ax0 + + ax8..ax0 = lower-left finger absolute x value + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + ay7 ay6 ay5 ay4 ay3 ay2 ay1 ay0 + + ay8..ay0 = lower-left finger absolute y value + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + . . by8 bx8 . . . . + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + bx7 bx6 bx5 bx4 bx3 bx2 bx1 bx0 + + bx8..bx0 = upper-right finger absolute x value + +byte 5:: + + bit 7 6 5 4 3 2 1 0 + by7 by8 by5 by4 by3 by2 by1 by0 + + by8..by0 = upper-right finger absolute y value + +Hardware version 3 +~~~~~~~~~~~~~~~~~~ + +Registers +--------- + +* reg_10:: + + bit 7 6 5 4 3 2 1 0 + 0 0 0 0 R F T A + + A: 1 = enable absolute tracking + T: 1 = enable two finger mode auto correct + F: 1 = disable ABS Position Filter + R: 1 = enable real hardware resolution + +Native absolute mode 6 byte packet format +----------------------------------------- + +1 and 3 finger touch shares the same 6-byte packet format, except that +3 finger touch only reports the position of the center of all three fingers. + +Firmware would send 12 bytes of data for 2 finger touch. + +Note on debounce: +In case the box has unstable power supply or other electricity issues, or +when number of finger changes, F/W would send "debounce packet" to inform +driver that the hardware is in debounce status. +The debouce packet has the following signature:: + + byte 0: 0xc4 + byte 1: 0xff + byte 2: 0xff + byte 3: 0x02 + byte 4: 0xff + byte 5: 0xff + +When we encounter this kind of packet, we just ignore it. + +One/Three finger touch +^^^^^^^^^^^^^^^^^^^^^^ + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + n1 n0 w3 w2 0 1 R L + + L, R = 1 when Left, Right mouse button pressed + n1..n0 = number of fingers on touchpad + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + p7 p6 p5 p4 x11 x10 x9 x8 + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + + x11..x0 = absolute x value (horizontal) + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + 0 0 w1 w0 0 0 1 0 + + w3..w0 = width of the finger touch + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + p3 p1 p2 p0 y11 y10 y9 y8 + + p7..p0 = pressure + +byte 5:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + y11..y0 = absolute y value (vertical) + +Two finger touch +^^^^^^^^^^^^^^^^ + +The packet format is exactly the same for two finger touch, except the hardware +sends two 6 byte packets. The first packet contains data for the first finger, +the second packet has data for the second finger. So for two finger touch a +total of 12 bytes are sent. + +Hardware version 4 +~~~~~~~~~~~~~~~~~~ + +Registers +--------- + +* reg_07:: + + bit 7 6 5 4 3 2 1 0 + 0 0 0 0 0 0 0 A + + A: 1 = enable absolute tracking + +Native absolute mode 6 byte packet format +----------------------------------------- + +v4 hardware is a true multitouch touchpad, capable of tracking up to 5 fingers. +Unfortunately, due to PS/2's limited bandwidth, its packet format is rather +complex. + +Whenever the numbers or identities of the fingers changes, the hardware sends a +status packet to indicate how many and which fingers is on touchpad, followed by +head packets or motion packets. A head packet contains data of finger id, finger +position (absolute x, y values), width, and pressure. A motion packet contains +two fingers' position delta. + +For example, when status packet tells there are 2 fingers on touchpad, then we +can expect two following head packets. If the finger status doesn't change, +the following packets would be motion packets, only sending delta of finger +position, until we receive a status packet. + +One exception is one finger touch. when a status packet tells us there is only +one finger, the hardware would just send head packets afterwards. + +Status packet +^^^^^^^^^^^^^ + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + . . . . 0 1 R L + + L, R = 1 when Left, Right mouse button pressed + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + . . . ft4 ft3 ft2 ft1 ft0 + + ft4 ft3 ft2 ft1 ft0 ftn = 1 when finger n is on touchpad + +byte 2:: + + not used + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + . . . 1 0 0 0 0 + + constant bits + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + p . . . . . . . + + p = 1 for palm + +byte 5:: + + not used + +Head packet +^^^^^^^^^^^ + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + w3 w2 w1 w0 0 1 R L + + L, R = 1 when Left, Right mouse button pressed + w3..w0 = finger width (spans how many trace lines) + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + p7 p6 p5 p4 x11 x10 x9 x8 + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + + x11..x0 = absolute x value (horizontal) + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + id2 id1 id0 1 0 0 0 1 + + id2..id0 = finger id + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + p3 p1 p2 p0 y11 y10 y9 y8 + + p7..p0 = pressure + +byte 5:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + y11..y0 = absolute y value (vertical) + +Motion packet +^^^^^^^^^^^^^ + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + id2 id1 id0 w 0 1 R L + + L, R = 1 when Left, Right mouse button pressed + id2..id0 = finger id + w = 1 when delta overflows (> 127 or < -128), in this case + firmware sends us (delta x / 5) and (delta y / 5) + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + + x7..x0 = delta x (two's complement) + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + y7..y0 = delta y (two's complement) + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + id2 id1 id0 1 0 0 1 0 + + id2..id0 = finger id + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + + x7..x0 = delta x (two's complement) + +byte 5:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + y7..y0 = delta y (two's complement) + + byte 0 ~ 2 for one finger + byte 3 ~ 5 for another + + +Trackpoint (for Hardware version 3 and 4) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Registers +--------- + +No special registers have been identified. + +Native relative mode 6 byte packet format +----------------------------------------- + +Status Packet +^^^^^^^^^^^^^ + +byte 0:: + + bit 7 6 5 4 3 2 1 0 + 0 0 sx sy 0 M R L + +byte 1:: + + bit 7 6 5 4 3 2 1 0 + ~sx 0 0 0 0 0 0 0 + +byte 2:: + + bit 7 6 5 4 3 2 1 0 + ~sy 0 0 0 0 0 0 0 + +byte 3:: + + bit 7 6 5 4 3 2 1 0 + 0 0 ~sy ~sx 0 1 1 0 + +byte 4:: + + bit 7 6 5 4 3 2 1 0 + x7 x6 x5 x4 x3 x2 x1 x0 + +byte 5:: + + bit 7 6 5 4 3 2 1 0 + y7 y6 y5 y4 y3 y2 y1 y0 + + + x and y are written in two's complement spread + over 9 bits with sx/sy the relative top bit and + x7..x0 and y7..y0 the lower bits. + ~sx is the inverse of sx, ~sy is the inverse of sy. + The sign of y is opposite to what the input driver + expects for a relative movement diff --git a/Documentation/input/devices/gpio-tilt.rst b/Documentation/input/devices/gpio-tilt.rst new file mode 100644 index 0000000000000000000000000000000000000000..fa6e64570aa72f622b5484ab4b8fa90e6ac8aaa5 --- /dev/null +++ b/Documentation/input/devices/gpio-tilt.rst @@ -0,0 +1,103 @@ +Driver for tilt-switches connected via GPIOs +============================================ + +Generic driver to read data from tilt switches connected via gpios. +Orientation can be provided by one or more than one tilt switches, +i.e. each tilt switch providing one axis, and the number of axes +is also not limited. + + +Data structures +--------------- + +The array of struct gpio in the gpios field is used to list the gpios +that represent the current tilt state. + +The array of struct gpio_tilt_axis describes the axes that are reported +to the input system. The values set therein are used for the +input_set_abs_params calls needed to init the axes. + +The array of struct gpio_tilt_state maps gpio states to the corresponding +values to report. The gpio state is represented as a bitfield where the +bit-index corresponds to the index of the gpio in the struct gpio array. +In the same manner the values stored in the axes array correspond to +the elements of the gpio_tilt_axis-array. + + +Example +------- + +Example configuration for a single TS1003 tilt switch that rotates around +one axis in 4 steps and emits the current tilt via two GPIOs:: + + static int sg060_tilt_enable(struct device *dev) { + /* code to enable the sensors */ + }; + + static void sg060_tilt_disable(struct device *dev) { + /* code to disable the sensors */ + }; + + static struct gpio sg060_tilt_gpios[] = { + { SG060_TILT_GPIO_SENSOR1, GPIOF_IN, "tilt_sensor1" }, + { SG060_TILT_GPIO_SENSOR2, GPIOF_IN, "tilt_sensor2" }, + }; + + static struct gpio_tilt_state sg060_tilt_states[] = { + { + .gpios = (0 << 1) | (0 << 0), + .axes = (int[]) { + 0, + }, + }, { + .gpios = (0 << 1) | (1 << 0), + .axes = (int[]) { + 1, /* 90 degrees */ + }, + }, { + .gpios = (1 << 1) | (1 << 0), + .axes = (int[]) { + 2, /* 180 degrees */ + }, + }, { + .gpios = (1 << 1) | (0 << 0), + .axes = (int[]) { + 3, /* 270 degrees */ + }, + }, + }; + + static struct gpio_tilt_axis sg060_tilt_axes[] = { + { + .axis = ABS_RY, + .min = 0, + .max = 3, + .fuzz = 0, + .flat = 0, + }, + }; + + static struct gpio_tilt_platform_data sg060_tilt_pdata= { + .gpios = sg060_tilt_gpios, + .nr_gpios = ARRAY_SIZE(sg060_tilt_gpios), + + .axes = sg060_tilt_axes, + .nr_axes = ARRAY_SIZE(sg060_tilt_axes), + + .states = sg060_tilt_states, + .nr_states = ARRAY_SIZE(sg060_tilt_states), + + .debounce_interval = 100, + + .poll_interval = 1000, + .enable = sg060_tilt_enable, + .disable = sg060_tilt_disable, + }; + + static struct platform_device sg060_device_tilt = { + .name = "gpio-tilt-polled", + .id = -1, + .dev = { + .platform_data = &sg060_tilt_pdata, + }, + }; diff --git a/Documentation/input/devices/iforce-protocol.rst b/Documentation/input/devices/iforce-protocol.rst new file mode 100644 index 0000000000000000000000000000000000000000..8634beac3fdb67b72d137012664f824eaeb2008c --- /dev/null +++ b/Documentation/input/devices/iforce-protocol.rst @@ -0,0 +1,381 @@ +=============== +Iforce Protocol +=============== + +:Author: Johann Deneux + +Home page at ``_ + +:Additions: by Vojtech Pavlik. + + +Introduction +============ + +This document describes what I managed to discover about the protocol used to +specify force effects to I-Force 2.0 devices. None of this information comes +from Immerse. That's why you should not trust what is written in this +document. This document is intended to help understanding the protocol. +This is not a reference. Comments and corrections are welcome. To contact me, +send an email to: johann.deneux@gmail.com + +.. warning:: + + I shall not be held responsible for any damage or harm caused if you try to + send data to your I-Force device based on what you read in this document. + +Preliminary Notes +================= + +All values are hexadecimal with big-endian encoding (msb on the left). Beware, +values inside packets are encoded using little-endian. Bytes whose roles are +unknown are marked ??? Information that needs deeper inspection is marked (?) + +General form of a packet +------------------------ + +This is how packets look when the device uses the rs232 to communicate. + +== == === ==== == +2B OP LEN DATA CS +== == === ==== == + +CS is the checksum. It is equal to the exclusive or of all bytes. + +When using USB: + +== ==== +OP DATA +== ==== + +The 2B, LEN and CS fields have disappeared, probably because USB handles +frames and data corruption is handled or unsignificant. + +First, I describe effects that are sent by the device to the computer + +Device input state +================== + +This packet is used to indicate the state of each button and the value of each +axis:: + + OP= 01 for a joystick, 03 for a wheel + LEN= Varies from device to device + 00 X-Axis lsb + 01 X-Axis msb + 02 Y-Axis lsb, or gas pedal for a wheel + 03 Y-Axis msb, or brake pedal for a wheel + 04 Throttle + 05 Buttons + 06 Lower 4 bits: Buttons + Upper 4 bits: Hat + 07 Rudder + +Device effects states +===================== + +:: + + OP= 02 + LEN= Varies + 00 ? Bit 1 (Value 2) is the value of the deadman switch + 01 Bit 8 is set if the effect is playing. Bits 0 to 7 are the effect id. + 02 ?? + 03 Address of parameter block changed (lsb) + 04 Address of parameter block changed (msb) + 05 Address of second parameter block changed (lsb) + ... depending on the number of parameter blocks updated + +Force effect +------------ + +:: + + OP= 01 + LEN= 0e + 00 Channel (when playing several effects at the same time, each must + be assigned a channel) + 01 Wave form + Val 00 Constant + Val 20 Square + Val 21 Triangle + Val 22 Sine + Val 23 Sawtooth up + Val 24 Sawtooth down + Val 40 Spring (Force = f(pos)) + Val 41 Friction (Force = f(velocity)) and Inertia + (Force = f(acceleration)) + + + 02 Axes affected and trigger + Bits 4-7: Val 2 = effect along one axis. Byte 05 indicates direction + Val 4 = X axis only. Byte 05 must contain 5a + Val 8 = Y axis only. Byte 05 must contain b4 + Val c = X and Y axes. Bytes 05 must contain 60 + Bits 0-3: Val 0 = No trigger + Val x+1 = Button x triggers the effect + When the whole byte is 0, cancel the previously set trigger + + 03-04 Duration of effect (little endian encoding, in ms) + + 05 Direction of effect, if applicable. Else, see 02 for value to assign. + + 06-07 Minimum time between triggering. + + 08-09 Address of periodicity or magnitude parameters + 0a-0b Address of attack and fade parameters, or ffff if none. + *or* + 08-09 Address of interactive parameters for X-axis, + or ffff if not applicable + 0a-0b Address of interactive parameters for Y-axis, + or ffff if not applicable + + 0c-0d Delay before execution of effect (little endian encoding, in ms) + + +Time based parameters +--------------------- + +Attack and fade +^^^^^^^^^^^^^^^ + +:: + + OP= 02 + LEN= 08 + 00-01 Address where to store the parameters + 02-03 Duration of attack (little endian encoding, in ms) + 04 Level at end of attack. Signed byte. + 05-06 Duration of fade. + 07 Level at end of fade. + +Magnitude +^^^^^^^^^ + +:: + + OP= 03 + LEN= 03 + 00-01 Address + 02 Level. Signed byte. + +Periodicity +^^^^^^^^^^^ + +:: + + OP= 04 + LEN= 07 + 00-01 Address + 02 Magnitude. Signed byte. + 03 Offset. Signed byte. + 04 Phase. Val 00 = 0 deg, Val 40 = 90 degs. + 05-06 Period (little endian encoding, in ms) + +Interactive parameters +---------------------- + +:: + + OP= 05 + LEN= 0a + 00-01 Address + 02 Positive Coeff + 03 Negative Coeff + 04+05 Offset (center) + 06+07 Dead band (Val 01F4 = 5000 (decimal)) + 08 Positive saturation (Val 0a = 1000 (decimal) Val 64 = 10000 (decimal)) + 09 Negative saturation + +The encoding is a bit funny here: For coeffs, these are signed values. The +maximum value is 64 (100 decimal), the min is 9c. +For the offset, the minimum value is FE0C, the maximum value is 01F4. +For the deadband, the minimum value is 0, the max is 03E8. + +Controls +-------- + +:: + + OP= 41 + LEN= 03 + 00 Channel + 01 Start/Stop + Val 00: Stop + Val 01: Start and play once. + Val 41: Start and play n times (See byte 02 below) + 02 Number of iterations n. + +Init +---- + + +Querying features +^^^^^^^^^^^^^^^^^ +:: + + OP= ff + Query command. Length varies according to the query type. + The general format of this packet is: + ff 01 QUERY [INDEX] CHECKSUM + responses are of the same form: + FF LEN QUERY VALUE_QUERIED CHECKSUM2 + where LEN = 1 + length(VALUE_QUERIED) + +Query ram size +~~~~~~~~~~~~~~ + +:: + + QUERY = 42 ('B'uffer size) + +The device should reply with the same packet plus two additional bytes +containing the size of the memory: +ff 03 42 03 e8 CS would mean that the device has 1000 bytes of ram available. + +Query number of effects +~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + QUERY = 4e ('N'umber of effects) + +The device should respond by sending the number of effects that can be played +at the same time (one byte) +ff 02 4e 14 CS would stand for 20 effects. + +Vendor's id +~~~~~~~~~~~ + +:: + + QUERY = 4d ('M'anufacturer) + +Query the vendors'id (2 bytes) + +Product id +~~~~~~~~~~ + +:: + + QUERY = 50 ('P'roduct) + +Query the product id (2 bytes) + +Open device +~~~~~~~~~~~ + +:: + + QUERY = 4f ('O'pen) + +No data returned. + +Close device +~~~~~~~~~~~~ + +:: + + QUERY = 43 ('C')lose + +No data returned. + +Query effect +~~~~~~~~~~~~ + +:: + + QUERY = 45 ('E') + +Send effect type. +Returns nonzero if supported (2 bytes) + +Firmware Version +~~~~~~~~~~~~~~~~ + +:: + + QUERY = 56 ('V'ersion) + +Sends back 3 bytes - major, minor, subminor + +Initialisation of the device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Set Control +~~~~~~~~~~~ + +.. note:: + Device dependent, can be different on different models! + +:: + + OP= 40 [] + LEN= 2 or 3 + 00 Idx + Idx 00 Set dead zone (0..2048) + Idx 01 Ignore Deadman sensor (0..1) + Idx 02 Enable comm watchdog (0..1) + Idx 03 Set the strength of the spring (0..100) + Idx 04 Enable or disable the spring (0/1) + Idx 05 Set axis saturation threshold (0..2048) + +Set Effect State +~~~~~~~~~~~~~~~~ + +:: + + OP= 42 + LEN= 1 + 00 State + Bit 3 Pause force feedback + Bit 2 Enable force feedback + Bit 0 Stop all effects + +Set overall +~~~~~~~~~~~ + +:: + + OP= 43 + LEN= 1 + 00 Gain + Val 00 = 0% + Val 40 = 50% + Val 80 = 100% + +Parameter memory +---------------- + +Each device has a certain amount of memory to store parameters of effects. +The amount of RAM may vary, I encountered values from 200 to 1000 bytes. Below +is the amount of memory apparently needed for every set of parameters: + + - period : 0c + - magnitude : 02 + - attack and fade : 0e + - interactive : 08 + +Appendix: How to study the protocol? +==================================== + +1. Generate effects using the force editor provided with the DirectX SDK, or +use Immersion Studio (freely available at their web site in the developer section: +www.immersion.com) +2. Start a soft spying RS232 or USB (depending on where you connected your +joystick/wheel). I used ComPortSpy from fCoder (alpha version!) +3. Play the effect, and watch what happens on the spy screen. + +A few words about ComPortSpy: +At first glance, this software seems, hum, well... buggy. In fact, data appear with a +few seconds latency. Personally, I restart it every time I play an effect. +Remember it's free (as in free beer) and alpha! + +URLS +==== + +Check http://www.immerse.com for Immersion Studio, +and http://www.fcoder.com for ComPortSpy. + + +I-Force is trademark of Immersion Corp. diff --git a/Documentation/input/devices/index.rst b/Documentation/input/devices/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..95a453782bad377d234e101700d031f69d9f26bd --- /dev/null +++ b/Documentation/input/devices/index.rst @@ -0,0 +1,19 @@ +Driver-specific documentation +============================= + +This section provides information about various devices supported by the +Linux kernel, their protocols, and driver details. + +.. toctree:: + :maxdepth: 2 + :numbered: + :glob: + + * + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/input/devices/joystick-parport.rst b/Documentation/input/devices/joystick-parport.rst new file mode 100644 index 0000000000000000000000000000000000000000..e8ce16ee799a2a089312e842594e1248b1350ef6 --- /dev/null +++ b/Documentation/input/devices/joystick-parport.rst @@ -0,0 +1,611 @@ +.. include:: + +.. _joystick-parport: + +============================== +Parallel Port Joystick Drivers +============================== + +:Copyright: |copy| 1998-2000 Vojtech Pavlik +:Copyright: |copy| 1998 Andree Borrmann + + +Sponsored by SuSE + +Disclaimer +========== + +Any information in this file is provided as-is, without any guarantee that +it will be true. So, use it at your own risk. The possible damages that can +happen include burning your parallel port, and/or the sticks and joystick +and maybe even more. Like when a lightning kills you it is not our problem. + +Introduction +============ + +The joystick parport drivers are used for joysticks and gamepads not +originally designed for PCs and other computers Linux runs on. Because of +that, PCs usually lack the right ports to connect these devices to. Parallel +port, because of its ability to change single bits at will, and providing +both output and input bits is the most suitable port on the PC for +connecting such devices. + +Devices supported +================= + +Many console and 8-bit computer gamepads and joysticks are supported. The +following subsections discuss usage of each. + +NES and SNES +------------ + +The Nintendo Entertainment System and Super Nintendo Entertainment System +gamepads are widely available, and easy to get. Also, they are quite easy to +connect to a PC, and don't need much processing speed (108 us for NES and +165 us for SNES, compared to about 1000 us for PC gamepads) to communicate +with them. + +All NES and SNES use the same synchronous serial protocol, clocked from +the computer's side (and thus timing insensitive). To allow up to 5 NES +and/or SNES gamepads and/or SNES mice connected to the parallel port at once, +the output lines of the parallel port are shared, while one of 5 available +input lines is assigned to each gamepad. + +This protocol is handled by the gamecon.c driver, so that's the one +you'll use for NES, SNES gamepads and SNES mice. + +The main problem with PC parallel ports is that they don't have +5V power +source on any of their pins. So, if you want a reliable source of power +for your pads, use either keyboard or joystick port, and make a pass-through +cable. You can also pull the power directly from the power supply (the red +wire is +5V). + +If you want to use the parallel port only, you can take the power is from +some data pin. For most gamepad and parport implementations only one pin is +needed, and I'd recommend pin 9 for that, the highest data bit. On the other +hand, if you are not planning to use anything else than NES / SNES on the +port, anything between and including pin 4 and pin 9 will work:: + + (pin 9) -----> Power + +Unfortunately, there are pads that need a lot more of power, and parallel +ports that can't give much current through the data pins. If this is your +case, you'll need to use diodes (as a prevention of destroying your parallel +port), and combine the currents of two or more data bits together:: + + Diodes + (pin 9) ----|>|-------+------> Power + | + (pin 8) ----|>|-------+ + | + (pin 7) ----|>|-------+ + | + : + | + (pin 4) ----|>|-------+ + +Ground is quite easy. On PC's parallel port the ground is on any of the +pins from pin 18 to pin 25. So use any pin of these you like for the ground:: + + (pin 18) -----> Ground + +NES and SNES pads have two input bits, Clock and Latch, which drive the +serial transfer. These are connected to pins 2 and 3 of the parallel port, +respectively:: + + (pin 2) -----> Clock + (pin 3) -----> Latch + +And the last thing is the NES / SNES data wire. Only that isn't shared and +each pad needs its own data pin. The parallel port pins are:: + + (pin 10) -----> Pad 1 data + (pin 11) -----> Pad 2 data + (pin 12) -----> Pad 3 data + (pin 13) -----> Pad 4 data + (pin 15) -----> Pad 5 data + +Note that pin 14 is not used, since it is not an input pin on the parallel +port. + +This is everything you need on the PC's side of the connection, now on to +the gamepads side. The NES and SNES have different connectors. Also, there +are quite a lot of NES clones, and because Nintendo used proprietary +connectors for their machines, the cloners couldn't and used standard D-Cannon +connectors. Anyway, if you've got a gamepad, and it has buttons A, B, Turbo +A, Turbo B, Select and Start, and is connected through 5 wires, then it is +either a NES or NES clone and will work with this connection. SNES gamepads +also use 5 wires, but have more buttons. They will work as well, of course:: + + Pinout for NES gamepads Pinout for SNES gamepads and mice + + +----> Power +-----------------------\ + | 7 | o o o o | x x o | 1 + 5 +---------+ 7 +-----------------------/ + | x x o \ | | | | | + | o o o o | | | | | +-> Ground + 4 +------------+ 1 | | | +------------> Data + | | | | | | +---------------> Latch + | | | +-> Ground | +------------------> Clock + | | +----> Clock +---------------------> Power + | +-------> Latch + +----------> Data + + Pinout for NES clone (db9) gamepads Pinout for NES clone (db15) gamepads + + +---------> Clock +-----------------> Data + | +-------> Latch | +---> Ground + | | +-----> Data | | + | | | ___________________ + _____________ 8 \ o x x x x x x o / 1 + 5 \ x o o o x / 1 \ o x x o x x o / + \ x o x o / 15 `~~~~~~~~~~~~~' 9 + 9 `~~~~~~~' 6 | | | + | | | | +----> Clock + | +----> Power | +----------> Latch + +--------> Ground +----------------> Power + +Multisystem joysticks +--------------------- + +In the era of 8-bit machines, there was something like de-facto standard +for joystick ports. They were all digital, and all used D-Cannon 9 pin +connectors (db9). Because of that, a single joystick could be used without +hassle on Atari (130, 800XE, 800XL, 2600, 7200), Amiga, Commodore C64, +Amstrad CPC, Sinclair ZX Spectrum and many other machines. That's why these +joysticks are called "Multisystem". + +Now their pinout:: + + +---------> Right + | +-------> Left + | | +-----> Down + | | | +---> Up + | | | | + _____________ + 5 \ x o o o o / 1 + \ x o x o / + 9 `~~~~~~~' 6 + | | + | +----> Button + +--------> Ground + +However, as time passed, extensions to this standard developed, and these +were not compatible with each other:: + + + Atari 130, 800/XL/XE MSX + + +-----------> Power + +---------> Right | +---------> Right + | +-------> Left | | +-------> Left + | | +-----> Down | | | +-----> Down + | | | +---> Up | | | | +---> Up + | | | | | | | | | + _____________ _____________ + 5 \ x o o o o / 1 5 \ o o o o o / 1 + \ x o o o / \ o o o o / + 9 `~~~~~~~' 6 9 `~~~~~~~' 6 + | | | | | | | + | | +----> Button | | | +----> Button 1 + | +------> Power | | +------> Button 2 + +--------> Ground | +--------> Output 3 + +----------> Ground + + Amstrad CPC Commodore C64 + + +-----------> Analog Y + +---------> Right | +---------> Right + | +-------> Left | | +-------> Left + | | +-----> Down | | | +-----> Down + | | | +---> Up | | | | +---> Up + | | | | | | | | | + _____________ _____________ + 5 \ x o o o o / 1 5 \ o o o o o / 1 + \ x o o o / \ o o o o / + 9 `~~~~~~~' 6 9 `~~~~~~~' 6 + | | | | | | | + | | +----> Button 1 | | | +----> Button + | +------> Button 2 | | +------> Power + +--------> Ground | +--------> Ground + +----------> Analog X + + Sinclair Spectrum +2A/+3 Amiga 1200 + + +-----------> Up +-----------> Button 3 + | +---------> Fire | +---------> Right + | | | | +-------> Left + | | +-----> Ground | | | +-----> Down + | | | | | | | +---> Up + | | | | | | | | + _____________ _____________ + 5 \ o o x o x / 1 5 \ o o o o o / 1 + \ o o o o / \ o o o o / + 9 `~~~~~~~' 6 9 `~~~~~~~' 6 + | | | | | | | | + | | | +----> Right | | | +----> Button 1 + | | +------> Left | | +------> Power + | +--------> Ground | +--------> Ground + +----------> Down +----------> Button 2 + + And there were many others. + +Multisystem joysticks using db9.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For the Multisystem joysticks, and their derivatives, the db9.c driver +was written. It allows only one joystick / gamepad per parallel port, but +the interface is easy to build and works with almost anything. + +For the basic 1-button Multisystem joystick you connect its wires to the +parallel port like this:: + + (pin 1) -----> Power + (pin 18) -----> Ground + + (pin 2) -----> Up + (pin 3) -----> Down + (pin 4) -----> Left + (pin 5) -----> Right + (pin 6) -----> Button 1 + +However, if the joystick is switch based (eg. clicks when you move it), +you might or might not, depending on your parallel port, need 10 kOhm pullup +resistors on each of the direction and button signals, like this:: + + (pin 2) ------------+------> Up + Resistor | + (pin 1) --[10kOhm]--+ + +Try without, and if it doesn't work, add them. For TTL based joysticks / +gamepads the pullups are not needed. + +For joysticks with two buttons you connect the second button to pin 7 on +the parallel port:: + + (pin 7) -----> Button 2 + +And that's it. + +On a side note, if you have already built a different adapter for use with +the digital joystick driver 0.8.0.2, this is also supported by the db9.c +driver, as device type 8. (See section 3.2) + +Multisystem joysticks using gamecon.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For some people just one joystick per parallel port is not enough, and/or +want to use them on one parallel port together with NES/SNES/PSX pads. This is +possible using the gamecon.c. It supports up to 5 devices of the above types, +including 1 and 2 buttons Multisystem joysticks. + +However, there is nothing for free. To allow more sticks to be used at +once, you need the sticks to be purely switch based (that is non-TTL), and +not to need power. Just a plain simple six switches inside. If your +joystick can do more (eg. turbofire) you'll need to disable it totally first +if you want to use gamecon.c. + +Also, the connection is a bit more complex. You'll need a bunch of diodes, +and one pullup resistor. First, you connect the Directions and the button +the same as for db9, however with the diodes between:: + + Diodes + (pin 2) -----|<|----> Up + (pin 3) -----|<|----> Down + (pin 4) -----|<|----> Left + (pin 5) -----|<|----> Right + (pin 6) -----|<|----> Button 1 + +For two button sticks you also connect the other button:: + + (pin 7) -----|<|----> Button 2 + +And finally, you connect the Ground wire of the joystick, like done in +this little schematic to Power and Data on the parallel port, as described +for the NES / SNES pads in section 2.1 of this file - that is, one data pin +for each joystick. The power source is shared:: + + Data ------------+-----> Ground + Resistor | + Power --[10kOhm]--+ + +And that's all, here we go! + +Multisystem joysticks using turbografx.c +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The TurboGraFX interface, designed by + + Steffen Schwenke + +allows up to 7 Multisystem joysticks connected to the parallel port. In +Steffen's version, there is support for up to 5 buttons per joystick. However, +since this doesn't work reliably on all parallel ports, the turbografx.c driver +supports only one button per joystick. For more information on how to build the +interface, see: + + http://www2.burg-halle.de/~schwenke/parport.html + +Sony Playstation +---------------- + +The PSX controller is supported by the gamecon.c. Pinout of the PSX +controller (compatible with DirectPadPro):: + + +---------+---------+---------+ + 9 | o o o | o o o | o o o | 1 parallel + \________|_________|________/ port pins + | | | | | | + | | | | | +--------> Clock --- (4) + | | | | +------------> Select --- (3) + | | | +---------------> Power --- (5-9) + | | +------------------> Ground --- (18-25) + | +-------------------------> Command --- (2) + +----------------------------> Data --- (one of 10,11,12,13,15) + +The driver supports these controllers: + + * Standard PSX Pad + * NegCon PSX Pad + * Analog PSX Pad (red mode) + * Analog PSX Pad (green mode) + * PSX Rumble Pad + * PSX DDR Pad + +Sega +---- + +All the Sega controllers are more or less based on the standard 2-button +Multisystem joystick. However, since they don't use switches and use TTL +logic, the only driver usable with them is the db9.c driver. + +Sega Master System +~~~~~~~~~~~~~~~~~~ + +The SMS gamepads are almost exactly the same as normal 2-button +Multisystem joysticks. Set the driver to Multi2 mode, use the corresponding +parallel port pins, and the following schematic:: + + +-----------> Power + | +---------> Right + | | +-------> Left + | | | +-----> Down + | | | | +---> Up + | | | | | + _____________ + 5 \ o o o o o / 1 + \ o o x o / + 9 `~~~~~~~' 6 + | | | + | | +----> Button 1 + | +--------> Ground + +----------> Button 2 + +Sega Genesis aka MegaDrive +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Sega Genesis (in Europe sold as Sega MegaDrive) pads are an extension +to the Sega Master System pads. They use more buttons (3+1, 5+1, 6+1). Use +the following schematic:: + + +-----------> Power + | +---------> Right + | | +-------> Left + | | | +-----> Down + | | | | +---> Up + | | | | | + _____________ + 5 \ o o o o o / 1 + \ o o o o / + 9 `~~~~~~~' 6 + | | | | + | | | +----> Button 1 + | | +------> Select + | +--------> Ground + +----------> Button 2 + +The Select pin goes to pin 14 on the parallel port:: + + (pin 14) -----> Select + +The rest is the same as for Multi2 joysticks using db9.c + +Sega Saturn +~~~~~~~~~~~ + +Sega Saturn has eight buttons, and to transfer that, without hacks like +Genesis 6 pads use, it needs one more select pin. Anyway, it is still +handled by the db9.c driver. Its pinout is very different from anything +else. Use this schematic:: + + +-----------> Select 1 + | +---------> Power + | | +-------> Up + | | | +-----> Down + | | | | +---> Ground + | | | | | + _____________ + 5 \ o o o o o / 1 + \ o o o o / + 9 `~~~~~~~' 6 + | | | | + | | | +----> Select 2 + | | +------> Right + | +--------> Left + +----------> Power + +Select 1 is pin 14 on the parallel port, Select 2 is pin 16 on the +parallel port:: + + (pin 14) -----> Select 1 + (pin 16) -----> Select 2 + +The other pins (Up, Down, Right, Left, Power, Ground) are the same as for +Multi joysticks using db9.c + +Amiga CD32 +---------- + +Amiga CD32 joypad uses the following pinout:: + + +-----------> Button 3 + | +---------> Right + | | +-------> Left + | | | +-----> Down + | | | | +---> Up + | | | | | + _____________ + 5 \ o o o o o / 1 + \ o o o o / + 9 `~~~~~~~' 6 + | | | | + | | | +----> Button 1 + | | +------> Power + | +--------> Ground + +----------> Button 2 + +It can be connected to the parallel port and driven by db9.c driver. It needs the following wiring: + + ============ ============= + CD32 pad Parallel port + ============ ============= + 1 (Up) 2 (D0) + 2 (Down) 3 (D1) + 3 (Left) 4 (D2) + 4 (Right) 5 (D3) + 5 (Button 3) 14 (AUTOFD) + 6 (Button 1) 17 (SELIN) + 7 (+5V) 1 (STROBE) + 8 (Gnd) 18 (Gnd) + 9 (Button 2) 7 (D5) + ============ ============= + +The drivers +=========== + +There are three drivers for the parallel port interfaces. Each, as +described above, allows to connect a different group of joysticks and pads. +Here are described their command lines: + +gamecon.c +--------- + +Using gamecon.c you can connect up to five devices to one parallel port. It +uses the following kernel/module command line:: + + gamecon.map=port,pad1,pad2,pad3,pad4,pad5 + +Where ``port`` the number of the parport interface (eg. 0 for parport0). + +And ``pad1`` to ``pad5`` are pad types connected to different data input pins +(10,11,12,13,15), as described in section 2.1 of this file. + +The types are: + + ===== ============================= + Type Joystick/Pad + ===== ============================= + 0 None + 1 SNES pad + 2 NES pad + 4 Multisystem 1-button joystick + 5 Multisystem 2-button joystick + 6 N64 pad + 7 Sony PSX controller + 8 Sony PSX DDR controller + 9 SNES mouse + ===== ============================= + +The exact type of the PSX controller type is autoprobed when used, so +hot swapping should work (but is not recommended). + +Should you want to use more than one of parallel ports at once, you can use +gamecon.map2 and gamecon.map3 as additional command line parameters for two +more parallel ports. + +There are two options specific to PSX driver portion. gamecon.psx_delay sets +the command delay when talking to the controllers. The default of 25 should +work but you can try lowering it for better performance. If your pads don't +respond try raising it until they work. Setting the type to 8 allows the +driver to be used with Dance Dance Revolution or similar games. Arrow keys are +registered as key presses instead of X and Y axes. + +db9.c +----- + +Apart from making an interface, there is nothing difficult on using the +db9.c driver. It uses the following kernel/module command line:: + + db9.dev=port,type + +Where ``port`` is the number of the parport interface (eg. 0 for parport0). + +Caveat here: This driver only works on bidirectional parallel ports. If +your parallel port is recent enough, you should have no trouble with this. +Old parallel ports may not have this feature. + +``Type`` is the type of joystick or pad attached: + + ===== ====================================================== + Type Joystick/Pad + ===== ====================================================== + 0 None + 1 Multisystem 1-button joystick + 2 Multisystem 2-button joystick + 3 Genesis pad (3+1 buttons) + 5 Genesis pad (5+1 buttons) + 6 Genesis pad (6+2 buttons) + 7 Saturn pad (8 buttons) + 8 Multisystem 1-button joystick (v0.8.0.2 pin-out) + 9 Two Multisystem 1-button joysticks (v0.8.0.2 pin-out) + 10 Amiga CD32 pad + ===== ====================================================== + +Should you want to use more than one of these joysticks/pads at once, you +can use db9.dev2 and db9.dev3 as additional command line parameters for two +more joysticks/pads. + +turbografx.c +------------ + +The turbografx.c driver uses a very simple kernel/module command line:: + + turbografx.map=port,js1,js2,js3,js4,js5,js6,js7 + +Where ``port`` is the number of the parport interface (eg. 0 for parport0). + +``jsX`` is the number of buttons the Multisystem joysticks connected to the +interface ports 1-7 have. For a standard multisystem joystick, this is 1. + +Should you want to use more than one of these interfaces at once, you can +use turbografx.map2 and turbografx.map3 as additional command line parameters +for two more interfaces. + +PC parallel port pinout +======================= + +:: + + .----------------------------------------. + At the PC: \ 13 12 11 10 9 8 7 6 5 4 3 2 1 / + \ 25 24 23 22 21 20 19 18 17 16 15 14 / + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +====== ======= ============= + Pin Name Description +====== ======= ============= + 1 /STROBE Strobe + 2-9 D0-D7 Data Bit 0-7 + 10 /ACK Acknowledge + 11 BUSY Busy + 12 PE Paper End + 13 SELIN Select In + 14 /AUTOFD Autofeed + 15 /ERROR Error + 16 /INIT Initialize + 17 /SEL Select + 18-25 GND Signal Ground +====== ======= ============= + + +That's all, folks! Have fun! diff --git a/Documentation/input/devices/ntrig.rst b/Documentation/input/devices/ntrig.rst new file mode 100644 index 0000000000000000000000000000000000000000..a6b22ce6c61cdda2d8c48a53da866374dd52d07d --- /dev/null +++ b/Documentation/input/devices/ntrig.rst @@ -0,0 +1,137 @@ +.. include:: + +========================= +N-Trig touchscreen Driver +========================= + +:Copyright: |copy| 2008-2010 Rafi Rubin +:Copyright: |copy| 2009-2010 Stephane Chatty + +This driver provides support for N-Trig pen and multi-touch sensors. Single +and multi-touch events are translated to the appropriate protocols for +the hid and input systems. Pen events are sufficiently hid compliant and +are left to the hid core. The driver also provides additional filtering +and utility functions accessible with sysfs and module parameters. + +This driver has been reported to work properly with multiple N-Trig devices +attached. + + +Parameters +---------- + +Note: values set at load time are global and will apply to all applicable +devices. Adjusting parameters with sysfs will override the load time values, +but only for that one device. + +The following parameters are used to configure filters to reduce noise: + ++-----------------------+-----------------------------------------------------+ +|activate_slack |number of fingers to ignore before processing events | ++-----------------------+-----------------------------------------------------+ +|activation_height, |size threshold to activate immediately | +|activation_width | | ++-----------------------+-----------------------------------------------------+ +|min_height, |size threshold bellow which fingers are ignored | +|min_width |both to decide activation and during activity | ++-----------------------+-----------------------------------------------------+ +|deactivate_slack |the number of "no contact" frames to ignore before | +| |propagating the end of activity events | ++-----------------------+-----------------------------------------------------+ + +When the last finger is removed from the device, it sends a number of empty +frames. By holding off on deactivation for a few frames we can tolerate false +erroneous disconnects, where the sensor may mistakenly not detect a finger that +is still present. Thus deactivate_slack addresses problems where a users might +see breaks in lines during drawing, or drop an object during a long drag. + + +Additional sysfs items +---------------------- + +These nodes just provide easy access to the ranges reported by the device. + ++-----------------------+-----------------------------------------------------+ +|sensor_logical_height, | the range for positions reported during activity | +|sensor_logical_width | | ++-----------------------+-----------------------------------------------------+ +|sensor_physical_height,| internal ranges not used for normal events but | +|sensor_physical_width | useful for tuning | ++-----------------------+-----------------------------------------------------+ + +All N-Trig devices with product id of 1 report events in the ranges of + +* X: 0-9600 +* Y: 0-7200 + +However not all of these devices have the same physical dimensions. Most +seem to be 12" sensors (Dell Latitude XT and XT2 and the HP TX2), and +at least one model (Dell Studio 17) has a 17" sensor. The ratio of physical +to logical sizes is used to adjust the size based filter parameters. + + +Filtering +--------- + +With the release of the early multi-touch firmwares it became increasingly +obvious that these sensors were prone to erroneous events. Users reported +seeing both inappropriately dropped contact and ghosts, contacts reported +where no finger was actually touching the screen. + +Deactivation slack helps prevent dropped contact for single touch use, but does +not address the problem of dropping one of more contacts while other contacts +are still active. Drops in the multi-touch context require additional +processing and should be handled in tandem with tacking. + +As observed ghost contacts are similar to actual use of the sensor, but they +seem to have different profiles. Ghost activity typically shows up as small +short lived touches. As such, I assume that the longer the continuous stream +of events the more likely those events are from a real contact, and that the +larger the size of each contact the more likely it is real. Balancing the +goals of preventing ghosts and accepting real events quickly (to minimize +user observable latency), the filter accumulates confidence for incoming +events until it hits thresholds and begins propagating. In the interest in +minimizing stored state as well as the cost of operations to make a decision, +I've kept that decision simple. + +Time is measured in terms of the number of fingers reported, not frames since +the probability of multiple simultaneous ghosts is expected to drop off +dramatically with increasing numbers. Rather than accumulate weight as a +function of size, I just use it as a binary threshold. A sufficiently large +contact immediately overrides the waiting period and leads to activation. + +Setting the activation size thresholds to large values will result in deciding +primarily on activation slack. If you see longer lived ghosts, turning up the +activation slack while reducing the size thresholds may suffice to eliminate +the ghosts while keeping the screen quite responsive to firm taps. + +Contacts continue to be filtered with min_height and min_width even after +the initial activation filter is satisfied. The intent is to provide +a mechanism for filtering out ghosts in the form of an extra finger while +you actually are using the screen. In practice this sort of ghost has +been far less problematic or relatively rare and I've left the defaults +set to 0 for both parameters, effectively turning off that filter. + +I don't know what the optimal values are for these filters. If the defaults +don't work for you, please play with the parameters. If you do find other +values more comfortable, I would appreciate feedback. + +The calibration of these devices does drift over time. If ghosts or contact +dropping worsen and interfere with the normal usage of your device, try +recalibrating it. + + +Calibration +----------- + +The N-Trig windows tools provide calibration and testing routines. Also an +unofficial unsupported set of user space tools including a calibrator is +available at: +http://code.launchpad.net/~rafi-seas/+junk/ntrig_calib + + +Tracking +-------- + +As of yet, all tested N-Trig firmwares do not track fingers. When multiple +contacts are active they seem to be sorted primarily by Y position. diff --git a/Documentation/input/devices/rotary-encoder.rst b/Documentation/input/devices/rotary-encoder.rst new file mode 100644 index 0000000000000000000000000000000000000000..b07b20a295ac5c220a55f0ba6def574d6c9ea5af --- /dev/null +++ b/Documentation/input/devices/rotary-encoder.rst @@ -0,0 +1,131 @@ +============================================================ +rotary-encoder - a generic driver for GPIO connected devices +============================================================ + +:Author: Daniel Mack , Feb 2009 + +Function +-------- + +Rotary encoders are devices which are connected to the CPU or other +peripherals with two wires. The outputs are phase-shifted by 90 degrees +and by triggering on falling and rising edges, the turn direction can +be determined. + +Some encoders have both outputs low in stable states, others also have +a stable state with both outputs high (half-period mode) and some have +a stable state in all steps (quarter-period mode). + +The phase diagram of these two outputs look like this:: + + _____ _____ _____ + | | | | | | + Channel A ____| |_____| |_____| |____ + + : : : : : : : : : : : : + __ _____ _____ _____ + | | | | | | | + Channel B |_____| |_____| |_____| |__ + + : : : : : : : : : : : : + Event a b c d a b c d a b c d + + |<-------->| + one step + + |<-->| + one step (half-period mode) + + |<>| + one step (quarter-period mode) + +For more information, please see + https://en.wikipedia.org/wiki/Rotary_encoder + + +Events / state machine +---------------------- + +In half-period mode, state a) and c) above are used to determine the +rotational direction based on the last stable state. Events are reported in +states b) and d) given that the new stable state is different from the last +(i.e. the rotation was not reversed half-way). + +Otherwise, the following apply: + +a) Rising edge on channel A, channel B in low state + This state is used to recognize a clockwise turn + +b) Rising edge on channel B, channel A in high state + When entering this state, the encoder is put into 'armed' state, + meaning that there it has seen half the way of a one-step transition. + +c) Falling edge on channel A, channel B in high state + This state is used to recognize a counter-clockwise turn + +d) Falling edge on channel B, channel A in low state + Parking position. If the encoder enters this state, a full transition + should have happened, unless it flipped back on half the way. The + 'armed' state tells us about that. + +Platform requirements +--------------------- + +As there is no hardware dependent call in this driver, the platform it is +used with must support gpiolib. Another requirement is that IRQs must be +able to fire on both edges. + + +Board integration +----------------- + +To use this driver in your system, register a platform_device with the +name 'rotary-encoder' and associate the IRQs and some specific platform +data with it. Because the driver uses generic device properties, this can +be done either via device tree, ACPI, or using static board files, like in +example below: + +:: + + /* board support file example */ + + #include + #include + #include + + #define GPIO_ROTARY_A 1 + #define GPIO_ROTARY_B 2 + + static struct gpiod_lookup_table rotary_encoder_gpios = { + .dev_id = "rotary-encoder.0", + .table = { + GPIO_LOOKUP_IDX("gpio-0", + GPIO_ROTARY_A, NULL, 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-0", + GPIO_ROTARY_B, NULL, 1, GPIO_ACTIVE_HIGH), + { }, + }, + }; + + static const struct property_entry rotary_encoder_properties[] __initconst = { + PROPERTY_ENTRY_INTEGER("rotary-encoder,steps-per-period", u32, 24), + PROPERTY_ENTRY_INTEGER("linux,axis", u32, ABS_X), + PROPERTY_ENTRY_INTEGER("rotary-encoder,relative_axis", u32, 0), + { }, + }; + + static struct platform_device rotary_encoder_device = { + .name = "rotary-encoder", + .id = 0, + }; + + ... + + gpiod_add_lookup_table(&rotary_encoder_gpios); + device_add_properties(&rotary_encoder_device, rotary_encoder_properties); + platform_device_register(&rotary_encoder_device); + + ... + +Please consult device tree binding documentation to see all properties +supported by the driver. diff --git a/Documentation/input/devices/sentelic.rst b/Documentation/input/devices/sentelic.rst new file mode 100644 index 0000000000000000000000000000000000000000..d7ad603dd77e26ad78ee0bff5d2261c97e3bcc42 --- /dev/null +++ b/Documentation/input/devices/sentelic.rst @@ -0,0 +1,901 @@ +.. include:: + +================= +Sentelic Touchpad +================= + + +:Copyright: |copy| 2002-2011 Sentelic Corporation. + +:Last update: Dec-07-2011 + +Finger Sensing Pad Intellimouse Mode (scrolling wheel, 4th and 5th buttons) +============================================================================ + +A) MSID 4: Scrolling wheel mode plus Forward page(4th button) and Backward + page (5th button) + +1. Set sample rate to 200; +2. Set sample rate to 200; +3. Set sample rate to 80; +4. Issuing the "Get device ID" command (0xF2) and waits for the response; +5. FSP will respond 0x04. + +:: + + Packet 1 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|W|W|W|W| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7 => Y overflow + Bit6 => X overflow + Bit5 => Y sign bit + Bit4 => X sign bit + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X Movement(9-bit 2's complement integers) + Byte 3: Y Movement(9-bit 2's complement integers) + Byte 4: Bit3~Bit0 => the scrolling wheel's movement since the last data report. + valid values, -8 ~ +7 + Bit4 => 1 = 4th mouse button is pressed, Forward one page. + 0 = 4th mouse button is not pressed. + Bit5 => 1 = 5th mouse button is pressed, Backward one page. + 0 = 5th mouse button is not pressed. + +B) MSID 6: Horizontal and Vertical scrolling + +- Set bit 1 in register 0x40 to 1 + +FSP replaces scrolling wheel's movement as 4 bits to show horizontal and +vertical scrolling. + +:: + + Packet 1 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|r|l|u|d| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7 => Y overflow + Bit6 => X overflow + Bit5 => Y sign bit + Bit4 => X sign bit + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X Movement(9-bit 2's complement integers) + Byte 3: Y Movement(9-bit 2's complement integers) + Byte 4: Bit0 => the Vertical scrolling movement downward. + Bit1 => the Vertical scrolling movement upward. + Bit2 => the Horizontal scrolling movement leftward. + Bit3 => the Horizontal scrolling movement rightward. + Bit4 => 1 = 4th mouse button is pressed, Forward one page. + 0 = 4th mouse button is not pressed. + Bit5 => 1 = 5th mouse button is pressed, Backward one page. + 0 = 5th mouse button is not pressed. + +C) MSID 7 + +FSP uses 2 packets (8 Bytes) to represent Absolute Position. +so we have PACKET NUMBER to identify packets. + + If PACKET NUMBER is 0, the packet is Packet 1. + If PACKET NUMBER is 1, the packet is Packet 2. + Please count this number in program. + +MSID6 special packet will be enable at the same time when enable MSID 7. + +Absolute position for STL3886-G0 +================================ + +1. Set bit 2 or 3 in register 0x40 to 1 +2. Set bit 6 in register 0x40 to 1 + +:: + + Packet 1 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|1|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|d|u|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => valid bit + Bit4 => 1 + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => scroll up + Bit5 => scroll down + Bit6 => scroll left + Bit7 => scroll right + + Notify Packet for G0 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|0|1|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |M|M|M|M|M|M|M|M| 4 |0|0|0|0|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => 0 + Bit4 => 1 + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: Message Type => 0x5A (Enable/Disable status packet) + Mode Type => 0xA5 (Normal/Icon mode status) + Byte 3: Message Type => 0x00 (Disabled) + => 0x01 (Enabled) + Mode Type => 0x00 (Normal) + => 0x01 (Icon) + Byte 4: Bit7~Bit0 => Don't Care + +Absolute position for STL3888-Ax +================================ + +:: + + Packet 1 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|A|1|L|0|1| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + => 11, Normal data packet with on-pad click + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => arc + Bit3 => 1 + Bit2 => Left Button, 1 is pressed, 0 is released. + Bit1 => 0 + Bit0 => 1 + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit5~Bit4 => y1_g + Bit7~Bit6 => x1_g + + Packet 2 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|A|1|R|1|0| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + => 11, Normal data packet with on-pad click + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => arc + Bit3 => 1 + Bit2 => Right Button, 1 is pressed, 0 is released. + Bit1 => 1 + Bit0 => 0 + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit5~Bit4 => y2_g + Bit7~Bit6 => x2_g + + Notify Packet for STL3888-Ax + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|d|u|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + => 11, Normal data packet with on-pad click + Bit5 => 1 + Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): + 0: left button is generated by the on-pad command + 1: left button is generated by the external button + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode) + Byte 3: Bit7~Bit6 => Don't care + Bit5~Bit4 => Number of fingers + Bit3~Bit1 => Reserved + Bit0 => 1: enter gesture mode; 0: leaving gesture mode + Byte 4: Bit7 => scroll right button + Bit6 => scroll left button + Bit5 => scroll down button + Bit4 => scroll up button + * Note that if gesture and additional button (Bit4~Bit7) + happen at the same time, the button information will not + be sent. + Bit3~Bit0 => Reserved + +Sample sequence of Multi-finger, Multi-coordinate mode: + + notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1, + abs pkt 2, ..., notify packet (valid bit == 0) + +Absolute position for STL3888-B0 +================================ + +:: + + Packet 1(ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|F|1|0|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|u|d|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + => 11, Normal data packet with on-pad click + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => finger up/down information. 1: finger down, 0: finger up. + Bit3 => 1 + Bit2 => finger index, 0 is the first finger, 1 is the second finger. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => scroll down button + Bit5 => scroll up button + Bit6 => scroll left button + Bit7 => scroll right button + + Packet 2 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|F|1|1|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|u|d|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + => 11, Normal data packet with on-pad click + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => finger up/down information. 1: finger down, 0: finger up. + Bit3 => 1 + Bit2 => finger index, 0 is the first finger, 1 is the second finger. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => scroll down button + Bit5 => scroll up button + Bit6 => scroll left button + Bit7 => scroll right button + +Notify Packet for STL3888-B0:: + + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + => 11, Normal data packet with on-pad click + Bit5 => 1 + Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): + 0: left button is generated by the on-pad command + 1: left button is generated by the external button + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode) + Byte 3: Bit7~Bit6 => Don't care + Bit5~Bit4 => Number of fingers + Bit3~Bit1 => Reserved + Bit0 => 1: enter gesture mode; 0: leaving gesture mode + Byte 4: Bit7 => scroll right button + Bit6 => scroll left button + Bit5 => scroll up button + Bit4 => scroll down button + * Note that if gesture and additional button(Bit4~Bit7) + happen at the same time, the button information will not + be sent. + Bit3~Bit0 => Reserved + +Sample sequence of Multi-finger, Multi-coordinate mode: + + notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1, + abs pkt 2, ..., notify packet (valid bit == 0) + +Absolute position for STL3888-Cx and STL3888-Dx +=============================================== + +:: + + Single Finger, Absolute Coordinate Mode (SFAC) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|0|P|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + Bit5 => Coordinate mode(always 0 in SFAC mode): + 0: single-finger absolute coordinates (SFAC) mode + 1: multi-finger, multiple coordinates (MFMC) mode + Bit4 => 0: The LEFT button is generated by on-pad command (OPC) + 1: The LEFT button is generated by external button + Default is 1 even if the LEFT button is not pressed. + Bit3 => Always 1, as specified by PS/2 protocol. + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => 4th mouse button(forward one page) + Bit5 => 5th mouse button(backward one page) + Bit6 => scroll left button + Bit7 => scroll right button + + Multi Finger, Multiple Coordinates Mode (MFMC): + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|1|P|1|F|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => Coordinate mode (always 1 in MFMC mode): + 0: single-finger absolute coordinates (SFAC) mode + 1: multi-finger, multiple coordinates (MFMC) mode + Bit4 => 0: The LEFT button is generated by on-pad command (OPC) + 1: The LEFT button is generated by external button + Default is 1 even if the LEFT button is not pressed. + Bit3 => Always 1, as specified by PS/2 protocol. + Bit2 => Finger index, 0 is the first finger, 1 is the second finger. + If bit 1 and 0 are all 1 and bit 4 is 0, the middle external + button is pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: X coordinate (xpos[9:2]) + Byte 3: Y coordinate (ypos[9:2]) + Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => 4th mouse button(forward one page) + Bit5 => 5th mouse button(backward one page) + Bit6 => scroll left button + Bit7 => scroll right button + +When one of the two fingers is up, the device will output four consecutive +MFMC#0 report packets with zero X and Y to represent 1st finger is up or +four consecutive MFMC#1 report packets with zero X and Y to represent that +the 2nd finger is up. On the other hand, if both fingers are up, the device +will output four consecutive single-finger, absolute coordinate(SFAC) packets +with zero X and Y. + +Notify Packet for STL3888-Cx/Dx:: + + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|0|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + + Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + Bit5 => Always 0 + Bit4 => 0: The LEFT button is generated by on-pad command(OPC) + 1: The LEFT button is generated by external button + Default is 1 even if the LEFT button is not pressed. + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. + Byte 2: Message type: + 0xba => gesture information + 0xc0 => one finger hold-rotating gesture + Byte 3: The first parameter for the received message: + 0xba => gesture ID (refer to the 'Gesture ID' section) + 0xc0 => region ID + Byte 4: The second parameter for the received message: + 0xba => N/A + 0xc0 => finger up/down information + +Sample sequence of Multi-finger, Multi-coordinates mode: + + notify packet (valid bit == 1), MFMC packet 1 (byte 1, bit 2 == 0), + MFMC packet 2 (byte 1, bit 2 == 1), MFMC packet 1, MFMC packet 2, + ..., notify packet (valid bit == 0) + + That is, when the device is in MFMC mode, the host will receive + interleaved absolute coordinate packets for each finger. + +FSP Enable/Disable packet +========================= + +:: + + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 + BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|0|0|1|M|R|L| 2 |0|1|0|1|1|0|1|E| 3 | | | | | | | | | 4 | | | | | | | | | + |---------------| |---------------| |---------------| |---------------| + + FSP will send out enable/disable packet when FSP receive PS/2 enable/disable + command. Host will receive the packet which Middle, Right, Left button will + be set. The packet only use byte 0 and byte 1 as a pattern of original packet. + Ignore the other bytes of the packet. + + Byte 1: Bit7 => 0, Y overflow + Bit6 => 0, X overflow + Bit5 => 0, Y sign bit + Bit4 => 0, X sign bit + Bit3 => 1 + Bit2 => 1, Middle Button + Bit1 => 1, Right Button + Bit0 => 1, Left Button + Byte 2: Bit7~1 => (0101101b) + Bit0 => 1 = Enable + 0 = Disable + Byte 3: Don't care + Byte 4: Don't care (MOUSE ID 3, 4) + Byte 5~8: Don't care (Absolute packet) + +PS/2 Command Set +================ + +FSP supports basic PS/2 commanding set and modes, refer to following URL for +details about PS/2 commands: + +http://www.computer-engineering.org/ps2mouse/ + +Programming Sequence for Determining Packet Parsing Flow +======================================================== + +1. Identify FSP by reading device ID(0x00) and version(0x01) register + +2. For FSP version < STL3888 Cx, determine number of buttons by reading + the 'test mode status' (0x20) register:: + + buttons = reg[0x20] & 0x30 + + if buttons == 0x30 or buttons == 0x20: + # two/four buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail(ignore byte 4, bit ~ 7) + elif buttons == 0x10: + # 6 buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section B for packet parsing detail + elif buttons == 0x00: + # 6 buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail + +3. For FSP version >= STL3888 Cx: + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail (ignore byte 4, bit ~ 7) + +Programming Sequence for Register Reading/Writing +================================================= + +Register inversion requirement: + +Following values needed to be inverted(the '~' operator in C) before being +sent to FSP:: + + 0xe8, 0xe9, 0xee, 0xf2, 0xf3 and 0xff. + +Register swapping requirement: + +Following values needed to have their higher 4 bits and lower 4 bits being +swapped before being sent to FSP:: + + 10, 20, 40, 60, 80, 100 and 200. + +Register reading sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. send 0x66 PS/2 command to FSP; + + 3. send 0x88 PS/2 command to FSP; + + 4. send 0xf3 PS/2 command to FSP; + + 5. if the register address being to read is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 6 + + a. send 0x68 PS/2 command to FSP; + + b. send the inverted register address to FSP and goto step 8; + + 6. if the register address being to read is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 7 + + a. send 0xcc PS/2 command to FSP; + + b. send the swapped register address to FSP and goto step 8; + + 7. send 0x66 PS/2 command to FSP; + + a. send the original register address to FSP and goto step 8; + + 8. send 0xe9(status request) PS/2 command to FSP; + + 9. the 4th byte of the response read from FSP should be the + requested register value(?? indicates don't care byte):: + + host: 0xe9 + 3888: 0xfa (??) (??) (val) + + * Note that since the Cx release, the hardware will return 1's + complement of the register value at the 3rd byte of status request + result:: + + host: 0xe9 + 3888: 0xfa (??) (~val) (val) + +Register writing sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. if the register address being to write is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 3 + + a. send 0x74 PS/2 command to FSP; + + b. send the inverted register address to FSP and goto step 5; + + 3. if the register address being to write is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 4 + + a. send 0x77 PS/2 command to FSP; + + b. send the swapped register address to FSP and goto step 5; + + 4. send 0x55 PS/2 command to FSP; + + a. send the register address to FSP and goto step 5; + + 5. send 0xf3 PS/2 command to FSP; + + 6. if the register value being to write is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 7 + + a. send 0x47 PS/2 command to FSP; + + b. send the inverted register value to FSP and goto step 9; + + 7. if the register value being to write is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 8 + + a. send 0x44 PS/2 command to FSP; + + b. send the swapped register value to FSP and goto step 9; + + 8. send 0x33 PS/2 command to FSP; + + a. send the register value to FSP; + + 9. the register writing sequence is completed. + + * Since the Cx release, the hardware will return 1's + complement of the register value at the 3rd byte of status request + result. Host can optionally send another 0xe9 (status request) PS/2 + command to FSP at the end of register writing to verify that the + register writing operation is successful (?? indicates don't care + byte):: + + host: 0xe9 + 3888: 0xfa (??) (~val) (val) + +Programming Sequence for Page Register Reading/Writing +====================================================== + +In order to overcome the limitation of maximum number of registers +supported, the hardware separates register into different groups called +'pages.' Each page is able to include up to 255 registers. + +The default page after power up is 0x82; therefore, if one has to get +access to register 0x8301, one has to use following sequence to switch +to page 0x83, then start reading/writing from/to offset 0x01 by using +the register read/write sequence described in previous section. + +Page register reading sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. send 0x66 PS/2 command to FSP; + + 3. send 0x88 PS/2 command to FSP; + + 4. send 0xf3 PS/2 command to FSP; + + 5. send 0x83 PS/2 command to FSP; + + 6. send 0x88 PS/2 command to FSP; + + 7. send 0xe9(status request) PS/2 command to FSP; + + 8. the response read from FSP should be the requested page value. + + +Page register writing sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. send 0x38 PS/2 command to FSP; + + 3. send 0x88 PS/2 command to FSP; + + 4. send 0xf3 PS/2 command to FSP; + + 5. if the page address being written is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 6 + + a. send 0x47 PS/2 command to FSP; + + b. send the inverted page address to FSP and goto step 9; + + 6. if the page address being written is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 7 + + a. send 0x44 PS/2 command to FSP; + + b. send the swapped page address to FSP and goto step 9; + + 7. send 0x33 PS/2 command to FSP; + + 8. send the page address to FSP; + + 9. the page register writing sequence is completed. + +Gesture ID +========== + +Unlike other devices which sends multiple fingers' coordinates to host, +FSP processes multiple fingers' coordinates internally and convert them +into a 8 bits integer, namely 'Gesture ID.' Following is a list of +supported gesture IDs: + + ======= ================================== + ID Description + ======= ================================== + 0x86 2 finger straight up + 0x82 2 finger straight down + 0x80 2 finger straight right + 0x84 2 finger straight left + 0x8f 2 finger zoom in + 0x8b 2 finger zoom out + 0xc0 2 finger curve, counter clockwise + 0xc4 2 finger curve, clockwise + 0x2e 3 finger straight up + 0x2a 3 finger straight down + 0x28 3 finger straight right + 0x2c 3 finger straight left + 0x38 palm + ======= ================================== + +Register Listing +================ + +Registers are represented in 16 bits values. The higher 8 bits represent +the page address and the lower 8 bits represent the relative offset within +that particular page. Refer to the 'Programming Sequence for Page Register +Reading/Writing' section for instructions on how to change current page +address:: + + offset width default r/w name + 0x8200 bit7~bit0 0x01 RO device ID + + 0x8201 bit7~bit0 RW version ID + 0xc1: STL3888 Ax + 0xd0 ~ 0xd2: STL3888 Bx + 0xe0 ~ 0xe1: STL3888 Cx + 0xe2 ~ 0xe3: STL3888 Dx + + 0x8202 bit7~bit0 0x01 RO vendor ID + + 0x8203 bit7~bit0 0x01 RO product ID + + 0x8204 bit3~bit0 0x01 RW revision ID + + 0x820b test mode status 1 + bit3 1 RO 0: rotate 180 degree + 1: no rotation + *only supported by H/W prior to Cx + + 0x820f register file page control + bit2 0 RW 1: rotate 180 degree + 0: no rotation + *supported since Cx + + bit0 0 RW 1 to enable page 1 register files + *only supported by H/W prior to Cx + + 0x8210 RW system control 1 + bit0 1 RW Reserved, must be 1 + bit1 0 RW Reserved, must be 0 + bit4 0 RW Reserved, must be 0 + bit5 1 RW register clock gating enable + 0: read only, 1: read/write enable + (Note that following registers does not require clock gating being + enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e + 40 41 42 43. In addition to that, this bit must be 1 when gesture + mode is enabled) + + 0x8220 test mode status + bit5~bit4 RO number of buttons + 11 => 2, lbtn/rbtn + 10 => 4, lbtn/rbtn/scru/scrd + 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr + 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn + *only supported by H/W prior to Cx + + 0x8231 RW on-pad command detection + bit7 0 RW on-pad command left button down tag + enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + 0x8234 RW on-pad command control 5 + bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 + (Note that position unit is in 0.5 scanline) + *only supported by H/W prior to Cx + + bit7 0 RW on-pad tap zone enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + 0x8235 RW on-pad command control 6 + bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 + (Note that position unit is in 0.5 scanline) + *only supported by H/W prior to Cx + + 0x8236 RW on-pad command control 7 + bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 + (Note that position unit is in 0.5 scanline) + *only supported by H/W prior to Cx + + 0x8237 RW on-pad command control 8 + bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 + (Note that position unit is in 0.5 scanline) + *only supported by H/W prior to Cx + + 0x8240 RW system control 5 + bit1 0 RW FSP Intellimouse mode enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + bit2 0 RW movement + abs. coordinate mode enable + 0: disable, 1: enable + (Note that this function has the functionality of bit 1 even when + bit 1 is not set. However, the format is different from that of bit 1. + In addition, when bit 1 and bit 2 are set at the same time, bit 2 will + override bit 1.) + *only supported by H/W prior to Cx + + bit3 0 RW abs. coordinate only mode enable + 0: disable, 1: enable + (Note that this function has the functionality of bit 1 even when + bit 1 is not set. However, the format is different from that of bit 1. + In addition, when bit 1, bit 2 and bit 3 are set at the same time, + bit 3 will override bit 1 and 2.) + *only supported by H/W prior to Cx + + bit5 0 RW auto switch enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + bit6 0 RW G0 abs. + notify packet format enable + 0: disable, 1: enable + (Note that the absolute/relative coordinate output still depends on + bit 2 and 3. That is, if any of those bit is 1, host will receive + absolute coordinates; otherwise, host only receives packets with + relative coordinate.) + *only supported by H/W prior to Cx + + bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd + finger packet enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + 0x8243 RW on-pad control + bit0 0 RW on-pad control enable + 0: disable, 1: enable + (Note that if this bit is cleared, bit 3/5 will be ineffective) + *only supported by H/W prior to Cx + + bit3 0 RW on-pad fix vertical scrolling enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + bit5 0 RW on-pad fix horizontal scrolling enable + 0: disable, 1: enable + *only supported by H/W prior to Cx + + 0x8290 RW software control register 1 + bit0 0 RW absolute coordination mode + 0: disable, 1: enable + *supported since Cx + + bit1 0 RW gesture ID output + 0: disable, 1: enable + *supported since Cx + + bit2 0 RW two fingers' coordinates output + 0: disable, 1: enable + *supported since Cx + + bit3 0 RW finger up one packet output + 0: disable, 1: enable + *supported since Cx + + bit4 0 RW absolute coordination continuous mode + 0: disable, 1: enable + *supported since Cx + + bit6~bit5 00 RW gesture group selection + 00: basic + 01: suite + 10: suite pro + 11: advanced + *supported since Cx + + bit7 0 RW Bx packet output compatible mode + 0: disable, 1: enable + *supported since Cx + *supported since Cx + + + 0x833d RW on-pad command control 1 + bit7 1 RW on-pad command detection enable + 0: disable, 1: enable + *supported since Cx + + 0x833e RW on-pad command detection + bit7 0 RW on-pad command left button down tag + enable. Works only in H/W based PS/2 + data packet mode. + 0: disable, 1: enable + *supported since Cx diff --git a/Documentation/input/devices/walkera0701.rst b/Documentation/input/devices/walkera0701.rst new file mode 100644 index 0000000000000000000000000000000000000000..2adda99ca7171f0c3e2633f4ca164ba0cc86b0ca --- /dev/null +++ b/Documentation/input/devices/walkera0701.rst @@ -0,0 +1,128 @@ +=========================== +Walkera WK-0701 transmitter +=========================== + +Walkera WK-0701 transmitter is supplied with a ready to fly Walkera +helicopters such as HM36, HM37, HM60. The walkera0701 module enables to use +this transmitter as joystick + +Devel homepage and download: +http://zub.fei.tuke.sk/walkera-wk0701/ + +or use cogito: +cg-clone http://zub.fei.tuke.sk/GIT/walkera0701-joystick + + +Connecting to PC +================ + +At back side of transmitter S-video connector can be found. Modulation +pulses from processor to HF part can be found at pin 2 of this connector, +pin 3 is GND. Between pin 3 and CPU 5k6 resistor can be found. To get +modulation pulses to PC, signal pulses must be amplified. + +Cable: (walkera TX to parport) + +Walkera WK-0701 TX S-VIDEO connector:: + + (back side of TX) + __ __ S-video: canon25 + / |_| \ pin 2 (signal) NPN parport + / O 4 3 O \ pin 3 (GND) LED ________________ 10 ACK + ( O 2 1 O ) | C + \ ___ / 2 ________________________|\|_____|/ + | [___] | |/| B |\ + ------- 3 __________________________________|________________ 25 GND + E + +I use green LED and BC109 NPN transistor. + +Software +======== + +Build kernel with walkera0701 module. Module walkera0701 need exclusive +access to parport, modules like lp must be unloaded before loading +walkera0701 module, check dmesg for error messages. Connect TX to PC by +cable and run jstest /dev/input/js0 to see values from TX. If no value can +be changed by TX "joystick", check output from /proc/interrupts. Value for +(usually irq7) parport must increase if TX is on. + + + +Technical details +================= + +Driver use interrupt from parport ACK input bit to measure pulse length +using hrtimers. + +Frame format: +Based on walkera WK-0701 PCM Format description by Shaul Eizikovich. +(downloaded from http://www.smartpropoplus.com/Docs/Walkera_Wk-0701_PCM.pdf) + +Signal pulses +------------- + +:: + + (ANALOG) + SYNC BIN OCT + +---------+ +------+ + | | | | + --+ +------+ +--- + +Frame +----- + +:: + + SYNC , BIN1, OCT1, BIN2, OCT2 ... BIN24, OCT24, BIN25, next frame SYNC .. + +pulse length +------------ + +:: + + Binary values: Analog octal values: + + 288 uS Binary 0 318 uS 000 + 438 uS Binary 1 398 uS 001 + 478 uS 010 + 558 uS 011 + 638 uS 100 + 1306 uS SYNC 718 uS 101 + 798 uS 110 + 878 uS 111 + +24 bin+oct values + 1 bin value = 24*4+1 bits = 97 bits + +(Warning, pulses on ACK are inverted by transistor, irq is raised up on sync +to bin change or octal value to bin change). + +Binary data representations +--------------------------- + +One binary and octal value can be grouped to nibble. 24 nibbles + one binary +values can be sampled between sync pulses. + +Values for first four channels (analog joystick values) can be found in +first 10 nibbles. Analog value is represented by one sign bit and 9 bit +absolute binary value. (10 bits per channel). Next nibble is checksum for +first ten nibbles. + +Next nibbles 12 .. 21 represents four channels (not all channels can be +directly controlled from TX). Binary representations are the same as in first +four channels. In nibbles 22 and 23 is a special magic number. Nibble 24 is +checksum for nibbles 12..23. + +After last octal value for nibble 24 and next sync pulse one additional +binary value can be sampled. This bit and magic number is not used in +software driver. Some details about this magic numbers can be found in +Walkera_Wk-0701_PCM.pdf. + +Checksum calculation +-------------------- + +Summary of octal values in nibbles must be same as octal value in checksum +nibble (only first 3 bits are used). Binary value for checksum nibble is +calculated by sum of binary values in checked nibbles + sum of octal values +in checked nibbles divided by 8. Only bit 0 of this sum is used. diff --git a/Documentation/input/devices/xpad.rst b/Documentation/input/devices/xpad.rst new file mode 100644 index 0000000000000000000000000000000000000000..5a709ab77c8dbe42f8d7da8669cb6090a3f8323f --- /dev/null +++ b/Documentation/input/devices/xpad.rst @@ -0,0 +1,233 @@ +======================================================= +xpad - Linux USB driver for Xbox compatible controllers +======================================================= + +This driver exposes all first-party and third-party Xbox compatible +controllers. It has a long history and has enjoyed considerable usage +as Window's xinput library caused most PC games to focus on Xbox +controller compatibility. + +Due to backwards compatibility all buttons are reported as digital. +This only effects Original Xbox controllers. All later controller models +have only digital face buttons. + +Rumble is supported on some models of Xbox 360 controllers but not of +Original Xbox controllers nor on Xbox One controllers. As of writing +the Xbox One's rumble protocol has not been reverse engineered but in +the future could be supported. + + +Notes +===== + +The number of buttons/axes reported varies based on 3 things: + +- if you are using a known controller +- if you are using a known dance pad +- if using an unknown device (one not listed below), what you set in the + module configuration for "Map D-PAD to buttons rather than axes for unknown + pads" (module option dpad_to_buttons) + +If you set dpad_to_buttons to N and you are using an unknown device +the driver will map the directional pad to axes (X/Y). +If you said Y it will map the d-pad to buttons, which is needed for dance +style games to function correctly. The default is Y. + +dpad_to_buttons has no effect for known pads. A erroneous commit message +claimed dpad_to_buttons could be used to force behavior on known devices. +This is not true. Both dpad_to_buttons and triggers_to_buttons only affect +unknown controllers. + + +Normal Controllers +------------------ + +With a normal controller, the directional pad is mapped to its own X/Y axes. +The jstest-program from joystick-1.2.15 (jstest-version 2.1.0) will report 8 +axes and 10 buttons. + +All 8 axes work, though they all have the same range (-32768..32767) +and the zero-setting is not correct for the triggers (I don't know if that +is some limitation of jstest, since the input device setup should be fine. I +didn't have a look at jstest itself yet). + +All of the 10 buttons work (in digital mode). The six buttons on the +right side (A, B, X, Y, black, white) are said to be "analog" and +report their values as 8 bit unsigned, not sure what this is good for. + +I tested the controller with quake3, and configuration and +in game functionality were OK. However, I find it rather difficult to +play first person shooters with a pad. Your mileage may vary. + + +Xbox Dance Pads +--------------- + +When using a known dance pad, jstest will report 6 axes and 14 buttons. + +For dance style pads (like the redoctane pad) several changes +have been made. The old driver would map the d-pad to axes, resulting +in the driver being unable to report when the user was pressing both +left+right or up+down, making DDR style games unplayable. + +Known dance pads automatically map the d-pad to buttons and will work +correctly out of the box. + +If your dance pad is recognized by the driver but is using axes instead +of buttons, see section 0.3 - Unknown Controllers + +I've tested this with Stepmania, and it works quite well. + + +Unknown Controllers +------------------- + +If you have an unknown xbox controller, it should work just fine with +the default settings. + +HOWEVER if you have an unknown dance pad not listed below, it will not +work UNLESS you set "dpad_to_buttons" to 1 in the module configuration. + + +USB adapters +============ + +All generations of Xbox controllers speak USB over the wire. + +- Original Xbox controllers use a proprietary connector and require adapters. +- Wireless Xbox 360 controllers require a 'Xbox 360 Wireless Gaming Receiver + for Windows' +- Wired Xbox 360 controllers use standard USB connectors. +- Xbox One controllers can be wireless but speak Wi-Fi Direct and are not + yet supported. +- Xbox One controllers can be wired and use standard Micro-USB connectors. + + + +Original Xbox USB adapters +-------------------------- + +Using this driver with an Original Xbox controller requires an +adapter cable to break out the proprietary connector's pins to USB. +You can buy these online fairly cheap, or build your own. + +Such a cable is pretty easy to build. The Controller itself is a USB +compound device (a hub with three ports for two expansion slots and +the controller device) with the only difference in a nonstandard connector +(5 pins vs. 4 on standard USB 1.0 connectors). + +You just need to solder a USB connector onto the cable and keep the +yellow wire unconnected. The other pins have the same order on both +connectors so there is no magic to it. Detailed info on these matters +can be found on the net ([1]_, [2]_, [3]_). + +Thanks to the trip splitter found on the cable you don't even need to cut the +original one. You can buy an extension cable and cut that instead. That way, +you can still use the controller with your X-Box, if you have one ;) + + + +Driver Installation +=================== + +Once you have the adapter cable, if needed, and the controller connected +the xpad module should be auto loaded. To confirm you can cat +/sys/kernel/debug/usb/devices. There should be an entry like those: + +.. code-block:: none + :caption: dump from InterAct PowerPad Pro (Germany) + + T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 5 Spd=12 MxCh= 0 + D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=32 #Cfgs= 1 + P: Vendor=05fd ProdID=107a Rev= 1.00 + C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 2 Cls=58(unk. ) Sub=42 Prot=00 Driver=(none) + E: Ad=81(I) Atr=03(Int.) MxPS= 32 Ivl= 10ms + E: Ad=02(O) Atr=03(Int.) MxPS= 32 Ivl= 10ms + +.. code-block:: none + :caption: dump from Redoctane Xbox Dance Pad (US) + + T: Bus=01 Lev=02 Prnt=09 Port=00 Cnt=01 Dev#= 10 Spd=12 MxCh= 0 + D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 + P: Vendor=0c12 ProdID=8809 Rev= 0.01 + S: Product=XBOX DDR + C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA + I: If#= 0 Alt= 0 #EPs= 2 Cls=58(unk. ) Sub=42 Prot=00 Driver=xpad + E: Ad=82(I) Atr=03(Int.) MxPS= 32 Ivl=4ms + E: Ad=02(O) Atr=03(Int.) MxPS= 32 Ivl=4ms + + +Supported Controllers +===================== + +For a full list of supported controllers and associated vendor and product +IDs see the xpad_device[] array\ [4]_. + +As of the historic version 0.0.6 (2006-10-10) the following devices +were supported:: + + original Microsoft XBOX controller (US), vendor=0x045e, product=0x0202 + smaller Microsoft XBOX controller (US), vendor=0x045e, product=0x0289 + original Microsoft XBOX controller (Japan), vendor=0x045e, product=0x0285 + InterAct PowerPad Pro (Germany), vendor=0x05fd, product=0x107a + RedOctane Xbox Dance Pad (US), vendor=0x0c12, product=0x8809 + +Unrecognized models of Xbox controllers should function as Generic +Xbox controllers. Unrecognized Dance Pad controllers require setting +the module option 'dpad_to_buttons'. + +If you have an unrecognized controller please see 0.3 - Unknown Controllers + + +Manual Testing +============== + +To test this driver's functionality you may use 'jstest'. + +For example:: + + > modprobe xpad + > modprobe joydev + > jstest /dev/js0 + +If you're using a normal controller, there should be a single line showing +18 inputs (8 axes, 10 buttons), and its values should change if you move +the sticks and push the buttons. If you're using a dance pad, it should +show 20 inputs (6 axes, 14 buttons). + +It works? Voila, you're done ;) + + + +Thanks +====== + +I have to thank ITO Takayuki for the detailed info on his site + http://euc.jp/periphs/xbox-controller.ja.html. + +His useful info and both the usb-skeleton as well as the iforce input driver +(Greg Kroah-Hartmann; Vojtech Pavlik) helped a lot in rapid prototyping +the basic functionality. + + + +References +========== + +.. [1] http://euc.jp/periphs/xbox-controller.ja.html (ITO Takayuki) +.. [2] http://xpad.xbox-scene.com/ +.. [3] http://www.markosweb.com/www/xboxhackz.com/ +.. [4] http://lxr.free-electrons.com/ident?i=xpad_device + + +Historic Edits +============== + +2002-07-16 - Marko Friedemann + - original doc + +2005-03-19 - Dominic Cerquetti + - added stuff for dance pads, new d-pad->axes mappings + +Later changes may be viewed with 'git log Documentation/input/xpad.txt' diff --git a/Documentation/input/devices/yealink.rst b/Documentation/input/devices/yealink.rst new file mode 100644 index 0000000000000000000000000000000000000000..bb5a1aafeca2a03a1fb353502b039d47bb64a1de --- /dev/null +++ b/Documentation/input/devices/yealink.rst @@ -0,0 +1,225 @@ +=============================================== +Driver documentation for yealink usb-p1k phones +=============================================== + +Status +====== + +The p1k is a relatively cheap usb 1.1 phone with: + + - keyboard full support, yealink.ko / input event API + - LCD full support, yealink.ko / sysfs API + - LED full support, yealink.ko / sysfs API + - dialtone full support, yealink.ko / sysfs API + - ringtone full support, yealink.ko / sysfs API + - audio playback full support, snd_usb_audio.ko / alsa API + - audio record full support, snd_usb_audio.ko / alsa API + +For vendor documentation see http://www.yealink.com + + +keyboard features +================= + +The current mapping in the kernel is provided by the map_p1k_to_key +function:: + + Physical USB-P1K button layout input events + + + up up + IN OUT left, right + down down + + pickup C hangup enter, backspace, escape + 1 2 3 1, 2, 3 + 4 5 6 4, 5, 6, + 7 8 9 7, 8, 9, + * 0 # *, 0, #, + +The "up" and "down" keys, are symbolised by arrows on the button. +The "pickup" and "hangup" keys are symbolised by a green and red phone +on the button. + + +LCD features +============ + +The LCD is divided and organised as a 3 line display:: + + |[] [][] [][] [][] in |[][] + |[] M [][] D [][] : [][] out |[][] + store + + NEW REP SU MO TU WE TH FR SA + + [] [] [] [] [] [] [] [] [] [] [] [] + [] [] [] [] [] [] [] [] [] [] [] [] + + + Line 1 Format (see below) : 18.e8.M8.88...188 + Icon names : M D : IN OUT STORE + Line 2 Format : ......... + Icon name : NEW REP SU MO TU WE TH FR SA + Line 3 Format : 888888888888 + + +Format description: + From a userspace perspective the world is separated into "digits" and "icons". + A digit can have a character set, an icon can only be ON or OFF. + + Format specifier:: + + '8' : Generic 7 segment digit with individual addressable segments + + Reduced capability 7 segment digit, when segments are hard wired together. + '1' : 2 segments digit only able to produce a 1. + 'e' : Most significant day of the month digit, + able to produce at least 1 2 3. + 'M' : Most significant minute digit, + able to produce at least 0 1 2 3 4 5. + + Icons or pictograms: + '.' : For example like AM, PM, SU, a 'dot' .. or other single segment + elements. + + +Driver usage +============ + +For userland the following interfaces are available using the sysfs interface:: + + /sys/.../ + line1 Read/Write, lcd line1 + line2 Read/Write, lcd line2 + line3 Read/Write, lcd line3 + + get_icons Read, returns a set of available icons. + hide_icon Write, hide the element by writing the icon name. + show_icon Write, display the element by writing the icon name. + + map_seg7 Read/Write, the 7 segments char set, common for all + yealink phones. (see map_to_7segment.h) + + ringtone Write, upload binary representation of a ringtone, + see yealink.c. status EXPERIMENTAL due to potential + races between async. and sync usb calls. + + +lineX +~~~~~ + +Reading /sys/../lineX will return the format string with its current value. + + Example:: + + cat ./line3 + 888888888888 + Linux Rocks! + +Writing to /sys/../lineX will set the corresponding LCD line. + + - Excess characters are ignored. + - If less characters are written than allowed, the remaining digits are + unchanged. + - The tab '\t'and '\n' char does not overwrite the original content. + - Writing a space to an icon will always hide its content. + + Example:: + + date +"%m.%e.%k:%M" | sed 's/^0/ /' > ./line1 + + Will update the LCD with the current date & time. + + +get_icons +~~~~~~~~~ + +Reading will return all available icon names and its current settings:: + + cat ./get_icons + on M + on D + on : + IN + OUT + STORE + NEW + REP + SU + MO + TU + WE + TH + FR + SA + LED + DIALTONE + RINGTONE + + +show/hide icons +~~~~~~~~~~~~~~~ + +Writing to these files will update the state of the icon. +Only one icon at a time can be updated. + +If an icon is also on a ./lineX the corresponding value is +updated with the first letter of the icon. + + Example - light up the store icon:: + + echo -n "STORE" > ./show_icon + + cat ./line1 + 18.e8.M8.88...188 + S + + Example - sound the ringtone for 10 seconds:: + + echo -n RINGTONE > /sys/..../show_icon + sleep 10 + echo -n RINGTONE > /sys/..../hide_icon + + +Sound features +============== + +Sound is supported by the ALSA driver: snd_usb_audio + +One 16-bit channel with sample and playback rates of 8000 Hz is the practical +limit of the device. + + Example - recording test:: + + arecord -v -d 10 -r 8000 -f S16_LE -t wav foobar.wav + + Example - playback test:: + + aplay foobar.wav + + +Troubleshooting +=============== + +:Q: Module yealink compiled and installed without any problem but phone + is not initialized and does not react to any actions. +:A: If you see something like: + hiddev0: USB HID v1.00 Device [Yealink Network Technology Ltd. VOIP USB Phone + in dmesg, it means that the hid driver has grabbed the device first. Try to + load module yealink before any other usb hid driver. Please see the + instructions provided by your distribution on module configuration. + +:Q: Phone is working now (displays version and accepts keypad input) but I can't + find the sysfs files. +:A: The sysfs files are located on the particular usb endpoint. On most + distributions you can do: "find /sys/ -name get_icons" for a hint. + + +Credits & Acknowledgments +========================= + + - Olivier Vandorpe, for starting the usbb2k-api project doing much of + the reverse engineering. + - Martin Diehl, for pointing out how to handle USB memory allocation. + - Dmitry Torokhov, for the numerous code reviews and suggestions. diff --git a/Documentation/input/edt-ft5x06.txt b/Documentation/input/edt-ft5x06.txt deleted file mode 100644 index 2032f0b7a8fa125dc3c88842dbeccb25a41a3453..0000000000000000000000000000000000000000 --- a/Documentation/input/edt-ft5x06.txt +++ /dev/null @@ -1,54 +0,0 @@ -EDT ft5x06 based Polytouch devices ----------------------------------- - -The edt-ft5x06 driver is useful for the EDT "Polytouch" family of capacitive -touch screens. Note that it is *not* suitable for other devices based on the -focaltec ft5x06 devices, since they contain vendor-specific firmware. In -particular this driver is not suitable for the Nook tablet. - -It has been tested with the following devices: - * EP0350M06 - * EP0430M06 - * EP0570M06 - * EP0700M06 - -The driver allows configuration of the touch screen via a set of sysfs files: - -/sys/class/input/eventX/device/device/threshold: - allows setting the "click"-threshold in the range from 20 to 80. - -/sys/class/input/eventX/device/device/gain: - allows setting the sensitivity in the range from 0 to 31. Note that - lower values indicate higher sensitivity. - -/sys/class/input/eventX/device/device/offset: - allows setting the edge compensation in the range from 0 to 31. - -/sys/class/input/eventX/device/device/report_rate: - allows setting the report rate in the range from 3 to 14. - - -For debugging purposes the driver provides a few files in the debug -filesystem (if available in the kernel). In /sys/kernel/debug/edt_ft5x06 -you'll find the following files: - -num_x, num_y: - (readonly) contains the number of sensor fields in X- and - Y-direction. - -mode: - allows switching the sensor between "factory mode" and "operation - mode" by writing "1" or "0" to it. In factory mode (1) it is - possible to get the raw data from the sensor. Note that in factory - mode regular events don't get delivered and the options described - above are unavailable. - -raw_data: - contains num_x * num_y big endian 16 bit values describing the raw - values for each sensor field. Note that each read() call on this - files triggers a new readout. It is recommended to provide a buffer - big enough to contain num_x * num_y * 2 bytes. - -Note that reading raw_data gives a I/O error when the device is not in factory -mode. The same happens when reading/writing to the parameter files when the -device is not in regular operation mode. diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt deleted file mode 100644 index 1ec0db7879d310dd3525eee439bc291a3e595c02..0000000000000000000000000000000000000000 --- a/Documentation/input/elantech.txt +++ /dev/null @@ -1,817 +0,0 @@ -Elantech Touchpad Driver -======================== - - Copyright (C) 2007-2008 Arjan Opmeer - - Extra information for hardware version 1 found and - provided by Steve Havelka - - Version 2 (EeePC) hardware support based on patches - received from Woody at Xandros and forwarded to me - by user StewieGriffin at the eeeuser.com forum - - -Contents -~~~~~~~~ - - 1. Introduction - 2. Extra knobs - 3. Differentiating hardware versions - 4. Hardware version 1 - 4.1 Registers - 4.2 Native relative mode 4 byte packet format - 4.3 Native absolute mode 4 byte packet format - 5. Hardware version 2 - 5.1 Registers - 5.2 Native absolute mode 6 byte packet format - 5.2.1 Parity checking and packet re-synchronization - 5.2.2 One/Three finger touch - 5.2.3 Two finger touch - 6. Hardware version 3 - 6.1 Registers - 6.2 Native absolute mode 6 byte packet format - 6.2.1 One/Three finger touch - 6.2.2 Two finger touch - 7. Hardware version 4 - 7.1 Registers - 7.2 Native absolute mode 6 byte packet format - 7.2.1 Status packet - 7.2.2 Head packet - 7.2.3 Motion packet - 8. Trackpoint (for Hardware version 3 and 4) - 8.1 Registers - 8.2 Native relative mode 6 byte packet format - 8.2.1 Status Packet - - - -1. Introduction - ~~~~~~~~~~~~ - -Currently the Linux Elantech touchpad driver is aware of four different -hardware versions unimaginatively called version 1,version 2, version 3 -and version 4. Version 1 is found in "older" laptops and uses 4 bytes per -packet. Version 2 seems to be introduced with the EeePC and uses 6 bytes -per packet, and provides additional features such as position of two fingers, -and width of the touch. Hardware version 3 uses 6 bytes per packet (and -for 2 fingers the concatenation of two 6 bytes packets) and allows tracking -of up to 3 fingers. Hardware version 4 uses 6 bytes per packet, and can -combine a status packet with multiple head or motion packets. Hardware version -4 allows tracking up to 5 fingers. - -Some Hardware version 3 and version 4 also have a trackpoint which uses a -separate packet format. It is also 6 bytes per packet. - -The driver tries to support both hardware versions and should be compatible -with the Xorg Synaptics touchpad driver and its graphical configuration -utilities. - -Note that a mouse button is also associated with either the touchpad or the -trackpoint when a trackpoint is available. Disabling the Touchpad in xorg -(TouchPadOff=0) will also disable the buttons associated with the touchpad. - -Additionally the operation of the touchpad can be altered by adjusting the -contents of some of its internal registers. These registers are represented -by the driver as sysfs entries under /sys/bus/serio/drivers/psmouse/serio? -that can be read from and written to. - -Currently only the registers for hardware version 1 are somewhat understood. -Hardware version 2 seems to use some of the same registers but it is not -known whether the bits in the registers represent the same thing or might -have changed their meaning. - -On top of that, some register settings have effect only when the touchpad is -in relative mode and not in absolute mode. As the Linux Elantech touchpad -driver always puts the hardware into absolute mode not all information -mentioned below can be used immediately. But because there is no freely -available Elantech documentation the information is provided here anyway for -completeness sake. - - -///////////////////////////////////////////////////////////////////////////// - - -2. Extra knobs - ~~~~~~~~~~~ - -Currently the Linux Elantech touchpad driver provides three extra knobs under -/sys/bus/serio/drivers/psmouse/serio? for the user. - -* debug - - Turn different levels of debugging ON or OFF. - - By echoing "0" to this file all debugging will be turned OFF. - - Currently a value of "1" will turn on some basic debugging and a value of - "2" will turn on packet debugging. For hardware version 1 the default is - OFF. For version 2 the default is "1". - - Turning packet debugging on will make the driver dump every packet - received to the syslog before processing it. Be warned that this can - generate quite a lot of data! - -* paritycheck - - Turns parity checking ON or OFF. - - By echoing "0" to this file parity checking will be turned OFF. Any - non-zero value will turn it ON. For hardware version 1 the default is ON. - For version 2 the default it is OFF. - - Hardware version 1 provides basic data integrity verification by - calculating a parity bit for the last 3 bytes of each packet. The driver - can check these bits and reject any packet that appears corrupted. Using - this knob you can bypass that check. - - Hardware version 2 does not provide the same parity bits. Only some basic - data consistency checking can be done. For now checking is disabled by - default. Currently even turning it on will do nothing. - -* crc_enabled - - Sets crc_enabled to 0/1. The name "crc_enabled" is the official name of - this integrity check, even though it is not an actual cyclic redundancy - check. - - Depending on the state of crc_enabled, certain basic data integrity - verification is done by the driver on hardware version 3 and 4. The - driver will reject any packet that appears corrupted. Using this knob, - The state of crc_enabled can be altered with this knob. - - Reading the crc_enabled value will show the active value. Echoing - "0" or "1" to this file will set the state to "0" or "1". - -///////////////////////////////////////////////////////////////////////////// - -3. Differentiating hardware versions - ================================= - -To detect the hardware version, read the version number as param[0].param[1].param[2] - - 4 bytes version: (after the arrow is the name given in the Dell-provided driver) - 02.00.22 => EF013 - 02.06.00 => EF019 -In the wild, there appear to be more versions, such as 00.01.64, 01.00.21, -02.00.00, 02.00.04, 02.00.06. - - 6 bytes: - 02.00.30 => EF113 - 02.08.00 => EF023 - 02.08.XX => EF123 - 02.0B.00 => EF215 - 04.01.XX => Scroll_EF051 - 04.02.XX => EF051 -In the wild, there appear to be more versions, such as 04.03.01, 04.04.11. There -appears to be almost no difference, except for EF113, which does not report -pressure/width and has different data consistency checks. - -Probably all the versions with param[0] <= 01 can be considered as -4 bytes/firmware 1. The versions < 02.08.00, with the exception of 02.00.30, as -4 bytes/firmware 2. Everything >= 02.08.00 can be considered as 6 bytes. - -///////////////////////////////////////////////////////////////////////////// - -4. Hardware version 1 - ================== - -4.1 Registers - ~~~~~~~~~ - -By echoing a hexadecimal value to a register it contents can be altered. - -For example: - - echo -n 0x16 > reg_10 - -* reg_10 - - bit 7 6 5 4 3 2 1 0 - B C T D L A S E - - E: 1 = enable smart edges unconditionally - S: 1 = enable smart edges only when dragging - A: 1 = absolute mode (needs 4 byte packets, see reg_11) - L: 1 = enable drag lock (see reg_22) - D: 1 = disable dynamic resolution - T: 1 = disable tapping - C: 1 = enable corner tap - B: 1 = swap left and right button - -* reg_11 - - bit 7 6 5 4 3 2 1 0 - 1 0 0 H V 1 F P - - P: 1 = enable parity checking for relative mode - F: 1 = enable native 4 byte packet mode - V: 1 = enable vertical scroll area - H: 1 = enable horizontal scroll area - -* reg_20 - - single finger width? - -* reg_21 - - scroll area width (small: 0x40 ... wide: 0xff) - -* reg_22 - - drag lock time out (short: 0x14 ... long: 0xfe; - 0xff = tap again to release) - -* reg_23 - - tap make timeout? - -* reg_24 - - tap release timeout? - -* reg_25 - - smart edge cursor speed (0x02 = slow, 0x03 = medium, 0x04 = fast) - -* reg_26 - - smart edge activation area width? - - -4.2 Native relative mode 4 byte packet format - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -byte 0: - bit 7 6 5 4 3 2 1 0 - c c p2 p1 1 M R L - - L, R, M = 1 when Left, Right, Middle mouse button pressed - some models have M as byte 3 odd parity bit - when parity checking is enabled (reg_11, P = 1): - p1..p2 = byte 1 and 2 odd parity bit - c = 1 when corner tap detected - -byte 1: - bit 7 6 5 4 3 2 1 0 - dx7 dx6 dx5 dx4 dx3 dx2 dx1 dx0 - - dx7..dx0 = x movement; positive = right, negative = left - byte 1 = 0xf0 when corner tap detected - -byte 2: - bit 7 6 5 4 3 2 1 0 - dy7 dy6 dy5 dy4 dy3 dy2 dy1 dy0 - - dy7..dy0 = y movement; positive = up, negative = down - -byte 3: - parity checking enabled (reg_11, P = 1): - - bit 7 6 5 4 3 2 1 0 - w h n1 n0 ds3 ds2 ds1 ds0 - - normally: - ds3..ds0 = scroll wheel amount and direction - positive = down or left - negative = up or right - when corner tap detected: - ds0 = 1 when top right corner tapped - ds1 = 1 when bottom right corner tapped - ds2 = 1 when bottom left corner tapped - ds3 = 1 when top left corner tapped - n1..n0 = number of fingers on touchpad - only models with firmware 2.x report this, models with - firmware 1.x seem to map one, two and three finger taps - directly to L, M and R mouse buttons - h = 1 when horizontal scroll action - w = 1 when wide finger touch? - - otherwise (reg_11, P = 0): - - bit 7 6 5 4 3 2 1 0 - ds7 ds6 ds5 ds4 ds3 ds2 ds1 ds0 - - ds7..ds0 = vertical scroll amount and direction - negative = up - positive = down - - -4.3 Native absolute mode 4 byte packet format - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -EF013 and EF019 have a special behaviour (due to a bug in the firmware?), and -when 1 finger is touching, the first 2 position reports must be discarded. -This counting is reset whenever a different number of fingers is reported. - -byte 0: - firmware version 1.x: - - bit 7 6 5 4 3 2 1 0 - D U p1 p2 1 p3 R L - - L, R = 1 when Left, Right mouse button pressed - p1..p3 = byte 1..3 odd parity bit - D, U = 1 when rocker switch pressed Up, Down - - firmware version 2.x: - - bit 7 6 5 4 3 2 1 0 - n1 n0 p2 p1 1 p3 R L - - L, R = 1 when Left, Right mouse button pressed - p1..p3 = byte 1..3 odd parity bit - n1..n0 = number of fingers on touchpad - -byte 1: - firmware version 1.x: - - bit 7 6 5 4 3 2 1 0 - f 0 th tw x9 x8 y9 y8 - - tw = 1 when two finger touch - th = 1 when three finger touch - f = 1 when finger touch - - firmware version 2.x: - - bit 7 6 5 4 3 2 1 0 - . . . . x9 x8 y9 y8 - -byte 2: - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 - - x9..x0 = absolute x value (horizontal) - -byte 3: - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - y9..y0 = absolute y value (vertical) - - -///////////////////////////////////////////////////////////////////////////// - - -5. Hardware version 2 - ================== - - -5.1 Registers - ~~~~~~~~~ - -By echoing a hexadecimal value to a register it contents can be altered. - -For example: - - echo -n 0x56 > reg_10 - -* reg_10 - - bit 7 6 5 4 3 2 1 0 - 0 1 0 1 0 1 D 0 - - D: 1 = enable drag and drop - -* reg_11 - - bit 7 6 5 4 3 2 1 0 - 1 0 0 0 S 0 1 0 - - S: 1 = enable vertical scroll - -* reg_21 - - unknown (0x00) - -* reg_22 - - drag and drop release time out (short: 0x70 ... long 0x7e; - 0x7f = never i.e. tap again to release) - - -5.2 Native absolute mode 6 byte packet format - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -5.2.1 Parity checking and packet re-synchronization -There is no parity checking, however some consistency checks can be performed. - -For instance for EF113: - SA1= packet[0]; - A1 = packet[1]; - B1 = packet[2]; - SB1= packet[3]; - C1 = packet[4]; - D1 = packet[5]; - if( (((SA1 & 0x3C) != 0x3C) && ((SA1 & 0xC0) != 0x80)) || // check Byte 1 - (((SA1 & 0x0C) != 0x0C) && ((SA1 & 0xC0) == 0x80)) || // check Byte 1 (one finger pressed) - (((SA1 & 0xC0) != 0x80) && (( A1 & 0xF0) != 0x00)) || // check Byte 2 - (((SB1 & 0x3E) != 0x38) && ((SA1 & 0xC0) != 0x80)) || // check Byte 4 - (((SB1 & 0x0E) != 0x08) && ((SA1 & 0xC0) == 0x80)) || // check Byte 4 (one finger pressed) - (((SA1 & 0xC0) != 0x80) && (( C1 & 0xF0) != 0x00)) ) // check Byte 5 - // error detected - -For all the other ones, there are just a few constant bits: - if( ((packet[0] & 0x0C) != 0x04) || - ((packet[3] & 0x0f) != 0x02) ) - // error detected - - -In case an error is detected, all the packets are shifted by one (and packet[0] is discarded). - -5.2.2 One/Three finger touch - ~~~~~~~~~~~~~~~~ - -byte 0: - - bit 7 6 5 4 3 2 1 0 - n1 n0 w3 w2 . . R L - - L, R = 1 when Left, Right mouse button pressed - n1..n0 = number of fingers on touchpad - -byte 1: - - bit 7 6 5 4 3 2 1 0 - p7 p6 p5 p4 x11 x10 x9 x8 - -byte 2: - - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 - - x11..x0 = absolute x value (horizontal) - -byte 3: - - bit 7 6 5 4 3 2 1 0 - n4 vf w1 w0 . . . b2 - - n4 = set if more than 3 fingers (only in 3 fingers mode) - vf = a kind of flag ? (only on EF123, 0 when finger is over one - of the buttons, 1 otherwise) - w3..w0 = width of the finger touch (not EF113) - b2 (on EF113 only, 0 otherwise), b2.R.L indicates one button pressed: - 0 = none - 1 = Left - 2 = Right - 3 = Middle (Left and Right) - 4 = Forward - 5 = Back - 6 = Another one - 7 = Another one - -byte 4: - - bit 7 6 5 4 3 2 1 0 - p3 p1 p2 p0 y11 y10 y9 y8 - - p7..p0 = pressure (not EF113) - -byte 5: - - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - y11..y0 = absolute y value (vertical) - - -5.2.3 Two finger touch - ~~~~~~~~~~~~~~~~ - -Note that the two pairs of coordinates are not exactly the coordinates of the -two fingers, but only the pair of the lower-left and upper-right coordinates. -So the actual fingers might be situated on the other diagonal of the square -defined by these two points. - -byte 0: - - bit 7 6 5 4 3 2 1 0 - n1 n0 ay8 ax8 . . R L - - L, R = 1 when Left, Right mouse button pressed - n1..n0 = number of fingers on touchpad - -byte 1: - - bit 7 6 5 4 3 2 1 0 - ax7 ax6 ax5 ax4 ax3 ax2 ax1 ax0 - - ax8..ax0 = lower-left finger absolute x value - -byte 2: - - bit 7 6 5 4 3 2 1 0 - ay7 ay6 ay5 ay4 ay3 ay2 ay1 ay0 - - ay8..ay0 = lower-left finger absolute y value - -byte 3: - - bit 7 6 5 4 3 2 1 0 - . . by8 bx8 . . . . - -byte 4: - - bit 7 6 5 4 3 2 1 0 - bx7 bx6 bx5 bx4 bx3 bx2 bx1 bx0 - - bx8..bx0 = upper-right finger absolute x value - -byte 5: - - bit 7 6 5 4 3 2 1 0 - by7 by8 by5 by4 by3 by2 by1 by0 - - by8..by0 = upper-right finger absolute y value - -///////////////////////////////////////////////////////////////////////////// - -6. Hardware version 3 - ================== - -6.1 Registers - ~~~~~~~~~ -* reg_10 - - bit 7 6 5 4 3 2 1 0 - 0 0 0 0 R F T A - - A: 1 = enable absolute tracking - T: 1 = enable two finger mode auto correct - F: 1 = disable ABS Position Filter - R: 1 = enable real hardware resolution - -6.2 Native absolute mode 6 byte packet format - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -1 and 3 finger touch shares the same 6-byte packet format, except that -3 finger touch only reports the position of the center of all three fingers. - -Firmware would send 12 bytes of data for 2 finger touch. - -Note on debounce: -In case the box has unstable power supply or other electricity issues, or -when number of finger changes, F/W would send "debounce packet" to inform -driver that the hardware is in debounce status. -The debouce packet has the following signature: - byte 0: 0xc4 - byte 1: 0xff - byte 2: 0xff - byte 3: 0x02 - byte 4: 0xff - byte 5: 0xff -When we encounter this kind of packet, we just ignore it. - -6.2.1 One/Three finger touch - ~~~~~~~~~~~~~~~~~~~~~~ - -byte 0: - - bit 7 6 5 4 3 2 1 0 - n1 n0 w3 w2 0 1 R L - - L, R = 1 when Left, Right mouse button pressed - n1..n0 = number of fingers on touchpad - -byte 1: - - bit 7 6 5 4 3 2 1 0 - p7 p6 p5 p4 x11 x10 x9 x8 - -byte 2: - - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 - - x11..x0 = absolute x value (horizontal) - -byte 3: - - bit 7 6 5 4 3 2 1 0 - 0 0 w1 w0 0 0 1 0 - - w3..w0 = width of the finger touch - -byte 4: - - bit 7 6 5 4 3 2 1 0 - p3 p1 p2 p0 y11 y10 y9 y8 - - p7..p0 = pressure - -byte 5: - - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - y11..y0 = absolute y value (vertical) - -6.2.2 Two finger touch - ~~~~~~~~~~~~~~~~ - -The packet format is exactly the same for two finger touch, except the hardware -sends two 6 byte packets. The first packet contains data for the first finger, -the second packet has data for the second finger. So for two finger touch a -total of 12 bytes are sent. - -///////////////////////////////////////////////////////////////////////////// - -7. Hardware version 4 - ================== - -7.1 Registers - ~~~~~~~~~ -* reg_07 - - bit 7 6 5 4 3 2 1 0 - 0 0 0 0 0 0 0 A - - A: 1 = enable absolute tracking - -7.2 Native absolute mode 6 byte packet format - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -v4 hardware is a true multitouch touchpad, capable of tracking up to 5 fingers. -Unfortunately, due to PS/2's limited bandwidth, its packet format is rather -complex. - -Whenever the numbers or identities of the fingers changes, the hardware sends a -status packet to indicate how many and which fingers is on touchpad, followed by -head packets or motion packets. A head packet contains data of finger id, finger -position (absolute x, y values), width, and pressure. A motion packet contains -two fingers' position delta. - -For example, when status packet tells there are 2 fingers on touchpad, then we -can expect two following head packets. If the finger status doesn't change, -the following packets would be motion packets, only sending delta of finger -position, until we receive a status packet. - -One exception is one finger touch. when a status packet tells us there is only -one finger, the hardware would just send head packets afterwards. - -7.2.1 Status packet - ~~~~~~~~~~~~~ - -byte 0: - - bit 7 6 5 4 3 2 1 0 - . . . . 0 1 R L - - L, R = 1 when Left, Right mouse button pressed - -byte 1: - - bit 7 6 5 4 3 2 1 0 - . . . ft4 ft3 ft2 ft1 ft0 - - ft4 ft3 ft2 ft1 ft0 ftn = 1 when finger n is on touchpad - -byte 2: not used - -byte 3: - - bit 7 6 5 4 3 2 1 0 - . . . 1 0 0 0 0 - - constant bits - -byte 4: - - bit 7 6 5 4 3 2 1 0 - p . . . . . . . - - p = 1 for palm - -byte 5: not used - -7.2.2 Head packet - ~~~~~~~~~~~ - -byte 0: - - bit 7 6 5 4 3 2 1 0 - w3 w2 w1 w0 0 1 R L - - L, R = 1 when Left, Right mouse button pressed - w3..w0 = finger width (spans how many trace lines) - -byte 1: - - bit 7 6 5 4 3 2 1 0 - p7 p6 p5 p4 x11 x10 x9 x8 - -byte 2: - - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 - - x11..x0 = absolute x value (horizontal) - -byte 3: - - bit 7 6 5 4 3 2 1 0 - id2 id1 id0 1 0 0 0 1 - - id2..id0 = finger id - -byte 4: - - bit 7 6 5 4 3 2 1 0 - p3 p1 p2 p0 y11 y10 y9 y8 - - p7..p0 = pressure - -byte 5: - - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - y11..y0 = absolute y value (vertical) - -7.2.3 Motion packet - ~~~~~~~~~~~~~ - -byte 0: - - bit 7 6 5 4 3 2 1 0 - id2 id1 id0 w 0 1 R L - - L, R = 1 when Left, Right mouse button pressed - id2..id0 = finger id - w = 1 when delta overflows (> 127 or < -128), in this case - firmware sends us (delta x / 5) and (delta y / 5) - -byte 1: - - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 - - x7..x0 = delta x (two's complement) - -byte 2: - - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - y7..y0 = delta y (two's complement) - -byte 3: - - bit 7 6 5 4 3 2 1 0 - id2 id1 id0 1 0 0 1 0 - - id2..id0 = finger id - -byte 4: - - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 - - x7..x0 = delta x (two's complement) - -byte 5: - - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - y7..y0 = delta y (two's complement) - - byte 0 ~ 2 for one finger - byte 3 ~ 5 for another - - -8. Trackpoint (for Hardware version 3 and 4) - ========================================= -8.1 Registers - ~~~~~~~~~ -No special registers have been identified. - -8.2 Native relative mode 6 byte packet format - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -8.2.1 Status Packet - ~~~~~~~~~~~~~ - -byte 0: - bit 7 6 5 4 3 2 1 0 - 0 0 sx sy 0 M R L -byte 1: - bit 7 6 5 4 3 2 1 0 - ~sx 0 0 0 0 0 0 0 -byte 2: - bit 7 6 5 4 3 2 1 0 - ~sy 0 0 0 0 0 0 0 -byte 3: - bit 7 6 5 4 3 2 1 0 - 0 0 ~sy ~sx 0 1 1 0 -byte 4: - bit 7 6 5 4 3 2 1 0 - x7 x6 x5 x4 x3 x2 x1 x0 -byte 5: - bit 7 6 5 4 3 2 1 0 - y7 y6 y5 y4 y3 y2 y1 y0 - - - x and y are written in two's complement spread - over 9 bits with sx/sy the relative top bit and - x7..x0 and y7..y0 the lower bits. - ~sx is the inverse of sx, ~sy is the inverse of sy. - The sign of y is opposite to what the input driver - expects for a relative movement diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst new file mode 100644 index 0000000000000000000000000000000000000000..a8c0873beb952e620db9bb2f2df823624ac90650 --- /dev/null +++ b/Documentation/input/event-codes.rst @@ -0,0 +1,409 @@ +.. _input-event-codes: + +================= +Input event codes +================= + + +The input protocol uses a map of types and codes to express input device values +to userspace. This document describes the types and codes and how and when they +may be used. + +A single hardware event generates multiple input events. Each input event +contains the new value of a single data item. A special event type, EV_SYN, is +used to separate input events into packets of input data changes occurring at +the same moment in time. In the following, the term "event" refers to a single +input event encompassing a type, code, and value. + +The input protocol is a stateful protocol. Events are emitted only when values +of event codes have changed. However, the state is maintained within the Linux +input subsystem; drivers do not need to maintain the state and may attempt to +emit unchanged values without harm. Userspace may obtain the current state of +event code values using the EVIOCG* ioctls defined in linux/input.h. The event +reports supported by a device are also provided by sysfs in +class/input/event*/device/capabilities/, and the properties of a device are +provided in class/input/event*/device/properties. + +Event types +=========== + +Event types are groupings of codes under a logical input construct. Each +type has a set of applicable codes to be used in generating events. See the +Codes section for details on valid codes for each type. + +* EV_SYN: + + - Used as markers to separate events. Events may be separated in time or in + space, such as with the multitouch protocol. + +* EV_KEY: + + - Used to describe state changes of keyboards, buttons, or other key-like + devices. + +* EV_REL: + + - Used to describe relative axis value changes, e.g. moving the mouse 5 units + to the left. + +* EV_ABS: + + - Used to describe absolute axis value changes, e.g. describing the + coordinates of a touch on a touchscreen. + +* EV_MSC: + + - Used to describe miscellaneous input data that do not fit into other types. + +* EV_SW: + + - Used to describe binary state input switches. + +* EV_LED: + + - Used to turn LEDs on devices on and off. + +* EV_SND: + + - Used to output sound to devices. + +* EV_REP: + + - Used for autorepeating devices. + +* EV_FF: + + - Used to send force feedback commands to an input device. + +* EV_PWR: + + - A special type for power button and switch input. + +* EV_FF_STATUS: + + - Used to receive force feedback device status. + +Event codes +=========== + +Event codes define the precise type of event. + +EV_SYN +------ + +EV_SYN event values are undefined. Their usage is defined only by when they are +sent in the evdev event stream. + +* SYN_REPORT: + + - Used to synchronize and separate events into packets of input data changes + occurring at the same moment in time. For example, motion of a mouse may set + the REL_X and REL_Y values for one motion, then emit a SYN_REPORT. The next + motion will emit more REL_X and REL_Y values and send another SYN_REPORT. + +* SYN_CONFIG: + + - TBD + +* SYN_MT_REPORT: + + - Used to synchronize and separate touch events. See the + multi-touch-protocol.txt document for more information. + +* SYN_DROPPED: + + - Used to indicate buffer overrun in the evdev client's event queue. + Client should ignore all events up to and including next SYN_REPORT + event and query the device (using EVIOCG* ioctls) to obtain its + current state. + +EV_KEY +------ + +EV_KEY events take the form KEY_ or BTN_. For example, KEY_A is used +to represent the 'A' key on a keyboard. When a key is depressed, an event with +the key's code is emitted with value 1. When the key is released, an event is +emitted with value 0. Some hardware send events when a key is repeated. These +events have a value of 2. In general, KEY_ is used for keyboard keys, and +BTN_ is used for other types of momentary switch events. + +A few EV_KEY codes have special meanings: + +* BTN_TOOL_: + + - These codes are used in conjunction with input trackpads, tablets, and + touchscreens. These devices may be used with fingers, pens, or other tools. + When an event occurs and a tool is used, the corresponding BTN_TOOL_ + code should be set to a value of 1. When the tool is no longer interacting + with the input device, the BTN_TOOL_ code should be reset to 0. All + trackpads, tablets, and touchscreens should use at least one BTN_TOOL_ + code when events are generated. + +* BTN_TOUCH: + + BTN_TOUCH is used for touch contact. While an input tool is determined to be + within meaningful physical contact, the value of this property must be set + to 1. Meaningful physical contact may mean any contact, or it may mean + contact conditioned by an implementation defined property. For example, a + touchpad may set the value to 1 only when the touch pressure rises above a + certain value. BTN_TOUCH may be combined with BTN_TOOL_ codes. For + example, a pen tablet may set BTN_TOOL_PEN to 1 and BTN_TOUCH to 0 while the + pen is hovering over but not touching the tablet surface. + +Note: For appropriate function of the legacy mousedev emulation driver, +BTN_TOUCH must be the first evdev code emitted in a synchronization frame. + +Note: Historically a touch device with BTN_TOOL_FINGER and BTN_TOUCH was +interpreted as a touchpad by userspace, while a similar device without +BTN_TOOL_FINGER was interpreted as a touchscreen. For backwards compatibility +with current userspace it is recommended to follow this distinction. In the +future, this distinction will be deprecated and the device properties ioctl +EVIOCGPROP, defined in linux/input.h, will be used to convey the device type. + +* BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, BTN_TOOL_TRIPLETAP, BTN_TOOL_QUADTAP: + + - These codes denote one, two, three, and four finger interaction on a + trackpad or touchscreen. For example, if the user uses two fingers and moves + them on the touchpad in an effort to scroll content on screen, + BTN_TOOL_DOUBLETAP should be set to value 1 for the duration of the motion. + Note that all BTN_TOOL_ codes and the BTN_TOUCH code are orthogonal in + purpose. A trackpad event generated by finger touches should generate events + for one code from each group. At most only one of these BTN_TOOL_ + codes should have a value of 1 during any synchronization frame. + +Note: Historically some drivers emitted multiple of the finger count codes with +a value of 1 in the same synchronization frame. This usage is deprecated. + +Note: In multitouch drivers, the input_mt_report_finger_count() function should +be used to emit these codes. Please see multi-touch-protocol.txt for details. + +EV_REL +------ + +EV_REL events describe relative changes in a property. For example, a mouse may +move to the left by a certain number of units, but its absolute position in +space is unknown. If the absolute position is known, EV_ABS codes should be used +instead of EV_REL codes. + +A few EV_REL codes have special meanings: + +* REL_WHEEL, REL_HWHEEL: + + - These codes are used for vertical and horizontal scroll wheels, + respectively. + +EV_ABS +------ + +EV_ABS events describe absolute changes in a property. For example, a touchpad +may emit coordinates for a touch location. + +A few EV_ABS codes have special meanings: + +* ABS_DISTANCE: + + - Used to describe the distance of a tool from an interaction surface. This + event should only be emitted while the tool is hovering, meaning in close + proximity of the device and while the value of the BTN_TOUCH code is 0. If + the input device may be used freely in three dimensions, consider ABS_Z + instead. + - BTN_TOOL_ should be set to 1 when the tool comes into detectable + proximity and set to 0 when the tool leaves detectable proximity. + BTN_TOOL_ signals the type of tool that is currently detected by the + hardware and is otherwise independent of ABS_DISTANCE and/or BTN_TOUCH. + +* ABS_MT_: + + - Used to describe multitouch input events. Please see + multi-touch-protocol.txt for details. + +EV_SW +----- + +EV_SW events describe stateful binary switches. For example, the SW_LID code is +used to denote when a laptop lid is closed. + +Upon binding to a device or resuming from suspend, a driver must report +the current switch state. This ensures that the device, kernel, and userspace +state is in sync. + +Upon resume, if the switch state is the same as before suspend, then the input +subsystem will filter out the duplicate switch state reports. The driver does +not need to keep the state of the switch at any time. + +EV_MSC +------ + +EV_MSC events are used for input and output events that do not fall under other +categories. + +A few EV_MSC codes have special meaning: + +* MSC_TIMESTAMP: + + - Used to report the number of microseconds since the last reset. This event + should be coded as an uint32 value, which is allowed to wrap around with + no special consequence. It is assumed that the time difference between two + consecutive events is reliable on a reasonable time scale (hours). + A reset to zero can happen, in which case the time since the last event is + unknown. If the device does not provide this information, the driver must + not provide it to user space. + +EV_LED +------ + +EV_LED events are used for input and output to set and query the state of +various LEDs on devices. + +EV_REP +------ + +EV_REP events are used for specifying autorepeating events. + +EV_SND +------ + +EV_SND events are used for sending sound commands to simple sound output +devices. + +EV_FF +----- + +EV_FF events are used to initialize a force feedback capable device and to cause +such device to feedback. + +EV_PWR +------ + +EV_PWR events are a special type of event used specifically for power +management. Its usage is not well defined. To be addressed later. + +Device properties +================= + +Normally, userspace sets up an input device based on the data it emits, +i.e., the event types. In the case of two devices emitting the same event +types, additional information can be provided in the form of device +properties. + +INPUT_PROP_DIRECT + INPUT_PROP_POINTER +-------------------------------------- + +The INPUT_PROP_DIRECT property indicates that device coordinates should be +directly mapped to screen coordinates (not taking into account trivial +transformations, such as scaling, flipping and rotating). Non-direct input +devices require non-trivial transformation, such as absolute to relative +transformation for touchpads. Typical direct input devices: touchscreens, +drawing tablets; non-direct devices: touchpads, mice. + +The INPUT_PROP_POINTER property indicates that the device is not transposed +on the screen and thus requires use of an on-screen pointer to trace user's +movements. Typical pointer devices: touchpads, tablets, mice; non-pointer +device: touchscreen. + +If neither INPUT_PROP_DIRECT or INPUT_PROP_POINTER are set, the property is +considered undefined and the device type should be deduced in the +traditional way, using emitted event types. + +INPUT_PROP_BUTTONPAD +-------------------- + +For touchpads where the button is placed beneath the surface, such that +pressing down on the pad causes a button click, this property should be +set. Common in clickpad notebooks and macbooks from 2009 and onwards. + +Originally, the buttonpad property was coded into the bcm5974 driver +version field under the name integrated button. For backwards +compatibility, both methods need to be checked in userspace. + +INPUT_PROP_SEMI_MT +------------------ + +Some touchpads, most common between 2008 and 2011, can detect the presence +of multiple contacts without resolving the individual positions; only the +number of contacts and a rectangular shape is known. For such +touchpads, the semi-mt property should be set. + +Depending on the device, the rectangle may enclose all touches, like a +bounding box, or just some of them, for instance the two most recent +touches. The diversity makes the rectangle of limited use, but some +gestures can normally be extracted from it. + +If INPUT_PROP_SEMI_MT is not set, the device is assumed to be a true MT +device. + +INPUT_PROP_TOPBUTTONPAD +----------------------- + +Some laptops, most notably the Lenovo 40 series provide a trackstick +device but do not have physical buttons associated with the trackstick +device. Instead, the top area of the touchpad is marked to show +visual/haptic areas for left, middle, right buttons intended to be used +with the trackstick. + +If INPUT_PROP_TOPBUTTONPAD is set, userspace should emulate buttons +accordingly. This property does not affect kernel behavior. +The kernel does not provide button emulation for such devices but treats +them as any other INPUT_PROP_BUTTONPAD device. + +INPUT_PROP_ACCELEROMETER +------------------------ + +Directional axes on this device (absolute and/or relative x, y, z) represent +accelerometer data. Some devices also report gyroscope data, which devices +can report through the rotational axes (absolute and/or relative rx, ry, rz). + +All other axes retain their meaning. A device must not mix +regular directional axes and accelerometer axes on the same event node. + +Guidelines +========== + +The guidelines below ensure proper single-touch and multi-finger functionality. +For multi-touch functionality, see the multi-touch-protocol.txt document for +more information. + +Mice +---- + +REL_{X,Y} must be reported when the mouse moves. BTN_LEFT must be used to report +the primary button press. BTN_{MIDDLE,RIGHT,4,5,etc.} should be used to report +further buttons of the device. REL_WHEEL and REL_HWHEEL should be used to report +scroll wheel events where available. + +Touchscreens +------------ + +ABS_{X,Y} must be reported with the location of the touch. BTN_TOUCH must be +used to report when a touch is active on the screen. +BTN_{MOUSE,LEFT,MIDDLE,RIGHT} must not be reported as the result of touch +contact. BTN_TOOL_ events should be reported where possible. + +For new hardware, INPUT_PROP_DIRECT should be set. + +Trackpads +--------- + +Legacy trackpads that only provide relative position information must report +events like mice described above. + +Trackpads that provide absolute touch position must report ABS_{X,Y} for the +location of the touch. BTN_TOUCH should be used to report when a touch is active +on the trackpad. Where multi-finger support is available, BTN_TOOL_ should +be used to report the number of touches active on the trackpad. + +For new hardware, INPUT_PROP_POINTER should be set. + +Tablets +------- + +BTN_TOOL_ events must be reported when a stylus or other tool is active on +the tablet. ABS_{X,Y} must be reported with the location of the tool. BTN_TOUCH +should be used to report when the tool is in contact with the tablet. +BTN_{STYLUS,STYLUS2} should be used to report buttons on the tool itself. Any +button may be used for buttons on the tablet except BTN_{MOUSE,LEFT}. +BTN_{0,1,2,etc} are good generic codes for unlabeled buttons. Do not use +meaningful buttons, like BTN_FORWARD, unless the button is labeled for that +purpose on the device. + +For new hardware, both INPUT_PROP_DIRECT and INPUT_PROP_POINTER should be set. diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt deleted file mode 100644 index 36ea940e5bb91fe9711544e5d0bbe2e2293c8ca8..0000000000000000000000000000000000000000 --- a/Documentation/input/event-codes.txt +++ /dev/null @@ -1,352 +0,0 @@ -The input protocol uses a map of types and codes to express input device values -to userspace. This document describes the types and codes and how and when they -may be used. - -A single hardware event generates multiple input events. Each input event -contains the new value of a single data item. A special event type, EV_SYN, is -used to separate input events into packets of input data changes occurring at -the same moment in time. In the following, the term "event" refers to a single -input event encompassing a type, code, and value. - -The input protocol is a stateful protocol. Events are emitted only when values -of event codes have changed. However, the state is maintained within the Linux -input subsystem; drivers do not need to maintain the state and may attempt to -emit unchanged values without harm. Userspace may obtain the current state of -event code values using the EVIOCG* ioctls defined in linux/input.h. The event -reports supported by a device are also provided by sysfs in -class/input/event*/device/capabilities/, and the properties of a device are -provided in class/input/event*/device/properties. - -Event types: -=========== -Event types are groupings of codes under a logical input construct. Each -type has a set of applicable codes to be used in generating events. See the -Codes section for details on valid codes for each type. - -* EV_SYN: - - Used as markers to separate events. Events may be separated in time or in - space, such as with the multitouch protocol. - -* EV_KEY: - - Used to describe state changes of keyboards, buttons, or other key-like - devices. - -* EV_REL: - - Used to describe relative axis value changes, e.g. moving the mouse 5 units - to the left. - -* EV_ABS: - - Used to describe absolute axis value changes, e.g. describing the - coordinates of a touch on a touchscreen. - -* EV_MSC: - - Used to describe miscellaneous input data that do not fit into other types. - -* EV_SW: - - Used to describe binary state input switches. - -* EV_LED: - - Used to turn LEDs on devices on and off. - -* EV_SND: - - Used to output sound to devices. - -* EV_REP: - - Used for autorepeating devices. - -* EV_FF: - - Used to send force feedback commands to an input device. - -* EV_PWR: - - A special type for power button and switch input. - -* EV_FF_STATUS: - - Used to receive force feedback device status. - -Event codes: -=========== -Event codes define the precise type of event. - -EV_SYN: ----------- -EV_SYN event values are undefined. Their usage is defined only by when they are -sent in the evdev event stream. - -* SYN_REPORT: - - Used to synchronize and separate events into packets of input data changes - occurring at the same moment in time. For example, motion of a mouse may set - the REL_X and REL_Y values for one motion, then emit a SYN_REPORT. The next - motion will emit more REL_X and REL_Y values and send another SYN_REPORT. - -* SYN_CONFIG: - - TBD - -* SYN_MT_REPORT: - - Used to synchronize and separate touch events. See the - multi-touch-protocol.txt document for more information. - -* SYN_DROPPED: - - Used to indicate buffer overrun in the evdev client's event queue. - Client should ignore all events up to and including next SYN_REPORT - event and query the device (using EVIOCG* ioctls) to obtain its - current state. - -EV_KEY: ----------- -EV_KEY events take the form KEY_ or BTN_. For example, KEY_A is used -to represent the 'A' key on a keyboard. When a key is depressed, an event with -the key's code is emitted with value 1. When the key is released, an event is -emitted with value 0. Some hardware send events when a key is repeated. These -events have a value of 2. In general, KEY_ is used for keyboard keys, and -BTN_ is used for other types of momentary switch events. - -A few EV_KEY codes have special meanings: - -* BTN_TOOL_: - - These codes are used in conjunction with input trackpads, tablets, and - touchscreens. These devices may be used with fingers, pens, or other tools. - When an event occurs and a tool is used, the corresponding BTN_TOOL_ - code should be set to a value of 1. When the tool is no longer interacting - with the input device, the BTN_TOOL_ code should be reset to 0. All - trackpads, tablets, and touchscreens should use at least one BTN_TOOL_ - code when events are generated. - -* BTN_TOUCH: - BTN_TOUCH is used for touch contact. While an input tool is determined to be - within meaningful physical contact, the value of this property must be set - to 1. Meaningful physical contact may mean any contact, or it may mean - contact conditioned by an implementation defined property. For example, a - touchpad may set the value to 1 only when the touch pressure rises above a - certain value. BTN_TOUCH may be combined with BTN_TOOL_ codes. For - example, a pen tablet may set BTN_TOOL_PEN to 1 and BTN_TOUCH to 0 while the - pen is hovering over but not touching the tablet surface. - -Note: For appropriate function of the legacy mousedev emulation driver, -BTN_TOUCH must be the first evdev code emitted in a synchronization frame. - -Note: Historically a touch device with BTN_TOOL_FINGER and BTN_TOUCH was -interpreted as a touchpad by userspace, while a similar device without -BTN_TOOL_FINGER was interpreted as a touchscreen. For backwards compatibility -with current userspace it is recommended to follow this distinction. In the -future, this distinction will be deprecated and the device properties ioctl -EVIOCGPROP, defined in linux/input.h, will be used to convey the device type. - -* BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, BTN_TOOL_TRIPLETAP, BTN_TOOL_QUADTAP: - - These codes denote one, two, three, and four finger interaction on a - trackpad or touchscreen. For example, if the user uses two fingers and moves - them on the touchpad in an effort to scroll content on screen, - BTN_TOOL_DOUBLETAP should be set to value 1 for the duration of the motion. - Note that all BTN_TOOL_ codes and the BTN_TOUCH code are orthogonal in - purpose. A trackpad event generated by finger touches should generate events - for one code from each group. At most only one of these BTN_TOOL_ - codes should have a value of 1 during any synchronization frame. - -Note: Historically some drivers emitted multiple of the finger count codes with -a value of 1 in the same synchronization frame. This usage is deprecated. - -Note: In multitouch drivers, the input_mt_report_finger_count() function should -be used to emit these codes. Please see multi-touch-protocol.txt for details. - -EV_REL: ----------- -EV_REL events describe relative changes in a property. For example, a mouse may -move to the left by a certain number of units, but its absolute position in -space is unknown. If the absolute position is known, EV_ABS codes should be used -instead of EV_REL codes. - -A few EV_REL codes have special meanings: - -* REL_WHEEL, REL_HWHEEL: - - These codes are used for vertical and horizontal scroll wheels, - respectively. - -EV_ABS: ----------- -EV_ABS events describe absolute changes in a property. For example, a touchpad -may emit coordinates for a touch location. - -A few EV_ABS codes have special meanings: - -* ABS_DISTANCE: - - Used to describe the distance of a tool from an interaction surface. This - event should only be emitted while the tool is hovering, meaning in close - proximity of the device and while the value of the BTN_TOUCH code is 0. If - the input device may be used freely in three dimensions, consider ABS_Z - instead. - - BTN_TOOL_ should be set to 1 when the tool comes into detectable - proximity and set to 0 when the tool leaves detectable proximity. - BTN_TOOL_ signals the type of tool that is currently detected by the - hardware and is otherwise independent of ABS_DISTANCE and/or BTN_TOUCH. - -* ABS_MT_: - - Used to describe multitouch input events. Please see - multi-touch-protocol.txt for details. - -EV_SW: ----------- -EV_SW events describe stateful binary switches. For example, the SW_LID code is -used to denote when a laptop lid is closed. - -Upon binding to a device or resuming from suspend, a driver must report -the current switch state. This ensures that the device, kernel, and userspace -state is in sync. - -Upon resume, if the switch state is the same as before suspend, then the input -subsystem will filter out the duplicate switch state reports. The driver does -not need to keep the state of the switch at any time. - -EV_MSC: ----------- -EV_MSC events are used for input and output events that do not fall under other -categories. - -A few EV_MSC codes have special meaning: - -* MSC_TIMESTAMP: - - Used to report the number of microseconds since the last reset. This event - should be coded as an uint32 value, which is allowed to wrap around with - no special consequence. It is assumed that the time difference between two - consecutive events is reliable on a reasonable time scale (hours). - A reset to zero can happen, in which case the time since the last event is - unknown. If the device does not provide this information, the driver must - not provide it to user space. - -EV_LED: ----------- -EV_LED events are used for input and output to set and query the state of -various LEDs on devices. - -EV_REP: ----------- -EV_REP events are used for specifying autorepeating events. - -EV_SND: ----------- -EV_SND events are used for sending sound commands to simple sound output -devices. - -EV_FF: ----------- -EV_FF events are used to initialize a force feedback capable device and to cause -such device to feedback. - -EV_PWR: ----------- -EV_PWR events are a special type of event used specifically for power -management. Its usage is not well defined. To be addressed later. - -Device properties: -================= -Normally, userspace sets up an input device based on the data it emits, -i.e., the event types. In the case of two devices emitting the same event -types, additional information can be provided in the form of device -properties. - -INPUT_PROP_DIRECT + INPUT_PROP_POINTER: --------------------------------------- -The INPUT_PROP_DIRECT property indicates that device coordinates should be -directly mapped to screen coordinates (not taking into account trivial -transformations, such as scaling, flipping and rotating). Non-direct input -devices require non-trivial transformation, such as absolute to relative -transformation for touchpads. Typical direct input devices: touchscreens, -drawing tablets; non-direct devices: touchpads, mice. - -The INPUT_PROP_POINTER property indicates that the device is not transposed -on the screen and thus requires use of an on-screen pointer to trace user's -movements. Typical pointer devices: touchpads, tablets, mice; non-pointer -device: touchscreen. - -If neither INPUT_PROP_DIRECT or INPUT_PROP_POINTER are set, the property is -considered undefined and the device type should be deduced in the -traditional way, using emitted event types. - -INPUT_PROP_BUTTONPAD: --------------------- -For touchpads where the button is placed beneath the surface, such that -pressing down on the pad causes a button click, this property should be -set. Common in clickpad notebooks and macbooks from 2009 and onwards. - -Originally, the buttonpad property was coded into the bcm5974 driver -version field under the name integrated button. For backwards -compatibility, both methods need to be checked in userspace. - -INPUT_PROP_SEMI_MT: ------------------- -Some touchpads, most common between 2008 and 2011, can detect the presence -of multiple contacts without resolving the individual positions; only the -number of contacts and a rectangular shape is known. For such -touchpads, the semi-mt property should be set. - -Depending on the device, the rectangle may enclose all touches, like a -bounding box, or just some of them, for instance the two most recent -touches. The diversity makes the rectangle of limited use, but some -gestures can normally be extracted from it. - -If INPUT_PROP_SEMI_MT is not set, the device is assumed to be a true MT -device. - -INPUT_PROP_TOPBUTTONPAD: ------------------------ -Some laptops, most notably the Lenovo *40 series provide a trackstick -device but do not have physical buttons associated with the trackstick -device. Instead, the top area of the touchpad is marked to show -visual/haptic areas for left, middle, right buttons intended to be used -with the trackstick. - -If INPUT_PROP_TOPBUTTONPAD is set, userspace should emulate buttons -accordingly. This property does not affect kernel behavior. -The kernel does not provide button emulation for such devices but treats -them as any other INPUT_PROP_BUTTONPAD device. - -INPUT_PROP_ACCELEROMETER -------------------------- -Directional axes on this device (absolute and/or relative x, y, z) represent -accelerometer data. All other axes retain their meaning. A device must not mix -regular directional axes and accelerometer axes on the same event node. - -Guidelines: -========== -The guidelines below ensure proper single-touch and multi-finger functionality. -For multi-touch functionality, see the multi-touch-protocol.txt document for -more information. - -Mice: ----------- -REL_{X,Y} must be reported when the mouse moves. BTN_LEFT must be used to report -the primary button press. BTN_{MIDDLE,RIGHT,4,5,etc.} should be used to report -further buttons of the device. REL_WHEEL and REL_HWHEEL should be used to report -scroll wheel events where available. - -Touchscreens: ----------- -ABS_{X,Y} must be reported with the location of the touch. BTN_TOUCH must be -used to report when a touch is active on the screen. -BTN_{MOUSE,LEFT,MIDDLE,RIGHT} must not be reported as the result of touch -contact. BTN_TOOL_ events should be reported where possible. - -For new hardware, INPUT_PROP_DIRECT should be set. - -Trackpads: ----------- -Legacy trackpads that only provide relative position information must report -events like mice described above. - -Trackpads that provide absolute touch position must report ABS_{X,Y} for the -location of the touch. BTN_TOUCH should be used to report when a touch is active -on the trackpad. Where multi-finger support is available, BTN_TOOL_ should -be used to report the number of touches active on the trackpad. - -For new hardware, INPUT_PROP_POINTER should be set. - -Tablets: ----------- -BTN_TOOL_ events must be reported when a stylus or other tool is active on -the tablet. ABS_{X,Y} must be reported with the location of the tool. BTN_TOUCH -should be used to report when the tool is in contact with the tablet. -BTN_{STYLUS,STYLUS2} should be used to report buttons on the tool itself. Any -button may be used for buttons on the tablet except BTN_{MOUSE,LEFT}. -BTN_{0,1,2,etc} are good generic codes for unlabeled buttons. Do not use -meaningful buttons, like BTN_FORWARD, unless the button is labeled for that -purpose on the device. - -For new hardware, both INPUT_PROP_DIRECT and INPUT_PROP_POINTER should be set. diff --git a/Documentation/input/ff.rst b/Documentation/input/ff.rst new file mode 100644 index 0000000000000000000000000000000000000000..26d461998e08974f1e47e54f33cfc630c0a9879d --- /dev/null +++ b/Documentation/input/ff.rst @@ -0,0 +1,265 @@ +======================== +Force feedback for Linux +======================== + +:Author: Johann Deneux on 2001/04/22. +:Updated: Anssi Hannula on 2006/04/09. + +You may redistribute this file. Please remember to include shape.svg and +interactive.svg as well. + +Introduction +~~~~~~~~~~~~ + +This document describes how to use force feedback devices under Linux. The +goal is not to support these devices as if they were simple input-only devices +(as it is already the case), but to really enable the rendering of force +effects. +This document only describes the force feedback part of the Linux input +interface. Please read joystick.txt and input.txt before reading further this +document. + +Instructions to the user +~~~~~~~~~~~~~~~~~~~~~~~~ + +To enable force feedback, you have to: + +1. have your kernel configured with evdev and a driver that supports your + device. +2. make sure evdev module is loaded and /dev/input/event* device files are + created. + +Before you start, let me WARN you that some devices shake violently during the +initialisation phase. This happens for example with my "AVB Top Shot Pegasus". +To stop this annoying behaviour, move you joystick to its limits. Anyway, you +should keep a hand on your device, in order to avoid it to break down if +something goes wrong. + +If you have a serial iforce device, you need to start inputattach. See +joystick.txt for details. + +Does it work ? +-------------- + +There is an utility called fftest that will allow you to test the driver:: + + % fftest /dev/input/eventXX + +Instructions to the developer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All interactions are done using the event API. That is, you can use ioctl() +and write() on /dev/input/eventXX. +This information is subject to change. + +Querying device capabilities +---------------------------- + +:: + + #include + #include + + #define BITS_TO_LONGS(x) \ + (((x) + 8 * sizeof (unsigned long) - 1) / (8 * sizeof (unsigned long))) + unsigned long features[BITS_TO_LONGS(FF_CNT)]; + int ioctl(int file_descriptor, int request, unsigned long *features); + +"request" must be EVIOCGBIT(EV_FF, size of features array in bytes ) + +Returns the features supported by the device. features is a bitfield with the +following bits: + +- FF_CONSTANT can render constant force effects +- FF_PERIODIC can render periodic effects with the following waveforms: + + - FF_SQUARE square waveform + - FF_TRIANGLE triangle waveform + - FF_SINE sine waveform + - FF_SAW_UP sawtooth up waveform + - FF_SAW_DOWN sawtooth down waveform + - FF_CUSTOM custom waveform + +- FF_RAMP can render ramp effects +- FF_SPRING can simulate the presence of a spring +- FF_FRICTION can simulate friction +- FF_DAMPER can simulate damper effects +- FF_RUMBLE rumble effects +- FF_INERTIA can simulate inertia +- FF_GAIN gain is adjustable +- FF_AUTOCENTER autocenter is adjustable + +.. note:: + + - In most cases you should use FF_PERIODIC instead of FF_RUMBLE. All + devices that support FF_RUMBLE support FF_PERIODIC (square, triangle, + sine) and the other way around. + + - The exact syntax FF_CUSTOM is undefined for the time being as no driver + supports it yet. + +:: + + int ioctl(int fd, EVIOCGEFFECTS, int *n); + +Returns the number of effects the device can keep in its memory. + +Uploading effects to the device +------------------------------- + +:: + + #include + #include + + int ioctl(int file_descriptor, int request, struct ff_effect *effect); + +"request" must be EVIOCSFF. + +"effect" points to a structure describing the effect to upload. The effect is +uploaded, but not played. +The content of effect may be modified. In particular, its field "id" is set +to the unique id assigned by the driver. This data is required for performing +some operations (removing an effect, controlling the playback). +This if field must be set to -1 by the user in order to tell the driver to +allocate a new effect. + +Effects are file descriptor specific. + +See for a description of the ff_effect struct. You +should also find help in a few sketches, contained in files shape.svg +and interactive.svg: + +.. kernel-figure:: shape.svg + + Shape + +.. kernel-figure:: interactive.svg + + Interactive + + +Removing an effect from the device +---------------------------------- + +:: + + int ioctl(int fd, EVIOCRMFF, effect.id); + +This makes room for new effects in the device's memory. Note that this also +stops the effect if it was playing. + +Controlling the playback of effects +----------------------------------- + +Control of playing is done with write(). Below is an example: + +:: + + #include + #include + + struct input_event play; + struct input_event stop; + struct ff_effect effect; + int fd; + ... + fd = open("/dev/input/eventXX", O_RDWR); + ... + /* Play three times */ + play.type = EV_FF; + play.code = effect.id; + play.value = 3; + + write(fd, (const void*) &play, sizeof(play)); + ... + /* Stop an effect */ + stop.type = EV_FF; + stop.code = effect.id; + stop.value = 0; + + write(fd, (const void*) &play, sizeof(stop)); + +Setting the gain +---------------- + +Not all devices have the same strength. Therefore, users should set a gain +factor depending on how strong they want effects to be. This setting is +persistent across access to the driver. + +:: + + /* Set the gain of the device + int gain; /* between 0 and 100 */ + struct input_event ie; /* structure used to communicate with the driver */ + + ie.type = EV_FF; + ie.code = FF_GAIN; + ie.value = 0xFFFFUL * gain / 100; + + if (write(fd, &ie, sizeof(ie)) == -1) + perror("set gain"); + +Enabling/Disabling autocenter +----------------------------- + +The autocenter feature quite disturbs the rendering of effects in my opinion, +and I think it should be an effect, which computation depends on the game +type. But you can enable it if you want. + +:: + + int autocenter; /* between 0 and 100 */ + struct input_event ie; + + ie.type = EV_FF; + ie.code = FF_AUTOCENTER; + ie.value = 0xFFFFUL * autocenter / 100; + + if (write(fd, &ie, sizeof(ie)) == -1) + perror("set auto-center"); + +A value of 0 means "no auto-center". + +Dynamic update of an effect +--------------------------- + +Proceed as if you wanted to upload a new effect, except that instead of +setting the id field to -1, you set it to the wanted effect id. +Normally, the effect is not stopped and restarted. However, depending on the +type of device, not all parameters can be dynamically updated. For example, +the direction of an effect cannot be updated with iforce devices. In this +case, the driver stops the effect, up-load it, and restart it. + +Therefore it is recommended to dynamically change direction while the effect +is playing only when it is ok to restart the effect with a replay count of 1. + +Information about the status of effects +--------------------------------------- + +Every time the status of an effect is changed, an event is sent. The values +and meanings of the fields of the event are as follows:: + + struct input_event { + /* When the status of the effect changed */ + struct timeval time; + + /* Set to EV_FF_STATUS */ + unsigned short type; + + /* Contains the id of the effect */ + unsigned short code; + + /* Indicates the status */ + unsigned int value; + }; + + FF_STATUS_STOPPED The effect stopped playing + FF_STATUS_PLAYING The effect started to play + +.. note:: + + - Status feedback is only supported by iforce driver. If you have + a really good reason to use this, please contact + linux-joystick@atrey.karlin.mff.cuni.cz or anssi.hannula@gmail.com + so that support for it can be added to the rest of the drivers. diff --git a/Documentation/input/ff.txt b/Documentation/input/ff.txt deleted file mode 100644 index b3867bf49f8f9ec0743e3cc9384d8ed45449a972..0000000000000000000000000000000000000000 --- a/Documentation/input/ff.txt +++ /dev/null @@ -1,221 +0,0 @@ -Force feedback for Linux. -By Johann Deneux on 2001/04/22. -Updated by Anssi Hannula on 2006/04/09. -You may redistribute this file. Please remember to include shape.fig and -interactive.fig as well. ----------------------------------------------------------------------------- - -1. Introduction -~~~~~~~~~~~~~~~ -This document describes how to use force feedback devices under Linux. The -goal is not to support these devices as if they were simple input-only devices -(as it is already the case), but to really enable the rendering of force -effects. -This document only describes the force feedback part of the Linux input -interface. Please read joystick.txt and input.txt before reading further this -document. - -2. Instructions to the user -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To enable force feedback, you have to: - -1. have your kernel configured with evdev and a driver that supports your - device. -2. make sure evdev module is loaded and /dev/input/event* device files are - created. - -Before you start, let me WARN you that some devices shake violently during the -initialisation phase. This happens for example with my "AVB Top Shot Pegasus". -To stop this annoying behaviour, move you joystick to its limits. Anyway, you -should keep a hand on your device, in order to avoid it to break down if -something goes wrong. - -If you have a serial iforce device, you need to start inputattach. See -joystick.txt for details. - -2.1 Does it work ? -~~~~~~~~~~~~~~~~~~ -There is an utility called fftest that will allow you to test the driver. -% fftest /dev/input/eventXX - -3. Instructions to the developer -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -All interactions are done using the event API. That is, you can use ioctl() -and write() on /dev/input/eventXX. -This information is subject to change. - -3.1 Querying device capabilities -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#include -#include - -#define BITS_TO_LONGS(x) \ - (((x) + 8 * sizeof (unsigned long) - 1) / (8 * sizeof (unsigned long))) -unsigned long features[BITS_TO_LONGS(FF_CNT)]; -int ioctl(int file_descriptor, int request, unsigned long *features); - -"request" must be EVIOCGBIT(EV_FF, size of features array in bytes ) - -Returns the features supported by the device. features is a bitfield with the -following bits: -- FF_CONSTANT can render constant force effects -- FF_PERIODIC can render periodic effects with the following waveforms: - - FF_SQUARE square waveform - - FF_TRIANGLE triangle waveform - - FF_SINE sine waveform - - FF_SAW_UP sawtooth up waveform - - FF_SAW_DOWN sawtooth down waveform - - FF_CUSTOM custom waveform -- FF_RAMP can render ramp effects -- FF_SPRING can simulate the presence of a spring -- FF_FRICTION can simulate friction -- FF_DAMPER can simulate damper effects -- FF_RUMBLE rumble effects -- FF_INERTIA can simulate inertia -- FF_GAIN gain is adjustable -- FF_AUTOCENTER autocenter is adjustable - -Note: In most cases you should use FF_PERIODIC instead of FF_RUMBLE. All - devices that support FF_RUMBLE support FF_PERIODIC (square, triangle, - sine) and the other way around. - -Note: The exact syntax FF_CUSTOM is undefined for the time being as no driver - supports it yet. - - -int ioctl(int fd, EVIOCGEFFECTS, int *n); - -Returns the number of effects the device can keep in its memory. - -3.2 Uploading effects to the device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#include -#include - -int ioctl(int file_descriptor, int request, struct ff_effect *effect); - -"request" must be EVIOCSFF. - -"effect" points to a structure describing the effect to upload. The effect is -uploaded, but not played. -The content of effect may be modified. In particular, its field "id" is set -to the unique id assigned by the driver. This data is required for performing -some operations (removing an effect, controlling the playback). -This if field must be set to -1 by the user in order to tell the driver to -allocate a new effect. - -Effects are file descriptor specific. - -See for a description of the ff_effect struct. You should also -find help in a few sketches, contained in files shape.fig and interactive.fig. -You need xfig to visualize these files. - -3.3 Removing an effect from the device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -int ioctl(int fd, EVIOCRMFF, effect.id); - -This makes room for new effects in the device's memory. Note that this also -stops the effect if it was playing. - -3.4 Controlling the playback of effects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Control of playing is done with write(). Below is an example: - -#include -#include - - struct input_event play; - struct input_event stop; - struct ff_effect effect; - int fd; -... - fd = open("/dev/input/eventXX", O_RDWR); -... - /* Play three times */ - play.type = EV_FF; - play.code = effect.id; - play.value = 3; - - write(fd, (const void*) &play, sizeof(play)); -... - /* Stop an effect */ - stop.type = EV_FF; - stop.code = effect.id; - stop.value = 0; - - write(fd, (const void*) &play, sizeof(stop)); - -3.5 Setting the gain -~~~~~~~~~~~~~~~~~~~~ -Not all devices have the same strength. Therefore, users should set a gain -factor depending on how strong they want effects to be. This setting is -persistent across access to the driver. - -/* Set the gain of the device -int gain; /* between 0 and 100 */ -struct input_event ie; /* structure used to communicate with the driver */ - -ie.type = EV_FF; -ie.code = FF_GAIN; -ie.value = 0xFFFFUL * gain / 100; - -if (write(fd, &ie, sizeof(ie)) == -1) - perror("set gain"); - -3.6 Enabling/Disabling autocenter -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The autocenter feature quite disturbs the rendering of effects in my opinion, -and I think it should be an effect, which computation depends on the game -type. But you can enable it if you want. - -int autocenter; /* between 0 and 100 */ -struct input_event ie; - -ie.type = EV_FF; -ie.code = FF_AUTOCENTER; -ie.value = 0xFFFFUL * autocenter / 100; - -if (write(fd, &ie, sizeof(ie)) == -1) - perror("set auto-center"); - -A value of 0 means "no auto-center". - -3.7 Dynamic update of an effect -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Proceed as if you wanted to upload a new effect, except that instead of -setting the id field to -1, you set it to the wanted effect id. -Normally, the effect is not stopped and restarted. However, depending on the -type of device, not all parameters can be dynamically updated. For example, -the direction of an effect cannot be updated with iforce devices. In this -case, the driver stops the effect, up-load it, and restart it. - -Therefore it is recommended to dynamically change direction while the effect -is playing only when it is ok to restart the effect with a replay count of 1. - -3.8 Information about the status of effects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Every time the status of an effect is changed, an event is sent. The values -and meanings of the fields of the event are as follows: - -struct input_event { -/* When the status of the effect changed */ - struct timeval time; - -/* Set to EV_FF_STATUS */ - unsigned short type; - -/* Contains the id of the effect */ - unsigned short code; - -/* Indicates the status */ - unsigned int value; -}; - -FF_STATUS_STOPPED The effect stopped playing -FF_STATUS_PLAYING The effect started to play - -NOTE: Status feedback is only supported by iforce driver. If you have - a really good reason to use this, please contact - linux-joystick@atrey.karlin.mff.cuni.cz or anssi.hannula@gmail.com - so that support for it can be added to the rest of the drivers. - diff --git a/Documentation/input/gamepad.rst b/Documentation/input/gamepad.rst new file mode 100644 index 0000000000000000000000000000000000000000..4d5e7fb80a8456599608817dc8ca00478e2ee1be --- /dev/null +++ b/Documentation/input/gamepad.rst @@ -0,0 +1,191 @@ +--------------------------- +Linux Gamepad Specification +--------------------------- + +:Author: 2013 by David Herrmann + + +Introduction +~~~~~~~~~~~~ +Linux provides many different input drivers for gamepad hardware. To avoid +having user-space deal with different button-mappings for each gamepad, this +document defines how gamepads are supposed to report their data. + +Geometry +~~~~~~~~ +As "gamepad" we define devices which roughly look like this:: + + ____________________________ __ + / [__ZL__] [__ZR__] \ | + / [__ TL __] [__ TR __] \ | Front Triggers + __/________________________________\__ __| + / _ \ | + / /\ __ (N) \ | + / || __ |MO| __ _ _ \ | Main Pad + | <===DP===> |SE| |ST| (W) -|- (E) | | + \ || ___ ___ _ / | + /\ \/ / \ / \ (S) /\ __| + / \________ | LS | ____ | RS | ________/ \ | + | / \ \___/ / \ \___/ / \ | | Control Sticks + | / \_____/ \_____/ \ | __| + | / \ | + \_____/ \_____/ + + |________|______| |______|___________| + D-Pad Left Right Action Pad + Stick Stick + + |_____________| + Menu Pad + +Most gamepads have the following features: + + - Action-Pad + 4 buttons in diamonds-shape (on the right side). The buttons are + differently labeled on most devices so we define them as NORTH, + SOUTH, WEST and EAST. + - D-Pad (Direction-pad) + 4 buttons (on the left side) that point up, down, left and right. + - Menu-Pad + Different constellations, but most-times 2 buttons: SELECT - START + Furthermore, many gamepads have a fancy branded button that is used as + special system-button. It often looks different to the other buttons and + is used to pop up system-menus or system-settings. + - Analog-Sticks + Analog-sticks provide freely moveable sticks to control directions. Not + all devices have both or any, but they are present at most times. + Analog-sticks may also provide a digital button if you press them. + - Triggers + Triggers are located on the upper-side of the pad in vertical direction. + Not all devices provide them, but the upper buttons are normally named + Left- and Right-Triggers, the lower buttons Z-Left and Z-Right. + - Rumble + Many devices provide force-feedback features. But are mostly just + simple rumble motors. + +Detection +~~~~~~~~~ + +All gamepads that follow the protocol described here map BTN_GAMEPAD. This is +an alias for BTN_SOUTH/BTN_A. It can be used to identify a gamepad as such. +However, not all gamepads provide all features, so you need to test for all +features that you need, first. How each feature is mapped is described below. + +Legacy drivers often don't comply to these rules. As we cannot change them +for backwards-compatibility reasons, you need to provide fixup mappings in +user-space yourself. Some of them might also provide module-options that +change the mappings so you can advise users to set these. + +All new gamepads are supposed to comply with this mapping. Please report any +bugs, if they don't. + +There are a lot of less-featured/less-powerful devices out there, which re-use +the buttons from this protocol. However, they try to do this in a compatible +fashion. For example, the "Nintendo Wii Nunchuk" provides two trigger buttons +and one analog stick. It reports them as if it were a gamepad with only one +analog stick and two trigger buttons on the right side. +But that means, that if you only support "real" gamepads, you must test +devices for _all_ reported events that you need. Otherwise, you will also get +devices that report a small subset of the events. + +No other devices, that do not look/feel like a gamepad, shall report these +events. + +Events +~~~~~~ + +Gamepads report the following events: + +- Action-Pad: + + Every gamepad device has at least 2 action buttons. This means, that every + device reports BTN_SOUTH (which BTN_GAMEPAD is an alias for). Regardless + of the labels on the buttons, the codes are sent according to the + physical position of the buttons. + + Please note that 2- and 3-button pads are fairly rare and old. You might + want to filter gamepads that do not report all four. + + - 2-Button Pad: + + If only 2 action-buttons are present, they are reported as BTN_SOUTH and + BTN_EAST. For vertical layouts, the upper button is BTN_EAST. For + horizontal layouts, the button more on the right is BTN_EAST. + + - 3-Button Pad: + + If only 3 action-buttons are present, they are reported as (from left + to right): BTN_WEST, BTN_SOUTH, BTN_EAST + If the buttons are aligned perfectly vertically, they are reported as + (from top down): BTN_WEST, BTN_SOUTH, BTN_EAST + + - 4-Button Pad: + + If all 4 action-buttons are present, they can be aligned in two + different formations. If diamond-shaped, they are reported as BTN_NORTH, + BTN_WEST, BTN_SOUTH, BTN_EAST according to their physical location. + If rectangular-shaped, the upper-left button is BTN_NORTH, lower-left + is BTN_WEST, lower-right is BTN_SOUTH and upper-right is BTN_EAST. + +- D-Pad: + + Every gamepad provides a D-Pad with four directions: Up, Down, Left, Right + Some of these are available as digital buttons, some as analog buttons. Some + may even report both. The kernel does not convert between these so + applications should support both and choose what is more appropriate if + both are reported. + + - Digital buttons are reported as: + + BTN_DPAD_* + + - Analog buttons are reported as: + + ABS_HAT0X and ABS_HAT0Y + + (for ABS values negative is left/up, positive is right/down) + +- Analog-Sticks: + + The left analog-stick is reported as ABS_X, ABS_Y. The right analog stick is + reported as ABS_RX, ABS_RY. Zero, one or two sticks may be present. + If analog-sticks provide digital buttons, they are mapped accordingly as + BTN_THUMBL (first/left) and BTN_THUMBR (second/right). + + (for ABS values negative is left/up, positive is right/down) + +- Triggers: + + Trigger buttons can be available as digital or analog buttons or both. User- + space must correctly deal with any situation and choose the most appropriate + mode. + + Upper trigger buttons are reported as BTN_TR or ABS_HAT1X (right) and BTN_TL + or ABS_HAT1Y (left). Lower trigger buttons are reported as BTN_TR2 or + ABS_HAT2X (right/ZR) and BTN_TL2 or ABS_HAT2Y (left/ZL). + + If only one trigger-button combination is present (upper+lower), they are + reported as "right" triggers (BTN_TR/ABS_HAT1X). + + (ABS trigger values start at 0, pressure is reported as positive values) + +- Menu-Pad: + + Menu buttons are always digital and are mapped according to their location + instead of their labels. That is: + + - 1-button Pad: + + Mapped as BTN_START + + - 2-button Pad: + + Left button mapped as BTN_SELECT, right button mapped as BTN_START + + Many pads also have a third button which is branded or has a special symbol + and meaning. Such buttons are mapped as BTN_MODE. Examples are the Nintendo + "HOME" button, the XBox "X"-button or Sony "PS" button. + +- Rumble: + + Rumble is advertised as FF_RUMBLE. diff --git a/Documentation/input/gamepad.txt b/Documentation/input/gamepad.txt deleted file mode 100644 index 3f6d8a5e9cdc38f5ba0c2ec9a10ff68bb5949405..0000000000000000000000000000000000000000 --- a/Documentation/input/gamepad.txt +++ /dev/null @@ -1,159 +0,0 @@ - Linux Gamepad API ----------------------------------------------------------------------------- - -1. Intro -~~~~~~~~ -Linux provides many different input drivers for gamepad hardware. To avoid -having user-space deal with different button-mappings for each gamepad, this -document defines how gamepads are supposed to report their data. - -2. Geometry -~~~~~~~~~~~ -As "gamepad" we define devices which roughly look like this: - - ____________________________ __ - / [__ZL__] [__ZR__] \ | - / [__ TL __] [__ TR __] \ | Front Triggers - __/________________________________\__ __| - / _ \ | - / /\ __ (N) \ | - / || __ |MO| __ _ _ \ | Main Pad - | <===DP===> |SE| |ST| (W) -|- (E) | | - \ || ___ ___ _ / | - /\ \/ / \ / \ (S) /\ __| - / \________ | LS | ____ | RS | ________/ \ | - | / \ \___/ / \ \___/ / \ | | Control Sticks - | / \_____/ \_____/ \ | __| - | / \ | - \_____/ \_____/ - - |________|______| |______|___________| - D-Pad Left Right Action Pad - Stick Stick - - |_____________| - Menu Pad - -Most gamepads have the following features: - - Action-Pad - 4 buttons in diamonds-shape (on the right side). The buttons are - differently labeled on most devices so we define them as NORTH, - SOUTH, WEST and EAST. - - D-Pad (Direction-pad) - 4 buttons (on the left side) that point up, down, left and right. - - Menu-Pad - Different constellations, but most-times 2 buttons: SELECT - START - Furthermore, many gamepads have a fancy branded button that is used as - special system-button. It often looks different to the other buttons and - is used to pop up system-menus or system-settings. - - Analog-Sticks - Analog-sticks provide freely moveable sticks to control directions. Not - all devices have both or any, but they are present at most times. - Analog-sticks may also provide a digital button if you press them. - - Triggers - Triggers are located on the upper-side of the pad in vertical direction. - Not all devices provide them, but the upper buttons are normally named - Left- and Right-Triggers, the lower buttons Z-Left and Z-Right. - - Rumble - Many devices provide force-feedback features. But are mostly just - simple rumble motors. - -3. Detection -~~~~~~~~~~~~ -All gamepads that follow the protocol described here map BTN_GAMEPAD. This is -an alias for BTN_SOUTH/BTN_A. It can be used to identify a gamepad as such. -However, not all gamepads provide all features, so you need to test for all -features that you need, first. How each feature is mapped is described below. - -Legacy drivers often don't comply to these rules. As we cannot change them -for backwards-compatibility reasons, you need to provide fixup mappings in -user-space yourself. Some of them might also provide module-options that -change the mappings so you can advise users to set these. - -All new gamepads are supposed to comply with this mapping. Please report any -bugs, if they don't. - -There are a lot of less-featured/less-powerful devices out there, which re-use -the buttons from this protocol. However, they try to do this in a compatible -fashion. For example, the "Nintendo Wii Nunchuk" provides two trigger buttons -and one analog stick. It reports them as if it were a gamepad with only one -analog stick and two trigger buttons on the right side. -But that means, that if you only support "real" gamepads, you must test -devices for _all_ reported events that you need. Otherwise, you will also get -devices that report a small subset of the events. - -No other devices, that do not look/feel like a gamepad, shall report these -events. - -4. Events -~~~~~~~~~ -Gamepads report the following events: - -Action-Pad: - Every gamepad device has at least 2 action buttons. This means, that every - device reports BTN_SOUTH (which BTN_GAMEPAD is an alias for). Regardless - of the labels on the buttons, the codes are sent according to the - physical position of the buttons. - Please note that 2- and 3-button pads are fairly rare and old. You might - want to filter gamepads that do not report all four. - 2-Button Pad: - If only 2 action-buttons are present, they are reported as BTN_SOUTH and - BTN_EAST. For vertical layouts, the upper button is BTN_EAST. For - horizontal layouts, the button more on the right is BTN_EAST. - 3-Button Pad: - If only 3 action-buttons are present, they are reported as (from left - to right): BTN_WEST, BTN_SOUTH, BTN_EAST - If the buttons are aligned perfectly vertically, they are reported as - (from top down): BTN_WEST, BTN_SOUTH, BTN_EAST - 4-Button Pad: - If all 4 action-buttons are present, they can be aligned in two - different formations. If diamond-shaped, they are reported as BTN_NORTH, - BTN_WEST, BTN_SOUTH, BTN_EAST according to their physical location. - If rectangular-shaped, the upper-left button is BTN_NORTH, lower-left - is BTN_WEST, lower-right is BTN_SOUTH and upper-right is BTN_EAST. - -D-Pad: - Every gamepad provides a D-Pad with four directions: Up, Down, Left, Right - Some of these are available as digital buttons, some as analog buttons. Some - may even report both. The kernel does not convert between these so - applications should support both and choose what is more appropriate if - both are reported. - Digital buttons are reported as: - BTN_DPAD_* - Analog buttons are reported as: - ABS_HAT0X and ABS_HAT0Y - (for ABS values negative is left/up, positive is right/down) - -Analog-Sticks: - The left analog-stick is reported as ABS_X, ABS_Y. The right analog stick is - reported as ABS_RX, ABS_RY. Zero, one or two sticks may be present. - If analog-sticks provide digital buttons, they are mapped accordingly as - BTN_THUMBL (first/left) and BTN_THUMBR (second/right). - (for ABS values negative is left/up, positive is right/down) - -Triggers: - Trigger buttons can be available as digital or analog buttons or both. User- - space must correctly deal with any situation and choose the most appropriate - mode. - Upper trigger buttons are reported as BTN_TR or ABS_HAT1X (right) and BTN_TL - or ABS_HAT1Y (left). Lower trigger buttons are reported as BTN_TR2 or - ABS_HAT2X (right/ZR) and BTN_TL2 or ABS_HAT2Y (left/ZL). - If only one trigger-button combination is present (upper+lower), they are - reported as "right" triggers (BTN_TR/ABS_HAT1X). - (ABS trigger values start at 0, pressure is reported as positive values) - -Menu-Pad: - Menu buttons are always digital and are mapped according to their location - instead of their labels. That is: - 1-button Pad: Mapped as BTN_START - 2-button Pad: Left button mapped as BTN_SELECT, right button mapped as - BTN_START - Many pads also have a third button which is branded or has a special symbol - and meaning. Such buttons are mapped as BTN_MODE. Examples are the Nintendo - "HOME" button, the XBox "X"-button or Sony "PS" button. - -Rumble: - Rumble is advertised as FF_RUMBLE. - ----------------------------------------------------------------------------- - Written 2013 by David Herrmann diff --git a/Documentation/input/gameport-programming.rst b/Documentation/input/gameport-programming.rst new file mode 100644 index 0000000000000000000000000000000000000000..c96911df1c54afbdc97780becb7be50f4c752c1b --- /dev/null +++ b/Documentation/input/gameport-programming.rst @@ -0,0 +1,222 @@ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Programming gameport drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A basic classic gameport +~~~~~~~~~~~~~~~~~~~~~~~~ + +If the gameport doesn't provide more than the inb()/outb() functionality, +the code needed to register it with the joystick drivers is simple:: + + struct gameport gameport; + + gameport.io = MY_IO_ADDRESS; + gameport_register_port(&gameport); + +Make sure struct gameport is initialized to 0 in all other fields. The +gameport generic code will take care of the rest. + +If your hardware supports more than one io address, and your driver can +choose which one to program the hardware to, starting from the more exotic +addresses is preferred, because the likelihood of clashing with the standard +0x201 address is smaller. + +Eg. if your driver supports addresses 0x200, 0x208, 0x210 and 0x218, then +0x218 would be the address of first choice. + +If your hardware supports a gameport address that is not mapped to ISA io +space (is above 0x1000), use that one, and don't map the ISA mirror. + +Also, always request_region() on the whole io space occupied by the +gameport. Although only one ioport is really used, the gameport usually +occupies from one to sixteen addresses in the io space. + +Please also consider enabling the gameport on the card in the ->open() +callback if the io is mapped to ISA space - this way it'll occupy the io +space only when something really is using it. Disable it again in the +->close() callback. You also can select the io address in the ->open() +callback, so that it doesn't fail if some of the possible addresses are +already occupied by other gameports. + +Memory mapped gameport +~~~~~~~~~~~~~~~~~~~~~~ + +When a gameport can be accessed through MMIO, this way is preferred, because +it is faster, allowing more reads per second. Registering such a gameport +isn't as easy as a basic IO one, but not so much complex:: + + struct gameport gameport; + + void my_trigger(struct gameport *gameport) + { + my_mmio = 0xff; + } + + unsigned char my_read(struct gameport *gameport) + { + return my_mmio; + } + + gameport.read = my_read; + gameport.trigger = my_trigger; + gameport_register_port(&gameport); + +.. _gameport_pgm_cooked_mode: + +Cooked mode gameport +~~~~~~~~~~~~~~~~~~~~ + +There are gameports that can report the axis values as numbers, that means +the driver doesn't have to measure them the old way - an ADC is built into +the gameport. To register a cooked gameport:: + + struct gameport gameport; + + int my_cooked_read(struct gameport *gameport, int *axes, int *buttons) + { + int i; + + for (i = 0; i < 4; i++) + axes[i] = my_mmio[i]; + buttons[i] = my_mmio[4]; + } + + int my_open(struct gameport *gameport, int mode) + { + return -(mode != GAMEPORT_MODE_COOKED); + } + + gameport.cooked_read = my_cooked_read; + gameport.open = my_open; + gameport.fuzz = 8; + gameport_register_port(&gameport); + +The only confusing thing here is the fuzz value. Best determined by +experimentation, it is the amount of noise in the ADC data. Perfect +gameports can set this to zero, most common have fuzz between 8 and 32. +See analog.c and input.c for handling of fuzz - the fuzz value determines +the size of a gaussian filter window that is used to eliminate the noise +in the data. + +More complex gameports +~~~~~~~~~~~~~~~~~~~~~~ + +Gameports can support both raw and cooked modes. In that case combine either +examples 1+2 or 1+3. Gameports can support internal calibration - see below, +and also lightning.c and analog.c on how that works. If your driver supports +more than one gameport instance simultaneously, use the ->private member of +the gameport struct to point to your data. + +Unregistering a gameport +~~~~~~~~~~~~~~~~~~~~~~~~ + +Simple:: + + gameport_unregister_port(&gameport); + +The gameport structure +~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This section is outdated. There are several fields here that don't + match what's there at include/linux/gameport.h. + +:: + + struct gameport { + + void *private; + +A private pointer for free use in the gameport driver. (Not the joystick +driver!) + +:: + + int number; + +Number assigned to the gameport when registered. Informational purpose only. + +:: + + int io; + +I/O address for use with raw mode. You have to either set this, or ->read() +to some value if your gameport supports raw mode. + +:: + + int speed; + +Raw mode speed of the gameport reads in thousands of reads per second. + +:: + + int fuzz; + +If the gameport supports cooked mode, this should be set to a value that +represents the amount of noise in the data. See +:ref:`gameport_pgm_cooked_mode`. + +:: + + void (*trigger)(struct gameport *); + +Trigger. This function should trigger the ns558 oneshots. If set to NULL, +outb(0xff, io) will be used. + +:: + + unsigned char (*read)(struct gameport *); + +Read the buttons and ns558 oneshot bits. If set to NULL, inb(io) will be +used instead. + +:: + + int (*cooked_read)(struct gameport *, int *axes, int *buttons); + +If the gameport supports cooked mode, it should point this to its cooked +read function. It should fill axes[0..3] with four values of the joystick axes +and buttons[0] with four bits representing the buttons. + +:: + + int (*calibrate)(struct gameport *, int *axes, int *max); + +Function for calibrating the ADC hardware. When called, axes[0..3] should be +pre-filled by cooked data by the caller, max[0..3] should be pre-filled with +expected maximums for each axis. The calibrate() function should set the +sensitivity of the ADC hardware so that the maximums fit in its range and +recompute the axes[] values to match the new sensitivity or re-read them from +the hardware so that they give valid values. + +:: + + int (*open)(struct gameport *, int mode); + +Open() serves two purposes. First a driver either opens the port in raw or +in cooked mode, the open() callback can decide which modes are supported. +Second, resource allocation can happen here. The port can also be enabled +here. Prior to this call, other fields of the gameport struct (namely the io +member) need not to be valid. + +:: + + void (*close)(struct gameport *); + +Close() should free the resources allocated by open, possibly disabling the +gameport. + +:: + + struct gameport_dev *dev; + struct gameport *next; + +For internal use by the gameport layer. + +:: + + }; + +Enjoy! diff --git a/Documentation/input/gameport-programming.txt b/Documentation/input/gameport-programming.txt deleted file mode 100644 index 03a74fc3b49679c0326a59faa43b2e423751e0c5..0000000000000000000000000000000000000000 --- a/Documentation/input/gameport-programming.txt +++ /dev/null @@ -1,187 +0,0 @@ -Programming gameport drivers -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. A basic classic gameport -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If the gameport doesn't provide more than the inb()/outb() functionality, -the code needed to register it with the joystick drivers is simple: - - struct gameport gameport; - - gameport.io = MY_IO_ADDRESS; - gameport_register_port(&gameport); - -Make sure struct gameport is initialized to 0 in all other fields. The -gameport generic code will take care of the rest. - -If your hardware supports more than one io address, and your driver can -choose which one to program the hardware to, starting from the more exotic -addresses is preferred, because the likelihood of clashing with the standard -0x201 address is smaller. - -Eg. if your driver supports addresses 0x200, 0x208, 0x210 and 0x218, then -0x218 would be the address of first choice. - -If your hardware supports a gameport address that is not mapped to ISA io -space (is above 0x1000), use that one, and don't map the ISA mirror. - -Also, always request_region() on the whole io space occupied by the -gameport. Although only one ioport is really used, the gameport usually -occupies from one to sixteen addresses in the io space. - -Please also consider enabling the gameport on the card in the ->open() -callback if the io is mapped to ISA space - this way it'll occupy the io -space only when something really is using it. Disable it again in the -->close() callback. You also can select the io address in the ->open() -callback, so that it doesn't fail if some of the possible addresses are -already occupied by other gameports. - -2. Memory mapped gameport -~~~~~~~~~~~~~~~~~~~~~~~~~ - -When a gameport can be accessed through MMIO, this way is preferred, because -it is faster, allowing more reads per second. Registering such a gameport -isn't as easy as a basic IO one, but not so much complex: - - struct gameport gameport; - - void my_trigger(struct gameport *gameport) - { - my_mmio = 0xff; - } - - unsigned char my_read(struct gameport *gameport) - { - return my_mmio; - } - - gameport.read = my_read; - gameport.trigger = my_trigger; - gameport_register_port(&gameport); - -3. Cooked mode gameport -~~~~~~~~~~~~~~~~~~~~~~~ - -There are gameports that can report the axis values as numbers, that means -the driver doesn't have to measure them the old way - an ADC is built into -the gameport. To register a cooked gameport: - - struct gameport gameport; - - int my_cooked_read(struct gameport *gameport, int *axes, int *buttons) - { - int i; - - for (i = 0; i < 4; i++) - axes[i] = my_mmio[i]; - buttons[i] = my_mmio[4]; - } - - int my_open(struct gameport *gameport, int mode) - { - return -(mode != GAMEPORT_MODE_COOKED); - } - - gameport.cooked_read = my_cooked_read; - gameport.open = my_open; - gameport.fuzz = 8; - gameport_register_port(&gameport); - -The only confusing thing here is the fuzz value. Best determined by -experimentation, it is the amount of noise in the ADC data. Perfect -gameports can set this to zero, most common have fuzz between 8 and 32. -See analog.c and input.c for handling of fuzz - the fuzz value determines -the size of a gaussian filter window that is used to eliminate the noise -in the data. - -4. More complex gameports -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Gameports can support both raw and cooked modes. In that case combine either -examples 1+2 or 1+3. Gameports can support internal calibration - see below, -and also lightning.c and analog.c on how that works. If your driver supports -more than one gameport instance simultaneously, use the ->private member of -the gameport struct to point to your data. - -5. Unregistering a gameport -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Simple: - -gameport_unregister_port(&gameport); - -6. The gameport structure -~~~~~~~~~~~~~~~~~~~~~~~~~ - -struct gameport { - - void *private; - -A private pointer for free use in the gameport driver. (Not the joystick -driver!) - - int number; - -Number assigned to the gameport when registered. Informational purpose only. - - int io; - -I/O address for use with raw mode. You have to either set this, or ->read() -to some value if your gameport supports raw mode. - - int speed; - -Raw mode speed of the gameport reads in thousands of reads per second. - - int fuzz; - -If the gameport supports cooked mode, this should be set to a value that -represents the amount of noise in the data. See section 3. - - void (*trigger)(struct gameport *); - -Trigger. This function should trigger the ns558 oneshots. If set to NULL, -outb(0xff, io) will be used. - - unsigned char (*read)(struct gameport *); - -Read the buttons and ns558 oneshot bits. If set to NULL, inb(io) will be -used instead. - - int (*cooked_read)(struct gameport *, int *axes, int *buttons); - -If the gameport supports cooked mode, it should point this to its cooked -read function. It should fill axes[0..3] with four values of the joystick axes -and buttons[0] with four bits representing the buttons. - - int (*calibrate)(struct gameport *, int *axes, int *max); - -Function for calibrating the ADC hardware. When called, axes[0..3] should be -pre-filled by cooked data by the caller, max[0..3] should be pre-filled with -expected maximums for each axis. The calibrate() function should set the -sensitivity of the ADC hardware so that the maximums fit in its range and -recompute the axes[] values to match the new sensitivity or re-read them from -the hardware so that they give valid values. - - int (*open)(struct gameport *, int mode); - -Open() serves two purposes. First a driver either opens the port in raw or -in cooked mode, the open() callback can decide which modes are supported. -Second, resource allocation can happen here. The port can also be enabled -here. Prior to this call, other fields of the gameport struct (namely the io -member) need not to be valid. - - void (*close)(struct gameport *); - -Close() should free the resources allocated by open, possibly disabling the -gameport. - - struct gameport_dev *dev; - struct gameport *next; - -For internal use by the gameport layer. - -}; - -Enjoy! diff --git a/Documentation/input/gpio-tilt.txt b/Documentation/input/gpio-tilt.txt deleted file mode 100644 index 2cdfd9bcb1afc1015b4f98201f6b38cdab1691b3..0000000000000000000000000000000000000000 --- a/Documentation/input/gpio-tilt.txt +++ /dev/null @@ -1,103 +0,0 @@ -Driver for tilt-switches connected via GPIOs -============================================ - -Generic driver to read data from tilt switches connected via gpios. -Orientation can be provided by one or more than one tilt switches, -i.e. each tilt switch providing one axis, and the number of axes -is also not limited. - - -Data structures: ----------------- - -The array of struct gpio in the gpios field is used to list the gpios -that represent the current tilt state. - -The array of struct gpio_tilt_axis describes the axes that are reported -to the input system. The values set therein are used for the -input_set_abs_params calls needed to init the axes. - -The array of struct gpio_tilt_state maps gpio states to the corresponding -values to report. The gpio state is represented as a bitfield where the -bit-index corresponds to the index of the gpio in the struct gpio array. -In the same manner the values stored in the axes array correspond to -the elements of the gpio_tilt_axis-array. - - -Example: --------- - -Example configuration for a single TS1003 tilt switch that rotates around -one axis in 4 steps and emits the current tilt via two GPIOs. - -static int sg060_tilt_enable(struct device *dev) { - /* code to enable the sensors */ -}; - -static void sg060_tilt_disable(struct device *dev) { - /* code to disable the sensors */ -}; - -static struct gpio sg060_tilt_gpios[] = { - { SG060_TILT_GPIO_SENSOR1, GPIOF_IN, "tilt_sensor1" }, - { SG060_TILT_GPIO_SENSOR2, GPIOF_IN, "tilt_sensor2" }, -}; - -static struct gpio_tilt_state sg060_tilt_states[] = { - { - .gpios = (0 << 1) | (0 << 0), - .axes = (int[]) { - 0, - }, - }, { - .gpios = (0 << 1) | (1 << 0), - .axes = (int[]) { - 1, /* 90 degrees */ - }, - }, { - .gpios = (1 << 1) | (1 << 0), - .axes = (int[]) { - 2, /* 180 degrees */ - }, - }, { - .gpios = (1 << 1) | (0 << 0), - .axes = (int[]) { - 3, /* 270 degrees */ - }, - }, -}; - -static struct gpio_tilt_axis sg060_tilt_axes[] = { - { - .axis = ABS_RY, - .min = 0, - .max = 3, - .fuzz = 0, - .flat = 0, - }, -}; - -static struct gpio_tilt_platform_data sg060_tilt_pdata= { - .gpios = sg060_tilt_gpios, - .nr_gpios = ARRAY_SIZE(sg060_tilt_gpios), - - .axes = sg060_tilt_axes, - .nr_axes = ARRAY_SIZE(sg060_tilt_axes), - - .states = sg060_tilt_states, - .nr_states = ARRAY_SIZE(sg060_tilt_states), - - .debounce_interval = 100, - - .poll_interval = 1000, - .enable = sg060_tilt_enable, - .disable = sg060_tilt_disable, -}; - -static struct platform_device sg060_device_tilt = { - .name = "gpio-tilt-polled", - .id = -1, - .dev = { - .platform_data = &sg060_tilt_pdata, - }, -}; diff --git a/Documentation/input/iforce-protocol.txt b/Documentation/input/iforce-protocol.txt deleted file mode 100644 index 66287151c54a99fd76cab71888ac2b720f4c489b..0000000000000000000000000000000000000000 --- a/Documentation/input/iforce-protocol.txt +++ /dev/null @@ -1,258 +0,0 @@ -** Introduction -This document describes what I managed to discover about the protocol used to -specify force effects to I-Force 2.0 devices. None of this information comes -from Immerse. That's why you should not trust what is written in this -document. This document is intended to help understanding the protocol. -This is not a reference. Comments and corrections are welcome. To contact me, -send an email to: johann.deneux@gmail.com - -** WARNING ** -I shall not be held responsible for any damage or harm caused if you try to -send data to your I-Force device based on what you read in this document. - -** Preliminary Notes: -All values are hexadecimal with big-endian encoding (msb on the left). Beware, -values inside packets are encoded using little-endian. Bytes whose roles are -unknown are marked ??? Information that needs deeper inspection is marked (?) - -** General form of a packet ** -This is how packets look when the device uses the rs232 to communicate. -2B OP LEN DATA CS -CS is the checksum. It is equal to the exclusive or of all bytes. - -When using USB: -OP DATA -The 2B, LEN and CS fields have disappeared, probably because USB handles frames and -data corruption is handled or unsignificant. - -First, I describe effects that are sent by the device to the computer - -** Device input state -This packet is used to indicate the state of each button and the value of each -axis -OP= 01 for a joystick, 03 for a wheel -LEN= Varies from device to device -00 X-Axis lsb -01 X-Axis msb -02 Y-Axis lsb, or gas pedal for a wheel -03 Y-Axis msb, or brake pedal for a wheel -04 Throttle -05 Buttons -06 Lower 4 bits: Buttons - Upper 4 bits: Hat -07 Rudder - -** Device effects states -OP= 02 -LEN= Varies -00 ? Bit 1 (Value 2) is the value of the deadman switch -01 Bit 8 is set if the effect is playing. Bits 0 to 7 are the effect id. -02 ?? -03 Address of parameter block changed (lsb) -04 Address of parameter block changed (msb) -05 Address of second parameter block changed (lsb) -... depending on the number of parameter blocks updated - -** Force effect ** -OP= 01 -LEN= 0e -00 Channel (when playing several effects at the same time, each must be assigned a channel) -01 Wave form - Val 00 Constant - Val 20 Square - Val 21 Triangle - Val 22 Sine - Val 23 Sawtooth up - Val 24 Sawtooth down - Val 40 Spring (Force = f(pos)) - Val 41 Friction (Force = f(velocity)) and Inertia (Force = f(acceleration)) - - -02 Axes affected and trigger - Bits 4-7: Val 2 = effect along one axis. Byte 05 indicates direction - Val 4 = X axis only. Byte 05 must contain 5a - Val 8 = Y axis only. Byte 05 must contain b4 - Val c = X and Y axes. Bytes 05 must contain 60 - Bits 0-3: Val 0 = No trigger - Val x+1 = Button x triggers the effect - When the whole byte is 0, cancel the previously set trigger - -03-04 Duration of effect (little endian encoding, in ms) - -05 Direction of effect, if applicable. Else, see 02 for value to assign. - -06-07 Minimum time between triggering. - -08-09 Address of periodicity or magnitude parameters -0a-0b Address of attack and fade parameters, or ffff if none. -*or* -08-09 Address of interactive parameters for X-axis, or ffff if not applicable -0a-0b Address of interactive parameters for Y-axis, or ffff if not applicable - -0c-0d Delay before execution of effect (little endian encoding, in ms) - - -** Time based parameters ** - -*** Attack and fade *** -OP= 02 -LEN= 08 -00-01 Address where to store the parameters -02-03 Duration of attack (little endian encoding, in ms) -04 Level at end of attack. Signed byte. -05-06 Duration of fade. -07 Level at end of fade. - -*** Magnitude *** -OP= 03 -LEN= 03 -00-01 Address -02 Level. Signed byte. - -*** Periodicity *** -OP= 04 -LEN= 07 -00-01 Address -02 Magnitude. Signed byte. -03 Offset. Signed byte. -04 Phase. Val 00 = 0 deg, Val 40 = 90 degs. -05-06 Period (little endian encoding, in ms) - -** Interactive parameters ** -OP= 05 -LEN= 0a -00-01 Address -02 Positive Coeff -03 Negative Coeff -04+05 Offset (center) -06+07 Dead band (Val 01F4 = 5000 (decimal)) -08 Positive saturation (Val 0a = 1000 (decimal) Val 64 = 10000 (decimal)) -09 Negative saturation - -The encoding is a bit funny here: For coeffs, these are signed values. The -maximum value is 64 (100 decimal), the min is 9c. -For the offset, the minimum value is FE0C, the maximum value is 01F4. -For the deadband, the minimum value is 0, the max is 03E8. - -** Controls ** -OP= 41 -LEN= 03 -00 Channel -01 Start/Stop - Val 00: Stop - Val 01: Start and play once. - Val 41: Start and play n times (See byte 02 below) -02 Number of iterations n. - -** Init ** - -*** Querying features *** -OP= ff -Query command. Length varies according to the query type. -The general format of this packet is: -ff 01 QUERY [INDEX] CHECKSUM -responses are of the same form: -FF LEN QUERY VALUE_QUERIED CHECKSUM2 -where LEN = 1 + length(VALUE_QUERIED) - -**** Query ram size **** -QUERY = 42 ('B'uffer size) -The device should reply with the same packet plus two additional bytes -containing the size of the memory: -ff 03 42 03 e8 CS would mean that the device has 1000 bytes of ram available. - -**** Query number of effects **** -QUERY = 4e ('N'umber of effects) -The device should respond by sending the number of effects that can be played -at the same time (one byte) -ff 02 4e 14 CS would stand for 20 effects. - -**** Vendor's id **** -QUERY = 4d ('M'anufacturer) -Query the vendors'id (2 bytes) - -**** Product id ***** -QUERY = 50 ('P'roduct) -Query the product id (2 bytes) - -**** Open device **** -QUERY = 4f ('O'pen) -No data returned. - -**** Close device ***** -QUERY = 43 ('C')lose -No data returned. - -**** Query effect **** -QUERY = 45 ('E') -Send effect type. -Returns nonzero if supported (2 bytes) - -**** Firmware Version **** -QUERY = 56 ('V'ersion) -Sends back 3 bytes - major, minor, subminor - -*** Initialisation of the device *** - -**** Set Control **** -!!! Device dependent, can be different on different models !!! -OP= 40 [] -LEN= 2 or 3 -00 Idx - Idx 00 Set dead zone (0..2048) - Idx 01 Ignore Deadman sensor (0..1) - Idx 02 Enable comm watchdog (0..1) - Idx 03 Set the strength of the spring (0..100) - Idx 04 Enable or disable the spring (0/1) - Idx 05 Set axis saturation threshold (0..2048) - -**** Set Effect State **** -OP= 42 -LEN= 1 -00 State - Bit 3 Pause force feedback - Bit 2 Enable force feedback - Bit 0 Stop all effects - -**** Set overall gain **** -OP= 43 -LEN= 1 -00 Gain - Val 00 = 0% - Val 40 = 50% - Val 80 = 100% - -** Parameter memory ** - -Each device has a certain amount of memory to store parameters of effects. -The amount of RAM may vary, I encountered values from 200 to 1000 bytes. Below -is the amount of memory apparently needed for every set of parameters: - - period : 0c - - magnitude : 02 - - attack and fade : 0e - - interactive : 08 - -** Appendix: How to study the protocol ? ** - -1. Generate effects using the force editor provided with the DirectX SDK, or -use Immersion Studio (freely available at their web site in the developer section: -www.immersion.com) -2. Start a soft spying RS232 or USB (depending on where you connected your -joystick/wheel). I used ComPortSpy from fCoder (alpha version!) -3. Play the effect, and watch what happens on the spy screen. - -A few words about ComPortSpy: -At first glance, this software seems, hum, well... buggy. In fact, data appear with a -few seconds latency. Personally, I restart it every time I play an effect. -Remember it's free (as in free beer) and alpha! - -** URLS ** -Check www.immerse.com for Immersion Studio, and www.fcoder.com for ComPortSpy. - -** Author of this document ** -Johann Deneux -Home page at http://web.archive.org/web/*/http://www.esil.univ-mrs.fr - -Additions by Vojtech Pavlik. - -I-Force is trademark of Immersion Corp. diff --git a/Documentation/input/index.rst b/Documentation/input/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..7a3e71c2bd008c864f34d681a4e3ad68b9eaaaef --- /dev/null +++ b/Documentation/input/index.rst @@ -0,0 +1,20 @@ +============================= +The Linux Input Documentation +============================= + +Contents: + +.. toctree:: + :maxdepth: 2 + :numbered: + + input_uapi + input_kapi + devices/index + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/input/input-programming.rst b/Documentation/input/input-programming.rst new file mode 100644 index 0000000000000000000000000000000000000000..45a4c6e05e39bd753622e77db62c2aa74bfffbfb --- /dev/null +++ b/Documentation/input/input-programming.rst @@ -0,0 +1,302 @@ +=============================== +Creating an input device driver +=============================== + +The simplest example +~~~~~~~~~~~~~~~~~~~~ + +Here comes a very simple example of an input device driver. The device has +just one button and the button is accessible at i/o port BUTTON_PORT. When +pressed or released a BUTTON_IRQ happens. The driver could look like:: + + #include + #include + #include + + #include + #include + + static struct input_dev *button_dev; + + static irqreturn_t button_interrupt(int irq, void *dummy) + { + input_report_key(button_dev, BTN_0, inb(BUTTON_PORT) & 1); + input_sync(button_dev); + return IRQ_HANDLED; + } + + static int __init button_init(void) + { + int error; + + if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { + printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); + return -EBUSY; + } + + button_dev = input_allocate_device(); + if (!button_dev) { + printk(KERN_ERR "button.c: Not enough memory\n"); + error = -ENOMEM; + goto err_free_irq; + } + + button_dev->evbit[0] = BIT_MASK(EV_KEY); + button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0); + + error = input_register_device(button_dev); + if (error) { + printk(KERN_ERR "button.c: Failed to register device\n"); + goto err_free_dev; + } + + return 0; + + err_free_dev: + input_free_device(button_dev); + err_free_irq: + free_irq(BUTTON_IRQ, button_interrupt); + return error; + } + + static void __exit button_exit(void) + { + input_unregister_device(button_dev); + free_irq(BUTTON_IRQ, button_interrupt); + } + + module_init(button_init); + module_exit(button_exit); + +What the example does +~~~~~~~~~~~~~~~~~~~~~ + +First it has to include the file, which interfaces to the +input subsystem. This provides all the definitions needed. + +In the _init function, which is called either upon module load or when +booting the kernel, it grabs the required resources (it should also check +for the presence of the device). + +Then it allocates a new input device structure with input_allocate_device() +and sets up input bitfields. This way the device driver tells the other +parts of the input systems what it is - what events can be generated or +accepted by this input device. Our example device can only generate EV_KEY +type events, and from those only BTN_0 event code. Thus we only set these +two bits. We could have used:: + + set_bit(EV_KEY, button_dev.evbit); + set_bit(BTN_0, button_dev.keybit); + +as well, but with more than single bits the first approach tends to be +shorter. + +Then the example driver registers the input device structure by calling:: + + input_register_device(&button_dev); + +This adds the button_dev structure to linked lists of the input driver and +calls device handler modules _connect functions to tell them a new input +device has appeared. input_register_device() may sleep and therefore must +not be called from an interrupt or with a spinlock held. + +While in use, the only used function of the driver is:: + + button_interrupt() + +which upon every interrupt from the button checks its state and reports it +via the:: + + input_report_key() + +call to the input system. There is no need to check whether the interrupt +routine isn't reporting two same value events (press, press for example) to +the input system, because the input_report_* functions check that +themselves. + +Then there is the:: + + input_sync() + +call to tell those who receive the events that we've sent a complete report. +This doesn't seem important in the one button case, but is quite important +for for example mouse movement, where you don't want the X and Y values +to be interpreted separately, because that'd result in a different movement. + +dev->open() and dev->close() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In case the driver has to repeatedly poll the device, because it doesn't +have an interrupt coming from it and the polling is too expensive to be done +all the time, or if the device uses a valuable resource (eg. interrupt), it +can use the open and close callback to know when it can stop polling or +release the interrupt and when it must resume polling or grab the interrupt +again. To do that, we would add this to our example driver:: + + static int button_open(struct input_dev *dev) + { + if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { + printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); + return -EBUSY; + } + + return 0; + } + + static void button_close(struct input_dev *dev) + { + free_irq(IRQ_AMIGA_VERTB, button_interrupt); + } + + static int __init button_init(void) + { + ... + button_dev->open = button_open; + button_dev->close = button_close; + ... + } + +Note that input core keeps track of number of users for the device and +makes sure that dev->open() is called only when the first user connects +to the device and that dev->close() is called when the very last user +disconnects. Calls to both callbacks are serialized. + +The open() callback should return a 0 in case of success or any nonzero value +in case of failure. The close() callback (which is void) must always succeed. + +Basic event types +~~~~~~~~~~~~~~~~~ + +The most simple event type is EV_KEY, which is used for keys and buttons. +It's reported to the input system via:: + + input_report_key(struct input_dev *dev, int code, int value) + +See uapi/linux/input-event-codes.h for the allowable values of code (from 0 to +KEY_MAX). Value is interpreted as a truth value, ie any nonzero value means key +pressed, zero value means key released. The input code generates events only +in case the value is different from before. + +In addition to EV_KEY, there are two more basic event types: EV_REL and +EV_ABS. They are used for relative and absolute values supplied by the +device. A relative value may be for example a mouse movement in the X axis. +The mouse reports it as a relative difference from the last position, +because it doesn't have any absolute coordinate system to work in. Absolute +events are namely for joysticks and digitizers - devices that do work in an +absolute coordinate systems. + +Having the device report EV_REL buttons is as simple as with EV_KEY, simply +set the corresponding bits and call the:: + + input_report_rel(struct input_dev *dev, int code, int value) + +function. Events are generated only for nonzero value. + +However EV_ABS requires a little special care. Before calling +input_register_device, you have to fill additional fields in the input_dev +struct for each absolute axis your device has. If our button device had also +the ABS_X axis:: + + button_dev.absmin[ABS_X] = 0; + button_dev.absmax[ABS_X] = 255; + button_dev.absfuzz[ABS_X] = 4; + button_dev.absflat[ABS_X] = 8; + +Or, you can just say:: + + input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8); + +This setting would be appropriate for a joystick X axis, with the minimum of +0, maximum of 255 (which the joystick *must* be able to reach, no problem if +it sometimes reports more, but it must be able to always reach the min and +max values), with noise in the data up to +- 4, and with a center flat +position of size 8. + +If you don't need absfuzz and absflat, you can set them to zero, which mean +that the thing is precise and always returns to exactly the center position +(if it has any). + +BITS_TO_LONGS(), BIT_WORD(), BIT_MASK() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These three macros from bitops.h help some bitfield computations:: + + BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for + x bits + BIT_WORD(x) - returns the index in the array in longs for bit x + BIT_MASK(x) - returns the index in a long for bit x + +The id* and name fields +~~~~~~~~~~~~~~~~~~~~~~~ + +The dev->name should be set before registering the input device by the input +device driver. It's a string like 'Generic button device' containing a +user friendly name of the device. + +The id* fields contain the bus ID (PCI, USB, ...), vendor ID and device ID +of the device. The bus IDs are defined in input.h. The vendor and device ids +are defined in pci_ids.h, usb_ids.h and similar include files. These fields +should be set by the input device driver before registering it. + +The idtype field can be used for specific information for the input device +driver. + +The id and name fields can be passed to userland via the evdev interface. + +The keycode, keycodemax, keycodesize fields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These three fields should be used by input devices that have dense keymaps. +The keycode is an array used to map from scancodes to input system keycodes. +The keycode max should contain the size of the array and keycodesize the +size of each entry in it (in bytes). + +Userspace can query and alter current scancode to keycode mappings using +EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface. +When a device has all 3 aforementioned fields filled in, the driver may +rely on kernel's default implementation of setting and querying keycode +mappings. + +dev->getkeycode() and dev->setkeycode() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +getkeycode() and setkeycode() callbacks allow drivers to override default +keycode/keycodesize/keycodemax mapping mechanism provided by input core +and implement sparse keycode maps. + +Key autorepeat +~~~~~~~~~~~~~~ + +... is simple. It is handled by the input.c module. Hardware autorepeat is +not used, because it's not present in many devices and even where it is +present, it is broken sometimes (at keyboards: Toshiba notebooks). To enable +autorepeat for your device, just set EV_REP in dev->evbit. All will be +handled by the input system. + +Other event types, handling output events +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The other event types up to now are: + +- EV_LED - used for the keyboard LEDs. +- EV_SND - used for keyboard beeps. + +They are very similar to for example key events, but they go in the other +direction - from the system to the input device driver. If your input device +driver can handle these events, it has to set the respective bits in evbit, +*and* also the callback routine:: + + button_dev->event = button_event; + + int button_event(struct input_dev *dev, unsigned int type, + unsigned int code, int value) + { + if (type == EV_SND && code == SND_BELL) { + outb(value, BUTTON_BELL); + return 0; + } + return -1; + } + +This callback routine can be called from an interrupt or a BH (although that +isn't a rule), and thus must not sleep, and must not take too long to finish. diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt deleted file mode 100644 index 7f8b9d97bc47d32359150822eaa16b23acfc13be..0000000000000000000000000000000000000000 --- a/Documentation/input/input-programming.txt +++ /dev/null @@ -1,302 +0,0 @@ -Programming input drivers -~~~~~~~~~~~~~~~~~~~~~~~~~ - -1. Creating an input device driver -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -1.0 The simplest example -~~~~~~~~~~~~~~~~~~~~~~~~ - -Here comes a very simple example of an input device driver. The device has -just one button and the button is accessible at i/o port BUTTON_PORT. When -pressed or released a BUTTON_IRQ happens. The driver could look like: - -#include -#include -#include - -#include -#include - -static struct input_dev *button_dev; - -static irqreturn_t button_interrupt(int irq, void *dummy) -{ - input_report_key(button_dev, BTN_0, inb(BUTTON_PORT) & 1); - input_sync(button_dev); - return IRQ_HANDLED; -} - -static int __init button_init(void) -{ - int error; - - if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { - printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); - return -EBUSY; - } - - button_dev = input_allocate_device(); - if (!button_dev) { - printk(KERN_ERR "button.c: Not enough memory\n"); - error = -ENOMEM; - goto err_free_irq; - } - - button_dev->evbit[0] = BIT_MASK(EV_KEY); - button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0); - - error = input_register_device(button_dev); - if (error) { - printk(KERN_ERR "button.c: Failed to register device\n"); - goto err_free_dev; - } - - return 0; - - err_free_dev: - input_free_device(button_dev); - err_free_irq: - free_irq(BUTTON_IRQ, button_interrupt); - return error; -} - -static void __exit button_exit(void) -{ - input_unregister_device(button_dev); - free_irq(BUTTON_IRQ, button_interrupt); -} - -module_init(button_init); -module_exit(button_exit); - -1.1 What the example does -~~~~~~~~~~~~~~~~~~~~~~~~~ - -First it has to include the file, which interfaces to the -input subsystem. This provides all the definitions needed. - -In the _init function, which is called either upon module load or when -booting the kernel, it grabs the required resources (it should also check -for the presence of the device). - -Then it allocates a new input device structure with input_allocate_device() -and sets up input bitfields. This way the device driver tells the other -parts of the input systems what it is - what events can be generated or -accepted by this input device. Our example device can only generate EV_KEY -type events, and from those only BTN_0 event code. Thus we only set these -two bits. We could have used - - set_bit(EV_KEY, button_dev.evbit); - set_bit(BTN_0, button_dev.keybit); - -as well, but with more than single bits the first approach tends to be -shorter. - -Then the example driver registers the input device structure by calling - - input_register_device(&button_dev); - -This adds the button_dev structure to linked lists of the input driver and -calls device handler modules _connect functions to tell them a new input -device has appeared. input_register_device() may sleep and therefore must -not be called from an interrupt or with a spinlock held. - -While in use, the only used function of the driver is - - button_interrupt() - -which upon every interrupt from the button checks its state and reports it -via the - - input_report_key() - -call to the input system. There is no need to check whether the interrupt -routine isn't reporting two same value events (press, press for example) to -the input system, because the input_report_* functions check that -themselves. - -Then there is the - - input_sync() - -call to tell those who receive the events that we've sent a complete report. -This doesn't seem important in the one button case, but is quite important -for for example mouse movement, where you don't want the X and Y values -to be interpreted separately, because that'd result in a different movement. - -1.2 dev->open() and dev->close() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In case the driver has to repeatedly poll the device, because it doesn't -have an interrupt coming from it and the polling is too expensive to be done -all the time, or if the device uses a valuable resource (eg. interrupt), it -can use the open and close callback to know when it can stop polling or -release the interrupt and when it must resume polling or grab the interrupt -again. To do that, we would add this to our example driver: - -static int button_open(struct input_dev *dev) -{ - if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { - printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); - return -EBUSY; - } - - return 0; -} - -static void button_close(struct input_dev *dev) -{ - free_irq(IRQ_AMIGA_VERTB, button_interrupt); -} - -static int __init button_init(void) -{ - ... - button_dev->open = button_open; - button_dev->close = button_close; - ... -} - -Note that input core keeps track of number of users for the device and -makes sure that dev->open() is called only when the first user connects -to the device and that dev->close() is called when the very last user -disconnects. Calls to both callbacks are serialized. - -The open() callback should return a 0 in case of success or any nonzero value -in case of failure. The close() callback (which is void) must always succeed. - -1.3 Basic event types -~~~~~~~~~~~~~~~~~~~~~ - -The most simple event type is EV_KEY, which is used for keys and buttons. -It's reported to the input system via: - - input_report_key(struct input_dev *dev, int code, int value) - -See linux/input.h for the allowable values of code (from 0 to KEY_MAX). -Value is interpreted as a truth value, ie any nonzero value means key -pressed, zero value means key released. The input code generates events only -in case the value is different from before. - -In addition to EV_KEY, there are two more basic event types: EV_REL and -EV_ABS. They are used for relative and absolute values supplied by the -device. A relative value may be for example a mouse movement in the X axis. -The mouse reports it as a relative difference from the last position, -because it doesn't have any absolute coordinate system to work in. Absolute -events are namely for joysticks and digitizers - devices that do work in an -absolute coordinate systems. - -Having the device report EV_REL buttons is as simple as with EV_KEY, simply -set the corresponding bits and call the - - input_report_rel(struct input_dev *dev, int code, int value) - -function. Events are generated only for nonzero value. - -However EV_ABS requires a little special care. Before calling -input_register_device, you have to fill additional fields in the input_dev -struct for each absolute axis your device has. If our button device had also -the ABS_X axis: - - button_dev.absmin[ABS_X] = 0; - button_dev.absmax[ABS_X] = 255; - button_dev.absfuzz[ABS_X] = 4; - button_dev.absflat[ABS_X] = 8; - -Or, you can just say: - - input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8); - -This setting would be appropriate for a joystick X axis, with the minimum of -0, maximum of 255 (which the joystick *must* be able to reach, no problem if -it sometimes reports more, but it must be able to always reach the min and -max values), with noise in the data up to +- 4, and with a center flat -position of size 8. - -If you don't need absfuzz and absflat, you can set them to zero, which mean -that the thing is precise and always returns to exactly the center position -(if it has any). - -1.4 BITS_TO_LONGS(), BIT_WORD(), BIT_MASK() -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -These three macros from bitops.h help some bitfield computations: - - BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for - x bits - BIT_WORD(x) - returns the index in the array in longs for bit x - BIT_MASK(x) - returns the index in a long for bit x - -1.5 The id* and name fields -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The dev->name should be set before registering the input device by the input -device driver. It's a string like 'Generic button device' containing a -user friendly name of the device. - -The id* fields contain the bus ID (PCI, USB, ...), vendor ID and device ID -of the device. The bus IDs are defined in input.h. The vendor and device ids -are defined in pci_ids.h, usb_ids.h and similar include files. These fields -should be set by the input device driver before registering it. - -The idtype field can be used for specific information for the input device -driver. - -The id and name fields can be passed to userland via the evdev interface. - -1.6 The keycode, keycodemax, keycodesize fields -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -These three fields should be used by input devices that have dense keymaps. -The keycode is an array used to map from scancodes to input system keycodes. -The keycode max should contain the size of the array and keycodesize the -size of each entry in it (in bytes). - -Userspace can query and alter current scancode to keycode mappings using -EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface. -When a device has all 3 aforementioned fields filled in, the driver may -rely on kernel's default implementation of setting and querying keycode -mappings. - -1.7 dev->getkeycode() and dev->setkeycode() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -getkeycode() and setkeycode() callbacks allow drivers to override default -keycode/keycodesize/keycodemax mapping mechanism provided by input core -and implement sparse keycode maps. - -1.8 Key autorepeat -~~~~~~~~~~~~~~~~~~ - -... is simple. It is handled by the input.c module. Hardware autorepeat is -not used, because it's not present in many devices and even where it is -present, it is broken sometimes (at keyboards: Toshiba notebooks). To enable -autorepeat for your device, just set EV_REP in dev->evbit. All will be -handled by the input system. - -1.9 Other event types, handling output events -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The other event types up to now are: - -EV_LED - used for the keyboard LEDs. -EV_SND - used for keyboard beeps. - -They are very similar to for example key events, but they go in the other -direction - from the system to the input device driver. If your input device -driver can handle these events, it has to set the respective bits in evbit, -*and* also the callback routine: - - button_dev->event = button_event; - -int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value); -{ - if (type == EV_SND && code == SND_BELL) { - outb(value, BUTTON_BELL); - return 0; - } - return -1; -} - -This callback routine can be called from an interrupt or a BH (although that -isn't a rule), and thus must not sleep, and must not take too long to finish. diff --git a/Documentation/input/input.rst b/Documentation/input/input.rst new file mode 100644 index 0000000000000000000000000000000000000000..3b3a22975106987781eda18172728a3884c59da5 --- /dev/null +++ b/Documentation/input/input.rst @@ -0,0 +1,281 @@ +.. include:: + +============ +Introduction +============ + +:Copyright: |copy| 1999-2001 Vojtech Pavlik - Sponsored by SuSE + +Architecture +============ + +Input subsystem a collection of drivers that is designed to support +all input devices under Linux. Most of the drivers reside in +drivers/input, although quite a few live in drivers/hid and +drivers/platform. + +The core of the input subsystem is the input module, which must be +loaded before any other of the input modules - it serves as a way of +communication between two groups of modules: + +Device drivers +-------------- + +These modules talk to the hardware (for example via USB), and provide +events (keystrokes, mouse movements) to the input module. + +Event handlers +-------------- + +These modules get events from input core and pass them where needed +via various interfaces - keystrokes to the kernel, mouse movements via +a simulated PS/2 interface to GPM and X, and so on. + +Simple Usage +============ + +For the most usual configuration, with one USB mouse and one USB keyboard, +you'll have to load the following modules (or have them built in to the +kernel):: + + input + mousedev + usbcore + uhci_hcd or ohci_hcd or ehci_hcd + usbhid + hid_generic + +After this, the USB keyboard will work straight away, and the USB mouse +will be available as a character device on major 13, minor 63:: + + crw-r--r-- 1 root root 13, 63 Mar 28 22:45 mice + +This device usually created automatically by the system. The commands +to create it by hand are:: + + cd /dev + mkdir input + mknod input/mice c 13 63 + +After that you have to point GPM (the textmode mouse cut&paste tool) and +XFree to this device to use it - GPM should be called like:: + + gpm -t ps2 -m /dev/input/mice + +And in X:: + + Section "Pointer" + Protocol "ImPS/2" + Device "/dev/input/mice" + ZAxisMapping 4 5 + EndSection + +When you do all of the above, you can use your USB mouse and keyboard. + +Detailed Description +==================== + +Event handlers +-------------- + +Event handlers distribute the events from the devices to userspace and +in-kernel consumers, as needed. + +evdev +~~~~~ + +``evdev`` is the generic input event interface. It passes the events +generated in the kernel straight to the program, with timestamps. The +event codes are the same on all architectures and are hardware +independent. + +This is the preferred interface for userspace to consume user +input, and all clients are encouraged to use it. + +See :ref:`event-interface` for notes on API. + +The devices are in /dev/input:: + + crw-r--r-- 1 root root 13, 64 Apr 1 10:49 event0 + crw-r--r-- 1 root root 13, 65 Apr 1 10:50 event1 + crw-r--r-- 1 root root 13, 66 Apr 1 10:50 event2 + crw-r--r-- 1 root root 13, 67 Apr 1 10:50 event3 + ... + +There are two ranges of minors: 64 through 95 is the static legacy +range. If there are more than 32 input devices in a system, additional +evdev nodes are created with minors starting with 256. + +keyboard +~~~~~~~~ + +``keyboard`` is in-kernel input handler ad is a part of VT code. It +consumes keyboard keystrokes and handles user input for VT consoles. + +mousedev +~~~~~~~~ + +``mousedev`` is a hack to make legacy programs that use mouse input +work. It takes events from either mice or digitizers/tablets and makes +a PS/2-style (a la /dev/psaux) mouse device available to the +userland. + +Mousedev devices in /dev/input (as shown above) are:: + + crw-r--r-- 1 root root 13, 32 Mar 28 22:45 mouse0 + crw-r--r-- 1 root root 13, 33 Mar 29 00:41 mouse1 + crw-r--r-- 1 root root 13, 34 Mar 29 00:41 mouse2 + crw-r--r-- 1 root root 13, 35 Apr 1 10:50 mouse3 + ... + ... + crw-r--r-- 1 root root 13, 62 Apr 1 10:50 mouse30 + crw-r--r-- 1 root root 13, 63 Apr 1 10:50 mice + +Each ``mouse`` device is assigned to a single mouse or digitizer, except +the last one - ``mice``. This single character device is shared by all +mice and digitizers, and even if none are connected, the device is +present. This is useful for hotplugging USB mice, so that older programs +that do not handle hotplug can open the device even when no mice are +present. + +CONFIG_INPUT_MOUSEDEV_SCREEN_[XY] in the kernel configuration are +the size of your screen (in pixels) in XFree86. This is needed if you +want to use your digitizer in X, because its movement is sent to X +via a virtual PS/2 mouse and thus needs to be scaled +accordingly. These values won't be used if you use a mouse only. + +Mousedev will generate either PS/2, ImPS/2 (Microsoft IntelliMouse) or +ExplorerPS/2 (IntelliMouse Explorer) protocols, depending on what the +program reading the data wishes. You can set GPM and X to any of +these. You'll need ImPS/2 if you want to make use of a wheel on a USB +mouse and ExplorerPS/2 if you want to use extra (up to 5) buttons. + +joydev +~~~~~~ + +``joydev`` implements v0.x and v1.x Linux joystick API. See +:ref:`joystick-api` for details. + +As soon as any joystick is connected, it can be accessed in /dev/input on:: + + crw-r--r-- 1 root root 13, 0 Apr 1 10:50 js0 + crw-r--r-- 1 root root 13, 1 Apr 1 10:50 js1 + crw-r--r-- 1 root root 13, 2 Apr 1 10:50 js2 + crw-r--r-- 1 root root 13, 3 Apr 1 10:50 js3 + ... + +And so on up to js31 in legacy range, and additional nodes with minors +above 256 if there are more joystick devices. + +Device drivers +-------------- + +Device drivers are the modules that generate events. + +hid-generic +~~~~~~~~~~~ + +``hid-generic`` is one of the largest and most complex driver of the +whole suite. It handles all HID devices, and because there is a very +wide variety of them, and because the USB HID specification isn't +simple, it needs to be this big. + +Currently, it handles USB mice, joysticks, gamepads, steering wheels +keyboards, trackballs and digitizers. + +However, USB uses HID also for monitor controls, speaker controls, UPSs, +LCDs and many other purposes. + +The monitor and speaker controls should be easy to add to the hid/input +interface, but for the UPSs and LCDs it doesn't make much sense. For this, +the hiddev interface was designed. See Documentation/hid/hiddev.txt +for more information about it. + +The usage of the usbhid module is very simple, it takes no parameters, +detects everything automatically and when a HID device is inserted, it +detects it appropriately. + +However, because the devices vary wildly, you might happen to have a +device that doesn't work well. In that case #define DEBUG at the beginning +of hid-core.c and send me the syslog traces. + +usbmouse +~~~~~~~~ + +For embedded systems, for mice with broken HID descriptors and just any +other use when the big usbhid wouldn't be a good choice, there is the +usbmouse driver. It handles USB mice only. It uses a simpler HIDBP +protocol. This also means the mice must support this simpler protocol. Not +all do. If you don't have any strong reason to use this module, use usbhid +instead. + +usbkbd +~~~~~~ + +Much like usbmouse, this module talks to keyboards with a simplified +HIDBP protocol. It's smaller, but doesn't support any extra special keys. +Use usbhid instead if there isn't any special reason to use this. + +psmouse +~~~~~~~ + +This is driver for all flavors of pointing devices using PS/2 +protocol, including Synaptics and ALPS touchpads, Intellimouse +Explorer devices, Logitech PS/2 mice and so on. + +atkbd +~~~~~ + +This is driver for PS/2 (AT) keyboards. + +iforce +~~~~~~ + +A driver for I-Force joysticks and wheels, both over USB and RS232. +It includes Force Feedback support now, even though Immersion +Corp. considers the protocol a trade secret and won't disclose a word +about it. + +Verifying if it works +===================== + +Typing a couple keys on the keyboard should be enough to check that +a keyboard works and is correctly connected to the kernel keyboard +driver. + +Doing a ``cat /dev/input/mouse0`` (c, 13, 32) will verify that a mouse +is also emulated; characters should appear if you move it. + +You can test the joystick emulation with the ``jstest`` utility, +available in the joystick package (see :ref:`joystick-doc`). + +You can test the event devices with the ``evtest`` utility. + +.. _event-interface: + +Event interface +=============== + +You can use blocking and nonblocking reads, and also select() on the +/dev/input/eventX devices, and you'll always get a whole number of input +events on a read. Their layout is:: + + struct input_event { + struct timeval time; + unsigned short type; + unsigned short code; + unsigned int value; + }; + +``time`` is the timestamp, it returns the time at which the event happened. +Type is for example EV_REL for relative moment, EV_KEY for a keypress or +release. More types are defined in include/uapi/linux/input-event-codes.h. + +``code`` is event code, for example REL_X or KEY_BACKSPACE, again a complete +list is in include/uapi/linux/input-event-codes.h. + +``value`` is the value the event carries. Either a relative change for +EV_REL, absolute new value for EV_ABS (joysticks ...), or 0 for EV_KEY for +release, 1 for keypress and 2 for autorepeat. + +See :ref:`input-event-codes` for more information about various even codes. diff --git a/Documentation/input/input.txt b/Documentation/input/input.txt deleted file mode 100644 index 7ebce100fe905316590096f9e9aa2a8e5c604f97..0000000000000000000000000000000000000000 --- a/Documentation/input/input.txt +++ /dev/null @@ -1,290 +0,0 @@ - Linux Input drivers v1.0 - (c) 1999-2001 Vojtech Pavlik - Sponsored by SuSE ----------------------------------------------------------------------------- - -0. Disclaimer -~~~~~~~~~~~~~ - This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your option) -any later version. - - This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - - You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., 59 -Temple Place, Suite 330, Boston, MA 02111-1307 USA - - Should you need to contact me, the author, you can do so either by e-mail -- mail your message to , or by paper mail: Vojtech Pavlik, -Simunkova 1594, Prague 8, 182 00 Czech Republic - - For your convenience, the GNU General Public License version 2 is included -in the package: See the file COPYING. - -1. Introduction -~~~~~~~~~~~~~~~ - This is a collection of drivers that is designed to support all input -devices under Linux. While it is currently used only on for USB input -devices, future use (say 2.5/2.6) is expected to expand to replace -most of the existing input system, which is why it lives in -drivers/input/ instead of drivers/usb/. - - The centre of the input drivers is the input module, which must be -loaded before any other of the input modules - it serves as a way of -communication between two groups of modules: - -1.1 Device drivers -~~~~~~~~~~~~~~~~~~ - These modules talk to the hardware (for example via USB), and provide -events (keystrokes, mouse movements) to the input module. - -1.2 Event handlers -~~~~~~~~~~~~~~~~~~ - These modules get events from input and pass them where needed via -various interfaces - keystrokes to the kernel, mouse movements via a -simulated PS/2 interface to GPM and X and so on. - -2. Simple Usage -~~~~~~~~~~~~~~~ - For the most usual configuration, with one USB mouse and one USB keyboard, -you'll have to load the following modules (or have them built in to the -kernel): - - input - mousedev - keybdev - usbcore - uhci_hcd or ohci_hcd or ehci_hcd - usbhid - - After this, the USB keyboard will work straight away, and the USB mouse -will be available as a character device on major 13, minor 63: - - crw-r--r-- 1 root root 13, 63 Mar 28 22:45 mice - - This device has to be created. - The commands to create it by hand are: - - cd /dev - mkdir input - mknod input/mice c 13 63 - - After that you have to point GPM (the textmode mouse cut&paste tool) and -XFree to this device to use it - GPM should be called like: - - gpm -t ps2 -m /dev/input/mice - - And in X: - - Section "Pointer" - Protocol "ImPS/2" - Device "/dev/input/mice" - ZAxisMapping 4 5 - EndSection - - When you do all of the above, you can use your USB mouse and keyboard. - -3. Detailed Description -~~~~~~~~~~~~~~~~~~~~~~~ -3.1 Device drivers -~~~~~~~~~~~~~~~~~~ - Device drivers are the modules that generate events. The events are -however not useful without being handled, so you also will need to use some -of the modules from section 3.2. - -3.1.1 usbhid -~~~~~~~~~~~~ - usbhid is the largest and most complex driver of the whole suite. It -handles all HID devices, and because there is a very wide variety of them, -and because the USB HID specification isn't simple, it needs to be this big. - - Currently, it handles USB mice, joysticks, gamepads, steering wheels -keyboards, trackballs and digitizers. - - However, USB uses HID also for monitor controls, speaker controls, UPSs, -LCDs and many other purposes. - - The monitor and speaker controls should be easy to add to the hid/input -interface, but for the UPSs and LCDs it doesn't make much sense. For this, -the hiddev interface was designed. See Documentation/hid/hiddev.txt -for more information about it. - - The usage of the usbhid module is very simple, it takes no parameters, -detects everything automatically and when a HID device is inserted, it -detects it appropriately. - - However, because the devices vary wildly, you might happen to have a -device that doesn't work well. In that case #define DEBUG at the beginning -of hid-core.c and send me the syslog traces. - -3.1.2 usbmouse -~~~~~~~~~~~~~~ - For embedded systems, for mice with broken HID descriptors and just any -other use when the big usbhid wouldn't be a good choice, there is the -usbmouse driver. It handles USB mice only. It uses a simpler HIDBP -protocol. This also means the mice must support this simpler protocol. Not -all do. If you don't have any strong reason to use this module, use usbhid -instead. - -3.1.3 usbkbd -~~~~~~~~~~~~ - Much like usbmouse, this module talks to keyboards with a simplified -HIDBP protocol. It's smaller, but doesn't support any extra special keys. -Use usbhid instead if there isn't any special reason to use this. - -3.1.4 wacom -~~~~~~~~~~~ - This is a driver for Wacom Graphire and Intuos tablets. Not for Wacom -PenPartner, that one is handled by the HID driver. Although the Intuos and -Graphire tablets claim that they are HID tablets as well, they are not and -thus need this specific driver. - -3.1.5 iforce -~~~~~~~~~~~~ - A driver for I-Force joysticks and wheels, both over USB and RS232. -It includes ForceFeedback support now, even though Immersion -Corp. considers the protocol a trade secret and won't disclose a word -about it. - -3.2 Event handlers -~~~~~~~~~~~~~~~~~~ - Event handlers distribute the events from the devices to userland and -kernel, as needed. - -3.2.1 keybdev -~~~~~~~~~~~~~ - keybdev is currently a rather ugly hack that translates the input -events into architecture-specific keyboard raw mode (Xlated AT Set2 on -x86), and passes them into the handle_scancode function of the -keyboard.c module. This works well enough on all architectures that -keybdev can generate rawmode on, other architectures can be added to -it. - - The right way would be to pass the events to keyboard.c directly, -best if keyboard.c would itself be an event handler. This is done in -the input patch, available on the webpage mentioned below. - -3.2.2 mousedev -~~~~~~~~~~~~~~ - mousedev is also a hack to make programs that use mouse input -work. It takes events from either mice or digitizers/tablets and makes -a PS/2-style (a la /dev/psaux) mouse device available to the -userland. Ideally, the programs could use a more reasonable interface, -for example evdev - - Mousedev devices in /dev/input (as shown above) are: - - crw-r--r-- 1 root root 13, 32 Mar 28 22:45 mouse0 - crw-r--r-- 1 root root 13, 33 Mar 29 00:41 mouse1 - crw-r--r-- 1 root root 13, 34 Mar 29 00:41 mouse2 - crw-r--r-- 1 root root 13, 35 Apr 1 10:50 mouse3 - ... - ... - crw-r--r-- 1 root root 13, 62 Apr 1 10:50 mouse30 - crw-r--r-- 1 root root 13, 63 Apr 1 10:50 mice - -Each 'mouse' device is assigned to a single mouse or digitizer, except -the last one - 'mice'. This single character device is shared by all -mice and digitizers, and even if none are connected, the device is -present. This is useful for hotplugging USB mice, so that programs -can open the device even when no mice are present. - - CONFIG_INPUT_MOUSEDEV_SCREEN_[XY] in the kernel configuration are -the size of your screen (in pixels) in XFree86. This is needed if you -want to use your digitizer in X, because its movement is sent to X -via a virtual PS/2 mouse and thus needs to be scaled -accordingly. These values won't be used if you use a mouse only. - - Mousedev will generate either PS/2, ImPS/2 (Microsoft IntelliMouse) or -ExplorerPS/2 (IntelliMouse Explorer) protocols, depending on what the -program reading the data wishes. You can set GPM and X to any of -these. You'll need ImPS/2 if you want to make use of a wheel on a USB -mouse and ExplorerPS/2 if you want to use extra (up to 5) buttons. - -3.2.3 joydev -~~~~~~~~~~~~ - Joydev implements v0.x and v1.x Linux joystick api, much like -drivers/char/joystick/joystick.c used to in earlier versions. See -joystick-api.txt in the Documentation subdirectory for details. As -soon as any joystick is connected, it can be accessed in /dev/input -on: - - crw-r--r-- 1 root root 13, 0 Apr 1 10:50 js0 - crw-r--r-- 1 root root 13, 1 Apr 1 10:50 js1 - crw-r--r-- 1 root root 13, 2 Apr 1 10:50 js2 - crw-r--r-- 1 root root 13, 3 Apr 1 10:50 js3 - ... - -And so on up to js31. - -3.2.4 evdev -~~~~~~~~~~~ - evdev is the generic input event interface. It passes the events -generated in the kernel straight to the program, with timestamps. The -API is still evolving, but should be usable now. It's described in -section 5. - - This should be the way for GPM and X to get keyboard and mouse -events. It allows for multihead in X without any specific multihead -kernel support. The event codes are the same on all architectures and -are hardware independent. - - The devices are in /dev/input: - - crw-r--r-- 1 root root 13, 64 Apr 1 10:49 event0 - crw-r--r-- 1 root root 13, 65 Apr 1 10:50 event1 - crw-r--r-- 1 root root 13, 66 Apr 1 10:50 event2 - crw-r--r-- 1 root root 13, 67 Apr 1 10:50 event3 - ... - -And so on up to event31. - -4. Verifying if it works -~~~~~~~~~~~~~~~~~~~~~~~~ - Typing a couple keys on the keyboard should be enough to check that -a USB keyboard works and is correctly connected to the kernel keyboard -driver. - - Doing a "cat /dev/input/mouse0" (c, 13, 32) will verify that a mouse -is also emulated; characters should appear if you move it. - - You can test the joystick emulation with the 'jstest' utility, -available in the joystick package (see Documentation/input/joystick.txt). - - You can test the event devices with the 'evtest' utility available -in the LinuxConsole project CVS archive (see the URL below). - -5. Event interface -~~~~~~~~~~~~~~~~~~ - Should you want to add event device support into any application (X, gpm, -svgalib ...) I will be happy to provide you any help I -can. Here goes a description of the current state of things, which is going -to be extended, but not changed incompatibly as time goes: - - You can use blocking and nonblocking reads, also select() on the -/dev/input/eventX devices, and you'll always get a whole number of input -events on a read. Their layout is: - -struct input_event { - struct timeval time; - unsigned short type; - unsigned short code; - unsigned int value; -}; - - 'time' is the timestamp, it returns the time at which the event happened. -Type is for example EV_REL for relative moment, EV_KEY for a keypress or -release. More types are defined in include/uapi/linux/input-event-codes.h. - - 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete -list is in include/uapi/linux/input-event-codes.h. - - 'value' is the value the event carries. Either a relative change for -EV_REL, absolute new value for EV_ABS (joysticks ...), or 0 for EV_KEY for -release, 1 for keypress and 2 for autorepeat. - diff --git a/Documentation/input/input_kapi.rst b/Documentation/input/input_kapi.rst new file mode 100644 index 0000000000000000000000000000000000000000..41f1b7e6b78ea56097300339f5c39894a9403fc4 --- /dev/null +++ b/Documentation/input/input_kapi.rst @@ -0,0 +1,17 @@ +.. include:: + +################################ +Linux Input Subsystem kernel API +################################ + +.. class:: toc-title + + Table of Contents + +.. toctree:: + :maxdepth: 2 + :numbered: + + input-programming + gameport-programming + notifier diff --git a/Documentation/input/input_uapi.rst b/Documentation/input/input_uapi.rst new file mode 100644 index 0000000000000000000000000000000000000000..4a0391609327a1d4931382d438e532ce75cf3305 --- /dev/null +++ b/Documentation/input/input_uapi.rst @@ -0,0 +1,22 @@ +.. include:: + +################################### +Linux Input Subsystem userspace API +################################### + +.. class:: toc-title + + Table of Contents + +.. toctree:: + :maxdepth: 2 + :numbered: + + input + event-codes + multi-touch-protocol + gamepad + ff + joydev/index + uinput + userio diff --git a/Documentation/input/interactive.fig b/Documentation/input/interactive.fig deleted file mode 100644 index 1e7de387723a972aff917cbb4d26a29aca79c198..0000000000000000000000000000000000000000 --- a/Documentation/input/interactive.fig +++ /dev/null @@ -1,42 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 6 - 1200 3600 1800 3600 2400 4800 3000 4800 4200 5700 4800 5700 -2 2 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 5 - 1200 3150 4800 3150 4800 6300 1200 6300 1200 3150 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 1200 4800 4800 4800 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4 - 2400 4800 2400 6525 1950 7125 1950 7800 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4 - 3000 4800 3000 6525 3600 7125 3600 7800 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 1 3 - 0 0 1.00 60.00 120.00 - 3825 5400 4125 5100 5400 5100 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 1 3 - 0 0 1.00 60.00 120.00 - 2100 4200 2400 3900 5400 3900 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 4800 5700 5400 5700 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 1800 3600 5400 3600 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 1 3 - 0 0 1.00 60.00 120.00 - 2700 4800 2700 4425 5400 4425 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1950 7800 3600 7800 -4 1 0 50 0 0 12 0.0000 4 135 810 2775 7725 Dead band\001 -4 0 0 50 0 0 12 0.0000 4 180 1155 5400 5700 right saturation\001 -4 0 0 50 0 0 12 0.0000 4 135 1065 5400 3600 left saturation\001 -4 0 0 50 0 0 12 0.0000 4 180 2505 5400 3900 left coeff ( positive in that case )\001 -4 0 0 50 0 0 12 0.0000 4 180 2640 5475 5100 right coeff ( negative in that case )\001 -4 0 0 50 0 0 12 0.0000 4 105 480 5400 4425 center\001 diff --git a/Documentation/input/interactive.svg b/Documentation/input/interactive.svg new file mode 100644 index 0000000000000000000000000000000000000000..a3513709a09bb83208e00fea7ea4266b9e998562 --- /dev/null +++ b/Documentation/input/interactive.svg @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + Dead band + right saturation + left saturation + left coeff ( positive in that case ) + right coeff ( negative in that case ) + center + diff --git a/Documentation/input/joydev/index.rst b/Documentation/input/joydev/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..8d9666c7561c8bafdfaaa0cb07db36fd85a55daa --- /dev/null +++ b/Documentation/input/joydev/index.rst @@ -0,0 +1,18 @@ +.. include:: + +====================== +Linux Joystick support +====================== + +:Copyright: |copy| 1996-2000 Vojtech Pavlik - Sponsored by SuSE + +.. class:: toc-title + + Table of Contents + +.. toctree:: + :maxdepth: 3 + :numbered: + + joystick + joystick-api diff --git a/Documentation/input/joydev/joystick-api.rst b/Documentation/input/joydev/joystick-api.rst new file mode 100644 index 0000000000000000000000000000000000000000..95803e2e8cd00e978c308e7008ebc2f2532fca99 --- /dev/null +++ b/Documentation/input/joydev/joystick-api.rst @@ -0,0 +1,348 @@ +.. _joystick-api: + +===================== +Programming Interface +===================== + +:Author: Ragnar Hojland Espinosa - 7 Aug 1998 + +Introduction +============ + +.. important:: + This document describes legacy ``js`` interface. Newer clients are + encouraged to switch to the generic event (``evdev``) interface. + +The 1.0 driver uses a new, event based approach to the joystick driver. +Instead of the user program polling for the joystick values, the joystick +driver now reports only any changes of its state. See joystick-api.txt, +joystick.h and jstest.c included in the joystick package for more +information. The joystick device can be used in either blocking or +nonblocking mode, and supports select() calls. + +For backward compatibility the old (v0.x) interface is still included. +Any call to the joystick driver using the old interface will return values +that are compatible to the old interface. This interface is still limited +to 2 axes, and applications using it usually decode only 2 buttons, although +the driver provides up to 32. + +Initialization +============== + +Open the joystick device following the usual semantics (that is, with open). +Since the driver now reports events instead of polling for changes, +immediately after the open it will issue a series of synthetic events +(JS_EVENT_INIT) that you can read to obtain the initial state of the +joystick. + +By default, the device is opened in blocking mode:: + + int fd = open ("/dev/input/js0", O_RDONLY); + + +Event Reading +============= + +:: + + struct js_event e; + read (fd, &e, sizeof(e)); + +where js_event is defined as:: + + struct js_event { + __u32 time; /* event timestamp in milliseconds */ + __s16 value; /* value */ + __u8 type; /* event type */ + __u8 number; /* axis/button number */ + }; + +If the read is successful, it will return sizeof(e), unless you wanted to read +more than one event per read as described in section 3.1. + + +js_event.type +------------- + +The possible values of ``type`` are:: + + #define JS_EVENT_BUTTON 0x01 /* button pressed/released */ + #define JS_EVENT_AXIS 0x02 /* joystick moved */ + #define JS_EVENT_INIT 0x80 /* initial state of device */ + +As mentioned above, the driver will issue synthetic JS_EVENT_INIT ORed +events on open. That is, if it's issuing a INIT BUTTON event, the +current type value will be:: + + int type = JS_EVENT_BUTTON | JS_EVENT_INIT; /* 0x81 */ + +If you choose not to differentiate between synthetic or real events +you can turn off the JS_EVENT_INIT bits:: + + type &= ~JS_EVENT_INIT; /* 0x01 */ + + +js_event.number +--------------- + +The values of ``number`` correspond to the axis or button that +generated the event. Note that they carry separate numeration (that +is, you have both an axis 0 and a button 0). Generally, + + =============== ======= + Axis number + =============== ======= + 1st Axis X 0 + 1st Axis Y 1 + 2nd Axis X 2 + 2nd Axis Y 3 + ...and so on + =============== ======= + +Hats vary from one joystick type to another. Some can be moved in 8 +directions, some only in 4, The driver, however, always reports a hat as two +independent axis, even if the hardware doesn't allow independent movement. + + +js_event.value +-------------- + +For an axis, ``value`` is a signed integer between -32767 and +32767 +representing the position of the joystick along that axis. If you +don't read a 0 when the joystick is ``dead``, or if it doesn't span the +full range, you should recalibrate it (with, for example, jscal). + +For a button, ``value`` for a press button event is 1 and for a release +button event is 0. + +Though this:: + + if (js_event.type == JS_EVENT_BUTTON) { + buttons_state ^= (1 << js_event.number); + } + +may work well if you handle JS_EVENT_INIT events separately, + +:: + + if ((js_event.type & ~JS_EVENT_INIT) == JS_EVENT_BUTTON) { + if (js_event.value) + buttons_state |= (1 << js_event.number); + else + buttons_state &= ~(1 << js_event.number); + } + +is much safer since it can't lose sync with the driver. As you would +have to write a separate handler for JS_EVENT_INIT events in the first +snippet, this ends up being shorter. + + +js_event.time +------------- + +The time an event was generated is stored in ``js_event.time``. It's a time +in milliseconds since ... well, since sometime in the past. This eases the +task of detecting double clicks, figuring out if movement of axis and button +presses happened at the same time, and similar. + + +Reading +======= + +If you open the device in blocking mode, a read will block (that is, +wait) forever until an event is generated and effectively read. There +are two alternatives if you can't afford to wait forever (which is, +admittedly, a long time;) + + a) use select to wait until there's data to be read on fd, or + until it timeouts. There's a good example on the select(2) + man page. + + b) open the device in non-blocking mode (O_NONBLOCK) + + +O_NONBLOCK +---------- + +If read returns -1 when reading in O_NONBLOCK mode, this isn't +necessarily a "real" error (check errno(3)); it can just mean there +are no events pending to be read on the driver queue. You should read +all events on the queue (that is, until you get a -1). + +For example, + +:: + + while (1) { + while (read (fd, &e, sizeof(e)) > 0) { + process_event (e); + } + /* EAGAIN is returned when the queue is empty */ + if (errno != EAGAIN) { + /* error */ + } + /* do something interesting with processed events */ + } + +One reason for emptying the queue is that if it gets full you'll start +missing events since the queue is finite, and older events will get +overwritten. + +The other reason is that you want to know all what happened, and not +delay the processing till later. + +Why can get the queue full? Because you don't empty the queue as +mentioned, or because too much time elapses from one read to another +and too many events to store in the queue get generated. Note that +high system load may contribute to space those reads even more. + +If time between reads is enough to fill the queue and lose an event, +the driver will switch to startup mode and next time you read it, +synthetic events (JS_EVENT_INIT) will be generated to inform you of +the actual state of the joystick. + + +.. note:: + + As of version 1.2.8, the queue is circular and able to hold 64 + events. You can increment this size bumping up JS_BUFF_SIZE in + joystick.h and recompiling the driver. + + +In the above code, you might as well want to read more than one event +at a time using the typical read(2) functionality. For that, you would +replace the read above with something like:: + + struct js_event mybuffer[0xff]; + int i = read (fd, mybuffer, sizeof(mybuffer)); + +In this case, read would return -1 if the queue was empty, or some +other value in which the number of events read would be i / +sizeof(js_event) Again, if the buffer was full, it's a good idea to +process the events and keep reading it until you empty the driver queue. + + +IOCTLs +====== + +The joystick driver defines the following ioctl(2) operations:: + + /* function 3rd arg */ + #define JSIOCGAXES /* get number of axes char */ + #define JSIOCGBUTTONS /* get number of buttons char */ + #define JSIOCGVERSION /* get driver version int */ + #define JSIOCGNAME(len) /* get identifier string char */ + #define JSIOCSCORR /* set correction values &js_corr */ + #define JSIOCGCORR /* get correction values &js_corr */ + +For example, to read the number of axes:: + + char number_of_axes; + ioctl (fd, JSIOCGAXES, &number_of_axes); + + +JSIOGCVERSION +------------- + +JSIOGCVERSION is a good way to check in run-time whether the running +driver is 1.0+ and supports the event interface. If it is not, the +IOCTL will fail. For a compile-time decision, you can test the +JS_VERSION symbol:: + + #ifdef JS_VERSION + #if JS_VERSION > 0xsomething + + +JSIOCGNAME +---------- + +JSIOCGNAME(len) allows you to get the name string of the joystick - the same +as is being printed at boot time. The 'len' argument is the length of the +buffer provided by the application asking for the name. It is used to avoid +possible overrun should the name be too long:: + + char name[128]; + if (ioctl(fd, JSIOCGNAME(sizeof(name)), name) < 0) + strncpy(name, "Unknown", sizeof(name)); + printf("Name: %s\n", name); + + +JSIOC[SG]CORR +------------- + +For usage on JSIOC[SG]CORR I suggest you to look into jscal.c They are +not needed in a normal program, only in joystick calibration software +such as jscal or kcmjoy. These IOCTLs and data types aren't considered +to be in the stable part of the API, and therefore may change without +warning in following releases of the driver. + +Both JSIOCSCORR and JSIOCGCORR expect &js_corr to be able to hold +information for all axis. That is, struct js_corr corr[MAX_AXIS]; + +struct js_corr is defined as:: + + struct js_corr { + __s32 coef[8]; + __u16 prec; + __u16 type; + }; + +and ``type``:: + + #define JS_CORR_NONE 0x00 /* returns raw values */ + #define JS_CORR_BROKEN 0x01 /* broken line */ + + +Backward compatibility +====================== + +The 0.x joystick driver API is quite limited and its usage is deprecated. +The driver offers backward compatibility, though. Here's a quick summary:: + + struct JS_DATA_TYPE js; + while (1) { + if (read (fd, &js, JS_RETURN) != JS_RETURN) { + /* error */ + } + usleep (1000); + } + +As you can figure out from the example, the read returns immediately, +with the actual state of the joystick:: + + struct JS_DATA_TYPE { + int buttons; /* immediate button state */ + int x; /* immediate x axis value */ + int y; /* immediate y axis value */ + }; + +and JS_RETURN is defined as:: + + #define JS_RETURN sizeof(struct JS_DATA_TYPE) + +To test the state of the buttons, + +:: + + first_button_state = js.buttons & 1; + second_button_state = js.buttons & 2; + +The axis values do not have a defined range in the original 0.x driver, +except for that the values are non-negative. The 1.2.8+ drivers use a +fixed range for reporting the values, 1 being the minimum, 128 the +center, and 255 maximum value. + +The v0.8.0.2 driver also had an interface for 'digital joysticks', (now +called Multisystem joysticks in this driver), under /dev/djsX. This driver +doesn't try to be compatible with that interface. + + +Final Notes +=========== + +:: + + ____/| Comments, additions, and specially corrections are welcome. + \ o.O| Documentation valid for at least version 1.2.8 of the joystick + =(_)= driver and as usual, the ultimate source for documentation is + U to "Use The Source Luke" or, at your convenience, Vojtech ;) diff --git a/Documentation/input/joydev/joystick.rst b/Documentation/input/joydev/joystick.rst new file mode 100644 index 0000000000000000000000000000000000000000..9746fd76cc581914ba315c2bf81c7f829ac6954d --- /dev/null +++ b/Documentation/input/joydev/joystick.rst @@ -0,0 +1,585 @@ +.. include:: + +.. _joystick-doc: + +Introduction +============ + +The joystick driver for Linux provides support for a variety of joysticks +and similar devices. It is based on a larger project aiming to support all +input devices in Linux. + +The mailing list for the project is: + + linux-input@vger.kernel.org + +send "subscribe linux-input" to majordomo@vger.kernel.org to subscribe to it. + +Usage +===== + +For basic usage you just choose the right options in kernel config and +you should be set. + +Utilities +--------- + +For testing and other purposes (for example serial devices), there is a set +of utilities, such as ``jstest``, ``jscal``, and ``evtest``, +usually packaged as ``joystick``, ``input-utils``, ``evtest``, and so on. + +``inputattach`` utility is required if your joystick is connected to a +serial port. + +Device nodes +------------ + +For applications to be able to use the joysticks, device nodes should be +created in /dev. Normally it is done automatically by the system, but +it can also be done by hand:: + + cd /dev + rm js* + mkdir input + mknod input/js0 c 13 0 + mknod input/js1 c 13 1 + mknod input/js2 c 13 2 + mknod input/js3 c 13 3 + ln -s input/js0 js0 + ln -s input/js1 js1 + ln -s input/js2 js2 + ln -s input/js3 js3 + +For testing with inpututils it's also convenient to create these:: + + mknod input/event0 c 13 64 + mknod input/event1 c 13 65 + mknod input/event2 c 13 66 + mknod input/event3 c 13 67 + +Modules needed +-------------- + +For all joystick drivers to function, you'll need the userland interface +module in kernel, either loaded or compiled in:: + + modprobe joydev + +For gameport joysticks, you'll have to load the gameport driver as well:: + + modprobe ns558 + +And for serial port joysticks, you'll need the serial input line +discipline module loaded and the inputattach utility started:: + + modprobe serport + inputattach -xxx /dev/tts/X & + +In addition to that, you'll need the joystick driver module itself, most +usually you'll have an analog joystick:: + + modprobe analog + +For automatic module loading, something like this might work - tailor to +your needs:: + + alias tty-ldisc-2 serport + alias char-major-13 input + above input joydev ns558 analog + options analog map=gamepad,none,2btn + +Verifying that it works +----------------------- + +For testing the joystick driver functionality, there is the jstest +program in the utilities package. You run it by typing:: + + jstest /dev/input/js0 + +And it should show a line with the joystick values, which update as you +move the stick, and press its buttons. The axes should all be zero when the +joystick is in the center position. They should not jitter by themselves to +other close values, and they also should be steady in any other position of +the stick. They should have the full range from -32767 to 32767. If all this +is met, then it's all fine, and you can play the games. :) + +If it's not, then there might be a problem. Try to calibrate the joystick, +and if it still doesn't work, read the drivers section of this file, the +troubleshooting section, and the FAQ. + +Calibration +----------- + +For most joysticks you won't need any manual calibration, since the +joystick should be autocalibrated by the driver automagically. However, with +some analog joysticks, that either do not use linear resistors, or if you +want better precision, you can use the jscal program:: + + jscal -c /dev/input/js0 + +included in the joystick package to set better correction coefficients than +what the driver would choose itself. + +After calibrating the joystick you can verify if you like the new +calibration using the jstest command, and if you do, you then can save the +correction coefficients into a file:: + + jscal -p /dev/input/js0 > /etc/joystick.cal + +And add a line to your rc script executing that file:: + + source /etc/joystick.cal + +This way, after the next reboot your joystick will remain calibrated. You +can also add the ``jscal -p`` line to your shutdown script. + +HW specific driver information +============================== + +In this section each of the separate hardware specific drivers is described. + +Analog joysticks +---------------- + +The analog.c uses the standard analog inputs of the gameport, and thus +supports all standard joysticks and gamepads. It uses a very advanced +routine for this, allowing for data precision that can't be found on any +other system. + +It also supports extensions like additional hats and buttons compatible +with CH Flightstick Pro, ThrustMaster FCS or 6 and 8 button gamepads. Saitek +Cyborg 'digital' joysticks are also supported by this driver, because +they're basically souped up CHF sticks. + +However the only types that can be autodetected are: + +* 2-axis, 4-button joystick +* 3-axis, 4-button joystick +* 4-axis, 4-button joystick +* Saitek Cyborg 'digital' joysticks + +For other joystick types (more/less axes, hats, and buttons) support +you'll need to specify the types either on the kernel command line or on the +module command line, when inserting analog into the kernel. The +parameters are:: + + analog.map=,,,.... + +'type' is type of the joystick from the table below, defining joysticks +present on gameports in the system, starting with gameport0, second 'type' +entry defining joystick on gameport1 and so on. + + ========= ===================================================== + Type Meaning + ========= ===================================================== + none No analog joystick on that port + auto Autodetect joystick + 2btn 2-button n-axis joystick + y-joy Two 2-button 2-axis joysticks on an Y-cable + y-pad Two 2-button 2-axis gamepads on an Y-cable + fcs Thrustmaster FCS compatible joystick + chf Joystick with a CH Flightstick compatible hat + fullchf CH Flightstick compatible with two hats and 6 buttons + gamepad 4/6-button n-axis gamepad + gamepad8 8-button 2-axis gamepad + ========= ===================================================== + +In case your joystick doesn't fit in any of the above categories, you can +specify the type as a number by combining the bits in the table below. This +is not recommended unless you really know what are you doing. It's not +dangerous, but not simple either. + + ==== ========================= + Bit Meaning + ==== ========================= + 0 Axis X1 + 1 Axis Y1 + 2 Axis X2 + 3 Axis Y2 + 4 Button A + 5 Button B + 6 Button C + 7 Button D + 8 CHF Buttons X and Y + 9 CHF Hat 1 + 10 CHF Hat 2 + 11 FCS Hat + 12 Pad Button X + 13 Pad Button Y + 14 Pad Button U + 15 Pad Button V + 16 Saitek F1-F4 Buttons + 17 Saitek Digital Mode + 19 GamePad + 20 Joy2 Axis X1 + 21 Joy2 Axis Y1 + 22 Joy2 Axis X2 + 23 Joy2 Axis Y2 + 24 Joy2 Button A + 25 Joy2 Button B + 26 Joy2 Button C + 27 Joy2 Button D + 31 Joy2 GamePad + ==== ========================= + +Microsoft SideWinder joysticks +------------------------------ + +Microsoft 'Digital Overdrive' protocol is supported by the sidewinder.c +module. All currently supported joysticks: + +* Microsoft SideWinder 3D Pro +* Microsoft SideWinder Force Feedback Pro +* Microsoft SideWinder Force Feedback Wheel +* Microsoft SideWinder FreeStyle Pro +* Microsoft SideWinder GamePad (up to four, chained) +* Microsoft SideWinder Precision Pro +* Microsoft SideWinder Precision Pro USB + +are autodetected, and thus no module parameters are needed. + +There is one caveat with the 3D Pro. There are 9 buttons reported, +although the joystick has only 8. The 9th button is the mode switch on the +rear side of the joystick. However, moving it, you'll reset the joystick, +and make it unresponsive for about a one third of a second. Furthermore, the +joystick will also re-center itself, taking the position it was in during +this time as a new center position. Use it if you want, but think first. + +The SideWinder Standard is not a digital joystick, and thus is supported +by the analog driver described above. + +Logitech ADI devices +-------------------- + +Logitech ADI protocol is supported by the adi.c module. It should support +any Logitech device using this protocol. This includes, but is not limited +to: + +* Logitech CyberMan 2 +* Logitech ThunderPad Digital +* Logitech WingMan Extreme Digital +* Logitech WingMan Formula +* Logitech WingMan Interceptor +* Logitech WingMan GamePad +* Logitech WingMan GamePad USB +* Logitech WingMan GamePad Extreme +* Logitech WingMan Extreme Digital 3D + +ADI devices are autodetected, and the driver supports up to two (any +combination of) devices on a single gameport, using an Y-cable or chained +together. + +Logitech WingMan Joystick, Logitech WingMan Attack, Logitech WingMan +Extreme and Logitech WingMan ThunderPad are not digital joysticks and are +handled by the analog driver described above. Logitech WingMan Warrior and +Logitech Magellan are supported by serial drivers described below. Logitech +WingMan Force and Logitech WingMan Formula Force are supported by the +I-Force driver described below. Logitech CyberMan is not supported yet. + +Gravis GrIP +----------- + +Gravis GrIP protocol is supported by the grip.c module. It currently +supports: + +* Gravis GamePad Pro +* Gravis BlackHawk Digital +* Gravis Xterminator +* Gravis Xterminator DualControl + +All these devices are autodetected, and you can even use any combination +of up to two of these pads either chained together or using an Y-cable on a +single gameport. + +GrIP MultiPort isn't supported yet. Gravis Stinger is a serial device and is +supported by the stinger driver. Other Gravis joysticks are supported by the +analog driver. + +FPGaming A3D and MadCatz A3D +---------------------------- + +The Assassin 3D protocol created by FPGaming, is used both by FPGaming +themselves and is licensed to MadCatz. A3D devices are supported by the +a3d.c module. It currently supports: + +* FPGaming Assassin 3D +* MadCatz Panther +* MadCatz Panther XL + +All these devices are autodetected. Because the Assassin 3D and the Panther +allow connecting analog joysticks to them, you'll need to load the analog +driver as well to handle the attached joysticks. + +The trackball should work with USB mousedev module as a normal mouse. See +the USB documentation for how to setup an USB mouse. + +ThrustMaster DirectConnect (BSP) +-------------------------------- + +The TM DirectConnect (BSP) protocol is supported by the tmdc.c +module. This includes, but is not limited to: + +* ThrustMaster Millennium 3D Interceptor +* ThrustMaster 3D Rage Pad +* ThrustMaster Fusion Digital Game Pad + +Devices not directly supported, but hopefully working are: + +* ThrustMaster FragMaster +* ThrustMaster Attack Throttle + +If you have one of these, contact me. + +TMDC devices are autodetected, and thus no parameters to the module +are needed. Up to two TMDC devices can be connected to one gameport, using +an Y-cable. + +Creative Labs Blaster +--------------------- + +The Blaster protocol is supported by the cobra.c module. It supports only +the: + +* Creative Blaster GamePad Cobra + +Up to two of these can be used on a single gameport, using an Y-cable. + +Genius Digital joysticks +------------------------ + +The Genius digitally communicating joysticks are supported by the gf2k.c +module. This includes: + +* Genius Flight2000 F-23 joystick +* Genius Flight2000 F-31 joystick +* Genius G-09D gamepad + +Other Genius digital joysticks are not supported yet, but support can be +added fairly easily. + +InterAct Digital joysticks +-------------------------- + +The InterAct digitally communicating joysticks are supported by the +interact.c module. This includes: + +* InterAct HammerHead/FX gamepad +* InterAct ProPad8 gamepad + +Other InterAct digital joysticks are not supported yet, but support can be +added fairly easily. + +PDPI Lightning 4 gamecards +-------------------------- + +PDPI Lightning 4 gamecards are supported by the lightning.c module. +Once the module is loaded, the analog driver can be used to handle the +joysticks. Digitally communicating joystick will work only on port 0, while +using Y-cables, you can connect up to 8 analog joysticks to a single L4 +card, 16 in case you have two in your system. + +Trident 4DWave / Aureal Vortex +------------------------------ + +Soundcards with a Trident 4DWave DX/NX or Aureal Vortex/Vortex2 chipsets +provide an "Enhanced Game Port" mode where the soundcard handles polling the +joystick. This mode is supported by the pcigame.c module. Once loaded the +analog driver can use the enhanced features of these gameports.. + +Crystal SoundFusion +------------------- + +Soundcards with Crystal SoundFusion chipsets provide an "Enhanced Game +Port", much like the 4DWave or Vortex above. This, and also the normal mode +for the port of the SoundFusion is supported by the cs461x.c module. + +SoundBlaster Live! +------------------ + +The Live! has a special PCI gameport, which, although it doesn't provide +any "Enhanced" stuff like 4DWave and friends, is quite a bit faster than +its ISA counterparts. It also requires special support, hence the +emu10k1-gp.c module for it instead of the normal ns558.c one. + +SoundBlaster 64 and 128 - ES1370 and ES1371, ESS Solo1 and S3 SonicVibes +------------------------------------------------------------------------ + +These PCI soundcards have specific gameports. They are handled by the +sound drivers themselves. Make sure you select gameport support in the +joystick menu and sound card support in the sound menu for your appropriate +card. + +Amiga +----- + +Amiga joysticks, connected to an Amiga, are supported by the amijoy.c +driver. Since they can't be autodetected, the driver has a command line: + + amijoy.map=, + +a and b define the joysticks connected to the JOY0DAT and JOY1DAT ports of +the Amiga. + + ====== =========================== + Value Joystick type + ====== =========================== + 0 None + 1 1-button digital joystick + ====== =========================== + +No more joystick types are supported now, but that should change in the +future if I get an Amiga in the reach of my fingers. + +Game console and 8-bit pads and joysticks +----------------------------------------- + +These pads and joysticks are not designed for PCs and other computers +Linux runs on, and usually require a special connector for attaching +them through a parallel port. + +See :ref:`joystick-parport` for more info. + +SpaceTec/LabTec devices +----------------------- + +SpaceTec serial devices communicate using the SpaceWare protocol. It is +supported by the spaceorb.c and spaceball.c drivers. The devices currently +supported by spaceorb.c are: + +* SpaceTec SpaceBall Avenger +* SpaceTec SpaceOrb 360 + +Devices currently supported by spaceball.c are: + +* SpaceTec SpaceBall 4000 FLX + +In addition to having the spaceorb/spaceball and serport modules in the +kernel, you also need to attach a serial port to it. to do that, run the +inputattach program:: + + inputattach --spaceorb /dev/tts/x & + +or:: + + inputattach --spaceball /dev/tts/x & + +where /dev/tts/x is the serial port which the device is connected to. After +doing this, the device will be reported and will start working. + +There is one caveat with the SpaceOrb. The button #6, the on the bottom +side of the orb, although reported as an ordinary button, causes internal +recentering of the spaceorb, moving the zero point to the position in which +the ball is at the moment of pressing the button. So, think first before +you bind it to some other function. + +SpaceTec SpaceBall 2003 FLX and 3003 FLX are not supported yet. + +Logitech SWIFT devices +---------------------- + +The SWIFT serial protocol is supported by the warrior.c module. It +currently supports only the: + +* Logitech WingMan Warrior + +but in the future, Logitech CyberMan (the original one, not CM2) could be +supported as well. To use the module, you need to run inputattach after you +insert/compile the module into your kernel:: + + inputattach --warrior /dev/tts/x & + +/dev/tts/x is the serial port your Warrior is attached to. + +Magellan / Space Mouse +---------------------- + +The Magellan (or Space Mouse), manufactured by LogiCad3d (formerly Space +Systems), for many other companies (Logitech, HP, ...) is supported by the +joy-magellan module. It currently supports only the: + +* Magellan 3D +* Space Mouse + +models, the additional buttons on the 'Plus' versions are not supported yet. + +To use it, you need to attach the serial port to the driver using the:: + + inputattach --magellan /dev/tts/x & + +command. After that the Magellan will be detected, initialized, will beep, +and the /dev/input/jsX device should become usable. + +I-Force devices +--------------- + +All I-Force devices are supported by the iforce module. This includes: + +* AVB Mag Turbo Force +* AVB Top Shot Pegasus +* AVB Top Shot Force Feedback Racing Wheel +* Logitech WingMan Force +* Logitech WingMan Force Wheel +* Guillemot Race Leader Force Feedback +* Guillemot Force Feedback Racing Wheel +* Thrustmaster Motor Sport GT + +To use it, you need to attach the serial port to the driver using the:: + + inputattach --iforce /dev/tts/x & + +command. After that the I-Force device will be detected, and the +/dev/input/jsX device should become usable. + +In case you're using the device via the USB port, the inputattach command +isn't needed. + +The I-Force driver now supports force feedback via the event interface. + +Please note that Logitech WingMan 3D devices are _not_ supported by this +module, rather by hid. Force feedback is not supported for those devices. +Logitech gamepads are also hid devices. + +Gravis Stinger gamepad +---------------------- + +The Gravis Stinger serial port gamepad, designed for use with laptop +computers, is supported by the stinger.c module. To use it, attach the +serial port to the driver using:: + + inputattach --stinger /dev/tty/x & + +where x is the number of the serial port. + +Troubleshooting +=============== + +There is quite a high probability that you run into some problems. For +testing whether the driver works, if in doubt, use the jstest utility in +some of its modes. The most useful modes are "normal" - for the 1.x +interface, and "old" for the "0.x" interface. You run it by typing:: + + jstest --normal /dev/input/js0 + jstest --old /dev/input/js0 + +Additionally you can do a test with the evtest utility:: + + evtest /dev/input/event0 + +Oh, and read the FAQ! :) + +FAQ +=== + +:Q: Running 'jstest /dev/input/js0' results in "File not found" error. What's the + cause? +:A: The device files don't exist. Create them (see section 2.2). + +:Q: Is it possible to connect my old Atari/Commodore/Amiga/console joystick + or pad that uses a 9-pin D-type cannon connector to the serial port of my + PC? +:A: Yes, it is possible, but it'll burn your serial port or the pad. It + won't work, of course. + +:Q: My joystick doesn't work with Quake / Quake 2. What's the cause? +:A: Quake / Quake 2 don't support joystick. Use joy2key to simulate keypresses + for them. diff --git a/Documentation/input/joystick-api.txt b/Documentation/input/joystick-api.txt deleted file mode 100644 index 943b18eac91827d62b10ab79dceb519bb895effd..0000000000000000000000000000000000000000 --- a/Documentation/input/joystick-api.txt +++ /dev/null @@ -1,314 +0,0 @@ - Joystick API Documentation -*-Text-*- - - Ragnar Hojland Espinosa - - - 7 Aug 1998 - -1. Initialization -~~~~~~~~~~~~~~~~~ - -Open the joystick device following the usual semantics (that is, with open). -Since the driver now reports events instead of polling for changes, -immediately after the open it will issue a series of synthetic events -(JS_EVENT_INIT) that you can read to check the initial state of the -joystick. - -By default, the device is opened in blocking mode. - - int fd = open ("/dev/input/js0", O_RDONLY); - - -2. Event Reading -~~~~~~~~~~~~~~~~ - - struct js_event e; - read (fd, &e, sizeof(e)); - -where js_event is defined as - - struct js_event { - __u32 time; /* event timestamp in milliseconds */ - __s16 value; /* value */ - __u8 type; /* event type */ - __u8 number; /* axis/button number */ - }; - -If the read is successful, it will return sizeof(e), unless you wanted to read -more than one event per read as described in section 3.1. - - -2.1 js_event.type -~~~~~~~~~~~~~~~~~ - -The possible values of ``type'' are - - #define JS_EVENT_BUTTON 0x01 /* button pressed/released */ - #define JS_EVENT_AXIS 0x02 /* joystick moved */ - #define JS_EVENT_INIT 0x80 /* initial state of device */ - -As mentioned above, the driver will issue synthetic JS_EVENT_INIT ORed -events on open. That is, if it's issuing a INIT BUTTON event, the -current type value will be - - int type = JS_EVENT_BUTTON | JS_EVENT_INIT; /* 0x81 */ - -If you choose not to differentiate between synthetic or real events -you can turn off the JS_EVENT_INIT bits - - type &= ~JS_EVENT_INIT; /* 0x01 */ - - -2.2 js_event.number -~~~~~~~~~~~~~~~~~~~ - -The values of ``number'' correspond to the axis or button that -generated the event. Note that they carry separate numeration (that -is, you have both an axis 0 and a button 0). Generally, - - number - 1st Axis X 0 - 1st Axis Y 1 - 2nd Axis X 2 - 2nd Axis Y 3 - ...and so on - -Hats vary from one joystick type to another. Some can be moved in 8 -directions, some only in 4, The driver, however, always reports a hat as two -independent axis, even if the hardware doesn't allow independent movement. - - -2.3 js_event.value -~~~~~~~~~~~~~~~~~~ - -For an axis, ``value'' is a signed integer between -32767 and +32767 -representing the position of the joystick along that axis. If you -don't read a 0 when the joystick is `dead', or if it doesn't span the -full range, you should recalibrate it (with, for example, jscal). - -For a button, ``value'' for a press button event is 1 and for a release -button event is 0. - -Though this - - if (js_event.type == JS_EVENT_BUTTON) { - buttons_state ^= (1 << js_event.number); - } - -may work well if you handle JS_EVENT_INIT events separately, - - if ((js_event.type & ~JS_EVENT_INIT) == JS_EVENT_BUTTON) { - if (js_event.value) - buttons_state |= (1 << js_event.number); - else - buttons_state &= ~(1 << js_event.number); - } - -is much safer since it can't lose sync with the driver. As you would -have to write a separate handler for JS_EVENT_INIT events in the first -snippet, this ends up being shorter. - - -2.4 js_event.time -~~~~~~~~~~~~~~~~~ - -The time an event was generated is stored in ``js_event.time''. It's a time -in milliseconds since ... well, since sometime in the past. This eases the -task of detecting double clicks, figuring out if movement of axis and button -presses happened at the same time, and similar. - - -3. Reading -~~~~~~~~~~ - -If you open the device in blocking mode, a read will block (that is, -wait) forever until an event is generated and effectively read. There -are two alternatives if you can't afford to wait forever (which is, -admittedly, a long time;) - - a) use select to wait until there's data to be read on fd, or - until it timeouts. There's a good example on the select(2) - man page. - - b) open the device in non-blocking mode (O_NONBLOCK) - - -3.1 O_NONBLOCK -~~~~~~~~~~~~~~ - -If read returns -1 when reading in O_NONBLOCK mode, this isn't -necessarily a "real" error (check errno(3)); it can just mean there -are no events pending to be read on the driver queue. You should read -all events on the queue (that is, until you get a -1). - -For example, - - while (1) { - while (read (fd, &e, sizeof(e)) > 0) { - process_event (e); - } - /* EAGAIN is returned when the queue is empty */ - if (errno != EAGAIN) { - /* error */ - } - /* do something interesting with processed events */ - } - -One reason for emptying the queue is that if it gets full you'll start -missing events since the queue is finite, and older events will get -overwritten. - -The other reason is that you want to know all what happened, and not -delay the processing till later. - -Why can get the queue full? Because you don't empty the queue as -mentioned, or because too much time elapses from one read to another -and too many events to store in the queue get generated. Note that -high system load may contribute to space those reads even more. - -If time between reads is enough to fill the queue and lose an event, -the driver will switch to startup mode and next time you read it, -synthetic events (JS_EVENT_INIT) will be generated to inform you of -the actual state of the joystick. - -[As for version 1.2.8, the queue is circular and able to hold 64 - events. You can increment this size bumping up JS_BUFF_SIZE in - joystick.h and recompiling the driver.] - - -In the above code, you might as well want to read more than one event -at a time using the typical read(2) functionality. For that, you would -replace the read above with something like - - struct js_event mybuffer[0xff]; - int i = read (fd, mybuffer, sizeof(mybuffer)); - -In this case, read would return -1 if the queue was empty, or some -other value in which the number of events read would be i / -sizeof(js_event) Again, if the buffer was full, it's a good idea to -process the events and keep reading it until you empty the driver queue. - - -4. IOCTLs -~~~~~~~~~ - -The joystick driver defines the following ioctl(2) operations. - - /* function 3rd arg */ - #define JSIOCGAXES /* get number of axes char */ - #define JSIOCGBUTTONS /* get number of buttons char */ - #define JSIOCGVERSION /* get driver version int */ - #define JSIOCGNAME(len) /* get identifier string char */ - #define JSIOCSCORR /* set correction values &js_corr */ - #define JSIOCGCORR /* get correction values &js_corr */ - -For example, to read the number of axes - - char number_of_axes; - ioctl (fd, JSIOCGAXES, &number_of_axes); - - -4.1 JSIOGCVERSION -~~~~~~~~~~~~~~~~~ - -JSIOGCVERSION is a good way to check in run-time whether the running -driver is 1.0+ and supports the event interface. If it is not, the -IOCTL will fail. For a compile-time decision, you can test the -JS_VERSION symbol - - #ifdef JS_VERSION - #if JS_VERSION > 0xsomething - - -4.2 JSIOCGNAME -~~~~~~~~~~~~~~ - -JSIOCGNAME(len) allows you to get the name string of the joystick - the same -as is being printed at boot time. The 'len' argument is the length of the -buffer provided by the application asking for the name. It is used to avoid -possible overrun should the name be too long. - - char name[128]; - if (ioctl(fd, JSIOCGNAME(sizeof(name)), name) < 0) - strncpy(name, "Unknown", sizeof(name)); - printf("Name: %s\n", name); - - -4.3 JSIOC[SG]CORR -~~~~~~~~~~~~~~~~~ - -For usage on JSIOC[SG]CORR I suggest you to look into jscal.c They are -not needed in a normal program, only in joystick calibration software -such as jscal or kcmjoy. These IOCTLs and data types aren't considered -to be in the stable part of the API, and therefore may change without -warning in following releases of the driver. - -Both JSIOCSCORR and JSIOCGCORR expect &js_corr to be able to hold -information for all axis. That is, struct js_corr corr[MAX_AXIS]; - -struct js_corr is defined as - - struct js_corr { - __s32 coef[8]; - __u16 prec; - __u16 type; - }; - -and ``type'' - - #define JS_CORR_NONE 0x00 /* returns raw values */ - #define JS_CORR_BROKEN 0x01 /* broken line */ - - -5. Backward compatibility -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The 0.x joystick driver API is quite limited and its usage is deprecated. -The driver offers backward compatibility, though. Here's a quick summary: - - struct JS_DATA_TYPE js; - while (1) { - if (read (fd, &js, JS_RETURN) != JS_RETURN) { - /* error */ - } - usleep (1000); - } - -As you can figure out from the example, the read returns immediately, -with the actual state of the joystick. - - struct JS_DATA_TYPE { - int buttons; /* immediate button state */ - int x; /* immediate x axis value */ - int y; /* immediate y axis value */ - }; - -and JS_RETURN is defined as - - #define JS_RETURN sizeof(struct JS_DATA_TYPE) - -To test the state of the buttons, - - first_button_state = js.buttons & 1; - second_button_state = js.buttons & 2; - -The axis values do not have a defined range in the original 0.x driver, -except for that the values are non-negative. The 1.2.8+ drivers use a -fixed range for reporting the values, 1 being the minimum, 128 the -center, and 255 maximum value. - -The v0.8.0.2 driver also had an interface for 'digital joysticks', (now -called Multisystem joysticks in this driver), under /dev/djsX. This driver -doesn't try to be compatible with that interface. - - -6. Final Notes -~~~~~~~~~~~~~~ - -____/| Comments, additions, and specially corrections are welcome. -\ o.O| Documentation valid for at least version 1.2.8 of the joystick - =(_)= driver and as usual, the ultimate source for documentation is - U to "Use The Source Luke" or, at your convenience, Vojtech ;) - - - Ragnar -EOF diff --git a/Documentation/input/joystick-parport.txt b/Documentation/input/joystick-parport.txt deleted file mode 100644 index 56870c70a7965a1efbe2c628e8acd27d3664cc52..0000000000000000000000000000000000000000 --- a/Documentation/input/joystick-parport.txt +++ /dev/null @@ -1,542 +0,0 @@ - Linux Joystick parport drivers v2.0 - (c) 1998-2000 Vojtech Pavlik - (c) 1998 Andree Borrmann - Sponsored by SuSE ----------------------------------------------------------------------------- - -0. Disclaimer -~~~~~~~~~~~~~ - Any information in this file is provided as-is, without any guarantee that -it will be true. So, use it at your own risk. The possible damages that can -happen include burning your parallel port, and/or the sticks and joystick -and maybe even more. Like when a lightning kills you it is not our problem. - -1. Intro -~~~~~~~~ - The joystick parport drivers are used for joysticks and gamepads not -originally designed for PCs and other computers Linux runs on. Because of -that, PCs usually lack the right ports to connect these devices to. Parallel -port, because of its ability to change single bits at will, and providing -both output and input bits is the most suitable port on the PC for -connecting such devices. - -2. Devices supported -~~~~~~~~~~~~~~~~~~~~ - Many console and 8-bit computer gamepads and joysticks are supported. The -following subsections discuss usage of each. - -2.1 NES and SNES -~~~~~~~~~~~~~~~~ - The Nintendo Entertainment System and Super Nintendo Entertainment System -gamepads are widely available, and easy to get. Also, they are quite easy to -connect to a PC, and don't need much processing speed (108 us for NES and -165 us for SNES, compared to about 1000 us for PC gamepads) to communicate -with them. - - All NES and SNES use the same synchronous serial protocol, clocked from -the computer's side (and thus timing insensitive). To allow up to 5 NES -and/or SNES gamepads and/or SNES mice connected to the parallel port at once, -the output lines of the parallel port are shared, while one of 5 available -input lines is assigned to each gamepad. - - This protocol is handled by the gamecon.c driver, so that's the one -you'll use for NES, SNES gamepads and SNES mice. - - The main problem with PC parallel ports is that they don't have +5V power -source on any of their pins. So, if you want a reliable source of power -for your pads, use either keyboard or joystick port, and make a pass-through -cable. You can also pull the power directly from the power supply (the red -wire is +5V). - - If you want to use the parallel port only, you can take the power is from -some data pin. For most gamepad and parport implementations only one pin is -needed, and I'd recommend pin 9 for that, the highest data bit. On the other -hand, if you are not planning to use anything else than NES / SNES on the -port, anything between and including pin 4 and pin 9 will work. - -(pin 9) -----> Power - - Unfortunately, there are pads that need a lot more of power, and parallel -ports that can't give much current through the data pins. If this is your -case, you'll need to use diodes (as a prevention of destroying your parallel -port), and combine the currents of two or more data bits together. - - Diodes -(pin 9) ----|>|-------+------> Power - | -(pin 8) ----|>|-------+ - | -(pin 7) ----|>|-------+ - | - : - | -(pin 4) ----|>|-------+ - - Ground is quite easy. On PC's parallel port the ground is on any of the -pins from pin 18 to pin 25. So use any pin of these you like for the ground. - -(pin 18) -----> Ground - - NES and SNES pads have two input bits, Clock and Latch, which drive the -serial transfer. These are connected to pins 2 and 3 of the parallel port, -respectively. - -(pin 2) -----> Clock -(pin 3) -----> Latch - - And the last thing is the NES / SNES data wire. Only that isn't shared and -each pad needs its own data pin. The parallel port pins are: - -(pin 10) -----> Pad 1 data -(pin 11) -----> Pad 2 data -(pin 12) -----> Pad 3 data -(pin 13) -----> Pad 4 data -(pin 15) -----> Pad 5 data - - Note that pin 14 is not used, since it is not an input pin on the parallel -port. - - This is everything you need on the PC's side of the connection, now on to -the gamepads side. The NES and SNES have different connectors. Also, there -are quite a lot of NES clones, and because Nintendo used proprietary -connectors for their machines, the cloners couldn't and used standard D-Cannon -connectors. Anyway, if you've got a gamepad, and it has buttons A, B, Turbo -A, Turbo B, Select and Start, and is connected through 5 wires, then it is -either a NES or NES clone and will work with this connection. SNES gamepads -also use 5 wires, but have more buttons. They will work as well, of course. - -Pinout for NES gamepads Pinout for SNES gamepads and mice - - +----> Power +-----------------------\ - | 7 | o o o o | x x o | 1 - 5 +---------+ 7 +-----------------------/ - | x x o \ | | | | | - | o o o o | | | | | +-> Ground - 4 +------------+ 1 | | | +------------> Data - | | | | | | +---------------> Latch - | | | +-> Ground | +------------------> Clock - | | +----> Clock +---------------------> Power - | +-------> Latch - +----------> Data - -Pinout for NES clone (db9) gamepads Pinout for NES clone (db15) gamepads - - +---------> Clock +-----------------> Data - | +-------> Latch | +---> Ground - | | +-----> Data | | - | | | ___________________ - _____________ 8 \ o x x x x x x o / 1 - 5 \ x o o o x / 1 \ o x x o x x o / - \ x o x o / 15 `~~~~~~~~~~~~~' 9 - 9 `~~~~~~~' 6 | | | - | | | | +----> Clock - | +----> Power | +----------> Latch - +--------> Ground +----------------> Power - -2.2 Multisystem joysticks -~~~~~~~~~~~~~~~~~~~~~~~~~ - In the era of 8-bit machines, there was something like de-facto standard -for joystick ports. They were all digital, and all used D-Cannon 9 pin -connectors (db9). Because of that, a single joystick could be used without -hassle on Atari (130, 800XE, 800XL, 2600, 7200), Amiga, Commodore C64, -Amstrad CPC, Sinclair ZX Spectrum and many other machines. That's why these -joysticks are called "Multisystem". - - Now their pinout: - - +---------> Right - | +-------> Left - | | +-----> Down - | | | +---> Up - | | | | - _____________ -5 \ x o o o o / 1 - \ x o x o / - 9 `~~~~~~~' 6 - | | - | +----> Button - +--------> Ground - - However, as time passed, extensions to this standard developed, and these -were not compatible with each other: - - - Atari 130, 800/XL/XE MSX - - +-----------> Power - +---------> Right | +---------> Right - | +-------> Left | | +-------> Left - | | +-----> Down | | | +-----> Down - | | | +---> Up | | | | +---> Up - | | | | | | | | | - _____________ _____________ -5 \ x o o o o / 1 5 \ o o o o o / 1 - \ x o o o / \ o o o o / - 9 `~~~~~~~' 6 9 `~~~~~~~' 6 - | | | | | | | - | | +----> Button | | | +----> Button 1 - | +------> Power | | +------> Button 2 - +--------> Ground | +--------> Output 3 - +----------> Ground - - Amstrad CPC Commodore C64 - - +-----------> Analog Y - +---------> Right | +---------> Right - | +-------> Left | | +-------> Left - | | +-----> Down | | | +-----> Down - | | | +---> Up | | | | +---> Up - | | | | | | | | | - _____________ _____________ -5 \ x o o o o / 1 5 \ o o o o o / 1 - \ x o o o / \ o o o o / - 9 `~~~~~~~' 6 9 `~~~~~~~' 6 - | | | | | | | - | | +----> Button 1 | | | +----> Button - | +------> Button 2 | | +------> Power - +--------> Ground | +--------> Ground - +----------> Analog X - - Sinclair Spectrum +2A/+3 Amiga 1200 - - +-----------> Up +-----------> Button 3 - | +---------> Fire | +---------> Right - | | | | +-------> Left - | | +-----> Ground | | | +-----> Down - | | | | | | | +---> Up - | | | | | | | | - _____________ _____________ -5 \ o o x o x / 1 5 \ o o o o o / 1 - \ o o o o / \ o o o o / - 9 `~~~~~~~' 6 9 `~~~~~~~' 6 - | | | | | | | | - | | | +----> Right | | | +----> Button 1 - | | +------> Left | | +------> Power - | +--------> Ground | +--------> Ground - +----------> Down +----------> Button 2 - - And there were many others. - -2.2.1 Multisystem joysticks using db9.c -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - For the Multisystem joysticks, and their derivatives, the db9.c driver -was written. It allows only one joystick / gamepad per parallel port, but -the interface is easy to build and works with almost anything. - - For the basic 1-button Multisystem joystick you connect its wires to the -parallel port like this: - -(pin 1) -----> Power -(pin 18) -----> Ground - -(pin 2) -----> Up -(pin 3) -----> Down -(pin 4) -----> Left -(pin 5) -----> Right -(pin 6) -----> Button 1 - - However, if the joystick is switch based (eg. clicks when you move it), -you might or might not, depending on your parallel port, need 10 kOhm pullup -resistors on each of the direction and button signals, like this: - -(pin 2) ------------+------> Up - Resistor | -(pin 1) --[10kOhm]--+ - - Try without, and if it doesn't work, add them. For TTL based joysticks / -gamepads the pullups are not needed. - - For joysticks with two buttons you connect the second button to pin 7 on -the parallel port. - -(pin 7) -----> Button 2 - - And that's it. - - On a side note, if you have already built a different adapter for use with -the digital joystick driver 0.8.0.2, this is also supported by the db9.c -driver, as device type 8. (See section 3.2) - -2.2.2 Multisystem joysticks using gamecon.c -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - For some people just one joystick per parallel port is not enough, and/or -want to use them on one parallel port together with NES/SNES/PSX pads. This is -possible using the gamecon.c. It supports up to 5 devices of the above types, -including 1 and 2 buttons Multisystem joysticks. - - However, there is nothing for free. To allow more sticks to be used at -once, you need the sticks to be purely switch based (that is non-TTL), and -not to need power. Just a plain simple six switches inside. If your -joystick can do more (eg. turbofire) you'll need to disable it totally first -if you want to use gamecon.c. - - Also, the connection is a bit more complex. You'll need a bunch of diodes, -and one pullup resistor. First, you connect the Directions and the button -the same as for db9, however with the diodes between. - - Diodes -(pin 2) -----|<|----> Up -(pin 3) -----|<|----> Down -(pin 4) -----|<|----> Left -(pin 5) -----|<|----> Right -(pin 6) -----|<|----> Button 1 - - For two button sticks you also connect the other button. - -(pin 7) -----|<|----> Button 2 - - And finally, you connect the Ground wire of the joystick, like done in -this little schematic to Power and Data on the parallel port, as described -for the NES / SNES pads in section 2.1 of this file - that is, one data pin -for each joystick. The power source is shared. - -Data ------------+-----> Ground - Resistor | -Power --[10kOhm]--+ - - And that's all, here we go! - -2.2.3 Multisystem joysticks using turbografx.c -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The TurboGraFX interface, designed by - - Steffen Schwenke - - allows up to 7 Multisystem joysticks connected to the parallel port. In -Steffen's version, there is support for up to 5 buttons per joystick. However, -since this doesn't work reliably on all parallel ports, the turbografx.c driver -supports only one button per joystick. For more information on how to build the -interface, see - - http://www2.burg-halle.de/~schwenke/parport.html - -2.3 Sony Playstation -~~~~~~~~~~~~~~~~~~~~ - - The PSX controller is supported by the gamecon.c. Pinout of the PSX -controller (compatible with DirectPadPro): - - +---------+---------+---------+ -9 | o o o | o o o | o o o | 1 parallel - \________|_________|________/ port pins - | | | | | | - | | | | | +--------> Clock --- (4) - | | | | +------------> Select --- (3) - | | | +---------------> Power --- (5-9) - | | +------------------> Ground --- (18-25) - | +-------------------------> Command --- (2) - +----------------------------> Data --- (one of 10,11,12,13,15) - - The driver supports these controllers: - - * Standard PSX Pad - * NegCon PSX Pad - * Analog PSX Pad (red mode) - * Analog PSX Pad (green mode) - * PSX Rumble Pad - * PSX DDR Pad - -2.4 Sega -~~~~~~~~ - All the Sega controllers are more or less based on the standard 2-button -Multisystem joystick. However, since they don't use switches and use TTL -logic, the only driver usable with them is the db9.c driver. - -2.4.1 Sega Master System -~~~~~~~~~~~~~~~~~~~~~~~~ - The SMS gamepads are almost exactly the same as normal 2-button -Multisystem joysticks. Set the driver to Multi2 mode, use the corresponding -parallel port pins, and the following schematic: - - +-----------> Power - | +---------> Right - | | +-------> Left - | | | +-----> Down - | | | | +---> Up - | | | | | - _____________ -5 \ o o o o o / 1 - \ o o x o / - 9 `~~~~~~~' 6 - | | | - | | +----> Button 1 - | +--------> Ground - +----------> Button 2 - -2.4.2 Sega Genesis aka MegaDrive -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The Sega Genesis (in Europe sold as Sega MegaDrive) pads are an extension -to the Sega Master System pads. They use more buttons (3+1, 5+1, 6+1). Use -the following schematic: - - +-----------> Power - | +---------> Right - | | +-------> Left - | | | +-----> Down - | | | | +---> Up - | | | | | - _____________ -5 \ o o o o o / 1 - \ o o o o / - 9 `~~~~~~~' 6 - | | | | - | | | +----> Button 1 - | | +------> Select - | +--------> Ground - +----------> Button 2 - - The Select pin goes to pin 14 on the parallel port. - -(pin 14) -----> Select - - The rest is the same as for Multi2 joysticks using db9.c - -2.4.3 Sega Saturn -~~~~~~~~~~~~~~~~~ - Sega Saturn has eight buttons, and to transfer that, without hacks like -Genesis 6 pads use, it needs one more select pin. Anyway, it is still -handled by the db9.c driver. Its pinout is very different from anything -else. Use this schematic: - - +-----------> Select 1 - | +---------> Power - | | +-------> Up - | | | +-----> Down - | | | | +---> Ground - | | | | | - _____________ -5 \ o o o o o / 1 - \ o o o o / - 9 `~~~~~~~' 6 - | | | | - | | | +----> Select 2 - | | +------> Right - | +--------> Left - +----------> Power - - Select 1 is pin 14 on the parallel port, Select 2 is pin 16 on the -parallel port. - -(pin 14) -----> Select 1 -(pin 16) -----> Select 2 - - The other pins (Up, Down, Right, Left, Power, Ground) are the same as for -Multi joysticks using db9.c - -3. The drivers -~~~~~~~~~~~~~~ - There are three drivers for the parallel port interfaces. Each, as -described above, allows to connect a different group of joysticks and pads. -Here are described their command lines: - -3.1 gamecon.c -~~~~~~~~~~~~~ - Using gamecon.c you can connect up to five devices to one parallel port. It -uses the following kernel/module command line: - - gamecon.map=port,pad1,pad2,pad3,pad4,pad5 - - Where 'port' the number of the parport interface (eg. 0 for parport0). - - And 'pad1' to 'pad5' are pad types connected to different data input pins -(10,11,12,13,15), as described in section 2.1 of this file. - - The types are: - - Type | Joystick/Pad - -------------------- - 0 | None - 1 | SNES pad - 2 | NES pad - 4 | Multisystem 1-button joystick - 5 | Multisystem 2-button joystick - 6 | N64 pad - 7 | Sony PSX controller - 8 | Sony PSX DDR controller - 9 | SNES mouse - - The exact type of the PSX controller type is autoprobed when used, so -hot swapping should work (but is not recommended). - - Should you want to use more than one of parallel ports at once, you can use -gamecon.map2 and gamecon.map3 as additional command line parameters for two -more parallel ports. - - There are two options specific to PSX driver portion. gamecon.psx_delay sets -the command delay when talking to the controllers. The default of 25 should -work but you can try lowering it for better performance. If your pads don't -respond try raising it until they work. Setting the type to 8 allows the -driver to be used with Dance Dance Revolution or similar games. Arrow keys are -registered as key presses instead of X and Y axes. - -3.2 db9.c -~~~~~~~~~ - Apart from making an interface, there is nothing difficult on using the -db9.c driver. It uses the following kernel/module command line: - - db9.dev=port,type - - Where 'port' is the number of the parport interface (eg. 0 for parport0). - - Caveat here: This driver only works on bidirectional parallel ports. If -your parallel port is recent enough, you should have no trouble with this. -Old parallel ports may not have this feature. - - 'Type' is the type of joystick or pad attached: - - Type | Joystick/Pad - -------------------- - 0 | None - 1 | Multisystem 1-button joystick - 2 | Multisystem 2-button joystick - 3 | Genesis pad (3+1 buttons) - 5 | Genesis pad (5+1 buttons) - 6 | Genesis pad (6+2 buttons) - 7 | Saturn pad (8 buttons) - 8 | Multisystem 1-button joystick (v0.8.0.2 pin-out) - 9 | Two Multisystem 1-button joysticks (v0.8.0.2 pin-out) - 10 | Amiga CD32 pad - - Should you want to use more than one of these joysticks/pads at once, you -can use db9.dev2 and db9.dev3 as additional command line parameters for two -more joysticks/pads. - -3.3 turbografx.c -~~~~~~~~~~~~~~~~ - The turbografx.c driver uses a very simple kernel/module command line: - - turbografx.map=port,js1,js2,js3,js4,js5,js6,js7 - - Where 'port' is the number of the parport interface (eg. 0 for parport0). - - 'jsX' is the number of buttons the Multisystem joysticks connected to the -interface ports 1-7 have. For a standard multisystem joystick, this is 1. - - Should you want to use more than one of these interfaces at once, you can -use turbografx.map2 and turbografx.map3 as additional command line parameters -for two more interfaces. - -3.4 PC parallel port pinout -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - .----------------------------------------. - At the PC: \ 13 12 11 10 9 8 7 6 5 4 3 2 1 / - \ 25 24 23 22 21 20 19 18 17 16 15 14 / - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Pin | Name | Description - ~~~~~~|~~~~~~~~~|~~~~~~~~~~ - 1 | /STROBE | Strobe - 2-9 | D0-D7 | Data Bit 0-7 - 10 | /ACK | Acknowledge - 11 | BUSY | Busy - 12 | PE | Paper End - 13 | SELIN | Select In - 14 | /AUTOFD | Autofeed - 15 | /ERROR | Error - 16 | /INIT | Initialize - 17 | /SEL | Select - 18-25 | GND | Signal Ground - -3.5 End -~~~~~~~ - That's all, folks! Have fun! diff --git a/Documentation/input/joystick.txt b/Documentation/input/joystick.txt deleted file mode 100644 index 8d027dc86c1f001efa3576ab4b41363aea031af8..0000000000000000000000000000000000000000 --- a/Documentation/input/joystick.txt +++ /dev/null @@ -1,586 +0,0 @@ - Linux Joystick driver v2.0.0 - (c) 1996-2000 Vojtech Pavlik - Sponsored by SuSE ----------------------------------------------------------------------------- - -0. Disclaimer -~~~~~~~~~~~~~ - This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your option) -any later version. - - This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - - You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., 59 -Temple Place, Suite 330, Boston, MA 02111-1307 USA - - Should you need to contact me, the author, you can do so either by e-mail -- mail your message to , or by paper mail: Vojtech Pavlik, -Simunkova 1594, Prague 8, 182 00 Czech Republic - - For your convenience, the GNU General Public License version 2 is included -in the package: See the file COPYING. - -1. Intro -~~~~~~~~ - The joystick driver for Linux provides support for a variety of joysticks -and similar devices. It is based on a larger project aiming to support all -input devices in Linux. - - Should you encounter any problems while using the driver, or joysticks -this driver can't make complete use of, I'm very interested in hearing about -them. Bug reports and success stories are also welcome. - - The input project website is at: - - http://atrey.karlin.mff.cuni.cz/~vojtech/input/ - - There is also a mailing list for the driver at: - - listproc@atrey.karlin.mff.cuni.cz - -send "subscribe linux-joystick Your Name" to subscribe to it. - -2. Usage -~~~~~~~~ - For basic usage you just choose the right options in kernel config and -you should be set. - -2.1 inpututils -~~~~~~~~~~~~~~ -For testing and other purposes (for example serial devices), a set of -utilities is available at the abovementioned website. I suggest you download -and install it before going on. - -2.2 Device nodes -~~~~~~~~~~~~~~~~ -For applications to be able to use the joysticks, -you'll have to manually create these nodes in /dev: - -cd /dev -rm js* -mkdir input -mknod input/js0 c 13 0 -mknod input/js1 c 13 1 -mknod input/js2 c 13 2 -mknod input/js3 c 13 3 -ln -s input/js0 js0 -ln -s input/js1 js1 -ln -s input/js2 js2 -ln -s input/js3 js3 - -For testing with inpututils it's also convenient to create these: - -mknod input/event0 c 13 64 -mknod input/event1 c 13 65 -mknod input/event2 c 13 66 -mknod input/event3 c 13 67 - -2.4 Modules needed -~~~~~~~~~~~~~~~~~~ - For all joystick drivers to function, you'll need the userland interface -module in kernel, either loaded or compiled in: - - modprobe joydev - - For gameport joysticks, you'll have to load the gameport driver as well; - - modprobe ns558 - - And for serial port joysticks, you'll need the serial input line -discipline module loaded and the inputattach utility started: - - modprobe serport - inputattach -xxx /dev/tts/X & - - In addition to that, you'll need the joystick driver module itself, most -usually you'll have an analog joystick: - - modprobe analog - - For automatic module loading, something like this might work - tailor to -your needs: - - alias tty-ldisc-2 serport - alias char-major-13 input - above input joydev ns558 analog - options analog map=gamepad,none,2btn - -2.5 Verifying that it works -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - For testing the joystick driver functionality, there is the jstest -program in the utilities package. You run it by typing: - - jstest /dev/input/js0 - - And it should show a line with the joystick values, which update as you -move the stick, and press its buttons. The axes should all be zero when the -joystick is in the center position. They should not jitter by themselves to -other close values, and they also should be steady in any other position of -the stick. They should have the full range from -32767 to 32767. If all this -is met, then it's all fine, and you can play the games. :) - - If it's not, then there might be a problem. Try to calibrate the joystick, -and if it still doesn't work, read the drivers section of this file, the -troubleshooting section, and the FAQ. - -2.6. Calibration -~~~~~~~~~~~~~~~~ - For most joysticks you won't need any manual calibration, since the -joystick should be autocalibrated by the driver automagically. However, with -some analog joysticks, that either do not use linear resistors, or if you -want better precision, you can use the jscal program - - jscal -c /dev/input/js0 - - included in the joystick package to set better correction coefficients than -what the driver would choose itself. - - After calibrating the joystick you can verify if you like the new -calibration using the jstest command, and if you do, you then can save the -correction coefficients into a file - - jscal -p /dev/input/js0 > /etc/joystick.cal - - And add a line to your rc script executing that file - - source /etc/joystick.cal - - This way, after the next reboot your joystick will remain calibrated. You -can also add the jscal -p line to your shutdown script. - - -3. HW specific driver information -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In this section each of the separate hardware specific drivers is described. - -3.1 Analog joysticks -~~~~~~~~~~~~~~~~~~~~ - The analog.c uses the standard analog inputs of the gameport, and thus -supports all standard joysticks and gamepads. It uses a very advanced -routine for this, allowing for data precision that can't be found on any -other system. - - It also supports extensions like additional hats and buttons compatible -with CH Flightstick Pro, ThrustMaster FCS or 6 and 8 button gamepads. Saitek -Cyborg 'digital' joysticks are also supported by this driver, because -they're basically souped up CHF sticks. - - However the only types that can be autodetected are: - -* 2-axis, 4-button joystick -* 3-axis, 4-button joystick -* 4-axis, 4-button joystick -* Saitek Cyborg 'digital' joysticks - - For other joystick types (more/less axes, hats, and buttons) support -you'll need to specify the types either on the kernel command line or on the -module command line, when inserting analog into the kernel. The -parameters are: - - analog.map=,,,.... - - 'type' is type of the joystick from the table below, defining joysticks -present on gameports in the system, starting with gameport0, second 'type' -entry defining joystick on gameport1 and so on. - - Type | Meaning - ----------------------------------- - none | No analog joystick on that port - auto | Autodetect joystick - 2btn | 2-button n-axis joystick - y-joy | Two 2-button 2-axis joysticks on an Y-cable - y-pad | Two 2-button 2-axis gamepads on an Y-cable - fcs | Thrustmaster FCS compatible joystick - chf | Joystick with a CH Flightstick compatible hat - fullchf | CH Flightstick compatible with two hats and 6 buttons - gamepad | 4/6-button n-axis gamepad - gamepad8 | 8-button 2-axis gamepad - - In case your joystick doesn't fit in any of the above categories, you can -specify the type as a number by combining the bits in the table below. This -is not recommended unless you really know what are you doing. It's not -dangerous, but not simple either. - - Bit | Meaning - -------------------------- - 0 | Axis X1 - 1 | Axis Y1 - 2 | Axis X2 - 3 | Axis Y2 - 4 | Button A - 5 | Button B - 6 | Button C - 7 | Button D - 8 | CHF Buttons X and Y - 9 | CHF Hat 1 - 10 | CHF Hat 2 - 11 | FCS Hat - 12 | Pad Button X - 13 | Pad Button Y - 14 | Pad Button U - 15 | Pad Button V - 16 | Saitek F1-F4 Buttons - 17 | Saitek Digital Mode - 19 | GamePad - 20 | Joy2 Axis X1 - 21 | Joy2 Axis Y1 - 22 | Joy2 Axis X2 - 23 | Joy2 Axis Y2 - 24 | Joy2 Button A - 25 | Joy2 Button B - 26 | Joy2 Button C - 27 | Joy2 Button D - 31 | Joy2 GamePad - -3.2 Microsoft SideWinder joysticks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Microsoft 'Digital Overdrive' protocol is supported by the sidewinder.c -module. All currently supported joysticks: - -* Microsoft SideWinder 3D Pro -* Microsoft SideWinder Force Feedback Pro -* Microsoft SideWinder Force Feedback Wheel -* Microsoft SideWinder FreeStyle Pro -* Microsoft SideWinder GamePad (up to four, chained) -* Microsoft SideWinder Precision Pro -* Microsoft SideWinder Precision Pro USB - - are autodetected, and thus no module parameters are needed. - - There is one caveat with the 3D Pro. There are 9 buttons reported, -although the joystick has only 8. The 9th button is the mode switch on the -rear side of the joystick. However, moving it, you'll reset the joystick, -and make it unresponsive for about a one third of a second. Furthermore, the -joystick will also re-center itself, taking the position it was in during -this time as a new center position. Use it if you want, but think first. - - The SideWinder Standard is not a digital joystick, and thus is supported -by the analog driver described above. - -3.3 Logitech ADI devices -~~~~~~~~~~~~~~~~~~~~~~~~ - Logitech ADI protocol is supported by the adi.c module. It should support -any Logitech device using this protocol. This includes, but is not limited -to: - -* Logitech CyberMan 2 -* Logitech ThunderPad Digital -* Logitech WingMan Extreme Digital -* Logitech WingMan Formula -* Logitech WingMan Interceptor -* Logitech WingMan GamePad -* Logitech WingMan GamePad USB -* Logitech WingMan GamePad Extreme -* Logitech WingMan Extreme Digital 3D - - ADI devices are autodetected, and the driver supports up to two (any -combination of) devices on a single gameport, using an Y-cable or chained -together. - - Logitech WingMan Joystick, Logitech WingMan Attack, Logitech WingMan -Extreme and Logitech WingMan ThunderPad are not digital joysticks and are -handled by the analog driver described above. Logitech WingMan Warrior and -Logitech Magellan are supported by serial drivers described below. Logitech -WingMan Force and Logitech WingMan Formula Force are supported by the -I-Force driver described below. Logitech CyberMan is not supported yet. - -3.4 Gravis GrIP -~~~~~~~~~~~~~~~ - Gravis GrIP protocol is supported by the grip.c module. It currently -supports: - -* Gravis GamePad Pro -* Gravis BlackHawk Digital -* Gravis Xterminator -* Gravis Xterminator DualControl - - All these devices are autodetected, and you can even use any combination -of up to two of these pads either chained together or using an Y-cable on a -single gameport. - -GrIP MultiPort isn't supported yet. Gravis Stinger is a serial device and is -supported by the stinger driver. Other Gravis joysticks are supported by the -analog driver. - -3.5 FPGaming A3D and MadCatz A3D -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The Assassin 3D protocol created by FPGaming, is used both by FPGaming -themselves and is licensed to MadCatz. A3D devices are supported by the -a3d.c module. It currently supports: - -* FPGaming Assassin 3D -* MadCatz Panther -* MadCatz Panther XL - - All these devices are autodetected. Because the Assassin 3D and the Panther -allow connecting analog joysticks to them, you'll need to load the analog -driver as well to handle the attached joysticks. - - The trackball should work with USB mousedev module as a normal mouse. See -the USB documentation for how to setup an USB mouse. - -3.6 ThrustMaster DirectConnect (BSP) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The TM DirectConnect (BSP) protocol is supported by the tmdc.c -module. This includes, but is not limited to: - -* ThrustMaster Millennium 3D Interceptor -* ThrustMaster 3D Rage Pad -* ThrustMaster Fusion Digital Game Pad - - Devices not directly supported, but hopefully working are: - -* ThrustMaster FragMaster -* ThrustMaster Attack Throttle - - If you have one of these, contact me. - - TMDC devices are autodetected, and thus no parameters to the module -are needed. Up to two TMDC devices can be connected to one gameport, using -an Y-cable. - -3.7 Creative Labs Blaster -~~~~~~~~~~~~~~~~~~~~~~~~~ - The Blaster protocol is supported by the cobra.c module. It supports only -the: - -* Creative Blaster GamePad Cobra - - Up to two of these can be used on a single gameport, using an Y-cable. - -3.8 Genius Digital joysticks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The Genius digitally communicating joysticks are supported by the gf2k.c -module. This includes: - -* Genius Flight2000 F-23 joystick -* Genius Flight2000 F-31 joystick -* Genius G-09D gamepad - - Other Genius digital joysticks are not supported yet, but support can be -added fairly easily. - -3.9 InterAct Digital joysticks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The InterAct digitally communicating joysticks are supported by the -interact.c module. This includes: - -* InterAct HammerHead/FX gamepad -* InterAct ProPad8 gamepad - - Other InterAct digital joysticks are not supported yet, but support can be -added fairly easily. - -3.10 PDPI Lightning 4 gamecards -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PDPI Lightning 4 gamecards are supported by the lightning.c module. -Once the module is loaded, the analog driver can be used to handle the -joysticks. Digitally communicating joystick will work only on port 0, while -using Y-cables, you can connect up to 8 analog joysticks to a single L4 -card, 16 in case you have two in your system. - -3.11 Trident 4DWave / Aureal Vortex -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Soundcards with a Trident 4DWave DX/NX or Aureal Vortex/Vortex2 chipsets -provide an "Enhanced Game Port" mode where the soundcard handles polling the -joystick. This mode is supported by the pcigame.c module. Once loaded the -analog driver can use the enhanced features of these gameports.. - -3.13 Crystal SoundFusion -~~~~~~~~~~~~~~~~~~~~~~~~ - Soundcards with Crystal SoundFusion chipsets provide an "Enhanced Game -Port", much like the 4DWave or Vortex above. This, and also the normal mode -for the port of the SoundFusion is supported by the cs461x.c module. - -3.14 SoundBlaster Live! -~~~~~~~~~~~~~~~~~~~~~~~~ - The Live! has a special PCI gameport, which, although it doesn't provide -any "Enhanced" stuff like 4DWave and friends, is quite a bit faster than -its ISA counterparts. It also requires special support, hence the -emu10k1-gp.c module for it instead of the normal ns558.c one. - -3.15 SoundBlaster 64 and 128 - ES1370 and ES1371, ESS Solo1 and S3 SonicVibes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - These PCI soundcards have specific gameports. They are handled by the -sound drivers themselves. Make sure you select gameport support in the -joystick menu and sound card support in the sound menu for your appropriate -card. - -3.16 Amiga -~~~~~~~~~~ - Amiga joysticks, connected to an Amiga, are supported by the amijoy.c -driver. Since they can't be autodetected, the driver has a command line. - - amijoy.map=, - - a and b define the joysticks connected to the JOY0DAT and JOY1DAT ports of -the Amiga. - - Value | Joystick type - --------------------- - 0 | None - 1 | 1-button digital joystick - - No more joystick types are supported now, but that should change in the -future if I get an Amiga in the reach of my fingers. - -3.17 Game console and 8-bit pads and joysticks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -See joystick-parport.txt for more info. - -3.18 SpaceTec/LabTec devices -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - SpaceTec serial devices communicate using the SpaceWare protocol. It is -supported by the spaceorb.c and spaceball.c drivers. The devices currently -supported by spaceorb.c are: - -* SpaceTec SpaceBall Avenger -* SpaceTec SpaceOrb 360 - -Devices currently supported by spaceball.c are: - -* SpaceTec SpaceBall 4000 FLX - - In addition to having the spaceorb/spaceball and serport modules in the -kernel, you also need to attach a serial port to it. to do that, run the -inputattach program: - - inputattach --spaceorb /dev/tts/x & -or - inputattach --spaceball /dev/tts/x & - -where /dev/tts/x is the serial port which the device is connected to. After -doing this, the device will be reported and will start working. - - There is one caveat with the SpaceOrb. The button #6, the on the bottom -side of the orb, although reported as an ordinary button, causes internal -recentering of the spaceorb, moving the zero point to the position in which -the ball is at the moment of pressing the button. So, think first before -you bind it to some other function. - -SpaceTec SpaceBall 2003 FLX and 3003 FLX are not supported yet. - -3.19 Logitech SWIFT devices -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The SWIFT serial protocol is supported by the warrior.c module. It -currently supports only the: - -* Logitech WingMan Warrior - -but in the future, Logitech CyberMan (the original one, not CM2) could be -supported as well. To use the module, you need to run inputattach after you -insert/compile the module into your kernel: - - inputattach --warrior /dev/tts/x & - -/dev/tts/x is the serial port your Warrior is attached to. - -3.20 Magellan / Space Mouse -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The Magellan (or Space Mouse), manufactured by LogiCad3d (formerly Space -Systems), for many other companies (Logitech, HP, ...) is supported by the -joy-magellan module. It currently supports only the: - -* Magellan 3D -* Space Mouse - -models, the additional buttons on the 'Plus' versions are not supported yet. - - To use it, you need to attach the serial port to the driver using the - - inputattach --magellan /dev/tts/x & - -command. After that the Magellan will be detected, initialized, will beep, -and the /dev/input/jsX device should become usable. - -3.21 I-Force devices -~~~~~~~~~~~~~~~~~~~~ - All I-Force devices are supported by the iforce module. This includes: - -* AVB Mag Turbo Force -* AVB Top Shot Pegasus -* AVB Top Shot Force Feedback Racing Wheel -* Logitech WingMan Force -* Logitech WingMan Force Wheel -* Guillemot Race Leader Force Feedback -* Guillemot Force Feedback Racing Wheel -* Thrustmaster Motor Sport GT - - To use it, you need to attach the serial port to the driver using the - - inputattach --iforce /dev/tts/x & - -command. After that the I-Force device will be detected, and the -/dev/input/jsX device should become usable. - - In case you're using the device via the USB port, the inputattach command -isn't needed. - - The I-Force driver now supports force feedback via the event interface. - - Please note that Logitech WingMan *3D devices are _not_ supported by this -module, rather by hid. Force feedback is not supported for those devices. -Logitech gamepads are also hid devices. - -3.22 Gravis Stinger gamepad -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - The Gravis Stinger serial port gamepad, designed for use with laptop -computers, is supported by the stinger.c module. To use it, attach the -serial port to the driver using: - - inputattach --stinger /dev/tty/x & - -where x is the number of the serial port. - -4. Troubleshooting -~~~~~~~~~~~~~~~~~~ - There is quite a high probability that you run into some problems. For -testing whether the driver works, if in doubt, use the jstest utility in -some of its modes. The most useful modes are "normal" - for the 1.x -interface, and "old" for the "0.x" interface. You run it by typing: - - jstest --normal /dev/input/js0 - jstest --old /dev/input/js0 - - Additionally you can do a test with the evtest utility: - - evtest /dev/input/event0 - - Oh, and read the FAQ! :) - -5. FAQ -~~~~~~ -Q: Running 'jstest /dev/input/js0' results in "File not found" error. What's the - cause? -A: The device files don't exist. Create them (see section 2.2). - -Q: Is it possible to connect my old Atari/Commodore/Amiga/console joystick - or pad that uses a 9-pin D-type cannon connector to the serial port of my - PC? -A: Yes, it is possible, but it'll burn your serial port or the pad. It - won't work, of course. - -Q: My joystick doesn't work with Quake / Quake 2. What's the cause? -A: Quake / Quake 2 don't support joystick. Use joy2key to simulate keypresses - for them. - -6. Programming Interface -~~~~~~~~~~~~~~~~~~~~~~~~ - The 1.0 driver uses a new, event based approach to the joystick driver. -Instead of the user program polling for the joystick values, the joystick -driver now reports only any changes of its state. See joystick-api.txt, -joystick.h and jstest.c included in the joystick package for more -information. The joystick device can be used in either blocking or -nonblocking mode and supports select() calls. - - For backward compatibility the old (v0.x) interface is still included. -Any call to the joystick driver using the old interface will return values -that are compatible to the old interface. This interface is still limited -to 2 axes, and applications using it usually decode only 2 buttons, although -the driver provides up to 32. diff --git a/Documentation/input/multi-touch-protocol.rst b/Documentation/input/multi-touch-protocol.rst new file mode 100644 index 0000000000000000000000000000000000000000..8035868c56bc651a1d57af01f578a70a211e59d2 --- /dev/null +++ b/Documentation/input/multi-touch-protocol.rst @@ -0,0 +1,410 @@ +.. include:: + +========================= +Multi-touch (MT) Protocol +========================= + +:Copyright: |copy| 2009-2010 Henrik Rydberg + + +Introduction +------------ + +In order to utilize the full power of the new multi-touch and multi-user +devices, a way to report detailed data from multiple contacts, i.e., +objects in direct contact with the device surface, is needed. This +document describes the multi-touch (MT) protocol which allows kernel +drivers to report details for an arbitrary number of contacts. + +The protocol is divided into two types, depending on the capabilities of the +hardware. For devices handling anonymous contacts (type A), the protocol +describes how to send the raw data for all contacts to the receiver. For +devices capable of tracking identifiable contacts (type B), the protocol +describes how to send updates for individual contacts via event slots. + +.. note:: + MT potocol type A is obsolete, all kernel drivers have been + converted to use type B. + +Protocol Usage +-------------- + +Contact details are sent sequentially as separate packets of ABS_MT +events. Only the ABS_MT events are recognized as part of a contact +packet. Since these events are ignored by current single-touch (ST) +applications, the MT protocol can be implemented on top of the ST protocol +in an existing driver. + +Drivers for type A devices separate contact packets by calling +input_mt_sync() at the end of each packet. This generates a SYN_MT_REPORT +event, which instructs the receiver to accept the data for the current +contact and prepare to receive another. + +Drivers for type B devices separate contact packets by calling +input_mt_slot(), with a slot as argument, at the beginning of each packet. +This generates an ABS_MT_SLOT event, which instructs the receiver to +prepare for updates of the given slot. + +All drivers mark the end of a multi-touch transfer by calling the usual +input_sync() function. This instructs the receiver to act upon events +accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new set +of events/packets. + +The main difference between the stateless type A protocol and the stateful +type B slot protocol lies in the usage of identifiable contacts to reduce +the amount of data sent to userspace. The slot protocol requires the use of +the ABS_MT_TRACKING_ID, either provided by the hardware or computed from +the raw data [#f5]_. + +For type A devices, the kernel driver should generate an arbitrary +enumeration of the full set of anonymous contacts currently on the +surface. The order in which the packets appear in the event stream is not +important. Event filtering and finger tracking is left to user space [#f3]_. + +For type B devices, the kernel driver should associate a slot with each +identified contact, and use that slot to propagate changes for the contact. +Creation, replacement and destruction of contacts is achieved by modifying +the ABS_MT_TRACKING_ID of the associated slot. A non-negative tracking id +is interpreted as a contact, and the value -1 denotes an unused slot. A +tracking id not previously present is considered new, and a tracking id no +longer present is considered removed. Since only changes are propagated, +the full state of each initiated contact has to reside in the receiving +end. Upon receiving an MT event, one simply updates the appropriate +attribute of the current slot. + +Some devices identify and/or track more contacts than they can report to the +driver. A driver for such a device should associate one type B slot with each +contact that is reported by the hardware. Whenever the identity of the +contact associated with a slot changes, the driver should invalidate that +slot by changing its ABS_MT_TRACKING_ID. If the hardware signals that it is +tracking more contacts than it is currently reporting, the driver should use +a BTN_TOOL_*TAP event to inform userspace of the total number of contacts +being tracked by the hardware at that moment. The driver should do this by +explicitly sending the corresponding BTN_TOOL_*TAP event and setting +use_count to false when calling input_mt_report_pointer_emulation(). +The driver should only advertise as many slots as the hardware can report. +Userspace can detect that a driver can report more total contacts than slots +by noting that the largest supported BTN_TOOL_*TAP event is larger than the +total number of type B slots reported in the absinfo for the ABS_MT_SLOT axis. + +The minimum value of the ABS_MT_SLOT axis must be 0. + +Protocol Example A +------------------ + +Here is what a minimal event sequence for a two-contact touch would look +like for a type A device:: + + ABS_MT_POSITION_X x[0] + ABS_MT_POSITION_Y y[0] + SYN_MT_REPORT + ABS_MT_POSITION_X x[1] + ABS_MT_POSITION_Y y[1] + SYN_MT_REPORT + SYN_REPORT + +The sequence after moving one of the contacts looks exactly the same; the +raw data for all present contacts are sent between every synchronization +with SYN_REPORT. + +Here is the sequence after lifting the first contact:: + + ABS_MT_POSITION_X x[1] + ABS_MT_POSITION_Y y[1] + SYN_MT_REPORT + SYN_REPORT + +And here is the sequence after lifting the second contact:: + + SYN_MT_REPORT + SYN_REPORT + +If the driver reports one of BTN_TOUCH or ABS_PRESSURE in addition to the +ABS_MT events, the last SYN_MT_REPORT event may be omitted. Otherwise, the +last SYN_REPORT will be dropped by the input core, resulting in no +zero-contact event reaching userland. + + +Protocol Example B +------------------ + +Here is what a minimal event sequence for a two-contact touch would look +like for a type B device:: + + ABS_MT_SLOT 0 + ABS_MT_TRACKING_ID 45 + ABS_MT_POSITION_X x[0] + ABS_MT_POSITION_Y y[0] + ABS_MT_SLOT 1 + ABS_MT_TRACKING_ID 46 + ABS_MT_POSITION_X x[1] + ABS_MT_POSITION_Y y[1] + SYN_REPORT + +Here is the sequence after moving contact 45 in the x direction:: + + ABS_MT_SLOT 0 + ABS_MT_POSITION_X x[0] + SYN_REPORT + +Here is the sequence after lifting the contact in slot 0:: + + ABS_MT_TRACKING_ID -1 + SYN_REPORT + +The slot being modified is already 0, so the ABS_MT_SLOT is omitted. The +message removes the association of slot 0 with contact 45, thereby +destroying contact 45 and freeing slot 0 to be reused for another contact. + +Finally, here is the sequence after lifting the second contact:: + + ABS_MT_SLOT 1 + ABS_MT_TRACKING_ID -1 + SYN_REPORT + + +Event Usage +----------- + +A set of ABS_MT events with the desired properties is defined. The events +are divided into categories, to allow for partial implementation. The +minimum set consists of ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which +allows for multiple contacts to be tracked. If the device supports it, the +ABS_MT_TOUCH_MAJOR and ABS_MT_WIDTH_MAJOR may be used to provide the size +of the contact area and approaching tool, respectively. + +The TOUCH and WIDTH parameters have a geometrical interpretation; imagine +looking through a window at someone gently holding a finger against the +glass. You will see two regions, one inner region consisting of the part +of the finger actually touching the glass, and one outer region formed by +the perimeter of the finger. The center of the touching region (a) is +ABS_MT_POSITION_X/Y and the center of the approaching finger (b) is +ABS_MT_TOOL_X/Y. The touch diameter is ABS_MT_TOUCH_MAJOR and the finger +diameter is ABS_MT_WIDTH_MAJOR. Now imagine the person pressing the finger +harder against the glass. The touch region will increase, and in general, +the ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR, which is always smaller +than unity, is related to the contact pressure. For pressure-based devices, +ABS_MT_PRESSURE may be used to provide the pressure on the contact area +instead. Devices capable of contact hovering can use ABS_MT_DISTANCE to +indicate the distance between the contact and the surface. + +:: + + + Linux MT Win8 + __________ _______________________ + / \ | | + / \ | | + / ____ \ | | + / / \ \ | | + \ \ a \ \ | a | + \ \____/ \ | | + \ \ | | + \ b \ | b | + \ \ | | + \ \ | | + \ \ | | + \ / | | + \ / | | + \ / | | + \__________/ |_______________________| + + +In addition to the MAJOR parameters, the oval shape of the touch and finger +regions can be described by adding the MINOR parameters, such that MAJOR +and MINOR are the major and minor axis of an ellipse. The orientation of +the touch ellipse can be described with the ORIENTATION parameter, and the +direction of the finger ellipse is given by the vector (a - b). + +For type A devices, further specification of the touch shape is possible +via ABS_MT_BLOB_ID. + +The ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a +finger or a pen or something else. Finally, the ABS_MT_TRACKING_ID event +may be used to track identified contacts over time [#f5]_. + +In the type B protocol, ABS_MT_TOOL_TYPE and ABS_MT_TRACKING_ID are +implicitly handled by input core; drivers should instead call +input_mt_report_slot_state(). + + +Event Semantics +--------------- + +ABS_MT_TOUCH_MAJOR + The length of the major axis of the contact. The length should be given in + surface units. If the surface has an X times Y resolution, the largest + possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [#f4]_. + +ABS_MT_TOUCH_MINOR + The length, in surface units, of the minor axis of the contact. If the + contact is circular, this event can be omitted [#f4]_. + +ABS_MT_WIDTH_MAJOR + The length, in surface units, of the major axis of the approaching + tool. This should be understood as the size of the tool itself. The + orientation of the contact and the approaching tool are assumed to be the + same [#f4]_. + +ABS_MT_WIDTH_MINOR + The length, in surface units, of the minor axis of the approaching + tool. Omit if circular [#f4]_. + + The above four values can be used to derive additional information about + the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates + the notion of pressure. The fingers of the hand and the palm all have + different characteristic widths. + +ABS_MT_PRESSURE + The pressure, in arbitrary units, on the contact area. May be used instead + of TOUCH and WIDTH for pressure-based devices or any device with a spatial + signal intensity distribution. + +ABS_MT_DISTANCE + The distance, in surface units, between the contact and the surface. Zero + distance means the contact is touching the surface. A positive number means + the contact is hovering above the surface. + +ABS_MT_ORIENTATION + The orientation of the touching ellipse. The value should describe a signed + quarter of a revolution clockwise around the touch center. The signed value + range is arbitrary, but zero should be returned for an ellipse aligned with + the Y axis of the surface, a negative value when the ellipse is turned to + the left, and a positive value when the ellipse is turned to the + right. When completely aligned with the X axis, the range max should be + returned. + + Touch ellipsis are symmetrical by default. For devices capable of true 360 + degree orientation, the reported orientation must exceed the range max to + indicate more than a quarter of a revolution. For an upside-down finger, + range max * 2 should be returned. + + Orientation can be omitted if the touch area is circular, or if the + information is not available in the kernel driver. Partial orientation + support is possible if the device can distinguish between the two axis, but + not (uniquely) any values in between. In such cases, the range of + ABS_MT_ORIENTATION should be [0, 1] [#f4]_. + +ABS_MT_POSITION_X + The surface X coordinate of the center of the touching ellipse. + +ABS_MT_POSITION_Y + The surface Y coordinate of the center of the touching ellipse. + +ABS_MT_TOOL_X + The surface X coordinate of the center of the approaching tool. Omit if + the device cannot distinguish between the intended touch point and the + tool itself. + +ABS_MT_TOOL_Y + The surface Y coordinate of the center of the approaching tool. Omit if the + device cannot distinguish between the intended touch point and the tool + itself. + + The four position values can be used to separate the position of the touch + from the position of the tool. If both positions are present, the major + tool axis points towards the touch point [#f1]_. Otherwise, the tool axes are + aligned with the touch axes. + +ABS_MT_TOOL_TYPE + The type of approaching tool. A lot of kernel drivers cannot distinguish + between different tool types, such as a finger or a pen. In such cases, the + event should be omitted. The protocol currently supports MT_TOOL_FINGER, + MT_TOOL_PEN, and MT_TOOL_PALM [#f2]_. For type B devices, this event is + handled by input core; drivers should instead use + input_mt_report_slot_state(). A contact's ABS_MT_TOOL_TYPE may change over + time while still touching the device, because the firmware may not be able + to determine which tool is being used when it first appears. + +ABS_MT_BLOB_ID + The BLOB_ID groups several packets together into one arbitrarily shaped + contact. The sequence of points forms a polygon which defines the shape of + the contact. This is a low-level anonymous grouping for type A devices, and + should not be confused with the high-level trackingID [#f5]_. Most type A + devices do not have blob capability, so drivers can safely omit this event. + +ABS_MT_TRACKING_ID + The TRACKING_ID identifies an initiated contact throughout its life cycle + [#f5]_. The value range of the TRACKING_ID should be large enough to ensure + unique identification of a contact maintained over an extended period of + time. For type B devices, this event is handled by input core; drivers + should instead use input_mt_report_slot_state(). + + +Event Computation +----------------- + +The flora of different hardware unavoidably leads to some devices fitting +better to the MT protocol than others. To simplify and unify the mapping, +this section gives recipes for how to compute certain events. + +For devices reporting contacts as rectangular shapes, signed orientation +cannot be obtained. Assuming X and Y are the lengths of the sides of the +touching rectangle, here is a simple formula that retains the most +information possible:: + + ABS_MT_TOUCH_MAJOR := max(X, Y) + ABS_MT_TOUCH_MINOR := min(X, Y) + ABS_MT_ORIENTATION := bool(X > Y) + +The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that +the device can distinguish between a finger along the Y axis (0) and a +finger along the X axis (1). + +For win8 devices with both T and C coordinates, the position mapping is:: + + ABS_MT_POSITION_X := T_X + ABS_MT_POSITION_Y := T_Y + ABS_MT_TOOL_X := C_X + ABS_MT_TOOL_Y := C_Y + +Unfortunately, there is not enough information to specify both the touching +ellipse and the tool ellipse, so one has to resort to approximations. One +simple scheme, which is compatible with earlier usage, is:: + + ABS_MT_TOUCH_MAJOR := min(X, Y) + ABS_MT_TOUCH_MINOR := + ABS_MT_ORIENTATION := + ABS_MT_WIDTH_MAJOR := min(X, Y) + distance(T, C) + ABS_MT_WIDTH_MINOR := min(X, Y) + +Rationale: We have no information about the orientation of the touching +ellipse, so approximate it with an inscribed circle instead. The tool +ellipse should align with the vector (T - C), so the diameter must +increase with distance(T, C). Finally, assume that the touch diameter is +equal to the tool thickness, and we arrive at the formulas above. + +Finger Tracking +--------------- + +The process of finger tracking, i.e., to assign a unique trackingID to each +initiated contact on the surface, is a Euclidian Bipartite Matching +problem. At each event synchronization, the set of actual contacts is +matched to the set of contacts from the previous synchronization. A full +implementation can be found in [#f3]_. + + +Gestures +-------- + +In the specific application of creating gesture events, the TOUCH and WIDTH +parameters can be used to, e.g., approximate finger pressure or distinguish +between index finger and thumb. With the addition of the MINOR parameters, +one can also distinguish between a sweeping finger and a pointing finger, +and with ORIENTATION, one can detect twisting of fingers. + + +Notes +----- + +In order to stay compatible with existing applications, the data reported +in a finger packet must not be recognized as single-touch events. + +For type A devices, all finger data bypasses input filtering, since +subsequent events of the same type refer to different fingers. + +.. [#f1] Also, the difference (TOOL_X - POSITION_X) can be used to model tilt. +.. [#f2] The list can of course be extended. +.. [#f3] The mtdev project: http://bitmath.org/code/mtdev/. +.. [#f4] See the section on event computation. +.. [#f5] See the section on finger tracking. diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt deleted file mode 100644 index c51f1146f3bd8396f572cddb9c87ae2620d236ba..0000000000000000000000000000000000000000 --- a/Documentation/input/multi-touch-protocol.txt +++ /dev/null @@ -1,418 +0,0 @@ -Multi-touch (MT) Protocol -------------------------- - Copyright (C) 2009-2010 Henrik Rydberg - - -Introduction ------------- - -In order to utilize the full power of the new multi-touch and multi-user -devices, a way to report detailed data from multiple contacts, i.e., -objects in direct contact with the device surface, is needed. This -document describes the multi-touch (MT) protocol which allows kernel -drivers to report details for an arbitrary number of contacts. - -The protocol is divided into two types, depending on the capabilities of the -hardware. For devices handling anonymous contacts (type A), the protocol -describes how to send the raw data for all contacts to the receiver. For -devices capable of tracking identifiable contacts (type B), the protocol -describes how to send updates for individual contacts via event slots. - - -Protocol Usage --------------- - -Contact details are sent sequentially as separate packets of ABS_MT -events. Only the ABS_MT events are recognized as part of a contact -packet. Since these events are ignored by current single-touch (ST) -applications, the MT protocol can be implemented on top of the ST protocol -in an existing driver. - -Drivers for type A devices separate contact packets by calling -input_mt_sync() at the end of each packet. This generates a SYN_MT_REPORT -event, which instructs the receiver to accept the data for the current -contact and prepare to receive another. - -Drivers for type B devices separate contact packets by calling -input_mt_slot(), with a slot as argument, at the beginning of each packet. -This generates an ABS_MT_SLOT event, which instructs the receiver to -prepare for updates of the given slot. - -All drivers mark the end of a multi-touch transfer by calling the usual -input_sync() function. This instructs the receiver to act upon events -accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new set -of events/packets. - -The main difference between the stateless type A protocol and the stateful -type B slot protocol lies in the usage of identifiable contacts to reduce -the amount of data sent to userspace. The slot protocol requires the use of -the ABS_MT_TRACKING_ID, either provided by the hardware or computed from -the raw data [5]. - -For type A devices, the kernel driver should generate an arbitrary -enumeration of the full set of anonymous contacts currently on the -surface. The order in which the packets appear in the event stream is not -important. Event filtering and finger tracking is left to user space [3]. - -For type B devices, the kernel driver should associate a slot with each -identified contact, and use that slot to propagate changes for the contact. -Creation, replacement and destruction of contacts is achieved by modifying -the ABS_MT_TRACKING_ID of the associated slot. A non-negative tracking id -is interpreted as a contact, and the value -1 denotes an unused slot. A -tracking id not previously present is considered new, and a tracking id no -longer present is considered removed. Since only changes are propagated, -the full state of each initiated contact has to reside in the receiving -end. Upon receiving an MT event, one simply updates the appropriate -attribute of the current slot. - -Some devices identify and/or track more contacts than they can report to the -driver. A driver for such a device should associate one type B slot with each -contact that is reported by the hardware. Whenever the identity of the -contact associated with a slot changes, the driver should invalidate that -slot by changing its ABS_MT_TRACKING_ID. If the hardware signals that it is -tracking more contacts than it is currently reporting, the driver should use -a BTN_TOOL_*TAP event to inform userspace of the total number of contacts -being tracked by the hardware at that moment. The driver should do this by -explicitly sending the corresponding BTN_TOOL_*TAP event and setting -use_count to false when calling input_mt_report_pointer_emulation(). -The driver should only advertise as many slots as the hardware can report. -Userspace can detect that a driver can report more total contacts than slots -by noting that the largest supported BTN_TOOL_*TAP event is larger than the -total number of type B slots reported in the absinfo for the ABS_MT_SLOT axis. - -The minimum value of the ABS_MT_SLOT axis must be 0. - -Protocol Example A ------------------- - -Here is what a minimal event sequence for a two-contact touch would look -like for a type A device: - - ABS_MT_POSITION_X x[0] - ABS_MT_POSITION_Y y[0] - SYN_MT_REPORT - ABS_MT_POSITION_X x[1] - ABS_MT_POSITION_Y y[1] - SYN_MT_REPORT - SYN_REPORT - -The sequence after moving one of the contacts looks exactly the same; the -raw data for all present contacts are sent between every synchronization -with SYN_REPORT. - -Here is the sequence after lifting the first contact: - - ABS_MT_POSITION_X x[1] - ABS_MT_POSITION_Y y[1] - SYN_MT_REPORT - SYN_REPORT - -And here is the sequence after lifting the second contact: - - SYN_MT_REPORT - SYN_REPORT - -If the driver reports one of BTN_TOUCH or ABS_PRESSURE in addition to the -ABS_MT events, the last SYN_MT_REPORT event may be omitted. Otherwise, the -last SYN_REPORT will be dropped by the input core, resulting in no -zero-contact event reaching userland. - - -Protocol Example B ------------------- - -Here is what a minimal event sequence for a two-contact touch would look -like for a type B device: - - ABS_MT_SLOT 0 - ABS_MT_TRACKING_ID 45 - ABS_MT_POSITION_X x[0] - ABS_MT_POSITION_Y y[0] - ABS_MT_SLOT 1 - ABS_MT_TRACKING_ID 46 - ABS_MT_POSITION_X x[1] - ABS_MT_POSITION_Y y[1] - SYN_REPORT - -Here is the sequence after moving contact 45 in the x direction: - - ABS_MT_SLOT 0 - ABS_MT_POSITION_X x[0] - SYN_REPORT - -Here is the sequence after lifting the contact in slot 0: - - ABS_MT_TRACKING_ID -1 - SYN_REPORT - -The slot being modified is already 0, so the ABS_MT_SLOT is omitted. The -message removes the association of slot 0 with contact 45, thereby -destroying contact 45 and freeing slot 0 to be reused for another contact. - -Finally, here is the sequence after lifting the second contact: - - ABS_MT_SLOT 1 - ABS_MT_TRACKING_ID -1 - SYN_REPORT - - -Event Usage ------------ - -A set of ABS_MT events with the desired properties is defined. The events -are divided into categories, to allow for partial implementation. The -minimum set consists of ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which -allows for multiple contacts to be tracked. If the device supports it, the -ABS_MT_TOUCH_MAJOR and ABS_MT_WIDTH_MAJOR may be used to provide the size -of the contact area and approaching tool, respectively. - -The TOUCH and WIDTH parameters have a geometrical interpretation; imagine -looking through a window at someone gently holding a finger against the -glass. You will see two regions, one inner region consisting of the part -of the finger actually touching the glass, and one outer region formed by -the perimeter of the finger. The center of the touching region (a) is -ABS_MT_POSITION_X/Y and the center of the approaching finger (b) is -ABS_MT_TOOL_X/Y. The touch diameter is ABS_MT_TOUCH_MAJOR and the finger -diameter is ABS_MT_WIDTH_MAJOR. Now imagine the person pressing the finger -harder against the glass. The touch region will increase, and in general, -the ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR, which is always smaller -than unity, is related to the contact pressure. For pressure-based devices, -ABS_MT_PRESSURE may be used to provide the pressure on the contact area -instead. Devices capable of contact hovering can use ABS_MT_DISTANCE to -indicate the distance between the contact and the surface. - - - Linux MT Win8 - __________ _______________________ - / \ | | - / \ | | - / ____ \ | | - / / \ \ | | - \ \ a \ \ | a | - \ \____/ \ | | - \ \ | | - \ b \ | b | - \ \ | | - \ \ | | - \ \ | | - \ / | | - \ / | | - \ / | | - \__________/ |_______________________| - - -In addition to the MAJOR parameters, the oval shape of the touch and finger -regions can be described by adding the MINOR parameters, such that MAJOR -and MINOR are the major and minor axis of an ellipse. The orientation of -the touch ellipse can be described with the ORIENTATION parameter, and the -direction of the finger ellipse is given by the vector (a - b). - -For type A devices, further specification of the touch shape is possible -via ABS_MT_BLOB_ID. - -The ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a -finger or a pen or something else. Finally, the ABS_MT_TRACKING_ID event -may be used to track identified contacts over time [5]. - -In the type B protocol, ABS_MT_TOOL_TYPE and ABS_MT_TRACKING_ID are -implicitly handled by input core; drivers should instead call -input_mt_report_slot_state(). - - -Event Semantics ---------------- - -ABS_MT_TOUCH_MAJOR - -The length of the major axis of the contact. The length should be given in -surface units. If the surface has an X times Y resolution, the largest -possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4]. - -ABS_MT_TOUCH_MINOR - -The length, in surface units, of the minor axis of the contact. If the -contact is circular, this event can be omitted [4]. - -ABS_MT_WIDTH_MAJOR - -The length, in surface units, of the major axis of the approaching -tool. This should be understood as the size of the tool itself. The -orientation of the contact and the approaching tool are assumed to be the -same [4]. - -ABS_MT_WIDTH_MINOR - -The length, in surface units, of the minor axis of the approaching -tool. Omit if circular [4]. - -The above four values can be used to derive additional information about -the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates -the notion of pressure. The fingers of the hand and the palm all have -different characteristic widths. - -ABS_MT_PRESSURE - -The pressure, in arbitrary units, on the contact area. May be used instead -of TOUCH and WIDTH for pressure-based devices or any device with a spatial -signal intensity distribution. - -ABS_MT_DISTANCE - -The distance, in surface units, between the contact and the surface. Zero -distance means the contact is touching the surface. A positive number means -the contact is hovering above the surface. - -ABS_MT_ORIENTATION - -The orientation of the touching ellipse. The value should describe a signed -quarter of a revolution clockwise around the touch center. The signed value -range is arbitrary, but zero should be returned for an ellipse aligned with -the Y axis of the surface, a negative value when the ellipse is turned to -the left, and a positive value when the ellipse is turned to the -right. When completely aligned with the X axis, the range max should be -returned. - -Touch ellipsis are symmetrical by default. For devices capable of true 360 -degree orientation, the reported orientation must exceed the range max to -indicate more than a quarter of a revolution. For an upside-down finger, -range max * 2 should be returned. - -Orientation can be omitted if the touch area is circular, or if the -information is not available in the kernel driver. Partial orientation -support is possible if the device can distinguish between the two axis, but -not (uniquely) any values in between. In such cases, the range of -ABS_MT_ORIENTATION should be [0, 1] [4]. - -ABS_MT_POSITION_X - -The surface X coordinate of the center of the touching ellipse. - -ABS_MT_POSITION_Y - -The surface Y coordinate of the center of the touching ellipse. - -ABS_MT_TOOL_X - -The surface X coordinate of the center of the approaching tool. Omit if -the device cannot distinguish between the intended touch point and the -tool itself. - -ABS_MT_TOOL_Y - -The surface Y coordinate of the center of the approaching tool. Omit if the -device cannot distinguish between the intended touch point and the tool -itself. - -The four position values can be used to separate the position of the touch -from the position of the tool. If both positions are present, the major -tool axis points towards the touch point [1]. Otherwise, the tool axes are -aligned with the touch axes. - -ABS_MT_TOOL_TYPE - -The type of approaching tool. A lot of kernel drivers cannot distinguish -between different tool types, such as a finger or a pen. In such cases, the -event should be omitted. The protocol currently supports MT_TOOL_FINGER, -MT_TOOL_PEN, and MT_TOOL_PALM [2]. For type B devices, this event is handled -by input core; drivers should instead use input_mt_report_slot_state(). -A contact's ABS_MT_TOOL_TYPE may change over time while still touching the -device, because the firmware may not be able to determine which tool is being -used when it first appears. - -ABS_MT_BLOB_ID - -The BLOB_ID groups several packets together into one arbitrarily shaped -contact. The sequence of points forms a polygon which defines the shape of -the contact. This is a low-level anonymous grouping for type A devices, and -should not be confused with the high-level trackingID [5]. Most type A -devices do not have blob capability, so drivers can safely omit this event. - -ABS_MT_TRACKING_ID - -The TRACKING_ID identifies an initiated contact throughout its life cycle -[5]. The value range of the TRACKING_ID should be large enough to ensure -unique identification of a contact maintained over an extended period of -time. For type B devices, this event is handled by input core; drivers -should instead use input_mt_report_slot_state(). - - -Event Computation ------------------ - -The flora of different hardware unavoidably leads to some devices fitting -better to the MT protocol than others. To simplify and unify the mapping, -this section gives recipes for how to compute certain events. - -For devices reporting contacts as rectangular shapes, signed orientation -cannot be obtained. Assuming X and Y are the lengths of the sides of the -touching rectangle, here is a simple formula that retains the most -information possible: - - ABS_MT_TOUCH_MAJOR := max(X, Y) - ABS_MT_TOUCH_MINOR := min(X, Y) - ABS_MT_ORIENTATION := bool(X > Y) - -The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that -the device can distinguish between a finger along the Y axis (0) and a -finger along the X axis (1). - -For win8 devices with both T and C coordinates, the position mapping is - - ABS_MT_POSITION_X := T_X - ABS_MT_POSITION_Y := T_Y - ABS_MT_TOOL_X := C_X - ABS_MT_TOOL_Y := C_Y - -Unfortunately, there is not enough information to specify both the touching -ellipse and the tool ellipse, so one has to resort to approximations. One -simple scheme, which is compatible with earlier usage, is: - - ABS_MT_TOUCH_MAJOR := min(X, Y) - ABS_MT_TOUCH_MINOR := - ABS_MT_ORIENTATION := - ABS_MT_WIDTH_MAJOR := min(X, Y) + distance(T, C) - ABS_MT_WIDTH_MINOR := min(X, Y) - -Rationale: We have no information about the orientation of the touching -ellipse, so approximate it with an inscribed circle instead. The tool -ellipse should align with the vector (T - C), so the diameter must -increase with distance(T, C). Finally, assume that the touch diameter is -equal to the tool thickness, and we arrive at the formulas above. - -Finger Tracking ---------------- - -The process of finger tracking, i.e., to assign a unique trackingID to each -initiated contact on the surface, is a Euclidian Bipartite Matching -problem. At each event synchronization, the set of actual contacts is -matched to the set of contacts from the previous synchronization. A full -implementation can be found in [3]. - - -Gestures --------- - -In the specific application of creating gesture events, the TOUCH and WIDTH -parameters can be used to, e.g., approximate finger pressure or distinguish -between index finger and thumb. With the addition of the MINOR parameters, -one can also distinguish between a sweeping finger and a pointing finger, -and with ORIENTATION, one can detect twisting of fingers. - - -Notes ------ - -In order to stay compatible with existing applications, the data reported -in a finger packet must not be recognized as single-touch events. - -For type A devices, all finger data bypasses input filtering, since -subsequent events of the same type refer to different fingers. - -For example usage of the type A protocol, see the bcm5974 driver. For -example usage of the type B protocol, see the hid-egalax driver. - -[1] Also, the difference (TOOL_X - POSITION_X) can be used to model tilt. -[2] The list can of course be extended. -[3] The mtdev project: http://bitmath.org/code/mtdev/. -[4] See the section on event computation. -[5] See the section on finger tracking. diff --git a/Documentation/input/notifier.rst b/Documentation/input/notifier.rst new file mode 100644 index 0000000000000000000000000000000000000000..161350cb865ebc78eaf1a94b898c962b047da6d8 --- /dev/null +++ b/Documentation/input/notifier.rst @@ -0,0 +1,54 @@ +================= +Keyboard notifier +================= + +One can use register_keyboard_notifier to get called back on keyboard +events (see kbd_keycode() function for details). The passed structure is +keyboard_notifier_param: + +- 'vc' always provide the VC for which the keyboard event applies; +- 'down' is 1 for a key press event, 0 for a key release; +- 'shift' is the current modifier state, mask bit indexes are KG_*; +- 'value' depends on the type of event. + +- KBD_KEYCODE events are always sent before other events, value is the keycode. +- KBD_UNBOUND_KEYCODE events are sent if the keycode is not bound to a keysym. + value is the keycode. +- KBD_UNICODE events are sent if the keycode -> keysym translation produced a + unicode character. value is the unicode value. +- KBD_KEYSYM events are sent if the keycode -> keysym translation produced a + non-unicode character. value is the keysym. +- KBD_POST_KEYSYM events are sent after the treatment of non-unicode keysyms. + That permits one to inspect the resulting LEDs for instance. + +For each kind of event but the last, the callback may return NOTIFY_STOP in +order to "eat" the event: the notify loop is stopped and the keyboard event is +dropped. + +In a rough C snippet, we have:: + + kbd_keycode(keycode) { + ... + params.value = keycode; + if (notifier_call_chain(KBD_KEYCODE,¶ms) == NOTIFY_STOP) + || !bound) { + notifier_call_chain(KBD_UNBOUND_KEYCODE,¶ms); + return; + } + + if (unicode) { + param.value = unicode; + if (notifier_call_chain(KBD_UNICODE,¶ms) == NOTIFY_STOP) + return; + emit unicode; + return; + } + + params.value = keysym; + if (notifier_call_chain(KBD_KEYSYM,¶ms) == NOTIFY_STOP) + return; + apply keysym; + notifier_call_chain(KBD_POST_KEYSYM,¶ms); + } + +.. note:: This notifier is usually called from interrupt context. diff --git a/Documentation/input/notifier.txt b/Documentation/input/notifier.txt deleted file mode 100644 index 95172ca6f3d21c198a5adaa048aa4f08b27031b7..0000000000000000000000000000000000000000 --- a/Documentation/input/notifier.txt +++ /dev/null @@ -1,52 +0,0 @@ -Keyboard notifier - -One can use register_keyboard_notifier to get called back on keyboard -events (see kbd_keycode() function for details). The passed structure is -keyboard_notifier_param: - -- 'vc' always provide the VC for which the keyboard event applies; -- 'down' is 1 for a key press event, 0 for a key release; -- 'shift' is the current modifier state, mask bit indexes are KG_*; -- 'value' depends on the type of event. - -- KBD_KEYCODE events are always sent before other events, value is the keycode. -- KBD_UNBOUND_KEYCODE events are sent if the keycode is not bound to a keysym. - value is the keycode. -- KBD_UNICODE events are sent if the keycode -> keysym translation produced a - unicode character. value is the unicode value. -- KBD_KEYSYM events are sent if the keycode -> keysym translation produced a - non-unicode character. value is the keysym. -- KBD_POST_KEYSYM events are sent after the treatment of non-unicode keysyms. - That permits one to inspect the resulting LEDs for instance. - -For each kind of event but the last, the callback may return NOTIFY_STOP in -order to "eat" the event: the notify loop is stopped and the keyboard event is -dropped. - -In a rough C snippet, we have: - -kbd_keycode(keycode) { - ... - params.value = keycode; - if (notifier_call_chain(KBD_KEYCODE,¶ms) == NOTIFY_STOP) - || !bound) { - notifier_call_chain(KBD_UNBOUND_KEYCODE,¶ms); - return; - } - - if (unicode) { - param.value = unicode; - if (notifier_call_chain(KBD_UNICODE,¶ms) == NOTIFY_STOP) - return; - emit unicode; - return; - } - - params.value = keysym; - if (notifier_call_chain(KBD_KEYSYM,¶ms) == NOTIFY_STOP) - return; - apply keysym; - notifier_call_chain(KBD_POST_KEYSYM,¶ms); -} - -NOTE: This notifier is usually called from interrupt context. diff --git a/Documentation/input/ntrig.txt b/Documentation/input/ntrig.txt deleted file mode 100644 index be1fd981f73f7353241d5fddccd7c13da6e9c0f9..0000000000000000000000000000000000000000 --- a/Documentation/input/ntrig.txt +++ /dev/null @@ -1,126 +0,0 @@ -N-Trig touchscreen Driver -------------------------- - Copyright (c) 2008-2010 Rafi Rubin - Copyright (c) 2009-2010 Stephane Chatty - -This driver provides support for N-Trig pen and multi-touch sensors. Single -and multi-touch events are translated to the appropriate protocols for -the hid and input systems. Pen events are sufficiently hid compliant and -are left to the hid core. The driver also provides additional filtering -and utility functions accessible with sysfs and module parameters. - -This driver has been reported to work properly with multiple N-Trig devices -attached. - - -Parameters ----------- - -Note: values set at load time are global and will apply to all applicable -devices. Adjusting parameters with sysfs will override the load time values, -but only for that one device. - -The following parameters are used to configure filters to reduce noise: - -activate_slack number of fingers to ignore before processing events - -activation_height size threshold to activate immediately -activation_width - -min_height size threshold bellow which fingers are ignored -min_width both to decide activation and during activity - -deactivate_slack the number of "no contact" frames to ignore before - propagating the end of activity events - -When the last finger is removed from the device, it sends a number of empty -frames. By holding off on deactivation for a few frames we can tolerate false -erroneous disconnects, where the sensor may mistakenly not detect a finger that -is still present. Thus deactivate_slack addresses problems where a users might -see breaks in lines during drawing, or drop an object during a long drag. - - -Additional sysfs items ----------------------- - -These nodes just provide easy access to the ranges reported by the device. -sensor_logical_height the range for positions reported during activity -sensor_logical_width - -sensor_physical_height internal ranges not used for normal events but -sensor_physical_width useful for tuning - -All N-Trig devices with product id of 1 report events in the ranges of -X: 0-9600 -Y: 0-7200 -However not all of these devices have the same physical dimensions. Most -seem to be 12" sensors (Dell Latitude XT and XT2 and the HP TX2), and -at least one model (Dell Studio 17) has a 17" sensor. The ratio of physical -to logical sizes is used to adjust the size based filter parameters. - - -Filtering ---------- - -With the release of the early multi-touch firmwares it became increasingly -obvious that these sensors were prone to erroneous events. Users reported -seeing both inappropriately dropped contact and ghosts, contacts reported -where no finger was actually touching the screen. - -Deactivation slack helps prevent dropped contact for single touch use, but does -not address the problem of dropping one of more contacts while other contacts -are still active. Drops in the multi-touch context require additional -processing and should be handled in tandem with tacking. - -As observed ghost contacts are similar to actual use of the sensor, but they -seem to have different profiles. Ghost activity typically shows up as small -short lived touches. As such, I assume that the longer the continuous stream -of events the more likely those events are from a real contact, and that the -larger the size of each contact the more likely it is real. Balancing the -goals of preventing ghosts and accepting real events quickly (to minimize -user observable latency), the filter accumulates confidence for incoming -events until it hits thresholds and begins propagating. In the interest in -minimizing stored state as well as the cost of operations to make a decision, -I've kept that decision simple. - -Time is measured in terms of the number of fingers reported, not frames since -the probability of multiple simultaneous ghosts is expected to drop off -dramatically with increasing numbers. Rather than accumulate weight as a -function of size, I just use it as a binary threshold. A sufficiently large -contact immediately overrides the waiting period and leads to activation. - -Setting the activation size thresholds to large values will result in deciding -primarily on activation slack. If you see longer lived ghosts, turning up the -activation slack while reducing the size thresholds may suffice to eliminate -the ghosts while keeping the screen quite responsive to firm taps. - -Contacts continue to be filtered with min_height and min_width even after -the initial activation filter is satisfied. The intent is to provide -a mechanism for filtering out ghosts in the form of an extra finger while -you actually are using the screen. In practice this sort of ghost has -been far less problematic or relatively rare and I've left the defaults -set to 0 for both parameters, effectively turning off that filter. - -I don't know what the optimal values are for these filters. If the defaults -don't work for you, please play with the parameters. If you do find other -values more comfortable, I would appreciate feedback. - -The calibration of these devices does drift over time. If ghosts or contact -dropping worsen and interfere with the normal usage of your device, try -recalibrating it. - - -Calibration ------------ - -The N-Trig windows tools provide calibration and testing routines. Also an -unofficial unsupported set of user space tools including a calibrator is -available at: -http://code.launchpad.net/~rafi-seas/+junk/ntrig_calib - - -Tracking --------- - -As of yet, all tested N-Trig firmwares do not track fingers. When multiple -contacts are active they seem to be sorted primarily by Y position. diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt deleted file mode 100644 index 46a74f0c551a1b5a1ada3dc7e3ae248df1edbbff..0000000000000000000000000000000000000000 --- a/Documentation/input/rotary-encoder.txt +++ /dev/null @@ -1,126 +0,0 @@ -rotary-encoder - a generic driver for GPIO connected devices -Daniel Mack , Feb 2009 - -0. Function ------------ - -Rotary encoders are devices which are connected to the CPU or other -peripherals with two wires. The outputs are phase-shifted by 90 degrees -and by triggering on falling and rising edges, the turn direction can -be determined. - -Some encoders have both outputs low in stable states, others also have -a stable state with both outputs high (half-period mode) and some have -a stable state in all steps (quarter-period mode). - -The phase diagram of these two outputs look like this: - - _____ _____ _____ - | | | | | | - Channel A ____| |_____| |_____| |____ - - : : : : : : : : : : : : - __ _____ _____ _____ - | | | | | | | - Channel B |_____| |_____| |_____| |__ - - : : : : : : : : : : : : - Event a b c d a b c d a b c d - - |<-------->| - one step - - |<-->| - one step (half-period mode) - - |<>| - one step (quarter-period mode) - -For more information, please see - https://en.wikipedia.org/wiki/Rotary_encoder - - -1. Events / state machine -------------------------- - -In half-period mode, state a) and c) above are used to determine the -rotational direction based on the last stable state. Events are reported in -states b) and d) given that the new stable state is different from the last -(i.e. the rotation was not reversed half-way). - -Otherwise, the following apply: - -a) Rising edge on channel A, channel B in low state - This state is used to recognize a clockwise turn - -b) Rising edge on channel B, channel A in high state - When entering this state, the encoder is put into 'armed' state, - meaning that there it has seen half the way of a one-step transition. - -c) Falling edge on channel A, channel B in high state - This state is used to recognize a counter-clockwise turn - -d) Falling edge on channel B, channel A in low state - Parking position. If the encoder enters this state, a full transition - should have happened, unless it flipped back on half the way. The - 'armed' state tells us about that. - -2. Platform requirements ------------------------- - -As there is no hardware dependent call in this driver, the platform it is -used with must support gpiolib. Another requirement is that IRQs must be -able to fire on both edges. - - -3. Board integration --------------------- - -To use this driver in your system, register a platform_device with the -name 'rotary-encoder' and associate the IRQs and some specific platform -data with it. - -struct rotary_encoder_platform_data is declared in -include/linux/rotary-encoder.h and needs to be filled with the number of -steps the encoder has and can carry information about externally inverted -signals (because of an inverting buffer or other reasons). The encoder -can be set up to deliver input information as either an absolute or relative -axes. For relative axes the input event returns +/-1 for each step. For -absolute axes the position of the encoder can either roll over between zero -and the number of steps or will clamp at the maximum and zero depending on -the configuration. - -Because GPIO to IRQ mapping is platform specific, this information must -be given in separately to the driver. See the example below. - ------------------- - -/* board support file example */ - -#include -#include - -#define GPIO_ROTARY_A 1 -#define GPIO_ROTARY_B 2 - -static struct rotary_encoder_platform_data my_rotary_encoder_info = { - .steps = 24, - .axis = ABS_X, - .relative_axis = false, - .rollover = false, - .gpio_a = GPIO_ROTARY_A, - .gpio_b = GPIO_ROTARY_B, - .inverted_a = 0, - .inverted_b = 0, - .half_period = false, - .wakeup_source = false, -}; - -static struct platform_device rotary_encoder_device = { - .name = "rotary-encoder", - .id = 0, - .dev = { - .platform_data = &my_rotary_encoder_info, - } -}; - diff --git a/Documentation/input/sentelic.txt b/Documentation/input/sentelic.txt deleted file mode 100644 index 89251e2a3eba101fddf89e7d4da3a2f1b8fb5ba8..0000000000000000000000000000000000000000 --- a/Documentation/input/sentelic.txt +++ /dev/null @@ -1,873 +0,0 @@ -Copyright (C) 2002-2011 Sentelic Corporation. -Last update: Dec-07-2011 - -============================================================================== -* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons) -============================================================================== -A) MSID 4: Scrolling wheel mode plus Forward page(4th button) and Backward - page (5th button) -@1. Set sample rate to 200; -@2. Set sample rate to 200; -@3. Set sample rate to 80; -@4. Issuing the "Get device ID" command (0xF2) and waits for the response; -@5. FSP will respond 0x04. - -Packet 1 - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|W|W|W|W| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7 => Y overflow - Bit6 => X overflow - Bit5 => Y sign bit - Bit4 => X sign bit - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X Movement(9-bit 2's complement integers) -Byte 3: Y Movement(9-bit 2's complement integers) -Byte 4: Bit3~Bit0 => the scrolling wheel's movement since the last data report. - valid values, -8 ~ +7 - Bit4 => 1 = 4th mouse button is pressed, Forward one page. - 0 = 4th mouse button is not pressed. - Bit5 => 1 = 5th mouse button is pressed, Backward one page. - 0 = 5th mouse button is not pressed. - -B) MSID 6: Horizontal and Vertical scrolling. -@ Set bit 1 in register 0x40 to 1 - -# FSP replaces scrolling wheel's movement as 4 bits to show horizontal and - vertical scrolling. - -Packet 1 - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|r|l|u|d| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7 => Y overflow - Bit6 => X overflow - Bit5 => Y sign bit - Bit4 => X sign bit - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X Movement(9-bit 2's complement integers) -Byte 3: Y Movement(9-bit 2's complement integers) -Byte 4: Bit0 => the Vertical scrolling movement downward. - Bit1 => the Vertical scrolling movement upward. - Bit2 => the Horizontal scrolling movement leftward. - Bit3 => the Horizontal scrolling movement rightward. - Bit4 => 1 = 4th mouse button is pressed, Forward one page. - 0 = 4th mouse button is not pressed. - Bit5 => 1 = 5th mouse button is pressed, Backward one page. - 0 = 5th mouse button is not pressed. - -C) MSID 7: -# FSP uses 2 packets (8 Bytes) to represent Absolute Position. - so we have PACKET NUMBER to identify packets. - If PACKET NUMBER is 0, the packet is Packet 1. - If PACKET NUMBER is 1, the packet is Packet 2. - Please count this number in program. - -# MSID6 special packet will be enable at the same time when enable MSID 7. - -============================================================================== -* Absolute position for STL3886-G0. -============================================================================== -@ Set bit 2 or 3 in register 0x40 to 1 -@ Set bit 6 in register 0x40 to 1 - -Packet 1 (ABSOLUTE POSITION) - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|V|1|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|d|u|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordination packet - => 10, Notify packet - Bit5 => valid bit - Bit4 => 1 - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit4 => scroll up - Bit5 => scroll down - Bit6 => scroll left - Bit7 => scroll right - -Notify Packet for G0 - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |1|0|0|1|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |M|M|M|M|M|M|M|M| 4 |0|0|0|0|0|0|0|0| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordination packet - => 10, Notify packet - Bit5 => 0 - Bit4 => 1 - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: Message Type => 0x5A (Enable/Disable status packet) - Mode Type => 0xA5 (Normal/Icon mode status) -Byte 3: Message Type => 0x00 (Disabled) - => 0x01 (Enabled) - Mode Type => 0x00 (Normal) - => 0x01 (Icon) -Byte 4: Bit7~Bit0 => Don't Care - -============================================================================== -* Absolute position for STL3888-Ax. -============================================================================== -Packet 1 (ABSOLUTE POSITION) - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|V|A|1|L|0|1| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordination packet - => 10, Notify packet - => 11, Normal data packet with on-pad click - Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. - When both fingers are up, the last two reports have zero valid - bit. - Bit4 => arc - Bit3 => 1 - Bit2 => Left Button, 1 is pressed, 0 is released. - Bit1 => 0 - Bit0 => 1 -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit5~Bit4 => y1_g - Bit7~Bit6 => x1_g - -Packet 2 (ABSOLUTE POSITION) - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|V|A|1|R|1|0| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordinates packet - => 10, Notify packet - => 11, Normal data packet with on-pad click - Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. - When both fingers are up, the last two reports have zero valid - bit. - Bit4 => arc - Bit3 => 1 - Bit2 => Right Button, 1 is pressed, 0 is released. - Bit1 => 1 - Bit0 => 0 -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit5~Bit4 => y2_g - Bit7~Bit6 => x2_g - -Notify Packet for STL3888-Ax - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|d|u|0|0|0|0| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordinates packet - => 10, Notify packet - => 11, Normal data packet with on-pad click - Bit5 => 1 - Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): - 0: left button is generated by the on-pad command - 1: left button is generated by the external button - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode) -Byte 3: Bit7~Bit6 => Don't care - Bit5~Bit4 => Number of fingers - Bit3~Bit1 => Reserved - Bit0 => 1: enter gesture mode; 0: leaving gesture mode -Byte 4: Bit7 => scroll right button - Bit6 => scroll left button - Bit5 => scroll down button - Bit4 => scroll up button - * Note that if gesture and additional button (Bit4~Bit7) - happen at the same time, the button information will not - be sent. - Bit3~Bit0 => Reserved - -Sample sequence of Multi-finger, Multi-coordinate mode: - - notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1, - abs pkt 2, ..., notify packet (valid bit == 0) - -============================================================================== -* Absolute position for STL3888-B0. -============================================================================== -Packet 1(ABSOLUTE POSITION) - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|V|F|1|0|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|u|d|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordinates packet - => 10, Notify packet - => 11, Normal data packet with on-pad click - Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. - When both fingers are up, the last two reports have zero valid - bit. - Bit4 => finger up/down information. 1: finger down, 0: finger up. - Bit3 => 1 - Bit2 => finger index, 0 is the first finger, 1 is the second finger. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit4 => scroll down button - Bit5 => scroll up button - Bit6 => scroll left button - Bit7 => scroll right button - -Packet 2 (ABSOLUTE POSITION) - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|V|F|1|1|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|u|d|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordination packet - => 10, Notify packet - => 11, Normal data packet with on-pad click - Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. - When both fingers are up, the last two reports have zero valid - bit. - Bit4 => finger up/down information. 1: finger down, 0: finger up. - Bit3 => 1 - Bit2 => finger index, 0 is the first finger, 1 is the second finger. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit4 => scroll down button - Bit5 => scroll up button - Bit6 => scroll left button - Bit7 => scroll right button - -Notify Packet for STL3888-B0 - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordination packet - => 10, Notify packet - => 11, Normal data packet with on-pad click - Bit5 => 1 - Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): - 0: left button is generated by the on-pad command - 1: left button is generated by the external button - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode) -Byte 3: Bit7~Bit6 => Don't care - Bit5~Bit4 => Number of fingers - Bit3~Bit1 => Reserved - Bit0 => 1: enter gesture mode; 0: leaving gesture mode -Byte 4: Bit7 => scroll right button - Bit6 => scroll left button - Bit5 => scroll up button - Bit4 => scroll down button - * Note that if gesture and additional button(Bit4~Bit7) - happen at the same time, the button information will not - be sent. - Bit3~Bit0 => Reserved - -Sample sequence of Multi-finger, Multi-coordinate mode: - - notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1, - abs pkt 2, ..., notify packet (valid bit == 0) - -============================================================================== -* Absolute position for STL3888-Cx and STL3888-Dx. -============================================================================== -Single Finger, Absolute Coordinate Mode (SFAC) - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|0|P|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordinates packet - => 10, Notify packet - Bit5 => Coordinate mode(always 0 in SFAC mode): - 0: single-finger absolute coordinates (SFAC) mode - 1: multi-finger, multiple coordinates (MFMC) mode - Bit4 => 0: The LEFT button is generated by on-pad command (OPC) - 1: The LEFT button is generated by external button - Default is 1 even if the LEFT button is not pressed. - Bit3 => Always 1, as specified by PS/2 protocol. - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit4 => 4th mouse button(forward one page) - Bit5 => 5th mouse button(backward one page) - Bit6 => scroll left button - Bit7 => scroll right button - -Multi Finger, Multiple Coordinates Mode (MFMC): - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |0|1|1|P|1|F|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordination packet - => 10, Notify packet - Bit5 => Coordinate mode (always 1 in MFMC mode): - 0: single-finger absolute coordinates (SFAC) mode - 1: multi-finger, multiple coordinates (MFMC) mode - Bit4 => 0: The LEFT button is generated by on-pad command (OPC) - 1: The LEFT button is generated by external button - Default is 1 even if the LEFT button is not pressed. - Bit3 => Always 1, as specified by PS/2 protocol. - Bit2 => Finger index, 0 is the first finger, 1 is the second finger. - If bit 1 and 0 are all 1 and bit 4 is 0, the middle external - button is pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: X coordinate (xpos[9:2]) -Byte 3: Y coordinate (ypos[9:2]) -Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) - Bit3~Bit2 => X coordinate (ypos[1:0]) - Bit4 => 4th mouse button(forward one page) - Bit5 => 5th mouse button(backward one page) - Bit6 => scroll left button - Bit7 => scroll right button - - When one of the two fingers is up, the device will output four consecutive -MFMC#0 report packets with zero X and Y to represent 1st finger is up or -four consecutive MFMC#1 report packets with zero X and Y to represent that -the 2nd finger is up. On the other hand, if both fingers are up, the device -will output four consecutive single-finger, absolute coordinate(SFAC) packets -with zero X and Y. - -Notify Packet for STL3888-Cx/Dx - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |1|0|0|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0| - |---------------| |---------------| |---------------| |---------------| - -Byte 1: Bit7~Bit6 => 00, Normal data packet - => 01, Absolute coordinates packet - => 10, Notify packet - Bit5 => Always 0 - Bit4 => 0: The LEFT button is generated by on-pad command(OPC) - 1: The LEFT button is generated by external button - Default is 1 even if the LEFT button is not pressed. - Bit3 => 1 - Bit2 => Middle Button, 1 is pressed, 0 is not pressed. - Bit1 => Right Button, 1 is pressed, 0 is not pressed. - Bit0 => Left Button, 1 is pressed, 0 is not pressed. -Byte 2: Message type: - 0xba => gesture information - 0xc0 => one finger hold-rotating gesture -Byte 3: The first parameter for the received message: - 0xba => gesture ID (refer to the 'Gesture ID' section) - 0xc0 => region ID -Byte 4: The second parameter for the received message: - 0xba => N/A - 0xc0 => finger up/down information - -Sample sequence of Multi-finger, Multi-coordinates mode: - - notify packet (valid bit == 1), MFMC packet 1 (byte 1, bit 2 == 0), - MFMC packet 2 (byte 1, bit 2 == 1), MFMC packet 1, MFMC packet 2, - ..., notify packet (valid bit == 0) - - That is, when the device is in MFMC mode, the host will receive - interleaved absolute coordinate packets for each finger. - -============================================================================== -* FSP Enable/Disable packet -============================================================================== - Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 -BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| - 1 |Y|X|0|0|1|M|R|L| 2 |0|1|0|1|1|0|1|E| 3 | | | | | | | | | 4 | | | | | | | | | - |---------------| |---------------| |---------------| |---------------| - -FSP will send out enable/disable packet when FSP receive PS/2 enable/disable -command. Host will receive the packet which Middle, Right, Left button will -be set. The packet only use byte 0 and byte 1 as a pattern of original packet. -Ignore the other bytes of the packet. - -Byte 1: Bit7 => 0, Y overflow - Bit6 => 0, X overflow - Bit5 => 0, Y sign bit - Bit4 => 0, X sign bit - Bit3 => 1 - Bit2 => 1, Middle Button - Bit1 => 1, Right Button - Bit0 => 1, Left Button -Byte 2: Bit7~1 => (0101101b) - Bit0 => 1 = Enable - 0 = Disable -Byte 3: Don't care -Byte 4: Don't care (MOUSE ID 3, 4) -Byte 5~8: Don't care (Absolute packet) - -============================================================================== -* PS/2 Command Set -============================================================================== - -FSP supports basic PS/2 commanding set and modes, refer to following URL for -details about PS/2 commands: - -http://www.computer-engineering.org/ps2mouse/ - -============================================================================== -* Programming Sequence for Determining Packet Parsing Flow -============================================================================== -1. Identify FSP by reading device ID(0x00) and version(0x01) register - -2a. For FSP version < STL3888 Cx, determine number of buttons by reading - the 'test mode status' (0x20) register: - - buttons = reg[0x20] & 0x30 - - if buttons == 0x30 or buttons == 0x20: - # two/four buttons - Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' - section A for packet parsing detail(ignore byte 4, bit ~ 7) - elif buttons == 0x10: - # 6 buttons - Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' - section B for packet parsing detail - elif buttons == 0x00: - # 6 buttons - Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' - section A for packet parsing detail - -2b. For FSP version >= STL3888 Cx: - Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' - section A for packet parsing detail (ignore byte 4, bit ~ 7) - -============================================================================== -* Programming Sequence for Register Reading/Writing -============================================================================== - -Register inversion requirement: - - Following values needed to be inverted(the '~' operator in C) before being -sent to FSP: - - 0xe8, 0xe9, 0xee, 0xf2, 0xf3 and 0xff. - -Register swapping requirement: - - Following values needed to have their higher 4 bits and lower 4 bits being -swapped before being sent to FSP: - - 10, 20, 40, 60, 80, 100 and 200. - -Register reading sequence: - - 1. send 0xf3 PS/2 command to FSP; - - 2. send 0x66 PS/2 command to FSP; - - 3. send 0x88 PS/2 command to FSP; - - 4. send 0xf3 PS/2 command to FSP; - - 5. if the register address being to read is not required to be - inverted(refer to the 'Register inversion requirement' section), - goto step 6 - - 5a. send 0x68 PS/2 command to FSP; - - 5b. send the inverted register address to FSP and goto step 8; - - 6. if the register address being to read is not required to be - swapped(refer to the 'Register swapping requirement' section), - goto step 7 - - 6a. send 0xcc PS/2 command to FSP; - - 6b. send the swapped register address to FSP and goto step 8; - - 7. send 0x66 PS/2 command to FSP; - - 7a. send the original register address to FSP and goto step 8; - - 8. send 0xe9(status request) PS/2 command to FSP; - - 9. the 4th byte of the response read from FSP should be the - requested register value(?? indicates don't care byte): - - host: 0xe9 - 3888: 0xfa (??) (??) (val) - - * Note that since the Cx release, the hardware will return 1's - complement of the register value at the 3rd byte of status request - result: - - host: 0xe9 - 3888: 0xfa (??) (~val) (val) - -Register writing sequence: - - 1. send 0xf3 PS/2 command to FSP; - - 2. if the register address being to write is not required to be - inverted(refer to the 'Register inversion requirement' section), - goto step 3 - - 2a. send 0x74 PS/2 command to FSP; - - 2b. send the inverted register address to FSP and goto step 5; - - 3. if the register address being to write is not required to be - swapped(refer to the 'Register swapping requirement' section), - goto step 4 - - 3a. send 0x77 PS/2 command to FSP; - - 3b. send the swapped register address to FSP and goto step 5; - - 4. send 0x55 PS/2 command to FSP; - - 4a. send the register address to FSP and goto step 5; - - 5. send 0xf3 PS/2 command to FSP; - - 6. if the register value being to write is not required to be - inverted(refer to the 'Register inversion requirement' section), - goto step 7 - - 6a. send 0x47 PS/2 command to FSP; - - 6b. send the inverted register value to FSP and goto step 9; - - 7. if the register value being to write is not required to be - swapped(refer to the 'Register swapping requirement' section), - goto step 8 - - 7a. send 0x44 PS/2 command to FSP; - - 7b. send the swapped register value to FSP and goto step 9; - - 8. send 0x33 PS/2 command to FSP; - - 8a. send the register value to FSP; - - 9. the register writing sequence is completed. - - * Note that since the Cx release, the hardware will return 1's - complement of the register value at the 3rd byte of status request - result. Host can optionally send another 0xe9 (status request) PS/2 - command to FSP at the end of register writing to verify that the - register writing operation is successful (?? indicates don't care - byte): - - host: 0xe9 - 3888: 0xfa (??) (~val) (val) - -============================================================================== -* Programming Sequence for Page Register Reading/Writing -============================================================================== - - In order to overcome the limitation of maximum number of registers -supported, the hardware separates register into different groups called -'pages.' Each page is able to include up to 255 registers. - - The default page after power up is 0x82; therefore, if one has to get -access to register 0x8301, one has to use following sequence to switch -to page 0x83, then start reading/writing from/to offset 0x01 by using -the register read/write sequence described in previous section. - -Page register reading sequence: - - 1. send 0xf3 PS/2 command to FSP; - - 2. send 0x66 PS/2 command to FSP; - - 3. send 0x88 PS/2 command to FSP; - - 4. send 0xf3 PS/2 command to FSP; - - 5. send 0x83 PS/2 command to FSP; - - 6. send 0x88 PS/2 command to FSP; - - 7. send 0xe9(status request) PS/2 command to FSP; - - 8. the response read from FSP should be the requested page value. - -Page register writing sequence: - - 1. send 0xf3 PS/2 command to FSP; - - 2. send 0x38 PS/2 command to FSP; - - 3. send 0x88 PS/2 command to FSP; - - 4. send 0xf3 PS/2 command to FSP; - - 5. if the page address being written is not required to be - inverted(refer to the 'Register inversion requirement' section), - goto step 6 - - 5a. send 0x47 PS/2 command to FSP; - - 5b. send the inverted page address to FSP and goto step 9; - - 6. if the page address being written is not required to be - swapped(refer to the 'Register swapping requirement' section), - goto step 7 - - 6a. send 0x44 PS/2 command to FSP; - - 6b. send the swapped page address to FSP and goto step 9; - - 7. send 0x33 PS/2 command to FSP; - - 8. send the page address to FSP; - - 9. the page register writing sequence is completed. - -============================================================================== -* Gesture ID -============================================================================== - - Unlike other devices which sends multiple fingers' coordinates to host, -FSP processes multiple fingers' coordinates internally and convert them -into a 8 bits integer, namely 'Gesture ID.' Following is a list of -supported gesture IDs: - - ID Description - 0x86 2 finger straight up - 0x82 2 finger straight down - 0x80 2 finger straight right - 0x84 2 finger straight left - 0x8f 2 finger zoom in - 0x8b 2 finger zoom out - 0xc0 2 finger curve, counter clockwise - 0xc4 2 finger curve, clockwise - 0x2e 3 finger straight up - 0x2a 3 finger straight down - 0x28 3 finger straight right - 0x2c 3 finger straight left - 0x38 palm - -============================================================================== -* Register Listing -============================================================================== - - Registers are represented in 16 bits values. The higher 8 bits represent -the page address and the lower 8 bits represent the relative offset within -that particular page. Refer to the 'Programming Sequence for Page Register -Reading/Writing' section for instructions on how to change current page -address. - -offset width default r/w name -0x8200 bit7~bit0 0x01 RO device ID - -0x8201 bit7~bit0 RW version ID - 0xc1: STL3888 Ax - 0xd0 ~ 0xd2: STL3888 Bx - 0xe0 ~ 0xe1: STL3888 Cx - 0xe2 ~ 0xe3: STL3888 Dx - -0x8202 bit7~bit0 0x01 RO vendor ID - -0x8203 bit7~bit0 0x01 RO product ID - -0x8204 bit3~bit0 0x01 RW revision ID - -0x820b test mode status 1 - bit3 1 RO 0: rotate 180 degree - 1: no rotation - *only supported by H/W prior to Cx - -0x820f register file page control - bit2 0 RW 1: rotate 180 degree - 0: no rotation - *supported since Cx - - bit0 0 RW 1 to enable page 1 register files - *only supported by H/W prior to Cx - -0x8210 RW system control 1 - bit0 1 RW Reserved, must be 1 - bit1 0 RW Reserved, must be 0 - bit4 0 RW Reserved, must be 0 - bit5 1 RW register clock gating enable - 0: read only, 1: read/write enable - (Note that following registers does not require clock gating being - enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e - 40 41 42 43. In addition to that, this bit must be 1 when gesture - mode is enabled) - -0x8220 test mode status - bit5~bit4 RO number of buttons - 11 => 2, lbtn/rbtn - 10 => 4, lbtn/rbtn/scru/scrd - 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr - 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn - *only supported by H/W prior to Cx - -0x8231 RW on-pad command detection - bit7 0 RW on-pad command left button down tag - enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - -0x8234 RW on-pad command control 5 - bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 - (Note that position unit is in 0.5 scanline) - *only supported by H/W prior to Cx - - bit7 0 RW on-pad tap zone enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - -0x8235 RW on-pad command control 6 - bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 - (Note that position unit is in 0.5 scanline) - *only supported by H/W prior to Cx - -0x8236 RW on-pad command control 7 - bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 - (Note that position unit is in 0.5 scanline) - *only supported by H/W prior to Cx - -0x8237 RW on-pad command control 8 - bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 - (Note that position unit is in 0.5 scanline) - *only supported by H/W prior to Cx - -0x8240 RW system control 5 - bit1 0 RW FSP Intellimouse mode enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - - bit2 0 RW movement + abs. coordinate mode enable - 0: disable, 1: enable - (Note that this function has the functionality of bit 1 even when - bit 1 is not set. However, the format is different from that of bit 1. - In addition, when bit 1 and bit 2 are set at the same time, bit 2 will - override bit 1.) - *only supported by H/W prior to Cx - - bit3 0 RW abs. coordinate only mode enable - 0: disable, 1: enable - (Note that this function has the functionality of bit 1 even when - bit 1 is not set. However, the format is different from that of bit 1. - In addition, when bit 1, bit 2 and bit 3 are set at the same time, - bit 3 will override bit 1 and 2.) - *only supported by H/W prior to Cx - - bit5 0 RW auto switch enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - - bit6 0 RW G0 abs. + notify packet format enable - 0: disable, 1: enable - (Note that the absolute/relative coordinate output still depends on - bit 2 and 3. That is, if any of those bit is 1, host will receive - absolute coordinates; otherwise, host only receives packets with - relative coordinate.) - *only supported by H/W prior to Cx - - bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd - finger packet enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - -0x8243 RW on-pad control - bit0 0 RW on-pad control enable - 0: disable, 1: enable - (Note that if this bit is cleared, bit 3/5 will be ineffective) - *only supported by H/W prior to Cx - - bit3 0 RW on-pad fix vertical scrolling enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - - bit5 0 RW on-pad fix horizontal scrolling enable - 0: disable, 1: enable - *only supported by H/W prior to Cx - -0x8290 RW software control register 1 - bit0 0 RW absolute coordination mode - 0: disable, 1: enable - *supported since Cx - - bit1 0 RW gesture ID output - 0: disable, 1: enable - *supported since Cx - - bit2 0 RW two fingers' coordinates output - 0: disable, 1: enable - *supported since Cx - - bit3 0 RW finger up one packet output - 0: disable, 1: enable - *supported since Cx - - bit4 0 RW absolute coordination continuous mode - 0: disable, 1: enable - *supported since Cx - - bit6~bit5 00 RW gesture group selection - 00: basic - 01: suite - 10: suite pro - 11: advanced - *supported since Cx - - bit7 0 RW Bx packet output compatible mode - 0: disable, 1: enable *supported since Cx - *supported since Cx - - -0x833d RW on-pad command control 1 - bit7 1 RW on-pad command detection enable - 0: disable, 1: enable - *supported since Cx - -0x833e RW on-pad command detection - bit7 0 RW on-pad command left button down tag - enable. Works only in H/W based PS/2 - data packet mode. - 0: disable, 1: enable - *supported since Cx diff --git a/Documentation/input/shape.fig b/Documentation/input/shape.fig deleted file mode 100644 index c22bff83d06f74a116eb10020f1329ef814dd4ca..0000000000000000000000000000000000000000 --- a/Documentation/input/shape.fig +++ /dev/null @@ -1,65 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 6 - 4200 3600 4200 3075 4950 2325 7425 2325 8250 3150 8250 3600 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 4200 3675 4200 5400 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 8250 3675 8250 5400 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 3675 3600 8700 3600 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 8775 3600 10200 3600 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 8325 3150 9075 3150 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 7500 2325 10200 2325 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 3600 3600 3000 3600 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 4125 3075 3000 3075 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 4200 5400 8175 5400 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 10125 2325 10125 3600 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 3000 3150 3000 3600 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 9075 3150 9075 3600 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 4950 2325 4950 1200 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2 - 7425 2325 7425 1200 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4 - 4200 3075 4200 2400 3600 1800 3600 1200 -2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4 - 8250 3150 8250 2475 8775 1950 8775 1200 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 3600 1275 4950 1275 -2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 7425 1275 8700 1275 -4 1 0 50 0 0 12 0.0000 4 135 1140 6075 5325 Effect duration\001 -4 0 0 50 0 0 12 0.0000 4 180 1305 10200 3000 Effect magnitude\001 -4 0 0 50 0 0 12 0.0000 4 135 780 9150 3450 Fade level\001 -4 1 0 50 0 0 12 0.0000 4 180 1035 4275 1200 Attack length\001 -4 1 0 50 0 0 12 0.0000 4 180 885 8175 1200 Fade length\001 -4 2 0 50 0 0 12 0.0000 4 135 930 2925 3375 Attack level\001 diff --git a/Documentation/input/shape.svg b/Documentation/input/shape.svg new file mode 100644 index 0000000000000000000000000000000000000000..fc0af44db3f79d3328434bf60785d62bb4f90a50 --- /dev/null +++ b/Documentation/input/shape.svg @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Effect duration + Effect magnitude + Fade level + Attack length + Fade length + Attack level + diff --git a/Documentation/input/uinput.rst b/Documentation/input/uinput.rst new file mode 100644 index 0000000000000000000000000000000000000000..b8e90b6a126cdb5e58c8feb81407bc4121df0b80 --- /dev/null +++ b/Documentation/input/uinput.rst @@ -0,0 +1,245 @@ +============= +uinput module +============= + +Introduction +============ + +uinput is a kernel module that makes it possible to emulate input devices +from userspace. By writing to /dev/uinput (or /dev/input/uinput) device, a +process can create a virtual input device with specific capabilities. Once +this virtual device is created, the process can send events through it, +that will be delivered to userspace and in-kernel consumers. + +Interface +========= + +:: + + linux/uinput.h + +The uinput header defines ioctls to create, set up, and destroy virtual +devices. + +libevdev +======== + +libevdev is a wrapper library for evdev devices that provides interfaces to +create uinput devices and send events. libevdev is less error-prone than +accessing uinput directly, and should be considered for new software. + +For examples and more information about libevdev: +https://www.freedesktop.org/software/libevdev/doc/latest/ + +Examples +======== + +Keyboard events +--------------- + +This first example shows how to create a new virtual device, and how to +send a key event. All default imports and error handlers were removed for +the sake of simplicity. + +.. code-block:: c + + #include + + void emit(int fd, int type, int code, int val) + { + struct input_event ie; + + ie.type = type; + ie.code = code; + ie.value = val; + /* timestamp values below are ignored */ + ie.time.tv_sec = 0; + ie.time.tv_usec = 0; + + write(fd, &ie, sizeof(ie)); + } + + int main(void) + { + struct uinput_setup usetup; + + int fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK); + + + /* + * The ioctls below will enable the device that is about to be + * created, to pass key events, in this case the space key. + */ + ioctl(fd, UI_SET_EVBIT, EV_KEY); + ioctl(fd, UI_SET_KEYBIT, KEY_SPACE); + + memset(&usetup, 0, sizeof(usetup)); + usetup.id.bustype = BUS_USB; + usetup.id.vendor = 0x1234; /* sample vendor */ + usetup.id.product = 0x5678; /* sample product */ + strcpy(usetup.name, "Example device"); + + ioctl(fd, UI_DEV_SETUP, &usetup); + ioctl(fd, UI_DEV_CREATE); + + /* + * On UI_DEV_CREATE the kernel will create the device node for this + * device. We are inserting a pause here so that userspace has time + * to detect, initialize the new device, and can start listening to + * the event, otherwise it will not notice the event we are about + * to send. This pause is only needed in our example code! + */ + sleep(1); + + /* Key press, report the event, send key release, and report again */ + emit(fd, EV_KEY, KEY_SPACE, 1); + emit(fd, EV_SYN, SYN_REPORT, 0); + emit(fd, EV_KEY, KEY_SPACE, 0); + emit(fd, EV_SYN, SYN_REPORT, 0); + + /* + * Give userspace some time to read the events before we destroy the + * device with UI_DEV_DESTOY. + */ + sleep(1); + + ioctl(fd, UI_DEV_DESTROY); + close(fd); + + return 0; + } + +Mouse movements +--------------- + +This example shows how to create a virtual device that behaves like a physical +mouse. + +.. code-block:: c + + #include + + /* emit function is identical to of the first example */ + + int main(void) + { + struct uinput_setup usetup; + int i = 50; + + int fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK); + + /* enable mouse button left and relative events */ + ioctl(fd, UI_SET_EVBIT, EV_KEY); + ioctl(fd, UI_SET_KEYBIT, BTN_LEFT); + + ioctl(fd, UI_SET_EVBIT, EV_REL); + ioctl(fd, UI_SET_RELBIT, REL_X); + ioctl(fd, UI_SET_RELBIT, REL_Y); + + memset(&usetup, 0, sizeof(usetup)); + usetup.id.bustype = BUS_USB; + usetup.id.vendor = 0x1234; /* sample vendor */ + usetup.id.product = 0x5678; /* sample product */ + strcpy(usetup.name, "Example device"); + + ioctl(fd, UI_DEV_SETUP, &usetup); + ioctl(fd, UI_DEV_CREATE); + + /* + * On UI_DEV_CREATE the kernel will create the device node for this + * device. We are inserting a pause here so that userspace has time + * to detect, initialize the new device, and can start listening to + * the event, otherwise it will not notice the event we are about + * to send. This pause is only needed in our example code! + */ + sleep(1); + + /* Move the mouse diagonally, 5 units per axis */ + while (i--) { + emit(fd, EV_REL, REL_X, 5); + emit(fd, EV_REL, REL_Y, 5); + emit(fd, EV_SYN, SYN_REPORT, 0); + usleep(15000); + } + + /* + * Give userspace some time to read the events before we destroy the + * device with UI_DEV_DESTOY. + */ + sleep(1); + + ioctl(fd, UI_DEV_DESTROY); + close(fd); + + return 0; + } + + +uinput old interface +-------------------- + +Before uinput version 5, there wasn't a dedicated ioctl to set up a virtual +device. Programs supportinf older versions of uinput interface need to fill +a uinput_user_dev structure and write it to the uinput file descriptor to +configure the new uinput device. New code should not use the old interface +but interact with uinput via ioctl calls, or use libevdev. + +.. code-block:: c + + #include + + /* emit function is identical to of the first example */ + + int main(void) + { + struct uinput_user_dev uud; + int version, rc, fd; + + fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK); + rc = ioctl(fd, UI_GET_VERSION, &version); + + if (rc == 0 && version >= 5) { + /* use UI_DEV_SETUP */ + return 0; + } + + /* + * The ioctls below will enable the device that is about to be + * created, to pass key events, in this case the space key. + */ + ioctl(fd, UI_SET_EVBIT, EV_KEY); + ioctl(fd, UI_SET_KEYBIT, KEY_SPACE); + + memset(&uud, 0, sizeof(uud)); + snprintf(uud.name, UINPUT_MAX_NAME_SIZE, "uinput old interface"); + write(fd, &uud, sizeof(uud)); + + ioctl(fd, UI_DEV_CREATE); + + /* + * On UI_DEV_CREATE the kernel will create the device node for this + * device. We are inserting a pause here so that userspace has time + * to detect, initialize the new device, and can start listening to + * the event, otherwise it will not notice the event we are about + * to send. This pause is only needed in our example code! + */ + sleep(1); + + /* Key press, report the event, send key release, and report again */ + emit(fd, EV_KEY, KEY_SPACE, 1); + emit(fd, EV_SYN, SYN_REPORT, 0); + emit(fd, EV_KEY, KEY_SPACE, 0); + emit(fd, EV_SYN, SYN_REPORT, 0); + + /* + * Give userspace some time to read the events before we destroy the + * device with UI_DEV_DESTOY. + */ + sleep(1); + + ioctl(fd, UI_DEV_DESTROY); + + close(fd); + return 0; + } + diff --git a/Documentation/input/userio.rst b/Documentation/input/userio.rst new file mode 100644 index 0000000000000000000000000000000000000000..f780c77931fe086dcc5f21648e0aac56566dec63 --- /dev/null +++ b/Documentation/input/userio.rst @@ -0,0 +1,85 @@ +.. include:: + +=================== +The userio Protocol +=================== + + +:Copyright: |copy| 2015 Stephen Chandler Paul + +Sponsored by Red Hat + + +Introduction +============= + +This module is intended to try to make the lives of input driver developers +easier by allowing them to test various serio devices (mainly the various +touchpads found on laptops) without having to have the physical device in front +of them. userio accomplishes this by allowing any privileged userspace program +to directly interact with the kernel's serio driver and control a virtual serio +port from there. + +Usage overview +============== + +In order to interact with the userio kernel module, one simply opens the +/dev/userio character device in their applications. Commands are sent to the +kernel module by writing to the device, and any data received from the serio +driver is read as-is from the /dev/userio device. All of the structures and +macros you need to interact with the device are defined in and +. + +Command Structure +================= + +The struct used for sending commands to /dev/userio is as follows:: + + struct userio_cmd { + __u8 type; + __u8 data; + }; + +``type`` describes the type of command that is being sent. This can be any one +of the USERIO_CMD macros defined in . ``data`` is the argument +that goes along with the command. In the event that the command doesn't have an +argument, this field can be left untouched and will be ignored by the kernel. +Each command should be sent by writing the struct directly to the character +device. In the event that the command you send is invalid, an error will be +returned by the character device and a more descriptive error will be printed +to the kernel log. Only one command can be sent at a time, any additional data +written to the character device after the initial command will be ignored. + +To close the virtual serio port, just close /dev/userio. + +Commands +======== + +USERIO_CMD_REGISTER +~~~~~~~~~~~~~~~~~~~ + +Registers the port with the serio driver and begins transmitting data back and +forth. Registration can only be performed once a port type is set with +USERIO_CMD_SET_PORT_TYPE. Has no argument. + +USERIO_CMD_SET_PORT_TYPE +~~~~~~~~~~~~~~~~~~~~~~~~ + +Sets the type of port we're emulating, where ``data`` is the port type being +set. Can be any of the macros from . For example: SERIO_8042 +would set the port type to be a normal PS/2 port. + +USERIO_CMD_SEND_INTERRUPT +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sends an interrupt through the virtual serio port to the serio driver, where +``data`` is the interrupt data being sent. + +Userspace tools +=============== + +The userio userspace tools are able to record PS/2 devices using some of the +debugging information from i8042, and play back the devices on /dev/userio. The +latest version of these tools can be found at: + + https://github.com/Lyude/ps2emu diff --git a/Documentation/input/userio.txt b/Documentation/input/userio.txt deleted file mode 100644 index 0880c0f447a6cee8158432fff7bb6c30c6af8d8c..0000000000000000000000000000000000000000 --- a/Documentation/input/userio.txt +++ /dev/null @@ -1,70 +0,0 @@ - The userio Protocol - (c) 2015 Stephen Chandler Paul - Sponsored by Red Hat --------------------------------------------------------------------------------- - -1. Introduction -~~~~~~~~~~~~~~~ - This module is intended to try to make the lives of input driver developers -easier by allowing them to test various serio devices (mainly the various -touchpads found on laptops) without having to have the physical device in front -of them. userio accomplishes this by allowing any privileged userspace program -to directly interact with the kernel's serio driver and control a virtual serio -port from there. - -2. Usage overview -~~~~~~~~~~~~~~~~~ - In order to interact with the userio kernel module, one simply opens the -/dev/userio character device in their applications. Commands are sent to the -kernel module by writing to the device, and any data received from the serio -driver is read as-is from the /dev/userio device. All of the structures and -macros you need to interact with the device are defined in and -. - -3. Command Structure -~~~~~~~~~~~~~~~~~~~~ - The struct used for sending commands to /dev/userio is as follows: - - struct userio_cmd { - __u8 type; - __u8 data; - }; - - "type" describes the type of command that is being sent. This can be any one -of the USERIO_CMD macros defined in . "data" is the argument -that goes along with the command. In the event that the command doesn't have an -argument, this field can be left untouched and will be ignored by the kernel. -Each command should be sent by writing the struct directly to the character -device. In the event that the command you send is invalid, an error will be -returned by the character device and a more descriptive error will be printed -to the kernel log. Only one command can be sent at a time, any additional data -written to the character device after the initial command will be ignored. - To close the virtual serio port, just close /dev/userio. - -4. Commands -~~~~~~~~~~~ - -4.1 USERIO_CMD_REGISTER -~~~~~~~~~~~~~~~~~~~~~~~ - Registers the port with the serio driver and begins transmitting data back and -forth. Registration can only be performed once a port type is set with -USERIO_CMD_SET_PORT_TYPE. Has no argument. - -4.2 USERIO_CMD_SET_PORT_TYPE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Sets the type of port we're emulating, where "data" is the port type being -set. Can be any of the macros from . For example: SERIO_8042 -would set the port type to be a normal PS/2 port. - -4.3 USERIO_CMD_SEND_INTERRUPT -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Sends an interrupt through the virtual serio port to the serio driver, where -"data" is the interrupt data being sent. - -5. Userspace tools -~~~~~~~~~~~~~~~~~~ - The userio userspace tools are able to record PS/2 devices using some of the -debugging information from i8042, and play back the devices on /dev/userio. The -latest version of these tools can be found at: - - https://github.com/Lyude/ps2emu diff --git a/Documentation/input/walkera0701.txt b/Documentation/input/walkera0701.txt deleted file mode 100644 index 49e3ac60dcef57d045720f5d9f96fc2b522a0f62..0000000000000000000000000000000000000000 --- a/Documentation/input/walkera0701.txt +++ /dev/null @@ -1,109 +0,0 @@ - -Walkera WK-0701 transmitter is supplied with a ready to fly Walkera -helicopters such as HM36, HM37, HM60. The walkera0701 module enables to use -this transmitter as joystick - -Devel homepage and download: -http://zub.fei.tuke.sk/walkera-wk0701/ - -or use cogito: -cg-clone http://zub.fei.tuke.sk/GIT/walkera0701-joystick - - -Connecting to PC: - -At back side of transmitter S-video connector can be found. Modulation -pulses from processor to HF part can be found at pin 2 of this connector, -pin 3 is GND. Between pin 3 and CPU 5k6 resistor can be found. To get -modulation pulses to PC, signal pulses must be amplified. - -Cable: (walkera TX to parport) - -Walkera WK-0701 TX S-VIDEO connector: - (back side of TX) - __ __ S-video: canon25 - / |_| \ pin 2 (signal) NPN parport - / O 4 3 O \ pin 3 (GND) LED ________________ 10 ACK - ( O 2 1 O ) | C - \ ___ / 2 ________________________|\|_____|/ - | [___] | |/| B |\ - ------- 3 __________________________________|________________ 25 GND - E - - -I use green LED and BC109 NPN transistor. - -Software: - -Build kernel with walkera0701 module. Module walkera0701 need exclusive -access to parport, modules like lp must be unloaded before loading -walkera0701 module, check dmesg for error messages. Connect TX to PC by -cable and run jstest /dev/input/js0 to see values from TX. If no value can -be changed by TX "joystick", check output from /proc/interrupts. Value for -(usually irq7) parport must increase if TX is on. - - - -Technical details: - -Driver use interrupt from parport ACK input bit to measure pulse length -using hrtimers. - -Frame format: -Based on walkera WK-0701 PCM Format description by Shaul Eizikovich. -(downloaded from http://www.smartpropoplus.com/Docs/Walkera_Wk-0701_PCM.pdf) - -Signal pulses: - (ANALOG) - SYNC BIN OCT - +---------+ +------+ - | | | | ---+ +------+ +--- - -Frame: - SYNC , BIN1, OCT1, BIN2, OCT2 ... BIN24, OCT24, BIN25, next frame SYNC .. - -pulse length: - Binary values: Analog octal values: - - 288 uS Binary 0 318 uS 000 - 438 uS Binary 1 398 uS 001 - 478 uS 010 - 558 uS 011 - 638 uS 100 - 1306 uS SYNC 718 uS 101 - 798 uS 110 - 878 uS 111 - -24 bin+oct values + 1 bin value = 24*4+1 bits = 97 bits - -(Warning, pulses on ACK are inverted by transistor, irq is raised up on sync -to bin change or octal value to bin change). - -Binary data representations: - -One binary and octal value can be grouped to nibble. 24 nibbles + one binary -values can be sampled between sync pulses. - -Values for first four channels (analog joystick values) can be found in -first 10 nibbles. Analog value is represented by one sign bit and 9 bit -absolute binary value. (10 bits per channel). Next nibble is checksum for -first ten nibbles. - -Next nibbles 12 .. 21 represents four channels (not all channels can be -directly controlled from TX). Binary representations are the same as in first -four channels. In nibbles 22 and 23 is a special magic number. Nibble 24 is -checksum for nibbles 12..23. - -After last octal value for nibble 24 and next sync pulse one additional -binary value can be sampled. This bit and magic number is not used in -software driver. Some details about this magic numbers can be found in -Walkera_Wk-0701_PCM.pdf. - -Checksum calculation: - -Summary of octal values in nibbles must be same as octal value in checksum -nibble (only first 3 bits are used). Binary value for checksum nibble is -calculated by sum of binary values in checked nibbles + sum of octal values -in checked nibbles divided by 8. Only bit 0 of this sum is used. - diff --git a/Documentation/input/xpad.txt b/Documentation/input/xpad.txt deleted file mode 100644 index d1b23f295db4ef989008fa9ced872f33838f03ed..0000000000000000000000000000000000000000 --- a/Documentation/input/xpad.txt +++ /dev/null @@ -1,226 +0,0 @@ -xpad - Linux USB driver for Xbox compatible controllers - -This driver exposes all first-party and third-party Xbox compatible -controllers. It has a long history and has enjoyed considerable usage -as Window's xinput library caused most PC games to focus on Xbox -controller compatibility. - -Due to backwards compatibility all buttons are reported as digital. -This only effects Original Xbox controllers. All later controller models -have only digital face buttons. - -Rumble is supported on some models of Xbox 360 controllers but not of -Original Xbox controllers nor on Xbox One controllers. As of writing -the Xbox One's rumble protocol has not been reverse engineered but in -the future could be supported. - - -0. Notes --------- -The number of buttons/axes reported varies based on 3 things: -- if you are using a known controller -- if you are using a known dance pad -- if using an unknown device (one not listed below), what you set in the - module configuration for "Map D-PAD to buttons rather than axes for unknown - pads" (module option dpad_to_buttons) - -If you set dpad_to_buttons to N and you are using an unknown device -the driver will map the directional pad to axes (X/Y). -If you said Y it will map the d-pad to buttons, which is needed for dance -style games to function correctly. The default is Y. - -dpad_to_buttons has no effect for known pads. A erroneous commit message -claimed dpad_to_buttons could be used to force behavior on known devices. -This is not true. Both dpad_to_buttons and triggers_to_buttons only affect -unknown controllers. - - -0.1 Normal Controllers ----------------------- -With a normal controller, the directional pad is mapped to its own X/Y axes. -The jstest-program from joystick-1.2.15 (jstest-version 2.1.0) will report 8 -axes and 10 buttons. - -All 8 axes work, though they all have the same range (-32768..32767) -and the zero-setting is not correct for the triggers (I don't know if that -is some limitation of jstest, since the input device setup should be fine. I -didn't have a look at jstest itself yet). - -All of the 10 buttons work (in digital mode). The six buttons on the -right side (A, B, X, Y, black, white) are said to be "analog" and -report their values as 8 bit unsigned, not sure what this is good for. - -I tested the controller with quake3, and configuration and -in game functionality were OK. However, I find it rather difficult to -play first person shooters with a pad. Your mileage may vary. - - -0.2 Xbox Dance Pads -------------------- -When using a known dance pad, jstest will report 6 axes and 14 buttons. - -For dance style pads (like the redoctane pad) several changes -have been made. The old driver would map the d-pad to axes, resulting -in the driver being unable to report when the user was pressing both -left+right or up+down, making DDR style games unplayable. - -Known dance pads automatically map the d-pad to buttons and will work -correctly out of the box. - -If your dance pad is recognized by the driver but is using axes instead -of buttons, see section 0.3 - Unknown Controllers - -I've tested this with Stepmania, and it works quite well. - - -0.3 Unknown Controllers ----------------------- -If you have an unknown xbox controller, it should work just fine with -the default settings. - -HOWEVER if you have an unknown dance pad not listed below, it will not -work UNLESS you set "dpad_to_buttons" to 1 in the module configuration. - -PLEASE, if you have an unknown controller, email Dom with -a dump from /proc/bus/usb and a description of the pad (manufacturer, country, -whether it is a dance pad or normal controller) so that we can add your pad -to the list of supported devices, ensuring that it will work out of the -box in the future. - - -1. USB adapters --------------- -All generations of Xbox controllers speak USB over the wire. -- Original Xbox controllers use a proprietary connector and require adapters. -- Wireless Xbox 360 controllers require a 'Xbox 360 Wireless Gaming Receiver - for Windows' -- Wired Xbox 360 controllers use standard USB connectors. -- Xbox One controllers can be wireless but speak Wi-Fi Direct and are not - yet supported. -- Xbox One controllers can be wired and use standard Micro-USB connectors. - - - -1.1 Original Xbox USB adapters --------------- -Using this driver with an Original Xbox controller requires an -adapter cable to break out the proprietary connector's pins to USB. -You can buy these online fairly cheap, or build your own. - -Such a cable is pretty easy to build. The Controller itself is a USB -compound device (a hub with three ports for two expansion slots and -the controller device) with the only difference in a nonstandard connector -(5 pins vs. 4 on standard USB 1.0 connectors). - -You just need to solder a USB connector onto the cable and keep the -yellow wire unconnected. The other pins have the same order on both -connectors so there is no magic to it. Detailed info on these matters -can be found on the net ([1], [2], [3]). - -Thanks to the trip splitter found on the cable you don't even need to cut the -original one. You can buy an extension cable and cut that instead. That way, -you can still use the controller with your X-Box, if you have one ;) - - - -2. Driver Installation ----------------------- - -Once you have the adapter cable, if needed, and the controller connected -the xpad module should be auto loaded. To confirm you can cat -/proc/bus/usb/devices. There should be an entry like the one at the end [4]. - - - -3. Supported Controllers ------------------------- -For a full list of supported controllers and associated vendor and product -IDs see the xpad_device[] array[6]. - -As of the historic version 0.0.6 (2006-10-10) the following devices -were supported: - original Microsoft XBOX controller (US), vendor=0x045e, product=0x0202 - smaller Microsoft XBOX controller (US), vendor=0x045e, product=0x0289 - original Microsoft XBOX controller (Japan), vendor=0x045e, product=0x0285 - InterAct PowerPad Pro (Germany), vendor=0x05fd, product=0x107a - RedOctane Xbox Dance Pad (US), vendor=0x0c12, product=0x8809 - -Unrecognized models of Xbox controllers should function as Generic -Xbox controllers. Unrecognized Dance Pad controllers require setting -the module option 'dpad_to_buttons'. - -If you have an unrecognized controller please see 0.3 - Unknown Controllers - - -4. Manual Testing ------------------ -To test this driver's functionality you may use 'jstest'. - -For example: -> modprobe xpad -> modprobe joydev -> jstest /dev/js0 - -If you're using a normal controller, there should be a single line showing -18 inputs (8 axes, 10 buttons), and its values should change if you move -the sticks and push the buttons. If you're using a dance pad, it should -show 20 inputs (6 axes, 14 buttons). - -It works? Voila, you're done ;) - - - -5. Thanks ---------- - -I have to thank ITO Takayuki for the detailed info on his site - http://euc.jp/periphs/xbox-controller.ja.html. - -His useful info and both the usb-skeleton as well as the iforce input driver -(Greg Kroah-Hartmann; Vojtech Pavlik) helped a lot in rapid prototyping -the basic functionality. - - - -6. References -------------- - -[1]: http://euc.jp/periphs/xbox-controller.ja.html (ITO Takayuki) -[2]: http://xpad.xbox-scene.com/ -[3]: http://www.markosweb.com/www/xboxhackz.com/ -[4]: /proc/bus/usb/devices - dump from InterAct PowerPad Pro (Germany): - -T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 5 Spd=12 MxCh= 0 -D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=32 #Cfgs= 1 -P: Vendor=05fd ProdID=107a Rev= 1.00 -C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA -I: If#= 0 Alt= 0 #EPs= 2 Cls=58(unk. ) Sub=42 Prot=00 Driver=(none) -E: Ad=81(I) Atr=03(Int.) MxPS= 32 Ivl= 10ms -E: Ad=02(O) Atr=03(Int.) MxPS= 32 Ivl= 10ms - -[5]: /proc/bus/usb/devices - dump from Redoctane Xbox Dance Pad (US): - -T: Bus=01 Lev=02 Prnt=09 Port=00 Cnt=01 Dev#= 10 Spd=12 MxCh= 0 -D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 -P: Vendor=0c12 ProdID=8809 Rev= 0.01 -S: Product=XBOX DDR -C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA -I: If#= 0 Alt= 0 #EPs= 2 Cls=58(unk. ) Sub=42 Prot=00 Driver=xpad -E: Ad=82(I) Atr=03(Int.) MxPS= 32 Ivl=4ms -E: Ad=02(O) Atr=03(Int.) MxPS= 32 Ivl=4ms - -[6]: http://lxr.free-electrons.com/ident?i=xpad_device - - - -7. Historic Edits ------------------ -Marko Friedemann -2002-07-16 - - original doc - -Dominic Cerquetti -2005-03-19 - - added stuff for dance pads, new d-pad->axes mappings - -Later changes may be viewed with 'git log Documentation/input/xpad.txt' diff --git a/Documentation/input/yealink.txt b/Documentation/input/yealink.txt deleted file mode 100644 index 8277b76ec50649f3a35f5f5407d135fc37312a0e..0000000000000000000000000000000000000000 --- a/Documentation/input/yealink.txt +++ /dev/null @@ -1,216 +0,0 @@ -Driver documentation for yealink usb-p1k phones - -0. Status -~~~~~~~~~ -The p1k is a relatively cheap usb 1.1 phone with: - - keyboard full support, yealink.ko / input event API - - LCD full support, yealink.ko / sysfs API - - LED full support, yealink.ko / sysfs API - - dialtone full support, yealink.ko / sysfs API - - ringtone full support, yealink.ko / sysfs API - - audio playback full support, snd_usb_audio.ko / alsa API - - audio record full support, snd_usb_audio.ko / alsa API - -For vendor documentation see http://www.yealink.com - - -1. Compilation (stand alone version) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Currently only kernel 2.6.x.y versions are supported. -In order to build the yealink.ko module do - - make - -If you encounter problems please check if in the MAKE_OPTS variable in -the Makefile is pointing to the location where your kernel sources -are located, default /usr/src/linux. - - -1.1 Troubleshooting -~~~~~~~~~~~~~~~~~~~ -Q: Module yealink compiled and installed without any problem but phone - is not initialized and does not react to any actions. -A: If you see something like: - hiddev0: USB HID v1.00 Device [Yealink Network Technology Ltd. VOIP USB Phone - in dmesg, it means that the hid driver has grabbed the device first. Try to - load module yealink before any other usb hid driver. Please see the - instructions provided by your distribution on module configuration. - -Q: Phone is working now (displays version and accepts keypad input) but I can't - find the sysfs files. -A: The sysfs files are located on the particular usb endpoint. On most - distributions you can do: "find /sys/ -name get_icons" for a hint. - - -2. keyboard features -~~~~~~~~~~~~~~~~~~~~ -The current mapping in the kernel is provided by the map_p1k_to_key -function: - - Physical USB-P1K button layout input events - - - up up - IN OUT left, right - down down - - pickup C hangup enter, backspace, escape - 1 2 3 1, 2, 3 - 4 5 6 4, 5, 6, - 7 8 9 7, 8, 9, - * 0 # *, 0, #, - - The "up" and "down" keys, are symbolised by arrows on the button. - The "pickup" and "hangup" keys are symbolised by a green and red phone - on the button. - - -3. LCD features -~~~~~~~~~~~~~~~ -The LCD is divided and organised as a 3 line display: - - |[] [][] [][] [][] in |[][] - |[] M [][] D [][] : [][] out |[][] - store - - NEW REP SU MO TU WE TH FR SA - - [] [] [] [] [] [] [] [] [] [] [] [] - [] [] [] [] [] [] [] [] [] [] [] [] - - -Line 1 Format (see below) : 18.e8.M8.88...188 - Icon names : M D : IN OUT STORE -Line 2 Format : ......... - Icon name : NEW REP SU MO TU WE TH FR SA -Line 3 Format : 888888888888 - - -Format description: - From a userspace perspective the world is separated into "digits" and "icons". - A digit can have a character set, an icon can only be ON or OFF. - - Format specifier - '8' : Generic 7 segment digit with individual addressable segments - - Reduced capability 7 segment digit, when segments are hard wired together. - '1' : 2 segments digit only able to produce a 1. - 'e' : Most significant day of the month digit, - able to produce at least 1 2 3. - 'M' : Most significant minute digit, - able to produce at least 0 1 2 3 4 5. - - Icons or pictograms: - '.' : For example like AM, PM, SU, a 'dot' .. or other single segment - elements. - - -4. Driver usage -~~~~~~~~~~~~~~~ -For userland the following interfaces are available using the sysfs interface: - /sys/.../ - line1 Read/Write, lcd line1 - line2 Read/Write, lcd line2 - line3 Read/Write, lcd line3 - - get_icons Read, returns a set of available icons. - hide_icon Write, hide the element by writing the icon name. - show_icon Write, display the element by writing the icon name. - - map_seg7 Read/Write, the 7 segments char set, common for all - yealink phones. (see map_to_7segment.h) - - ringtone Write, upload binary representation of a ringtone, - see yealink.c. status EXPERIMENTAL due to potential - races between async. and sync usb calls. - - -4.1 lineX -~~~~~~~~~ -Reading /sys/../lineX will return the format string with its current value: - - Example: - cat ./line3 - 888888888888 - Linux Rocks! - -Writing to /sys/../lineX will set the corresponding LCD line. - - Excess characters are ignored. - - If less characters are written than allowed, the remaining digits are - unchanged. - - The tab '\t'and '\n' char does not overwrite the original content. - - Writing a space to an icon will always hide its content. - - Example: - date +"%m.%e.%k:%M" | sed 's/^0/ /' > ./line1 - - Will update the LCD with the current date & time. - - -4.2 get_icons -~~~~~~~~~~~~~ -Reading will return all available icon names and its current settings: - - cat ./get_icons - on M - on D - on : - IN - OUT - STORE - NEW - REP - SU - MO - TU - WE - TH - FR - SA - LED - DIALTONE - RINGTONE - - -4.3 show/hide icons -~~~~~~~~~~~~~~~~~~~ -Writing to these files will update the state of the icon. -Only one icon at a time can be updated. - -If an icon is also on a ./lineX the corresponding value is -updated with the first letter of the icon. - - Example - light up the store icon: - echo -n "STORE" > ./show_icon - - cat ./line1 - 18.e8.M8.88...188 - S - - Example - sound the ringtone for 10 seconds: - echo -n RINGTONE > /sys/..../show_icon - sleep 10 - echo -n RINGTONE > /sys/..../hide_icon - - -5. Sound features -~~~~~~~~~~~~~~~~~ -Sound is supported by the ALSA driver: snd_usb_audio - -One 16-bit channel with sample and playback rates of 8000 Hz is the practical -limit of the device. - - Example - recording test: - arecord -v -d 10 -r 8000 -f S16_LE -t wav foobar.wav - - Example - playback test: - aplay foobar.wav - - -6. Credits & Acknowledgments -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Olivier Vandorpe, for starting the usbb2k-api project doing much of - the reverse engineering. - - Martin Diehl, for pointing out how to handle USB memory allocation. - - Dmitry Torokhov, for the numerous code reviews and suggestions. - diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 08244bea504877397ecabd2732e8e0cf00e6da09..1e9fcb4d0ec83ed4eaf2330f16775769ac366aa3 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -191,6 +191,7 @@ Code Seq#(hex) Include File Comments 'W' 00-1F linux/watchdog.h conflict! 'W' 00-1F linux/wanrouter.h conflict! (pre 3.9) 'W' 00-3F sound/asound.h conflict! +'W' 40-5F drivers/pci/switch/switchtec.c 'X' all fs/xfs/xfs_fs.h conflict! and fs/xfs/linux-2.6/xfs_ioctl32.h and include/linux/falloc.h @@ -212,7 +213,7 @@ Code Seq#(hex) Include File Comments 'c' 00-1F linux/chio.h conflict! 'c' 80-9F arch/s390/include/asm/chsc.h conflict! 'c' A0-AF arch/x86/include/asm/msr.h conflict! -'d' 00-FF linux/char/drm/drm/h conflict! +'d' 00-FF linux/char/drm/drm.h conflict! 'd' 02-40 pcmcia/ds.h conflict! 'd' F0-FF linux/digi1.h 'e' all linux/digi1.h conflict! @@ -308,6 +309,7 @@ Code Seq#(hex) Include File Comments 0xA3 80-8F Port ACL in development: 0xA3 90-9F linux/dtlk.h +0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem 0xAA 00-3F linux/uapi/linux/userfaultfd.h 0xAB 00-1F linux/nbd.h 0xAC 00-1F linux/raw.h diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 9b9c4797fc55653dec668d82ae8c43ccdab58e4b..e18daca65ccd2add180e5e74f97d0cc4743677cb 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -44,11 +44,11 @@ This document describes the Linux kernel Makefiles. --- 6.11 Post-link pass === 7 Kbuild syntax for exported headers - --- 7.1 header-y + --- 7.1 no-export-headers --- 7.2 genhdr-y - --- 7.3 destination-y - --- 7.4 generic-y - --- 7.5 generated-y + --- 7.3 generic-y + --- 7.4 generated-y + --- 7.5 mandatory-y === 8 Kbuild Variables === 9 Makefile language @@ -1236,7 +1236,7 @@ When kbuild executes, the following steps are followed (roughly): that may be shared between individual architectures. The recommended approach how to use a generic header file is to list the file in the Kbuild file. - See "7.4 generic-y" for further info on syntax etc. + See "7.3 generic-y" for further info on syntax etc. --- 6.11 Post-link pass @@ -1263,53 +1263,32 @@ The pre-processing does: - drop include of compiler.h - drop all sections that are kernel internal (guarded by ifdef __KERNEL__) -Each relevant directory contains a file name "Kbuild" which specifies the -headers to be exported. -See subsequent chapter for the syntax of the Kbuild file. - - --- 7.1 header-y - - header-y specifies header files to be exported. - - Example: - #include/linux/Kbuild - header-y += usb/ - header-y += aio_abi.h +All headers under include/uapi/, include/generated/uapi/, +arch//include/uapi/ and arch//include/generated/uapi/ +are exported. - The convention is to list one file per line and - preferably in alphabetic order. +A Kbuild file may be defined under arch//include/uapi/asm/ and +arch//include/asm/ to list asm files coming from asm-generic. +See subsequent chapter for the syntax of the Kbuild file. - header-y also specifies which subdirectories to visit. - A subdirectory is identified by a trailing '/' which - can be seen in the example above for the usb subdirectory. + --- 7.1 no-export-headers - Subdirectories are visited before their parent directories. + no-export-headers is essentially used by include/uapi/linux/Kbuild to + avoid exporting specific headers (e.g. kvm.h) on architectures that do + not support it. It should be avoided as much as possible. --- 7.2 genhdr-y - genhdr-y specifies generated files to be exported. - Generated files are special as they need to be looked - up in another directory when doing 'make O=...' builds. + genhdr-y specifies asm files to be generated. Example: - #include/linux/Kbuild - genhdr-y += version.h + #arch/x86/include/uapi/asm/Kbuild + genhdr-y += unistd_32.h + genhdr-y += unistd_64.h + genhdr-y += unistd_x32.h - --- 7.3 destination-y - When an architecture has a set of exported headers that needs to be - exported to a different directory destination-y is used. - destination-y specifies the destination directory for all exported - headers in the file where it is present. - - Example: - #arch/xtensa/platforms/s6105/include/platform/Kbuild - destination-y := include/linux - - In the example above all exported headers in the Kbuild file - will be located in the directory "include/linux" when exported. - - --- 7.4 generic-y + --- 7.3 generic-y If an architecture uses a verbatim copy of a header from include/asm-generic then this is listed in the file @@ -1336,7 +1315,7 @@ See subsequent chapter for the syntax of the Kbuild file. Example: termios.h #include - --- 7.5 generated-y + --- 7.4 generated-y If an architecture generates other header files alongside generic-y wrappers, and not included in genhdr-y, then generated-y specifies @@ -1349,6 +1328,15 @@ See subsequent chapter for the syntax of the Kbuild file. #arch/x86/include/asm/Kbuild generated-y += syscalls_32.h + --- 7.5 mandatory-y + + mandatory-y is essentially used by include/uapi/asm-generic/Kbuild.asm + to define the minimun set of headers that must be exported in + include/asm. + + The convention is to list one subdir per line and + preferably in alphabetic order. + === 8 Kbuild Variables The top Makefile exports the following variables: diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index b0eb27b956d95a2fc744d46b53647e05c7186e50..615434d81108ed28fc4f077a0e1b35cd609b2610 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -18,7 +18,7 @@ memory image to a dump file on the local disk, or across the network to a remote system. Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64, -s390x and arm architectures. +s390x, arm and arm64 architectures. When the system kernel boots, it reserves a small section of memory for the dump-capture kernel. This ensures that ongoing Direct Memory Access @@ -249,6 +249,13 @@ Dump-capture kernel config options (Arch Dependent, arm) AUTO_ZRELADDR=y +Dump-capture kernel config options (Arch Dependent, arm64) +---------------------------------------------------------- + +- Please note that kvm of the dump-capture kernel will not be enabled + on non-VHE systems even if it is configured. This is because the CPU + will not be reset to EL2 on panic. + Extended crashkernel syntax =========================== @@ -305,6 +312,8 @@ Boot into System Kernel kernel will automatically locate the crash kernel image within the first 512MB of RAM if X is not given. + On arm64, use "crashkernel=Y[@X]". Note that the start address of + the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000). Load the Dump-capture Kernel ============================ @@ -327,6 +336,8 @@ For s390x: - Use image or bzImage For arm: - Use zImage +For arm64: + - Use vmlinux or Image If you are using a uncompressed vmlinux image then use following command to load dump-capture kernel. @@ -370,6 +381,9 @@ For s390x: For arm: "1 maxcpus=1 reset_devices" +For arm64: + "1 maxcpus=1 reset_devices" + Notes on loading the dump-capture kernel: * By default, the ELF headers are stored in ELF64 format to support diff --git a/Documentation/kref.txt b/Documentation/kref.txt index ddf85a5dde0c12a435b9cbcc30f44159de5acc0b..d26a27ca964d4906e2955778393f99fa25d52153 100644 --- a/Documentation/kref.txt +++ b/Documentation/kref.txt @@ -84,6 +84,7 @@ int my_data_handler(void) task = kthread_run(more_data_handling, data, "more_data_handling"); if (task == ERR_PTR(-ENOMEM)) { rv = -ENOMEM; + kref_put(&data->refcount, data_release); goto out; } diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt index bcea12a0c5847e12211a14d43f356d0f1a1e77f0..e23fa91ea722fd82882eb80937ff8f4e87e65140 100644 --- a/Documentation/leds/leds-lp55xx.txt +++ b/Documentation/leds/leds-lp55xx.txt @@ -117,7 +117,7 @@ As soon as 'loading' is set to 0, registered callback is called. Inside the callback, the selected engine is loaded and memory is updated. To run programmed pattern, 'run_engine' attribute should be enabled. -The pattern sqeuence of LP8501 is similar to LP5523. +The pattern sequence of LP8501 is similar to LP5523. However pattern data is specific. Ex 1) Engine 1 is used echo 1 > /sys/bus/i2c/devices/xxxx/select_engine diff --git a/Documentation/lightnvm/pblk.txt b/Documentation/lightnvm/pblk.txt new file mode 100644 index 0000000000000000000000000000000000000000..1040ed1cec812dce0c17cadbb7c73a6febd28c61 --- /dev/null +++ b/Documentation/lightnvm/pblk.txt @@ -0,0 +1,21 @@ +pblk: Physical Block Device Target +================================== + +pblk implements a fully associative, host-based FTL that exposes a traditional +block I/O interface. Its primary responsibilities are: + + - Map logical addresses onto physical addresses (4KB granularity) in a + logical-to-physical (L2P) table. + - Maintain the integrity and consistency of the L2P table as well as its + recovery from normal tear down and power outage. + - Deal with controller- and media-specific constrains. + - Handle I/O errors. + - Implement garbage collection. + - Maintain consistency across the I/O stack during synchronization points. + +For more information please refer to: + + http://lightnvm.io + +which maintains updated FAQs, manual pages, technical documentation, tools, +contacts, etc. diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt index 9d2096c7160de53121ca723d2231db3a46a15e81..ecdb18104ab0e83f7d7639c590deba69f71213b6 100644 --- a/Documentation/livepatch/livepatch.txt +++ b/Documentation/livepatch/livepatch.txt @@ -72,7 +72,8 @@ example, they add a NULL pointer or a boundary check, fix a race by adding a missing memory barrier, or add some locking around a critical section. Most of these changes are self contained and the function presents itself the same way to the rest of the system. In this case, the functions might -be updated independently one by one. +be updated independently one by one. (This can be done by setting the +'immediate' flag in the klp_patch struct.) But there are more complex fixes. For example, a patch might change ordering of locking in multiple functions at the same time. Or a patch @@ -86,20 +87,141 @@ or no data are stored in the modified structures at the moment. The theory about how to apply functions a safe way is rather complex. The aim is to define a so-called consistency model. It attempts to define conditions when the new implementation could be used so that the system -stays consistent. The theory is not yet finished. See the discussion at -https://lkml.kernel.org/r/20141107140458.GA21774@suse.cz - -The current consistency model is very simple. It guarantees that either -the old or the new function is called. But various functions get redirected -one by one without any synchronization. - -In other words, the current implementation _never_ modifies the behavior -in the middle of the call. It is because it does _not_ rewrite the entire -function in the memory. Instead, the function gets redirected at the -very beginning. But this redirection is used immediately even when -some other functions from the same patch have not been redirected yet. - -See also the section "Limitations" below. +stays consistent. + +Livepatch has a consistency model which is a hybrid of kGraft and +kpatch: it uses kGraft's per-task consistency and syscall barrier +switching combined with kpatch's stack trace switching. There are also +a number of fallback options which make it quite flexible. + +Patches are applied on a per-task basis, when the task is deemed safe to +switch over. When a patch is enabled, livepatch enters into a +transition state where tasks are converging to the patched state. +Usually this transition state can complete in a few seconds. The same +sequence occurs when a patch is disabled, except the tasks converge from +the patched state to the unpatched state. + +An interrupt handler inherits the patched state of the task it +interrupts. The same is true for forked tasks: the child inherits the +patched state of the parent. + +Livepatch uses several complementary approaches to determine when it's +safe to patch tasks: + +1. The first and most effective approach is stack checking of sleeping + tasks. If no affected functions are on the stack of a given task, + the task is patched. In most cases this will patch most or all of + the tasks on the first try. Otherwise it'll keep trying + periodically. This option is only available if the architecture has + reliable stacks (HAVE_RELIABLE_STACKTRACE). + +2. The second approach, if needed, is kernel exit switching. A + task is switched when it returns to user space from a system call, a + user space IRQ, or a signal. It's useful in the following cases: + + a) Patching I/O-bound user tasks which are sleeping on an affected + function. In this case you have to send SIGSTOP and SIGCONT to + force it to exit the kernel and be patched. + b) Patching CPU-bound user tasks. If the task is highly CPU-bound + then it will get patched the next time it gets interrupted by an + IRQ. + c) In the future it could be useful for applying patches for + architectures which don't yet have HAVE_RELIABLE_STACKTRACE. In + this case you would have to signal most of the tasks on the + system. However this isn't supported yet because there's + currently no way to patch kthreads without + HAVE_RELIABLE_STACKTRACE. + +3. For idle "swapper" tasks, since they don't ever exit the kernel, they + instead have a klp_update_patch_state() call in the idle loop which + allows them to be patched before the CPU enters the idle state. + + (Note there's not yet such an approach for kthreads.) + +All the above approaches may be skipped by setting the 'immediate' flag +in the 'klp_patch' struct, which will disable per-task consistency and +patch all tasks immediately. This can be useful if the patch doesn't +change any function or data semantics. Note that, even with this flag +set, it's possible that some tasks may still be running with an old +version of the function, until that function returns. + +There's also an 'immediate' flag in the 'klp_func' struct which allows +you to specify that certain functions in the patch can be applied +without per-task consistency. This might be useful if you want to patch +a common function like schedule(), and the function change doesn't need +consistency but the rest of the patch does. + +For architectures which don't have HAVE_RELIABLE_STACKTRACE, the user +must set patch->immediate which causes all tasks to be patched +immediately. This option should be used with care, only when the patch +doesn't change any function or data semantics. + +In the future, architectures which don't have HAVE_RELIABLE_STACKTRACE +may be allowed to use per-task consistency if we can come up with +another way to patch kthreads. + +The /sys/kernel/livepatch//transition file shows whether a patch +is in transition. Only a single patch (the topmost patch on the stack) +can be in transition at a given time. A patch can remain in transition +indefinitely, if any of the tasks are stuck in the initial patch state. + +A transition can be reversed and effectively canceled by writing the +opposite value to the /sys/kernel/livepatch//enabled file while +the transition is in progress. Then all the tasks will attempt to +converge back to the original patch state. + +There's also a /proc//patch_state file which can be used to +determine which tasks are blocking completion of a patching operation. +If a patch is in transition, this file shows 0 to indicate the task is +unpatched and 1 to indicate it's patched. Otherwise, if no patch is in +transition, it shows -1. Any tasks which are blocking the transition +can be signaled with SIGSTOP and SIGCONT to force them to change their +patched state. + + +3.1 Adding consistency model support to new architectures +--------------------------------------------------------- + +For adding consistency model support to new architectures, there are a +few options: + +1) Add CONFIG_HAVE_RELIABLE_STACKTRACE. This means porting objtool, and + for non-DWARF unwinders, also making sure there's a way for the stack + tracing code to detect interrupts on the stack. + +2) Alternatively, ensure that every kthread has a call to + klp_update_patch_state() in a safe location. Kthreads are typically + in an infinite loop which does some action repeatedly. The safe + location to switch the kthread's patch state would be at a designated + point in the loop where there are no locks taken and all data + structures are in a well-defined state. + + The location is clear when using workqueues or the kthread worker + API. These kthreads process independent actions in a generic loop. + + It's much more complicated with kthreads which have a custom loop. + There the safe location must be carefully selected on a case-by-case + basis. + + In that case, arches without HAVE_RELIABLE_STACKTRACE would still be + able to use the non-stack-checking parts of the consistency model: + + a) patching user tasks when they cross the kernel/user space + boundary; and + + b) patching kthreads and idle tasks at their designated patch points. + + This option isn't as good as option 1 because it requires signaling + user tasks and waking kthreads to patch them. But it could still be + a good backup option for those architectures which don't have + reliable stack traces yet. + +In the meantime, patches for such architectures can bypass the +consistency model by setting klp_patch.immediate to true. This option +is perfectly fine for patches which don't change the semantics of the +patched functions. In practice, this is usable for ~90% of security +fixes. Use of this option also means the patch can't be unloaded after +it has been disabled. 4. Livepatch module @@ -134,7 +256,7 @@ Documentation/livepatch/module-elf-format.txt for more details. 4.2. Metadata ------------- +------------- The patch is described by several structures that split the information into three levels: @@ -156,6 +278,9 @@ into three levels: only for a particular object ( vmlinux or a kernel module ). Note that kallsyms allows for searching symbols according to the object name. + There's also an 'immediate' flag which, when set, patches the + function immediately, bypassing the consistency model safety checks. + + struct klp_object defines an array of patched functions (struct klp_func) in the same object. Where the object is either vmlinux (NULL) or a module name. @@ -172,10 +297,13 @@ into three levels: This structure handles all patched functions consistently and eventually, synchronously. The whole patch is applied only when all patched symbols are found. The only exception are symbols from objects - (kernel modules) that have not been loaded yet. Also if a more complex - consistency model is supported then a selected unit (thread, - kernel as a whole) will see the new code from the entire patch - only when it is in a safe state. + (kernel modules) that have not been loaded yet. + + Setting the 'immediate' flag applies the patch to all tasks + immediately, bypassing the consistency model safety checks. + + For more details on how the patch is applied on a per-task basis, + see the "Consistency model" section. 4.3. Livepatch module handling @@ -188,8 +316,15 @@ section "Livepatch life-cycle" below for more details about these two operations. Module removal is only safe when there are no users of the underlying -functions. The immediate consistency model is not able to detect this; -therefore livepatch modules cannot be removed. See "Limitations" below. +functions. The immediate consistency model is not able to detect this. The +code just redirects the functions at the very beginning and it does not +check if the functions are in use. In other words, it knows when the +functions get called but it does not know when the functions return. +Therefore it cannot be decided when the livepatch module can be safely +removed. This is solved by a hybrid consistency model. When the system is +transitioned to a new patch state (patched/unpatched) it is guaranteed that +no task sleeps or runs in the old code. + 5. Livepatch life-cycle ======================= @@ -239,9 +374,15 @@ Registered patches might be enabled either by calling klp_enable_patch() or by writing '1' to /sys/kernel/livepatch//enabled. The system will start using the new implementation of the patched functions at this stage. -In particular, if an original function is patched for the first time, a -function specific struct klp_ops is created and an universal ftrace handler -is registered. +When a patch is enabled, livepatch enters into a transition state where +tasks are converging to the patched state. This is indicated by a value +of '1' in /sys/kernel/livepatch//transition. Once all tasks have +been patched, the 'transition' value changes to '0'. For more +information about this process, see the "Consistency model" section. + +If an original function is patched for the first time, a function +specific struct klp_ops is created and an universal ftrace handler is +registered. Functions might be patched multiple times. The ftrace handler is registered only once for the given function. Further patches just add an entry to the @@ -261,6 +402,12 @@ by writing '0' to /sys/kernel/livepatch//enabled. At this stage either the code from the previously enabled patch or even the original code gets used. +When a patch is disabled, livepatch enters into a transition state where +tasks are converging to the unpatched state. This is indicated by a +value of '1' in /sys/kernel/livepatch//transition. Once all tasks +have been unpatched, the 'transition' value changes to '0'. For more +information about this process, see the "Consistency model" section. + Here all the functions (struct klp_func) associated with the to-be-disabled patch are removed from the corresponding struct klp_ops. The ftrace handler is unregistered and the struct klp_ops is freed when the func_stack list @@ -329,23 +476,6 @@ The current Livepatch implementation has several limitations: by "notrace". - + Livepatch modules can not be removed. - - The current implementation just redirects the functions at the very - beginning. It does not check if the functions are in use. In other - words, it knows when the functions get called but it does not - know when the functions return. Therefore it can not decide when - the livepatch module can be safely removed. - - This will get most likely solved once a more complex consistency model - is supported. The idea is that a safe state for patching should also - mean a safe state for removing the patch. - - Note that the patch itself might get disabled by writing zero - to /sys/kernel/livepatch//enabled. It causes that the new - code will not longer get called. But it does not guarantee - that anyone is not sleeping anywhere in the new code. - + Livepatch works reliably only when the dynamic ftrace is located at the very beginning of the function. diff --git a/Documentation/md/md-cluster.txt b/Documentation/md/md-cluster.txt index 38883276d31c379fe6de344537b08d0b4e10e6a9..82ee51604e9ad498ca52a66dc95086ae10d1e98b 100644 --- a/Documentation/md/md-cluster.txt +++ b/Documentation/md/md-cluster.txt @@ -77,7 +77,7 @@ There are three groups of locks for managing the device: 3.1.2 RESYNCING: informs other nodes that a resync is initiated or ended so that each node may suspend or resume the region. Each RESYNCING message identifies a range of the devices that the - sending node is about to resync. This over-rides any pervious + sending node is about to resync. This overrides any previous notification from that node: only one ranged can be resynced at a time per-node. @@ -321,4 +321,4 @@ The algorithm is: There are somethings which are not supported by cluster MD yet. -- update size and change array_sectors. +- change array_sectors. diff --git a/Documentation/md/raid5-ppl.txt b/Documentation/md/raid5-ppl.txt new file mode 100644 index 0000000000000000000000000000000000000000..127072b093632701f7bb598bae603dd2ec3ede26 --- /dev/null +++ b/Documentation/md/raid5-ppl.txt @@ -0,0 +1,44 @@ +Partial Parity Log + +Partial Parity Log (PPL) is a feature available for RAID5 arrays. The issue +addressed by PPL is that after a dirty shutdown, parity of a particular stripe +may become inconsistent with data on other member disks. If the array is also +in degraded state, there is no way to recalculate parity, because one of the +disks is missing. This can lead to silent data corruption when rebuilding the +array or using it is as degraded - data calculated from parity for array blocks +that have not been touched by a write request during the unclean shutdown can +be incorrect. Such condition is known as the RAID5 Write Hole. Because of +this, md by default does not allow starting a dirty degraded array. + +Partial parity for a write operation is the XOR of stripe data chunks not +modified by this write. It is just enough data needed for recovering from the +write hole. XORing partial parity with the modified chunks produces parity for +the stripe, consistent with its state before the write operation, regardless of +which chunk writes have completed. If one of the not modified data disks of +this stripe is missing, this updated parity can be used to recover its +contents. PPL recovery is also performed when starting an array after an +unclean shutdown and all disks are available, eliminating the need to resync +the array. Because of this, using write-intent bitmap and PPL together is not +supported. + +When handling a write request PPL writes partial parity before new data and +parity are dispatched to disks. PPL is a distributed log - it is stored on +array member drives in the metadata area, on the parity drive of a particular +stripe. It does not require a dedicated journaling drive. Write performance is +reduced by up to 30%-40% but it scales with the number of drives in the array +and the journaling drive does not become a bottleneck or a single point of +failure. + +Unlike raid5-cache, the other solution in md for closing the write hole, PPL is +not a true journal. It does not protect from losing in-flight data, only from +silent data corruption. If a dirty disk of a stripe is lost, no PPL recovery is +performed for this stripe (parity is not updated). So it is possible to have +arbitrary data in the written part of a stripe if that disk is lost. In such +case the behavior is the same as in plain raid5. + +PPL is available for md version-1 metadata and external (specifically IMSM) +metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl. + +Currently, volatile write-back cache should be disabled on all member drives +when using PPL. Otherwise it cannot guarantee consistency in case of power +failure. diff --git a/Documentation/media/Makefile b/Documentation/media/Makefile index 9b3e70b2cab29ddb02a3b61878f67569410764d1..36166952d555fbc5444e6c2c9c11c081c81df2f1 100644 --- a/Documentation/media/Makefile +++ b/Documentation/media/Makefile @@ -1,51 +1,6 @@ -# Rules to convert DOT and SVG to Sphinx images - -SRC_DIR=$(srctree)/Documentation/media - -DOTS = \ - uapi/v4l/pipeline.dot \ - -IMAGES = \ - typical_media_device.svg \ - uapi/dvb/dvbstb.svg \ - uapi/v4l/bayer.svg \ - uapi/v4l/constraints.svg \ - uapi/v4l/crop.svg \ - uapi/v4l/fieldseq_bt.svg \ - uapi/v4l/fieldseq_tb.svg \ - uapi/v4l/nv12mt.svg \ - uapi/v4l/nv12mt_example.svg \ - uapi/v4l/pipeline.svg \ - uapi/v4l/selection.svg \ - uapi/v4l/subdev-image-processing-full.svg \ - uapi/v4l/subdev-image-processing-scaling-multi-source.svg \ - uapi/v4l/subdev-image-processing-crop.svg \ - uapi/v4l/vbi_525.svg \ - uapi/v4l/vbi_625.svg \ - uapi/v4l/vbi_hsync.svg \ - -DOTTGT := $(patsubst %.dot,%.svg,$(DOTS)) -IMGDOT := $(patsubst %,$(SRC_DIR)/%,$(DOTTGT)) - -IMGTGT := $(patsubst %.svg,%.pdf,$(IMAGES)) -IMGPDF := $(patsubst %,$(SRC_DIR)/%,$(IMGTGT)) - -cmd = $(echo-cmd) $(cmd_$(1)) - -quiet_cmd_genpdf = GENPDF $2 - cmd_genpdf = convert $2 $3 - -quiet_cmd_gendot = DOT $2 - cmd_gendot = dot -Tsvg $2 > $3 || { rm -f $3; exit 1; } - -%.pdf: %.svg - @$(call cmd,genpdf,$<,$@) - -%.svg: %.dot - @$(call cmd,gendot,$<,$@) - # Rules to convert a .h file to inline RST documentation +SRC_DIR=$(srctree)/Documentation/media PARSER = $(srctree)/Documentation/sphinx/parse-headers.pl UAPI = $(srctree)/include/uapi/linux KAPI = $(srctree)/include/linux diff --git a/Documentation/media/intro.rst b/Documentation/media/intro.rst index 8f7490c9a8efcda19316968085fce830feeba624..9ce2e23a0236796c557b3dd371ad71db845a646a 100644 --- a/Documentation/media/intro.rst +++ b/Documentation/media/intro.rst @@ -13,9 +13,9 @@ A typical media device hardware is shown at :ref:`typical_media_device`. .. _typical_media_device: -.. figure:: typical_media_device.* - :alt: typical_media_device.pdf / typical_media_device.svg - :align: center +.. kernel-figure:: typical_media_device.svg + :alt: typical_media_device.svg + :align: center Typical Media Device diff --git a/Documentation/media/kapi/cec-core.rst b/Documentation/media/kapi/cec-core.rst index 81c6d8e93774a89bd4d1d745b512ea41e7d1a513..7a04c5386dc8a3ba91c87816eaf9295e194882c6 100644 --- a/Documentation/media/kapi/cec-core.rst +++ b/Documentation/media/kapi/cec-core.rst @@ -27,11 +27,8 @@ HDMI 1.3a specification is sufficient: http://www.microprocessor.org/HDMISpecification13a.pdf -The Kernel Interface -==================== - -CEC Adapter ------------ +CEC Adapter Interface +--------------------- The struct cec_adapter represents the CEC adapter hardware. It is created by calling cec_allocate_adapter() and deleted by calling cec_delete_adapter(): @@ -51,6 +48,7 @@ ops: priv: will be stored in adap->priv and can be used by the adapter ops. + Use cec_get_drvdata(adap) to get the priv pointer. name: the name of the CEC adapter. Note: this name will be copied. @@ -65,6 +63,10 @@ available_las: the number of simultaneous logical addresses that this adapter can handle. Must be 1 <= available_las <= CEC_MAX_LOG_ADDRS. +To obtain the priv pointer use this helper function: + +.. c:function:: + void *cec_get_drvdata(const struct cec_adapter *adap); To register the /dev/cecX device node and the remote control device (if CEC_CAP_RC is set) you call: diff --git a/Documentation/media/kapi/csi2.rst b/Documentation/media/kapi/csi2.rst index 2004db00b12b56a7b56639ec41589b80f2892440..e33fcb967922d460f41a865eb148ee46969427ac 100644 --- a/Documentation/media/kapi/csi2.rst +++ b/Documentation/media/kapi/csi2.rst @@ -45,10 +45,11 @@ where * - bits_per_sample - Number of bits per sample. -The transmitter drivers must configure the CSI-2 transmitter to *LP-11 -mode* whenever the transmitter is powered on but not active. Some -transmitters do this automatically but some have to be explicitly -programmed to do so. +The transmitter drivers must, if possible, configure the CSI-2 +transmitter to *LP-11 mode* whenever the transmitter is powered on but +not active. Some transmitters do this automatically but some have to +be explicitly programmed to do so, and some are unable to do so +altogether due to hardware constraints. Receiver drivers ---------------- diff --git a/Documentation/media/kapi/v4l2-core.rst b/Documentation/media/kapi/v4l2-core.rst index e9677150ed99d45c8922801e37599e8a93133d32..d8f6c46d26d58d58db6384434c4b2583ffc29155 100644 --- a/Documentation/media/kapi/v4l2-core.rst +++ b/Documentation/media/kapi/v4l2-core.rst @@ -1,4 +1,4 @@ -Video2Linux devices +Video4Linux devices ------------------- .. toctree:: diff --git a/Documentation/media/lirc.h.rst.exceptions b/Documentation/media/lirc.h.rst.exceptions index 246c850151d71343b0d212fbbc0f4f631590bfc0..c130617a9986ef4ccab1c060f319e00cb2251449 100644 --- a/Documentation/media/lirc.h.rst.exceptions +++ b/Documentation/media/lirc.h.rst.exceptions @@ -35,7 +35,6 @@ ignore define PULSE_MASK ignore define LIRC_MODE2_SPACE ignore define LIRC_MODE2_PULSE -ignore define LIRC_MODE2_TIMEOUT ignore define LIRC_VALUE_MASK ignore define LIRC_MODE2_MASK diff --git a/Documentation/media/uapi/cec/cec-func-ioctl.rst b/Documentation/media/uapi/cec/cec-func-ioctl.rst index 7dcfd178fb243c33f6ba23bb331236496018809c..22fb6304a2df048c20a5fb8b9046d69d78df3493 100644 --- a/Documentation/media/uapi/cec/cec-func-ioctl.rst +++ b/Documentation/media/uapi/cec/cec-func-ioctl.rst @@ -30,7 +30,7 @@ Arguments ``request`` CEC ioctl request code as defined in the cec.h header file, for - example :c:func:`CEC_ADAP_G_CAPS`. + example :ref:`CEC_ADAP_G_CAPS `. ``argp`` Pointer to a request-specific structure. diff --git a/Documentation/media/uapi/cec/cec-func-open.rst b/Documentation/media/uapi/cec/cec-func-open.rst index 0304388cd15976a7ff00eb46ca9bc9ef5f0a3c94..18dfb62f2efe7de04aa8812aa4d9f1bf0abf18fc 100644 --- a/Documentation/media/uapi/cec/cec-func-open.rst +++ b/Documentation/media/uapi/cec/cec-func-open.rst @@ -33,7 +33,7 @@ Arguments Open flags. Access mode must be ``O_RDWR``. When the ``O_NONBLOCK`` flag is given, the - :ref:`CEC_RECEIVE ` and :c:func:`CEC_DQEVENT` ioctls + :ref:`CEC_RECEIVE ` and :ref:`CEC_DQEVENT ` ioctls will return the ``EAGAIN`` error code when no message or event is available, and ioctls :ref:`CEC_TRANSMIT `, :ref:`CEC_ADAP_S_PHYS_ADDR ` and diff --git a/Documentation/media/uapi/cec/cec-func-poll.rst b/Documentation/media/uapi/cec/cec-func-poll.rst index 6a863cfda6e05172be7e60e930fbd0ff885b3ce4..fa0abd8fb16056ac370e35ae1f0a964ee1ead020 100644 --- a/Documentation/media/uapi/cec/cec-func-poll.rst +++ b/Documentation/media/uapi/cec/cec-func-poll.rst @@ -30,7 +30,7 @@ Arguments List of FD events to be watched ``nfds`` - Number of FD efents at the \*ufds array + Number of FD events at the \*ufds array ``timeout`` Timeout to wait for events @@ -49,7 +49,7 @@ is non-zero). CEC devices set the ``POLLIN`` and ``POLLRDNORM`` flags in the ``revents`` field if there are messages in the receive queue. If the transmit queue has room for new messages, the ``POLLOUT`` and ``POLLWRNORM`` flags are set. If there are events in the event queue, -then the ``POLLPRI`` flag is set. When the function timed out it returns +then the ``POLLPRI`` flag is set. When the function times out it returns a value of zero, on failure it returns -1 and the ``errno`` variable is set appropriately. diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst index 09f09bbe28d4ffb1d5b3291221c4b678e4e38c6e..fcf863ab6f4378343dc764f29c5baca37438f6f1 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst @@ -351,3 +351,16 @@ On success 0 is returned, on error -1 and the ``errno`` variable is set appropriately. The generic error codes are described at the :ref:`Generic Error Codes ` chapter. +The :ref:`ioctl CEC_ADAP_S_LOG_ADDRS ` can return the following +error codes: + +ENOTTY + The ``CEC_CAP_LOG_ADDRS`` capability wasn't set, so this ioctl is not supported. + +EBUSY + The CEC adapter is currently configuring itself, or it is already configured and + ``num_log_addrs`` is non-zero, or another filehandle is in exclusive follower or + initiator mode, or the filehandle is in mode ``CEC_MODE_NO_INITIATOR``. + +EINVAL + The contents of struct :c:type:`cec_log_addrs` is invalid. diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst index a3cdc75cec3e3c0754027869cee43ad8ca019260..9e49d4be35d5b6b7332ee5c6522400f57012b55e 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst @@ -78,3 +78,16 @@ Return Value On success 0 is returned, on error -1 and the ``errno`` variable is set appropriately. The generic error codes are described at the :ref:`Generic Error Codes ` chapter. + +The :ref:`ioctl CEC_ADAP_S_PHYS_ADDR ` can return the following +error codes: + +ENOTTY + The ``CEC_CAP_PHYS_ADDR`` capability wasn't set, so this ioctl is not supported. + +EBUSY + Another filehandle is in exclusive follower or initiator mode, or the filehandle + is in mode ``CEC_MODE_NO_INITIATOR``. + +EINVAL + The physical address is malformed. diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst index 6e589a1fae1704c0eea188c662537d37ea6142b5..4d3570c2e0b3662183743be39de726946a3b2595 100644 --- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst +++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst @@ -56,7 +56,7 @@ it is guaranteed that the state did change in between the two events. * - __u16 - ``phys_addr`` - The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no - valid physical address is set. + valid physical address is set. * - __u16 - ``log_addr_mask`` - The current set of claimed logical addresses. This is 0 if no logical @@ -174,3 +174,14 @@ Return Value On success 0 is returned, on error -1 and the ``errno`` variable is set appropriately. The generic error codes are described at the :ref:`Generic Error Codes ` chapter. + +The :ref:`ioctl CEC_DQEVENT ` can return the following +error codes: + +EAGAIN + This is returned when the filehandle is in non-blocking mode and there + are no pending events. + +ERESTARTSYS + An interrupt (e.g. Ctrl-C) arrived while in blocking mode waiting for + events to arrive. diff --git a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst index e4ded9df0a84a3be52a4f2a5fb09359e1d4fe285..664f0d47bbcdc753a81587ce24b7bd4b4c1f3b30 100644 --- a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst +++ b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst @@ -249,3 +249,15 @@ Return Value On success 0 is returned, on error -1 and the ``errno`` variable is set appropriately. The generic error codes are described at the :ref:`Generic Error Codes ` chapter. + +The :ref:`ioctl CEC_S_MODE ` can return the following +error codes: + +EINVAL + The requested mode is invalid. + +EPERM + Monitor mode is requested without having root permissions + +EBUSY + Someone else is already an exclusive follower or initiator. diff --git a/Documentation/media/uapi/cec/cec-ioc-receive.rst b/Documentation/media/uapi/cec/cec-ioc-receive.rst index dc2adb391c0a2d2183e3e00bc90af520e727c6dd..267044f7ac301244ffeae3d07439b6fc4470b534 100644 --- a/Documentation/media/uapi/cec/cec-ioc-receive.rst +++ b/Documentation/media/uapi/cec/cec-ioc-receive.rst @@ -51,13 +51,13 @@ A received message can be: be non-zero). To send a CEC message the application has to fill in the struct -:c:type:` cec_msg` and pass it to :ref:`ioctl CEC_TRANSMIT `. +:c:type:`cec_msg` and pass it to :ref:`ioctl CEC_TRANSMIT `. The :ref:`ioctl CEC_TRANSMIT ` is only available if ``CEC_CAP_TRANSMIT`` is set. If there is no more room in the transmit queue, then it will return -1 and set errno to the ``EBUSY`` error code. The transmit queue has enough room for 18 messages (about 1 second worth of 2-byte messages). Note that the CEC kernel framework will also reply -to core messages (see :ref:cec-core-processing), so it is not a good +to core messages (see :ref:`cec-core-processing`), so it is not a good idea to fully fill up the transmit queue. If the file descriptor is in non-blocking mode then the transmit will @@ -69,6 +69,18 @@ The ``sequence`` field is filled in for every transmit and this can be checked against the received messages to find the corresponding transmit result. +Normally calling :ref:`ioctl CEC_TRANSMIT ` when the physical +address is invalid (due to e.g. a disconnect) will return ``ENONET``. + +However, the CEC specification allows sending messages from 'Unregistered' to +'TV' when the physical address is invalid since some TVs pull the hotplug detect +pin of the HDMI connector low when they go into standby, or when switching to +another input. + +When the hotplug detect pin goes low the EDID disappears, and thus the +physical address, but the cable is still connected and CEC still works. +In order to detect/wake up the device it is allowed to send poll and 'Image/Text +View On' messages from initiator 0xf ('Unregistered') to destination 0 ('TV'). .. tabularcolumns:: |p{1.0cm}|p{3.5cm}|p{13.0cm}| @@ -289,3 +301,42 @@ Return Value On success 0 is returned, on error -1 and the ``errno`` variable is set appropriately. The generic error codes are described at the :ref:`Generic Error Codes ` chapter. + +The :ref:`ioctl CEC_RECEIVE ` can return the following +error codes: + +EAGAIN + No messages are in the receive queue, and the filehandle is in non-blocking mode. + +ETIMEDOUT + The ``timeout`` was reached while waiting for a message. + +ERESTARTSYS + The wait for a message was interrupted (e.g. by Ctrl-C). + +The :ref:`ioctl CEC_TRANSMIT ` can return the following +error codes: + +ENOTTY + The ``CEC_CAP_TRANSMIT`` capability wasn't set, so this ioctl is not supported. + +EPERM + The CEC adapter is not configured, i.e. :ref:`ioctl CEC_ADAP_S_LOG_ADDRS ` + has never been called. + +ENONET + The CEC adapter is not configured, i.e. :ref:`ioctl CEC_ADAP_S_LOG_ADDRS ` + was called, but the physical address is invalid so no logical address was claimed. + An exception is made in this case for transmits from initiator 0xf ('Unregistered') + to destination 0 ('TV'). In that case the transmit will proceed as usual. + +EBUSY + Another filehandle is in exclusive follower or initiator mode, or the filehandle + is in mode ``CEC_MODE_NO_INITIATOR``. This is also returned if the transmit + queue is full. + +EINVAL + The contents of struct :c:type:`cec_msg` is invalid. + +ERESTARTSYS + The wait for a successful transmit was interrupted (e.g. by Ctrl-C). diff --git a/Documentation/media/uapi/dvb/intro.rst b/Documentation/media/uapi/dvb/intro.rst index 2ed5c23102b492dc5cb134ac2b63c24a63dd466a..652c4aacd2c6da91b010e041c780c3bcb7f9ae17 100644 --- a/Documentation/media/uapi/dvb/intro.rst +++ b/Documentation/media/uapi/dvb/intro.rst @@ -55,9 +55,9 @@ Overview .. _stb_components: -.. figure:: dvbstb.* - :alt: dvbstb.pdf / dvbstb.svg - :align: center +.. kernel-figure:: dvbstb.svg + :alt: dvbstb.svg + :align: center Components of a DVB card/STB diff --git a/Documentation/media/uapi/mediactl/media-types.rst b/Documentation/media/uapi/mediactl/media-types.rst index 3e03dc2e60031b49636d4036a9a1943193cf494c..2a5164aea2b404b807033fd07bc5a6e896cac36c 100644 --- a/Documentation/media/uapi/mediactl/media-types.rst +++ b/Documentation/media/uapi/mediactl/media-types.rst @@ -284,7 +284,8 @@ Types and flags used to represent the media graph elements supported scaling ratios is entity-specific and can differ between the horizontal and vertical directions (in particular scaling can be supported in one direction only). Binning and - skipping are considered as scaling. + sub-sampling (occasionally also referred to as skipping) are + considered as scaling. - .. row 28 diff --git a/Documentation/media/uapi/rc/lirc-dev-intro.rst b/Documentation/media/uapi/rc/lirc-dev-intro.rst index ef97e40f2fd873c7f951f138d877bd4d656a314c..d1936eeb9ce06e607868072a2287b09a49f25dbf 100644 --- a/Documentation/media/uapi/rc/lirc-dev-intro.rst +++ b/Documentation/media/uapi/rc/lirc-dev-intro.rst @@ -27,6 +27,8 @@ What you should see for a chardev: $ ls -l /dev/lirc* crw-rw---- 1 root root 248, 0 Jul 2 22:20 /dev/lirc0 +.. _lirc_modes: + ********** LIRC modes ********** @@ -38,25 +40,62 @@ on the following table. ``LIRC_MODE_MODE2`` - The driver returns a sequence of pulse and space codes to userspace. + The driver returns a sequence of pulse and space codes to userspace, + as a series of u32 values. This mode is used only for IR receive. + The upper 8 bits determine the packet type, and the lower 24 bits + the payload. Use ``LIRC_VALUE()`` macro to get the payload, and + the macro ``LIRC_MODE2()`` will give you the type, which + is one of: + + ``LIRC_MODE2_PULSE`` + + Signifies the presence of IR in microseconds. + + ``LIRC_MODE2_SPACE`` + + Signifies absence of IR in microseconds. + + ``LIRC_MODE2_FREQUENCY`` + + If measurement of the carrier frequency was enabled with + :ref:`lirc_set_measure_carrier_mode` then this packet gives you + the carrier frequency in Hertz. + + ``LIRC_MODE2_TIMEOUT`` + + If timeout reports are enabled with + :ref:`lirc_set_rec_timeout_reports`, when the timeout set with + :ref:`lirc_set_rec_timeout` expires due to no IR being detected, + this packet will be sent, with the number of microseconds with + no IR. + .. _lirc-mode-lirccode: ``LIRC_MODE_LIRCCODE`` - The IR signal is decoded internally by the receiver. The LIRC interface - returns the scancode as an integer value. This is the usual mode used - by several TV media cards. + This mode can be used for IR receive and send. - This mode is used only for IR receive. + The IR signal is decoded internally by the receiver, or encoded by the + transmitter. The LIRC interface represents the scancode as byte string, + which might not be a u32, it can be any length. The value is entirely + driver dependent. This mode is used by some older lirc drivers. + + The length of each code depends on the driver, which can be retrieved + with :ref:`lirc_get_length`. This length is used both + for transmitting and receiving IR. .. _lirc-mode-pulse: ``LIRC_MODE_PULSE`` - On puse mode, a sequence of pulse/space integer values are written to the - lirc device using :Ref:`lirc-write`. + In pulse mode, a sequence of pulse/space integer values are written to the + lirc device using :ref:`lirc-write`. + + The values are alternating pulse and space lengths, in microseconds. The + first and last entry must be a pulse, so there must be an odd number + of entries. This mode is used only for IR send. diff --git a/Documentation/media/uapi/rc/lirc-get-features.rst b/Documentation/media/uapi/rc/lirc-get-features.rst index 79e07b4d44d681e04f2679ffb608d3943ecba905..64f89a4f9d9ce2d937cb6b518e900a96ba8b4975 100644 --- a/Documentation/media/uapi/rc/lirc-get-features.rst +++ b/Documentation/media/uapi/rc/lirc-get-features.rst @@ -48,8 +48,8 @@ LIRC features ``LIRC_CAN_REC_PULSE`` - The driver is capable of receiving using - :ref:`LIRC_MODE_PULSE `. + Unused. Kept just to avoid breaking uAPI. + :ref:`LIRC_MODE_PULSE ` can only be used for transmitting. .. _LIRC-CAN-REC-MODE2: @@ -156,19 +156,22 @@ LIRC features ``LIRC_CAN_SEND_PULSE`` - The driver supports sending using :ref:`LIRC_MODE_PULSE `. + The driver supports sending (also called as IR blasting or IR TX) using + :ref:`LIRC_MODE_PULSE `. .. _LIRC-CAN-SEND-MODE2: ``LIRC_CAN_SEND_MODE2`` - The driver supports sending using :ref:`LIRC_MODE_MODE2 `. + Unused. Kept just to avoid breaking uAPI. + :ref:`LIRC_MODE_MODE2 ` can only be used for receiving. .. _LIRC-CAN-SEND-LIRCCODE: ``LIRC_CAN_SEND_LIRCCODE`` - The driver supports sending codes (also called as IR blasting or IR TX). + The driver supports sending (also called as IR blasting or IR TX) using + :ref:`LIRC_MODE_LIRCCODE `. Return Value diff --git a/Documentation/media/uapi/rc/lirc-get-length.rst b/Documentation/media/uapi/rc/lirc-get-length.rst index 8c2747c8d2c9e2cab495e47acd87b19d86984a1f..3990af5de0e917fe135b8333e7ada35b90a9481d 100644 --- a/Documentation/media/uapi/rc/lirc-get-length.rst +++ b/Documentation/media/uapi/rc/lirc-get-length.rst @@ -30,7 +30,8 @@ Arguments Description =========== -Retrieves the code length in bits (only for ``LIRC-MODE-LIRCCODE``). +Retrieves the code length in bits (only for +:ref:`LIRC_MODE_LIRCCODE `). Reads on the device must be done in blocks matching the bit count. The bit could should be rounded up so that it matches full bytes. diff --git a/Documentation/media/uapi/rc/lirc-get-rec-mode.rst b/Documentation/media/uapi/rc/lirc-get-rec-mode.rst index a5023e0194c13223c2a6a9b433af41cbed5e3d58..a4eb6c0a26e9ca9481528b958086d945ca4524cc 100644 --- a/Documentation/media/uapi/rc/lirc-get-rec-mode.rst +++ b/Documentation/media/uapi/rc/lirc-get-rec-mode.rst @@ -35,8 +35,8 @@ Description Get/set supported receive modes. Only :ref:`LIRC_MODE_MODE2 ` and :ref:`LIRC_MODE_LIRCCODE ` are supported for IR -receive. - +receive. Use :ref:`lirc_get_features` to find out which modes the driver +supports. Return Value ============ diff --git a/Documentation/media/uapi/rc/lirc-get-send-mode.rst b/Documentation/media/uapi/rc/lirc-get-send-mode.rst index 51ac13428969add93744281d38604411945ef6c4..a169b234290e9023572a1ae8d78fdc434a5f9b02 100644 --- a/Documentation/media/uapi/rc/lirc-get-send-mode.rst +++ b/Documentation/media/uapi/rc/lirc-get-send-mode.rst @@ -34,9 +34,12 @@ Arguments Description =========== -Get/set supported transmit mode. +Get/set current transmit mode. -Only :ref:`LIRC_MODE_PULSE ` is supported by for IR send. +Only :ref:`LIRC_MODE_PULSE ` and +:ref:`LIRC_MODE_LIRCCODE ` is supported by for IR send, +depending on the driver. Use :ref:`lirc_get_features` to find out which +modes the driver supports. Return Value ============ diff --git a/Documentation/media/uapi/rc/lirc-read.rst b/Documentation/media/uapi/rc/lirc-read.rst index 4c678f60e87204908ad148b027ce60dc5ae4596b..ff14a69104e54f373a963eddacb259ffd4fc025d 100644 --- a/Documentation/media/uapi/rc/lirc-read.rst +++ b/Documentation/media/uapi/rc/lirc-read.rst @@ -44,17 +44,13 @@ descriptor ``fd`` into the buffer starting at ``buf``. If ``count`` is zero, :ref:`read() ` returns zero and has no other results. If ``count`` is greater than ``SSIZE_MAX``, the result is unspecified. -The lircd userspace daemon reads raw IR data from the LIRC chardev. The -exact format of the data depends on what modes a driver supports, and -what mode has been selected. lircd obtains supported modes and sets the -active mode via the ioctl interface, detailed at :ref:`lirc_func`. -The generally preferred mode for receive is -:ref:`LIRC_MODE_MODE2 `, in which packets containing an -int value describing an IR signal are read from the chardev. +The exact format of the data depends on what :ref:`lirc_modes` a driver +uses. Use :ref:`lirc_get_features` to get the supported mode. -See also -`http://www.lirc.org/html/technical.html `__ -for more info. +The generally preferred mode for receive is +:ref:`LIRC_MODE_MODE2 `, +in which packets containing an int value describing an IR signal are +read from the chardev. Return Value ============ diff --git a/Documentation/media/uapi/rc/lirc-set-rec-carrier-range.rst b/Documentation/media/uapi/rc/lirc-set-rec-carrier-range.rst index a83fbbfa0d3bcd12517907c4f14535d1f4a4b18f..a89246806c4b908a52931200f3cd2b2a804dd13c 100644 --- a/Documentation/media/uapi/rc/lirc-set-rec-carrier-range.rst +++ b/Documentation/media/uapi/rc/lirc-set-rec-carrier-range.rst @@ -9,7 +9,7 @@ ioctl LIRC_SET_REC_CARRIER_RANGE Name ==== -LIRC_SET_REC_CARRIER_RANGE - Set lower bond of the carrier used to modulate +LIRC_SET_REC_CARRIER_RANGE - Set lower bound of the carrier used to modulate IR receive. Synopsis diff --git a/Documentation/media/uapi/rc/lirc-set-rec-timeout-reports.rst b/Documentation/media/uapi/rc/lirc-set-rec-timeout-reports.rst index 9c501bbf4c626c25401545c35d0546d2a827403a..86353e6026955f9ff85005325b3d1cc267886a76 100644 --- a/Documentation/media/uapi/rc/lirc-set-rec-timeout-reports.rst +++ b/Documentation/media/uapi/rc/lirc-set-rec-timeout-reports.rst @@ -31,6 +31,8 @@ Arguments Description =========== +.. _lirc-mode2-timeout: + Enable or disable timeout reports for IR receive. By default, timeout reports should be turned off. diff --git a/Documentation/media/uapi/rc/lirc-write.rst b/Documentation/media/uapi/rc/lirc-write.rst index 3b035c6613b1b4656ef4abdd84048c10d79c88a8..2aad0fef4a5b19f242ff4d42841551214005d4a8 100644 --- a/Documentation/media/uapi/rc/lirc-write.rst +++ b/Documentation/media/uapi/rc/lirc-write.rst @@ -42,13 +42,16 @@ Description referenced by the file descriptor ``fd`` from the buffer starting at ``buf``. -The data written to the chardev is a pulse/space sequence of integer -values. Pulses and spaces are only marked implicitly by their position. -The data must start and end with a pulse, therefore, the data must -always include an uneven number of samples. The write function must -block until the data has been transmitted by the hardware. If more data -is provided than the hardware can send, the driver returns ``EINVAL``. - +The exact format of the data depends on what mode a driver uses, use +:ref:`lirc_get_features` to get the supported mode. + +When in :ref:`LIRC_MODE_PULSE ` mode, the data written to +the chardev is a pulse/space sequence of integer values. Pulses and spaces +are only marked implicitly by their position. The data must start and end +with a pulse, therefore, the data must always include an uneven number of +samples. The write function must block until the data has been transmitted +by the hardware. If more data is provided than the hardware can send, the +driver returns ``EINVAL``. Return Value ============ diff --git a/Documentation/media/uapi/v4l/buffer.rst b/Documentation/media/uapi/v4l/buffer.rst index ac58966ccb9b22530f343eea2e4c7d62c5151ba7..ae6ee73f151c658dc613860f2d39da1cac7de0dd 100644 --- a/Documentation/media/uapi/v4l/buffer.rst +++ b/Documentation/media/uapi/v4l/buffer.rst @@ -34,6 +34,125 @@ flags are copied from the OUTPUT video buffer to the CAPTURE video buffer. +Interactions between formats, controls and buffers +================================================== + +V4L2 exposes parameters that influence the buffer size, or the way data is +laid out in the buffer. Those parameters are exposed through both formats and +controls. One example of such a control is the ``V4L2_CID_ROTATE`` control +that modifies the direction in which pixels are stored in the buffer, as well +as the buffer size when the selected format includes padding at the end of +lines. + +The set of information needed to interpret the content of a buffer (e.g. the +pixel format, the line stride, the tiling orientation or the rotation) is +collectively referred to in the rest of this section as the buffer layout. + +Controls that can modify the buffer layout shall set the +``V4L2_CTRL_FLAG_MODIFY_LAYOUT`` flag. + +Modifying formats or controls that influence the buffer size or layout require +the stream to be stopped. Any attempt at such a modification while the stream +is active shall cause the ioctl setting the format or the control to return +the ``EBUSY`` error code. In that case drivers shall also set the +``V4L2_CTRL_FLAG_GRABBED`` flag when calling +:c:func:`VIDIOC_QUERYCTRL` or :c:func:`VIDIOC_QUERY_EXT_CTRL` for such a +control while the stream is active. + +.. note:: + + The :c:func:`VIDIOC_S_SELECTION` ioctl can, depending on the hardware (for + instance if the device doesn't include a scaler), modify the format in + addition to the selection rectangle. Similarly, the + :c:func:`VIDIOC_S_INPUT`, :c:func:`VIDIOC_S_OUTPUT`, :c:func:`VIDIOC_S_STD` + and :c:func:`VIDIOC_S_DV_TIMINGS` ioctls can also modify the format and + selection rectangles. When those ioctls result in a buffer size or layout + change, drivers shall handle that condition as they would handle it in the + :c:func:`VIDIOC_S_FMT` ioctl in all cases described in this section. + +Controls that only influence the buffer layout can be modified at any time +when the stream is stopped. As they don't influence the buffer size, no +special handling is needed to synchronize those controls with buffer +allocation and the ``V4L2_CTRL_FLAG_GRABBED`` flag is cleared once the +stream is stopped. + +Formats and controls that influence the buffer size interact with buffer +allocation. The simplest way to handle this is for drivers to always require +buffers to be reallocated in order to change those formats or controls. In +that case, to perform such changes, userspace applications shall first stop +the video stream with the :c:func:`VIDIOC_STREAMOFF` ioctl if it is running +and free all buffers with the :c:func:`VIDIOC_REQBUFS` ioctl if they are +allocated. After freeing all buffers the ``V4L2_CTRL_FLAG_GRABBED`` flag +for controls is cleared. The format or controls can then be modified, and +buffers shall then be reallocated and the stream restarted. A typical ioctl +sequence is + + #. VIDIOC_STREAMOFF + #. VIDIOC_REQBUFS(0) + #. VIDIOC_S_EXT_CTRLS + #. VIDIOC_S_FMT + #. VIDIOC_REQBUFS(n) + #. VIDIOC_QBUF + #. VIDIOC_STREAMON + +The second :c:func:`VIDIOC_REQBUFS` call will take the new format and control +value into account to compute the buffer size to allocate. Applications can +also retrieve the size by calling the :c:func:`VIDIOC_G_FMT` ioctl if needed. + +.. note:: + + The API doesn't mandate the above order for control (3.) and format (4.) + changes. Format and controls can be set in a different order, or even + interleaved, depending on the device and use case. For instance some + controls might behave differently for different pixel formats, in which + case the format might need to be set first. + +When reallocation is required, any attempt to modify format or controls that +influences the buffer size while buffers are allocated shall cause the format +or control set ioctl to return the ``EBUSY`` error. Any attempt to queue a +buffer too small for the current format or controls shall cause the +:c:func:`VIDIOC_QBUF` ioctl to return a ``EINVAL`` error. + +Buffer reallocation is an expensive operation. To avoid that cost, drivers can +(and are encouraged to) allow format or controls that influence the buffer +size to be changed with buffers allocated. In that case, a typical ioctl +sequence to modify format and controls is + + #. VIDIOC_STREAMOFF + #. VIDIOC_S_EXT_CTRLS + #. VIDIOC_S_FMT + #. VIDIOC_QBUF + #. VIDIOC_STREAMON + +For this sequence to operate correctly, queued buffers need to be large enough +for the new format or controls. Drivers shall return a ``ENOSPC`` error in +response to format change (:c:func:`VIDIOC_S_FMT`) or control changes +(:c:func:`VIDIOC_S_CTRL` or :c:func:`VIDIOC_S_EXT_CTRLS`) if buffers too small +for the new format are currently queued. As a simplification, drivers are +allowed to return a ``EBUSY`` error from these ioctls if any buffer is +currently queued, without checking the queued buffers sizes. + +Additionally, drivers shall return a ``EINVAL`` error from the +:c:func:`VIDIOC_QBUF` ioctl if the buffer being queued is too small for the +current format or controls. Together, these requirements ensure that queued +buffers will always be large enough for the configured format and controls. + +Userspace applications can query the buffer size required for a given format +and controls by first setting the desired control values and then trying the +desired format. The :c:func:`VIDIOC_TRY_FMT` ioctl will return the required +buffer size. + + #. VIDIOC_S_EXT_CTRLS(x) + #. VIDIOC_TRY_FMT() + #. VIDIOC_S_EXT_CTRLS(y) + #. VIDIOC_TRY_FMT() + +The :c:func:`VIDIOC_CREATE_BUFS` ioctl can then be used to allocate buffers +based on the queried sizes (for instance by allocating a set of buffers large +enough for all the desired formats and controls, or by allocating separate set +of appropriately sized buffers for each use case). + + .. c:type:: v4l2_buffer struct v4l2_buffer @@ -330,6 +449,9 @@ enum v4l2_buf_type - 12 - Buffer for Software Defined Radio (SDR) output stream, see :ref:`sdr`. + * - ``V4L2_BUF_TYPE_META_CAPTURE`` + - 13 + - Buffer for metadata capture, see :ref:`metadata`. diff --git a/Documentation/media/uapi/v4l/crop.rst b/Documentation/media/uapi/v4l/crop.rst index be58894c9c89d48a7bb58fff741d0313d8d1d4fe..182565b9ace492fcb622493ca7d674b426a56ac6 100644 --- a/Documentation/media/uapi/v4l/crop.rst +++ b/Documentation/media/uapi/v4l/crop.rst @@ -53,8 +53,8 @@ Cropping Structures .. _crop-scale: -.. figure:: crop.* - :alt: crop.pdf / crop.svg +.. kernel-figure:: crop.svg + :alt: crop.svg :align: center Image Cropping, Insertion and Scaling diff --git a/Documentation/media/uapi/v4l/depth-formats.rst b/Documentation/media/uapi/v4l/depth-formats.rst index 82f183870aaeb7c60e182768ead193d5f0d0eae8..d1641e9687a61d8fba79a53da38b393cd50a85ec 100644 --- a/Documentation/media/uapi/v4l/depth-formats.rst +++ b/Documentation/media/uapi/v4l/depth-formats.rst @@ -12,4 +12,5 @@ Depth data provides distance to points, mapped onto the image plane .. toctree:: :maxdepth: 1 + pixfmt-inzi pixfmt-z16 diff --git a/Documentation/media/uapi/v4l/dev-capture.rst b/Documentation/media/uapi/v4l/dev-capture.rst index 32b32055d0700666d450de225508accf68f62e26..4218742ab5d91364fc873ca049e878cfd053007e 100644 --- a/Documentation/media/uapi/v4l/dev-capture.rst +++ b/Documentation/media/uapi/v4l/dev-capture.rst @@ -42,8 +42,8 @@ Video capture devices shall support :ref:`audio input

    ,qdf2400_e44". - */ - if (!strcmp(device->options, "qdf2400_e44")) - device->con->write = qdf2400_e44_early_write; - else - device->con->write = pl011_early_write; + device->con->write = pl011_early_write; return 0; } OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); -EARLYCON_DECLARE(qdf2400_e44, pl011_early_console_setup); + +/* + * On Qualcomm Datacenter Technologies QDF2400 SOCs affected by + * Erratum 44, traditional earlycon can be enabled by specifying + * "earlycon=qdf2400_e44,
    ". Any options are ignored. + * + * Alternatively, you can just specify "earlycon", and the early console + * will be enabled with the information from the SPCR table. In this + * case, the SPCR code will detect the need for the E44 work-around, + * and set the console name to "qdf2400_e44". + */ +static int __init +qdf2400_e44_early_console_setup(struct earlycon_device *device, + const char *opt) +{ + if (!device->port.membase) + return -ENODEV; + + device->con->write = qdf2400_e44_early_write; + return 0; +} +EARLYCON_DECLARE(qdf2400_e44, qdf2400_e44_early_console_setup); #else #define AMBA_CONSOLE NULL diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 1f50a83ef958609c3f27473b135e84e65303330d..c355ac9abafcc12adb5f8fd3205adaf4a16f86c1 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +70,7 @@ #include #include "serial_mctrl_gpio.h" +#include "atmel_serial.h" static void atmel_start_rx(struct uart_port *port); static void atmel_stop_rx(struct uart_port *port); @@ -119,8 +119,9 @@ struct atmel_uart_char { /* * at91: 6 USARTs and one DBGU port (SAM9260) * avr32: 4 + * samx7: 3 USARTs and 5 UARTs */ -#define ATMEL_MAX_UART 7 +#define ATMEL_MAX_UART 8 /* * We wrap our port structure around the generic uart_port. @@ -175,6 +176,7 @@ struct atmel_uart_port { unsigned int pending_status; spinlock_t lock_suspended; +#ifdef CONFIG_PM struct { u32 cr; u32 mr; @@ -185,6 +187,7 @@ struct atmel_uart_port { u32 fmr; u32 fimr; } cache; +#endif int (*prepare_rx)(struct uart_port *port); int (*prepare_tx)(struct uart_port *port); diff --git a/include/linux/atmel_serial.h b/drivers/tty/serial/atmel_serial.h similarity index 100% rename from include/linux/atmel_serial.h rename to drivers/tty/serial/atmel_serial.h diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c index ebd8569f9ad5da126b2ff15985943e630135925a..9fff25be87f9db91273aa96e5b015705e7fd9118 100644 --- a/drivers/tty/serial/efm32-uart.c +++ b/drivers/tty/serial/efm32-uart.c @@ -27,6 +27,7 @@ #define UARTn_FRAME 0x04 #define UARTn_FRAME_DATABITS__MASK 0x000f #define UARTn_FRAME_DATABITS(n) ((n) - 3) +#define UARTn_FRAME_PARITY__MASK 0x0300 #define UARTn_FRAME_PARITY_NONE 0x0000 #define UARTn_FRAME_PARITY_EVEN 0x0200 #define UARTn_FRAME_PARITY_ODD 0x0300 @@ -572,12 +573,16 @@ static void efm32_uart_console_get_options(struct efm32_uart_port *efm_port, 16 * (4 + (clkdiv >> 6))); frame = efm32_uart_read32(efm_port, UARTn_FRAME); - if (frame & UARTn_FRAME_PARITY_ODD) + switch (frame & UARTn_FRAME_PARITY__MASK) { + case UARTn_FRAME_PARITY_ODD: *parity = 'o'; - else if (frame & UARTn_FRAME_PARITY_EVEN) + break; + case UARTn_FRAME_PARITY_EVEN: *parity = 'e'; - else + break; + default: *parity = 'n'; + } *bits = (frame & UARTn_FRAME_DATABITS__MASK) - UARTn_FRAME_DATABITS(4) + 4; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index f02934ffb3293de45601f434be9dd7786cafdfe4..15df1ba7809510b45c6d96b17d72e92064301973 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1705,6 +1705,13 @@ lpuart_console_write(struct console *co, const char *s, unsigned int count) { struct lpuart_port *sport = lpuart_ports[co->index]; unsigned char old_cr2, cr2; + unsigned long flags; + int locked = 1; + + if (sport->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&sport->port.lock, flags); + else + spin_lock_irqsave(&sport->port.lock, flags); /* first save CR2 and then disable interrupts */ cr2 = old_cr2 = readb(sport->port.membase + UARTCR2); @@ -1719,6 +1726,9 @@ lpuart_console_write(struct console *co, const char *s, unsigned int count) barrier(); writeb(old_cr2, sport->port.membase + UARTCR2); + + if (locked) + spin_unlock_irqrestore(&sport->port.lock, flags); } static void @@ -1726,6 +1736,13 @@ lpuart32_console_write(struct console *co, const char *s, unsigned int count) { struct lpuart_port *sport = lpuart_ports[co->index]; unsigned long old_cr, cr; + unsigned long flags; + int locked = 1; + + if (sport->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&sport->port.lock, flags); + else + spin_lock_irqsave(&sport->port.lock, flags); /* first save CR2 and then disable interrupts */ cr = old_cr = lpuart32_read(sport->port.membase + UARTCTRL); @@ -1740,6 +1757,9 @@ lpuart32_console_write(struct console *co, const char *s, unsigned int count) barrier(); lpuart32_write(old_cr, sport->port.membase + UARTCTRL); + + if (locked) + spin_unlock_irqrestore(&sport->port.lock, flags); } /* diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index 157883653256da9ebeae89d7dabead3285656c82..f190a84a02465668d64f3b6a57d6462588d781ad 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -1382,9 +1382,9 @@ static struct spi_driver ifx_spi_driver = { static void __exit ifx_spi_exit(void) { /* unregister */ + spi_unregister_driver(&ifx_spi_driver); tty_unregister_driver(tty_drv); put_tty_driver(tty_drv); - spi_unregister_driver(&ifx_spi_driver); unregister_reboot_notifier(&ifx_modem_reboot_notifier_block); } diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index e3e152cbc75e71d7bc4e966f8a4ec3a1884c2b95..bbefddd92bfeae77b637033af8028ac481642931 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -719,6 +719,27 @@ static irqreturn_t imx_rxint(int irq, void *dev_id) return IRQ_HANDLED; } +static void imx_disable_rx_int(struct imx_port *sport) +{ + unsigned long temp; + + sport->dma_is_rxing = 1; + + /* disable the receiver ready and aging timer interrupts */ + temp = readl(sport->port.membase + UCR1); + temp &= ~(UCR1_RRDYEN); + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR2); + temp &= ~(UCR2_ATEN); + writel(temp, sport->port.membase + UCR2); + + /* disable the rx errors interrupts */ + temp = readl(sport->port.membase + UCR4); + temp &= ~UCR4_OREN; + writel(temp, sport->port.membase + UCR4); +} + static void clear_rx_errors(struct imx_port *sport); static int start_rx_dma(struct imx_port *sport); /* @@ -734,21 +755,8 @@ static void imx_dma_rxint(struct imx_port *sport) temp = readl(sport->port.membase + USR2); if ((temp & USR2_RDR) && !sport->dma_is_rxing) { - sport->dma_is_rxing = 1; - /* disable the receiver ready and aging timer interrupts */ - temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_RRDYEN); - writel(temp, sport->port.membase + UCR1); - - temp = readl(sport->port.membase + UCR2); - temp &= ~(UCR2_ATEN); - writel(temp, sport->port.membase + UCR2); - - /* disable the rx errors interrupts */ - temp = readl(sport->port.membase + UCR4); - temp &= ~UCR4_OREN; - writel(temp, sport->port.membase + UCR4); + imx_disable_rx_int(sport); /* tell the DMA to receive the data. */ start_rx_dma(sport); @@ -1317,19 +1325,10 @@ static int imx_startup(struct uart_port *port) if (!is_imx1_uart(sport)) { temp = readl(sport->port.membase + UCR3); - /* - * The effect of RI and DCD differs depending on the UFCR_DCEDTE - * bit. In DCE mode they control the outputs, in DTE mode they - * enable the respective irqs. At least the DCD irq cannot be - * cleared on i.MX25 at least, so it's not usable and must be - * disabled. I don't have test hardware to check if RI has the - * same problem but I consider this likely so it's disabled for - * now, too. - */ - temp |= IMX21_UCR3_RXDMUXSEL | UCR3_ADNIMP | - UCR3_DTRDEN | UCR3_RI | UCR3_DCD; + temp |= UCR3_DTRDEN | UCR3_RI | UCR3_DCD; if (sport->dte_mode) + /* disable broken interrupts */ temp &= ~(UCR3_RI | UCR3_DCD); writel(temp, sport->port.membase + UCR3); @@ -1339,6 +1338,33 @@ static int imx_startup(struct uart_port *port) * Enable modem status interrupts */ imx_enable_ms(&sport->port); + + /* + * If the serial port is opened for reading start RX DMA immediately + * instead of waiting for RX FIFO interrupts. In our iMX53 the average + * delay for the first reception dropped from approximately 35000 + * microseconds to 1000 microseconds. + */ + if (sport->dma_is_enabled) { + struct tty_struct *tty = sport->port.state->port.tty; + struct tty_file_private *file_priv; + int readcnt = 0; + + spin_lock(&tty->files_lock); + + if (!list_empty(&tty->tty_files)) + list_for_each_entry(file_priv, &tty->tty_files, list) + if (!(file_priv->file->f_flags & O_WRONLY)) + readcnt++; + + spin_unlock(&tty->files_lock); + + if (readcnt > 0) { + imx_disable_rx_int(sport); + start_rx_dma(sport); + } + } + spin_unlock_irqrestore(&sport->port.lock, flags); return 0; @@ -1584,8 +1610,6 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, ufcr = readl(sport->port.membase + UFCR); ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div); - if (sport->dte_mode) - ufcr |= UFCR_DCEDTE; writel(ufcr, sport->port.membase + UFCR); writel(num, sport->port.membase + UBIR); @@ -2153,6 +2177,37 @@ static int serial_imx_probe(struct platform_device *pdev) UCR1_TXMPTYEN | UCR1_RTSDEN); writel_relaxed(reg, sport->port.membase + UCR1); + if (!is_imx1_uart(sport) && sport->dte_mode) { + /* + * The DCEDTE bit changes the direction of DSR, DCD, DTR and RI + * and influences if UCR3_RI and UCR3_DCD changes the level of RI + * and DCD (when they are outputs) or enables the respective + * irqs. So set this bit early, i.e. before requesting irqs. + */ + reg = readl(sport->port.membase + UFCR); + if (!(reg & UFCR_DCEDTE)) + writel(reg | UFCR_DCEDTE, sport->port.membase + UFCR); + + /* + * Disable UCR3_RI and UCR3_DCD irqs. They are also not + * enabled later because they cannot be cleared + * (confirmed on i.MX25) which makes them unusable. + */ + writel(IMX21_UCR3_RXDMUXSEL | UCR3_ADNIMP | UCR3_DSR, + sport->port.membase + UCR3); + + } else { + unsigned long ucr3 = UCR3_DSR; + + reg = readl(sport->port.membase + UFCR); + if (reg & UFCR_DCEDTE) + writel(reg & ~UFCR_DCEDTE, sport->port.membase + UFCR); + + if (!is_imx1_uart(sport)) + ucr3 |= IMX21_UCR3_RXDMUXSEL | UCR3_ADNIMP; + writel(ucr3, sport->port.membase + UCR3); + } + clk_disable_unprepare(sport->clk_ipg); /* diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 6c6f82ad8d5cb63982fc35c9047e64a8a18135a0..1ea05ac57aa7197b4f690776acc609d376faade4 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1597,6 +1597,9 @@ static struct omap_uart_port_info *of_get_uart_port_info(struct device *dev) of_property_read_u32(dev->of_node, "clock-frequency", &omap_up_info->uartclk); + + omap_up_info->flags = UPF_BOOT_AUTOCONF; + return omap_up_info; } @@ -1767,7 +1770,8 @@ static int serial_omap_probe(struct platform_device *pdev) return 0; err_add_port: - pm_runtime_put(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_qos_remove_request(&up->pm_qos_request); device_init_wakeup(up->dev, false); @@ -1780,9 +1784,13 @@ static int serial_omap_remove(struct platform_device *dev) { struct uart_omap_port *up = platform_get_drvdata(dev); + pm_runtime_get_sync(up->dev); + + uart_remove_one_port(&serial_omap_reg, &up->port); + + pm_runtime_dont_use_autosuspend(up->dev); pm_runtime_put_sync(up->dev); pm_runtime_disable(up->dev); - uart_remove_one_port(&serial_omap_reg, &up->port); pm_qos_remove_request(&up->pm_qos_request); device_init_wakeup(&dev->dev, false); diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 7a17aedbf902e05034129a832941f27fe5dc38c8..8aca18c4cdea4076e8666baff7d7f4953d6a6e1f 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -859,7 +859,7 @@ static void s3c24xx_serial_break_ctl(struct uart_port *port, int break_state) static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) { struct s3c24xx_uart_dma *dma = p->dma; - unsigned long flags; + int ret; /* Default slave configuration parameters */ dma->rx_conf.direction = DMA_DEV_TO_MEM; @@ -884,8 +884,8 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) dma->tx_chan = dma_request_chan(p->port.dev, "tx"); if (IS_ERR(dma->tx_chan)) { - dma_release_channel(dma->rx_chan); - return PTR_ERR(dma->tx_chan); + ret = PTR_ERR(dma->tx_chan); + goto err_release_rx; } dmaengine_slave_config(dma->tx_chan, &dma->tx_conf); @@ -894,26 +894,38 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p) dma->rx_size = PAGE_SIZE; dma->rx_buf = kmalloc(dma->rx_size, GFP_KERNEL); - if (!dma->rx_buf) { - dma_release_channel(dma->rx_chan); - dma_release_channel(dma->tx_chan); - return -ENOMEM; + ret = -ENOMEM; + goto err_release_tx; } - dma->rx_addr = dma_map_single(dma->rx_chan->device->dev, dma->rx_buf, + dma->rx_addr = dma_map_single(p->port.dev, dma->rx_buf, dma->rx_size, DMA_FROM_DEVICE); - - spin_lock_irqsave(&p->port.lock, flags); + if (dma_mapping_error(p->port.dev, dma->rx_addr)) { + ret = -EIO; + goto err_free_rx; + } /* TX buffer */ - dma->tx_addr = dma_map_single(dma->tx_chan->device->dev, - p->port.state->xmit.buf, + dma->tx_addr = dma_map_single(p->port.dev, p->port.state->xmit.buf, UART_XMIT_SIZE, DMA_TO_DEVICE); - - spin_unlock_irqrestore(&p->port.lock, flags); + if (dma_mapping_error(p->port.dev, dma->tx_addr)) { + ret = -EIO; + goto err_unmap_rx; + } return 0; + +err_unmap_rx: + dma_unmap_single(p->port.dev, dma->rx_addr, dma->rx_size, + DMA_FROM_DEVICE); +err_free_rx: + kfree(dma->rx_buf); +err_release_tx: + dma_release_channel(dma->tx_chan); +err_release_rx: + dma_release_channel(dma->rx_chan); + return ret; } static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) @@ -922,7 +934,7 @@ static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) if (dma->rx_chan) { dmaengine_terminate_all(dma->rx_chan); - dma_unmap_single(dma->rx_chan->device->dev, dma->rx_addr, + dma_unmap_single(p->port.dev, dma->rx_addr, dma->rx_size, DMA_FROM_DEVICE); kfree(dma->rx_buf); dma_release_channel(dma->rx_chan); @@ -931,7 +943,7 @@ static void s3c24xx_serial_release_dma(struct s3c24xx_uart_port *p) if (dma->tx_chan) { dmaengine_terminate_all(dma->tx_chan); - dma_unmap_single(dma->tx_chan->device->dev, dma->tx_addr, + dma_unmap_single(p->port.dev, dma->tx_addr, UART_XMIT_SIZE, DMA_TO_DEVICE); dma_release_channel(dma->tx_chan); dma->tx_chan = NULL; diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index 771f361c47ea17850cf4b946d135795e100c0865..041625cc24bbe736cdc021fa0385fbe332f2d792 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include @@ -103,7 +103,7 @@ struct sbd_port { struct sbd_duart { struct sbd_port sport[2]; unsigned long mapctrl; - atomic_t map_guard; + refcount_t map_guard; }; #define to_sport(uport) container_of(uport, struct sbd_port, port) @@ -654,15 +654,13 @@ static void sbd_release_port(struct uart_port *uport) { struct sbd_port *sport = to_sport(uport); struct sbd_duart *duart = sport->duart; - int map_guard; iounmap(sport->memctrl); sport->memctrl = NULL; iounmap(uport->membase); uport->membase = NULL; - map_guard = atomic_add_return(-1, &duart->map_guard); - if (!map_guard) + if(refcount_dec_and_test(&duart->map_guard)) release_mem_region(duart->mapctrl, DUART_CHANREG_SPACING); release_mem_region(uport->mapbase, DUART_CHANREG_SPACING); } @@ -698,7 +696,6 @@ static int sbd_request_port(struct uart_port *uport) { const char *err = KERN_ERR "sbd: Unable to reserve MMIO resource\n"; struct sbd_duart *duart = to_sport(uport)->duart; - int map_guard; int ret = 0; if (!request_mem_region(uport->mapbase, DUART_CHANREG_SPACING, @@ -706,11 +703,11 @@ static int sbd_request_port(struct uart_port *uport) printk(err); return -EBUSY; } - map_guard = atomic_add_return(1, &duart->map_guard); - if (map_guard == 1) { + refcount_inc(&duart->map_guard); + if (refcount_read(&duart->map_guard) == 1) { if (!request_mem_region(duart->mapctrl, DUART_CHANREG_SPACING, "sb1250-duart")) { - atomic_add(-1, &duart->map_guard); + refcount_dec(&duart->map_guard); printk(err); ret = -EBUSY; } @@ -718,8 +715,7 @@ static int sbd_request_port(struct uart_port *uport) if (!ret) { ret = sbd_map_port(uport); if (ret) { - map_guard = atomic_add_return(-1, &duart->map_guard); - if (!map_guard) + if (refcount_dec_and_test(&duart->map_guard)) release_mem_region(duart->mapctrl, DUART_CHANREG_SPACING); } diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 3fe56894974a7c4c3ae04a4d54946043be1d243c..13bfd5dcffce5c0bfaee47879277e32aedf308a9 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2083,7 +2083,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) mutex_lock(&port->mutex); tty_dev = device_find_child(uport->dev, &match, serial_match_port); - if (device_may_wakeup(tty_dev)) { + if (tty_dev && device_may_wakeup(tty_dev)) { if (!enable_irq_wake(uport->irq)) uport->irq_wake = 1; put_device(tty_dev); @@ -2117,9 +2117,8 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) for (tries = 3; !ops->tx_empty(uport) && tries; tries--) msleep(10); if (!tries) - dev_err(uport->dev, "%s%d: Unable to drain transmitter\n", - drv->dev_name, - drv->tty_driver->name_base + uport->line); + dev_err(uport->dev, "%s: Unable to drain transmitter\n", + uport->name); ops->shutdown(uport); } @@ -2248,11 +2247,10 @@ uart_report_port(struct uart_driver *drv, struct uart_port *port) break; } - printk(KERN_INFO "%s%s%s%d at %s (irq = %d, base_baud = %d) is a %s\n", + pr_info("%s%s%s at %s (irq = %d, base_baud = %d) is a %s\n", port->dev ? dev_name(port->dev) : "", port->dev ? ": " : "", - drv->dev_name, - drv->tty_driver->name_base + port->line, + port->name, address, port->irq, port->uartclk / 16, uart_type(port)); } @@ -2331,9 +2329,6 @@ static int uart_poll_init(struct tty_driver *driver, int line, char *options) int flow = 'n'; int ret = 0; - if (!state) - return -1; - tport = &state->port; mutex_lock(&tport->mutex); @@ -2368,13 +2363,12 @@ static int uart_poll_get_char(struct tty_driver *driver, int line) struct uart_port *port; int ret = -1; - if (state) { - port = uart_port_ref(state); - if (port) { - ret = port->ops->poll_get_char(port); - uart_port_deref(port); - } + port = uart_port_ref(state); + if (port) { + ret = port->ops->poll_get_char(port); + uart_port_deref(port); } + return ret; } @@ -2384,9 +2378,6 @@ static void uart_poll_put_char(struct tty_driver *driver, int line, char ch) struct uart_state *state = drv->state + line; struct uart_port *port; - if (!state) - return; - port = uart_port_ref(state); if (!port) return; @@ -2751,6 +2742,12 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) state->pm_state = UART_PM_STATE_UNDEFINED; uport->cons = drv->cons; uport->minor = drv->tty_driver->minor_start + uport->line; + uport->name = kasprintf(GFP_KERNEL, "%s%d", drv->dev_name, + drv->tty_driver->name_base + uport->line); + if (!uport->name) { + ret = -ENOMEM; + goto out; + } /* * If this port is a console, then the spinlock is already @@ -2785,7 +2782,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) * Register the port whether it's detected or not. This allows * setserial to be used to alter this port's parameters. */ - tty_dev = tty_port_register_device_attr(port, drv->tty_driver, + tty_dev = tty_port_register_device_attr_serdev(port, drv->tty_driver, uport->line, uport->dev, port, uport->tty_groups); if (likely(!IS_ERR(tty_dev))) { device_set_wakeup_capable(tty_dev, 1); @@ -2848,7 +2845,7 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) /* * Remove the devices from the tty layer */ - tty_unregister_device(drv->tty_driver, uport->line); + tty_port_unregister_device(port, drv->tty_driver, uport->line); tty = tty_port_tty_get(port); if (tty) { @@ -2868,6 +2865,7 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) if (uport->type != PORT_UNKNOWN && uport->ops->release_port) uport->ops->release_port(uport); kfree(uport->tty_groups); + kfree(uport->name); /* * Indicate that there isn't a port here anymore. diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 9a47cc4f16a2798f7c0a084d44621d371a2f59a9..71707e8e6e3ffe768ad76e52b118b74e32ad35e0 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -683,24 +683,37 @@ static void sci_init_pins(struct uart_port *port, unsigned int cflag) } if (port->type == PORT_SCIFA || port->type == PORT_SCIFB) { + u16 data = serial_port_in(port, SCPDR); u16 ctrl = serial_port_in(port, SCPCR); /* Enable RXD and TXD pin functions */ ctrl &= ~(SCPCR_RXDC | SCPCR_TXDC); if (to_sci_port(port)->has_rtscts) { - /* RTS# is output, driven 1 */ - ctrl |= SCPCR_RTSC; - serial_port_out(port, SCPDR, - serial_port_in(port, SCPDR) | SCPDR_RTSD); + /* RTS# is output, active low, unless autorts */ + if (!(port->mctrl & TIOCM_RTS)) { + ctrl |= SCPCR_RTSC; + data |= SCPDR_RTSD; + } else if (!s->autorts) { + ctrl |= SCPCR_RTSC; + data &= ~SCPDR_RTSD; + } else { + /* Enable RTS# pin function */ + ctrl &= ~SCPCR_RTSC; + } /* Enable CTS# pin function */ ctrl &= ~SCPCR_CTSC; } + serial_port_out(port, SCPDR, data); serial_port_out(port, SCPCR, ctrl); } else if (sci_getreg(port, SCSPTR)->size) { u16 status = serial_port_in(port, SCSPTR); - /* RTS# is output, driven 1 */ - status |= SCSPTR_RTSIO | SCSPTR_RTSDT; + /* RTS# is always output; and active low, unless autorts */ + status |= SCSPTR_RTSIO; + if (!(port->mctrl & TIOCM_RTS)) + status |= SCSPTR_RTSDT; + else if (!s->autorts) + status &= ~SCSPTR_RTSDT; /* CTS# and SCK are inputs */ status &= ~(SCSPTR_CTSIO | SCSPTR_SCKIO); serial_port_out(port, SCSPTR, status); @@ -1985,11 +1998,13 @@ static int sci_startup(struct uart_port *port) dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + sci_request_dma(port); + ret = sci_request_irq(s); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + sci_free_dma(port); return ret; - - sci_request_dma(port); + } return 0; } @@ -2021,8 +2036,8 @@ static void sci_shutdown(struct uart_port *port) } #endif - sci_free_dma(port); sci_free_irq(s); + sci_free_dma(port); } static int sci_sck_calc(struct sci_port *s, unsigned int bps, @@ -2157,10 +2172,6 @@ static void sci_reset(struct uart_port *port) unsigned int status; struct sci_port *s = to_sci_port(port); - do { - status = serial_port_in(port, SCxSR); - } while (!(status & SCxSR_TEND(port))); - serial_port_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ reg = sci_getreg(port, SCFCR); @@ -2374,6 +2385,10 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, serial_port_out(port, SCFCR, ctrl); } + if (port->flags & UPF_HARD_FLOW) { + /* Refresh (Auto) RTS */ + sci_set_mctrl(port, port->mctrl); + } scr_val |= SCSCR_RE | SCSCR_TE | (s->cfg->scscr & ~(SCSCR_CKE1 | SCSCR_CKE0)); diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index d98e3dc4838e637b34d3e3984c9570c656fa37b3..90996ad97b3718cfa85a0505a47b7e8d7be770c9 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -36,7 +36,7 @@ #define SPRD_FIFO_SIZE 128 #define SPRD_DEF_RATE 26000000 #define SPRD_BAUD_IO_LIMIT 3000000 -#define SPRD_TIMEOUT 256 +#define SPRD_TIMEOUT 256000 /* the offset of serial registers and BITs for them */ /* data registers */ diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index c334bcc59c649eedc2933ac29c4dc1ef45ae21d2..f5335be344f67ced2a601c81e3713a7d90e275c7 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -887,13 +887,12 @@ static void asc_console_write(struct console *co, const char *s, unsigned count) int locked = 1; u32 intenable; - local_irq_save(flags); if (port->sysrq) locked = 0; /* asc_interrupt has already claimed the lock */ else if (oops_in_progress) - locked = spin_trylock(&port->lock); + locked = spin_trylock_irqsave(&port->lock, flags); else - spin_lock(&port->lock); + spin_lock_irqsave(&port->lock, flags); /* * Disable interrupts so we don't get the IRQ line bouncing @@ -911,14 +910,13 @@ static void asc_console_write(struct console *co, const char *s, unsigned count) asc_out(port, ASC_INTEN, intenable); if (locked) - spin_unlock(&port->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&port->lock, flags); } static int asc_console_setup(struct console *co, char *options) { struct asc_port *ascport; - int baud = 9600; + int baud = 115200; int bits = 8; int parity = 'n'; int flow = 'n'; @@ -986,7 +984,7 @@ static struct platform_driver asc_serial_driver = { static int __init asc_init(void) { int ret; - static char banner[] __initdata = + static const char banner[] __initconst = KERN_INFO "STMicroelectronics ASC driver initialized\n"; printk(banner); diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 817bb0d3f326ed982631c135ddd5f7a4b170b7a5..c9b8d702dadc35abcb6f3b74a07f62e836b2f9e2 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -28,7 +28,7 @@ #define ULITE_NAME "ttyUL" #define ULITE_MAJOR 204 #define ULITE_MINOR 187 -#define ULITE_NR_UARTS 16 +#define ULITE_NR_UARTS CONFIG_SERIAL_UARTLITE_NR_UARTS /* --------------------------------------------------------------------- * Register definitions diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index ad77d0ed0c467dbc2b20f41a9959ed10cf3e8390..c0539950f8d7f29725dd4433e6905d5e3d07b1c7 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -30,6 +30,7 @@ #include #include #include +#include #define CDNS_UART_TTY_NAME "ttyPS" #define CDNS_UART_NAME "xuartps" @@ -176,6 +177,7 @@ MODULE_PARM_DESC(rx_timeout, "Rx timeout, 1-255"); #define CDNS_UART_BDIV_MIN 4 #define CDNS_UART_BDIV_MAX 255 #define CDNS_UART_CD_MAX 65535 +#define UART_AUTOSUSPEND_TIMEOUT 3000 /** * struct cdns_uart - device data @@ -1065,16 +1067,13 @@ static void cdns_uart_poll_put_char(struct uart_port *port, unsigned char c) static void cdns_uart_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) { - struct cdns_uart *cdns_uart = port->private_data; - switch (state) { case UART_PM_STATE_OFF: - clk_disable(cdns_uart->uartclk); - clk_disable(cdns_uart->pclk); + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); break; default: - clk_enable(cdns_uart->pclk); - clk_enable(cdns_uart->uartclk); + pm_runtime_get_sync(port->dev); break; } } @@ -1353,12 +1352,7 @@ static int cdns_uart_suspend(struct device *device) * the suspend. */ uart_suspend_port(&cdns_uart_uart_driver, port); - if (console_suspend_enabled && !may_wake) { - struct cdns_uart *cdns_uart = port->private_data; - - clk_disable(cdns_uart->uartclk); - clk_disable(cdns_uart->pclk); - } else { + if (!(console_suspend_enabled && !may_wake)) { unsigned long flags = 0; spin_lock_irqsave(&port->lock, flags); @@ -1423,6 +1417,8 @@ static int cdns_uart_resume(struct device *device) ctrl_reg |= CDNS_UART_CR_TX_EN | CDNS_UART_CR_RX_EN; writel(ctrl_reg, port->membase + CDNS_UART_CR); + clk_disable(cdns_uart->uartclk); + clk_disable(cdns_uart->pclk); spin_unlock_irqrestore(&port->lock, flags); } else { spin_lock_irqsave(&port->lock, flags); @@ -1436,9 +1432,33 @@ static int cdns_uart_resume(struct device *device) return uart_resume_port(&cdns_uart_uart_driver, port); } #endif /* ! CONFIG_PM_SLEEP */ +static int __maybe_unused cdns_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct uart_port *port = platform_get_drvdata(pdev); + struct cdns_uart *cdns_uart = port->private_data; + + clk_disable(cdns_uart->uartclk); + clk_disable(cdns_uart->pclk); + return 0; +}; + +static int __maybe_unused cdns_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct uart_port *port = platform_get_drvdata(pdev); + struct cdns_uart *cdns_uart = port->private_data; -static SIMPLE_DEV_PM_OPS(cdns_uart_dev_pm_ops, cdns_uart_suspend, - cdns_uart_resume); + clk_enable(cdns_uart->pclk); + clk_enable(cdns_uart->uartclk); + return 0; +}; + +static const struct dev_pm_ops cdns_uart_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(cdns_uart_suspend, cdns_uart_resume) + SET_RUNTIME_PM_OPS(cdns_runtime_suspend, + cdns_runtime_resume, NULL) +}; static const struct cdns_platform_data zynqmp_uart_def = { .quirks = CDNS_UART_RXBS_SUPPORT, }; @@ -1501,12 +1521,12 @@ static int cdns_uart_probe(struct platform_device *pdev) return PTR_ERR(cdns_uart_data->uartclk); } - rc = clk_prepare(cdns_uart_data->pclk); + rc = clk_prepare_enable(cdns_uart_data->pclk); if (rc) { dev_err(&pdev->dev, "Unable to enable pclk clock.\n"); return rc; } - rc = clk_prepare(cdns_uart_data->uartclk); + rc = clk_prepare_enable(cdns_uart_data->uartclk); if (rc) { dev_err(&pdev->dev, "Unable to enable device clock.\n"); goto err_out_clk_dis_pclk; @@ -1558,6 +1578,11 @@ static int cdns_uart_probe(struct platform_device *pdev) cdns_uart_data->port = port; platform_set_drvdata(pdev, port); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + rc = uart_add_one_port(&cdns_uart_uart_driver, port); if (rc) { dev_err(&pdev->dev, @@ -1573,9 +1598,12 @@ static int cdns_uart_probe(struct platform_device *pdev) &cdns_uart_data->clk_rate_change_nb); #endif err_out_clk_disable: - clk_unprepare(cdns_uart_data->uartclk); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + clk_disable_unprepare(cdns_uart_data->uartclk); err_out_clk_dis_pclk: - clk_unprepare(cdns_uart_data->pclk); + clk_disable_unprepare(cdns_uart_data->pclk); return rc; } @@ -1599,8 +1627,11 @@ static int cdns_uart_remove(struct platform_device *pdev) #endif rc = uart_remove_one_port(&cdns_uart_uart_driver, port); port->mapbase = 0; - clk_unprepare(cdns_uart_data->uartclk); - clk_unprepare(cdns_uart_data->pclk); + clk_disable_unprepare(cdns_uart_data->uartclk); + clk_disable_unprepare(cdns_uart_data->pclk); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); return rc; } diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index 657eed82eeb372d96e5f480bae3eb7c92dee06fa..a2c308f7d6374515bf8a0ac0a9e6ea8b59f5032b 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -869,9 +869,9 @@ static int txholdbufs[MAX_TOTAL_DEVICES]; module_param(break_on_load, bool, 0); module_param(ttymajor, int, 0); -module_param_array(io, int, NULL, 0); -module_param_array(irq, int, NULL, 0); -module_param_array(dma, int, NULL, 0); +module_param_hw_array(io, int, ioport, NULL, 0); +module_param_hw_array(irq, int, irq, NULL, 0); +module_param_hw_array(dma, int, dma, NULL, 0); module_param(debug_level, int, 0); module_param_array(maxframe, int, NULL, 0); module_param_array(txdmabufs, int, NULL, 0); diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index c6fc7141d7b2814cc122d430c38d61bbde675358..3ffc1ce29023b5f6476d0006aa94b8b1b858c480 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -372,7 +372,7 @@ static void moom_callback(struct work_struct *ignored) mutex_lock(&oom_lock); if (!out_of_memory(&oc)) - pr_info("OOM request ignored because killer is disabled\n"); + pr_info("OOM request ignored. No task eligible\n"); mutex_unlock(&oom_lock); } @@ -446,7 +446,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { */ NULL, /* a */ &sysrq_reboot_op, /* b */ - &sysrq_crash_op, /* c & ibm_emac driver debug */ + &sysrq_crash_op, /* c */ &sysrq_showlocks_op, /* d */ &sysrq_term_op, /* e */ &sysrq_moom_op, /* f */ diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c new file mode 100644 index 0000000000000000000000000000000000000000..5c33fd25676d59bc6453d7f4a5f4a46b2aadabf1 --- /dev/null +++ b/drivers/tty/tty_baudrate.c @@ -0,0 +1,232 @@ +/* + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + */ + +#include +#include +#include +#include +#include + + +/* + * Routine which returns the baud rate of the tty + * + * Note that the baud_table needs to be kept in sync with the + * include/asm/termbits.h file. + */ +static const speed_t baud_table[] = { + 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, + 9600, 19200, 38400, 57600, 115200, 230400, 460800, +#ifdef __sparc__ + 76800, 153600, 307200, 614400, 921600 +#else + 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000, + 2500000, 3000000, 3500000, 4000000 +#endif +}; + +#ifndef __sparc__ +static const tcflag_t baud_bits[] = { + B0, B50, B75, B110, B134, B150, B200, B300, B600, + B1200, B1800, B2400, B4800, B9600, B19200, B38400, + B57600, B115200, B230400, B460800, B500000, B576000, + B921600, B1000000, B1152000, B1500000, B2000000, B2500000, + B3000000, B3500000, B4000000 +}; +#else +static const tcflag_t baud_bits[] = { + B0, B50, B75, B110, B134, B150, B200, B300, B600, + B1200, B1800, B2400, B4800, B9600, B19200, B38400, + B57600, B115200, B230400, B460800, B76800, B153600, + B307200, B614400, B921600 +}; +#endif + +static int n_baud_table = ARRAY_SIZE(baud_table); + +/** + * tty_termios_baud_rate + * @termios: termios structure + * + * Convert termios baud rate data into a speed. This should be called + * with the termios lock held if this termios is a terminal termios + * structure. May change the termios data. Device drivers can call this + * function but should use ->c_[io]speed directly as they are updated. + * + * Locking: none + */ + +speed_t tty_termios_baud_rate(struct ktermios *termios) +{ + unsigned int cbaud; + + cbaud = termios->c_cflag & CBAUD; + +#ifdef BOTHER + /* Magic token for arbitrary speed via c_ispeed/c_ospeed */ + if (cbaud == BOTHER) + return termios->c_ospeed; +#endif + if (cbaud & CBAUDEX) { + cbaud &= ~CBAUDEX; + + if (cbaud < 1 || cbaud + 15 > n_baud_table) + termios->c_cflag &= ~CBAUDEX; + else + cbaud += 15; + } + return baud_table[cbaud]; +} +EXPORT_SYMBOL(tty_termios_baud_rate); + +/** + * tty_termios_input_baud_rate + * @termios: termios structure + * + * Convert termios baud rate data into a speed. This should be called + * with the termios lock held if this termios is a terminal termios + * structure. May change the termios data. Device drivers can call this + * function but should use ->c_[io]speed directly as they are updated. + * + * Locking: none + */ + +speed_t tty_termios_input_baud_rate(struct ktermios *termios) +{ +#ifdef IBSHIFT + unsigned int cbaud = (termios->c_cflag >> IBSHIFT) & CBAUD; + + if (cbaud == B0) + return tty_termios_baud_rate(termios); + + /* Magic token for arbitrary speed via c_ispeed*/ + if (cbaud == BOTHER) + return termios->c_ispeed; + + if (cbaud & CBAUDEX) { + cbaud &= ~CBAUDEX; + + if (cbaud < 1 || cbaud + 15 > n_baud_table) + termios->c_cflag &= ~(CBAUDEX << IBSHIFT); + else + cbaud += 15; + } + return baud_table[cbaud]; +#else + return tty_termios_baud_rate(termios); +#endif +} +EXPORT_SYMBOL(tty_termios_input_baud_rate); + +/** + * tty_termios_encode_baud_rate + * @termios: ktermios structure holding user requested state + * @ispeed: input speed + * @ospeed: output speed + * + * Encode the speeds set into the passed termios structure. This is + * used as a library helper for drivers so that they can report back + * the actual speed selected when it differs from the speed requested + * + * For maximal back compatibility with legacy SYS5/POSIX *nix behaviour + * we need to carefully set the bits when the user does not get the + * desired speed. We allow small margins and preserve as much of possible + * of the input intent to keep compatibility. + * + * Locking: Caller should hold termios lock. This is already held + * when calling this function from the driver termios handler. + * + * The ifdefs deal with platforms whose owners have yet to update them + * and will all go away once this is done. + */ + +void tty_termios_encode_baud_rate(struct ktermios *termios, + speed_t ibaud, speed_t obaud) +{ + int i = 0; + int ifound = -1, ofound = -1; + int iclose = ibaud/50, oclose = obaud/50; + int ibinput = 0; + + if (obaud == 0) /* CD dropped */ + ibaud = 0; /* Clear ibaud to be sure */ + + termios->c_ispeed = ibaud; + termios->c_ospeed = obaud; + +#ifdef BOTHER + /* If the user asked for a precise weird speed give a precise weird + answer. If they asked for a Bfoo speed they may have problems + digesting non-exact replies so fuzz a bit */ + + if ((termios->c_cflag & CBAUD) == BOTHER) + oclose = 0; + if (((termios->c_cflag >> IBSHIFT) & CBAUD) == BOTHER) + iclose = 0; + if ((termios->c_cflag >> IBSHIFT) & CBAUD) + ibinput = 1; /* An input speed was specified */ +#endif + termios->c_cflag &= ~CBAUD; + + /* + * Our goal is to find a close match to the standard baud rate + * returned. Walk the baud rate table and if we get a very close + * match then report back the speed as a POSIX Bxxxx value by + * preference + */ + + do { + if (obaud - oclose <= baud_table[i] && + obaud + oclose >= baud_table[i]) { + termios->c_cflag |= baud_bits[i]; + ofound = i; + } + if (ibaud - iclose <= baud_table[i] && + ibaud + iclose >= baud_table[i]) { + /* For the case input == output don't set IBAUD bits + if the user didn't do so */ + if (ofound == i && !ibinput) + ifound = i; +#ifdef IBSHIFT + else { + ifound = i; + termios->c_cflag |= (baud_bits[i] << IBSHIFT); + } +#endif + } + } while (++i < n_baud_table); + + /* + * If we found no match then use BOTHER if provided or warn + * the user their platform maintainer needs to wake up if not. + */ +#ifdef BOTHER + if (ofound == -1) + termios->c_cflag |= BOTHER; + /* Set exact input bits only if the input and output differ or the + user already did */ + if (ifound == -1 && (ibaud != obaud || ibinput)) + termios->c_cflag |= (BOTHER << IBSHIFT); +#else + if (ifound == -1 || ofound == -1) + pr_warn_once("tty: Unable to return correct speed data as your architecture needs updating.\n"); +#endif +} +EXPORT_SYMBOL_GPL(tty_termios_encode_baud_rate); + +/** + * tty_encode_baud_rate - set baud rate of the tty + * @ibaud: input baud rate + * @obad: output baud rate + * + * Update the current termios data for the tty with the new speed + * settings. The caller must hold the termios_rwsem for the tty in + * question. + */ + +void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud) +{ + tty_termios_encode_baud_rate(&tty->termios, ibaud, obaud); +} +EXPORT_SYMBOL_GPL(tty_encode_baud_rate); diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index e6d1a6510886c5a807cda3b65d651046afe3a48d..0c150b5a9dd68d7bba0e91a50c45ecd5df831cac 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -377,65 +377,6 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) EXPORT_SYMBOL_GPL(tty_find_polling_driver); #endif -static int is_ignored(int sig) -{ - return (sigismember(¤t->blocked, sig) || - current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); -} - -/** - * tty_check_change - check for POSIX terminal changes - * @tty: tty to check - * - * If we try to write to, or set the state of, a terminal and we're - * not in the foreground, send a SIGTTOU. If the signal is blocked or - * ignored, go ahead and perform the operation. (POSIX 7.2) - * - * Locking: ctrl_lock - */ - -int __tty_check_change(struct tty_struct *tty, int sig) -{ - unsigned long flags; - struct pid *pgrp, *tty_pgrp; - int ret = 0; - - if (current->signal->tty != tty) - return 0; - - rcu_read_lock(); - pgrp = task_pgrp(current); - - spin_lock_irqsave(&tty->ctrl_lock, flags); - tty_pgrp = tty->pgrp; - spin_unlock_irqrestore(&tty->ctrl_lock, flags); - - if (tty_pgrp && pgrp != tty->pgrp) { - if (is_ignored(sig)) { - if (sig == SIGTTIN) - ret = -EIO; - } else if (is_current_pgrp_orphaned()) - ret = -EIO; - else { - kill_pgrp(pgrp, sig, 1); - set_thread_flag(TIF_SIGPENDING); - ret = -ERESTARTSYS; - } - } - rcu_read_unlock(); - - if (!tty_pgrp) - tty_warn(tty, "sig=%d, tty->pgrp == NULL!\n", sig); - - return ret; -} - -int tty_check_change(struct tty_struct *tty) -{ - return __tty_check_change(tty, SIGTTOU); -} -EXPORT_SYMBOL(tty_check_change); - static ssize_t hung_up_tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -509,79 +450,6 @@ static const struct file_operations hung_up_tty_fops = { static DEFINE_SPINLOCK(redirect_lock); static struct file *redirect; - -void proc_clear_tty(struct task_struct *p) -{ - unsigned long flags; - struct tty_struct *tty; - spin_lock_irqsave(&p->sighand->siglock, flags); - tty = p->signal->tty; - p->signal->tty = NULL; - spin_unlock_irqrestore(&p->sighand->siglock, flags); - tty_kref_put(tty); -} - -/** - * proc_set_tty - set the controlling terminal - * - * Only callable by the session leader and only if it does not already have - * a controlling terminal. - * - * Caller must hold: tty_lock() - * a readlock on tasklist_lock - * sighand lock - */ -static void __proc_set_tty(struct tty_struct *tty) -{ - unsigned long flags; - - spin_lock_irqsave(&tty->ctrl_lock, flags); - /* - * The session and fg pgrp references will be non-NULL if - * tiocsctty() is stealing the controlling tty - */ - put_pid(tty->session); - put_pid(tty->pgrp); - tty->pgrp = get_pid(task_pgrp(current)); - spin_unlock_irqrestore(&tty->ctrl_lock, flags); - tty->session = get_pid(task_session(current)); - if (current->signal->tty) { - tty_debug(tty, "current tty %s not NULL!!\n", - current->signal->tty->name); - tty_kref_put(current->signal->tty); - } - put_pid(current->signal->tty_old_pgrp); - current->signal->tty = tty_kref_get(tty); - current->signal->tty_old_pgrp = NULL; -} - -static void proc_set_tty(struct tty_struct *tty) -{ - spin_lock_irq(¤t->sighand->siglock); - __proc_set_tty(tty); - spin_unlock_irq(¤t->sighand->siglock); -} - -struct tty_struct *get_current_tty(void) -{ - struct tty_struct *tty; - unsigned long flags; - - spin_lock_irqsave(¤t->sighand->siglock, flags); - tty = tty_kref_get(current->signal->tty); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - return tty; -} -EXPORT_SYMBOL_GPL(get_current_tty); - -static void session_clear_tty(struct pid *session) -{ - struct task_struct *p; - do_each_pid_task(session, PIDTYPE_SID, p) { - proc_clear_tty(p); - } while_each_pid_task(session, PIDTYPE_SID, p); -} - /** * tty_wakeup - request more data * @tty: terminal @@ -608,60 +476,6 @@ void tty_wakeup(struct tty_struct *tty) EXPORT_SYMBOL_GPL(tty_wakeup); -/** - * tty_signal_session_leader - sends SIGHUP to session leader - * @tty controlling tty - * @exit_session if non-zero, signal all foreground group processes - * - * Send SIGHUP and SIGCONT to the session leader and its process group. - * Optionally, signal all processes in the foreground process group. - * - * Returns the number of processes in the session with this tty - * as their controlling terminal. This value is used to drop - * tty references for those processes. - */ -static int tty_signal_session_leader(struct tty_struct *tty, int exit_session) -{ - struct task_struct *p; - int refs = 0; - struct pid *tty_pgrp = NULL; - - read_lock(&tasklist_lock); - if (tty->session) { - do_each_pid_task(tty->session, PIDTYPE_SID, p) { - spin_lock_irq(&p->sighand->siglock); - if (p->signal->tty == tty) { - p->signal->tty = NULL; - /* We defer the dereferences outside fo - the tasklist lock */ - refs++; - } - if (!p->signal->leader) { - spin_unlock_irq(&p->sighand->siglock); - continue; - } - __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); - __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); - put_pid(p->signal->tty_old_pgrp); /* A noop */ - spin_lock(&tty->ctrl_lock); - tty_pgrp = get_pid(tty->pgrp); - if (tty->pgrp) - p->signal->tty_old_pgrp = get_pid(tty->pgrp); - spin_unlock(&tty->ctrl_lock); - spin_unlock_irq(&p->sighand->siglock); - } while_each_pid_task(tty->session, PIDTYPE_SID, p); - } - read_unlock(&tasklist_lock); - - if (tty_pgrp) { - if (exit_session) - kill_pgrp(tty_pgrp, SIGHUP, exit_session); - put_pid(tty_pgrp); - } - - return refs; -} - /** * __tty_hangup - actual handler for hangup events * @work: tty device @@ -840,7 +654,7 @@ void tty_vhangup_self(void) * is complete. That guarantee is necessary for security reasons. */ -static void tty_vhangup_session(struct tty_struct *tty) +void tty_vhangup_session(struct tty_struct *tty) { tty_debug_hangup(tty, "session hangup\n"); __tty_hangup(tty, 1); @@ -861,106 +675,6 @@ int tty_hung_up_p(struct file *filp) EXPORT_SYMBOL(tty_hung_up_p); -/** - * disassociate_ctty - disconnect controlling tty - * @on_exit: true if exiting so need to "hang up" the session - * - * This function is typically called only by the session leader, when - * it wants to disassociate itself from its controlling tty. - * - * It performs the following functions: - * (1) Sends a SIGHUP and SIGCONT to the foreground process group - * (2) Clears the tty from being controlling the session - * (3) Clears the controlling tty for all processes in the - * session group. - * - * The argument on_exit is set to 1 if called when a process is - * exiting; it is 0 if called by the ioctl TIOCNOTTY. - * - * Locking: - * BTM is taken for hysterical raisins, and held when - * called from no_tty(). - * tty_mutex is taken to protect tty - * ->siglock is taken to protect ->signal/->sighand - * tasklist_lock is taken to walk process list for sessions - * ->siglock is taken to protect ->signal/->sighand - */ - -void disassociate_ctty(int on_exit) -{ - struct tty_struct *tty; - - if (!current->signal->leader) - return; - - tty = get_current_tty(); - if (tty) { - if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) { - tty_vhangup_session(tty); - } else { - struct pid *tty_pgrp = tty_get_pgrp(tty); - if (tty_pgrp) { - kill_pgrp(tty_pgrp, SIGHUP, on_exit); - if (!on_exit) - kill_pgrp(tty_pgrp, SIGCONT, on_exit); - put_pid(tty_pgrp); - } - } - tty_kref_put(tty); - - } else if (on_exit) { - struct pid *old_pgrp; - spin_lock_irq(¤t->sighand->siglock); - old_pgrp = current->signal->tty_old_pgrp; - current->signal->tty_old_pgrp = NULL; - spin_unlock_irq(¤t->sighand->siglock); - if (old_pgrp) { - kill_pgrp(old_pgrp, SIGHUP, on_exit); - kill_pgrp(old_pgrp, SIGCONT, on_exit); - put_pid(old_pgrp); - } - return; - } - - spin_lock_irq(¤t->sighand->siglock); - put_pid(current->signal->tty_old_pgrp); - current->signal->tty_old_pgrp = NULL; - - tty = tty_kref_get(current->signal->tty); - if (tty) { - unsigned long flags; - spin_lock_irqsave(&tty->ctrl_lock, flags); - put_pid(tty->session); - put_pid(tty->pgrp); - tty->session = NULL; - tty->pgrp = NULL; - spin_unlock_irqrestore(&tty->ctrl_lock, flags); - tty_kref_put(tty); - } else - tty_debug_hangup(tty, "no current tty\n"); - - spin_unlock_irq(¤t->sighand->siglock); - /* Now clear signal->tty under the lock */ - read_lock(&tasklist_lock); - session_clear_tty(task_session(current)); - read_unlock(&tasklist_lock); -} - -/** - * - * no_tty - Ensure the current process does not have a controlling tty - */ -void no_tty(void) -{ - /* FIXME: Review locking here. The tty_lock never covered any race - between a new association and proc_clear_tty but possible we need - to protect against this anyway */ - struct task_struct *tsk = current; - disassociate_ctty(0); - proc_clear_tty(tsk); -} - - /** * stop_tty - propagate flow control * @tty: tty to stop @@ -1520,7 +1234,7 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) * This code guarantees that either everything succeeds and the * TTY is ready for operation, or else the table slots are vacated * and the allocated memory released. (Except that the termios - * and locked termios may be retained.) + * may be retained.) */ if (!try_module_get(driver->owner)) @@ -2163,38 +1877,13 @@ static int tty_open(struct inode *inode, struct file *filp) } clear_bit(TTY_HUPPED, &tty->flags); - - read_lock(&tasklist_lock); - spin_lock_irq(¤t->sighand->siglock); noctty = (filp->f_flags & O_NOCTTY) || - (IS_ENABLED(CONFIG_VT) && device == MKDEV(TTY_MAJOR, 0)) || - device == MKDEV(TTYAUX_MAJOR, 1) || - (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->driver->subtype == PTY_TYPE_MASTER); - - if (!noctty && - current->signal->leader && - !current->signal->tty && - tty->session == NULL) { - /* - * Don't let a process that only has write access to the tty - * obtain the privileges associated with having a tty as - * controlling terminal (being able to reopen it with full - * access through /dev/tty, being able to perform pushback). - * Many distributions set the group of all ttys to "tty" and - * grant write-only access to all terminals for setgid tty - * binaries, which should not imply full privileges on all ttys. - * - * This could theoretically break old code that performs open() - * on a write-only file descriptor. In that case, it might be - * necessary to also permit this if - * inode_permission(inode, MAY_READ) == 0. - */ - if (filp->f_mode & FMODE_READ) - __proc_set_tty(tty); - } - spin_unlock_irq(¤t->sighand->siglock); - read_unlock(&tasklist_lock); + (IS_ENABLED(CONFIG_VT) && device == MKDEV(TTY_MAJOR, 0)) || + device == MKDEV(TTYAUX_MAJOR, 1) || + (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER); + if (!noctty) + tty_open_proc_set_tty(filp, tty); tty_unlock(tty); return 0; } @@ -2456,211 +2145,6 @@ static int fionbio(struct file *file, int __user *p) return 0; } -/** - * tiocsctty - set controlling tty - * @tty: tty structure - * @arg: user argument - * - * This ioctl is used to manage job control. It permits a session - * leader to set this tty as the controlling tty for the session. - * - * Locking: - * Takes tty_lock() to serialize proc_set_tty() for this tty - * Takes tasklist_lock internally to walk sessions - * Takes ->siglock() when updating signal->tty - */ - -static int tiocsctty(struct tty_struct *tty, struct file *file, int arg) -{ - int ret = 0; - - tty_lock(tty); - read_lock(&tasklist_lock); - - if (current->signal->leader && (task_session(current) == tty->session)) - goto unlock; - - /* - * The process must be a session leader and - * not have a controlling tty already. - */ - if (!current->signal->leader || current->signal->tty) { - ret = -EPERM; - goto unlock; - } - - if (tty->session) { - /* - * This tty is already the controlling - * tty for another session group! - */ - if (arg == 1 && capable(CAP_SYS_ADMIN)) { - /* - * Steal it away - */ - session_clear_tty(tty->session); - } else { - ret = -EPERM; - goto unlock; - } - } - - /* See the comment in tty_open(). */ - if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto unlock; - } - - proc_set_tty(tty); -unlock: - read_unlock(&tasklist_lock); - tty_unlock(tty); - return ret; -} - -/** - * tty_get_pgrp - return a ref counted pgrp pid - * @tty: tty to read - * - * Returns a refcounted instance of the pid struct for the process - * group controlling the tty. - */ - -struct pid *tty_get_pgrp(struct tty_struct *tty) -{ - unsigned long flags; - struct pid *pgrp; - - spin_lock_irqsave(&tty->ctrl_lock, flags); - pgrp = get_pid(tty->pgrp); - spin_unlock_irqrestore(&tty->ctrl_lock, flags); - - return pgrp; -} -EXPORT_SYMBOL_GPL(tty_get_pgrp); - -/* - * This checks not only the pgrp, but falls back on the pid if no - * satisfactory pgrp is found. I dunno - gdb doesn't work correctly - * without this... - * - * The caller must hold rcu lock or the tasklist lock. - */ -static struct pid *session_of_pgrp(struct pid *pgrp) -{ - struct task_struct *p; - struct pid *sid = NULL; - - p = pid_task(pgrp, PIDTYPE_PGID); - if (p == NULL) - p = pid_task(pgrp, PIDTYPE_PID); - if (p != NULL) - sid = task_session(p); - - return sid; -} - -/** - * tiocgpgrp - get process group - * @tty: tty passed by user - * @real_tty: tty side of the tty passed by the user if a pty else the tty - * @p: returned pid - * - * Obtain the process group of the tty. If there is no process group - * return an error. - * - * Locking: none. Reference to current->signal->tty is safe. - */ - -static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) -{ - struct pid *pid; - int ret; - /* - * (tty == real_tty) is a cheap way of - * testing if the tty is NOT a master pty. - */ - if (tty == real_tty && current->signal->tty != real_tty) - return -ENOTTY; - pid = tty_get_pgrp(real_tty); - ret = put_user(pid_vnr(pid), p); - put_pid(pid); - return ret; -} - -/** - * tiocspgrp - attempt to set process group - * @tty: tty passed by user - * @real_tty: tty side device matching tty passed by user - * @p: pid pointer - * - * Set the process group of the tty to the session passed. Only - * permitted where the tty session is our session. - * - * Locking: RCU, ctrl lock - */ - -static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) -{ - struct pid *pgrp; - pid_t pgrp_nr; - int retval = tty_check_change(real_tty); - - if (retval == -EIO) - return -ENOTTY; - if (retval) - return retval; - if (!current->signal->tty || - (current->signal->tty != real_tty) || - (real_tty->session != task_session(current))) - return -ENOTTY; - if (get_user(pgrp_nr, p)) - return -EFAULT; - if (pgrp_nr < 0) - return -EINVAL; - rcu_read_lock(); - pgrp = find_vpid(pgrp_nr); - retval = -ESRCH; - if (!pgrp) - goto out_unlock; - retval = -EPERM; - if (session_of_pgrp(pgrp) != task_session(current)) - goto out_unlock; - retval = 0; - spin_lock_irq(&tty->ctrl_lock); - put_pid(real_tty->pgrp); - real_tty->pgrp = get_pid(pgrp); - spin_unlock_irq(&tty->ctrl_lock); -out_unlock: - rcu_read_unlock(); - return retval; -} - -/** - * tiocgsid - get session id - * @tty: tty passed by user - * @real_tty: tty side of the tty passed by the user if a pty else the tty - * @p: pointer to returned session id - * - * Obtain the session id of the tty. If there is no session - * return an error. - * - * Locking: none. Reference to current->signal->tty is safe. - */ - -static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) -{ - /* - * (tty == real_tty) is a cheap way of - * testing if the tty is NOT a master pty. - */ - if (tty == real_tty && current->signal->tty != real_tty) - return -ENOTTY; - if (!real_tty->session) - return -ENOTTY; - return put_user(pid_vnr(real_tty->session), p); -} - /** * tiocsetd - set line discipline * @tty: tty device @@ -2843,8 +2327,8 @@ static void tty_warn_deprecated_flags(struct serial_struct __user *ss) flags &= ASYNC_DEPRECATED; if (flags && __ratelimit(&depr_flags)) - pr_warning("%s: '%s' is using deprecated serial flags (with no effect): %.8x\n", - __func__, get_task_comm(comm, current), flags); + pr_warn("%s: '%s' is using deprecated serial flags (with no effect): %.8x\n", + __func__, get_task_comm(comm, current), flags); } /* @@ -2920,19 +2404,6 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) int excl = test_bit(TTY_EXCLUSIVE, &tty->flags); return put_user(excl, (int __user *)p); } - case TIOCNOTTY: - if (current->signal->tty != tty) - return -ENOTTY; - no_tty(); - return 0; - case TIOCSCTTY: - return tiocsctty(real_tty, file, arg); - case TIOCGPGRP: - return tiocgpgrp(tty, real_tty, p); - case TIOCSPGRP: - return tiocspgrp(tty, real_tty, p); - case TIOCGSID: - return tiocgsid(tty, real_tty, p); case TIOCGETD: return tiocgetd(tty, p); case TIOCSETD: @@ -2993,6 +2464,10 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCSSERIAL: tty_warn_deprecated_flags(p); break; + default: + retval = tty_jobctrl_ioctl(tty, real_tty, file, cmd, arg); + if (retval != -ENOIOCTLCMD) + return retval; } if (tty->ops->ioctl) { retval = tty->ops->ioctl(tty, cmd, arg); @@ -3293,9 +2768,9 @@ struct device *tty_register_device_attr(struct tty_driver *driver, { char name[64]; dev_t devt = MKDEV(driver->major, driver->minor_start) + index; - struct device *dev = NULL; - int retval = -ENODEV; - bool cdev = false; + struct ktermios *tp; + struct device *dev; + int retval; if (index >= driver->num) { pr_err("%s: Attempt to register invalid tty line number (%d)\n", @@ -3308,18 +2783,9 @@ struct device *tty_register_device_attr(struct tty_driver *driver, else tty_line_name(driver, index, name); - if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { - retval = tty_cdev_add(driver, devt, index, 1); - if (retval) - goto error; - cdev = true; - } - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - retval = -ENOMEM; - goto error; - } + if (!dev) + return ERR_PTR(-ENOMEM); dev->devt = devt; dev->class = tty_class; @@ -3329,18 +2795,38 @@ struct device *tty_register_device_attr(struct tty_driver *driver, dev->groups = attr_grp; dev_set_drvdata(dev, drvdata); + dev_set_uevent_suppress(dev, 1); + retval = device_register(dev); if (retval) - goto error; + goto err_put; + + if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { + /* + * Free any saved termios data so that the termios state is + * reset when reusing a minor number. + */ + tp = driver->termios[index]; + if (tp) { + driver->termios[index] = NULL; + kfree(tp); + } + + retval = tty_cdev_add(driver, devt, index, 1); + if (retval) + goto err_del; + } + + dev_set_uevent_suppress(dev, 0); + kobject_uevent(&dev->kobj, KOBJ_ADD); return dev; -error: +err_del: + device_del(dev); +err_put: put_device(dev); - if (cdev) { - cdev_del(driver->cdevs[index]); - driver->cdevs[index] = NULL; - } + return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(tty_register_device_attr); @@ -3370,7 +2856,7 @@ EXPORT_SYMBOL(tty_unregister_device); /** * __tty_alloc_driver -- allocate tty driver * @lines: count of lines this driver can handle at most - * @owner: module which is repsonsible for this driver + * @owner: module which is responsible for this driver * @flags: some of TTY_DRIVER_* flags, will be set in driver->flags * * This should not be called directly, some of the provided macros should be @@ -3441,11 +2927,6 @@ static void destruct_tty_driver(struct kref *kref) struct ktermios *tp; if (driver->flags & TTY_DRIVER_INSTALLED) { - /* - * Free the termios and termios_locked structures because - * we don't want to get memory leaks when modular tty - * drivers are removed from the kernel. - */ for (i = 0; i < driver->num; i++) { tp = driver->termios[i]; if (tp) { @@ -3578,30 +3059,6 @@ void tty_default_fops(struct file_operations *fops) *fops = tty_fops; } -/* - * Initialize the console device. This is called *early*, so - * we can't necessarily depend on lots of kernel help here. - * Just do some early initializations, and do the complex setup - * later. - */ -void __init console_init(void) -{ - initcall_t *call; - - /* Setup the default TTY line discipline. */ - n_tty_init(); - - /* - * set up the console device so that later boot sequences can - * inform about problems etc.. - */ - call = __con_initcall_start; - while (call < __con_initcall_end) { - (*call)(); - call++; - } -} - static char *tty_devnode(struct device *dev, umode_t *mode) { if (!mode) diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index a9a978731c5b0d66f3a96b8cb076ef84ac7b3b0d..efa96e6c4c1b44f1e37a8fa94c8a4caa52e9a688 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -258,228 +258,6 @@ static void unset_locked_termios(struct tty_struct *tty, struct ktermios *old) /* FIXME: What should we do for i/ospeed */ } -/* - * Routine which returns the baud rate of the tty - * - * Note that the baud_table needs to be kept in sync with the - * include/asm/termbits.h file. - */ -static const speed_t baud_table[] = { - 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, - 9600, 19200, 38400, 57600, 115200, 230400, 460800, -#ifdef __sparc__ - 76800, 153600, 307200, 614400, 921600 -#else - 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000, - 2500000, 3000000, 3500000, 4000000 -#endif -}; - -#ifndef __sparc__ -static const tcflag_t baud_bits[] = { - B0, B50, B75, B110, B134, B150, B200, B300, B600, - B1200, B1800, B2400, B4800, B9600, B19200, B38400, - B57600, B115200, B230400, B460800, B500000, B576000, - B921600, B1000000, B1152000, B1500000, B2000000, B2500000, - B3000000, B3500000, B4000000 -}; -#else -static const tcflag_t baud_bits[] = { - B0, B50, B75, B110, B134, B150, B200, B300, B600, - B1200, B1800, B2400, B4800, B9600, B19200, B38400, - B57600, B115200, B230400, B460800, B76800, B153600, - B307200, B614400, B921600 -}; -#endif - -static int n_baud_table = ARRAY_SIZE(baud_table); - -/** - * tty_termios_baud_rate - * @termios: termios structure - * - * Convert termios baud rate data into a speed. This should be called - * with the termios lock held if this termios is a terminal termios - * structure. May change the termios data. Device drivers can call this - * function but should use ->c_[io]speed directly as they are updated. - * - * Locking: none - */ - -speed_t tty_termios_baud_rate(struct ktermios *termios) -{ - unsigned int cbaud; - - cbaud = termios->c_cflag & CBAUD; - -#ifdef BOTHER - /* Magic token for arbitrary speed via c_ispeed/c_ospeed */ - if (cbaud == BOTHER) - return termios->c_ospeed; -#endif - if (cbaud & CBAUDEX) { - cbaud &= ~CBAUDEX; - - if (cbaud < 1 || cbaud + 15 > n_baud_table) - termios->c_cflag &= ~CBAUDEX; - else - cbaud += 15; - } - return baud_table[cbaud]; -} -EXPORT_SYMBOL(tty_termios_baud_rate); - -/** - * tty_termios_input_baud_rate - * @termios: termios structure - * - * Convert termios baud rate data into a speed. This should be called - * with the termios lock held if this termios is a terminal termios - * structure. May change the termios data. Device drivers can call this - * function but should use ->c_[io]speed directly as they are updated. - * - * Locking: none - */ - -speed_t tty_termios_input_baud_rate(struct ktermios *termios) -{ -#ifdef IBSHIFT - unsigned int cbaud = (termios->c_cflag >> IBSHIFT) & CBAUD; - - if (cbaud == B0) - return tty_termios_baud_rate(termios); - - /* Magic token for arbitrary speed via c_ispeed*/ - if (cbaud == BOTHER) - return termios->c_ispeed; - - if (cbaud & CBAUDEX) { - cbaud &= ~CBAUDEX; - - if (cbaud < 1 || cbaud + 15 > n_baud_table) - termios->c_cflag &= ~(CBAUDEX << IBSHIFT); - else - cbaud += 15; - } - return baud_table[cbaud]; -#else - return tty_termios_baud_rate(termios); -#endif -} -EXPORT_SYMBOL(tty_termios_input_baud_rate); - -/** - * tty_termios_encode_baud_rate - * @termios: ktermios structure holding user requested state - * @ispeed: input speed - * @ospeed: output speed - * - * Encode the speeds set into the passed termios structure. This is - * used as a library helper for drivers so that they can report back - * the actual speed selected when it differs from the speed requested - * - * For maximal back compatibility with legacy SYS5/POSIX *nix behaviour - * we need to carefully set the bits when the user does not get the - * desired speed. We allow small margins and preserve as much of possible - * of the input intent to keep compatibility. - * - * Locking: Caller should hold termios lock. This is already held - * when calling this function from the driver termios handler. - * - * The ifdefs deal with platforms whose owners have yet to update them - * and will all go away once this is done. - */ - -void tty_termios_encode_baud_rate(struct ktermios *termios, - speed_t ibaud, speed_t obaud) -{ - int i = 0; - int ifound = -1, ofound = -1; - int iclose = ibaud/50, oclose = obaud/50; - int ibinput = 0; - - if (obaud == 0) /* CD dropped */ - ibaud = 0; /* Clear ibaud to be sure */ - - termios->c_ispeed = ibaud; - termios->c_ospeed = obaud; - -#ifdef BOTHER - /* If the user asked for a precise weird speed give a precise weird - answer. If they asked for a Bfoo speed they may have problems - digesting non-exact replies so fuzz a bit */ - - if ((termios->c_cflag & CBAUD) == BOTHER) - oclose = 0; - if (((termios->c_cflag >> IBSHIFT) & CBAUD) == BOTHER) - iclose = 0; - if ((termios->c_cflag >> IBSHIFT) & CBAUD) - ibinput = 1; /* An input speed was specified */ -#endif - termios->c_cflag &= ~CBAUD; - - /* - * Our goal is to find a close match to the standard baud rate - * returned. Walk the baud rate table and if we get a very close - * match then report back the speed as a POSIX Bxxxx value by - * preference - */ - - do { - if (obaud - oclose <= baud_table[i] && - obaud + oclose >= baud_table[i]) { - termios->c_cflag |= baud_bits[i]; - ofound = i; - } - if (ibaud - iclose <= baud_table[i] && - ibaud + iclose >= baud_table[i]) { - /* For the case input == output don't set IBAUD bits - if the user didn't do so */ - if (ofound == i && !ibinput) - ifound = i; -#ifdef IBSHIFT - else { - ifound = i; - termios->c_cflag |= (baud_bits[i] << IBSHIFT); - } -#endif - } - } while (++i < n_baud_table); - - /* - * If we found no match then use BOTHER if provided or warn - * the user their platform maintainer needs to wake up if not. - */ -#ifdef BOTHER - if (ofound == -1) - termios->c_cflag |= BOTHER; - /* Set exact input bits only if the input and output differ or the - user already did */ - if (ifound == -1 && (ibaud != obaud || ibinput)) - termios->c_cflag |= (BOTHER << IBSHIFT); -#else - if (ifound == -1 || ofound == -1) - pr_warn_once("tty: Unable to return correct speed data as your architecture needs updating.\n"); -#endif -} -EXPORT_SYMBOL_GPL(tty_termios_encode_baud_rate); - -/** - * tty_encode_baud_rate - set baud rate of the tty - * @ibaud: input baud rate - * @obad: output baud rate - * - * Update the current termios data for the tty with the new speed - * settings. The caller must hold the termios_rwsem for the tty in - * question. - */ - -void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud, speed_t obaud) -{ - tty_termios_encode_baud_rate(&tty->termios, ibaud, obaud); -} -EXPORT_SYMBOL_GPL(tty_encode_baud_rate); - /** * tty_termios_copy_hw - copy hardware settings * @new: New termios diff --git a/drivers/tty/tty_jobctrl.c b/drivers/tty/tty_jobctrl.c new file mode 100644 index 0000000000000000000000000000000000000000..e7032309ee8714e4a173de8fe17b993660d01d3f --- /dev/null +++ b/drivers/tty/tty_jobctrl.c @@ -0,0 +1,554 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int is_ignored(int sig) +{ + return (sigismember(¤t->blocked, sig) || + current->sighand->action[sig-1].sa.sa_handler == SIG_IGN); +} + +/** + * tty_check_change - check for POSIX terminal changes + * @tty: tty to check + * + * If we try to write to, or set the state of, a terminal and we're + * not in the foreground, send a SIGTTOU. If the signal is blocked or + * ignored, go ahead and perform the operation. (POSIX 7.2) + * + * Locking: ctrl_lock + */ +int __tty_check_change(struct tty_struct *tty, int sig) +{ + unsigned long flags; + struct pid *pgrp, *tty_pgrp; + int ret = 0; + + if (current->signal->tty != tty) + return 0; + + rcu_read_lock(); + pgrp = task_pgrp(current); + + spin_lock_irqsave(&tty->ctrl_lock, flags); + tty_pgrp = tty->pgrp; + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + + if (tty_pgrp && pgrp != tty->pgrp) { + if (is_ignored(sig)) { + if (sig == SIGTTIN) + ret = -EIO; + } else if (is_current_pgrp_orphaned()) + ret = -EIO; + else { + kill_pgrp(pgrp, sig, 1); + set_thread_flag(TIF_SIGPENDING); + ret = -ERESTARTSYS; + } + } + rcu_read_unlock(); + + if (!tty_pgrp) + tty_warn(tty, "sig=%d, tty->pgrp == NULL!\n", sig); + + return ret; +} + +int tty_check_change(struct tty_struct *tty) +{ + return __tty_check_change(tty, SIGTTOU); +} +EXPORT_SYMBOL(tty_check_change); + +void proc_clear_tty(struct task_struct *p) +{ + unsigned long flags; + struct tty_struct *tty; + spin_lock_irqsave(&p->sighand->siglock, flags); + tty = p->signal->tty; + p->signal->tty = NULL; + spin_unlock_irqrestore(&p->sighand->siglock, flags); + tty_kref_put(tty); +} + +/** + * proc_set_tty - set the controlling terminal + * + * Only callable by the session leader and only if it does not already have + * a controlling terminal. + * + * Caller must hold: tty_lock() + * a readlock on tasklist_lock + * sighand lock + */ +static void __proc_set_tty(struct tty_struct *tty) +{ + unsigned long flags; + + spin_lock_irqsave(&tty->ctrl_lock, flags); + /* + * The session and fg pgrp references will be non-NULL if + * tiocsctty() is stealing the controlling tty + */ + put_pid(tty->session); + put_pid(tty->pgrp); + tty->pgrp = get_pid(task_pgrp(current)); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + tty->session = get_pid(task_session(current)); + if (current->signal->tty) { + tty_debug(tty, "current tty %s not NULL!!\n", + current->signal->tty->name); + tty_kref_put(current->signal->tty); + } + put_pid(current->signal->tty_old_pgrp); + current->signal->tty = tty_kref_get(tty); + current->signal->tty_old_pgrp = NULL; +} + +static void proc_set_tty(struct tty_struct *tty) +{ + spin_lock_irq(¤t->sighand->siglock); + __proc_set_tty(tty); + spin_unlock_irq(¤t->sighand->siglock); +} + +/* + * Called by tty_open() to set the controlling tty if applicable. + */ +void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty) +{ + read_lock(&tasklist_lock); + spin_lock_irq(¤t->sighand->siglock); + if (current->signal->leader && + !current->signal->tty && + tty->session == NULL) { + /* + * Don't let a process that only has write access to the tty + * obtain the privileges associated with having a tty as + * controlling terminal (being able to reopen it with full + * access through /dev/tty, being able to perform pushback). + * Many distributions set the group of all ttys to "tty" and + * grant write-only access to all terminals for setgid tty + * binaries, which should not imply full privileges on all ttys. + * + * This could theoretically break old code that performs open() + * on a write-only file descriptor. In that case, it might be + * necessary to also permit this if + * inode_permission(inode, MAY_READ) == 0. + */ + if (filp->f_mode & FMODE_READ) + __proc_set_tty(tty); + } + spin_unlock_irq(¤t->sighand->siglock); + read_unlock(&tasklist_lock); +} + +struct tty_struct *get_current_tty(void) +{ + struct tty_struct *tty; + unsigned long flags; + + spin_lock_irqsave(¤t->sighand->siglock, flags); + tty = tty_kref_get(current->signal->tty); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + return tty; +} +EXPORT_SYMBOL_GPL(get_current_tty); + +/* + * Called from tty_release(). + */ +void session_clear_tty(struct pid *session) +{ + struct task_struct *p; + do_each_pid_task(session, PIDTYPE_SID, p) { + proc_clear_tty(p); + } while_each_pid_task(session, PIDTYPE_SID, p); +} + +/** + * tty_signal_session_leader - sends SIGHUP to session leader + * @tty controlling tty + * @exit_session if non-zero, signal all foreground group processes + * + * Send SIGHUP and SIGCONT to the session leader and its process group. + * Optionally, signal all processes in the foreground process group. + * + * Returns the number of processes in the session with this tty + * as their controlling terminal. This value is used to drop + * tty references for those processes. + */ +int tty_signal_session_leader(struct tty_struct *tty, int exit_session) +{ + struct task_struct *p; + int refs = 0; + struct pid *tty_pgrp = NULL; + + read_lock(&tasklist_lock); + if (tty->session) { + do_each_pid_task(tty->session, PIDTYPE_SID, p) { + spin_lock_irq(&p->sighand->siglock); + if (p->signal->tty == tty) { + p->signal->tty = NULL; + /* We defer the dereferences outside fo + the tasklist lock */ + refs++; + } + if (!p->signal->leader) { + spin_unlock_irq(&p->sighand->siglock); + continue; + } + __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); + __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); + put_pid(p->signal->tty_old_pgrp); /* A noop */ + spin_lock(&tty->ctrl_lock); + tty_pgrp = get_pid(tty->pgrp); + if (tty->pgrp) + p->signal->tty_old_pgrp = get_pid(tty->pgrp); + spin_unlock(&tty->ctrl_lock); + spin_unlock_irq(&p->sighand->siglock); + } while_each_pid_task(tty->session, PIDTYPE_SID, p); + } + read_unlock(&tasklist_lock); + + if (tty_pgrp) { + if (exit_session) + kill_pgrp(tty_pgrp, SIGHUP, exit_session); + put_pid(tty_pgrp); + } + + return refs; +} + +/** + * disassociate_ctty - disconnect controlling tty + * @on_exit: true if exiting so need to "hang up" the session + * + * This function is typically called only by the session leader, when + * it wants to disassociate itself from its controlling tty. + * + * It performs the following functions: + * (1) Sends a SIGHUP and SIGCONT to the foreground process group + * (2) Clears the tty from being controlling the session + * (3) Clears the controlling tty for all processes in the + * session group. + * + * The argument on_exit is set to 1 if called when a process is + * exiting; it is 0 if called by the ioctl TIOCNOTTY. + * + * Locking: + * BTM is taken for hysterical raisons, and held when + * called from no_tty(). + * tty_mutex is taken to protect tty + * ->siglock is taken to protect ->signal/->sighand + * tasklist_lock is taken to walk process list for sessions + * ->siglock is taken to protect ->signal/->sighand + */ +void disassociate_ctty(int on_exit) +{ + struct tty_struct *tty; + + if (!current->signal->leader) + return; + + tty = get_current_tty(); + if (tty) { + if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) { + tty_vhangup_session(tty); + } else { + struct pid *tty_pgrp = tty_get_pgrp(tty); + if (tty_pgrp) { + kill_pgrp(tty_pgrp, SIGHUP, on_exit); + if (!on_exit) + kill_pgrp(tty_pgrp, SIGCONT, on_exit); + put_pid(tty_pgrp); + } + } + tty_kref_put(tty); + + } else if (on_exit) { + struct pid *old_pgrp; + spin_lock_irq(¤t->sighand->siglock); + old_pgrp = current->signal->tty_old_pgrp; + current->signal->tty_old_pgrp = NULL; + spin_unlock_irq(¤t->sighand->siglock); + if (old_pgrp) { + kill_pgrp(old_pgrp, SIGHUP, on_exit); + kill_pgrp(old_pgrp, SIGCONT, on_exit); + put_pid(old_pgrp); + } + return; + } + + spin_lock_irq(¤t->sighand->siglock); + put_pid(current->signal->tty_old_pgrp); + current->signal->tty_old_pgrp = NULL; + + tty = tty_kref_get(current->signal->tty); + if (tty) { + unsigned long flags; + spin_lock_irqsave(&tty->ctrl_lock, flags); + put_pid(tty->session); + put_pid(tty->pgrp); + tty->session = NULL; + tty->pgrp = NULL; + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + tty_kref_put(tty); + } + + spin_unlock_irq(¤t->sighand->siglock); + /* Now clear signal->tty under the lock */ + read_lock(&tasklist_lock); + session_clear_tty(task_session(current)); + read_unlock(&tasklist_lock); +} + +/** + * + * no_tty - Ensure the current process does not have a controlling tty + */ +void no_tty(void) +{ + /* FIXME: Review locking here. The tty_lock never covered any race + between a new association and proc_clear_tty but possible we need + to protect against this anyway */ + struct task_struct *tsk = current; + disassociate_ctty(0); + proc_clear_tty(tsk); +} + +/** + * tiocsctty - set controlling tty + * @tty: tty structure + * @arg: user argument + * + * This ioctl is used to manage job control. It permits a session + * leader to set this tty as the controlling tty for the session. + * + * Locking: + * Takes tty_lock() to serialize proc_set_tty() for this tty + * Takes tasklist_lock internally to walk sessions + * Takes ->siglock() when updating signal->tty + */ +static int tiocsctty(struct tty_struct *tty, struct file *file, int arg) +{ + int ret = 0; + + tty_lock(tty); + read_lock(&tasklist_lock); + + if (current->signal->leader && (task_session(current) == tty->session)) + goto unlock; + + /* + * The process must be a session leader and + * not have a controlling tty already. + */ + if (!current->signal->leader || current->signal->tty) { + ret = -EPERM; + goto unlock; + } + + if (tty->session) { + /* + * This tty is already the controlling + * tty for another session group! + */ + if (arg == 1 && capable(CAP_SYS_ADMIN)) { + /* + * Steal it away + */ + session_clear_tty(tty->session); + } else { + ret = -EPERM; + goto unlock; + } + } + + /* See the comment in tty_open_proc_set_tty(). */ + if ((file->f_mode & FMODE_READ) == 0 && !capable(CAP_SYS_ADMIN)) { + ret = -EPERM; + goto unlock; + } + + proc_set_tty(tty); +unlock: + read_unlock(&tasklist_lock); + tty_unlock(tty); + return ret; +} + +/** + * tty_get_pgrp - return a ref counted pgrp pid + * @tty: tty to read + * + * Returns a refcounted instance of the pid struct for the process + * group controlling the tty. + */ +struct pid *tty_get_pgrp(struct tty_struct *tty) +{ + unsigned long flags; + struct pid *pgrp; + + spin_lock_irqsave(&tty->ctrl_lock, flags); + pgrp = get_pid(tty->pgrp); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + + return pgrp; +} +EXPORT_SYMBOL_GPL(tty_get_pgrp); + +/* + * This checks not only the pgrp, but falls back on the pid if no + * satisfactory pgrp is found. I dunno - gdb doesn't work correctly + * without this... + * + * The caller must hold rcu lock or the tasklist lock. + */ +static struct pid *session_of_pgrp(struct pid *pgrp) +{ + struct task_struct *p; + struct pid *sid = NULL; + + p = pid_task(pgrp, PIDTYPE_PGID); + if (p == NULL) + p = pid_task(pgrp, PIDTYPE_PID); + if (p != NULL) + sid = task_session(p); + + return sid; +} + +/** + * tiocgpgrp - get process group + * @tty: tty passed by user + * @real_tty: tty side of the tty passed by the user if a pty else the tty + * @p: returned pid + * + * Obtain the process group of the tty. If there is no process group + * return an error. + * + * Locking: none. Reference to current->signal->tty is safe. + */ +static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) +{ + struct pid *pid; + int ret; + /* + * (tty == real_tty) is a cheap way of + * testing if the tty is NOT a master pty. + */ + if (tty == real_tty && current->signal->tty != real_tty) + return -ENOTTY; + pid = tty_get_pgrp(real_tty); + ret = put_user(pid_vnr(pid), p); + put_pid(pid); + return ret; +} + +/** + * tiocspgrp - attempt to set process group + * @tty: tty passed by user + * @real_tty: tty side device matching tty passed by user + * @p: pid pointer + * + * Set the process group of the tty to the session passed. Only + * permitted where the tty session is our session. + * + * Locking: RCU, ctrl lock + */ +static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) +{ + struct pid *pgrp; + pid_t pgrp_nr; + int retval = tty_check_change(real_tty); + + if (retval == -EIO) + return -ENOTTY; + if (retval) + return retval; + if (!current->signal->tty || + (current->signal->tty != real_tty) || + (real_tty->session != task_session(current))) + return -ENOTTY; + if (get_user(pgrp_nr, p)) + return -EFAULT; + if (pgrp_nr < 0) + return -EINVAL; + rcu_read_lock(); + pgrp = find_vpid(pgrp_nr); + retval = -ESRCH; + if (!pgrp) + goto out_unlock; + retval = -EPERM; + if (session_of_pgrp(pgrp) != task_session(current)) + goto out_unlock; + retval = 0; + spin_lock_irq(&tty->ctrl_lock); + put_pid(real_tty->pgrp); + real_tty->pgrp = get_pid(pgrp); + spin_unlock_irq(&tty->ctrl_lock); +out_unlock: + rcu_read_unlock(); + return retval; +} + +/** + * tiocgsid - get session id + * @tty: tty passed by user + * @real_tty: tty side of the tty passed by the user if a pty else the tty + * @p: pointer to returned session id + * + * Obtain the session id of the tty. If there is no session + * return an error. + * + * Locking: none. Reference to current->signal->tty is safe. + */ +static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) +{ + /* + * (tty == real_tty) is a cheap way of + * testing if the tty is NOT a master pty. + */ + if (tty == real_tty && current->signal->tty != real_tty) + return -ENOTTY; + if (!real_tty->session) + return -ENOTTY; + return put_user(pid_vnr(real_tty->session), p); +} + +/* + * Called from tty_ioctl(). If tty is a pty then real_tty is the slave side, + * if not then tty == real_tty. + */ +long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, + struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *p = (void __user *)arg; + + switch (cmd) { + case TIOCNOTTY: + if (current->signal->tty != tty) + return -ENOTTY; + no_tty(); + return 0; + case TIOCSCTTY: + return tiocsctty(real_tty, file, arg); + case TIOCGPGRP: + return tiocgpgrp(tty, real_tty, p); + case TIOCSPGRP: + return tiocspgrp(tty, real_tty, p); + case TIOCGSID: + return tiocgsid(tty, real_tty, p); + } + return -ENOIOCTLCMD; +} diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 1d21a9c1d33e6e3c5ef007a25e4b4550fb62a51c..6b137194069fee47bc45b1866865ee27adfd33c5 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -128,20 +128,86 @@ struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp) +{ + tty_port_link_device(port, driver, index); + return tty_register_device_attr(driver, index, device, drvdata, + attr_grp); +} +EXPORT_SYMBOL_GPL(tty_port_register_device_attr); + +/** + * tty_port_register_device_attr_serdev - register tty or serdev device + * @port: tty_port of the device + * @driver: tty_driver for this device + * @index: index of the tty + * @device: parent if exists, otherwise NULL + * @drvdata: driver data for the device + * @attr_grp: attribute group for the device + * + * Register a serdev or tty device depending on if the parent device has any + * defined serdev clients or not. + */ +struct device *tty_port_register_device_attr_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device, void *drvdata, + const struct attribute_group **attr_grp) { struct device *dev; tty_port_link_device(port, driver, index); dev = serdev_tty_port_register(port, device, driver, index); - if (PTR_ERR(dev) != -ENODEV) + if (PTR_ERR(dev) != -ENODEV) { /* Skip creating cdev if we registered a serdev device */ return dev; + } return tty_register_device_attr(driver, index, device, drvdata, attr_grp); } -EXPORT_SYMBOL_GPL(tty_port_register_device_attr); +EXPORT_SYMBOL_GPL(tty_port_register_device_attr_serdev); + +/** + * tty_port_register_device_serdev - register tty or serdev device + * @port: tty_port of the device + * @driver: tty_driver for this device + * @index: index of the tty + * @device: parent if exists, otherwise NULL + * + * Register a serdev or tty device depending on if the parent device has any + * defined serdev clients or not. + */ +struct device *tty_port_register_device_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device) +{ + return tty_port_register_device_attr_serdev(port, driver, index, + device, NULL, NULL); +} +EXPORT_SYMBOL_GPL(tty_port_register_device_serdev); + +/** + * tty_port_unregister_device - deregister a tty or serdev device + * @port: tty_port of the device + * @driver: tty_driver for this device + * @index: index of the tty + * + * If a tty or serdev device is registered with a call to + * tty_port_register_device_serdev() then this function must be called when + * the device is gone. + */ +void tty_port_unregister_device(struct tty_port *port, + struct tty_driver *driver, unsigned index) +{ + int ret; + + ret = serdev_tty_port_unregister(port); + if (ret == 0) + return; + + tty_unregister_device(driver, index); +} +EXPORT_SYMBOL_GPL(tty_port_unregister_device); int tty_port_alloc_xmit_buf(struct tty_port *port) { @@ -189,9 +255,6 @@ static void tty_port_destructor(struct kref *kref) /* check if last port ref was dropped before tty release */ if (WARN_ON(port->itty)) return; - - serdev_tty_port_unregister(port); - if (port->xmit_buf) free_page((unsigned long)port->xmit_buf); tty_port_destroy(port); diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 36e1b8c7680f1d8fc5a64ba16e2fc52a0a379b40..accbd1257bc4a36938918965592e4a1e470098a6 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -80,21 +80,17 @@ void clear_selection(void) /* * User settable table: what characters are to be considered alphabetic? - * 256 bits. Locked by the console lock. + * 128 bits. Locked by the console lock. */ -static u32 inwordLut[8]={ +static u32 inwordLut[]={ 0x00000000, /* control chars */ - 0x03FF0000, /* digits */ + 0x03FFE000, /* digits and "-./" */ 0x87FFFFFE, /* uppercase and '_' */ 0x07FFFFFE, /* lowercase */ - 0x00000000, - 0x00000000, - 0xFF7FFFFF, /* latin-1 accented letters, not multiplication sign */ - 0xFF7FFFFF /* latin-1 accented letters, not division sign */ }; static inline int inword(const u16 c) { - return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1); + return c > 0x7f || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1); } /** @@ -106,10 +102,10 @@ static inline int inword(const u16 c) { */ int sel_loadlut(char __user *p) { - u32 tmplut[8]; - if (copy_from_user(tmplut, (u32 __user *)(p+4), 32)) + u32 tmplut[ARRAY_SIZE(inwordLut)]; + if (copy_from_user(tmplut, (u32 __user *)(p+4), sizeof(inwordLut))) return -EFAULT; - memcpy(inwordLut, tmplut, 32); + memcpy(inwordLut, tmplut, sizeof(inwordLut)); return 0; } diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5c4933bb4b5336258f3d31f22971a1aea87c0dd9..9c9945284bcf240d319b98f986c2573b4d16e307 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -181,7 +181,7 @@ int console_blanked; static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */ static int vesa_off_interval; -static int blankinterval = 10*60; +static int blankinterval; core_param(consoleblank, blankinterval, int, 0444); static DECLARE_WORK(console_work, console_callback); diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 60ce7fd54e890e445daf81e04bcdd70d0ae09599..ff04b7f8549f06f5730afb1cf6334785afab9d05 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -66,7 +66,7 @@ static ssize_t map_size_show(struct uio_mem *mem, char *buf) static ssize_t map_offset_show(struct uio_mem *mem, char *buf) { - return sprintf(buf, "0x%llx\n", (unsigned long long)mem->addr & ~PAGE_MASK); + return sprintf(buf, "0x%llx\n", (unsigned long long)mem->offs); } struct map_sysfs_entry { @@ -279,7 +279,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { ret = -ENOMEM; - goto err_map_kobj; + goto err_map; } kobject_init(&map->kobj, &map_attr_type); map->mem = mem; @@ -289,7 +289,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) goto err_map_kobj; ret = kobject_uevent(&map->kobj, KOBJ_ADD); if (ret) - goto err_map; + goto err_map_kobj; } for (pi = 0; pi < MAX_UIO_PORT_REGIONS; pi++) { @@ -308,7 +308,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) portio = kzalloc(sizeof(*portio), GFP_KERNEL); if (!portio) { ret = -ENOMEM; - goto err_portio_kobj; + goto err_portio; } kobject_init(&portio->kobj, &portio_attr_type); portio->port = port; @@ -319,7 +319,7 @@ static int uio_dev_add_attributes(struct uio_device *idev) goto err_portio_kobj; ret = kobject_uevent(&portio->kobj, KOBJ_ADD); if (ret) - goto err_portio; + goto err_portio_kobj; } return 0; diff --git a/drivers/uio/uio_mf624.c b/drivers/uio/uio_mf624.c index d1f95a1567bba0a3f598538d5e11dca8308b7f82..35187c052e8c105c972067400c15f06e7749df93 100644 --- a/drivers/uio/uio_mf624.c +++ b/drivers/uio/uio_mf624.c @@ -127,6 +127,24 @@ static int mf624_irqcontrol(struct uio_info *info, s32 irq_on) return 0; } +static int mf624_setup_mem(struct pci_dev *dev, int bar, struct uio_mem *mem, const char *name) +{ + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); + + mem->name = name; + mem->addr = start & PAGE_MASK; + mem->offs = start & ~PAGE_MASK; + if (!mem->addr) + return -ENODEV; + mem->size = ((start & ~PAGE_MASK) + len + PAGE_SIZE - 1) & PAGE_MASK; + mem->memtype = UIO_MEM_PHYS; + mem->internal_addr = pci_ioremap_bar(dev, bar); + if (!mem->internal_addr) + return -ENODEV; + return 0; +} + static int mf624_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct uio_info *info; @@ -147,37 +165,15 @@ static int mf624_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Note: Datasheet says device uses BAR0, BAR1, BAR2 -- do not trust it */ /* BAR0 */ - info->mem[0].name = "PCI chipset, interrupts, status " - "bits, special functions"; - info->mem[0].addr = pci_resource_start(dev, 0); - if (!info->mem[0].addr) + if (mf624_setup_mem(dev, 0, &info->mem[0], "PCI chipset, interrupts, status " + "bits, special functions")) goto out_release; - info->mem[0].size = pci_resource_len(dev, 0); - info->mem[0].memtype = UIO_MEM_PHYS; - info->mem[0].internal_addr = pci_ioremap_bar(dev, 0); - if (!info->mem[0].internal_addr) - goto out_release; - /* BAR2 */ - info->mem[1].name = "ADC, DAC, DIO"; - info->mem[1].addr = pci_resource_start(dev, 2); - if (!info->mem[1].addr) - goto out_unmap0; - info->mem[1].size = pci_resource_len(dev, 2); - info->mem[1].memtype = UIO_MEM_PHYS; - info->mem[1].internal_addr = pci_ioremap_bar(dev, 2); - if (!info->mem[1].internal_addr) + if (mf624_setup_mem(dev, 2, &info->mem[1], "ADC, DAC, DIO")) goto out_unmap0; /* BAR4 */ - info->mem[2].name = "Counter/timer chip"; - info->mem[2].addr = pci_resource_start(dev, 4); - if (!info->mem[2].addr) - goto out_unmap1; - info->mem[2].size = pci_resource_len(dev, 4); - info->mem[2].memtype = UIO_MEM_PHYS; - info->mem[2].internal_addr = pci_ioremap_bar(dev, 4); - if (!info->mem[2].internal_addr) + if (mf624_setup_mem(dev, 4, &info->mem[2], "Counter/timer chip")) goto out_unmap1; info->irq = dev->irq; diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index fbe493d44e816970fa367cfc71a986fbd1a27909..939a63bca82fb43c39dc9f46722262aa3b0037d1 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -35,7 +35,6 @@ config USB_COMMON config USB_ARCH_HAS_HCD def_bool y -# ARM SA1111 chips have a non-PCI based "OHCI-compatible" USB host interface. config USB tristate "Support for Host-side USB" depends on USB_ARCH_HAS_HCD @@ -73,6 +72,17 @@ config USB To compile this driver as a module, choose M here: the module will be called usbcore. +config USB_PCI + bool "PCI based USB host interface" + depends on PCI + default y + ---help--- + A lot of embeded system SOC (e.g. freescale T2080) have both + PCI and USB modules. But USB module is controlled by registers + directly, it have no relationship with PCI module. + + When say N here it will not build PCI related code in USB driver. + if USB source "drivers/usb/core/Kconfig" @@ -152,6 +162,8 @@ source "drivers/usb/phy/Kconfig" source "drivers/usb/gadget/Kconfig" +source "drivers/usb/typec/Kconfig" + config USB_LED_TRIG bool "USB LED Triggers" depends on LEDS_CLASS && LEDS_TRIGGERS diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 7791af6c102c7f4ea2c8ecdeda5cbc08fb03a55c..9650b351c26c7f2064facc118435cb1cea1b12ee 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_USB_ISP1760) += isp1760/ obj-$(CONFIG_USB_MON) += mon/ obj-$(CONFIG_USB_MTU3) += mtu3/ -obj-$(CONFIG_PCI) += host/ +obj-$(CONFIG_USB_PCI) += host/ obj-$(CONFIG_USB_EHCI_HCD) += host/ obj-$(CONFIG_USB_ISP116X_HCD) += host/ obj-$(CONFIG_USB_OHCI_HCD) += host/ @@ -49,7 +49,7 @@ obj-$(CONFIG_USB_MICROTEK) += image/ obj-$(CONFIG_USB_SERIAL) += serial/ obj-$(CONFIG_USB) += misc/ -obj-$(CONFIG_EARLY_PRINTK_DBGP) += early/ +obj-$(CONFIG_EARLY_PRINTK_USB) += early/ obj-$(CONFIG_USB_ATM) += atm/ obj-$(CONFIG_USB_SPEEDTOUCH) += atm/ @@ -62,3 +62,5 @@ obj-$(CONFIG_USB_GADGET) += gadget/ obj-$(CONFIG_USB_COMMON) += common/ obj-$(CONFIG_USBIP_CORE) += usbip/ + +obj-$(CONFIG_TYPEC) += typec/ diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index f9fe86b6f7b5b6ac5e35d944500cd74781f6dd26..d65a64c29b852af9ec47f48fe4d3e0024aa66743 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -474,7 +474,7 @@ static ssize_t cxacru_sysfs_store_adsl_config(struct device *dev, ret = sscanf(buf + pos, "%x=%x%n", &index, &value, &tmp); if (ret < 2) return -EINVAL; - if (index < 0 || index > 0x7f) + if (index > 0x7f) return -EINVAL; if (tmp < 0 || tmp > len - pos) return -EINVAL; diff --git a/drivers/usb/chipidea/Kconfig b/drivers/usb/chipidea/Kconfig index fc96f5cdcb5cc7744913f560f1c23f70fe6b8250..51f4157bbecfd9972d5c334c688eceeaa9f3ff74 100644 --- a/drivers/usb/chipidea/Kconfig +++ b/drivers/usb/chipidea/Kconfig @@ -20,7 +20,7 @@ config USB_CHIPIDEA_OF config USB_CHIPIDEA_PCI tristate - depends on PCI + depends on USB_PCI depends on NOP_USB_XCEIV default USB_CHIPIDEA diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index 59e22389c10b01c7a37c719c84c32b9b19b9a3e4..6743f85b1b7ad816397825b70ac20c59089f26c2 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -177,6 +177,7 @@ struct hw_bank { * @td_pool: allocation pool for transfer descriptors * @gadget: device side representation for peripheral controller * @driver: gadget driver + * @resume_state: save the state of gadget suspend from * @hw_ep_max: total number of endpoints supported by hardware * @ci_hw_ep: array of endpoints * @ep0_dir: ep0 direction @@ -227,6 +228,7 @@ struct ci_hdrc { struct usb_gadget gadget; struct usb_gadget_driver *driver; + enum usb_device_state resume_state; unsigned hw_ep_max; struct ci_hw_ep ci_hw_ep[ENDPT_MAX]; u32 ep0_dir; diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 79ad8e91632e6d29f3a67a328b559d2b2be7f793..fe4fe24407296d52ba28a0408e39ba131b4e3737 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -783,9 +783,6 @@ struct platform_device *ci_hdrc_add_device(struct device *dev, } pdev->dev.parent = dev; - pdev->dev.dma_mask = dev->dma_mask; - pdev->dev.dma_parms = dev->dma_parms; - dma_set_coherent_mask(&pdev->dev, dev->coherent_dma_mask); ret = platform_device_add_resources(pdev, res, nres); if (ret) @@ -841,6 +838,59 @@ static void ci_get_otg_capable(struct ci_hdrc *ci) } } +static ssize_t ci_role_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ci_hdrc *ci = dev_get_drvdata(dev); + + if (ci->role != CI_ROLE_END) + return sprintf(buf, "%s\n", ci_role(ci)->name); + + return 0; +} + +static ssize_t ci_role_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t n) +{ + struct ci_hdrc *ci = dev_get_drvdata(dev); + enum ci_role role; + int ret; + + if (!(ci->roles[CI_ROLE_HOST] && ci->roles[CI_ROLE_GADGET])) { + dev_warn(dev, "Current configuration is not dual-role, quit\n"); + return -EPERM; + } + + for (role = CI_ROLE_HOST; role < CI_ROLE_END; role++) + if (!strncmp(buf, ci->roles[role]->name, + strlen(ci->roles[role]->name))) + break; + + if (role == CI_ROLE_END || role == ci->role) + return -EINVAL; + + pm_runtime_get_sync(dev); + disable_irq(ci->irq); + ci_role_stop(ci); + ret = ci_role_start(ci, role); + if (!ret && ci->role == CI_ROLE_GADGET) + ci_handle_vbus_change(ci); + enable_irq(ci->irq); + pm_runtime_put_sync(dev); + + return (ret == 0) ? n : ret; +} +static DEVICE_ATTR(role, 0644, ci_role_show, ci_role_store); + +static struct attribute *ci_attrs[] = { + &dev_attr_role.attr, + NULL, +}; + +static struct attribute_group ci_attr_group = { + .attrs = ci_attrs, +}; + static int ci_hdrc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1007,11 +1057,18 @@ static int ci_hdrc_probe(struct platform_device *pdev) ci_hdrc_otg_fsm_start(ci); device_set_wakeup_capable(&pdev->dev, true); - ret = dbg_create_files(ci); - if (!ret) - return 0; + if (ret) + goto stop; + + ret = sysfs_create_group(&dev->kobj, &ci_attr_group); + if (ret) + goto remove_debug; + + return 0; +remove_debug: + dbg_remove_files(ci); stop: ci_role_destroy(ci); deinit_phy: @@ -1033,6 +1090,7 @@ static int ci_hdrc_remove(struct platform_device *pdev) } dbg_remove_files(ci); + sysfs_remove_group(&ci->dev->kobj, &ci_attr_group); ci_role_destroy(ci); ci_hdrc_enter_lpm(ci, true); ci_usb_phy_exit(ci); diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 6d23eede4d8cda5014b041f965f3e060d201be74..1c31e8a088101ff70d3aa37553d1e9b2d906b324 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c @@ -294,7 +294,8 @@ static int ci_role_show(struct seq_file *s, void *data) { struct ci_hdrc *ci = s->private; - seq_printf(s, "%s\n", ci_role(ci)->name); + if (ci->role != CI_ROLE_END) + seq_printf(s, "%s\n", ci_role(ci)->name); return 0; } diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 915f3e91586e68adddce0367c3179753a90fd078..18cb8e46262d3ed3492d1045b7e15b9b4496f479 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -123,7 +123,8 @@ static int host_start(struct ci_hdrc *ci) if (usb_disabled()) return -ENODEV; - hcd = usb_create_hcd(&ci_ehci_hc_driver, ci->dev, dev_name(ci->dev)); + hcd = __usb_create_hcd(&ci_ehci_hc_driver, ci->dev->parent, + ci->dev, dev_name(ci->dev), NULL); if (!hcd) return -ENOMEM; diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index f88e9157fad07ea8443a8c72f036133418721cdf..d68b125796f987d7133fef50fc8486363a5ef305 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -423,7 +423,8 @@ static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq) hwreq->req.status = -EALREADY; - ret = usb_gadget_map_request(&ci->gadget, &hwreq->req, hwep->dir); + ret = usb_gadget_map_request_by_dev(ci->dev->parent, + &hwreq->req, hwep->dir); if (ret) return ret; @@ -603,7 +604,8 @@ static int _hardware_dequeue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq) list_del_init(&node->td); } - usb_gadget_unmap_request(&hwep->ci->gadget, &hwreq->req, hwep->dir); + usb_gadget_unmap_request_by_dev(hwep->ci->dev->parent, + &hwreq->req, hwep->dir); hwreq->req.actual += actual; @@ -1845,27 +1847,32 @@ static irqreturn_t udc_irq(struct ci_hdrc *ci) if (USBi_PCI & intr) { ci->gadget.speed = hw_port_is_high_speed(ci) ? USB_SPEED_HIGH : USB_SPEED_FULL; - if (ci->suspended && ci->driver->resume) { - spin_unlock(&ci->lock); - ci->driver->resume(&ci->gadget); - spin_lock(&ci->lock); + if (ci->suspended) { + if (ci->driver->resume) { + spin_unlock(&ci->lock); + ci->driver->resume(&ci->gadget); + spin_lock(&ci->lock); + } ci->suspended = 0; + usb_gadget_set_state(&ci->gadget, + ci->resume_state); } } if (USBi_UI & intr) isr_tr_complete_handler(ci); - if (USBi_SLI & intr) { + if ((USBi_SLI & intr) && !(ci->suspended)) { + ci->suspended = 1; + ci->resume_state = ci->gadget.state; if (ci->gadget.speed != USB_SPEED_UNKNOWN && ci->driver->suspend) { - ci->suspended = 1; spin_unlock(&ci->lock); ci->driver->suspend(&ci->gadget); - usb_gadget_set_state(&ci->gadget, - USB_STATE_SUSPENDED); spin_lock(&ci->lock); } + usb_gadget_set_state(&ci->gadget, + USB_STATE_SUSPENDED); } retval = IRQ_HANDLED; } else { @@ -1899,13 +1906,13 @@ static int udc_start(struct ci_hdrc *ci) INIT_LIST_HEAD(&ci->gadget.ep_list); /* alloc resources */ - ci->qh_pool = dma_pool_create("ci_hw_qh", dev, + ci->qh_pool = dma_pool_create("ci_hw_qh", dev->parent, sizeof(struct ci_hw_qh), 64, CI_HDRC_PAGE_SIZE); if (ci->qh_pool == NULL) return -ENOMEM; - ci->td_pool = dma_pool_create("ci_hw_td", dev, + ci->td_pool = dma_pool_create("ci_hw_td", dev->parent, sizeof(struct ci_hw_td), 64, CI_HDRC_PAGE_SIZE); if (ci->td_pool == NULL) { @@ -1973,6 +1980,8 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci) */ if (ci->is_otg) hw_write_otgsc(ci, OTGSC_BSVIE | OTGSC_BSVIS, OTGSC_BSVIS); + + ci->vbus_active = 0; } /** @@ -1984,6 +1993,7 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci) int ci_hdrc_gadget_init(struct ci_hdrc *ci) { struct ci_role_driver *rdrv; + int ret; if (!hw_read(ci, CAP_DCCPARAMS, DCCPARAMS_DC)) return -ENXIO; @@ -1996,7 +2006,10 @@ int ci_hdrc_gadget_init(struct ci_hdrc *ci) rdrv->stop = udc_id_switch_for_host; rdrv->irq = udc_irq; rdrv->name = "gadget"; - ci->roles[CI_ROLE_GADGET] = rdrv; - return udc_start(ci); + ret = udc_start(ci); + if (!ret) + ci->roles[CI_ROLE_GADGET] = rdrv; + + return ret; } diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index e77a4ed4f021da17f5c1f366e22b8447c366474a..9f4a0185dd609c09f6873f85b849c9553821dca1 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -108,6 +108,8 @@ struct imx_usbmisc { const struct usbmisc_ops *ops; }; +static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data); + static int usbmisc_imx25_init(struct imx_usbmisc_data *data) { struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev); @@ -242,10 +244,15 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data) val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN | MX53_USB_UHx_CTRL_ULPI_INT_EN; writel(val, reg); - /* Disable internal 60Mhz clock */ - reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET; - val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF; - writel(val, reg); + if (is_imx53_usbmisc(data)) { + /* Disable internal 60Mhz clock */ + reg = usbmisc->base + + MX53_USB_CLKONOFF_CTRL_OFFSET; + val = readl(reg) | + MX53_USB_CLKONOFF_CTRL_H2_INT60CKOFF; + writel(val, reg); + } + } if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH2_CTRL_OFFSET; @@ -267,10 +274,15 @@ static int usbmisc_imx53_init(struct imx_usbmisc_data *data) val = readl(reg) | MX53_USB_UHx_CTRL_WAKE_UP_EN | MX53_USB_UHx_CTRL_ULPI_INT_EN; writel(val, reg); - /* Disable internal 60Mhz clock */ - reg = usbmisc->base + MX53_USB_CLKONOFF_CTRL_OFFSET; - val = readl(reg) | MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF; - writel(val, reg); + + if (is_imx53_usbmisc(data)) { + /* Disable internal 60Mhz clock */ + reg = usbmisc->base + + MX53_USB_CLKONOFF_CTRL_OFFSET; + val = readl(reg) | + MX53_USB_CLKONOFF_CTRL_H3_INT60CKOFF; + writel(val, reg); + } } if (data->disable_oc) { reg = usbmisc->base + MX53_USB_UH3_CTRL_OFFSET; @@ -456,6 +468,10 @@ static const struct usbmisc_ops imx27_usbmisc_ops = { .init = usbmisc_imx27_init, }; +static const struct usbmisc_ops imx51_usbmisc_ops = { + .init = usbmisc_imx53_init, +}; + static const struct usbmisc_ops imx53_usbmisc_ops = { .init = usbmisc_imx53_init, }; @@ -479,6 +495,13 @@ static const struct usbmisc_ops imx7d_usbmisc_ops = { .set_wakeup = usbmisc_imx7d_set_wakeup, }; +static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data) +{ + struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev); + + return usbmisc->ops == &imx53_usbmisc_ops; +} + int imx_usbmisc_init(struct imx_usbmisc_data *data) { struct imx_usbmisc *usbmisc; @@ -536,7 +559,7 @@ static const struct of_device_id usbmisc_imx_dt_ids[] = { }, { .compatible = "fsl,imx51-usbmisc", - .data = &imx53_usbmisc_ops, + .data = &imx51_usbmisc_ops, }, { .compatible = "fsl,imx53-usbmisc", diff --git a/drivers/usb/class/Kconfig b/drivers/usb/class/Kconfig index bb8b73682a70ccf28dc7669368c892a187594007..971385fe9abce65689baf82988c5183edb054b82 100644 --- a/drivers/usb/class/Kconfig +++ b/drivers/usb/class/Kconfig @@ -12,7 +12,7 @@ config USB_ACM Please read for details. If your modem only reports "Cls=ff(vend.)" in the descriptors in - /proc/bus/usb/devices, then your modem will not work with this + /sys/kernel/debug/usb/devices, then your modem will not work with this driver. To compile this driver as a module, choose M here: the diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index d5388938bc7aaf56accce547046429eaf4fe419e..5357d83bbda2b87d8823b24995dbfcd626c2262f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -283,39 +284,13 @@ static DEVICE_ATTR(iCountryCodeRelDate, S_IRUGO, show_country_rel_date, NULL); * Interrupt handlers for various ACM device responses */ -/* control interface reports status changes with "interrupt" transfers */ -static void acm_ctrl_irq(struct urb *urb) +static void acm_process_notification(struct acm *acm, unsigned char *buf) { - struct acm *acm = urb->context; - struct usb_cdc_notification *dr = urb->transfer_buffer; - unsigned char *data; int newctrl; int difference; - int retval; - int status = urb->status; + struct usb_cdc_notification *dr = (struct usb_cdc_notification *)buf; + unsigned char *data = buf + sizeof(struct usb_cdc_notification); - switch (status) { - case 0: - /* success */ - break; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - /* this urb is terminated, clean up */ - dev_dbg(&acm->control->dev, - "%s - urb shutting down with status: %d\n", - __func__, status); - return; - default: - dev_dbg(&acm->control->dev, - "%s - nonzero urb status received: %d\n", - __func__, status); - goto exit; - } - - usb_mark_last_busy(acm->dev); - - data = (unsigned char *)(dr + 1); switch (dr->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: dev_dbg(&acm->control->dev, @@ -323,7 +298,15 @@ static void acm_ctrl_irq(struct urb *urb) break; case USB_CDC_NOTIFY_SERIAL_STATE: + if (le16_to_cpu(dr->wLength) != 2) { + dev_dbg(&acm->control->dev, + "%s - malformed serial state\n", __func__); + break; + } + newctrl = get_unaligned_le16(data); + dev_dbg(&acm->control->dev, + "%s - serial state: 0x%x\n", __func__, newctrl); if (!acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { dev_dbg(&acm->control->dev, @@ -359,13 +342,86 @@ static void acm_ctrl_irq(struct urb *urb) default: dev_dbg(&acm->control->dev, - "%s - unknown notification %d received: index %d " - "len %d data0 %d data1 %d\n", + "%s - unknown notification %d received: index %d len %d\n", __func__, - dr->bNotificationType, dr->wIndex, - dr->wLength, data[0], data[1]); + dr->bNotificationType, dr->wIndex, dr->wLength); + } +} + +/* control interface reports status changes with "interrupt" transfers */ +static void acm_ctrl_irq(struct urb *urb) +{ + struct acm *acm = urb->context; + struct usb_cdc_notification *dr = urb->transfer_buffer; + unsigned int current_size = urb->actual_length; + unsigned int expected_size, copy_size, alloc_size; + int retval; + int status = urb->status; + + switch (status) { + case 0: + /* success */ break; + case -ECONNRESET: + case -ENOENT: + case -ESHUTDOWN: + /* this urb is terminated, clean up */ + acm->nb_index = 0; + dev_dbg(&acm->control->dev, + "%s - urb shutting down with status: %d\n", + __func__, status); + return; + default: + dev_dbg(&acm->control->dev, + "%s - nonzero urb status received: %d\n", + __func__, status); + goto exit; + } + + usb_mark_last_busy(acm->dev); + + if (acm->nb_index) + dr = (struct usb_cdc_notification *)acm->notification_buffer; + + /* size = notification-header + (optional) data */ + expected_size = sizeof(struct usb_cdc_notification) + + le16_to_cpu(dr->wLength); + + if (current_size < expected_size) { + /* notification is transmitted fragmented, reassemble */ + if (acm->nb_size < expected_size) { + if (acm->nb_size) { + kfree(acm->notification_buffer); + acm->nb_size = 0; + } + alloc_size = roundup_pow_of_two(expected_size); + /* + * kmalloc ensures a valid notification_buffer after a + * use of kfree in case the previous allocation was too + * small. Final freeing is done on disconnect. + */ + acm->notification_buffer = + kmalloc(alloc_size, GFP_ATOMIC); + if (!acm->notification_buffer) + goto exit; + acm->nb_size = alloc_size; + } + + copy_size = min(current_size, + expected_size - acm->nb_index); + + memcpy(&acm->notification_buffer[acm->nb_index], + urb->transfer_buffer, copy_size); + acm->nb_index += copy_size; + current_size = acm->nb_index; + } + + if (current_size >= expected_size) { + /* notification complete */ + acm_process_notification(acm, (unsigned char *)dr); + acm->nb_index = 0; } + exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval && retval != -EPERM) @@ -1174,6 +1230,7 @@ static int acm_probe(struct usb_interface *intf, int combined_interfaces = 0; struct device *tty_dev; int rv = -ENOMEM; + int res; /* normal quirks */ quirks = (unsigned long)id->driver_info; @@ -1274,23 +1331,12 @@ static int acm_probe(struct usb_interface *intf, return -EINVAL; } look_for_collapsed_interface: - for (i = 0; i < 3; i++) { - struct usb_endpoint_descriptor *ep; - ep = &data_interface->cur_altsetting->endpoint[i].desc; - - if (usb_endpoint_is_int_in(ep)) - epctrl = ep; - else if (usb_endpoint_is_bulk_out(ep)) - epwrite = ep; - else if (usb_endpoint_is_bulk_in(ep)) - epread = ep; - else - return -EINVAL; - } - if (!epctrl || !epread || !epwrite) - return -ENODEV; - else - goto made_compressed_probe; + res = usb_find_common_endpoints(data_interface->cur_altsetting, + &epread, &epwrite, &epctrl, NULL); + if (res) + return res; + + goto made_compressed_probe; } skip_normal_probe: @@ -1488,6 +1534,9 @@ static int acm_probe(struct usb_interface *intf, epctrl->bInterval ? epctrl->bInterval : 16); acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; acm->ctrlurb->transfer_dma = acm->ctrl_dma; + acm->notification_buffer = NULL; + acm->nb_index = 0; + acm->nb_size = 0; dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor); @@ -1580,6 +1629,8 @@ static void acm_disconnect(struct usb_interface *intf) usb_free_coherent(acm->dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); acm_read_buffers_free(acm); + kfree(acm->notification_buffer); + if (!acm->combined_interfaces) usb_driver_release_interface(&acm_driver, intf == acm->control ? acm->data : acm->control); diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index c980f11cdf560a773e6d6bc4b8fccd232c0df200..7a2b3deafc90abc3d524463d32b49172a7778695 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -98,7 +98,9 @@ struct acm { struct acm_wb *putbuffer; /* for acm_tty_put_char() */ int rx_buflimit; spinlock_t read_lock; - int write_used; /* number of non-empty write buffers */ + u8 *notification_buffer; /* to reassemble fragmented notifications */ + unsigned int nb_index; + unsigned int nb_size; int transmitting; spinlock_t write_lock; struct mutex mutex; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 8fda45a45bd3713398922f3158f48674153514a6..08669fee6d7f646c4a52816a9d240c441edd21bf 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -58,7 +58,6 @@ MODULE_DEVICE_TABLE (usb, wdm_ids); #define WDM_SUSPENDING 8 #define WDM_RESETTING 9 #define WDM_OVERFLOW 10 -#define WDM_DRAIN_ON_OPEN 11 #define WDM_MAX 16 @@ -182,7 +181,7 @@ static void wdm_in_callback(struct urb *urb) "nonzero urb status received: -ESHUTDOWN\n"); goto skip_error; case -EPIPE: - dev_dbg(&desc->intf->dev, + dev_err(&desc->intf->dev, "nonzero urb status received: -EPIPE\n"); break; default: @@ -210,25 +209,6 @@ static void wdm_in_callback(struct urb *urb) desc->reslength = length; } } - - /* - * Handling devices with the WDM_DRAIN_ON_OPEN flag set: - * If desc->resp_count is unset, then the urb was submitted - * without a prior notification. If the device returned any - * data, then this implies that it had messages queued without - * notifying us. Continue reading until that queue is flushed. - */ - if (!desc->resp_count) { - if (!length) { - /* do not propagate the expected -EPIPE */ - desc->rerr = 0; - goto unlock; - } - dev_dbg(&desc->intf->dev, "got %d bytes without notification\n", length); - set_bit(WDM_RESPONDING, &desc->flags); - usb_submit_urb(desc->response, GFP_ATOMIC); - } - skip_error: set_bit(WDM_READ, &desc->flags); wake_up(&desc->wait); @@ -243,7 +223,6 @@ static void wdm_in_callback(struct urb *urb) service_outstanding_interrupt(desc); } -unlock: spin_unlock(&desc->iuspin); } @@ -686,17 +665,6 @@ static int wdm_open(struct inode *inode, struct file *file) dev_err(&desc->intf->dev, "Error submitting int urb - %d\n", rv); rv = usb_translate_errors(rv); - } else if (test_bit(WDM_DRAIN_ON_OPEN, &desc->flags)) { - /* - * Some devices keep pending messages queued - * without resending notifications. We must - * flush the message queue before we can - * assume a one-to-one relationship between - * notifications and messages in the queue - */ - dev_dbg(&desc->intf->dev, "draining queued data\n"); - set_bit(WDM_RESPONDING, &desc->flags); - rv = usb_submit_urb(desc->response, GFP_KERNEL); } } else { rv = 0; @@ -803,8 +771,7 @@ static void wdm_rxwork(struct work_struct *work) /* --- hotplug --- */ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep, - u16 bufsize, int (*manage_power)(struct usb_interface *, int), - bool drain_on_open) + u16 bufsize, int (*manage_power)(struct usb_interface *, int)) { int rv = -ENOMEM; struct wdm_device *desc; @@ -891,68 +858,6 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor desc->manage_power = manage_power; - /* - * "drain_on_open" enables a hack to work around a firmware - * issue observed on network functions, in particular MBIM - * functions. - * - * Quoting section 7 of the CDC-WMC r1.1 specification: - * - * "The firmware shall interpret GetEncapsulatedResponse as a - * request to read response bytes. The firmware shall send - * the next wLength bytes from the response. The firmware - * shall allow the host to retrieve data using any number of - * GetEncapsulatedResponse requests. The firmware shall - * return a zero- length reply if there are no data bytes - * available. - * - * The firmware shall send ResponseAvailable notifications - * periodically, using any appropriate algorithm, to inform - * the host that there is data available in the reply - * buffer. The firmware is allowed to send ResponseAvailable - * notifications even if there is no data available, but - * this will obviously reduce overall performance." - * - * These requirements, although they make equally sense, are - * often not implemented by network functions. Some firmwares - * will queue data indefinitely, without ever resending a - * notification. The result is that the driver and firmware - * loses "syncronization" if the driver ever fails to respond - * to a single notification, something which easily can happen - * on release(). When this happens, the driver will appear to - * never receive notifications for the most current data. Each - * notification will only cause a single read, which returns - * the oldest data in the firmware's queue. - * - * The "drain_on_open" hack resolves the situation by draining - * data from the firmware until none is returned, without a - * prior notification. - * - * This will inevitably race with the firmware, risking that - * we read data from the device before handling the associated - * notification. To make things worse, some of the devices - * needing the hack do not implement the "return zero if no - * data is available" requirement either. Instead they return - * an error on the subsequent read in this case. This means - * that "winning" the race can cause an unexpected EIO to - * userspace. - * - * "winning" the race is more likely on resume() than on - * open(), and the unexpected error is more harmful in the - * middle of an open session. The hack is therefore only - * applied on open(), and not on resume() where it logically - * would be equally necessary. So we define open() as the only - * driver <-> device "syncronization point". Should we happen - * to lose a notification after open(), then syncronization - * will be lost until release() - * - * The hack should not be enabled for CDC WDM devices - * conforming to the CDC-WMC r1.1 specification. This is - * ensured by setting drain_on_open to false in wdm_probe(). - */ - if (drain_on_open) - set_bit(WDM_DRAIN_ON_OPEN, &desc->flags); - spin_lock(&wdm_device_list_lock); list_add(&desc->device_list, &wdm_device_list); spin_unlock(&wdm_device_list_lock); @@ -1006,7 +911,7 @@ static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id) goto err; ep = &iface->endpoint[0].desc; - rv = wdm_create(intf, ep, maxcom, &wdm_manage_power, false); + rv = wdm_create(intf, ep, maxcom, &wdm_manage_power); err: return rv; @@ -1038,7 +943,7 @@ struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf, { int rv = -EINVAL; - rv = wdm_create(intf, ep, bufsize, manage_power, true); + rv = wdm_create(intf, ep, bufsize, manage_power); if (rv < 0) goto err; diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index cc61055fb9befcbc00a6a32b85996518d1aebbe4..fb87c17ed6faf8302c1eebd210a10be4a23f69ce 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -294,7 +294,7 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i /* * See the description for usblp_select_alts() below for the usage - * explanation. Look into your /proc/bus/usb/devices and dmesg in + * explanation. Look into your /sys/kernel/debug/usb/devices and dmesg in * case of any trouble. */ static int proto_bias = -1; @@ -1239,8 +1239,9 @@ static int usblp_select_alts(struct usblp *usblp) { struct usb_interface *if_alt; struct usb_host_interface *ifd; - struct usb_endpoint_descriptor *epd, *epwrite, *epread; - int p, i, e; + struct usb_endpoint_descriptor *epwrite, *epread; + int p, i; + int res; if_alt = usblp->intf; @@ -1260,31 +1261,21 @@ static int usblp_select_alts(struct usblp *usblp) ifd->desc.bInterfaceProtocol > USBLP_LAST_PROTOCOL) continue; - /* Look for bulk OUT and IN endpoints. */ - epwrite = epread = NULL; - for (e = 0; e < ifd->desc.bNumEndpoints; e++) { - epd = &ifd->endpoint[e].desc; - - if (usb_endpoint_is_bulk_out(epd)) - if (!epwrite) - epwrite = epd; - - if (usb_endpoint_is_bulk_in(epd)) - if (!epread) - epread = epd; + /* Look for the expected bulk endpoints. */ + if (ifd->desc.bInterfaceProtocol > 1) { + res = usb_find_common_endpoints(ifd, + &epread, &epwrite, NULL, NULL); + } else { + epread = NULL; + res = usb_find_bulk_out_endpoint(ifd, &epwrite); } /* Ignore buggy hardware without the right endpoints. */ - if (!epwrite || (ifd->desc.bInterfaceProtocol > 1 && !epread)) + if (res) continue; - /* - * Turn off reads for USB_CLASS_PRINTER/1/1 (unidirectional) - * interfaces and buggy bidirectional printers. - */ - if (ifd->desc.bInterfaceProtocol == 1) { - epread = NULL; - } else if (usblp->quirks & USBLP_QUIRK_BIDIR) { + /* Turn off reads for buggy bidirectional printers. */ + if (usblp->quirks & USBLP_QUIRK_BIDIR) { printk(KERN_INFO "usblp%d: Disabling reads from " "problematic bidirectional printer\n", usblp->minor); diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 8fb309a0ff6b5dae0c6f867cd323585aeff17252..578f424decc22b9f340732f6f381fc0d23135e72 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1375,7 +1375,7 @@ static int usbtmc_probe(struct usb_interface *intf, { struct usbtmc_device_data *data; struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; + struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in; int n; int retcode; @@ -1421,49 +1421,29 @@ static int usbtmc_probe(struct usb_interface *intf, iface_desc = data->intf->cur_altsetting; data->ifnum = iface_desc->desc.bInterfaceNumber; - /* Find bulk in endpoint */ - for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) { - endpoint = &iface_desc->endpoint[n].desc; - - if (usb_endpoint_is_bulk_in(endpoint)) { - data->bulk_in = endpoint->bEndpointAddress; - dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", - data->bulk_in); - break; - } - } - - /* Find bulk out endpoint */ - for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) { - endpoint = &iface_desc->endpoint[n].desc; - - if (usb_endpoint_is_bulk_out(endpoint)) { - data->bulk_out = endpoint->bEndpointAddress; - dev_dbg(&intf->dev, "Found Bulk out endpoint at %u\n", - data->bulk_out); - break; - } - } - - if (!data->bulk_out || !data->bulk_in) { + /* Find bulk endpoints */ + retcode = usb_find_common_endpoints(iface_desc, + &bulk_in, &bulk_out, NULL, NULL); + if (retcode) { dev_err(&intf->dev, "bulk endpoints not found\n"); - retcode = -ENODEV; goto err_put; } + data->bulk_in = bulk_in->bEndpointAddress; + dev_dbg(&intf->dev, "Found bulk in endpoint at %u\n", data->bulk_in); + + data->bulk_out = bulk_out->bEndpointAddress; + dev_dbg(&intf->dev, "Found Bulk out endpoint at %u\n", data->bulk_out); + /* Find int endpoint */ - for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) { - endpoint = &iface_desc->endpoint[n].desc; - - if (usb_endpoint_is_int_in(endpoint)) { - data->iin_ep_present = 1; - data->iin_ep = endpoint->bEndpointAddress; - data->iin_wMaxPacketSize = usb_endpoint_maxp(endpoint); - data->iin_interval = endpoint->bInterval; - dev_dbg(&intf->dev, "Found Int in endpoint at %u\n", + retcode = usb_find_int_in_endpoint(iface_desc, &int_in); + if (!retcode) { + data->iin_ep_present = 1; + data->iin_ep = int_in->bEndpointAddress; + data->iin_wMaxPacketSize = usb_endpoint_maxp(int_in); + data->iin_interval = int_in->bInterval; + dev_dbg(&intf->dev, "Found Int in endpoint at %u\n", data->iin_ep); - break; - } } retcode = get_capabilities(data); diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index c9480d77810c9ff93db21c18ac262a3a046c62b2..930e8f35f8df39255be60e2e8267f3acff72b2f9 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -107,7 +107,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, int len; struct ulpi *ulpi = to_ulpi_dev(dev); - len = of_device_get_modalias(dev, buf, PAGE_SIZE - 1); + len = of_device_modalias(dev, buf, PAGE_SIZE); if (len != -ENODEV) return len; diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 2f537bbdda093d90285789cd92f77da608dbbe14..b8fe31e409a5d6d13f04995a0707fafcf1e9daa0 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -31,6 +31,13 @@ #include #include +#ifdef VERBOSE +#define VDBG(fmt, args...) pr_debug("[%s] " fmt, \ + __func__, ## args) +#else +#define VDBG(stuff...) do {} while (0) +#endif + /* Change USB protocol when there is a protocol change */ static int otg_set_protocol(struct otg_fsm *fsm, int protocol) { diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig index 0e5a889742b34b5ee6f55597741aeeb70f984527..4d75d9a80001eda9039d853871969938b4b82a41 100644 --- a/drivers/usb/core/Kconfig +++ b/drivers/usb/core/Kconfig @@ -26,7 +26,7 @@ config USB_DEFAULT_PERSIST unplugged, causing any mounted filesystems to be lost. The persist feature can still be enabled for individual devices through the power/persist sysfs node. See - Documentation/usb/persist.txt for more info. + Documentation/driver-api/usb/persist.rst for more info. If you have any questions about this, say Y here, only say N if you know exactly what you are doing. diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile index b99b871c4b9d9b05c2f4a2f843485fc4ca965506..250ec1d662d9ecbb53ec59449f5cd229a5071e77 100644 --- a/drivers/usb/core/Makefile +++ b/drivers/usb/core/Makefile @@ -8,7 +8,7 @@ usbcore-y += devio.o notify.o generic.o quirks.o devices.o usbcore-y += port.o usbcore-$(CONFIG_OF) += of.o -usbcore-$(CONFIG_PCI) += hcd-pci.o +usbcore-$(CONFIG_USB_PCI) += hcd-pci.o usbcore-$(CONFIG_ACPI) += usb-acpi.o obj-$(CONFIG_USB) += usbcore.o diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index b9bf6e2eb6feabc49daebff549b361d06622d8e9..b64568cf572c9eade1bb5121c4118760bd845ef5 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -66,7 +66,7 @@ int hcd_buffer_create(struct usb_hcd *hcd) int i, size; if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!hcd->self.controller->dma_mask && + (!is_device_dma_capable(hcd->self.sysdev) && !(hcd->driver->flags & HCD_LOCAL_MEM))) return 0; @@ -75,7 +75,7 @@ int hcd_buffer_create(struct usb_hcd *hcd) if (!size) continue; snprintf(name, sizeof(name), "buffer-%d", size); - hcd->pool[i] = dma_pool_create(name, hcd->self.controller, + hcd->pool[i] = dma_pool_create(name, hcd->self.sysdev, size, size, 0); if (!hcd->pool[i]) { hcd_buffer_destroy(hcd); @@ -130,7 +130,7 @@ void *hcd_buffer_alloc( /* some USB hosts just use PIO */ if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!bus->controller->dma_mask && + (!is_device_dma_capable(bus->sysdev) && !(hcd->driver->flags & HCD_LOCAL_MEM))) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); @@ -140,7 +140,7 @@ void *hcd_buffer_alloc( if (size <= pool_max[i]) return dma_pool_alloc(hcd->pool[i], mem_flags, dma); } - return dma_alloc_coherent(hcd->self.controller, size, dma, mem_flags); + return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free( @@ -157,7 +157,7 @@ void hcd_buffer_free( return; if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!bus->controller->dma_mask && + (!is_device_dma_capable(bus->sysdev) && !(hcd->driver->flags & HCD_LOCAL_MEM))) { kfree(addr); return; @@ -169,5 +169,5 @@ void hcd_buffer_free( return; } } - dma_free_coherent(hcd->self.controller, size, addr, dma); + dma_free_coherent(hcd->self.sysdev, size, addr, dma); } diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index f2987ddb1cde3492189f21caa6b23e807218abe2..55dea2e7828fe027c632444c7888c29c204e47c1 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -24,7 +24,7 @@ * /devices contains USB topology, device, config, class, * interface, & endpoint data. * - * I considered using /proc/bus/usb/devices/device# for each device + * I considered using /dev/bus/usb/device# for each device * as it is attached or detached, but I didn't like this for some * reason -- maybe it's just too deep of a directory structure. * I also don't like looking in multiple places to gather and view @@ -40,7 +40,7 @@ * Converted the whole proc stuff to real * read methods. Now not the whole device list needs to fit * into one page, only the device list for one bus. - * Added a poll method to /proc/bus/usb/devices, to wake + * Added a poll method to /sys/kernel/debug/usb/devices, to wake * up an eventual usbd * 2000-01-04: Thomas Sailer * Turned into its own filesystem diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index cfc3cff6e8d5901be03e89d1423c6f66aa6a7d06..8e6ef671be9b60f948e521d4c273228f161cdbb6 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -475,11 +475,11 @@ static void snoop_urb(struct usb_device *udev, if (userurb) { /* Async */ if (when == SUBMIT) - dev_info(&udev->dev, "userurb %p, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else - dev_info(&udev->dev, "userurb %p, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); @@ -1895,7 +1895,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; @@ -1912,7 +1912,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { @@ -2043,7 +2043,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2060,7 +2060,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %p\n", as->userurb); + snoop(&ps->dev->dev, "reap %pK\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { @@ -2489,7 +2489,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, #endif case USBDEVFS_DISCARDURB: - snoop(&dev->dev, "%s: DISCARDURB %p\n", __func__, p); + snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p); ret = proc_unlinkurb(ps, p); break; diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index cdee5130638b109e5be899c41b8a5f432620c4e3..eb87a259d55c3abcc4d54e929cba861a0b429fae 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1331,6 +1331,24 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) */ if (udev->parent && !PMSG_IS_AUTO(msg)) status = 0; + + /* + * If the device is inaccessible, don't try to resume + * suspended interfaces and just return the error. + */ + if (status && status != -EBUSY) { + int err; + u16 devstat; + + err = usb_get_status(udev, USB_RECIP_DEVICE, 0, + &devstat); + if (err) { + dev_err(&udev->dev, + "Failed to suspend device, error %d\n", + status); + goto done; + } + } } /* If the suspend failed, resume interfaces that did get suspended */ @@ -1763,6 +1781,9 @@ static int autosuspend_check(struct usb_device *udev) int w, i; struct usb_interface *intf; + if (udev->state == USB_STATE_NOTATTACHED) + return -ENODEV; + /* Fail if autosuspend is disabled, or any interfaces are in use, or * any interface drivers require remote wakeup but it isn't available. */ diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index e26bd5e773ada0685329ec8ed65084ff72d7242d..87ad6b6bfee8f724cf0400633a352b6d7ae21797 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -29,6 +29,7 @@ #define MAX_USB_MINORS 256 static const struct file_operations *usb_minors[MAX_USB_MINORS]; static DECLARE_RWSEM(minor_rwsem); +static DEFINE_MUTEX(init_usb_class_mutex); static int usb_open(struct inode *inode, struct file *file) { @@ -111,8 +112,9 @@ static void release_usb_class(struct kref *kref) static void destroy_usb_class(void) { - if (usb_class) - kref_put(&usb_class->kref, release_usb_class); + mutex_lock(&init_usb_class_mutex); + kref_put(&usb_class->kref, release_usb_class); + mutex_unlock(&init_usb_class_mutex); } int usb_major_init(void) @@ -173,7 +175,10 @@ int usb_register_dev(struct usb_interface *intf, if (intf->minor >= 0) return -EADDRINUSE; + mutex_lock(&init_usb_class_mutex); retval = init_usb_class(); + mutex_unlock(&init_usb_class_mutex); + if (retval) return retval; diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 79bdca5cb9c7ae8f01b1f4a25c7ceacd9c716e2c..5dea98358c05c46b09f913fee8b7c48d9f2c4f85 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1076,6 +1076,7 @@ static void usb_deregister_bus (struct usb_bus *bus) static int register_root_hub(struct usb_hcd *hcd) { struct device *parent_dev = hcd->self.controller; + struct device *sysdev = hcd->self.sysdev; struct usb_device *usb_dev = hcd->self.root_hub; const int devnum = 1; int retval; @@ -1122,7 +1123,7 @@ static int register_root_hub(struct usb_hcd *hcd) /* Did the HC die before the root hub was registered? */ if (HCD_DEAD(hcd)) usb_hc_died (hcd); /* This time clean up */ - usb_dev->dev.of_node = parent_dev->of_node; + usb_dev->dev.of_node = sysdev->of_node; } mutex_unlock(&usb_bus_idr_lock); @@ -1435,7 +1436,7 @@ void usb_hcd_unmap_urb_setup_for_dma(struct usb_hcd *hcd, struct urb *urb) { if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_SETUP_MAP_SINGLE)) - dma_unmap_single(hcd->self.controller, + dma_unmap_single(hcd->self.sysdev, urb->setup_dma, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); @@ -1468,19 +1469,19 @@ void usb_hcd_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb) dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_DMA_MAP_SG)) - dma_unmap_sg(hcd->self.controller, + dma_unmap_sg(hcd->self.sysdev, urb->sg, urb->num_sgs, dir); else if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_DMA_MAP_PAGE)) - dma_unmap_page(hcd->self.controller, + dma_unmap_page(hcd->self.sysdev, urb->transfer_dma, urb->transfer_buffer_length, dir); else if (IS_ENABLED(CONFIG_HAS_DMA) && (urb->transfer_flags & URB_DMA_MAP_SINGLE)) - dma_unmap_single(hcd->self.controller, + dma_unmap_single(hcd->self.sysdev, urb->transfer_dma, urb->transfer_buffer_length, dir); @@ -1523,11 +1524,11 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, return ret; if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) { urb->setup_dma = dma_map_single( - hcd->self.controller, + hcd->self.sysdev, urb->setup_packet, sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE); - if (dma_mapping_error(hcd->self.controller, + if (dma_mapping_error(hcd->self.sysdev, urb->setup_dma)) return -EAGAIN; urb->transfer_flags |= URB_SETUP_MAP_SINGLE; @@ -1558,7 +1559,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, } n = dma_map_sg( - hcd->self.controller, + hcd->self.sysdev, urb->sg, urb->num_sgs, dir); @@ -1573,12 +1574,12 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, } else if (urb->sg) { struct scatterlist *sg = urb->sg; urb->transfer_dma = dma_map_page( - hcd->self.controller, + hcd->self.sysdev, sg_page(sg), sg->offset, urb->transfer_buffer_length, dir); - if (dma_mapping_error(hcd->self.controller, + if (dma_mapping_error(hcd->self.sysdev, urb->transfer_dma)) ret = -EAGAIN; else @@ -1588,11 +1589,11 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, ret = -EAGAIN; } else { urb->transfer_dma = dma_map_single( - hcd->self.controller, + hcd->self.sysdev, urb->transfer_buffer, urb->transfer_buffer_length, dir); - if (dma_mapping_error(hcd->self.controller, + if (dma_mapping_error(hcd->self.sysdev, urb->transfer_dma)) ret = -EAGAIN; else @@ -1722,7 +1723,7 @@ int usb_hcd_unlink_urb (struct urb *urb, int status) if (retval == 0) retval = -EINPROGRESS; else if (retval != -EIDRM && retval != -EBUSY) - dev_dbg(&udev->dev, "hcd_unlink_urb %p fail %d\n", + dev_dbg(&udev->dev, "hcd_unlink_urb %pK fail %d\n", urb, retval); usb_put_dev(udev); } @@ -1889,7 +1890,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev, /* kick hcd */ unlink1(hcd, urb, -ESHUTDOWN); dev_dbg (hcd->self.controller, - "shutdown urb %p ep%d%s%s\n", + "shutdown urb %pK ep%d%s%s\n", urb, usb_endpoint_num(&ep->desc), is_in ? "in" : "out", ({ char *s; @@ -2498,24 +2499,8 @@ static void init_giveback_urb_bh(struct giveback_urb_bh *bh) tasklet_init(&bh->bh, usb_giveback_urb_bh, (unsigned long)bh); } -/** - * usb_create_shared_hcd - create and initialize an HCD structure - * @driver: HC driver that will use this hcd - * @dev: device for this HC, stored in hcd->self.controller - * @bus_name: value to store in hcd->self.bus_name - * @primary_hcd: a pointer to the usb_hcd structure that is sharing the - * PCI device. Only allocate certain resources for the primary HCD - * Context: !in_interrupt() - * - * Allocate a struct usb_hcd, with extra space at the end for the - * HC driver's private data. Initialize the generic members of the - * hcd structure. - * - * Return: On success, a pointer to the created and initialized HCD structure. - * On failure (e.g. if memory is unavailable), %NULL. - */ -struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, - struct device *dev, const char *bus_name, +struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, + struct device *sysdev, struct device *dev, const char *bus_name, struct usb_hcd *primary_hcd) { struct usb_hcd *hcd; @@ -2535,6 +2520,7 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, hcd->bandwidth_mutex = kmalloc(sizeof(*hcd->bandwidth_mutex), GFP_KERNEL); if (!hcd->bandwidth_mutex) { + kfree(hcd->address0_mutex); kfree(hcd); dev_dbg(dev, "hcd bandwidth mutex alloc failed\n"); return NULL; @@ -2556,8 +2542,9 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, usb_bus_init(&hcd->self); hcd->self.controller = dev; + hcd->self.sysdev = sysdev; hcd->self.bus_name = bus_name; - hcd->self.uses_dma = (dev->dma_mask != NULL); + hcd->self.uses_dma = (sysdev->dma_mask != NULL); init_timer(&hcd->rh_timer); hcd->rh_timer.function = rh_timer_func; @@ -2572,6 +2559,30 @@ struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, "USB Host Controller"; return hcd; } +EXPORT_SYMBOL_GPL(__usb_create_hcd); + +/** + * usb_create_shared_hcd - create and initialize an HCD structure + * @driver: HC driver that will use this hcd + * @dev: device for this HC, stored in hcd->self.controller + * @bus_name: value to store in hcd->self.bus_name + * @primary_hcd: a pointer to the usb_hcd structure that is sharing the + * PCI device. Only allocate certain resources for the primary HCD + * Context: !in_interrupt() + * + * Allocate a struct usb_hcd, with extra space at the end for the + * HC driver's private data. Initialize the generic members of the + * hcd structure. + * + * Return: On success, a pointer to the created and initialized HCD structure. + * On failure (e.g. if memory is unavailable), %NULL. + */ +struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, + struct device *dev, const char *bus_name, + struct usb_hcd *primary_hcd) +{ + return __usb_create_hcd(driver, dev, dev, bus_name, primary_hcd); +} EXPORT_SYMBOL_GPL(usb_create_shared_hcd); /** @@ -2591,7 +2602,7 @@ EXPORT_SYMBOL_GPL(usb_create_shared_hcd); struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name) { - return usb_create_shared_hcd(driver, dev, bus_name, NULL); + return __usb_create_hcd(driver, dev, dev, bus_name, NULL); } EXPORT_SYMBOL_GPL(usb_create_hcd); @@ -2718,7 +2729,7 @@ int usb_add_hcd(struct usb_hcd *hcd, struct usb_device *rhdev; if (IS_ENABLED(CONFIG_USB_PHY) && !hcd->usb_phy) { - struct usb_phy *phy = usb_get_phy_dev(hcd->self.controller, 0); + struct usb_phy *phy = usb_get_phy_dev(hcd->self.sysdev, 0); if (IS_ERR(phy)) { retval = PTR_ERR(phy); @@ -2736,7 +2747,7 @@ int usb_add_hcd(struct usb_hcd *hcd, } if (IS_ENABLED(CONFIG_GENERIC_PHY) && !hcd->phy) { - struct phy *phy = phy_get(hcd->self.controller, "usb"); + struct phy *phy = phy_get(hcd->self.sysdev, "usb"); if (IS_ERR(phy)) { retval = PTR_ERR(phy); @@ -2784,7 +2795,7 @@ int usb_add_hcd(struct usb_hcd *hcd, */ retval = hcd_buffer_create(hcd); if (retval != 0) { - dev_dbg(hcd->self.controller, "pool alloc failed\n"); + dev_dbg(hcd->self.sysdev, "pool alloc failed\n"); goto err_create_buf; } @@ -2794,7 +2805,7 @@ int usb_add_hcd(struct usb_hcd *hcd, rhdev = usb_alloc_dev(NULL, &hcd->self, 0); if (rhdev == NULL) { - dev_err(hcd->self.controller, "unable to allocate root hub\n"); + dev_err(hcd->self.sysdev, "unable to allocate root hub\n"); retval = -ENOMEM; goto err_allocate_root_hub; } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5286bf67869a83e1d7e1d3f1ca0ebc87db5cf7a4..b8bb20d7acdb9f1480009605f10e2f5ae4045ec9 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -362,7 +362,8 @@ static void usb_set_lpm_parameters(struct usb_device *udev) } /* USB 2.0 spec Section 11.24.4.5 */ -static int get_hub_descriptor(struct usb_device *hdev, void *data) +static int get_hub_descriptor(struct usb_device *hdev, + struct usb_hub_descriptor *desc) { int i, ret, size; unsigned dtype; @@ -378,10 +379,18 @@ static int get_hub_descriptor(struct usb_device *hdev, void *data) for (i = 0; i < 3; i++) { ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, - dtype << 8, 0, data, size, + dtype << 8, 0, desc, size, USB_CTRL_GET_TIMEOUT); - if (ret >= (USB_DT_HUB_NONVAR_SIZE + 2)) + if (hub_is_superspeed(hdev)) { + if (ret == size) + return ret; + } else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) { + /* Make sure we have the DeviceRemovable field. */ + size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1; + if (ret < size) + return -EMSGSIZE; return ret; + } } return -EINVAL; } @@ -1066,6 +1075,9 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) portstatus = portchange = 0; status = hub_port_status(hub, port1, &portstatus, &portchange); + if (status) + goto abort; + if (udev || (portstatus & USB_PORT_STAT_CONNECTION)) dev_dbg(&port_dev->dev, "status %04x change %04x\n", portstatus, portchange); @@ -1198,7 +1210,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Scan all ports that need attention */ kick_hub_wq(hub); - + abort: if (type == HUB_INIT2 || type == HUB_INIT3) { /* Allow autosuspend if it was suppressed */ disconnected: @@ -1310,7 +1322,7 @@ static int hub_configure(struct usb_hub *hub, } mutex_init(&hub->status_mutex); - hub->descriptor = kmalloc(sizeof(*hub->descriptor), GFP_KERNEL); + hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL); if (!hub->descriptor) { ret = -ENOMEM; goto fail; @@ -1318,13 +1330,19 @@ static int hub_configure(struct usb_hub *hub, /* Request the entire hub descriptor. * hub->descriptor can handle USB_MAXCHILDREN ports, - * but the hub can/will return fewer bytes here. + * but a (non-SS) hub can/will return fewer bytes here. */ ret = get_hub_descriptor(hdev, hub->descriptor); if (ret < 0) { message = "can't read hub descriptor"; goto fail; - } else if (hub->descriptor->bNbrPorts > USB_MAXCHILDREN) { + } + + maxchild = USB_MAXCHILDREN; + if (hub_is_superspeed(hdev)) + maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS); + + if (hub->descriptor->bNbrPorts > maxchild) { message = "hub has too many ports!"; ret = -ENODEV; goto fail; @@ -2084,6 +2102,12 @@ void usb_disconnect(struct usb_device **pdev) dev_info(&udev->dev, "USB disconnect, device number %d\n", udev->devnum); + /* + * Ensure that the pm runtime code knows that the USB device + * is in the process of being disconnected. + */ + pm_runtime_barrier(&udev->dev); + usb_lock_device(udev); hub_disconnect_children(udev); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 2184ef40a82ae494c11e82b4090545bd0dad30c8..4c38ea41ae969e6fac8ced209a655957322ba36f 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -474,6 +474,7 @@ EXPORT_SYMBOL_GPL(usb_sg_init); * significantly improve USB throughput. * * There are three kinds of completion for this function. + * * (1) success, where io->status is zero. The number of io->bytes * transferred is as requested. * (2) error, where io->status is a negative errno value. The number diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c index 3de4f8873984d2d04749f2f87b8b5c0fb2b9f59b..d563cbcf76cfbfb197aa1e45be1dc5f700102770 100644 --- a/drivers/usb/core/of.c +++ b/drivers/usb/core/of.c @@ -18,6 +18,7 @@ */ #include +#include #include /** @@ -46,3 +47,28 @@ struct device_node *usb_of_get_child_node(struct device_node *parent, } EXPORT_SYMBOL_GPL(usb_of_get_child_node); +/** + * usb_of_get_companion_dev - Find the companion device + * @dev: the device pointer to find a companion + * + * Find the companion device from platform bus. + * + * Takes a reference to the returned struct device which needs to be dropped + * after use. + * + * Return: On success, a pointer to the companion device, %NULL on failure. + */ +struct device *usb_of_get_companion_dev(struct device *dev) +{ + struct device_node *node; + struct platform_device *pdev = NULL; + + node = of_parse_phandle(dev->of_node, "companion", 0); + if (node) + pdev = of_find_device_by_node(node); + + of_node_put(node); + + return pdev ? &pdev->dev : NULL; +} +EXPORT_SYMBOL_GPL(usb_of_get_companion_dev); diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index d75cb8c0f7df76ca1c78d1c39de9db17f9039af0..47903d510955b03967da9a125092b6965118c0be 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -338,7 +338,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) if (!urb || !urb->complete) return -EINVAL; if (urb->hcpriv) { - WARN_ONCE(1, "URB %p submitted while active\n", urb); + WARN_ONCE(1, "URB %pK submitted while active\n", urb); return -EBUSY; } diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index a2ccc69fb45c092a5220474c64b03418133f2178..28b053cacc90556acf7df562fdc502c51b25a02e 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -74,6 +74,140 @@ MODULE_PARM_DESC(autosuspend, "default autosuspend delay"); #define usb_autosuspend_delay 0 #endif +static bool match_endpoint(struct usb_endpoint_descriptor *epd, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out) +{ + switch (usb_endpoint_type(epd)) { + case USB_ENDPOINT_XFER_BULK: + if (usb_endpoint_dir_in(epd)) { + if (bulk_in && !*bulk_in) { + *bulk_in = epd; + break; + } + } else { + if (bulk_out && !*bulk_out) { + *bulk_out = epd; + break; + } + } + + return false; + case USB_ENDPOINT_XFER_INT: + if (usb_endpoint_dir_in(epd)) { + if (int_in && !*int_in) { + *int_in = epd; + break; + } + } else { + if (int_out && !*int_out) { + *int_out = epd; + break; + } + } + + return false; + default: + return false; + } + + return (!bulk_in || *bulk_in) && (!bulk_out || *bulk_out) && + (!int_in || *int_in) && (!int_out || *int_out); +} + +/** + * usb_find_common_endpoints() -- look up common endpoint descriptors + * @alt: alternate setting to search + * @bulk_in: pointer to descriptor pointer, or NULL + * @bulk_out: pointer to descriptor pointer, or NULL + * @int_in: pointer to descriptor pointer, or NULL + * @int_out: pointer to descriptor pointer, or NULL + * + * Search the alternate setting's endpoint descriptors for the first bulk-in, + * bulk-out, interrupt-in and interrupt-out endpoints and return them in the + * provided pointers (unless they are NULL). + * + * If a requested endpoint is not found, the corresponding pointer is set to + * NULL. + * + * Return: Zero if all requested descriptors were found, or -ENXIO otherwise. + */ +int usb_find_common_endpoints(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out) +{ + struct usb_endpoint_descriptor *epd; + int i; + + if (bulk_in) + *bulk_in = NULL; + if (bulk_out) + *bulk_out = NULL; + if (int_in) + *int_in = NULL; + if (int_out) + *int_out = NULL; + + for (i = 0; i < alt->desc.bNumEndpoints; ++i) { + epd = &alt->endpoint[i].desc; + + if (match_endpoint(epd, bulk_in, bulk_out, int_in, int_out)) + return 0; + } + + return -ENXIO; +} +EXPORT_SYMBOL_GPL(usb_find_common_endpoints); + +/** + * usb_find_common_endpoints_reverse() -- look up common endpoint descriptors + * @alt: alternate setting to search + * @bulk_in: pointer to descriptor pointer, or NULL + * @bulk_out: pointer to descriptor pointer, or NULL + * @int_in: pointer to descriptor pointer, or NULL + * @int_out: pointer to descriptor pointer, or NULL + * + * Search the alternate setting's endpoint descriptors for the last bulk-in, + * bulk-out, interrupt-in and interrupt-out endpoints and return them in the + * provided pointers (unless they are NULL). + * + * If a requested endpoint is not found, the corresponding pointer is set to + * NULL. + * + * Return: Zero if all requested descriptors were found, or -ENXIO otherwise. + */ +int usb_find_common_endpoints_reverse(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out) +{ + struct usb_endpoint_descriptor *epd; + int i; + + if (bulk_in) + *bulk_in = NULL; + if (bulk_out) + *bulk_out = NULL; + if (int_in) + *int_in = NULL; + if (int_out) + *int_out = NULL; + + for (i = alt->desc.bNumEndpoints - 1; i >= 0; --i) { + epd = &alt->endpoint[i].desc; + + if (match_endpoint(epd, bulk_in, bulk_out, int_in, int_out)) + return 0; + } + + return -ENXIO; +} +EXPORT_SYMBOL_GPL(usb_find_common_endpoints_reverse); /** * usb_find_alt_setting() - Given a configuration, find the alternate setting @@ -453,9 +587,9 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, * Note: calling dma_set_mask() on a USB device would set the * mask for the entire HCD, so don't do that. */ - dev->dev.dma_mask = bus->controller->dma_mask; - dev->dev.dma_pfn_offset = bus->controller->dma_pfn_offset; - set_dev_node(&dev->dev, dev_to_node(bus->controller)); + dev->dev.dma_mask = bus->sysdev->dma_mask; + dev->dev.dma_pfn_offset = bus->sysdev->dma_pfn_offset; + set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; atomic_set(&dev->urbnum, 0); @@ -803,7 +937,7 @@ struct urb *usb_buffer_map(struct urb *urb) if (!urb || !urb->dev || !(bus = urb->dev->bus) - || !(controller = bus->controller)) + || !(controller = bus->sysdev)) return NULL; if (controller->dma_mask) { @@ -841,7 +975,7 @@ void usb_buffer_dmasync(struct urb *urb) || !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) || !urb->dev || !(bus = urb->dev->bus) - || !(controller = bus->controller)) + || !(controller = bus->sysdev)) return; if (controller->dma_mask) { @@ -875,7 +1009,7 @@ void usb_buffer_unmap(struct urb *urb) || !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) || !urb->dev || !(bus = urb->dev->bus) - || !(controller = bus->controller)) + || !(controller = bus->sysdev)) return; if (controller->dma_mask) { @@ -925,7 +1059,7 @@ int usb_buffer_map_sg(const struct usb_device *dev, int is_in, if (!dev || !(bus = dev->bus) - || !(controller = bus->controller) + || !(controller = bus->sysdev) || !controller->dma_mask) return -EINVAL; @@ -961,7 +1095,7 @@ void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in, if (!dev || !(bus = dev->bus) - || !(controller = bus->controller) + || !(controller = bus->sysdev) || !controller->dma_mask) return; @@ -989,7 +1123,7 @@ void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in, if (!dev || !(bus = dev->bus) - || !(controller = bus->controller) + || !(controller = bus->sysdev) || !controller->dma_mask) return; diff --git a/drivers/usb/dwc2/Kconfig b/drivers/usb/dwc2/Kconfig index e838701d6dd54e624c356171833b929889027c38..b6a495e98fd848a4965f586ca216beab346f53fc 100644 --- a/drivers/usb/dwc2/Kconfig +++ b/drivers/usb/dwc2/Kconfig @@ -54,7 +54,7 @@ endchoice config USB_DWC2_PCI tristate "DWC2 PCI" - depends on PCI + depends on USB_PCI depends on USB_GADGET || !USB_GADGET default n select NOP_USB_XCEIV diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 1a7e8300508251f8b0c2a7274acadc63c9c42111..8367d4f985c1220773a7c6efc38df94d837c7f28 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -423,6 +423,10 @@ enum dwc2_ep0_state { * needed. * 0 - No (default) * 1 - Yes + * @activate_stm_fs_transceiver: Activate internal transceiver using GGPIO + * register. + * 0 - Deactivate the transceiver (default) + * 1 - Activate the transceiver * @g_dma: Enables gadget dma usage (default: autodetect). * @g_dma_desc: Enables gadget descriptor DMA (default: autodetect). * @g_rx_fifo_size: The periodic rx fifo size for the device, in @@ -477,6 +481,7 @@ struct dwc2_core_params { bool uframe_sched; bool external_id_pin_ctl; bool hibernation; + bool activate_stm_fs_transceiver; u16 max_packet_count; u32 max_transfer_size; u32 ahbcfg; diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index a73722e27d07835f6a02f53fbc702c7e27fcf11a..740c7e86d31b912bf0f3c861a687b369acdc5b3d 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -121,7 +121,7 @@ static void dwc2_init_fs_ls_pclk_sel(struct dwc2_hsotg *hsotg) static int dwc2_fs_phy_init(struct dwc2_hsotg *hsotg, bool select_phy) { - u32 usbcfg, i2cctl; + u32 usbcfg, ggpio, i2cctl; int retval = 0; /* @@ -145,6 +145,19 @@ static int dwc2_fs_phy_init(struct dwc2_hsotg *hsotg, bool select_phy) return retval; } } + + if (hsotg->params.activate_stm_fs_transceiver) { + ggpio = dwc2_readl(hsotg->regs + GGPIO); + if (!(ggpio & GGPIO_STM32_OTG_GCCFG_PWRDWN)) { + dev_dbg(hsotg->dev, "Activating transceiver\n"); + /* + * STM32F4x9 uses the GGPIO register as general + * core configuration register. + */ + ggpio |= GGPIO_STM32_OTG_GCCFG_PWRDWN; + dwc2_writel(ggpio, hsotg->regs + GGPIO); + } + } } /* @@ -3264,6 +3277,7 @@ static void dwc2_conn_id_status_change(struct work_struct *work) dwc2_core_init(hsotg, false); dwc2_enable_global_interrupts(hsotg); spin_lock_irqsave(&hsotg->lock, flags); + dwc2_hsotg_disconnect(hsotg); dwc2_hsotg_core_init_disconnected(hsotg, false); spin_unlock_irqrestore(&hsotg->lock, flags); dwc2_hsotg_core_connect(hsotg); diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h index bde72489ae6625b7a666f9a6e8c79e895d4d1441..4592012c47436c422e4840761717b48929e239bc 100644 --- a/drivers/usb/dwc2/hw.h +++ b/drivers/usb/dwc2/hw.h @@ -225,6 +225,8 @@ #define GPVNDCTL HSOTG_REG(0x0034) #define GGPIO HSOTG_REG(0x0038) +#define GGPIO_STM32_OTG_GCCFG_PWRDWN BIT(16) + #define GUID HSOTG_REG(0x003c) #define GSNPSID HSOTG_REG(0x0040) #define GHWCFG1 HSOTG_REG(0x0044) diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 2990c347289fcd877b831d12cc7372ee5f2b1166..a3ffe97170ffd7ccdaf42cc892963ec22793d380 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -120,6 +120,22 @@ static void dwc2_set_amcc_params(struct dwc2_hsotg *hsotg) p->ahbcfg = GAHBCFG_HBSTLEN_INCR16 << GAHBCFG_HBSTLEN_SHIFT; } +static void dwc2_set_stm32f4x9_fsotg_params(struct dwc2_hsotg *hsotg) +{ + struct dwc2_core_params *p = &hsotg->params; + + p->otg_cap = DWC2_CAP_PARAM_NO_HNP_SRP_CAPABLE; + p->speed = DWC2_SPEED_PARAM_FULL; + p->host_rx_fifo_size = 128; + p->host_nperio_tx_fifo_size = 96; + p->host_perio_tx_fifo_size = 96; + p->max_packet_count = 256; + p->phy_type = DWC2_PHY_TYPE_PARAM_FS; + p->i2c_enable = false; + p->uframe_sched = false; + p->activate_stm_fs_transceiver = true; +} + const struct of_device_id dwc2_of_match_table[] = { { .compatible = "brcm,bcm2835-usb", .data = dwc2_set_bcm_params }, { .compatible = "hisilicon,hi6220-usb", .data = dwc2_set_his_params }, @@ -128,11 +144,16 @@ const struct of_device_id dwc2_of_match_table[] = { { .compatible = "lantiq,xrx200-usb", .data = dwc2_set_ltq_params }, { .compatible = "snps,dwc2" }, { .compatible = "samsung,s3c6400-hsotg" }, + { .compatible = "amlogic,meson8-usb", + .data = dwc2_set_amlogic_params }, { .compatible = "amlogic,meson8b-usb", .data = dwc2_set_amlogic_params }, { .compatible = "amlogic,meson-gxbb-usb", .data = dwc2_set_amlogic_params }, { .compatible = "amcc,dwc-otg", .data = dwc2_set_amcc_params }, + { .compatible = "st,stm32f4x9-fsotg", + .data = dwc2_set_stm32f4x9_fsotg_params }, + { .compatible = "st,stm32f4x9-hsotg" }, {}, }; MODULE_DEVICE_TABLE(of, dwc2_of_match_table); diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 9564bc76c56f333fbb6535bdf2487f65c6241039..daf0d37acb37f2ccd2714e6be6bc14832b1cb937 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -214,20 +214,11 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg) hsotg->reset = devm_reset_control_get_optional(hsotg->dev, "dwc2"); if (IS_ERR(hsotg->reset)) { ret = PTR_ERR(hsotg->reset); - switch (ret) { - case -ENOENT: - case -ENOTSUPP: - hsotg->reset = NULL; - break; - default: - dev_err(hsotg->dev, "error getting reset control %d\n", - ret); - return ret; - } + dev_err(hsotg->dev, "error getting reset control %d\n", ret); + return ret; } - if (hsotg->reset) - reset_control_deassert(hsotg->reset); + reset_control_deassert(hsotg->reset); /* Set default UTMI width */ hsotg->phyif = GUSBCFG_PHYIF16; @@ -326,8 +317,7 @@ static int dwc2_driver_remove(struct platform_device *dev) if (hsotg->ll_hw_enabled) dwc2_lowlevel_hw_disable(hsotg); - if (hsotg->reset) - reset_control_assert(hsotg->reset); + reset_control_assert(hsotg->reset); return 0; } diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig index c5aa235863e8c7e88b30544f8e1aba98c9a18d03..ab8c0e0d3b60d54110c6a9a0daba4c10ef086eb1 100644 --- a/drivers/usb/dwc3/Kconfig +++ b/drivers/usb/dwc3/Kconfig @@ -41,6 +41,7 @@ config USB_DWC3_GADGET config USB_DWC3_DUAL_ROLE bool "Dual Role mode" depends on ((USB=y || USB=USB_DWC3) && (USB_GADGET=y || USB_GADGET=USB_DWC3)) + depends on (EXTCON=y || EXTCON=USB_DWC3) help This is the default mode of working of DWC3 controller where both host and gadget features are enabled. @@ -70,7 +71,7 @@ config USB_DWC3_EXYNOS config USB_DWC3_PCI tristate "PCIe-based Platforms" - depends on PCI && ACPI + depends on USB_PCI && ACPI default USB_DWC3 help If you're using the DesignWare Core IP with a PCIe, please say diff --git a/drivers/usb/dwc3/Makefile b/drivers/usb/dwc3/Makefile index ffca34029b21ff6c3bdb3e44ae5531c74d78856a..f15fabbd1e59e9d23e07936be6cfa05a3489975f 100644 --- a/drivers/usb/dwc3/Makefile +++ b/drivers/usb/dwc3/Makefile @@ -17,6 +17,10 @@ ifneq ($(filter y,$(CONFIG_USB_DWC3_GADGET) $(CONFIG_USB_DWC3_DUAL_ROLE)),) dwc3-y += gadget.o ep0.o endif +ifneq ($(CONFIG_USB_DWC3_DUAL_ROLE),) + dwc3-y += drd.o +endif + ifneq ($(CONFIG_USB_DWC3_ULPI),) dwc3-y += ulpi.o endif diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 369bab16a824599f2295dbdf91e825e1a69e4446..455d89a1cd6dbbb3e3f707bc42bada4a9569e5d7 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -100,7 +100,10 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc) return 0; } -void dwc3_set_mode(struct dwc3 *dwc, u32 mode) +static void dwc3_event_buffers_cleanup(struct dwc3 *dwc); +static int dwc3_event_buffers_setup(struct dwc3 *dwc); + +static void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) { u32 reg; @@ -110,6 +113,69 @@ void dwc3_set_mode(struct dwc3 *dwc, u32 mode) dwc3_writel(dwc->regs, DWC3_GCTL, reg); } +static void __dwc3_set_mode(struct work_struct *work) +{ + struct dwc3 *dwc = work_to_dwc(work); + unsigned long flags; + int ret; + + if (!dwc->desired_dr_role) + return; + + if (dwc->desired_dr_role == dwc->current_dr_role) + return; + + if (dwc->dr_mode != USB_DR_MODE_OTG) + return; + + switch (dwc->current_dr_role) { + case DWC3_GCTL_PRTCAP_HOST: + dwc3_host_exit(dwc); + break; + case DWC3_GCTL_PRTCAP_DEVICE: + dwc3_gadget_exit(dwc); + dwc3_event_buffers_cleanup(dwc); + break; + default: + break; + } + + spin_lock_irqsave(&dwc->lock, flags); + + dwc3_set_prtcap(dwc, dwc->desired_dr_role); + + dwc->current_dr_role = dwc->desired_dr_role; + + spin_unlock_irqrestore(&dwc->lock, flags); + + switch (dwc->desired_dr_role) { + case DWC3_GCTL_PRTCAP_HOST: + ret = dwc3_host_init(dwc); + if (ret) + dev_err(dwc->dev, "failed to initialize host\n"); + break; + case DWC3_GCTL_PRTCAP_DEVICE: + dwc3_event_buffers_setup(dwc); + ret = dwc3_gadget_init(dwc); + if (ret) + dev_err(dwc->dev, "failed to initialize peripheral\n"); + break; + default: + break; + } +} + +void dwc3_set_mode(struct dwc3 *dwc, u32 mode) +{ + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + dwc->desired_dr_role = mode; + spin_unlock_irqrestore(&dwc->lock, flags); + + queue_work(system_power_efficient_wq, &dwc->drd_work); +} + u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) { struct dwc3 *dwc = dep->dwc; @@ -397,8 +463,7 @@ static void dwc3_core_num_eps(struct dwc3 *dwc) { struct dwc3_hwparams *parms = &dwc->hwparams; - dwc->num_in_eps = DWC3_NUM_IN_EPS(parms); - dwc->num_out_eps = DWC3_NUM_EPS(parms) - dwc->num_in_eps; + dwc->num_eps = DWC3_NUM_EPS(parms); } static void dwc3_cache_hwparams(struct dwc3 *dwc) @@ -431,6 +496,12 @@ static int dwc3_phy_setup(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); + /* + * Make sure UX_EXIT_PX is cleared as that causes issues with some + * PHYs. Also, this bit is not supposed to be used in normal operation. + */ + reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; + /* * Above 1.94a, it is recommended to set DWC3_GUSB3PIPECTL_SUSPHY * to '0' during coreConsultant configuration. So default value @@ -714,21 +785,6 @@ static int dwc3_core_init(struct dwc3 *dwc) goto err4; } - switch (dwc->dr_mode) { - case USB_DR_MODE_PERIPHERAL: - dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_DEVICE); - break; - case USB_DR_MODE_HOST: - dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_HOST); - break; - case USB_DR_MODE_OTG: - dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_OTG); - break; - default: - dev_warn(dwc->dev, "Unsupported mode %d\n", dwc->dr_mode); - break; - } - /* * ENDXFER polling is available on version 3.10a and later of * the DWC_usb3 controller. It is NOT available in the @@ -846,6 +902,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) switch (dwc->dr_mode) { case USB_DR_MODE_PERIPHERAL: + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); ret = dwc3_gadget_init(dwc); if (ret) { if (ret != -EPROBE_DEFER) @@ -854,6 +911,7 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) } break; case USB_DR_MODE_HOST: + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST); ret = dwc3_host_init(dwc); if (ret) { if (ret != -EPROBE_DEFER) @@ -862,17 +920,11 @@ static int dwc3_core_init_mode(struct dwc3 *dwc) } break; case USB_DR_MODE_OTG: - ret = dwc3_host_init(dwc); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to initialize host\n"); - return ret; - } - - ret = dwc3_gadget_init(dwc); + INIT_WORK(&dwc->drd_work, __dwc3_set_mode); + ret = dwc3_drd_init(dwc); if (ret) { if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to initialize gadget\n"); + dev_err(dev, "failed to initialize dual-role\n"); return ret; } break; @@ -894,8 +946,7 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) dwc3_host_exit(dwc); break; case USB_DR_MODE_OTG: - dwc3_host_exit(dwc); - dwc3_gadget_exit(dwc); + dwc3_drd_exit(dwc); break; default: /* do nothing */ diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 2b9e4ca3c932e5691376dc9504d870804cccc695..981c77f5628e105731b41756b798c988b6c6692d 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -23,10 +23,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -39,9 +41,8 @@ /* Global constants */ #define DWC3_PULL_UP_TIMEOUT 500 /* ms */ -#define DWC3_ZLP_BUF_SIZE 1024 /* size of a superspeed bulk */ #define DWC3_BOUNCE_SIZE 1024 /* size of a superspeed bulk */ -#define DWC3_EP0_BOUNCE_SIZE 512 +#define DWC3_EP0_SETUP_SIZE 512 #define DWC3_ENDPOINTS_NUM 32 #define DWC3_XHCI_RESOURCES_NUM 2 @@ -66,7 +67,7 @@ #define DWC3_DEVICE_EVENT_OVERFLOW 11 #define DWC3_GEVNTCOUNT_MASK 0xfffc -#define DWC3_GEVNTCOUNT_EHB (1 << 31) +#define DWC3_GEVNTCOUNT_EHB BIT(31) #define DWC3_GSNPSID_MASK 0xffff0000 #define DWC3_GSNPSREV_MASK 0xffff @@ -116,20 +117,20 @@ #define DWC3_VER_NUMBER 0xc1a0 #define DWC3_VER_TYPE 0xc1a4 -#define DWC3_GUSB2PHYCFG(n) (0xc200 + (n * 0x04)) -#define DWC3_GUSB2I2CCTL(n) (0xc240 + (n * 0x04)) +#define DWC3_GUSB2PHYCFG(n) (0xc200 + ((n) * 0x04)) +#define DWC3_GUSB2I2CCTL(n) (0xc240 + ((n) * 0x04)) -#define DWC3_GUSB2PHYACC(n) (0xc280 + (n * 0x04)) +#define DWC3_GUSB2PHYACC(n) (0xc280 + ((n) * 0x04)) -#define DWC3_GUSB3PIPECTL(n) (0xc2c0 + (n * 0x04)) +#define DWC3_GUSB3PIPECTL(n) (0xc2c0 + ((n) * 0x04)) -#define DWC3_GTXFIFOSIZ(n) (0xc300 + (n * 0x04)) -#define DWC3_GRXFIFOSIZ(n) (0xc380 + (n * 0x04)) +#define DWC3_GTXFIFOSIZ(n) (0xc300 + ((n) * 0x04)) +#define DWC3_GRXFIFOSIZ(n) (0xc380 + ((n) * 0x04)) -#define DWC3_GEVNTADRLO(n) (0xc400 + (n * 0x10)) -#define DWC3_GEVNTADRHI(n) (0xc404 + (n * 0x10)) -#define DWC3_GEVNTSIZ(n) (0xc408 + (n * 0x10)) -#define DWC3_GEVNTCOUNT(n) (0xc40c + (n * 0x10)) +#define DWC3_GEVNTADRLO(n) (0xc400 + ((n) * 0x10)) +#define DWC3_GEVNTADRHI(n) (0xc404 + ((n) * 0x10)) +#define DWC3_GEVNTSIZ(n) (0xc408 + ((n) * 0x10)) +#define DWC3_GEVNTCOUNT(n) (0xc40c + ((n) * 0x10)) #define DWC3_GHWPARAMS8 0xc600 #define DWC3_GFLADJ 0xc630 @@ -143,13 +144,13 @@ #define DWC3_DGCMD 0xc714 #define DWC3_DALEPENA 0xc720 -#define DWC3_DEP_BASE(n) (0xc800 + (n * 0x10)) +#define DWC3_DEP_BASE(n) (0xc800 + ((n) * 0x10)) #define DWC3_DEPCMDPAR2 0x00 #define DWC3_DEPCMDPAR1 0x04 #define DWC3_DEPCMDPAR0 0x08 #define DWC3_DEPCMD 0x0c -#define DWC3_DEV_IMOD(n) (0xca00 + (n * 0x4)) +#define DWC3_DEV_IMOD(n) (0xca00 + ((n) * 0x4)) /* OTG Registers */ #define DWC3_OCFG 0xcc00 @@ -176,11 +177,11 @@ /* Global RX Threshold Configuration Register */ #define DWC3_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 19) #define DWC3_GRXTHRCFG_RXPKTCNT(n) (((n) & 0xf) << 24) -#define DWC3_GRXTHRCFG_PKTCNTSEL (1 << 29) +#define DWC3_GRXTHRCFG_PKTCNTSEL BIT(29) /* Global Configuration Register */ #define DWC3_GCTL_PWRDNSCALE(n) ((n) << 19) -#define DWC3_GCTL_U2RSTECN (1 << 16) +#define DWC3_GCTL_U2RSTECN BIT(16) #define DWC3_GCTL_RAMCLKSEL(x) (((x) & DWC3_GCTL_CLK_MASK) << 6) #define DWC3_GCTL_CLK_BUS (0) #define DWC3_GCTL_CLK_PIPE (1) @@ -193,24 +194,24 @@ #define DWC3_GCTL_PRTCAP_DEVICE 2 #define DWC3_GCTL_PRTCAP_OTG 3 -#define DWC3_GCTL_CORESOFTRESET (1 << 11) -#define DWC3_GCTL_SOFITPSYNC (1 << 10) +#define DWC3_GCTL_CORESOFTRESET BIT(11) +#define DWC3_GCTL_SOFITPSYNC BIT(10) #define DWC3_GCTL_SCALEDOWN(n) ((n) << 4) #define DWC3_GCTL_SCALEDOWN_MASK DWC3_GCTL_SCALEDOWN(3) -#define DWC3_GCTL_DISSCRAMBLE (1 << 3) -#define DWC3_GCTL_U2EXIT_LFPS (1 << 2) -#define DWC3_GCTL_GBLHIBERNATIONEN (1 << 1) -#define DWC3_GCTL_DSBLCLKGTNG (1 << 0) +#define DWC3_GCTL_DISSCRAMBLE BIT(3) +#define DWC3_GCTL_U2EXIT_LFPS BIT(2) +#define DWC3_GCTL_GBLHIBERNATIONEN BIT(1) +#define DWC3_GCTL_DSBLCLKGTNG BIT(0) /* Global User Control 1 Register */ -#define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW (1 << 24) +#define DWC3_GUCTL1_DEV_L1_EXIT_BY_HW BIT(24) /* Global USB2 PHY Configuration Register */ -#define DWC3_GUSB2PHYCFG_PHYSOFTRST (1 << 31) -#define DWC3_GUSB2PHYCFG_U2_FREECLK_EXISTS (1 << 30) -#define DWC3_GUSB2PHYCFG_SUSPHY (1 << 6) -#define DWC3_GUSB2PHYCFG_ULPI_UTMI (1 << 4) -#define DWC3_GUSB2PHYCFG_ENBLSLPM (1 << 8) +#define DWC3_GUSB2PHYCFG_PHYSOFTRST BIT(31) +#define DWC3_GUSB2PHYCFG_U2_FREECLK_EXISTS BIT(30) +#define DWC3_GUSB2PHYCFG_SUSPHY BIT(6) +#define DWC3_GUSB2PHYCFG_ULPI_UTMI BIT(4) +#define DWC3_GUSB2PHYCFG_ENBLSLPM BIT(8) #define DWC3_GUSB2PHYCFG_PHYIF(n) (n << 3) #define DWC3_GUSB2PHYCFG_PHYIF_MASK DWC3_GUSB2PHYCFG_PHYIF(1) #define DWC3_GUSB2PHYCFG_USBTRDTIM(n) (n << 10) @@ -221,25 +222,26 @@ #define UTMI_PHYIF_8_BIT 0 /* Global USB2 PHY Vendor Control Register */ -#define DWC3_GUSB2PHYACC_NEWREGREQ (1 << 25) -#define DWC3_GUSB2PHYACC_BUSY (1 << 23) -#define DWC3_GUSB2PHYACC_WRITE (1 << 22) +#define DWC3_GUSB2PHYACC_NEWREGREQ BIT(25) +#define DWC3_GUSB2PHYACC_BUSY BIT(23) +#define DWC3_GUSB2PHYACC_WRITE BIT(22) #define DWC3_GUSB2PHYACC_ADDR(n) (n << 16) #define DWC3_GUSB2PHYACC_EXTEND_ADDR(n) (n << 8) #define DWC3_GUSB2PHYACC_DATA(n) (n & 0xff) /* Global USB3 PIPE Control Register */ -#define DWC3_GUSB3PIPECTL_PHYSOFTRST (1 << 31) -#define DWC3_GUSB3PIPECTL_U2SSINP3OK (1 << 29) -#define DWC3_GUSB3PIPECTL_DISRXDETINP3 (1 << 28) -#define DWC3_GUSB3PIPECTL_REQP1P2P3 (1 << 24) +#define DWC3_GUSB3PIPECTL_PHYSOFTRST BIT(31) +#define DWC3_GUSB3PIPECTL_U2SSINP3OK BIT(29) +#define DWC3_GUSB3PIPECTL_DISRXDETINP3 BIT(28) +#define DWC3_GUSB3PIPECTL_UX_EXIT_PX BIT(27) +#define DWC3_GUSB3PIPECTL_REQP1P2P3 BIT(24) #define DWC3_GUSB3PIPECTL_DEP1P2P3(n) ((n) << 19) #define DWC3_GUSB3PIPECTL_DEP1P2P3_MASK DWC3_GUSB3PIPECTL_DEP1P2P3(7) #define DWC3_GUSB3PIPECTL_DEP1P2P3_EN DWC3_GUSB3PIPECTL_DEP1P2P3(1) -#define DWC3_GUSB3PIPECTL_DEPOCHANGE (1 << 18) -#define DWC3_GUSB3PIPECTL_SUSPHY (1 << 17) -#define DWC3_GUSB3PIPECTL_LFPSFILT (1 << 9) -#define DWC3_GUSB3PIPECTL_RX_DETOPOLL (1 << 8) +#define DWC3_GUSB3PIPECTL_DEPOCHANGE BIT(18) +#define DWC3_GUSB3PIPECTL_SUSPHY BIT(17) +#define DWC3_GUSB3PIPECTL_LFPSFILT BIT(9) +#define DWC3_GUSB3PIPECTL_RX_DETOPOLL BIT(8) #define DWC3_GUSB3PIPECTL_TX_DEEPH_MASK DWC3_GUSB3PIPECTL_TX_DEEPH(3) #define DWC3_GUSB3PIPECTL_TX_DEEPH(n) ((n) << 1) @@ -248,7 +250,7 @@ #define DWC3_GTXFIFOSIZ_TXFSTADDR(n) ((n) & 0xffff0000) /* Global Event Size Registers */ -#define DWC3_GEVNTSIZ_INTMASK (1 << 31) +#define DWC3_GEVNTSIZ_INTMASK BIT(31) #define DWC3_GEVNTSIZ_SIZE(n) ((n) & 0xffff) /* Global HWPARAMS0 Register */ @@ -289,18 +291,18 @@ #define DWC3_MAX_HIBER_SCRATCHBUFS 15 /* Global HWPARAMS6 Register */ -#define DWC3_GHWPARAMS6_EN_FPGA (1 << 7) +#define DWC3_GHWPARAMS6_EN_FPGA BIT(7) /* Global HWPARAMS7 Register */ #define DWC3_GHWPARAMS7_RAM1_DEPTH(n) ((n) & 0xffff) #define DWC3_GHWPARAMS7_RAM2_DEPTH(n) (((n) >> 16) & 0xffff) /* Global Frame Length Adjustment Register */ -#define DWC3_GFLADJ_30MHZ_SDBND_SEL (1 << 7) +#define DWC3_GFLADJ_30MHZ_SDBND_SEL BIT(7) #define DWC3_GFLADJ_30MHZ_MASK 0x3f /* Global User Control Register 2 */ -#define DWC3_GUCTL2_RST_ACTBITLATER (1 << 14) +#define DWC3_GUCTL2_RST_ACTBITLATER BIT(14) /* Device Configuration Register */ #define DWC3_DCFG_DEVADDR(addr) ((addr) << 3) @@ -310,23 +312,23 @@ #define DWC3_DCFG_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DCFG_SUPERSPEED (4 << 0) #define DWC3_DCFG_HIGHSPEED (0 << 0) -#define DWC3_DCFG_FULLSPEED (1 << 0) +#define DWC3_DCFG_FULLSPEED BIT(0) #define DWC3_DCFG_LOWSPEED (2 << 0) #define DWC3_DCFG_NUMP_SHIFT 17 #define DWC3_DCFG_NUMP(n) (((n) >> DWC3_DCFG_NUMP_SHIFT) & 0x1f) #define DWC3_DCFG_NUMP_MASK (0x1f << DWC3_DCFG_NUMP_SHIFT) -#define DWC3_DCFG_LPM_CAP (1 << 22) +#define DWC3_DCFG_LPM_CAP BIT(22) /* Device Control Register */ -#define DWC3_DCTL_RUN_STOP (1 << 31) -#define DWC3_DCTL_CSFTRST (1 << 30) -#define DWC3_DCTL_LSFTRST (1 << 29) +#define DWC3_DCTL_RUN_STOP BIT(31) +#define DWC3_DCTL_CSFTRST BIT(30) +#define DWC3_DCTL_LSFTRST BIT(29) #define DWC3_DCTL_HIRD_THRES_MASK (0x1f << 24) #define DWC3_DCTL_HIRD_THRES(n) ((n) << 24) -#define DWC3_DCTL_APPL1RES (1 << 23) +#define DWC3_DCTL_APPL1RES BIT(23) /* These apply for core versions 1.87a and earlier */ #define DWC3_DCTL_TRGTULST_MASK (0x0f << 17) @@ -341,15 +343,15 @@ #define DWC3_DCTL_LPM_ERRATA_MASK DWC3_DCTL_LPM_ERRATA(0xf) #define DWC3_DCTL_LPM_ERRATA(n) ((n) << 20) -#define DWC3_DCTL_KEEP_CONNECT (1 << 19) -#define DWC3_DCTL_L1_HIBER_EN (1 << 18) -#define DWC3_DCTL_CRS (1 << 17) -#define DWC3_DCTL_CSS (1 << 16) +#define DWC3_DCTL_KEEP_CONNECT BIT(19) +#define DWC3_DCTL_L1_HIBER_EN BIT(18) +#define DWC3_DCTL_CRS BIT(17) +#define DWC3_DCTL_CSS BIT(16) -#define DWC3_DCTL_INITU2ENA (1 << 12) -#define DWC3_DCTL_ACCEPTU2ENA (1 << 11) -#define DWC3_DCTL_INITU1ENA (1 << 10) -#define DWC3_DCTL_ACCEPTU1ENA (1 << 9) +#define DWC3_DCTL_INITU2ENA BIT(12) +#define DWC3_DCTL_ACCEPTU2ENA BIT(11) +#define DWC3_DCTL_INITU1ENA BIT(10) +#define DWC3_DCTL_ACCEPTU1ENA BIT(9) #define DWC3_DCTL_TSTCTRL_MASK (0xf << 1) #define DWC3_DCTL_ULSTCHNGREQ_MASK (0x0f << 5) @@ -364,36 +366,36 @@ #define DWC3_DCTL_ULSTCHNG_LOOPBACK (DWC3_DCTL_ULSTCHNGREQ(11)) /* Device Event Enable Register */ -#define DWC3_DEVTEN_VNDRDEVTSTRCVEDEN (1 << 12) -#define DWC3_DEVTEN_EVNTOVERFLOWEN (1 << 11) -#define DWC3_DEVTEN_CMDCMPLTEN (1 << 10) -#define DWC3_DEVTEN_ERRTICERREN (1 << 9) -#define DWC3_DEVTEN_SOFEN (1 << 7) -#define DWC3_DEVTEN_EOPFEN (1 << 6) -#define DWC3_DEVTEN_HIBERNATIONREQEVTEN (1 << 5) -#define DWC3_DEVTEN_WKUPEVTEN (1 << 4) -#define DWC3_DEVTEN_ULSTCNGEN (1 << 3) -#define DWC3_DEVTEN_CONNECTDONEEN (1 << 2) -#define DWC3_DEVTEN_USBRSTEN (1 << 1) -#define DWC3_DEVTEN_DISCONNEVTEN (1 << 0) +#define DWC3_DEVTEN_VNDRDEVTSTRCVEDEN BIT(12) +#define DWC3_DEVTEN_EVNTOVERFLOWEN BIT(11) +#define DWC3_DEVTEN_CMDCMPLTEN BIT(10) +#define DWC3_DEVTEN_ERRTICERREN BIT(9) +#define DWC3_DEVTEN_SOFEN BIT(7) +#define DWC3_DEVTEN_EOPFEN BIT(6) +#define DWC3_DEVTEN_HIBERNATIONREQEVTEN BIT(5) +#define DWC3_DEVTEN_WKUPEVTEN BIT(4) +#define DWC3_DEVTEN_ULSTCNGEN BIT(3) +#define DWC3_DEVTEN_CONNECTDONEEN BIT(2) +#define DWC3_DEVTEN_USBRSTEN BIT(1) +#define DWC3_DEVTEN_DISCONNEVTEN BIT(0) /* Device Status Register */ -#define DWC3_DSTS_DCNRD (1 << 29) +#define DWC3_DSTS_DCNRD BIT(29) /* This applies for core versions 1.87a and earlier */ -#define DWC3_DSTS_PWRUPREQ (1 << 24) +#define DWC3_DSTS_PWRUPREQ BIT(24) /* These apply for core versions 1.94a and later */ -#define DWC3_DSTS_RSS (1 << 25) -#define DWC3_DSTS_SSS (1 << 24) +#define DWC3_DSTS_RSS BIT(25) +#define DWC3_DSTS_SSS BIT(24) -#define DWC3_DSTS_COREIDLE (1 << 23) -#define DWC3_DSTS_DEVCTRLHLT (1 << 22) +#define DWC3_DSTS_COREIDLE BIT(23) +#define DWC3_DSTS_DEVCTRLHLT BIT(22) #define DWC3_DSTS_USBLNKST_MASK (0x0f << 18) #define DWC3_DSTS_USBLNKST(n) (((n) & DWC3_DSTS_USBLNKST_MASK) >> 18) -#define DWC3_DSTS_RXFIFOEMPTY (1 << 17) +#define DWC3_DSTS_RXFIFOEMPTY BIT(17) #define DWC3_DSTS_SOFFN_MASK (0x3fff << 3) #define DWC3_DSTS_SOFFN(n) (((n) & DWC3_DSTS_SOFFN_MASK) >> 3) @@ -403,7 +405,7 @@ #define DWC3_DSTS_SUPERSPEED_PLUS (5 << 0) /* DWC_usb31 only */ #define DWC3_DSTS_SUPERSPEED (4 << 0) #define DWC3_DSTS_HIGHSPEED (0 << 0) -#define DWC3_DSTS_FULLSPEED (1 << 0) +#define DWC3_DSTS_FULLSPEED BIT(0) #define DWC3_DSTS_LOWSPEED (2 << 0) /* Device Generic Command Register */ @@ -421,26 +423,26 @@ #define DWC3_DGCMD_RUN_SOC_BUS_LOOPBACK 0x10 #define DWC3_DGCMD_STATUS(n) (((n) >> 12) & 0x0F) -#define DWC3_DGCMD_CMDACT (1 << 10) -#define DWC3_DGCMD_CMDIOC (1 << 8) +#define DWC3_DGCMD_CMDACT BIT(10) +#define DWC3_DGCMD_CMDIOC BIT(8) /* Device Generic Command Parameter Register */ -#define DWC3_DGCMDPAR_FORCE_LINKPM_ACCEPT (1 << 0) +#define DWC3_DGCMDPAR_FORCE_LINKPM_ACCEPT BIT(0) #define DWC3_DGCMDPAR_FIFO_NUM(n) ((n) << 0) #define DWC3_DGCMDPAR_RX_FIFO (0 << 5) -#define DWC3_DGCMDPAR_TX_FIFO (1 << 5) +#define DWC3_DGCMDPAR_TX_FIFO BIT(5) #define DWC3_DGCMDPAR_LOOPBACK_DIS (0 << 0) -#define DWC3_DGCMDPAR_LOOPBACK_ENA (1 << 0) +#define DWC3_DGCMDPAR_LOOPBACK_ENA BIT(0) /* Device Endpoint Command Register */ #define DWC3_DEPCMD_PARAM_SHIFT 16 #define DWC3_DEPCMD_PARAM(x) ((x) << DWC3_DEPCMD_PARAM_SHIFT) #define DWC3_DEPCMD_GET_RSC_IDX(x) (((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f) #define DWC3_DEPCMD_STATUS(x) (((x) >> 12) & 0x0F) -#define DWC3_DEPCMD_HIPRI_FORCERM (1 << 11) -#define DWC3_DEPCMD_CLEARPENDIN (1 << 11) -#define DWC3_DEPCMD_CMDACT (1 << 10) -#define DWC3_DEPCMD_CMDIOC (1 << 8) +#define DWC3_DEPCMD_HIPRI_FORCERM BIT(11) +#define DWC3_DEPCMD_CLEARPENDIN BIT(11) +#define DWC3_DEPCMD_CMDACT BIT(10) +#define DWC3_DEPCMD_CMDIOC BIT(8) #define DWC3_DEPCMD_DEPSTARTCFG (0x09 << 0) #define DWC3_DEPCMD_ENDTRANSFER (0x08 << 0) @@ -458,7 +460,7 @@ #define DWC3_DEPCMD_CMD(x) ((x) & 0xf) /* The EP number goes 0..31 so ep0 is always out and ep1 is always in */ -#define DWC3_DALEPENA_EP(n) (1 << n) +#define DWC3_DALEPENA_EP(n) BIT(n) #define DWC3_DEPCMD_TYPE_CONTROL 0 #define DWC3_DEPCMD_TYPE_ISOC 1 @@ -500,8 +502,8 @@ struct dwc3_event_buffer { struct dwc3 *dwc; }; -#define DWC3_EP_FLAG_STALLED (1 << 0) -#define DWC3_EP_FLAG_WEDGED (1 << 1) +#define DWC3_EP_FLAG_STALLED BIT(0) +#define DWC3_EP_FLAG_WEDGED BIT(1) #define DWC3_EP_DIRECTION_TX true #define DWC3_EP_DIRECTION_RX false @@ -550,17 +552,17 @@ struct dwc3_ep { u32 saved_state; unsigned flags; -#define DWC3_EP_ENABLED (1 << 0) -#define DWC3_EP_STALL (1 << 1) -#define DWC3_EP_WEDGE (1 << 2) -#define DWC3_EP_BUSY (1 << 4) -#define DWC3_EP_PENDING_REQUEST (1 << 5) -#define DWC3_EP_MISSED_ISOC (1 << 6) -#define DWC3_EP_END_TRANSFER_PENDING (1 << 7) -#define DWC3_EP_TRANSFER_STARTED (1 << 8) +#define DWC3_EP_ENABLED BIT(0) +#define DWC3_EP_STALL BIT(1) +#define DWC3_EP_WEDGE BIT(2) +#define DWC3_EP_BUSY BIT(4) +#define DWC3_EP_PENDING_REQUEST BIT(5) +#define DWC3_EP_MISSED_ISOC BIT(6) +#define DWC3_EP_END_TRANSFER_PENDING BIT(7) +#define DWC3_EP_TRANSFER_STARTED BIT(8) /* This last one is specific to EP0 */ -#define DWC3_EP0_DIR_IN (1 << 31) +#define DWC3_EP0_DIR_IN BIT(31) /* * IMPORTANT: we *know* we have 256 TRBs in our @trb_pool, so we will @@ -638,13 +640,13 @@ enum dwc3_link_state { #define DWC3_TRB_STS_XFER_IN_PROG 4 /* TRB Control */ -#define DWC3_TRB_CTRL_HWO (1 << 0) -#define DWC3_TRB_CTRL_LST (1 << 1) -#define DWC3_TRB_CTRL_CHN (1 << 2) -#define DWC3_TRB_CTRL_CSP (1 << 3) +#define DWC3_TRB_CTRL_HWO BIT(0) +#define DWC3_TRB_CTRL_LST BIT(1) +#define DWC3_TRB_CTRL_CHN BIT(2) +#define DWC3_TRB_CTRL_CSP BIT(3) #define DWC3_TRB_CTRL_TRBCTL(n) (((n) & 0x3f) << 4) -#define DWC3_TRB_CTRL_ISP_IMI (1 << 10) -#define DWC3_TRB_CTRL_IOC (1 << 11) +#define DWC3_TRB_CTRL_ISP_IMI BIT(10) +#define DWC3_TRB_CTRL_IOC BIT(11) #define DWC3_TRB_CTRL_SID_SOFN(n) (((n) & 0xffff) << 14) #define DWC3_TRBCTL_TYPE(n) ((n) & (0x3f << 4)) @@ -746,6 +748,7 @@ struct dwc3_request { unsigned direction:1; unsigned mapped:1; unsigned started:1; + unsigned zero:1; }; /* @@ -758,15 +761,11 @@ struct dwc3_scratchpad_array { /** * struct dwc3 - representation of our controller - * @ctrl_req: usb control request which is used for ep0 + * @drd_work - workqueue used for role swapping * @ep0_trb: trb which is used for the ctrl_req - * @ep0_bounce: bounce buffer for ep0 - * @zlp_buf: used when request->zero is set * @setup_buf: used while precessing STD USB requests - * @ctrl_req_addr: dma address of ctrl_req * @ep0_trb: dma address of ep0_trb * @ep0_usb_req: dummy req used while handling STD USB requests - * @ep0_bounce_addr: dma address of ep0_bounce * @scratch_addr: dma address of scratchbuf * @ep0_in_setup: one control transfer is completed and enter setup phase * @lock: for synchronizing @@ -784,6 +783,10 @@ struct dwc3_scratchpad_array { * @maximum_speed: maximum speed requested (mainly for testing purposes) * @revision: revision register contents * @dr_mode: requested mode of operation + * @current_dr_role: current role of operation when in dual-role mode + * @desired_dr_role: desired role of operation when in dual-role mode + * @edev: extcon handle + * @edev_nb: extcon notifier * @hsphy_mode: UTMI phy mode, one of following: * - USBPHY_INTERFACE_MODE_UTMI * - USBPHY_INTERFACE_MODE_UTMIW @@ -799,8 +802,7 @@ struct dwc3_scratchpad_array { * @u2pel: parameter from Set SEL request. * @u1sel: parameter from Set SEL request. * @u1pel: parameter from Set SEL request. - * @num_out_eps: number of out endpoints - * @num_in_eps: number of in endpoints + * @num_eps: number of endpoints * @ep0_next_event: hold the next expected event * @ep0state: state of endpoint zero * @link_state: link state @@ -858,17 +860,13 @@ struct dwc3_scratchpad_array { * increments or 0 to disable. */ struct dwc3 { - struct usb_ctrlrequest *ctrl_req; + struct work_struct drd_work; struct dwc3_trb *ep0_trb; void *bounce; - void *ep0_bounce; - void *zlp_buf; void *scratchbuf; u8 *setup_buf; - dma_addr_t ctrl_req_addr; dma_addr_t ep0_trb_addr; dma_addr_t bounce_addr; - dma_addr_t ep0_bounce_addr; dma_addr_t scratch_addr; struct dwc3_request ep0_usb_req; struct completion ep0_in_setup; @@ -900,6 +898,10 @@ struct dwc3 { size_t regs_size; enum usb_dr_mode dr_mode; + u32 current_dr_role; + u32 desired_dr_role; + struct extcon_dev *edev; + struct notifier_block edev_nb; enum usb_phy_interface hsphy_mode; u32 fladj; @@ -960,8 +962,7 @@ struct dwc3 { u8 speed; - u8 num_out_eps; - u8 num_in_eps; + u8 num_eps; struct dwc3_hwparams hwparams; struct dentry *root; @@ -1010,7 +1011,7 @@ struct dwc3 { u16 imod_interval; }; -/* -------------------------------------------------------------------------- */ +#define work_to_dwc(w) (container_of((w), struct dwc3, drd_work)) /* -------------------------------------------------------------------------- */ @@ -1054,13 +1055,13 @@ struct dwc3_event_depevt { u32 status:4; /* Within XferNotReady */ -#define DEPEVT_STATUS_TRANSFER_ACTIVE (1 << 3) +#define DEPEVT_STATUS_TRANSFER_ACTIVE BIT(3) /* Within XferComplete */ -#define DEPEVT_STATUS_BUSERR (1 << 0) -#define DEPEVT_STATUS_SHORT (1 << 1) -#define DEPEVT_STATUS_IOC (1 << 2) -#define DEPEVT_STATUS_LST (1 << 3) +#define DEPEVT_STATUS_BUSERR BIT(0) +#define DEPEVT_STATUS_SHORT BIT(1) +#define DEPEVT_STATUS_IOC BIT(2) +#define DEPEVT_STATUS_LST BIT(3) /* Stream event only */ #define DEPEVT_STREAMEVT_FOUND 1 @@ -1221,6 +1222,16 @@ static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc, { return 0; } #endif +#if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) +int dwc3_drd_init(struct dwc3 *dwc); +void dwc3_drd_exit(struct dwc3 *dwc); +#else +static inline int dwc3_drd_init(struct dwc3 *dwc) +{ return 0; } +static inline void dwc3_drd_exit(struct dwc3 *dwc) +{ } +#endif + /* power management interface */ #if !IS_ENABLED(CONFIG_USB_DWC3_HOST) int dwc3_gadget_suspend(struct dwc3 *dwc); diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index eeed4ffd81312c9922c4fe2e4b83627a8c41059c..cb2d8d3f7f3d8d052bb3a94a924ce37100683bb4 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -124,6 +124,34 @@ dwc3_gadget_link_string(enum dwc3_link_state link_state) } } +/** + * dwc3_trb_type_string - returns TRB type as a string + * @type: the type of the TRB + */ +static inline const char *dwc3_trb_type_string(unsigned int type) +{ + switch (type) { + case DWC3_TRBCTL_NORMAL: + return "normal"; + case DWC3_TRBCTL_CONTROL_SETUP: + return "setup"; + case DWC3_TRBCTL_CONTROL_STATUS2: + return "status2"; + case DWC3_TRBCTL_CONTROL_STATUS3: + return "status3"; + case DWC3_TRBCTL_CONTROL_DATA: + return "data"; + case DWC3_TRBCTL_ISOCHRONOUS_FIRST: + return "isoc-first"; + case DWC3_TRBCTL_ISOCHRONOUS: + return "isoc"; + case DWC3_TRBCTL_LINK_TRB: + return "link"; + default: + return "UNKNOWN"; + } +} + static inline const char *dwc3_ep0_state_string(enum dwc3_ep0_state state) { switch (state) { diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c index 31926dda43c9638f96a3fa6a51a2f5ef6a6d2359..7be963dd8e3baecd4538a56c3179787cd9b85d29 100644 --- a/drivers/usb/dwc3/debugfs.c +++ b/drivers/usb/dwc3/debugfs.c @@ -300,7 +300,7 @@ static int dwc3_mode_show(struct seq_file *s, void *unused) seq_printf(s, "device\n"); break; case DWC3_GCTL_PRTCAP_OTG: - seq_printf(s, "OTG\n"); + seq_printf(s, "otg\n"); break; default: seq_printf(s, "UNKNOWN %08x\n", DWC3_GCTL_PRTCAP(reg)); @@ -319,7 +319,6 @@ static ssize_t dwc3_mode_write(struct file *file, { struct seq_file *s = file->private_data; struct dwc3 *dwc = s->private; - unsigned long flags; u32 mode = 0; char buf[32]; @@ -327,19 +326,16 @@ static ssize_t dwc3_mode_write(struct file *file, return -EFAULT; if (!strncmp(buf, "host", 4)) - mode |= DWC3_GCTL_PRTCAP_HOST; + mode = DWC3_GCTL_PRTCAP_HOST; if (!strncmp(buf, "device", 6)) - mode |= DWC3_GCTL_PRTCAP_DEVICE; + mode = DWC3_GCTL_PRTCAP_DEVICE; if (!strncmp(buf, "otg", 3)) - mode |= DWC3_GCTL_PRTCAP_OTG; + mode = DWC3_GCTL_PRTCAP_OTG; + + dwc3_set_mode(dwc, mode); - if (mode) { - spin_lock_irqsave(&dwc->lock, flags); - dwc3_set_mode(dwc, mode); - spin_unlock_irqrestore(&dwc->lock, flags); - } return count; } @@ -446,52 +442,7 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused) state = DWC3_DSTS_USBLNKST(reg); spin_unlock_irqrestore(&dwc->lock, flags); - switch (state) { - case DWC3_LINK_STATE_U0: - seq_printf(s, "U0\n"); - break; - case DWC3_LINK_STATE_U1: - seq_printf(s, "U1\n"); - break; - case DWC3_LINK_STATE_U2: - seq_printf(s, "U2\n"); - break; - case DWC3_LINK_STATE_U3: - seq_printf(s, "U3\n"); - break; - case DWC3_LINK_STATE_SS_DIS: - seq_printf(s, "SS.Disabled\n"); - break; - case DWC3_LINK_STATE_RX_DET: - seq_printf(s, "Rx.Detect\n"); - break; - case DWC3_LINK_STATE_SS_INACT: - seq_printf(s, "SS.Inactive\n"); - break; - case DWC3_LINK_STATE_POLL: - seq_printf(s, "Poll\n"); - break; - case DWC3_LINK_STATE_RECOV: - seq_printf(s, "Recovery\n"); - break; - case DWC3_LINK_STATE_HRESET: - seq_printf(s, "HRESET\n"); - break; - case DWC3_LINK_STATE_CMPLY: - seq_printf(s, "Compliance\n"); - break; - case DWC3_LINK_STATE_LPBK: - seq_printf(s, "Loopback\n"); - break; - case DWC3_LINK_STATE_RESET: - seq_printf(s, "Reset\n"); - break; - case DWC3_LINK_STATE_RESUME: - seq_printf(s, "Resume\n"); - break; - default: - seq_printf(s, "UNKNOWN %d\n", state); - } + seq_printf(s, "%s\n", dwc3_gadget_link_string(state)); return 0; } @@ -689,30 +640,6 @@ static int dwc3_ep_transfer_type_show(struct seq_file *s, void *unused) return 0; } -static inline const char *dwc3_trb_type_string(struct dwc3_trb *trb) -{ - switch (DWC3_TRBCTL_TYPE(trb->ctrl)) { - case DWC3_TRBCTL_NORMAL: - return "normal"; - case DWC3_TRBCTL_CONTROL_SETUP: - return "control-setup"; - case DWC3_TRBCTL_CONTROL_STATUS2: - return "control-status2"; - case DWC3_TRBCTL_CONTROL_STATUS3: - return "control-status3"; - case DWC3_TRBCTL_CONTROL_DATA: - return "control-data"; - case DWC3_TRBCTL_ISOCHRONOUS_FIRST: - return "isoc-first"; - case DWC3_TRBCTL_ISOCHRONOUS: - return "isoc"; - case DWC3_TRBCTL_LINK_TRB: - return "link"; - default: - return "UNKNOWN"; - } -} - static int dwc3_ep_trb_ring_show(struct seq_file *s, void *unused) { struct dwc3_ep *dep = s->private; @@ -733,10 +660,11 @@ static int dwc3_ep_trb_ring_show(struct seq_file *s, void *unused) for (i = 0; i < DWC3_TRB_NUM; i++) { struct dwc3_trb *trb = &dep->trb_pool[i]; + unsigned int type = DWC3_TRBCTL_TYPE(trb->ctrl); seq_printf(s, "%08x%08x,%d,%s,%d,%d,%d,%d,%d,%d\n", trb->bph, trb->bpl, trb->size, - dwc3_trb_type_string(trb), + dwc3_trb_type_string(type), !!(trb->ctrl & DWC3_TRB_CTRL_IOC), !!(trb->ctrl & DWC3_TRB_CTRL_ISP_IMI), !!(trb->ctrl & DWC3_TRB_CTRL_CSP), @@ -822,19 +750,8 @@ static void dwc3_debugfs_create_endpoint_dirs(struct dwc3 *dwc, { int i; - for (i = 0; i < dwc->num_in_eps; i++) { - u8 epnum = (i << 1) | 1; - struct dwc3_ep *dep = dwc->eps[epnum]; - - if (!dep) - continue; - - dwc3_debugfs_create_endpoint_dir(dep, parent); - } - - for (i = 0; i < dwc->num_out_eps; i++) { - u8 epnum = (i << 1); - struct dwc3_ep *dep = dwc->eps[epnum]; + for (i = 0; i < dwc->num_eps; i++) { + struct dwc3_ep *dep = dwc->eps[i]; if (!dep) continue; diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c new file mode 100644 index 0000000000000000000000000000000000000000..2765c51c7ef5fe0c0242ef0f09750fae2d727598 --- /dev/null +++ b/drivers/usb/dwc3/drd.c @@ -0,0 +1,85 @@ +/** + * drd.c - DesignWare USB3 DRD Controller Dual-role support + * + * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com + * + * Authors: Roger Quadros + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include "debug.h" +#include "core.h" +#include "gadget.h" + +static void dwc3_drd_update(struct dwc3 *dwc) +{ + int id; + + id = extcon_get_state(dwc->edev, EXTCON_USB_HOST); + if (id < 0) + id = 0; + + dwc3_set_mode(dwc, id ? + DWC3_GCTL_PRTCAP_HOST : + DWC3_GCTL_PRTCAP_DEVICE); +} + +static int dwc3_drd_notifier(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct dwc3 *dwc = container_of(nb, struct dwc3, edev_nb); + + dwc3_set_mode(dwc, event ? + DWC3_GCTL_PRTCAP_HOST : + DWC3_GCTL_PRTCAP_DEVICE); + + return NOTIFY_DONE; +} + +int dwc3_drd_init(struct dwc3 *dwc) +{ + int ret; + + if (dwc->dev->of_node) { + if (of_property_read_bool(dwc->dev->of_node, "extcon")) + dwc->edev = extcon_get_edev_by_phandle(dwc->dev, 0); + + if (IS_ERR(dwc->edev)) + return PTR_ERR(dwc->edev); + + dwc->edev_nb.notifier_call = dwc3_drd_notifier; + ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST, + &dwc->edev_nb); + if (ret < 0) { + dev_err(dwc->dev, "couldn't register cable notifier\n"); + return ret; + } + } + + dwc3_drd_update(dwc); + + return 0; +} + +void dwc3_drd_exit(struct dwc3 *dwc) +{ + extcon_unregister_notifier(dwc->edev, EXTCON_USB_HOST, + &dwc->edev_nb); + + dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_DEVICE); + flush_work(&dwc->drd_work); + dwc3_gadget_exit(dwc); +} diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index 1515d45ebcec0dd412b52550930053ab9a9a2fe9..98f74ff66120936bad7bb166c8cd2c5fb040b460 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -147,53 +147,53 @@ static int dwc3_exynos_probe(struct platform_device *pdev) exynos->vdd33 = devm_regulator_get(dev, "vdd33"); if (IS_ERR(exynos->vdd33)) { ret = PTR_ERR(exynos->vdd33); - goto err2; + goto vdd33_err; } ret = regulator_enable(exynos->vdd33); if (ret) { dev_err(dev, "Failed to enable VDD33 supply\n"); - goto err2; + goto vdd33_err; } exynos->vdd10 = devm_regulator_get(dev, "vdd10"); if (IS_ERR(exynos->vdd10)) { ret = PTR_ERR(exynos->vdd10); - goto err3; + goto vdd10_err; } ret = regulator_enable(exynos->vdd10); if (ret) { dev_err(dev, "Failed to enable VDD10 supply\n"); - goto err3; + goto vdd10_err; } ret = dwc3_exynos_register_phys(exynos); if (ret) { dev_err(dev, "couldn't register PHYs\n"); - goto err4; + goto phys_err; } if (node) { ret = of_platform_populate(node, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to add dwc3 core\n"); - goto err5; + goto populate_err; } } else { dev_err(dev, "no device node, failed to add dwc3 core\n"); ret = -ENODEV; - goto err5; + goto populate_err; } return 0; -err5: +populate_err: platform_device_unregister(exynos->usb2_phy); platform_device_unregister(exynos->usb3_phy); -err4: +phys_err: regulator_disable(exynos->vdd10); -err3: +vdd10_err: regulator_disable(exynos->vdd33); -err2: +vdd33_err: clk_disable_unprepare(exynos->axius_clk); axius_clk_err: clk_disable_unprepare(exynos->susp_clk); diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c index 72664700b8a25c7d9bcee89c16c68fa6b5792565..12ee23f53cdde17ea924344ff7e84d88cf9881cb 100644 --- a/drivers/usb/dwc3/dwc3-keystone.c +++ b/drivers/usb/dwc3/dwc3-keystone.c @@ -107,6 +107,10 @@ static int kdwc3_probe(struct platform_device *pdev) return PTR_ERR(kdwc->usbss); kdwc->clk = devm_clk_get(kdwc->dev, "usb"); + if (IS_ERR(kdwc->clk)) { + dev_err(kdwc->dev, "unable to get usb clock\n"); + return PTR_ERR(kdwc->clk); + } error = clk_prepare_enable(kdwc->clk); if (error < 0) { diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index f8d0747810e78d7cc7930fed1b71cfc9c5aeb048..98926504b55b5b425eb3c9dce66e889a524d2152 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -79,40 +79,40 @@ #define USBOTGSS_DEBUG_OFFSET 0x0600 /* SYSCONFIG REGISTER */ -#define USBOTGSS_SYSCONFIG_DMADISABLE (1 << 16) +#define USBOTGSS_SYSCONFIG_DMADISABLE BIT(16) /* IRQ_EOI REGISTER */ -#define USBOTGSS_IRQ_EOI_LINE_NUMBER (1 << 0) +#define USBOTGSS_IRQ_EOI_LINE_NUMBER BIT(0) /* IRQS0 BITS */ -#define USBOTGSS_IRQO_COREIRQ_ST (1 << 0) +#define USBOTGSS_IRQO_COREIRQ_ST BIT(0) /* IRQMISC BITS */ -#define USBOTGSS_IRQMISC_DMADISABLECLR (1 << 17) -#define USBOTGSS_IRQMISC_OEVT (1 << 16) -#define USBOTGSS_IRQMISC_DRVVBUS_RISE (1 << 13) -#define USBOTGSS_IRQMISC_CHRGVBUS_RISE (1 << 12) -#define USBOTGSS_IRQMISC_DISCHRGVBUS_RISE (1 << 11) -#define USBOTGSS_IRQMISC_IDPULLUP_RISE (1 << 8) -#define USBOTGSS_IRQMISC_DRVVBUS_FALL (1 << 5) -#define USBOTGSS_IRQMISC_CHRGVBUS_FALL (1 << 4) -#define USBOTGSS_IRQMISC_DISCHRGVBUS_FALL (1 << 3) -#define USBOTGSS_IRQMISC_IDPULLUP_FALL (1 << 0) +#define USBOTGSS_IRQMISC_DMADISABLECLR BIT(17) +#define USBOTGSS_IRQMISC_OEVT BIT(16) +#define USBOTGSS_IRQMISC_DRVVBUS_RISE BIT(13) +#define USBOTGSS_IRQMISC_CHRGVBUS_RISE BIT(12) +#define USBOTGSS_IRQMISC_DISCHRGVBUS_RISE BIT(11) +#define USBOTGSS_IRQMISC_IDPULLUP_RISE BIT(8) +#define USBOTGSS_IRQMISC_DRVVBUS_FALL BIT(5) +#define USBOTGSS_IRQMISC_CHRGVBUS_FALL BIT(4) +#define USBOTGSS_IRQMISC_DISCHRGVBUS_FALL BIT(3) +#define USBOTGSS_IRQMISC_IDPULLUP_FALL BIT(0) /* UTMI_OTG_STATUS REGISTER */ -#define USBOTGSS_UTMI_OTG_STATUS_DRVVBUS (1 << 5) -#define USBOTGSS_UTMI_OTG_STATUS_CHRGVBUS (1 << 4) -#define USBOTGSS_UTMI_OTG_STATUS_DISCHRGVBUS (1 << 3) -#define USBOTGSS_UTMI_OTG_STATUS_IDPULLUP (1 << 0) +#define USBOTGSS_UTMI_OTG_STATUS_DRVVBUS BIT(5) +#define USBOTGSS_UTMI_OTG_STATUS_CHRGVBUS BIT(4) +#define USBOTGSS_UTMI_OTG_STATUS_DISCHRGVBUS BIT(3) +#define USBOTGSS_UTMI_OTG_STATUS_IDPULLUP BIT(0) /* UTMI_OTG_CTRL REGISTER */ -#define USBOTGSS_UTMI_OTG_CTRL_SW_MODE (1 << 31) -#define USBOTGSS_UTMI_OTG_CTRL_POWERPRESENT (1 << 9) -#define USBOTGSS_UTMI_OTG_CTRL_TXBITSTUFFENABLE (1 << 8) -#define USBOTGSS_UTMI_OTG_CTRL_IDDIG (1 << 4) -#define USBOTGSS_UTMI_OTG_CTRL_SESSEND (1 << 3) -#define USBOTGSS_UTMI_OTG_CTRL_SESSVALID (1 << 2) -#define USBOTGSS_UTMI_OTG_CTRL_VBUSVALID (1 << 1) +#define USBOTGSS_UTMI_OTG_CTRL_SW_MODE BIT(31) +#define USBOTGSS_UTMI_OTG_CTRL_POWERPRESENT BIT(9) +#define USBOTGSS_UTMI_OTG_CTRL_TXBITSTUFFENABLE BIT(8) +#define USBOTGSS_UTMI_OTG_CTRL_IDDIG BIT(4) +#define USBOTGSS_UTMI_OTG_CTRL_SESSEND BIT(3) +#define USBOTGSS_UTMI_OTG_CTRL_SESSVALID BIT(2) +#define USBOTGSS_UTMI_OTG_CTRL_VBUSVALID BIT(1) struct dwc3_omap { struct device *dev; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index cce0a220b6b0437f23102da3fa56f4f7373f9082..84a2cebfc712023182dbb4622cbc355b3545310d 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -39,6 +39,8 @@ #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 #define PCI_DEVICE_ID_INTEL_GLK 0x31aa +#define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee +#define PCI_DEVICE_ID_INTEL_CNPH 0xa36e #define PCI_INTEL_BXT_DSM_UUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 @@ -125,8 +127,10 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) if (pdev->device == PCI_DEVICE_ID_INTEL_BYT) { struct gpio_desc *gpio; - acpi_dev_add_driver_gpios(ACPI_COMPANION(&pdev->dev), + ret = devm_acpi_dev_add_driver_gpios(&pdev->dev, acpi_dwc3_byt_gpios); + if (ret) + dev_dbg(&pdev->dev, "failed to add mapping table\n"); /* * These GPIOs will turn on the USB2 PHY. Note that we have to @@ -242,7 +246,6 @@ static void dwc3_pci_remove(struct pci_dev *pci) device_init_wakeup(&pci->dev, false); pm_runtime_get(&pci->dev); - acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pci->dev)); platform_device_unregister(dwc->dwc3); } @@ -269,6 +272,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPLP), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPH), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index e689cede9b0e526ed4bc98e02d5e7b6294ce6d1d..a78c78e7a8c3c18ac3d50f1b4dc18ec9517ae248 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -39,14 +39,13 @@ static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep); static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req); -static void dwc3_ep0_prepare_one_trb(struct dwc3 *dwc, u8 epnum, +static void dwc3_ep0_prepare_one_trb(struct dwc3_ep *dep, dma_addr_t buf_dma, u32 len, u32 type, bool chain) { struct dwc3_trb *trb; - struct dwc3_ep *dep; - - dep = dwc->eps[epnum]; + struct dwc3 *dwc; + dwc = dep->dwc; trb = &dwc->ep0_trb[dep->trb_enqueue]; if (chain) @@ -69,16 +68,17 @@ static void dwc3_ep0_prepare_one_trb(struct dwc3 *dwc, u8 epnum, trace_dwc3_prepare_trb(dep, trb); } -static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum) +static int dwc3_ep0_start_trans(struct dwc3_ep *dep) { struct dwc3_gadget_ep_cmd_params params; - struct dwc3_ep *dep; + struct dwc3 *dwc; int ret; - dep = dwc->eps[epnum]; if (dep->flags & DWC3_EP_BUSY) return 0; + dwc = dep->dwc; + memset(¶ms, 0, sizeof(params)); params.param0 = upper_32_bits(dwc->ep0_trb_addr); params.param1 = lower_32_bits(dwc->ep0_trb_addr); @@ -279,13 +279,15 @@ int dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value) void dwc3_ep0_out_start(struct dwc3 *dwc) { + struct dwc3_ep *dep; int ret; complete(&dwc->ep0_in_setup); - dwc3_ep0_prepare_one_trb(dwc, 0, dwc->ctrl_req_addr, 8, + dep = dwc->eps[0]; + dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 8, DWC3_TRBCTL_CONTROL_SETUP, false); - ret = dwc3_ep0_start_trans(dwc, 0); + ret = dwc3_ep0_start_trans(dep); WARN_ON(ret < 0); } @@ -794,7 +796,7 @@ static int dwc3_ep0_std_request(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) static void dwc3_ep0_inspect_setup(struct dwc3 *dwc, const struct dwc3_event_depevt *event) { - struct usb_ctrlrequest *ctrl = dwc->ctrl_req; + struct usb_ctrlrequest *ctrl = (void *) dwc->ep0_trb; int ret = -EINVAL; u32 len; @@ -834,7 +836,6 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, struct usb_request *ur; struct dwc3_trb *trb; struct dwc3_ep *ep0; - unsigned transfer_size = 0; unsigned maxp; unsigned remaining_ur_length; void *buf; @@ -847,9 +848,7 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, ep0 = dwc->eps[0]; dwc->ep0_next_event = DWC3_EP0_NRDY_STATUS; - trb = dwc->ep0_trb; - trace_dwc3_complete_trb(ep0, trb); r = next_request(&ep0->pending_list); @@ -870,58 +869,23 @@ static void dwc3_ep0_complete_data(struct dwc3 *dwc, remaining_ur_length = ur->length; length = trb->size & DWC3_TRB_SIZE_MASK; - maxp = ep0->endpoint.maxpacket; - - if (dwc->ep0_bounced) { - /* - * Handle the first TRB before handling the bounce buffer if - * the request length is greater than the bounce buffer size - */ - if (ur->length > DWC3_EP0_BOUNCE_SIZE) { - transfer_size = ALIGN(ur->length - maxp, maxp); - transferred = transfer_size - length; - buf = (u8 *)buf + transferred; - ur->actual += transferred; - remaining_ur_length -= transferred; - - trb++; - length = trb->size & DWC3_TRB_SIZE_MASK; - - ep0->trb_enqueue = 0; - } - - transfer_size = roundup((ur->length - transfer_size), - maxp); - - transferred = min_t(u32, remaining_ur_length, - transfer_size - length); - memcpy(buf, dwc->ep0_bounce, transferred); - } else { - transferred = ur->length - length; - } - + transferred = ur->length - length; ur->actual += transferred; - if ((epnum & 1) && ur->actual < ur->length) { - /* for some reason we did not get everything out */ + if ((IS_ALIGNED(ur->length, ep0->endpoint.maxpacket) && + ur->length && ur->zero) || dwc->ep0_bounced) { + trb++; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + trace_dwc3_complete_trb(ep0, trb); + ep0->trb_enqueue = 0; + dwc->ep0_bounced = false; + } + if ((epnum & 1) && ur->actual < ur->length) dwc3_ep0_stall_and_restart(dwc); - } else { + else dwc3_gadget_giveback(ep0, r, 0); - - if (IS_ALIGNED(ur->length, ep0->endpoint.maxpacket) && - ur->length && ur->zero) { - int ret; - - dwc->ep0_next_event = DWC3_EP0_COMPLETE; - - dwc3_ep0_prepare_one_trb(dwc, epnum, dwc->ctrl_req_addr, - 0, DWC3_TRBCTL_CONTROL_DATA, false); - ret = dwc3_ep0_start_trans(dwc, epnum); - WARN_ON(ret < 0); - } - } } static void dwc3_ep0_complete_status(struct dwc3 *dwc, @@ -997,14 +961,13 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->direction = !!dep->number; if (req->request.length == 0) { - dwc3_ep0_prepare_one_trb(dwc, dep->number, - dwc->ctrl_req_addr, 0, + dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 0, DWC3_TRBCTL_CONTROL_DATA, false); - ret = dwc3_ep0_start_trans(dwc, dep->number); + ret = dwc3_ep0_start_trans(dep); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) && (dep->number == 0)) { - u32 transfer_size = 0; u32 maxpacket; + u32 rem; ret = usb_gadget_map_request_by_dev(dwc->sysdev, &req->request, dep->number); @@ -1012,36 +975,55 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, return; maxpacket = dep->endpoint.maxpacket; + rem = req->request.length % maxpacket; + dwc->ep0_bounced = true; - if (req->request.length > DWC3_EP0_BOUNCE_SIZE) { - transfer_size = ALIGN(req->request.length - maxpacket, - maxpacket); - dwc3_ep0_prepare_one_trb(dwc, dep->number, - req->request.dma, - transfer_size, - DWC3_TRBCTL_CONTROL_DATA, - true); - } - - transfer_size = roundup((req->request.length - transfer_size), - maxpacket); + /* prepare normal TRB */ + dwc3_ep0_prepare_one_trb(dep, req->request.dma, + req->request.length, + DWC3_TRBCTL_CONTROL_DATA, + true); + + /* Now prepare one extra TRB to align transfer size */ + dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, + maxpacket - rem, + DWC3_TRBCTL_CONTROL_DATA, + false); + ret = dwc3_ep0_start_trans(dep); + } else if (IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) && + req->request.length && req->request.zero) { + u32 maxpacket; + u32 rem; - dwc->ep0_bounced = true; + ret = usb_gadget_map_request_by_dev(dwc->sysdev, + &req->request, dep->number); + if (ret) + return; - dwc3_ep0_prepare_one_trb(dwc, dep->number, - dwc->ep0_bounce_addr, transfer_size, - DWC3_TRBCTL_CONTROL_DATA, false); - ret = dwc3_ep0_start_trans(dwc, dep->number); + maxpacket = dep->endpoint.maxpacket; + rem = req->request.length % maxpacket; + + /* prepare normal TRB */ + dwc3_ep0_prepare_one_trb(dep, req->request.dma, + req->request.length, + DWC3_TRBCTL_CONTROL_DATA, + true); + + /* Now prepare one extra TRB to align transfer size */ + dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, + 0, DWC3_TRBCTL_CONTROL_DATA, + false); + ret = dwc3_ep0_start_trans(dep); } else { ret = usb_gadget_map_request_by_dev(dwc->sysdev, &req->request, dep->number); if (ret) return; - dwc3_ep0_prepare_one_trb(dwc, dep->number, req->request.dma, + dwc3_ep0_prepare_one_trb(dep, req->request.dma, req->request.length, DWC3_TRBCTL_CONTROL_DATA, false); - ret = dwc3_ep0_start_trans(dwc, dep->number); + ret = dwc3_ep0_start_trans(dep); } WARN_ON(ret < 0); @@ -1055,9 +1037,8 @@ static int dwc3_ep0_start_control_status(struct dwc3_ep *dep) type = dwc->three_stage_setup ? DWC3_TRBCTL_CONTROL_STATUS3 : DWC3_TRBCTL_CONTROL_STATUS2; - dwc3_ep0_prepare_one_trb(dwc, dep->number, - dwc->ctrl_req_addr, 0, type, false); - return dwc3_ep0_start_trans(dwc, dep->number); + dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 0, type, false); + return dwc3_ep0_start_trans(dep); } static void __dwc3_ep0_do_control_status(struct dwc3 *dwc, struct dwc3_ep *dep) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 79e7a3480d51b07abf3988a51f1790f6caec7ca3..aea9a5b948b4beda91988152b80ff666e8890b26 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -171,7 +171,6 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, int status) { struct dwc3 *dwc = dep->dwc; - unsigned int unmap_after_complete = false; req->started = false; list_del(&req->list); @@ -181,19 +180,8 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - /* - * NOTICE we don't want to unmap before calling ->complete() if we're - * dealing with a bounced ep0 request. If we unmap it here, we would end - * up overwritting the contents of req->buf and this could confuse the - * gadget driver. - */ - if (dwc->ep0_bounced && dep->number <= 1) { - dwc->ep0_bounced = false; - unmap_after_complete = true; - } else { - usb_gadget_unmap_request_by_dev(dwc->sysdev, - &req->request, req->direction); - } + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, req->direction); trace_dwc3_gadget_giveback(req); @@ -201,10 +189,6 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, usb_gadget_giveback_request(&dep->endpoint, &req->request); spin_lock(&dwc->lock); - if (unmap_after_complete) - usb_gadget_unmap_request_by_dev(dwc->sysdev, - &req->request, req->direction); - if (dep->number > 1) pm_runtime_put(dwc->dev); } @@ -1060,6 +1044,22 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep, false, 0, req->request.stream_id, req->request.short_not_ok, req->request.no_interrupt); + } else if (req->request.zero && req->request.length && + (IS_ALIGNED(req->request.length,dep->endpoint.maxpacket))) { + struct dwc3 *dwc = dep->dwc; + struct dwc3_trb *trb; + + req->zero = true; + + /* prepare normal TRB */ + dwc3_prepare_one_trb(dep, req, true, 0); + + /* Now prepare one extra TRB to handle ZLP */ + trb = &dep->trb_pool[dep->trb_enqueue]; + __dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, 0, + false, 0, req->request.stream_id, + req->request.short_not_ok, + req->request.no_interrupt); } else { dwc3_prepare_one_trb(dep, req, false, 0); } @@ -1184,8 +1184,11 @@ static void __dwc3_gadget_start_isoc(struct dwc3 *dwc, return; } - /* 4 micro frames in the future */ - uf = cur_uf + dep->interval * 4; + /* + * Schedule the first trb for one interval in the future or at + * least 4 microframes. + */ + uf = cur_uf + max_t(u32, 4, dep->interval); __dwc3_gadget_kick_transfer(dep, uf); } @@ -1258,45 +1261,30 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) __dwc3_gadget_start_isoc(dwc, dep, cur_uf); dep->flags &= ~DWC3_EP_PENDING_REQUEST; } + return 0; } - return 0; + + if ((dep->flags & DWC3_EP_BUSY) && + !(dep->flags & DWC3_EP_MISSED_ISOC)) { + WARN_ON_ONCE(!dep->resource_index); + ret = __dwc3_gadget_kick_transfer(dep, + dep->resource_index); + } + + goto out; } if (!dwc3_calc_trbs_left(dep)) return 0; ret = __dwc3_gadget_kick_transfer(dep, 0); +out: if (ret == -EBUSY) ret = 0; return ret; } -static void __dwc3_gadget_ep_zlp_complete(struct usb_ep *ep, - struct usb_request *request) -{ - dwc3_gadget_ep_free_request(ep, request); -} - -static int __dwc3_gadget_ep_queue_zlp(struct dwc3 *dwc, struct dwc3_ep *dep) -{ - struct dwc3_request *req; - struct usb_request *request; - struct usb_ep *ep = &dep->endpoint; - - request = dwc3_gadget_ep_alloc_request(ep, GFP_ATOMIC); - if (!request) - return -ENOMEM; - - request->length = 0; - request->buf = dwc->zlp_buf; - request->complete = __dwc3_gadget_ep_zlp_complete; - - req = to_dwc3_request(request); - - return __dwc3_gadget_ep_queue(dep, req); -} - static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request, gfp_t gfp_flags) { @@ -1310,17 +1298,6 @@ static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request, spin_lock_irqsave(&dwc->lock, flags); ret = __dwc3_gadget_ep_queue(dep, req); - - /* - * Okay, here's the thing, if gadget driver has requested for a ZLP by - * setting request->zero, instead of doing magic, we will just queue an - * extra usb_request ourselves so that it gets handled the same way as - * any other request. - */ - if (ret == 0 && request->zero && request->length && - (request->length % ep->maxpacket == 0)) - ret = __dwc3_gadget_ep_queue_zlp(dwc, dep); - spin_unlock_irqrestore(&dwc->lock, flags); return ret; @@ -1400,7 +1377,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, dwc3_ep_inc_deq(dep); } - if (r->unaligned) { + if (r->unaligned || r->zero) { trb = r->trb + r->num_pending_sgs + 1; trb->ctrl &= ~DWC3_TRB_CTRL_HWO; dwc3_ep_inc_deq(dep); @@ -1411,7 +1388,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, trb->ctrl &= ~DWC3_TRB_CTRL_HWO; dwc3_ep_inc_deq(dep); - if (r->unaligned) { + if (r->unaligned || r->zero) { trb = r->trb + 1; trb->ctrl &= ~DWC3_TRB_CTRL_HWO; dwc3_ep_inc_deq(dep); @@ -2006,14 +1983,15 @@ static const struct usb_gadget_ops dwc3_gadget_ops = { /* -------------------------------------------------------------------------- */ -static int dwc3_gadget_init_hw_endpoints(struct dwc3 *dwc, - u8 num, u32 direction) +static int dwc3_gadget_init_endpoints(struct dwc3 *dwc, u8 num) { struct dwc3_ep *dep; - u8 i; + u8 epnum; + + INIT_LIST_HEAD(&dwc->gadget.ep_list); - for (i = 0; i < num; i++) { - u8 epnum = (i << 1) | (direction ? 1 : 0); + for (epnum = 0; epnum < num; epnum++) { + bool direction = epnum & 1; dep = kzalloc(sizeof(*dep), GFP_KERNEL); if (!dep) @@ -2021,12 +1999,12 @@ static int dwc3_gadget_init_hw_endpoints(struct dwc3 *dwc, dep->dwc = dwc; dep->number = epnum; - dep->direction = !!direction; + dep->direction = direction; dep->regs = dwc->regs + DWC3_DEP_BASE(epnum); dwc->eps[epnum] = dep; snprintf(dep->name, sizeof(dep->name), "ep%d%s", epnum >> 1, - (epnum & 1) ? "in" : "out"); + direction ? "in" : "out"); dep->endpoint.name = dep->name; @@ -2053,7 +2031,7 @@ static int dwc3_gadget_init_hw_endpoints(struct dwc3 *dwc, /* MDWIDTH is represented in bits, we need it in bytes */ mdwidth /= 8; - size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(i)); + size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(epnum >> 1)); size = DWC3_GTXFIFOSIZ_TXFDEF(size); /* FIFO Depth is in MDWDITH bytes. Multiply */ @@ -2103,7 +2081,7 @@ static int dwc3_gadget_init_hw_endpoints(struct dwc3 *dwc, dep->endpoint.caps.type_int = true; } - dep->endpoint.caps.dir_in = !!direction; + dep->endpoint.caps.dir_in = direction; dep->endpoint.caps.dir_out = !direction; INIT_LIST_HEAD(&dep->pending_list); @@ -2113,27 +2091,6 @@ static int dwc3_gadget_init_hw_endpoints(struct dwc3 *dwc, return 0; } -static int dwc3_gadget_init_endpoints(struct dwc3 *dwc) -{ - int ret; - - INIT_LIST_HEAD(&dwc->gadget.ep_list); - - ret = dwc3_gadget_init_hw_endpoints(dwc, dwc->num_out_eps, 0); - if (ret < 0) { - dev_err(dwc->dev, "failed to initialize OUT endpoints\n"); - return ret; - } - - ret = dwc3_gadget_init_hw_endpoints(dwc, dwc->num_in_eps, 1); - if (ret < 0) { - dev_err(dwc->dev, "failed to initialize IN endpoints\n"); - return ret; - } - - return 0; -} - static void dwc3_gadget_free_endpoints(struct dwc3 *dwc) { struct dwc3_ep *dep; @@ -2197,7 +2154,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, * with one TRB pending in the ring. We need to manually clear HWO bit * from that TRB. */ - if (req->unaligned && (trb->ctrl & DWC3_TRB_CTRL_HWO)) { + if ((req->zero || req->unaligned) && (trb->ctrl & DWC3_TRB_CTRL_HWO)) { trb->ctrl &= ~DWC3_TRB_CTRL_HWO; return 1; } @@ -2291,11 +2248,12 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, event, status, chain); } - if (req->unaligned) { + if (req->unaligned || req->zero) { trb = &dep->trb_pool[dep->trb_dequeue]; ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, event, status, false); req->unaligned = false; + req->zero = false; } req->request.actual = length - req->remaining; @@ -3078,6 +3036,15 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) return IRQ_HANDLED; } + /* + * With PCIe legacy interrupt, test shows that top-half irq handler can + * be called again after HW interrupt deassertion. Check if bottom-half + * irq event handler completes before caching new event to prevent + * losing events. + */ + if (evt->flags & DWC3_EVENT_PENDING) + return IRQ_HANDLED; + count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0)); count &= DWC3_GEVNTCOUNT_MASK; if (!count) @@ -3161,49 +3128,26 @@ int dwc3_gadget_init(struct dwc3 *dwc) dwc->irq_gadget = irq; - dwc->ctrl_req = dma_alloc_coherent(dwc->sysdev, sizeof(*dwc->ctrl_req), - &dwc->ctrl_req_addr, GFP_KERNEL); - if (!dwc->ctrl_req) { - dev_err(dwc->dev, "failed to allocate ctrl request\n"); - ret = -ENOMEM; - goto err0; - } - dwc->ep0_trb = dma_alloc_coherent(dwc->sysdev, sizeof(*dwc->ep0_trb) * 2, &dwc->ep0_trb_addr, GFP_KERNEL); if (!dwc->ep0_trb) { dev_err(dwc->dev, "failed to allocate ep0 trb\n"); ret = -ENOMEM; - goto err1; + goto err0; } - dwc->setup_buf = kzalloc(DWC3_EP0_BOUNCE_SIZE, GFP_KERNEL); + dwc->setup_buf = kzalloc(DWC3_EP0_SETUP_SIZE, GFP_KERNEL); if (!dwc->setup_buf) { ret = -ENOMEM; - goto err2; - } - - dwc->ep0_bounce = dma_alloc_coherent(dwc->sysdev, - DWC3_EP0_BOUNCE_SIZE, &dwc->ep0_bounce_addr, - GFP_KERNEL); - if (!dwc->ep0_bounce) { - dev_err(dwc->dev, "failed to allocate ep0 bounce buffer\n"); - ret = -ENOMEM; - goto err3; - } - - dwc->zlp_buf = kzalloc(DWC3_ZLP_BUF_SIZE, GFP_KERNEL); - if (!dwc->zlp_buf) { - ret = -ENOMEM; - goto err4; + goto err1; } dwc->bounce = dma_alloc_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, &dwc->bounce_addr, GFP_KERNEL); if (!dwc->bounce) { ret = -ENOMEM; - goto err5; + goto err2; } init_completion(&dwc->ep0_in_setup); @@ -3241,39 +3185,31 @@ int dwc3_gadget_init(struct dwc3 *dwc) * sure we're starting from a well known location. */ - ret = dwc3_gadget_init_endpoints(dwc); + ret = dwc3_gadget_init_endpoints(dwc, dwc->num_eps); if (ret) - goto err6; + goto err3; ret = usb_add_gadget_udc(dwc->dev, &dwc->gadget); if (ret) { dev_err(dwc->dev, "failed to register udc\n"); - goto err6; + goto err4; } return 0; -err6: - dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce, - dwc->bounce_addr); - -err5: - kfree(dwc->zlp_buf); err4: dwc3_gadget_free_endpoints(dwc); - dma_free_coherent(dwc->sysdev, DWC3_EP0_BOUNCE_SIZE, - dwc->ep0_bounce, dwc->ep0_bounce_addr); err3: - kfree(dwc->setup_buf); + dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce, + dwc->bounce_addr); err2: - dma_free_coherent(dwc->sysdev, sizeof(*dwc->ep0_trb) * 2, - dwc->ep0_trb, dwc->ep0_trb_addr); + kfree(dwc->setup_buf); err1: - dma_free_coherent(dwc->sysdev, sizeof(*dwc->ctrl_req), - dwc->ctrl_req, dwc->ctrl_req_addr); + dma_free_coherent(dwc->sysdev, sizeof(*dwc->ep0_trb) * 2, + dwc->ep0_trb, dwc->ep0_trb_addr); err0: return ret; @@ -3284,22 +3220,12 @@ int dwc3_gadget_init(struct dwc3 *dwc) void dwc3_gadget_exit(struct dwc3 *dwc) { usb_del_gadget_udc(&dwc->gadget); - dwc3_gadget_free_endpoints(dwc); - dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce, - dwc->bounce_addr); - dma_free_coherent(dwc->sysdev, DWC3_EP0_BOUNCE_SIZE, - dwc->ep0_bounce, dwc->ep0_bounce_addr); - + dwc->bounce_addr); kfree(dwc->setup_buf); - kfree(dwc->zlp_buf); - dma_free_coherent(dwc->sysdev, sizeof(*dwc->ep0_trb) * 2, - dwc->ep0_trb, dwc->ep0_trb_addr); - - dma_free_coherent(dwc->sysdev, sizeof(*dwc->ctrl_req), - dwc->ctrl_req, dwc->ctrl_req_addr); + dwc->ep0_trb, dwc->ep0_trb_addr); } int dwc3_gadget_suspend(struct dwc3 *dwc) diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 265e223ab64554f6be78d37d15e36a86bb571c74..e4602d0e515bbd6473c534b227844a07bc792b30 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -29,16 +29,16 @@ struct dwc3; /* DEPCFG parameter 1 */ #define DWC3_DEPCFG_INT_NUM(n) (((n) & 0x1f) << 0) -#define DWC3_DEPCFG_XFER_COMPLETE_EN (1 << 8) -#define DWC3_DEPCFG_XFER_IN_PROGRESS_EN (1 << 9) -#define DWC3_DEPCFG_XFER_NOT_READY_EN (1 << 10) -#define DWC3_DEPCFG_FIFO_ERROR_EN (1 << 11) -#define DWC3_DEPCFG_STREAM_EVENT_EN (1 << 13) +#define DWC3_DEPCFG_XFER_COMPLETE_EN BIT(8) +#define DWC3_DEPCFG_XFER_IN_PROGRESS_EN BIT(9) +#define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10) +#define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11) +#define DWC3_DEPCFG_STREAM_EVENT_EN BIT(12) #define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) -#define DWC3_DEPCFG_STREAM_CAPABLE (1 << 24) +#define DWC3_DEPCFG_STREAM_CAPABLE BIT(24) #define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) -#define DWC3_DEPCFG_BULK_BASED (1 << 30) -#define DWC3_DEPCFG_FIFO_BASED (1 << 31) +#define DWC3_DEPCFG_BULK_BASED BIT(30) +#define DWC3_DEPCFG_FIFO_BASED BIT(31) /* DEPCFG parameter 0 */ #define DWC3_DEPCFG_EP_TYPE(n) (((n) & 0x3) << 1) @@ -47,10 +47,10 @@ struct dwc3; #define DWC3_DEPCFG_BURST_SIZE(n) (((n) & 0xf) << 22) #define DWC3_DEPCFG_DATA_SEQ_NUM(n) ((n) << 26) /* This applies for core versions earlier than 1.94a */ -#define DWC3_DEPCFG_IGN_SEQ_NUM (1 << 31) +#define DWC3_DEPCFG_IGN_SEQ_NUM BIT(31) /* These apply for core versions 1.94a and later */ #define DWC3_DEPCFG_ACTION_INIT (0 << 30) -#define DWC3_DEPCFG_ACTION_RESTORE (1 << 30) +#define DWC3_DEPCFG_ACTION_RESTORE BIT(30) #define DWC3_DEPCFG_ACTION_MODIFY (2 << 30) /* DEPXFERCFG parameter 0 */ diff --git a/drivers/usb/dwc3/trace.h b/drivers/usb/dwc3/trace.h index 2b124f94d858487ec9694683f618c0b1448e64d4..f1bd444d22a36b5a331bee6f2187d8a50094085d 100644 --- a/drivers/usb/dwc3/trace.h +++ b/drivers/usb/dwc3/trace.h @@ -27,31 +27,6 @@ #include "core.h" #include "debug.h" -DECLARE_EVENT_CLASS(dwc3_log_msg, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf), - TP_STRUCT__entry(__dynamic_array(char, msg, DWC3_MSG_MAX)), - TP_fast_assign( - vsnprintf(__get_str(msg), DWC3_MSG_MAX, vaf->fmt, *vaf->va); - ), - TP_printk("%s", __get_str(msg)) -); - -DEFINE_EVENT(dwc3_log_msg, dwc3_gadget, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(dwc3_log_msg, dwc3_core, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - -DEFINE_EVENT(dwc3_log_msg, dwc3_ep0, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); - DECLARE_EVENT_CLASS(dwc3_log_io, TP_PROTO(void *base, u32 offset, u32 value), TP_ARGS(base, offset, value), @@ -198,7 +173,7 @@ DECLARE_EVENT_CLASS(dwc3_log_generic_cmd, __entry->param = param; __entry->status = status; ), - TP_printk("cmd '%s' [%d] param %08x --> status: %s", + TP_printk("cmd '%s' [%x] param %08x --> status: %s", dwc3_gadget_generic_cmd_string(__entry->cmd), __entry->cmd, __entry->param, dwc3_gadget_generic_cmd_status_string(__entry->status) @@ -298,36 +273,7 @@ DECLARE_EVENT_CLASS(dwc3_log_trb, __entry->ctrl & DWC3_TRB_CTRL_CSP ? 'S' : 's', __entry->ctrl & DWC3_TRB_CTRL_ISP_IMI ? 'S' : 's', __entry->ctrl & DWC3_TRB_CTRL_IOC ? 'C' : 'c', - ({char *s; - switch (__entry->ctrl & 0x3f0) { - case DWC3_TRBCTL_NORMAL: - s = "normal"; - break; - case DWC3_TRBCTL_CONTROL_SETUP: - s = "setup"; - break; - case DWC3_TRBCTL_CONTROL_STATUS2: - s = "status2"; - break; - case DWC3_TRBCTL_CONTROL_STATUS3: - s = "status3"; - break; - case DWC3_TRBCTL_CONTROL_DATA: - s = "data"; - break; - case DWC3_TRBCTL_ISOCHRONOUS_FIRST: - s = "isoc-first"; - break; - case DWC3_TRBCTL_ISOCHRONOUS: - s = "isoc"; - break; - case DWC3_TRBCTL_LINK_TRB: - s = "link"; - break; - default: - s = "UNKNOWN"; - break; - } s; }) + dwc3_trb_type_string(DWC3_TRBCTL_TYPE(__entry->ctrl)) ) ); diff --git a/drivers/usb/early/Makefile b/drivers/usb/early/Makefile index 24bbe519c737647b202dfd00c020a32e1721830e..fcde2286da1cc18fecff8ccb6289dd733bd1e4de 100644 --- a/drivers/usb/early/Makefile +++ b/drivers/usb/early/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_EARLY_PRINTK_DBGP) += ehci-dbgp.o +obj-$(CONFIG_EARLY_PRINTK_USB_XDBC) += xhci-dbc.o diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c new file mode 100644 index 0000000000000000000000000000000000000000..1268818e2263a28645642e9789360eade8ee6b2b --- /dev/null +++ b/drivers/usb/early/xhci-dbc.c @@ -0,0 +1,1014 @@ +/** + * xhci-dbc.c - xHCI debug capability early driver + * + * Copyright (C) 2016 Intel Corporation + * + * Author: Lu Baolu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../host/xhci.h" +#include "xhci-dbc.h" + +static struct xdbc_state xdbc; +static bool early_console_keep; + +#define XDBC_TRACE +#ifdef XDBC_TRACE +#define xdbc_trace trace_printk +#else +static inline void xdbc_trace(const char *fmt, ...) { } +#endif /* XDBC_TRACE */ + +static void __iomem * __init xdbc_map_pci_mmio(u32 bus, u32 dev, u32 func) +{ + u64 val64, sz64, mask64; + void __iomem *base; + u32 val, sz; + u8 byte; + + val = read_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0); + write_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0, ~0); + sz = read_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0); + write_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0, val); + + if (val == 0xffffffff || sz == 0xffffffff) { + pr_notice("invalid mmio bar\n"); + return NULL; + } + + val64 = val & PCI_BASE_ADDRESS_MEM_MASK; + sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK; + mask64 = PCI_BASE_ADDRESS_MEM_MASK; + + if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + val = read_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0 + 4); + write_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0 + 4, ~0); + sz = read_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0 + 4); + write_pci_config(bus, dev, func, PCI_BASE_ADDRESS_0 + 4, val); + + val64 |= (u64)val << 32; + sz64 |= (u64)sz << 32; + mask64 |= ~0ULL << 32; + } + + sz64 &= mask64; + + if (!sz64) { + pr_notice("invalid mmio address\n"); + return NULL; + } + + sz64 = 1ULL << __ffs64(sz64); + + /* Check if the mem space is enabled: */ + byte = read_pci_config_byte(bus, dev, func, PCI_COMMAND); + if (!(byte & PCI_COMMAND_MEMORY)) { + byte |= PCI_COMMAND_MEMORY; + write_pci_config_byte(bus, dev, func, PCI_COMMAND, byte); + } + + xdbc.xhci_start = val64; + xdbc.xhci_length = sz64; + base = early_ioremap(val64, sz64); + + return base; +} + +static void * __init xdbc_get_page(dma_addr_t *dma_addr) +{ + void *virt; + + virt = alloc_bootmem_pages_nopanic(PAGE_SIZE); + if (!virt) + return NULL; + + if (dma_addr) + *dma_addr = (dma_addr_t)__pa(virt); + + return virt; +} + +static u32 __init xdbc_find_dbgp(int xdbc_num, u32 *b, u32 *d, u32 *f) +{ + u32 bus, dev, func, class; + + for (bus = 0; bus < XDBC_PCI_MAX_BUSES; bus++) { + for (dev = 0; dev < XDBC_PCI_MAX_DEVICES; dev++) { + for (func = 0; func < XDBC_PCI_MAX_FUNCTION; func++) { + + class = read_pci_config(bus, dev, func, PCI_CLASS_REVISION); + if ((class >> 8) != PCI_CLASS_SERIAL_USB_XHCI) + continue; + + if (xdbc_num-- != 0) + continue; + + *b = bus; + *d = dev; + *f = func; + + return 0; + } + } + } + + return -1; +} + +static int handshake(void __iomem *ptr, u32 mask, u32 done, int wait, int delay) +{ + u32 result; + + do { + result = readl(ptr); + result &= mask; + if (result == done) + return 0; + udelay(delay); + wait -= delay; + } while (wait > 0); + + return -ETIMEDOUT; +} + +static void __init xdbc_bios_handoff(void) +{ + int offset, timeout; + u32 val; + + offset = xhci_find_next_ext_cap(xdbc.xhci_base, 0, XHCI_EXT_CAPS_LEGACY); + val = readl(xdbc.xhci_base + offset); + + if (val & XHCI_HC_BIOS_OWNED) { + writel(val | XHCI_HC_OS_OWNED, xdbc.xhci_base + offset); + timeout = handshake(xdbc.xhci_base + offset, XHCI_HC_BIOS_OWNED, 0, 5000, 10); + + if (timeout) { + pr_notice("failed to hand over xHCI control from BIOS\n"); + writel(val & ~XHCI_HC_BIOS_OWNED, xdbc.xhci_base + offset); + } + } + + /* Disable BIOS SMIs and clear all SMI events: */ + val = readl(xdbc.xhci_base + offset + XHCI_LEGACY_CONTROL_OFFSET); + val &= XHCI_LEGACY_DISABLE_SMI; + val |= XHCI_LEGACY_SMI_EVENTS; + writel(val, xdbc.xhci_base + offset + XHCI_LEGACY_CONTROL_OFFSET); +} + +static int __init +xdbc_alloc_ring(struct xdbc_segment *seg, struct xdbc_ring *ring) +{ + seg->trbs = xdbc_get_page(&seg->dma); + if (!seg->trbs) + return -ENOMEM; + + ring->segment = seg; + + return 0; +} + +static void __init xdbc_free_ring(struct xdbc_ring *ring) +{ + struct xdbc_segment *seg = ring->segment; + + if (!seg) + return; + + free_bootmem(seg->dma, PAGE_SIZE); + ring->segment = NULL; +} + +static void xdbc_reset_ring(struct xdbc_ring *ring) +{ + struct xdbc_segment *seg = ring->segment; + struct xdbc_trb *link_trb; + + memset(seg->trbs, 0, PAGE_SIZE); + + ring->enqueue = seg->trbs; + ring->dequeue = seg->trbs; + ring->cycle_state = 1; + + if (ring != &xdbc.evt_ring) { + link_trb = &seg->trbs[XDBC_TRBS_PER_SEGMENT - 1]; + link_trb->field[0] = cpu_to_le32(lower_32_bits(seg->dma)); + link_trb->field[1] = cpu_to_le32(upper_32_bits(seg->dma)); + link_trb->field[3] = cpu_to_le32(TRB_TYPE(TRB_LINK)) | cpu_to_le32(LINK_TOGGLE); + } +} + +static inline void xdbc_put_utf16(u16 *s, const char *c, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + s[i] = cpu_to_le16(c[i]); +} + +static void xdbc_mem_init(void) +{ + struct xdbc_ep_context *ep_in, *ep_out; + struct usb_string_descriptor *s_desc; + struct xdbc_erst_entry *entry; + struct xdbc_strings *strings; + struct xdbc_context *ctx; + unsigned int max_burst; + u32 string_length; + int index = 0; + u32 dev_info; + + xdbc_reset_ring(&xdbc.evt_ring); + xdbc_reset_ring(&xdbc.in_ring); + xdbc_reset_ring(&xdbc.out_ring); + memset(xdbc.table_base, 0, PAGE_SIZE); + memset(xdbc.out_buf, 0, PAGE_SIZE); + + /* Initialize event ring segment table: */ + xdbc.erst_size = 16; + xdbc.erst_base = xdbc.table_base + index * XDBC_TABLE_ENTRY_SIZE; + xdbc.erst_dma = xdbc.table_dma + index * XDBC_TABLE_ENTRY_SIZE; + + index += XDBC_ERST_ENTRY_NUM; + entry = (struct xdbc_erst_entry *)xdbc.erst_base; + + entry->seg_addr = cpu_to_le64(xdbc.evt_seg.dma); + entry->seg_size = cpu_to_le32(XDBC_TRBS_PER_SEGMENT); + entry->__reserved_0 = 0; + + /* Initialize ERST registers: */ + writel(1, &xdbc.xdbc_reg->ersts); + xdbc_write64(xdbc.erst_dma, &xdbc.xdbc_reg->erstba); + xdbc_write64(xdbc.evt_seg.dma, &xdbc.xdbc_reg->erdp); + + /* Debug capability contexts: */ + xdbc.dbcc_size = 64 * 3; + xdbc.dbcc_base = xdbc.table_base + index * XDBC_TABLE_ENTRY_SIZE; + xdbc.dbcc_dma = xdbc.table_dma + index * XDBC_TABLE_ENTRY_SIZE; + + index += XDBC_DBCC_ENTRY_NUM; + + /* Popluate the strings: */ + xdbc.string_size = sizeof(struct xdbc_strings); + xdbc.string_base = xdbc.table_base + index * XDBC_TABLE_ENTRY_SIZE; + xdbc.string_dma = xdbc.table_dma + index * XDBC_TABLE_ENTRY_SIZE; + strings = (struct xdbc_strings *)xdbc.string_base; + + index += XDBC_STRING_ENTRY_NUM; + + /* Serial string: */ + s_desc = (struct usb_string_descriptor *)strings->serial; + s_desc->bLength = (strlen(XDBC_STRING_SERIAL) + 1) * 2; + s_desc->bDescriptorType = USB_DT_STRING; + + xdbc_put_utf16(s_desc->wData, XDBC_STRING_SERIAL, strlen(XDBC_STRING_SERIAL)); + string_length = s_desc->bLength; + string_length <<= 8; + + /* Product string: */ + s_desc = (struct usb_string_descriptor *)strings->product; + s_desc->bLength = (strlen(XDBC_STRING_PRODUCT) + 1) * 2; + s_desc->bDescriptorType = USB_DT_STRING; + + xdbc_put_utf16(s_desc->wData, XDBC_STRING_PRODUCT, strlen(XDBC_STRING_PRODUCT)); + string_length += s_desc->bLength; + string_length <<= 8; + + /* Manufacture string: */ + s_desc = (struct usb_string_descriptor *)strings->manufacturer; + s_desc->bLength = (strlen(XDBC_STRING_MANUFACTURER) + 1) * 2; + s_desc->bDescriptorType = USB_DT_STRING; + + xdbc_put_utf16(s_desc->wData, XDBC_STRING_MANUFACTURER, strlen(XDBC_STRING_MANUFACTURER)); + string_length += s_desc->bLength; + string_length <<= 8; + + /* String0: */ + strings->string0[0] = 4; + strings->string0[1] = USB_DT_STRING; + strings->string0[2] = 0x09; + strings->string0[3] = 0x04; + + string_length += 4; + + /* Populate info Context: */ + ctx = (struct xdbc_context *)xdbc.dbcc_base; + + ctx->info.string0 = cpu_to_le64(xdbc.string_dma); + ctx->info.manufacturer = cpu_to_le64(xdbc.string_dma + XDBC_MAX_STRING_LENGTH); + ctx->info.product = cpu_to_le64(xdbc.string_dma + XDBC_MAX_STRING_LENGTH * 2); + ctx->info.serial = cpu_to_le64(xdbc.string_dma + XDBC_MAX_STRING_LENGTH * 3); + ctx->info.length = cpu_to_le32(string_length); + + /* Populate bulk out endpoint context: */ + max_burst = DEBUG_MAX_BURST(readl(&xdbc.xdbc_reg->control)); + ep_out = (struct xdbc_ep_context *)&ctx->out; + + ep_out->ep_info1 = 0; + ep_out->ep_info2 = cpu_to_le32(EP_TYPE(BULK_OUT_EP) | MAX_PACKET(1024) | MAX_BURST(max_burst)); + ep_out->deq = cpu_to_le64(xdbc.out_seg.dma | xdbc.out_ring.cycle_state); + + /* Populate bulk in endpoint context: */ + ep_in = (struct xdbc_ep_context *)&ctx->in; + + ep_in->ep_info1 = 0; + ep_in->ep_info2 = cpu_to_le32(EP_TYPE(BULK_OUT_EP) | MAX_PACKET(1024) | MAX_BURST(max_burst)); + ep_in->deq = cpu_to_le64(xdbc.in_seg.dma | xdbc.in_ring.cycle_state); + + /* Set DbC context and info registers: */ + xdbc_write64(xdbc.dbcc_dma, &xdbc.xdbc_reg->dccp); + + dev_info = cpu_to_le32((XDBC_VENDOR_ID << 16) | XDBC_PROTOCOL); + writel(dev_info, &xdbc.xdbc_reg->devinfo1); + + dev_info = cpu_to_le32((XDBC_DEVICE_REV << 16) | XDBC_PRODUCT_ID); + writel(dev_info, &xdbc.xdbc_reg->devinfo2); + + xdbc.in_buf = xdbc.out_buf + XDBC_MAX_PACKET; + xdbc.in_dma = xdbc.out_dma + XDBC_MAX_PACKET; +} + +static void xdbc_do_reset_debug_port(u32 id, u32 count) +{ + void __iomem *ops_reg; + void __iomem *portsc; + u32 val, cap_length; + int i; + + cap_length = readl(xdbc.xhci_base) & 0xff; + ops_reg = xdbc.xhci_base + cap_length; + + id--; + for (i = id; i < (id + count); i++) { + portsc = ops_reg + 0x400 + i * 0x10; + val = readl(portsc); + if (!(val & PORT_CONNECT)) + writel(val | PORT_RESET, portsc); + } +} + +static void xdbc_reset_debug_port(void) +{ + u32 val, port_offset, port_count; + int offset = 0; + + do { + offset = xhci_find_next_ext_cap(xdbc.xhci_base, offset, XHCI_EXT_CAPS_PROTOCOL); + if (!offset) + break; + + val = readl(xdbc.xhci_base + offset); + if (XHCI_EXT_PORT_MAJOR(val) != 0x3) + continue; + + val = readl(xdbc.xhci_base + offset + 8); + port_offset = XHCI_EXT_PORT_OFF(val); + port_count = XHCI_EXT_PORT_COUNT(val); + + xdbc_do_reset_debug_port(port_offset, port_count); + } while (1); +} + +static void +xdbc_queue_trb(struct xdbc_ring *ring, u32 field1, u32 field2, u32 field3, u32 field4) +{ + struct xdbc_trb *trb, *link_trb; + + trb = ring->enqueue; + trb->field[0] = cpu_to_le32(field1); + trb->field[1] = cpu_to_le32(field2); + trb->field[2] = cpu_to_le32(field3); + trb->field[3] = cpu_to_le32(field4); + + ++(ring->enqueue); + if (ring->enqueue >= &ring->segment->trbs[TRBS_PER_SEGMENT - 1]) { + link_trb = ring->enqueue; + if (ring->cycle_state) + link_trb->field[3] |= cpu_to_le32(TRB_CYCLE); + else + link_trb->field[3] &= cpu_to_le32(~TRB_CYCLE); + + ring->enqueue = ring->segment->trbs; + ring->cycle_state ^= 1; + } +} + +static void xdbc_ring_doorbell(int target) +{ + writel(DOOR_BELL_TARGET(target), &xdbc.xdbc_reg->doorbell); +} + +static int xdbc_start(void) +{ + u32 ctrl, status; + int ret; + + ctrl = readl(&xdbc.xdbc_reg->control); + writel(ctrl | CTRL_DBC_ENABLE | CTRL_PORT_ENABLE, &xdbc.xdbc_reg->control); + ret = handshake(&xdbc.xdbc_reg->control, CTRL_DBC_ENABLE, CTRL_DBC_ENABLE, 100000, 100); + if (ret) { + xdbc_trace("failed to initialize hardware\n"); + return ret; + } + + /* Reset port to avoid bus hang: */ + if (xdbc.vendor == PCI_VENDOR_ID_INTEL) + xdbc_reset_debug_port(); + + /* Wait for port connection: */ + ret = handshake(&xdbc.xdbc_reg->portsc, PORTSC_CONN_STATUS, PORTSC_CONN_STATUS, 5000000, 100); + if (ret) { + xdbc_trace("waiting for connection timed out\n"); + return ret; + } + + /* Wait for debug device to be configured: */ + ret = handshake(&xdbc.xdbc_reg->control, CTRL_DBC_RUN, CTRL_DBC_RUN, 5000000, 100); + if (ret) { + xdbc_trace("waiting for device configuration timed out\n"); + return ret; + } + + /* Check port number: */ + status = readl(&xdbc.xdbc_reg->status); + if (!DCST_DEBUG_PORT(status)) { + xdbc_trace("invalid root hub port number\n"); + return -ENODEV; + } + + xdbc.port_number = DCST_DEBUG_PORT(status); + + xdbc_trace("DbC is running now, control 0x%08x port ID %d\n", + readl(&xdbc.xdbc_reg->control), xdbc.port_number); + + return 0; +} + +static int xdbc_bulk_transfer(void *data, int size, bool read) +{ + struct xdbc_ring *ring; + struct xdbc_trb *trb; + u32 length, control; + u32 cycle; + u64 addr; + + if (size > XDBC_MAX_PACKET) { + xdbc_trace("bad parameter, size %d\n", size); + return -EINVAL; + } + + if (!(xdbc.flags & XDBC_FLAGS_INITIALIZED) || + !(xdbc.flags & XDBC_FLAGS_CONFIGURED) || + (!read && (xdbc.flags & XDBC_FLAGS_OUT_STALL)) || + (read && (xdbc.flags & XDBC_FLAGS_IN_STALL))) { + + xdbc_trace("connection not ready, flags %08x\n", xdbc.flags); + return -EIO; + } + + ring = (read ? &xdbc.in_ring : &xdbc.out_ring); + trb = ring->enqueue; + cycle = ring->cycle_state; + length = TRB_LEN(size); + control = TRB_TYPE(TRB_NORMAL) | TRB_IOC; + + if (cycle) + control &= cpu_to_le32(~TRB_CYCLE); + else + control |= cpu_to_le32(TRB_CYCLE); + + if (read) { + memset(xdbc.in_buf, 0, XDBC_MAX_PACKET); + addr = xdbc.in_dma; + xdbc.flags |= XDBC_FLAGS_IN_PROCESS; + } else { + memset(xdbc.out_buf, 0, XDBC_MAX_PACKET); + memcpy(xdbc.out_buf, data, size); + addr = xdbc.out_dma; + xdbc.flags |= XDBC_FLAGS_OUT_PROCESS; + } + + xdbc_queue_trb(ring, lower_32_bits(addr), upper_32_bits(addr), length, control); + + /* + * Add a barrier between writes of trb fields and flipping + * the cycle bit: + */ + wmb(); + if (cycle) + trb->field[3] |= cpu_to_le32(cycle); + else + trb->field[3] &= cpu_to_le32(~TRB_CYCLE); + + xdbc_ring_doorbell(read ? IN_EP_DOORBELL : OUT_EP_DOORBELL); + + return size; +} + +static int xdbc_handle_external_reset(void) +{ + int ret = 0; + + xdbc.flags = 0; + writel(0, &xdbc.xdbc_reg->control); + ret = handshake(&xdbc.xdbc_reg->control, CTRL_DBC_ENABLE, 0, 100000, 10); + if (ret) + goto reset_out; + + xdbc_mem_init(); + + mmiowb(); + + ret = xdbc_start(); + if (ret < 0) + goto reset_out; + + xdbc_trace("dbc recovered\n"); + + xdbc.flags |= XDBC_FLAGS_INITIALIZED | XDBC_FLAGS_CONFIGURED; + + xdbc_bulk_transfer(NULL, XDBC_MAX_PACKET, true); + + return 0; + +reset_out: + xdbc_trace("failed to recover from external reset\n"); + return ret; +} + +static int __init xdbc_early_setup(void) +{ + int ret; + + writel(0, &xdbc.xdbc_reg->control); + ret = handshake(&xdbc.xdbc_reg->control, CTRL_DBC_ENABLE, 0, 100000, 100); + if (ret) + return ret; + + /* Allocate the table page: */ + xdbc.table_base = xdbc_get_page(&xdbc.table_dma); + if (!xdbc.table_base) + return -ENOMEM; + + /* Get and store the transfer buffer: */ + xdbc.out_buf = xdbc_get_page(&xdbc.out_dma); + if (!xdbc.out_buf) + return -ENOMEM; + + /* Allocate the event ring: */ + ret = xdbc_alloc_ring(&xdbc.evt_seg, &xdbc.evt_ring); + if (ret < 0) + return ret; + + /* Allocate IN/OUT endpoint transfer rings: */ + ret = xdbc_alloc_ring(&xdbc.in_seg, &xdbc.in_ring); + if (ret < 0) + return ret; + + ret = xdbc_alloc_ring(&xdbc.out_seg, &xdbc.out_ring); + if (ret < 0) + return ret; + + xdbc_mem_init(); + + mmiowb(); + + ret = xdbc_start(); + if (ret < 0) { + writel(0, &xdbc.xdbc_reg->control); + return ret; + } + + xdbc.flags |= XDBC_FLAGS_INITIALIZED | XDBC_FLAGS_CONFIGURED; + + xdbc_bulk_transfer(NULL, XDBC_MAX_PACKET, true); + + return 0; +} + +int __init early_xdbc_parse_parameter(char *s) +{ + unsigned long dbgp_num = 0; + u32 bus, dev, func, offset; + int ret; + + if (!early_pci_allowed()) + return -EPERM; + + if (strstr(s, "keep")) + early_console_keep = true; + + if (xdbc.xdbc_reg) + return 0; + + if (*s && kstrtoul(s, 0, &dbgp_num)) + dbgp_num = 0; + + pr_notice("dbgp_num: %lu\n", dbgp_num); + + /* Locate the host controller: */ + ret = xdbc_find_dbgp(dbgp_num, &bus, &dev, &func); + if (ret) { + pr_notice("failed to locate xhci host\n"); + return -ENODEV; + } + + xdbc.vendor = read_pci_config_16(bus, dev, func, PCI_VENDOR_ID); + xdbc.device = read_pci_config_16(bus, dev, func, PCI_DEVICE_ID); + xdbc.bus = bus; + xdbc.dev = dev; + xdbc.func = func; + + /* Map the IO memory: */ + xdbc.xhci_base = xdbc_map_pci_mmio(bus, dev, func); + if (!xdbc.xhci_base) + return -EINVAL; + + /* Locate DbC registers: */ + offset = xhci_find_next_ext_cap(xdbc.xhci_base, 0, XHCI_EXT_CAPS_DEBUG); + if (!offset) { + pr_notice("xhci host doesn't support debug capability\n"); + early_iounmap(xdbc.xhci_base, xdbc.xhci_length); + xdbc.xhci_base = NULL; + xdbc.xhci_length = 0; + + return -ENODEV; + } + xdbc.xdbc_reg = (struct xdbc_regs __iomem *)(xdbc.xhci_base + offset); + + return 0; +} + +int __init early_xdbc_setup_hardware(void) +{ + int ret; + + if (!xdbc.xdbc_reg) + return -ENODEV; + + xdbc_bios_handoff(); + + raw_spin_lock_init(&xdbc.lock); + + ret = xdbc_early_setup(); + if (ret) { + pr_notice("failed to setup the connection to host\n"); + + xdbc_free_ring(&xdbc.evt_ring); + xdbc_free_ring(&xdbc.out_ring); + xdbc_free_ring(&xdbc.in_ring); + + if (xdbc.table_dma) + free_bootmem(xdbc.table_dma, PAGE_SIZE); + + if (xdbc.out_dma) + free_bootmem(xdbc.out_dma, PAGE_SIZE); + + xdbc.table_base = NULL; + xdbc.out_buf = NULL; + } + + return ret; +} + +static void xdbc_handle_port_status(struct xdbc_trb *evt_trb) +{ + u32 port_reg; + + port_reg = readl(&xdbc.xdbc_reg->portsc); + if (port_reg & PORTSC_CONN_CHANGE) { + xdbc_trace("connect status change event\n"); + + /* Check whether cable unplugged: */ + if (!(port_reg & PORTSC_CONN_STATUS)) { + xdbc.flags = 0; + xdbc_trace("cable unplugged\n"); + } + } + + if (port_reg & PORTSC_RESET_CHANGE) + xdbc_trace("port reset change event\n"); + + if (port_reg & PORTSC_LINK_CHANGE) + xdbc_trace("port link status change event\n"); + + if (port_reg & PORTSC_CONFIG_CHANGE) + xdbc_trace("config error change\n"); + + /* Write back the value to clear RW1C bits: */ + writel(port_reg, &xdbc.xdbc_reg->portsc); +} + +static void xdbc_handle_tx_event(struct xdbc_trb *evt_trb) +{ + size_t remain_length; + u32 comp_code; + int ep_id; + + comp_code = GET_COMP_CODE(le32_to_cpu(evt_trb->field[2])); + remain_length = EVENT_TRB_LEN(le32_to_cpu(evt_trb->field[2])); + ep_id = TRB_TO_EP_ID(le32_to_cpu(evt_trb->field[3])); + + switch (comp_code) { + case COMP_SUCCESS: + remain_length = 0; + case COMP_SHORT_PACKET: + break; + case COMP_TRB_ERROR: + case COMP_BABBLE_DETECTED_ERROR: + case COMP_USB_TRANSACTION_ERROR: + case COMP_STALL_ERROR: + default: + if (ep_id == XDBC_EPID_OUT) + xdbc.flags |= XDBC_FLAGS_OUT_STALL; + if (ep_id == XDBC_EPID_IN) + xdbc.flags |= XDBC_FLAGS_IN_STALL; + + xdbc_trace("endpoint %d stalled\n", ep_id); + break; + } + + if (ep_id == XDBC_EPID_IN) { + xdbc.flags &= ~XDBC_FLAGS_IN_PROCESS; + xdbc_bulk_transfer(NULL, XDBC_MAX_PACKET, true); + } else if (ep_id == XDBC_EPID_OUT) { + xdbc.flags &= ~XDBC_FLAGS_OUT_PROCESS; + } else { + xdbc_trace("invalid endpoint id %d\n", ep_id); + } +} + +static void xdbc_handle_events(void) +{ + struct xdbc_trb *evt_trb; + bool update_erdp = false; + u32 reg; + u8 cmd; + + cmd = read_pci_config_byte(xdbc.bus, xdbc.dev, xdbc.func, PCI_COMMAND); + if (!(cmd & PCI_COMMAND_MASTER)) { + cmd |= PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY; + write_pci_config_byte(xdbc.bus, xdbc.dev, xdbc.func, PCI_COMMAND, cmd); + } + + if (!(xdbc.flags & XDBC_FLAGS_INITIALIZED)) + return; + + /* Handle external reset events: */ + reg = readl(&xdbc.xdbc_reg->control); + if (!(reg & CTRL_DBC_ENABLE)) { + if (xdbc_handle_external_reset()) { + xdbc_trace("failed to recover connection\n"); + return; + } + } + + /* Handle configure-exit event: */ + reg = readl(&xdbc.xdbc_reg->control); + if (reg & CTRL_DBC_RUN_CHANGE) { + writel(reg, &xdbc.xdbc_reg->control); + if (reg & CTRL_DBC_RUN) + xdbc.flags |= XDBC_FLAGS_CONFIGURED; + else + xdbc.flags &= ~XDBC_FLAGS_CONFIGURED; + } + + /* Handle endpoint stall event: */ + reg = readl(&xdbc.xdbc_reg->control); + if (reg & CTRL_HALT_IN_TR) { + xdbc.flags |= XDBC_FLAGS_IN_STALL; + } else { + xdbc.flags &= ~XDBC_FLAGS_IN_STALL; + if (!(xdbc.flags & XDBC_FLAGS_IN_PROCESS)) + xdbc_bulk_transfer(NULL, XDBC_MAX_PACKET, true); + } + + if (reg & CTRL_HALT_OUT_TR) + xdbc.flags |= XDBC_FLAGS_OUT_STALL; + else + xdbc.flags &= ~XDBC_FLAGS_OUT_STALL; + + /* Handle the events in the event ring: */ + evt_trb = xdbc.evt_ring.dequeue; + while ((le32_to_cpu(evt_trb->field[3]) & TRB_CYCLE) == xdbc.evt_ring.cycle_state) { + /* + * Add a barrier between reading the cycle flag and any + * reads of the event's flags/data below: + */ + rmb(); + + switch ((le32_to_cpu(evt_trb->field[3]) & TRB_TYPE_BITMASK)) { + case TRB_TYPE(TRB_PORT_STATUS): + xdbc_handle_port_status(evt_trb); + break; + case TRB_TYPE(TRB_TRANSFER): + xdbc_handle_tx_event(evt_trb); + break; + default: + break; + } + + ++(xdbc.evt_ring.dequeue); + if (xdbc.evt_ring.dequeue == &xdbc.evt_seg.trbs[TRBS_PER_SEGMENT]) { + xdbc.evt_ring.dequeue = xdbc.evt_seg.trbs; + xdbc.evt_ring.cycle_state ^= 1; + } + + evt_trb = xdbc.evt_ring.dequeue; + update_erdp = true; + } + + /* Update event ring dequeue pointer: */ + if (update_erdp) + xdbc_write64(__pa(xdbc.evt_ring.dequeue), &xdbc.xdbc_reg->erdp); +} + +static int xdbc_bulk_write(const char *bytes, int size) +{ + int ret, timeout = 0; + unsigned long flags; + +retry: + if (in_nmi()) { + if (!raw_spin_trylock_irqsave(&xdbc.lock, flags)) + return -EAGAIN; + } else { + raw_spin_lock_irqsave(&xdbc.lock, flags); + } + + xdbc_handle_events(); + + /* Check completion of the previous request: */ + if ((xdbc.flags & XDBC_FLAGS_OUT_PROCESS) && (timeout < 2000000)) { + raw_spin_unlock_irqrestore(&xdbc.lock, flags); + udelay(100); + timeout += 100; + goto retry; + } + + if (xdbc.flags & XDBC_FLAGS_OUT_PROCESS) { + raw_spin_unlock_irqrestore(&xdbc.lock, flags); + xdbc_trace("previous transfer not completed yet\n"); + + return -ETIMEDOUT; + } + + ret = xdbc_bulk_transfer((void *)bytes, size, false); + raw_spin_unlock_irqrestore(&xdbc.lock, flags); + + return ret; +} + +static void early_xdbc_write(struct console *con, const char *str, u32 n) +{ + static char buf[XDBC_MAX_PACKET]; + int chunk, ret; + int use_cr = 0; + + if (!xdbc.xdbc_reg) + return; + memset(buf, 0, XDBC_MAX_PACKET); + while (n > 0) { + for (chunk = 0; chunk < XDBC_MAX_PACKET && n > 0; str++, chunk++, n--) { + + if (!use_cr && *str == '\n') { + use_cr = 1; + buf[chunk] = '\r'; + str--; + n++; + continue; + } + + if (use_cr) + use_cr = 0; + buf[chunk] = *str; + } + + if (chunk > 0) { + ret = xdbc_bulk_write(buf, chunk); + if (ret < 0) + xdbc_trace("missed message {%s}\n", buf); + } + } +} + +static struct console early_xdbc_console = { + .name = "earlyxdbc", + .write = early_xdbc_write, + .flags = CON_PRINTBUFFER, + .index = -1, +}; + +void __init early_xdbc_register_console(void) +{ + if (early_console) + return; + + early_console = &early_xdbc_console; + if (early_console_keep) + early_console->flags &= ~CON_BOOT; + else + early_console->flags |= CON_BOOT; + register_console(early_console); +} + +static void xdbc_unregister_console(void) +{ + if (early_xdbc_console.flags & CON_ENABLED) + unregister_console(&early_xdbc_console); +} + +static int xdbc_scrub_function(void *ptr) +{ + unsigned long flags; + + while (true) { + raw_spin_lock_irqsave(&xdbc.lock, flags); + xdbc_handle_events(); + + if (!(xdbc.flags & XDBC_FLAGS_INITIALIZED)) { + raw_spin_unlock_irqrestore(&xdbc.lock, flags); + break; + } + + raw_spin_unlock_irqrestore(&xdbc.lock, flags); + schedule_timeout_interruptible(1); + } + + xdbc_unregister_console(); + writel(0, &xdbc.xdbc_reg->control); + xdbc_trace("dbc scrub function exits\n"); + + return 0; +} + +static int __init xdbc_init(void) +{ + unsigned long flags; + void __iomem *base; + int ret = 0; + u32 offset; + + if (!(xdbc.flags & XDBC_FLAGS_INITIALIZED)) + return 0; + + /* + * It's time to shut down the DbC, so that the debug + * port can be reused by the host controller: + */ + if (early_xdbc_console.index == -1 || + (early_xdbc_console.flags & CON_BOOT)) { + xdbc_trace("hardware not used anymore\n"); + goto free_and_quit; + } + + base = ioremap_nocache(xdbc.xhci_start, xdbc.xhci_length); + if (!base) { + xdbc_trace("failed to remap the io address\n"); + ret = -ENOMEM; + goto free_and_quit; + } + + raw_spin_lock_irqsave(&xdbc.lock, flags); + early_iounmap(xdbc.xhci_base, xdbc.xhci_length); + xdbc.xhci_base = base; + offset = xhci_find_next_ext_cap(xdbc.xhci_base, 0, XHCI_EXT_CAPS_DEBUG); + xdbc.xdbc_reg = (struct xdbc_regs __iomem *)(xdbc.xhci_base + offset); + raw_spin_unlock_irqrestore(&xdbc.lock, flags); + + kthread_run(xdbc_scrub_function, NULL, "%s", "xdbc"); + + return 0; + +free_and_quit: + xdbc_free_ring(&xdbc.evt_ring); + xdbc_free_ring(&xdbc.out_ring); + xdbc_free_ring(&xdbc.in_ring); + free_bootmem(xdbc.table_dma, PAGE_SIZE); + free_bootmem(xdbc.out_dma, PAGE_SIZE); + writel(0, &xdbc.xdbc_reg->control); + early_iounmap(xdbc.xhci_base, xdbc.xhci_length); + + return ret; +} +subsys_initcall(xdbc_init); diff --git a/drivers/usb/early/xhci-dbc.h b/drivers/usb/early/xhci-dbc.h new file mode 100644 index 0000000000000000000000000000000000000000..2df0f6e613fede532503e2bfdf94a0b0eca29937 --- /dev/null +++ b/drivers/usb/early/xhci-dbc.h @@ -0,0 +1,211 @@ +/* + * xhci-dbc.h - xHCI debug capability early driver + * + * Copyright (C) 2016 Intel Corporation + * + * Author: Lu Baolu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_XHCI_DBC_H +#define __LINUX_XHCI_DBC_H + +#include +#include + +/* + * xHCI Debug Capability Register interfaces: + */ +struct xdbc_regs { + __le32 capability; + __le32 doorbell; + __le32 ersts; /* Event Ring Segment Table Size*/ + __le32 __reserved_0; /* 0c~0f reserved bits */ + __le64 erstba; /* Event Ring Segment Table Base Address */ + __le64 erdp; /* Event Ring Dequeue Pointer */ + __le32 control; + __le32 status; + __le32 portsc; /* Port status and control */ + __le32 __reserved_1; /* 2b~28 reserved bits */ + __le64 dccp; /* Debug Capability Context Pointer */ + __le32 devinfo1; /* Device Descriptor Info Register 1 */ + __le32 devinfo2; /* Device Descriptor Info Register 2 */ +}; + +#define DEBUG_MAX_BURST(p) (((p) >> 16) & 0xff) + +#define CTRL_DBC_RUN BIT(0) +#define CTRL_PORT_ENABLE BIT(1) +#define CTRL_HALT_OUT_TR BIT(2) +#define CTRL_HALT_IN_TR BIT(3) +#define CTRL_DBC_RUN_CHANGE BIT(4) +#define CTRL_DBC_ENABLE BIT(31) + +#define DCST_DEBUG_PORT(p) (((p) >> 24) & 0xff) + +#define PORTSC_CONN_STATUS BIT(0) +#define PORTSC_CONN_CHANGE BIT(17) +#define PORTSC_RESET_CHANGE BIT(21) +#define PORTSC_LINK_CHANGE BIT(22) +#define PORTSC_CONFIG_CHANGE BIT(23) + +/* + * xHCI Debug Capability data structures: + */ +struct xdbc_trb { + __le32 field[4]; +}; + +struct xdbc_erst_entry { + __le64 seg_addr; + __le32 seg_size; + __le32 __reserved_0; +}; + +struct xdbc_info_context { + __le64 string0; + __le64 manufacturer; + __le64 product; + __le64 serial; + __le32 length; + __le32 __reserved_0[7]; +}; + +struct xdbc_ep_context { + __le32 ep_info1; + __le32 ep_info2; + __le64 deq; + __le32 tx_info; + __le32 __reserved_0[11]; +}; + +struct xdbc_context { + struct xdbc_info_context info; + struct xdbc_ep_context out; + struct xdbc_ep_context in; +}; + +#define XDBC_INFO_CONTEXT_SIZE 48 +#define XDBC_MAX_STRING_LENGTH 64 +#define XDBC_STRING_MANUFACTURER "Linux" +#define XDBC_STRING_PRODUCT "Remote GDB" +#define XDBC_STRING_SERIAL "0001" + +struct xdbc_strings { + char string0[XDBC_MAX_STRING_LENGTH]; + char manufacturer[XDBC_MAX_STRING_LENGTH]; + char product[XDBC_MAX_STRING_LENGTH]; + char serial[XDBC_MAX_STRING_LENGTH]; +}; + +#define XDBC_PROTOCOL 1 /* GNU Remote Debug Command Set */ +#define XDBC_VENDOR_ID 0x1d6b /* Linux Foundation 0x1d6b */ +#define XDBC_PRODUCT_ID 0x0004 /* __le16 idProduct; device 0004 */ +#define XDBC_DEVICE_REV 0x0010 /* 0.10 */ + +/* + * xHCI Debug Capability software state structures: + */ +struct xdbc_segment { + struct xdbc_trb *trbs; + dma_addr_t dma; +}; + +#define XDBC_TRBS_PER_SEGMENT 256 + +struct xdbc_ring { + struct xdbc_segment *segment; + struct xdbc_trb *enqueue; + struct xdbc_trb *dequeue; + u32 cycle_state; +}; + +#define XDBC_EPID_OUT 2 +#define XDBC_EPID_IN 3 + +struct xdbc_state { + u16 vendor; + u16 device; + u32 bus; + u32 dev; + u32 func; + void __iomem *xhci_base; + u64 xhci_start; + size_t xhci_length; + int port_number; + + /* DbC register base */ + struct xdbc_regs __iomem *xdbc_reg; + + /* DbC table page */ + dma_addr_t table_dma; + void *table_base; + + /* event ring segment table */ + dma_addr_t erst_dma; + size_t erst_size; + void *erst_base; + + /* event ring segments */ + struct xdbc_ring evt_ring; + struct xdbc_segment evt_seg; + + /* debug capability contexts */ + dma_addr_t dbcc_dma; + size_t dbcc_size; + void *dbcc_base; + + /* descriptor strings */ + dma_addr_t string_dma; + size_t string_size; + void *string_base; + + /* bulk OUT endpoint */ + struct xdbc_ring out_ring; + struct xdbc_segment out_seg; + void *out_buf; + dma_addr_t out_dma; + + /* bulk IN endpoint */ + struct xdbc_ring in_ring; + struct xdbc_segment in_seg; + void *in_buf; + dma_addr_t in_dma; + + u32 flags; + + /* spinlock for early_xdbc_write() reentrancy */ + raw_spinlock_t lock; +}; + +#define XDBC_PCI_MAX_BUSES 256 +#define XDBC_PCI_MAX_DEVICES 32 +#define XDBC_PCI_MAX_FUNCTION 8 + +#define XDBC_TABLE_ENTRY_SIZE 64 +#define XDBC_ERST_ENTRY_NUM 1 +#define XDBC_DBCC_ENTRY_NUM 3 +#define XDBC_STRING_ENTRY_NUM 4 + +/* Bits definitions for xdbc_state.flags: */ +#define XDBC_FLAGS_INITIALIZED BIT(0) +#define XDBC_FLAGS_IN_STALL BIT(1) +#define XDBC_FLAGS_OUT_STALL BIT(2) +#define XDBC_FLAGS_IN_PROCESS BIT(3) +#define XDBC_FLAGS_OUT_PROCESS BIT(4) +#define XDBC_FLAGS_CONFIGURED BIT(5) + +#define XDBC_MAX_PACKET 1024 + +/* Door bell target: */ +#define OUT_EP_DOORBELL 0 +#define IN_EP_DOORBELL 1 +#define DOOR_BELL_TARGET(p) (((p) & 0xff) << 8) + +#define xdbc_read64(regs) xhci_read_64(NULL, (regs)) +#define xdbc_write64(val, regs) xhci_write_64(NULL, (val), (regs)) + +#endif /* __LINUX_XHCI_DBC_H */ diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 8ad203296079f6a9cf5e7457db1017784725a710..c164d6b788c31f9922836f407c21e50bfce73f5a 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -212,7 +212,7 @@ config USB_F_TCM # this first set of drivers all depend on bulk-capable hardware. config USB_CONFIGFS - tristate "USB functions configurable through configfs" + tristate "USB Gadget functions configurable through configfs" select USB_LIBCOMPOSITE help A Linux USB "gadget" can be set up through configfs. @@ -458,8 +458,9 @@ config USB_CONFIGFS_F_TCM UAS utilizes the USB 3.0 feature called streams support. choice - tristate "USB Gadget Drivers" + tristate "USB Gadget precomposed configurations" default USB_ETH + optional help A Linux "Gadget Driver" talks to the USB Peripheral Controller driver through the abstract "gadget" API. Some other operating @@ -476,6 +477,12 @@ choice not be able work with that controller, or might need to implement a less common variant of a device class protocol. + The available choices each represent a single precomposed USB + gadget configuration. In the device model, each option contains + both the device instantiation as a child for a USB gadget + controller, and the relevant drivers for each function declared + by the device. + source "drivers/usb/gadget/legacy/Kconfig" endchoice diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 49d685ad0da90d1a1282dd9d25f31ad64db22087..45b554032332e882e9bc99cb5489044053bd663c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -315,6 +315,9 @@ void usb_remove_function(struct usb_configuration *c, struct usb_function *f) list_del(&f->list); if (f->unbind) f->unbind(c, f); + + if (f->bind_deactivated) + usb_function_activate(f); } EXPORT_SYMBOL_GPL(usb_remove_function); @@ -956,12 +959,8 @@ static void remove_config(struct usb_composite_dev *cdev, f = list_first_entry(&config->functions, struct usb_function, list); - list_del(&f->list); - if (f->unbind) { - DBG(cdev, "unbind function '%s'/%p\n", f->name, f); - f->unbind(config, f); - /* may free memory for "f" */ - } + + usb_remove_function(config, f); } list_del(&config->list); if (config->unbind) { diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index a0085571824d9b4352c7245625a30c3248d789f5..47dda3450abd9391de37cf19115abfa0b927a402 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -246,7 +246,6 @@ EXPORT_SYMBOL_GPL(ffs_lock); static struct ffs_dev *_ffs_find_dev(const char *name); static struct ffs_dev *_ffs_alloc_dev(void); -static int _ffs_name_dev(struct ffs_dev *dev, const char *name); static void _ffs_free_dev(struct ffs_dev *dev); static void *ffs_acquire_dev(const char *dev_name); static void ffs_release_dev(struct ffs_data *ffs_data); @@ -1571,14 +1570,14 @@ static void ffs_data_get(struct ffs_data *ffs) { ENTER(); - atomic_inc(&ffs->ref); + refcount_inc(&ffs->ref); } static void ffs_data_opened(struct ffs_data *ffs) { ENTER(); - atomic_inc(&ffs->ref); + refcount_inc(&ffs->ref); if (atomic_add_return(1, &ffs->opened) == 1 && ffs->state == FFS_DEACTIVATED) { ffs->state = FFS_CLOSING; @@ -1590,7 +1589,7 @@ static void ffs_data_put(struct ffs_data *ffs) { ENTER(); - if (unlikely(atomic_dec_and_test(&ffs->ref))) { + if (unlikely(refcount_dec_and_test(&ffs->ref))) { pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || @@ -1635,7 +1634,7 @@ static struct ffs_data *ffs_data_new(void) ENTER(); - atomic_set(&ffs->ref, 1); + refcount_set(&ffs->ref, 1); atomic_set(&ffs->opened, 0); ffs->state = FFS_READ_DESCRIPTORS; mutex_init(&ffs->mutex); @@ -1859,12 +1858,12 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; - comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + - USB_DT_ENDPOINT_SIZE); - ep->ep->maxburst = comp_desc->bMaxBurst + 1; - - if (needs_comp_desc) + if (needs_comp_desc) { + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; ep->ep->comp_desc = comp_desc; + } ret = usb_ep_enable(ep->ep); if (likely(!ret)) { @@ -3302,9 +3301,10 @@ static struct ffs_dev *_ffs_do_find_dev(const char *name) { struct ffs_dev *dev; + if (!name) + return NULL; + list_for_each_entry(dev, &ffs_devices, entry) { - if (!dev->name || !name) - continue; if (strcmp(dev->name, name) == 0) return dev; } @@ -3380,42 +3380,11 @@ static void ffs_free_inst(struct usb_function_instance *f) kfree(opts); } -#define MAX_INST_NAME_LEN 40 - static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name) { - struct f_fs_opts *opts; - char *ptr; - const char *tmp; - int name_len, ret; - - name_len = strlen(name) + 1; - if (name_len > MAX_INST_NAME_LEN) + if (strlen(name) >= FIELD_SIZEOF(struct ffs_dev, name)) return -ENAMETOOLONG; - - ptr = kstrndup(name, name_len, GFP_KERNEL); - if (!ptr) - return -ENOMEM; - - opts = to_f_fs_opts(fi); - tmp = NULL; - - ffs_dev_lock(); - - tmp = opts->dev->name_allocated ? opts->dev->name : NULL; - ret = _ffs_name_dev(opts->dev, ptr); - if (ret) { - kfree(ptr); - ffs_dev_unlock(); - return ret; - } - opts->dev->name_allocated = true; - - ffs_dev_unlock(); - - kfree(tmp); - - return 0; + return ffs_name_dev(to_f_fs_opts(fi)->dev, name); } static struct usb_function_instance *ffs_alloc_inst(void) @@ -3545,32 +3514,19 @@ static struct ffs_dev *_ffs_alloc_dev(void) return dev; } -/* - * ffs_lock must be taken by the caller of this function - * The caller is responsible for "name" being available whenever f_fs needs it - */ -static int _ffs_name_dev(struct ffs_dev *dev, const char *name) +int ffs_name_dev(struct ffs_dev *dev, const char *name) { struct ffs_dev *existing; + int ret = 0; - existing = _ffs_do_find_dev(name); - if (existing) - return -EBUSY; - - dev->name = name; - - return 0; -} + ffs_dev_lock(); -/* - * The caller is responsible for "name" being available whenever f_fs needs it - */ -int ffs_name_dev(struct ffs_dev *dev, const char *name) -{ - int ret; + existing = _ffs_do_find_dev(name); + if (!existing) + strlcpy(dev->name, name, ARRAY_SIZE(dev->name)); + else if (existing != dev) + ret = -EBUSY; - ffs_dev_lock(); - ret = _ffs_name_dev(dev, name); ffs_dev_unlock(); return ret; @@ -3600,8 +3556,6 @@ EXPORT_SYMBOL_GPL(ffs_single_dev); static void _ffs_free_dev(struct ffs_dev *dev) { list_del(&dev->entry); - if (dev->name_allocated) - kfree(dev->name); /* Clear the private_data pointer to stop incorrect dev access */ if (dev->ffs_data) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 4c8aacc232c07b300b5efffebe25999f8d9d244c..74d57d6994da1602ad72798aabc922ecbea9a0b1 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -396,7 +396,11 @@ static int fsg_set_halt(struct fsg_dev *fsg, struct usb_ep *ep) /* Caller must hold fsg->lock */ static void wakeup_thread(struct fsg_common *common) { - smp_wmb(); /* ensure the write of bh->state is complete */ + /* + * Ensure the reading of thread_wakeup_needed + * and the writing of bh->state are completed + */ + smp_mb(); /* Tell the main thread that something has happened */ common->thread_wakeup_needed = 1; if (common->thread_task) @@ -627,7 +631,12 @@ static int sleep_thread(struct fsg_common *common, bool can_freeze) } __set_current_state(TASK_RUNNING); common->thread_wakeup_needed = 0; - smp_rmb(); /* ensure the latest bh->state is visible */ + + /* + * Ensure the writing of thread_wakeup_needed + * and the reading of bh->state are completed + */ + smp_mb(); return rc; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 224717e63a5300970867a663cd030d8cd62068f6..864819ff9a7d362962eb837755886423b57906fb 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index b4058f0000e4878efae4a475f834d06f16679fdd..6a1ce6a551587f232207612404df80807af215cd 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -281,7 +281,7 @@ static void pn_net_setup(struct net_device *dev) dev->tx_queue_len = 1; dev->netdev_ops = &pn_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->header_ops = &phonet_header_ops; } diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index c3cab77181d4c2677ea5214d6c9abc0b120d83e8..a8b40d07e927bb78e835b40efeea9bb83fb19bf8 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -178,6 +178,7 @@ static void rx_complete(struct usb_ep *ep, struct usb_request *req); static int rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags) { + struct usb_gadget *g = dev->gadget; struct sk_buff *skb; int retval = -ENOMEM; size_t size = 0; @@ -209,8 +210,11 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags) */ size += sizeof(struct ethhdr) + dev->net->mtu + RX_EXTRA; size += dev->port_usb->header_len; - size += out->maxpacket - 1; - size -= size % out->maxpacket; + + if (g->quirk_ep_out_aligned_size) { + size += out->maxpacket - 1; + size -= size % out->maxpacket; + } if (dev->port_usb->is_fixed) size = max_t(size_t, size, dev->port_usb->fixed_out_len); @@ -401,13 +405,12 @@ static int alloc_requests(struct eth_dev *dev, struct gether *link, unsigned n) static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags) { struct usb_request *req; + struct usb_request *tmp; unsigned long flags; /* fill unused rxq slots with some skb */ spin_lock_irqsave(&dev->req_lock, flags); - while (!list_empty(&dev->rx_reqs)) { - req = container_of(dev->rx_reqs.next, - struct usb_request, list); + list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) { list_del_init(&req->list); spin_unlock_irqrestore(&dev->req_lock, flags); @@ -527,7 +530,7 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - req = container_of(dev->tx_reqs.next, struct usb_request, list); + req = list_first_entry(&dev->tx_reqs, struct usb_request, list); list_del(&req->list); /* temporarily stop TX queue when the freelist empties */ @@ -1122,6 +1125,7 @@ void gether_disconnect(struct gether *link) { struct eth_dev *dev = link->ioport; struct usb_request *req; + struct usb_request *tmp; WARN_ON(!dev); if (!dev) @@ -1138,9 +1142,7 @@ void gether_disconnect(struct gether *link) */ usb_ep_disable(link->in_ep); spin_lock(&dev->req_lock); - while (!list_empty(&dev->tx_reqs)) { - req = container_of(dev->tx_reqs.next, - struct usb_request, list); + list_for_each_entry_safe(req, tmp, &dev->tx_reqs, list) { list_del(&req->list); spin_unlock(&dev->req_lock); @@ -1152,9 +1154,7 @@ void gether_disconnect(struct gether *link) usb_ep_disable(link->out_ep); spin_lock(&dev->req_lock); - while (!list_empty(&dev->rx_reqs)) { - req = container_of(dev->rx_reqs.next, - struct usb_request, list); + list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) { list_del(&req->list); spin_unlock(&dev->req_lock); diff --git a/drivers/usb/gadget/function/u_fs.h b/drivers/usb/gadget/function/u_fs.h index 4b6969451cdc28dcfabb4db8806a403df1199e0d..4378cc2fcac36fd69c71cdea53fef6e7ce6ce6e9 100644 --- a/drivers/usb/gadget/function/u_fs.h +++ b/drivers/usb/gadget/function/u_fs.h @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef VERBOSE_DEBUG #ifndef pr_vdebug @@ -39,15 +40,16 @@ struct f_fs_opts; struct ffs_dev { - const char *name; - bool name_allocated; - bool mounted; - bool desc_ready; - bool single; struct ffs_data *ffs_data; struct f_fs_opts *opts; struct list_head entry; + char name[41]; + + bool mounted; + bool desc_ready; + bool single; + int (*ffs_ready_callback)(struct ffs_data *ffs); void (*ffs_closed_callback)(struct ffs_data *ffs); void *(*ffs_acquire_dev_callback)(struct ffs_dev *dev); @@ -177,7 +179,7 @@ struct ffs_data { struct completion ep0req_completion; /* P: mutex */ /* reference counter */ - atomic_t ref; + refcount_t ref; /* how many files are opened (EP0 and others) */ atomic_t opened; diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 000677c991b0264ee079f46e79be1ab271001c9b..9b0805f55ad79b3a6b12069508ddc583d2765053 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1256,7 +1256,7 @@ static void gserial_console_exit(void) struct gscons_info *info = &gscons_info; unregister_console(&gserial_cons); - if (info->console_thread != NULL) + if (!IS_ERR_OR_NULL(info->console_thread)) kthread_stop(info->console_thread); gs_buf_free(&info->con_buf); } diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 4e037d2a7a60c3eefdd9f6585def29b23c02984b..844cb738bafd00537b567038301beb7f5850f034 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2125,7 +2125,7 @@ static struct configfs_item_operations uvc_item_ops = { .release = uvc_attr_release, }; -#define UVCG_OPTS_ATTR(cname, conv, str2u, uxx, vnoc, limit) \ +#define UVCG_OPTS_ATTR(cname, aname, conv, str2u, uxx, vnoc, limit) \ static ssize_t f_uvc_opts_##cname##_show( \ struct config_item *item, char *page) \ { \ @@ -2168,16 +2168,16 @@ end: \ return ret; \ } \ \ -UVC_ATTR(f_uvc_opts_, cname, aname) +UVC_ATTR(f_uvc_opts_, cname, cname) #define identity_conv(x) (x) -UVCG_OPTS_ATTR(streaming_interval, identity_conv, kstrtou8, u8, identity_conv, - 16); -UVCG_OPTS_ATTR(streaming_maxpacket, le16_to_cpu, kstrtou16, u16, le16_to_cpu, - 3072); -UVCG_OPTS_ATTR(streaming_maxburst, identity_conv, kstrtou8, u8, identity_conv, - 15); +UVCG_OPTS_ATTR(streaming_interval, streaming_interval, identity_conv, + kstrtou8, u8, identity_conv, 16); +UVCG_OPTS_ATTR(streaming_maxpacket, streaming_maxpacket, le16_to_cpu, + kstrtou16, u16, le16_to_cpu, 3072); +UVCG_OPTS_ATTR(streaming_maxburst, streaming_maxburst, identity_conv, + kstrtou8, u8, identity_conv, 15); #undef identity_conv diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2c916869293720e378ced6b265532846eca52a3..684900fcfe24c3c5ab206568f24da857434a654f 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,7 @@ enum ep0_state { struct dev_data { spinlock_t lock; - atomic_t count; + refcount_t count; enum ep0_state state; /* P: lock */ struct usb_gadgetfs_event event [N_EVENT]; unsigned ev_next; @@ -150,12 +151,12 @@ struct dev_data { static inline void get_dev (struct dev_data *data) { - atomic_inc (&data->count); + refcount_inc (&data->count); } static void put_dev (struct dev_data *data) { - if (likely (!atomic_dec_and_test (&data->count))) + if (likely (!refcount_dec_and_test (&data->count))) return; /* needs no more cleanup */ BUG_ON (waitqueue_active (&data->wait)); @@ -170,7 +171,7 @@ static struct dev_data *dev_new (void) if (!dev) return NULL; dev->state = STATE_DEV_DISABLED; - atomic_set (&dev->count, 1); + refcount_set (&dev->count, 1); spin_lock_init (&dev->lock); INIT_LIST_HEAD (&dev->epfiles); init_waitqueue_head (&dev->wait); @@ -190,7 +191,7 @@ enum ep_state { struct ep_data { struct mutex lock; enum ep_state state; - atomic_t count; + refcount_t count; struct dev_data *dev; /* must hold dev->lock before accessing ep or req */ struct usb_ep *ep; @@ -205,12 +206,12 @@ struct ep_data { static inline void get_ep (struct ep_data *data) { - atomic_inc (&data->count); + refcount_inc (&data->count); } static void put_ep (struct ep_data *data) { - if (likely (!atomic_dec_and_test (&data->count))) + if (likely (!refcount_dec_and_test (&data->count))) return; put_dev (data->dev); /* needs no more cleanup */ @@ -1182,8 +1183,10 @@ dev_release (struct inode *inode, struct file *fd) /* closing ep0 === shutdown all */ - if (dev->gadget_registered) + if (dev->gadget_registered) { usb_gadget_unregister_driver (&gadgetfs_driver); + dev->gadget_registered = false; + } /* at this point "good" hardware has disconnected the * device from USB; the host won't see it any more. @@ -1561,7 +1564,7 @@ static int activate_ep_files (struct dev_data *dev) init_waitqueue_head (&data->wait); strncpy (data->name, ep->name, sizeof (data->name) - 1); - atomic_set (&data->count, 1); + refcount_set (&data->count, 1); data->dev = dev; get_dev (dev); @@ -1676,9 +1679,10 @@ static void gadgetfs_suspend (struct usb_gadget *gadget) { struct dev_data *dev = get_gadget_data (gadget); + unsigned long flags; INFO (dev, "suspended from state %d\n", dev->state); - spin_lock (&dev->lock); + spin_lock_irqsave(&dev->lock, flags); switch (dev->state) { case STATE_DEV_SETUP: // VERY odd... host died?? case STATE_DEV_CONNECTED: @@ -1689,7 +1693,7 @@ gadgetfs_suspend (struct usb_gadget *gadget) default: break; } - spin_unlock (&dev->lock); + spin_unlock_irqrestore(&dev->lock, flags); } static struct usb_gadget_driver gadgetfs_driver = { diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 4b69f28a9af972dc5fc5584b9f8bbd6a38774f5a..1c14c283cc47de111d8f6e4990b62f5906dc89a1 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -62,8 +62,9 @@ config USB_ATMEL_USBA The fifo_mode parameter is used to select endpoint allocation mode. fifo_mode = 0 is used to let the driver autoconfigure the endpoints. - In this case 2 banks are allocated for isochronous endpoints and - only one bank is allocated for the rest of the endpoints. + In this case, for ep1 2 banks are allocated if it works in isochronous + mode and only 1 bank otherwise. For the rest of the endpoints + only 1 bank is allocated. fifo_mode = 1 is a generic maximum fifo size (1024 bytes) configuration allowing the usage of ep1 - ep6 @@ -191,6 +192,7 @@ config USB_RENESAS_USBHS_UDC config USB_RENESAS_USB3 tristate 'Renesas USB3.0 Peripheral controller' depends on ARCH_RENESAS || COMPILE_TEST + depends on EXTCON help Renesas USB3.0 Peripheral controller is a USB peripheral controller that supports super, high, and full speed USB 3.0 data transfers. @@ -253,6 +255,20 @@ config USB_MV_U3D MARVELL PXA2128 Processor series include a super speed USB3.0 device controller, which support super speed USB peripheral. +config USB_SNP_CORE + depends on USB_AMD5536UDC + tristate + help + This enables core driver support for Synopsys USB 2.0 Device + controller. + + This will be enabled when PCI or Platform driver for this UDC is + selected. Currently, this will be enabled by USB_SNP_UDC_PLAT or + USB_AMD5536UDC options. + + This IP is different to the High Speed OTG IP that can be enabled + by selecting USB_DWC2 or USB_DWC3 options. + # # Controllers available in both integrated and discrete versions # @@ -277,7 +293,8 @@ source "drivers/usb/gadget/udc/bdc/Kconfig" config USB_AMD5536UDC tristate "AMD5536 UDC" - depends on PCI + depends on USB_PCI + select USB_SNP_CORE help The AMD5536 UDC is part of the AMD Geode CS5536, an x86 southbridge. It is a USB Highspeed DMA capable USB device controller. Beside ep0 @@ -285,6 +302,9 @@ config USB_AMD5536UDC The UDC port supports OTG operation, and may be used as a host port if it's not being used to implement peripheral or OTG roles. + This UDC is based on Synopsys USB device controller IP and selects + CONFIG_USB_SNP_CORE option to build the core driver. + Say "y" to link the driver statically, or "m" to build a dynamically linked module called "amd5536udc" and force all gadget drivers to also be dynamically linked. @@ -327,7 +347,7 @@ config USB_NET2272_DMA config USB_NET2280 tristate "NetChip NET228x / PLX USB3x8x" - depends on PCI + depends on USB_PCI help NetChip 2280 / 2282 is a PCI based USB peripheral controller which supports both full and high speed USB 2.0 data transfers. @@ -352,7 +372,7 @@ config USB_NET2280 config USB_GOKU tristate "Toshiba TC86C001 'Goku-S'" - depends on PCI + depends on USB_PCI help The Toshiba TC86C001 is a PCI device which includes controllers for full speed USB devices, IDE, I2C, SIO, plus a USB host (OHCI). @@ -366,7 +386,7 @@ config USB_GOKU config USB_EG20T tristate "Intel QUARK X1000/EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7831) UDC" - depends on PCI + depends on USB_PCI help This is a USB device driver for EG20T PCH. EG20T PCH is the platform controller hub that is used in Intel's diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile index 98e74ed9f555547d8d4b778bb7d2fc4d19e553f1..626e1f1c62da9ad0430b68bfd721a14f6f1965ab 100644 --- a/drivers/usb/gadget/udc/Makefile +++ b/drivers/usb/gadget/udc/Makefile @@ -10,7 +10,8 @@ obj-$(CONFIG_USB_GADGET) += udc-core.o obj-$(CONFIG_USB_DUMMY_HCD) += dummy_hcd.o obj-$(CONFIG_USB_NET2272) += net2272.o obj-$(CONFIG_USB_NET2280) += net2280.o -obj-$(CONFIG_USB_AMD5536UDC) += amd5536udc.o +obj-$(CONFIG_USB_SNP_CORE) += amd5536udc.o +obj-$(CONFIG_USB_AMD5536UDC) += amd5536udc_pci.o obj-$(CONFIG_USB_PXA25X) += pxa25x_udc.o obj-$(CONFIG_USB_PXA27X) += pxa27x_udc.o obj-$(CONFIG_USB_GOKU) += goku_udc.o diff --git a/drivers/usb/gadget/udc/amd5536udc.c b/drivers/usb/gadget/udc/amd5536udc.c index ea03ca7ae29a22cd171584846891883e426da570..4ecd2f20ea480082a5a3abf2f9a7f8e756e6c495 100644 --- a/drivers/usb/gadget/udc/amd5536udc.c +++ b/drivers/usb/gadget/udc/amd5536udc.c @@ -11,27 +11,15 @@ */ /* - * The AMD5536 UDC is part of the x86 southbridge AMD Geode CS5536. - * It is a USB Highspeed DMA capable USB device controller. Beside ep0 it - * provides 4 IN and 4 OUT endpoints (bulk or interrupt type). - * - * Make sure that UDC is assigned to port 4 by BIOS settings (port can also - * be used as host port) and UOC bits PAD_EN and APU are set (should be done - * by BIOS init). - * - * UDC DMA requires 32-bit aligned buffers so DMA with gadget ether does not - * work without updating NET_IP_ALIGN. Or PIO mode (module param "use_dma=0") - * can be used with gadget ether. + * This file does the core driver implementation for the UDC that is based + * on Synopsys device controller IP (different than HS OTG IP) that is either + * connected through PCI bus or integrated to SoC platforms. */ -/* debug control */ -/* #define UDC_VERBOSE */ - /* Driver strings */ -#define UDC_MOD_DESCRIPTION "AMD 5536 UDC - USB Device Controller" +#define UDC_MOD_DESCRIPTION "Synopsys USB Device Controller" #define UDC_DRIVER_VERSION_STRING "01.00.0206" -/* system */ #include #include #include @@ -46,23 +34,12 @@ #include #include #include -#include -#include -#include -#include #include - +#include #include #include - -/* gadget stack */ -#include -#include - -/* udc specific */ #include "amd5536udc.h" - static void udc_tasklet_disconnect(unsigned long); static void empty_req_queue(struct udc_ep *); static void udc_setup_endpoints(struct udc *dev); @@ -72,7 +49,7 @@ static void udc_free_request(struct usb_ep *usbep, struct usb_request *usbreq); /* description */ static const char mod_desc[] = UDC_MOD_DESCRIPTION; -static const char name[] = "amd5536udc"; +static const char name[] = "udc"; /* structure to hold endpoint function pointers */ static const struct usb_ep_ops udc_ep_ops; @@ -208,30 +185,11 @@ static const struct { #undef EP_INFO }; -/* DMA usage flag */ -static bool use_dma = 1; -/* packet per buffer dma */ -static bool use_dma_ppb = 1; -/* with per descr. update */ -static bool use_dma_ppb_du; /* buffer fill mode */ static int use_dma_bufferfill_mode; -/* full speed only mode */ -static bool use_fullspeed; /* tx buffer size for high speed */ static unsigned long hs_tx_buf = UDC_EPIN_BUFF_SIZE; -/* module parameters */ -module_param(use_dma, bool, S_IRUGO); -MODULE_PARM_DESC(use_dma, "true for DMA"); -module_param(use_dma_ppb, bool, S_IRUGO); -MODULE_PARM_DESC(use_dma_ppb, "true for DMA in packet per buffer mode"); -module_param(use_dma_ppb_du, bool, S_IRUGO); -MODULE_PARM_DESC(use_dma_ppb_du, - "true for DMA in packet per buffer mode with descriptor update"); -module_param(use_fullspeed, bool, S_IRUGO); -MODULE_PARM_DESC(use_fullspeed, "true for fullspeed only"); - /*---------------------------------------------------------------------------*/ /* Prints UDC device registers and endpoint irq registers */ static void print_regs(struct udc *dev) @@ -267,7 +225,7 @@ static void print_regs(struct udc *dev) } /* Masks unused interrupts */ -static int udc_mask_unused_interrupts(struct udc *dev) +int udc_mask_unused_interrupts(struct udc *dev) { u32 tmp; @@ -287,6 +245,7 @@ static int udc_mask_unused_interrupts(struct udc *dev) return 0; } +EXPORT_SYMBOL_GPL(udc_mask_unused_interrupts); /* Enables endpoint 0 interrupts */ static int udc_enable_ep0_interrupts(struct udc *dev) @@ -306,7 +265,7 @@ static int udc_enable_ep0_interrupts(struct udc *dev) } /* Enables device interrupts for SET_INTF and SET_CONFIG */ -static int udc_enable_dev_setup_interrupts(struct udc *dev) +int udc_enable_dev_setup_interrupts(struct udc *dev) { u32 tmp; @@ -325,6 +284,7 @@ static int udc_enable_dev_setup_interrupts(struct udc *dev) return 0; } +EXPORT_SYMBOL_GPL(udc_enable_dev_setup_interrupts); /* Calculates fifo start of endpoint based on preceding endpoints */ static int udc_set_txfifo_addr(struct udc_ep *ep) @@ -583,7 +543,7 @@ udc_alloc_request(struct usb_ep *usbep, gfp_t gfp) if (ep->dma) { /* ep0 in requests are allocated from data pool here */ - dma_desc = pci_pool_alloc(ep->dev->data_requests, gfp, + dma_desc = dma_pool_alloc(ep->dev->data_requests, gfp, &req->td_phys); if (!dma_desc) { kfree(req); @@ -608,27 +568,23 @@ udc_alloc_request(struct usb_ep *usbep, gfp_t gfp) } /* frees pci pool descriptors of a DMA chain */ -static int udc_free_dma_chain(struct udc *dev, struct udc_request *req) +static void udc_free_dma_chain(struct udc *dev, struct udc_request *req) { - int ret_val = 0; - struct udc_data_dma *td; - struct udc_data_dma *td_last = NULL; + struct udc_data_dma *td = req->td_data; unsigned int i; + dma_addr_t addr_next = 0x00; + dma_addr_t addr = (dma_addr_t)td->next; + DBG(dev, "free chain req = %p\n", req); /* do not free first desc., will be done by free for request */ - td_last = req->td_data; - td = phys_to_virt(td_last->next); - for (i = 1; i < req->chain_len; i++) { - pci_pool_free(dev->data_requests, td, - (dma_addr_t)td_last->next); - td_last = td; - td = phys_to_virt(td_last->next); + td = phys_to_virt(addr); + addr_next = (dma_addr_t)td->next; + dma_pool_free(dev->data_requests, td, addr); + addr = addr_next; } - - return ret_val; } /* Frees request packet, called by gadget driver */ @@ -652,7 +608,7 @@ udc_free_request(struct usb_ep *usbep, struct usb_request *usbreq) if (req->chain_len > 1) udc_free_dma_chain(ep->dev, req); - pci_pool_free(ep->dev->data_requests, req->td_data, + dma_pool_free(ep->dev->data_requests, req->td_data, req->td_phys); } kfree(req); @@ -847,7 +803,7 @@ static int udc_create_dma_chain( for (i = buf_len; i < bytes; i += buf_len) { /* create or determine next desc. */ if (create_new_chain) { - td = pci_pool_alloc(ep->dev->data_requests, + td = dma_pool_alloc(ep->dev->data_requests, gfp_flags, &dma_addr); if (!td) return -ENOMEM; @@ -1507,7 +1463,7 @@ static void make_ep_lists(struct udc *dev) } /* Inits UDC context */ -static void udc_basic_init(struct udc *dev) +void udc_basic_init(struct udc *dev) { u32 tmp; @@ -1543,6 +1499,7 @@ static void udc_basic_init(struct udc *dev) dev->data_ep_enabled = 0; dev->data_ep_queued = 0; } +EXPORT_SYMBOL_GPL(udc_basic_init); /* init registers at driver load time */ static int startup_registers(struct udc *dev) @@ -3031,7 +2988,7 @@ __acquires(dev->lock) } /* Interrupt Service Routine, see Linux Kernel Doc for parameters */ -static irqreturn_t udc_irq(int irq, void *pdev) +irqreturn_t udc_irq(int irq, void *pdev) { struct udc *dev = pdev; u32 reg; @@ -3083,16 +3040,18 @@ static irqreturn_t udc_irq(int irq, void *pdev) spin_unlock(&dev->lock); return ret_val; } +EXPORT_SYMBOL_GPL(udc_irq); /* Tears down device */ -static void gadget_release(struct device *pdev) +void gadget_release(struct device *pdev) { struct amd5536udc *dev = dev_get_drvdata(pdev); kfree(dev); } +EXPORT_SYMBOL_GPL(gadget_release); /* Cleanup on device remove */ -static void udc_remove(struct udc *dev) +void udc_remove(struct udc *dev) { /* remove timer */ stop_timer++; @@ -3108,9 +3067,10 @@ static void udc_remove(struct udc *dev) del_timer_sync(&udc_pollstall_timer); udc = NULL; } +EXPORT_SYMBOL_GPL(udc_remove); /* free all the dma pools */ -static void free_dma_pools(struct udc *dev) +void free_dma_pools(struct udc *dev) { dma_pool_free(dev->stp_requests, dev->ep[UDC_EP0OUT_IX].td, dev->ep[UDC_EP0OUT_IX].td_phys); @@ -3119,35 +3079,10 @@ static void free_dma_pools(struct udc *dev) dma_pool_destroy(dev->stp_requests); dma_pool_destroy(dev->data_requests); } - -/* Reset all pci context */ -static void udc_pci_remove(struct pci_dev *pdev) -{ - struct udc *dev; - - dev = pci_get_drvdata(pdev); - - usb_del_gadget_udc(&udc->gadget); - /* gadget driver must not be registered */ - if (WARN_ON(dev->driver)) - return; - - /* dma pool cleanup */ - free_dma_pools(dev); - - /* reset controller */ - writel(AMD_BIT(UDC_DEVCFG_SOFTRESET), &dev->regs->cfg); - free_irq(pdev->irq, dev); - iounmap(dev->virt_addr); - release_mem_region(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - pci_disable_device(pdev); - - udc_remove(dev); -} +EXPORT_SYMBOL_GPL(free_dma_pools); /* create dma pools on init */ -static int init_dma_pools(struct udc *dev) +int init_dma_pools(struct udc *dev) { struct udc_stp_dma *td_stp; struct udc_data_dma *td_data; @@ -3210,9 +3145,10 @@ static int init_dma_pools(struct udc *dev) dev->data_requests = NULL; return retval; } +EXPORT_SYMBOL_GPL(init_dma_pools); /* general probe */ -static int udc_probe(struct udc *dev) +int udc_probe(struct udc *dev) { char tmp[128]; u32 reg; @@ -3276,137 +3212,7 @@ static int udc_probe(struct udc *dev) finished: return retval; } - -/* Called by pci bus driver to init pci context */ -static int udc_pci_probe( - struct pci_dev *pdev, - const struct pci_device_id *id -) -{ - struct udc *dev; - unsigned long resource; - unsigned long len; - int retval = 0; - - /* one udc only */ - if (udc) { - dev_dbg(&pdev->dev, "already probed\n"); - return -EBUSY; - } - - /* init */ - dev = kzalloc(sizeof(struct udc), GFP_KERNEL); - if (!dev) - return -ENOMEM; - - /* pci setup */ - if (pci_enable_device(pdev) < 0) { - retval = -ENODEV; - goto err_pcidev; - } - - /* PCI resource allocation */ - resource = pci_resource_start(pdev, 0); - len = pci_resource_len(pdev, 0); - - if (!request_mem_region(resource, len, name)) { - dev_dbg(&pdev->dev, "pci device used already\n"); - retval = -EBUSY; - goto err_memreg; - } - - dev->virt_addr = ioremap_nocache(resource, len); - if (!dev->virt_addr) { - dev_dbg(&pdev->dev, "start address cannot be mapped\n"); - retval = -EFAULT; - goto err_ioremap; - } - - if (!pdev->irq) { - dev_err(&pdev->dev, "irq not set\n"); - retval = -ENODEV; - goto err_irq; - } - - spin_lock_init(&dev->lock); - /* udc csr registers base */ - dev->csr = dev->virt_addr + UDC_CSR_ADDR; - /* dev registers base */ - dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; - /* ep registers base */ - dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; - /* fifo's base */ - dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); - dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); - - if (request_irq(pdev->irq, udc_irq, IRQF_SHARED, name, dev) != 0) { - dev_dbg(&pdev->dev, "request_irq(%d) fail\n", pdev->irq); - retval = -EBUSY; - goto err_irq; - } - - pci_set_drvdata(pdev, dev); - - /* chip revision for Hs AMD5536 */ - dev->chiprev = pdev->revision; - - pci_set_master(pdev); - pci_try_set_mwi(pdev); - - /* init dma pools */ - if (use_dma) { - retval = init_dma_pools(dev); - if (retval != 0) - goto err_dma; - } - - dev->phys_addr = resource; - dev->irq = pdev->irq; - dev->pdev = pdev; - - /* general probing */ - if (udc_probe(dev)) { - retval = -ENODEV; - goto err_probe; - } - return 0; - -err_probe: - if (use_dma) - free_dma_pools(dev); -err_dma: - free_irq(pdev->irq, dev); -err_irq: - iounmap(dev->virt_addr); -err_ioremap: - release_mem_region(resource, len); -err_memreg: - pci_disable_device(pdev); -err_pcidev: - kfree(dev); - return retval; -} - -/* PCI device parameters */ -static const struct pci_device_id pci_id[] = { - { - PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x2096), - .class = PCI_CLASS_SERIAL_USB_DEVICE, - .class_mask = 0xffffffff, - }, - {}, -}; -MODULE_DEVICE_TABLE(pci, pci_id); - -/* PCI functions */ -static struct pci_driver udc_pci_driver = { - .name = (char *) name, - .id_table = pci_id, - .probe = udc_pci_probe, - .remove = udc_pci_remove, -}; - -module_pci_driver(udc_pci_driver); +EXPORT_SYMBOL_GPL(udc_probe); MODULE_DESCRIPTION(UDC_MOD_DESCRIPTION); MODULE_AUTHOR("Thomas Dahlmann"); diff --git a/drivers/usb/gadget/udc/amd5536udc.h b/drivers/usb/gadget/udc/amd5536udc.h index 4638d707f1698940eea06131fe0baf0cfbb2032f..fae49bf3833e603a8bb7e0a305d0655bea97a73e 100644 --- a/drivers/usb/gadget/udc/amd5536udc.h +++ b/drivers/usb/gadget/udc/amd5536udc.h @@ -13,6 +13,12 @@ #ifndef AMD5536UDC_H #define AMD5536UDC_H +/* debug control */ +/* #define UDC_VERBOSE */ + +#include +#include + /* various constants */ #define UDC_RDE_TIMER_SECONDS 1 #define UDC_RDE_TIMER_DIV 10 @@ -545,8 +551,8 @@ struct udc { u32 __iomem *txfifo; /* DMA desc pools */ - struct pci_pool *data_requests; - struct pci_pool *stp_requests; + struct dma_pool *data_requests; + struct dma_pool *stp_requests; /* device data */ unsigned long phys_addr; @@ -567,6 +573,36 @@ union udc_setup_data { struct usb_ctrlrequest request; }; +/* Function declarations */ +int udc_enable_dev_setup_interrupts(struct udc *dev); +int udc_mask_unused_interrupts(struct udc *dev); +irqreturn_t udc_irq(int irq, void *pdev); +void gadget_release(struct device *pdev); +void udc_basic_init(struct udc *dev); +void free_dma_pools(struct udc *dev); +int init_dma_pools(struct udc *dev); +void udc_remove(struct udc *dev); +int udc_probe(struct udc *dev); + +/* DMA usage flag */ +static bool use_dma = 1; +/* packet per buffer dma */ +static bool use_dma_ppb = 1; +/* with per descr. update */ +static bool use_dma_ppb_du; +/* full speed only mode */ +static bool use_fullspeed; + +/* module parameters */ +module_param(use_dma, bool, S_IRUGO); +MODULE_PARM_DESC(use_dma, "true for DMA"); +module_param(use_dma_ppb, bool, S_IRUGO); +MODULE_PARM_DESC(use_dma_ppb, "true for DMA in packet per buffer mode"); +module_param(use_dma_ppb_du, bool, S_IRUGO); +MODULE_PARM_DESC(use_dma_ppb_du, + "true for DMA in packet per buffer mode with descriptor update"); +module_param(use_fullspeed, bool, S_IRUGO); +MODULE_PARM_DESC(use_fullspeed, "true for fullspeed only"); /* *--------------------------------------------------------------------------- * SET and GET bitfields in u32 values diff --git a/drivers/usb/gadget/udc/amd5536udc_pci.c b/drivers/usb/gadget/udc/amd5536udc_pci.c new file mode 100644 index 0000000000000000000000000000000000000000..2a2d0a96fe246d4be570da05c228ea97528e86d3 --- /dev/null +++ b/drivers/usb/gadget/udc/amd5536udc_pci.c @@ -0,0 +1,217 @@ +/* + * amd5536udc_pci.c -- AMD 5536 UDC high/full speed USB device controller + * + * Copyright (C) 2005-2007 AMD (http://www.amd.com) + * Author: Thomas Dahlmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +/* + * The AMD5536 UDC is part of the x86 southbridge AMD Geode CS5536. + * It is a USB Highspeed DMA capable USB device controller. Beside ep0 it + * provides 4 IN and 4 OUT endpoints (bulk or interrupt type). + * + * Make sure that UDC is assigned to port 4 by BIOS settings (port can also + * be used as host port) and UOC bits PAD_EN and APU are set (should be done + * by BIOS init). + * + * UDC DMA requires 32-bit aligned buffers so DMA with gadget ether does not + * work without updating NET_IP_ALIGN. Or PIO mode (module param "use_dma=0") + * can be used with gadget ether. + * + * This file does pci device registration, and the core driver implementation + * is done in amd5536udc.c + * + * The driver is split so as to use the core UDC driver which is based on + * Synopsys device controller IP (different than HS OTG IP) in UDCs + * integrated to SoC platforms. + * + */ + +/* Driver strings */ +#define UDC_MOD_DESCRIPTION "AMD 5536 UDC - USB Device Controller" + +/* system */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* udc specific */ +#include "amd5536udc.h" + +/* pointer to device object */ +static struct udc *udc; + +/* description */ +static const char mod_desc[] = UDC_MOD_DESCRIPTION; +static const char name[] = "amd5536udc-pci"; + +/* Reset all pci context */ +static void udc_pci_remove(struct pci_dev *pdev) +{ + struct udc *dev; + + dev = pci_get_drvdata(pdev); + + usb_del_gadget_udc(&udc->gadget); + /* gadget driver must not be registered */ + if (WARN_ON(dev->driver)) + return; + + /* dma pool cleanup */ + free_dma_pools(dev); + + /* reset controller */ + writel(AMD_BIT(UDC_DEVCFG_SOFTRESET), &dev->regs->cfg); + free_irq(pdev->irq, dev); + iounmap(dev->virt_addr); + release_mem_region(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + pci_disable_device(pdev); + + udc_remove(dev); +} + +/* Called by pci bus driver to init pci context */ +static int udc_pci_probe( + struct pci_dev *pdev, + const struct pci_device_id *id +) +{ + struct udc *dev; + unsigned long resource; + unsigned long len; + int retval = 0; + + /* one udc only */ + if (udc) { + dev_dbg(&pdev->dev, "already probed\n"); + return -EBUSY; + } + + /* init */ + dev = kzalloc(sizeof(struct udc), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* pci setup */ + if (pci_enable_device(pdev) < 0) { + retval = -ENODEV; + goto err_pcidev; + } + + /* PCI resource allocation */ + resource = pci_resource_start(pdev, 0); + len = pci_resource_len(pdev, 0); + + if (!request_mem_region(resource, len, name)) { + dev_dbg(&pdev->dev, "pci device used already\n"); + retval = -EBUSY; + goto err_memreg; + } + + dev->virt_addr = ioremap_nocache(resource, len); + if (!dev->virt_addr) { + dev_dbg(&pdev->dev, "start address cannot be mapped\n"); + retval = -EFAULT; + goto err_ioremap; + } + + if (!pdev->irq) { + dev_err(&pdev->dev, "irq not set\n"); + retval = -ENODEV; + goto err_irq; + } + + spin_lock_init(&dev->lock); + /* udc csr registers base */ + dev->csr = dev->virt_addr + UDC_CSR_ADDR; + /* dev registers base */ + dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; + /* ep registers base */ + dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; + /* fifo's base */ + dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); + dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); + + if (request_irq(pdev->irq, udc_irq, IRQF_SHARED, name, dev) != 0) { + dev_dbg(&pdev->dev, "request_irq(%d) fail\n", pdev->irq); + retval = -EBUSY; + goto err_irq; + } + + pci_set_drvdata(pdev, dev); + + /* chip revision for Hs AMD5536 */ + dev->chiprev = pdev->revision; + + pci_set_master(pdev); + pci_try_set_mwi(pdev); + + /* init dma pools */ + if (use_dma) { + retval = init_dma_pools(dev); + if (retval != 0) + goto err_dma; + } + + dev->phys_addr = resource; + dev->irq = pdev->irq; + dev->pdev = pdev; + + /* general probing */ + if (udc_probe(dev)) { + retval = -ENODEV; + goto err_probe; + } + return 0; + +err_probe: + if (use_dma) + free_dma_pools(dev); +err_dma: + free_irq(pdev->irq, dev); +err_irq: + iounmap(dev->virt_addr); +err_ioremap: + release_mem_region(resource, len); +err_memreg: + pci_disable_device(pdev); +err_pcidev: + kfree(dev); + return retval; +} + +/* PCI device parameters */ +static const struct pci_device_id pci_id[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x2096), + .class = PCI_CLASS_SERIAL_USB_DEVICE, + .class_mask = 0xffffffff, + }, + {}, +}; +MODULE_DEVICE_TABLE(pci, pci_id); + +/* PCI functions */ +static struct pci_driver udc_pci_driver = { + .name = (char *) name, + .id_table = pci_id, + .probe = udc_pci_probe, + .remove = udc_pci_remove, +}; +module_pci_driver(udc_pci_driver); + +MODULE_DESCRIPTION(UDC_MOD_DESCRIPTION); +MODULE_AUTHOR("Thomas Dahlmann"); +MODULE_LICENSE("GPL"); diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 2035906b8ced173c2e869a3272334d4265acf79c..3ccc34176a5aabb43cc4ee0bba90efa6fb32f551 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -321,7 +321,6 @@ static inline void usba_cleanup_debugfs(struct usba_udc *udc) static ushort fifo_mode; -/* "modprobe ... fifo_mode=1" etc */ module_param(fifo_mode, ushort, 0x0); MODULE_PARM_DESC(fifo_mode, "Endpoint configuration mode"); @@ -371,7 +370,7 @@ static struct usba_fifo_cfg mode_4_cfg[] = { }; /* Add additional configurations here */ -int usba_config_fifo_table(struct usba_udc *udc) +static int usba_config_fifo_table(struct usba_udc *udc) { int n; @@ -1076,11 +1075,9 @@ static int atmel_usba_start(struct usb_gadget *gadget, struct usb_gadget_driver *driver); static int atmel_usba_stop(struct usb_gadget *gadget); -static struct usb_ep *atmel_usba_match_ep( - struct usb_gadget *gadget, - struct usb_endpoint_descriptor *desc, - struct usb_ss_ep_comp_descriptor *ep_comp -) +static struct usb_ep *atmel_usba_match_ep(struct usb_gadget *gadget, + struct usb_endpoint_descriptor *desc, + struct usb_ss_ep_comp_descriptor *ep_comp) { struct usb_ep *_ep; struct usba_ep *ep; @@ -1100,7 +1097,6 @@ static struct usb_ep *atmel_usba_match_ep( ep = to_usba_ep(_ep); switch (usb_endpoint_type(desc)) { - case USB_ENDPOINT_XFER_CONTROL: break; @@ -1141,7 +1137,7 @@ static struct usb_ep *atmel_usba_match_ep( ep->udc->configured_ep++; } -return _ep; + return _ep; } static const struct usb_gadget_ops usba_udc_ops = { @@ -1855,8 +1851,8 @@ static irqreturn_t usba_udc_irq(int irq, void *devid) * but it's clearly harmless... */ if (!(usba_ep_readl(ep0, CFG) & USBA_EPT_MAPPED)) - dev_dbg(&udc->pdev->dev, - "ODD: EP0 configuration is invalid!\n"); + dev_err(&udc->pdev->dev, + "ODD: EP0 configuration is invalid!\n"); /* Preallocate other endpoints */ n = fifo_mode ? udc->num_ep : udc->configured_ep; @@ -1864,8 +1860,8 @@ static irqreturn_t usba_udc_irq(int irq, void *devid) ep = &udc->usba_ep[i]; usba_ep_writel(ep, CFG, ep->ept_cfg); if (!(usba_ep_readl(ep, CFG) & USBA_EPT_MAPPED)) - dev_dbg(&udc->pdev->dev, - "ODD: EP%d configuration is invalid!\n", i); + dev_err(&udc->pdev->dev, + "ODD: EP%d configuration is invalid!\n", i); } } @@ -2089,8 +2085,9 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, while ((pp = of_get_next_child(np, pp))) udc->num_ep++; udc->configured_ep = 1; - } else + } else { udc->num_ep = usba_config_fifo_table(udc); + } eps = devm_kzalloc(&pdev->dev, sizeof(struct usba_ep) * udc->num_ep, GFP_KERNEL); @@ -2118,14 +2115,34 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, dev_err(&pdev->dev, "of_probe: fifo-size error(%d)\n", ret); goto err; } - ep->fifo_size = fifo_mode ? udc->fifo_cfg[i].fifo_size : val; + if (fifo_mode) { + if (val < udc->fifo_cfg[i].fifo_size) { + dev_warn(&pdev->dev, + "Using max fifo-size value from DT\n"); + ep->fifo_size = val; + } else { + ep->fifo_size = udc->fifo_cfg[i].fifo_size; + } + } else { + ep->fifo_size = val; + } ret = of_property_read_u32(pp, "atmel,nb-banks", &val); if (ret) { dev_err(&pdev->dev, "of_probe: nb-banks error(%d)\n", ret); goto err; } - ep->nr_banks = fifo_mode ? udc->fifo_cfg[i].nr_banks : val; + if (fifo_mode) { + if (val < udc->fifo_cfg[i].nr_banks) { + dev_warn(&pdev->dev, + "Using max nb-banks value from DT\n"); + ep->nr_banks = val; + } else { + ep->nr_banks = udc->fifo_cfg[i].nr_banks; + } + } else { + ep->nr_banks = val; + } ep->can_dma = of_property_read_bool(pp, "atmel,can-dma"); ep->can_isoc = of_property_read_bool(pp, "atmel,can-isoc"); diff --git a/drivers/usb/gadget/udc/bdc/Kconfig b/drivers/usb/gadget/udc/bdc/Kconfig index 0d7b8c9f72fda17e4a5cfb904592806af817b2ae..eb8b553923609fbd09a0631ae3eaed3b1c5fff87 100644 --- a/drivers/usb/gadget/udc/bdc/Kconfig +++ b/drivers/usb/gadget/udc/bdc/Kconfig @@ -14,7 +14,7 @@ if USB_BDC_UDC comment "Platform Support" config USB_BDC_PCI tristate "BDC support for PCIe based platforms" - depends on PCI + depends on USB_PCI default USB_BDC_UDC help Enable support for platforms which have BDC connected through PCIe, such as Lego3 FPGA platform. diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d685d82dcf487c9f53bb3cd821424159d2e445d1..efce68e9a8e038557bdc36c8263bad03cef7e108 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1273,6 +1273,7 @@ void usb_del_gadget_udc(struct usb_gadget *gadget) flush_work(&gadget->work); device_unregister(&udc->dev); device_unregister(&gadget->dev); + memset(&gadget->dev, 0x00, sizeof(gadget->dev)); } EXPORT_SYMBOL_GPL(usb_del_gadget_udc); diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 8cabc5944d5f1d834db7dd2186777cd79536016b..7635fd7cc328caa371751d7a6ac15a5bb9ebdbae 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -442,23 +442,16 @@ static void set_link_state(struct dummy_hcd *dum_hcd) /* Report reset and disconnect events to the driver */ if (dum->driver && (disconnect || reset)) { stop_activity(dum); - spin_unlock(&dum->lock); if (reset) usb_gadget_udc_reset(&dum->gadget, dum->driver); else dum->driver->disconnect(&dum->gadget); - spin_lock(&dum->lock); } } else if (dum_hcd->active != dum_hcd->old_active) { - if (dum_hcd->old_active && dum->driver->suspend) { - spin_unlock(&dum->lock); + if (dum_hcd->old_active && dum->driver->suspend) dum->driver->suspend(&dum->gadget); - spin_lock(&dum->lock); - } else if (!dum_hcd->old_active && dum->driver->resume) { - spin_unlock(&dum->lock); + else if (!dum_hcd->old_active && dum->driver->resume) dum->driver->resume(&dum->gadget); - spin_lock(&dum->lock); - } } dum_hcd->old_status = dum_hcd->port_status; @@ -983,7 +976,9 @@ static int dummy_udc_stop(struct usb_gadget *g) struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; + spin_lock_irq(&dum->lock); dum->driver = NULL; + spin_unlock_irq(&dum->lock); return 0; } @@ -2008,7 +2003,7 @@ ss_hub_descriptor(struct usb_hub_descriptor *desc) HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; desc->u.ss.bHubHdrDecLat = 0x04; /* Worst case: 0.4 micro sec*/ - desc->u.ss.DeviceRemovable = 0xffff; + desc->u.ss.DeviceRemovable = 0; } static inline void hub_descriptor(struct usb_hub_descriptor *desc) @@ -2020,8 +2015,8 @@ static inline void hub_descriptor(struct usb_hub_descriptor *desc) HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); desc->bNbrPorts = 1; - desc->u.hs.DeviceRemovable[0] = 0xff; - desc->u.hs.DeviceRemovable[1] = 0xff; + desc->u.hs.DeviceRemovable[0] = 0; + desc->u.hs.DeviceRemovable[1] = 0xff; /* PortPwrCtrlMask */ } static int dummy_hub_control( @@ -2062,16 +2057,13 @@ static int dummy_hub_control( } break; case USB_PORT_FEAT_POWER: - if (hcd->speed == HCD_USB3) { - if (dum_hcd->port_status & USB_PORT_STAT_POWER) - dev_dbg(dummy_dev(dum_hcd), - "power-off\n"); - } else - if (dum_hcd->port_status & - USB_SS_PORT_STAT_POWER) - dev_dbg(dummy_dev(dum_hcd), - "power-off\n"); - /* FALLS THROUGH */ + dev_dbg(dummy_dev(dum_hcd), "power-off\n"); + if (hcd->speed == HCD_USB3) + dum_hcd->port_status &= ~USB_SS_PORT_STAT_POWER; + else + dum_hcd->port_status &= ~USB_PORT_STAT_POWER; + set_link_state(dum_hcd); + break; default: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); @@ -2242,14 +2234,13 @@ static int dummy_hub_control( if ((dum_hcd->port_status & USB_SS_PORT_STAT_POWER) != 0) { dum_hcd->port_status |= (1 << wValue); - set_link_state(dum_hcd); } } else if ((dum_hcd->port_status & USB_PORT_STAT_POWER) != 0) { dum_hcd->port_status |= (1 << wValue); - set_link_state(dum_hcd); } + set_link_state(dum_hcd); } break; case GetPortErrorCount: diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index b76fcdb763a0d27c5606e97807b18bf14e05ba9f..6f2f71c054be27f66cf56d56012aaf7332d2dad4 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -2675,6 +2675,8 @@ static const struct platform_device_id fsl_udc_devtype[] = { .name = "imx-udc-mx27", }, { .name = "imx-udc-mx51", + }, { + .name = "fsl-usb2-udc", }, { /* sentinel */ } diff --git a/drivers/usb/gadget/udc/mv_u3d_core.c b/drivers/usb/gadget/udc/mv_u3d_core.c index d365449a295a8ab823f9358146a0a910bcabba2d..772049afe1666404dcdf0ee1310feb344d574f33 100644 --- a/drivers/usb/gadget/udc/mv_u3d_core.c +++ b/drivers/usb/gadget/udc/mv_u3d_core.c @@ -1835,13 +1835,18 @@ static int mv_u3d_probe(struct platform_device *dev) } /* we will access controller register, so enable the u3d controller */ - clk_enable(u3d->clk); + retval = clk_enable(u3d->clk); + if (retval) { + dev_err(&dev->dev, "clk_enable error %d\n", retval); + goto err_u3d_enable; + } if (pdata->phy_init) { retval = pdata->phy_init(u3d->phy_regs); if (retval) { dev_err(&dev->dev, "init phy error %d\n", retval); - goto err_u3d_enable; + clk_disable(u3d->clk); + goto err_phy_init; } } @@ -1974,15 +1979,13 @@ static int mv_u3d_probe(struct platform_device *dev) dma_free_coherent(&dev->dev, u3d->ep_context_size, u3d->ep_context, u3d->ep_context_dma); err_alloc_ep_context: - if (pdata->phy_deinit) - pdata->phy_deinit(u3d->phy_regs); - clk_disable(u3d->clk); +err_phy_init: err_u3d_enable: iounmap(u3d->cap_regs); err_map_cap_regs: err_get_cap_regs: -err_get_clk: clk_put(u3d->clk); +err_get_clk: kfree(u3d); err_alloc_private: err_pdata: diff --git a/drivers/usb/gadget/udc/mv_udc_core.c b/drivers/usb/gadget/udc/mv_udc_core.c index 27ebb0d5449d0dda58863ffb28bcbd8f3514f93f..76f56c5762f9659e27aeb1304a4ff145986685ab 100644 --- a/drivers/usb/gadget/udc/mv_udc_core.c +++ b/drivers/usb/gadget/udc/mv_udc_core.c @@ -445,7 +445,8 @@ static int mv_ep_enable(struct usb_ep *_ep, struct mv_dqh *dqh; u16 max = 0; u32 bit_pos, epctrlx, direction; - unsigned char zlt = 0, ios = 0, mult = 0; + const unsigned char zlt = 1; + unsigned char ios, mult; unsigned long flags; ep = container_of(_ep, struct mv_ep, ep); @@ -465,8 +466,6 @@ static int mv_ep_enable(struct usb_ep *_ep, * disable HW zero length termination select * driver handles zero length packet through req->req.zero */ - zlt = 1; - bit_pos = 1 << ((direction == EP_DIR_OUT ? 0 : 16) + ep->ep_num); /* Check if the Endpoint is Primed */ @@ -481,16 +480,16 @@ static int mv_ep_enable(struct usb_ep *_ep, (unsigned)bit_pos); goto en_done; } + /* Set the max packet length, interrupt on Setup and Mult fields */ + ios = 0; + mult = 0; switch (desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) { case USB_ENDPOINT_XFER_BULK: - zlt = 1; - mult = 0; + case USB_ENDPOINT_XFER_INT: break; case USB_ENDPOINT_XFER_CONTROL: ios = 1; - case USB_ENDPOINT_XFER_INT: - mult = 0; break; case USB_ENDPOINT_XFER_ISOC: /* Calculate transactions needed for high bandwidth iso */ diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index 7dc0102abdfe240ade3729e847b57b43fde768f1..8f85a51bd2b3ca5237f078ed00ee4d7ad201e10a 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -653,7 +653,7 @@ net2272_request_dma(struct net2272 *dev, unsigned ep, u32 buf, dev->dma_busy = 1; /* initialize platform's dma */ -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI /* NET2272 addr, buffer addr, length, etc. */ switch (dev->dev_id) { case PCI_DEVICE_ID_RDK1: @@ -701,7 +701,7 @@ static void net2272_start_dma(struct net2272 *dev) { /* start platform's dma controller */ -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI switch (dev->dev_id) { case PCI_DEVICE_ID_RDK1: writeb((1 << CHANNEL_ENABLE) | (1 << CHANNEL_START), @@ -797,7 +797,7 @@ net2272_kick_dma(struct net2272_ep *ep, struct net2272_request *req) static void net2272_cancel_dma(struct net2272 *dev) { -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI switch (dev->dev_id) { case PCI_DEVICE_ID_RDK1: writeb(0, dev->rdk1.plx9054_base_addr + DMACSR0); @@ -2306,7 +2306,7 @@ net2272_probe_fin(struct net2272 *dev, unsigned int irqflags) return ret; } -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI /* * wrap this driver around the specified device, but diff --git a/drivers/usb/gadget/udc/net2272.h b/drivers/usb/gadget/udc/net2272.h index 127ab03fcde3ba40bf539d231ac92baaeff7f265..69bc9c3c6ce40b871cdf508fb403da74cb7c8ac5 100644 --- a/drivers/usb/gadget/udc/net2272.h +++ b/drivers/usb/gadget/udc/net2272.h @@ -472,7 +472,7 @@ struct net2272 { unsigned int base_shift; u16 __iomem *base_addr; union { -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI struct { void __iomem *plx9054_base_addr; void __iomem *epld_base_addr; diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 3828c2ec8623b155c9948ae90dcebdc4a0106ce6..f2cbd7f8005e149e028a11409aaa6aedc0716c36 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -569,7 +569,7 @@ static struct usb_request if (ep->dma) { struct net2280_dma *td; - td = pci_pool_alloc(ep->dev->requests, gfp_flags, + td = dma_pool_alloc(ep->dev->requests, gfp_flags, &req->td_dma); if (!td) { kfree(req); @@ -597,7 +597,7 @@ static void net2280_free_request(struct usb_ep *_ep, struct usb_request *_req) req = container_of(_req, struct net2280_request, req); WARN_ON(!list_empty(&req->queue)); if (req->td) - pci_pool_free(ep->dev->requests, req->td, req->td_dma); + dma_pool_free(ep->dev->requests, req->td, req->td_dma); kfree(req); } @@ -2470,11 +2470,8 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver) nuke(&dev->ep[i]); /* report disconnect; the driver is already quiesced */ - if (driver) { - spin_unlock(&dev->lock); + if (driver) driver->disconnect(&dev->gadget); - spin_lock(&dev->lock); - } usb_reinit(dev); } @@ -3348,8 +3345,6 @@ static void handle_stat0_irqs(struct net2280 *dev, u32 stat) BIT(PCI_RETRY_ABORT_INTERRUPT)) static void handle_stat1_irqs(struct net2280 *dev, u32 stat) -__releases(dev->lock) -__acquires(dev->lock) { struct net2280_ep *ep; u32 tmp, num, mask, scratch; @@ -3390,14 +3385,12 @@ __acquires(dev->lock) if (disconnect || reset) { stop_activity(dev, dev->driver); ep0_start(dev); - spin_unlock(&dev->lock); if (reset) usb_gadget_udc_reset (&dev->gadget, dev->driver); else (dev->driver->disconnect) (&dev->gadget); - spin_lock(&dev->lock); return; } } @@ -3579,10 +3572,10 @@ static void net2280_remove(struct pci_dev *pdev) for (i = 1; i < 5; i++) { if (!dev->ep[i].dummy) continue; - pci_pool_free(dev->requests, dev->ep[i].dummy, + dma_pool_free(dev->requests, dev->ep[i].dummy, dev->ep[i].td_dma); } - pci_pool_destroy(dev->requests); + dma_pool_destroy(dev->requests); } if (dev->got_irq) free_irq(pdev->irq, dev); @@ -3724,7 +3717,7 @@ static int net2280_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* DMA setup */ /* NOTE: we know only the 32 LSBs of dma addresses may be nonzero */ - dev->requests = pci_pool_create("requests", pdev, + dev->requests = dma_pool_create("requests", &pdev->dev, sizeof(struct net2280_dma), 0 /* no alignment requirements */, 0 /* or page-crossing issues */); @@ -3736,7 +3729,7 @@ static int net2280_probe(struct pci_dev *pdev, const struct pci_device_id *id) for (i = 1; i < 5; i++) { struct net2280_dma *td; - td = pci_pool_alloc(dev->requests, GFP_KERNEL, + td = dma_pool_alloc(dev->requests, GFP_KERNEL, &dev->ep[i].td_dma); if (!td) { ep_dbg(dev, "can't get dummy %d\n", i); diff --git a/drivers/usb/gadget/udc/net2280.h b/drivers/usb/gadget/udc/net2280.h index 2736a95751c3834fe5110f36201fa6363a00f33f..1088c3745999a364c225a809f68e3bd237faed95 100644 --- a/drivers/usb/gadget/udc/net2280.h +++ b/drivers/usb/gadget/udc/net2280.h @@ -187,7 +187,7 @@ struct net2280 { struct usb338x_ll_chi_regs __iomem *ll_chicken_reg; struct usb338x_pl_regs __iomem *plregs; - struct pci_pool *requests; + struct dma_pool *requests; /* statistics...*/ }; diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 8a365aad66fe2e38eaf0a869ae0f774349f694f2..84dcbcd756f04062b423fe95bd7b7938b206c4e6 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -355,8 +355,8 @@ struct pch_udc_dev { vbus_session:1, set_cfg_not_acked:1, waiting_zlp_ack:1; - struct pci_pool *data_requests; - struct pci_pool *stp_requests; + struct dma_pool *data_requests; + struct dma_pool *stp_requests; dma_addr_t dma_addr; struct usb_ctrlrequest setup_data; void __iomem *base_addr; @@ -1522,7 +1522,8 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, /* do not free first desc., will be done by free for request */ td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; - pci_pool_free(dev->data_requests, td, addr); + dma_pool_free(dev->data_requests, td, addr); + td->next = 0x00; addr = addr2; } req->chain_len = 1; @@ -1538,7 +1539,7 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, * * Return codes: * 0: success, - * -ENOMEM: pci_pool_alloc invocation fails + * -ENOMEM: dma_pool_alloc invocation fails */ static int pch_udc_create_dma_chain(struct pch_udc_ep *ep, struct pch_udc_request *req, @@ -1564,7 +1565,7 @@ static int pch_udc_create_dma_chain(struct pch_udc_ep *ep, if (bytes <= buf_len) break; last = td; - td = pci_pool_alloc(ep->dev->data_requests, gfp_flags, + td = dma_pool_alloc(ep->dev->data_requests, gfp_flags, &dma_addr); if (!td) goto nomem; @@ -1769,7 +1770,7 @@ static struct usb_request *pch_udc_alloc_request(struct usb_ep *usbep, if (!ep->dev->dma_addr) return &req->req; /* ep0 in requests are allocated from data pool here */ - dma_desc = pci_pool_alloc(ep->dev->data_requests, gfp, + dma_desc = dma_pool_alloc(ep->dev->data_requests, gfp, &req->td_data_phys); if (NULL == dma_desc) { kfree(req); @@ -1808,7 +1809,7 @@ static void pch_udc_free_request(struct usb_ep *usbep, if (req->td_data != NULL) { if (req->chain_len > 1) pch_udc_free_dma_chain(ep->dev, req); - pci_pool_free(ep->dev->data_requests, req->td_data, + dma_pool_free(ep->dev->data_requests, req->td_data, req->td_data_phys); } kfree(req); @@ -2913,7 +2914,7 @@ static int init_dma_pools(struct pch_udc_dev *dev) void *ep0out_buf; /* DMA setup */ - dev->data_requests = pci_pool_create("data_requests", dev->pdev, + dev->data_requests = dma_pool_create("data_requests", &dev->pdev->dev, sizeof(struct pch_udc_data_dma_desc), 0, 0); if (!dev->data_requests) { dev_err(&dev->pdev->dev, "%s: can't get request data pool\n", @@ -2922,7 +2923,7 @@ static int init_dma_pools(struct pch_udc_dev *dev) } /* dma desc for setup data */ - dev->stp_requests = pci_pool_create("setup requests", dev->pdev, + dev->stp_requests = dma_pool_create("setup requests", &dev->pdev->dev, sizeof(struct pch_udc_stp_dma_desc), 0, 0); if (!dev->stp_requests) { dev_err(&dev->pdev->dev, "%s: can't get setup request pool\n", @@ -2930,7 +2931,7 @@ static int init_dma_pools(struct pch_udc_dev *dev) return -ENOMEM; } /* setup */ - td_stp = pci_pool_alloc(dev->stp_requests, GFP_KERNEL, + td_stp = dma_pool_alloc(dev->stp_requests, GFP_KERNEL, &dev->ep[UDC_EP0OUT_IDX].td_stp_phys); if (!td_stp) { dev_err(&dev->pdev->dev, @@ -2940,7 +2941,7 @@ static int init_dma_pools(struct pch_udc_dev *dev) dev->ep[UDC_EP0OUT_IDX].td_stp = td_stp; /* data: 0 packets !? */ - td_data = pci_pool_alloc(dev->data_requests, GFP_KERNEL, + td_data = dma_pool_alloc(dev->data_requests, GFP_KERNEL, &dev->ep[UDC_EP0OUT_IDX].td_data_phys); if (!td_data) { dev_err(&dev->pdev->dev, @@ -3020,22 +3021,21 @@ static void pch_udc_remove(struct pci_dev *pdev) dev_err(&pdev->dev, "%s: gadget driver still bound!!!\n", __func__); /* dma pool cleanup */ - if (dev->data_requests) - pci_pool_destroy(dev->data_requests); + dma_pool_destroy(dev->data_requests); if (dev->stp_requests) { /* cleanup DMA desc's for ep0in */ if (dev->ep[UDC_EP0OUT_IDX].td_stp) { - pci_pool_free(dev->stp_requests, + dma_pool_free(dev->stp_requests, dev->ep[UDC_EP0OUT_IDX].td_stp, dev->ep[UDC_EP0OUT_IDX].td_stp_phys); } if (dev->ep[UDC_EP0OUT_IDX].td_data) { - pci_pool_free(dev->stp_requests, + dma_pool_free(dev->stp_requests, dev->ep[UDC_EP0OUT_IDX].td_data, dev->ep[UDC_EP0OUT_IDX].td_data_phys); } - pci_pool_destroy(dev->stp_requests); + dma_pool_destroy(dev->stp_requests); } if (dev->dma_addr) diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index 832c4fdbe98512a2b70b6b9e9424acc21a09b83b..d48e239660c31170696042313ebd1a66e79621db 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -1608,9 +1608,6 @@ static int pxa_udc_pullup(struct usb_gadget *_gadget, int is_active) return 0; } -static void udc_enable(struct pxa_udc *udc); -static void udc_disable(struct pxa_udc *udc); - /** * pxa_udc_vbus_session - Called by external transceiver to enable/disable udc * @_gadget: usb gadget diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 2218f91e92a61cd8078afe857ba347463aa8ff63..cd4c885297213bd3e1a5b024d0f2fdde3e6232d6 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -37,6 +38,9 @@ #define USB3_USB_INT_ENA_2 0x22c #define USB3_STUP_DAT_0 0x230 #define USB3_STUP_DAT_1 0x234 +#define USB3_USB_OTG_STA 0x268 +#define USB3_USB_OTG_INT_STA 0x26c +#define USB3_USB_OTG_INT_ENA 0x270 #define USB3_P0_MOD 0x280 #define USB3_P0_CON 0x288 #define USB3_P0_STA 0x28c @@ -124,6 +128,9 @@ /* USB_INT_ENA_2 and USB_INT_STA_2 */ #define USB_INT_2_PIPE(n) BIT(n) +/* USB_OTG_STA, USB_OTG_INT_STA and USB_OTG_INT_ENA */ +#define USB_OTG_IDMON BIT(4) + /* P0_MOD */ #define P0_MOD_DIR BIT(6) @@ -257,6 +264,8 @@ struct renesas_usb3 { struct usb_gadget gadget; struct usb_gadget_driver *driver; + struct extcon_dev *extcon; + struct work_struct extcon_work; struct renesas_usb3_ep *usb3_ep; int num_usb3_eps; @@ -269,6 +278,8 @@ struct renesas_usb3 { u8 ep0_buf[USB3_EP0_BUF_SIZE]; bool softconnect; bool workaround_for_vbus; + bool extcon_host; /* check id and set EXTCON_USB_HOST */ + bool extcon_usb; /* check vbus and set EXTCON_USB */ }; #define gadget_to_renesas_usb3(_gadget) \ @@ -332,6 +343,15 @@ static int usb3_wait(struct renesas_usb3 *usb3, u32 reg, u32 mask, return -EBUSY; } +static void renesas_usb3_extcon_work(struct work_struct *work) +{ + struct renesas_usb3 *usb3 = container_of(work, struct renesas_usb3, + extcon_work); + + extcon_set_state_sync(usb3->extcon, EXTCON_USB_HOST, usb3->extcon_host); + extcon_set_state_sync(usb3->extcon, EXTCON_USB, usb3->extcon_usb); +} + static void usb3_enable_irq_1(struct renesas_usb3 *usb3, u32 bits) { usb3_set_bit(usb3, bits, USB3_USB_INT_ENA_1); @@ -352,6 +372,11 @@ static void usb3_disable_pipe_irq(struct renesas_usb3 *usb3, int num) usb3_clear_bit(usb3, USB_INT_2_PIPE(num), USB3_USB_INT_ENA_2); } +static bool usb3_is_host(struct renesas_usb3 *usb3) +{ + return !(usb3_read(usb3, USB3_DRD_CON) & DRD_CON_PERI_CON); +} + static void usb3_init_axi_bridge(struct renesas_usb3 *usb3) { /* Set AXI_INT */ @@ -362,10 +387,6 @@ static void usb3_init_axi_bridge(struct renesas_usb3 *usb3) static void usb3_init_epc_registers(struct renesas_usb3 *usb3) { - /* FIXME: How to change host / peripheral mode as well? */ - usb3_set_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON); - usb3_clear_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON); - usb3_write(usb3, ~0, USB3_USB_INT_STA_1); usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG); } @@ -531,18 +552,70 @@ static void usb3_check_vbus(struct renesas_usb3 *usb3) if (usb3->workaround_for_vbus) { usb3_connect(usb3); } else { - if (usb3_read(usb3, USB3_USB_STA) & USB_STA_VBUS_STA) + usb3->extcon_usb = !!(usb3_read(usb3, USB3_USB_STA) & + USB_STA_VBUS_STA); + if (usb3->extcon_usb) usb3_connect(usb3); else usb3_disconnect(usb3); + + schedule_work(&usb3->extcon_work); } } +static void usb3_set_mode(struct renesas_usb3 *usb3, bool host) +{ + if (host) + usb3_clear_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON); + else + usb3_set_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON); +} + +static void usb3_vbus_out(struct renesas_usb3 *usb3, bool enable) +{ + if (enable) + usb3_set_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON); + else + usb3_clear_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON); +} + +static void usb3_mode_config(struct renesas_usb3 *usb3, bool host, bool a_dev) +{ + unsigned long flags; + + spin_lock_irqsave(&usb3->lock, flags); + usb3_set_mode(usb3, host); + usb3_vbus_out(usb3, a_dev); + if (!host && a_dev) /* for A-Peripheral */ + usb3_connect(usb3); + spin_unlock_irqrestore(&usb3->lock, flags); +} + +static bool usb3_is_a_device(struct renesas_usb3 *usb3) +{ + return !(usb3_read(usb3, USB3_USB_OTG_STA) & USB_OTG_IDMON); +} + +static void usb3_check_id(struct renesas_usb3 *usb3) +{ + usb3->extcon_host = usb3_is_a_device(usb3); + + if (usb3->extcon_host) + usb3_mode_config(usb3, true, true); + else + usb3_mode_config(usb3, false, false); + + schedule_work(&usb3->extcon_work); +} + static void renesas_usb3_init_controller(struct renesas_usb3 *usb3) { usb3_init_axi_bridge(usb3); usb3_init_epc_registers(usb3); + usb3_write(usb3, USB_OTG_IDMON, USB3_USB_OTG_INT_STA); + usb3_write(usb3, USB_OTG_IDMON, USB3_USB_OTG_INT_ENA); + usb3_check_id(usb3); usb3_check_vbus(usb3); } @@ -550,7 +623,7 @@ static void renesas_usb3_stop_controller(struct renesas_usb3 *usb3) { usb3_disconnect(usb3); usb3_write(usb3, 0, USB3_P0_INT_ENA); - usb3_write(usb3, 0, USB3_PN_INT_ENA); + usb3_write(usb3, 0, USB3_USB_OTG_INT_ENA); usb3_write(usb3, 0, USB3_USB_INT_ENA_1); usb3_write(usb3, 0, USB3_USB_INT_ENA_2); usb3_write(usb3, 0, USB3_AXI_INT_ENA); @@ -1401,7 +1474,13 @@ static void usb3_request_done_pipen(struct renesas_usb3 *usb3, struct renesas_usb3_request *usb3_req, int status) { - usb3_pn_stop(usb3); + unsigned long flags; + + spin_lock_irqsave(&usb3->lock, flags); + if (usb3_pn_change(usb3, usb3_ep->num)) + usb3_pn_stop(usb3); + spin_unlock_irqrestore(&usb3->lock, flags); + usb3_disable_pipe_irq(usb3, usb3_ep->num); usb3_request_done(usb3_ep, usb3_req, status); @@ -1430,30 +1509,46 @@ static void usb3_irq_epc_pipen_bfrdy(struct renesas_usb3 *usb3, int num) { struct renesas_usb3_ep *usb3_ep = usb3_get_ep(usb3, num); struct renesas_usb3_request *usb3_req = usb3_get_request(usb3_ep); + bool done = false; if (!usb3_req) return; + spin_lock(&usb3->lock); + if (usb3_pn_change(usb3, num)) + goto out; + if (usb3_ep->dir_in) { /* Do not stop the IN pipe here to detect LSTTR interrupt */ if (!usb3_write_pipe(usb3_ep, usb3_req, USB3_PN_WRITE)) usb3_clear_bit(usb3, PN_INT_BFRDY, USB3_PN_INT_ENA); } else { if (!usb3_read_pipe(usb3_ep, usb3_req, USB3_PN_READ)) - usb3_request_done_pipen(usb3, usb3_ep, usb3_req, 0); + done = true; } + +out: + /* need to unlock because usb3_request_done_pipen() locks it */ + spin_unlock(&usb3->lock); + + if (done) + usb3_request_done_pipen(usb3, usb3_ep, usb3_req, 0); } static void usb3_irq_epc_pipen(struct renesas_usb3 *usb3, int num) { u32 pn_int_sta; - if (usb3_pn_change(usb3, num) < 0) + spin_lock(&usb3->lock); + if (usb3_pn_change(usb3, num) < 0) { + spin_unlock(&usb3->lock); return; + } pn_int_sta = usb3_read(usb3, USB3_PN_INT_STA); pn_int_sta &= usb3_read(usb3, USB3_PN_INT_ENA); usb3_write(usb3, pn_int_sta, USB3_PN_INT_STA); + spin_unlock(&usb3->lock); if (pn_int_sta & PN_INT_LSTTR) usb3_irq_epc_pipen_lsttr(usb3, num); if (pn_int_sta & PN_INT_BFRDY) @@ -1474,10 +1569,22 @@ static void usb3_irq_epc_int_2(struct renesas_usb3 *usb3, u32 int_sta_2) } } +static void usb3_irq_idmon_change(struct renesas_usb3 *usb3) +{ + usb3_check_id(usb3); +} + +static void usb3_irq_otg_int(struct renesas_usb3 *usb3, u32 otg_int_sta) +{ + if (otg_int_sta & USB_OTG_IDMON) + usb3_irq_idmon_change(usb3); +} + static void usb3_irq_epc(struct renesas_usb3 *usb3) { u32 int_sta_1 = usb3_read(usb3, USB3_USB_INT_STA_1); u32 int_sta_2 = usb3_read(usb3, USB3_USB_INT_STA_2); + u32 otg_int_sta = usb3_read(usb3, USB3_USB_OTG_INT_STA); int_sta_1 &= usb3_read(usb3, USB3_USB_INT_ENA_1); if (int_sta_1) { @@ -1488,6 +1595,12 @@ static void usb3_irq_epc(struct renesas_usb3 *usb3) int_sta_2 &= usb3_read(usb3, USB3_USB_INT_ENA_2); if (int_sta_2) usb3_irq_epc_int_2(usb3, int_sta_2); + + otg_int_sta &= usb3_read(usb3, USB3_USB_OTG_INT_ENA); + if (otg_int_sta) { + usb3_write(usb3, otg_int_sta, USB3_USB_OTG_INT_STA); + usb3_irq_otg_int(usb3, otg_int_sta); + } } static irqreturn_t renesas_usb3_irq(int irq, void *_usb3) @@ -1568,6 +1681,7 @@ static int usb3_disable_pipe_n(struct renesas_usb3_ep *usb3_ep) spin_lock_irqsave(&usb3->lock, flags); if (!usb3_pn_change(usb3, usb3_ep->num)) { + usb3_write(usb3, 0, USB3_PN_INT_ENA); usb3_write(usb3, 0, USB3_PN_RAMMAP); usb3_clear_bit(usb3, PN_CON_EN, USB3_PN_CON); } @@ -1707,6 +1821,9 @@ static int renesas_usb3_start(struct usb_gadget *gadget, /* hook up the driver */ usb3->driver = driver; + pm_runtime_enable(usb3_to_dev(usb3)); + pm_runtime_get_sync(usb3_to_dev(usb3)); + renesas_usb3_init_controller(usb3); return 0; @@ -1715,14 +1832,14 @@ static int renesas_usb3_start(struct usb_gadget *gadget, static int renesas_usb3_stop(struct usb_gadget *gadget) { struct renesas_usb3 *usb3 = gadget_to_renesas_usb3(gadget); - unsigned long flags; - spin_lock_irqsave(&usb3->lock, flags); usb3->softconnect = false; usb3->gadget.speed = USB_SPEED_UNKNOWN; usb3->driver = NULL; renesas_usb3_stop_controller(usb3); - spin_unlock_irqrestore(&usb3->lock, flags); + + pm_runtime_put(usb3_to_dev(usb3)); + pm_runtime_disable(usb3_to_dev(usb3)); return 0; } @@ -1756,13 +1873,48 @@ static const struct usb_gadget_ops renesas_usb3_gadget_ops = { .set_selfpowered = renesas_usb3_set_selfpowered, }; +static ssize_t role_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct renesas_usb3 *usb3 = dev_get_drvdata(dev); + bool new_mode_is_host; + + if (!usb3->driver) + return -ENODEV; + + if (!strncmp(buf, "host", strlen("host"))) + new_mode_is_host = true; + else if (!strncmp(buf, "peripheral", strlen("peripheral"))) + new_mode_is_host = false; + else + return -EINVAL; + + if (new_mode_is_host == usb3_is_host(usb3)) + return -EINVAL; + + usb3_mode_config(usb3, new_mode_is_host, usb3_is_a_device(usb3)); + + return count; +} + +static ssize_t role_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct renesas_usb3 *usb3 = dev_get_drvdata(dev); + + if (!usb3->driver) + return -ENODEV; + + return sprintf(buf, "%s\n", usb3_is_host(usb3) ? "host" : "peripheral"); +} +static DEVICE_ATTR_RW(role); + /*------- platform_driver ------------------------------------------------*/ static int renesas_usb3_remove(struct platform_device *pdev) { struct renesas_usb3 *usb3 = platform_get_drvdata(pdev); - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); + device_remove_file(&pdev->dev, &dev_attr_role); usb_del_gadget_udc(&usb3->gadget); @@ -1894,6 +2046,12 @@ static const struct of_device_id usb3_of_match[] = { }; MODULE_DEVICE_TABLE(of, usb3_of_match); +static const unsigned int renesas_usb3_cable[] = { + EXTCON_USB, + EXTCON_USB_HOST, + EXTCON_NONE, +}; + static int renesas_usb3_probe(struct platform_device *pdev) { struct renesas_usb3 *usb3; @@ -1937,6 +2095,17 @@ static int renesas_usb3_probe(struct platform_device *pdev) if (ret < 0) return ret; + INIT_WORK(&usb3->extcon_work, renesas_usb3_extcon_work); + usb3->extcon = devm_extcon_dev_allocate(&pdev->dev, renesas_usb3_cable); + if (IS_ERR(usb3->extcon)) + return PTR_ERR(usb3->extcon); + + ret = devm_extcon_dev_register(&pdev->dev, usb3->extcon); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to register extcon\n"); + return ret; + } + /* for ep0 handling */ usb3->ep0_req = __renesas_usb3_ep_alloc_request(GFP_KERNEL); if (!usb3->ep0_req) @@ -1946,15 +2115,19 @@ static int renesas_usb3_probe(struct platform_device *pdev) if (ret < 0) goto err_add_udc; - usb3->workaround_for_vbus = priv->workaround_for_vbus; + ret = device_create_file(&pdev->dev, &dev_attr_role); + if (ret < 0) + goto err_dev_create; - pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); + usb3->workaround_for_vbus = priv->workaround_for_vbus; dev_info(&pdev->dev, "probed\n"); return 0; +err_dev_create: + usb_del_gadget_udc(&usb3->gadget); + err_add_udc: __renesas_usb3_ep_free_request(usb3->ep0_req); diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 407d947b34ea5b524c467c7c992d9774ded5414a..ababb91d654ab2a7ee3885da9c35ae8465290013 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -30,7 +30,7 @@ if USB_XHCI_HCD config USB_XHCI_PCI tristate - depends on PCI + depends on USB_PCI default y config USB_XHCI_PLATFORM @@ -139,7 +139,7 @@ if USB_EHCI_HCD config USB_EHCI_PCI tristate - depends on PCI + depends on USB_PCI default y config USB_EHCI_HCD_PMC_MSP @@ -188,7 +188,7 @@ config USB_EHCI_HCD_OMAP config USB_EHCI_HCD_ORION tristate "Support for Marvell EBU on-chip EHCI USB controller" - depends on USB_EHCI_HCD && PLAT_ORION + depends on USB_EHCI_HCD && (PLAT_ORION || ARCH_MVEBU) default y ---help--- Enables support for the on-chip EHCI controller on Marvell's @@ -525,7 +525,7 @@ config USB_OHCI_HCD_PPC_OF config USB_OHCI_HCD_PCI tristate "OHCI support for PCI-bus USB controllers" - depends on PCI + depends on USB_PCI default y select USB_OHCI_LITTLE_ENDIAN ---help--- @@ -606,7 +606,7 @@ endif # USB_OHCI_HCD config USB_UHCI_HCD tristate "UHCI HCD (most Intel and VIA) support" - depends on PCI || USB_UHCI_SUPPORT_NON_PCI_HC + depends on USB_PCI || USB_UHCI_SUPPORT_NON_PCI_HC ---help--- The Universal Host Controller Interface is a standard by Intel for accessing the USB hardware in the PC (which is also called the USB @@ -739,7 +739,7 @@ config USB_RENESAS_USBHS_HCD config USB_WHCI_HCD tristate "Wireless USB Host Controller Interface (WHCI) driver" - depends on PCI && USB && UWB + depends on USB_PCI && USB && UWB select USB_WUSB select UWB_WHCI help diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile index 2644537b7bcfb5cc8452243838815d3aa5669409..c77b0a38557b5bfcd6339302fc780ccdf9a14e35 100644 --- a/drivers/usb/host/Makefile +++ b/drivers/usb/host/Makefile @@ -27,9 +27,7 @@ endif obj-$(CONFIG_USB_WHCI_HCD) += whci/ -ifneq ($(CONFIG_USB), ) - obj-$(CONFIG_PCI) += pci-quirks.o -endif +obj-$(CONFIG_USB_PCI) += pci-quirks.o obj-$(CONFIG_USB_EHCI_HCD) += ehci-hcd.o obj-$(CONFIG_USB_EHCI_PCI) += ehci-pci.o diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 1a2614aae42cabd8859f1ab327034d18c5f46c12..cbb9b8e12c3ce7e8f18cd8d0765dfc2dc34bd29b 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -803,7 +803,7 @@ static ssize_t fill_registers_buffer(struct debug_buffer *buf) size -= temp; next += temp; -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI /* EHCI 0.96 and later may have "extended capabilities" */ if (dev_is_pci(hcd->self.controller)) { struct pci_dev *pdev; diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 3733aab46efece4fada516c9a590ed8050a467d1..4a08b70c81aa8f31b7baa1c1ea122416448a6c03 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -96,8 +96,8 @@ static int fsl_ehci_drv_probe(struct platform_device *pdev) } irq = res->start; - hcd = usb_create_hcd(&fsl_ehci_hc_driver, &pdev->dev, - dev_name(&pdev->dev)); + hcd = __usb_create_hcd(&fsl_ehci_hc_driver, pdev->dev.parent, + &pdev->dev, dev_name(&pdev->dev), NULL); if (!hcd) { retval = -ENOMEM; goto err1; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index ac2c4eab478db7f91c8b0f705eeadd22cd818604..6e834b83a1040de0a59a9f4cd30a185f558e3fd2 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -597,7 +597,7 @@ static int ehci_run (struct usb_hcd *hcd) /* * hcc_params controls whether ehci->regs->segment must (!!!) * be used; it constrains QH/ITD/SITD and QTD locations. - * pci_pool consistent memory always uses segment zero. + * dma_pool consistent memory always uses segment zero. * streaming mappings for I/O buffers, like pci_map_single(), * can return segments above 4GB, if the device allows. * diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c index 4de43011df23aa970472eda236edb1e334dad0a3..9b7e639772153b43e8f14a7d18df8094f9339662 100644 --- a/drivers/usb/host/ehci-mem.c +++ b/drivers/usb/host/ehci-mem.c @@ -138,7 +138,7 @@ static void ehci_mem_cleanup (struct ehci_hcd *ehci) ehci->sitd_pool = NULL; if (ehci->periodic) - dma_free_coherent (ehci_to_hcd(ehci)->self.controller, + dma_free_coherent(ehci_to_hcd(ehci)->self.sysdev, ehci->periodic_size * sizeof (u32), ehci->periodic, ehci->periodic_dma); ehci->periodic = NULL; @@ -155,7 +155,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags) /* QTDs for control/bulk/intr transfers */ ehci->qtd_pool = dma_pool_create ("ehci_qtd", - ehci_to_hcd(ehci)->self.controller, + ehci_to_hcd(ehci)->self.sysdev, sizeof (struct ehci_qtd), 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); @@ -165,7 +165,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags) /* QHs for control/bulk/intr transfers */ ehci->qh_pool = dma_pool_create ("ehci_qh", - ehci_to_hcd(ehci)->self.controller, + ehci_to_hcd(ehci)->self.sysdev, sizeof(struct ehci_qh_hw), 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); @@ -179,7 +179,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags) /* ITD for high speed ISO transfers */ ehci->itd_pool = dma_pool_create ("ehci_itd", - ehci_to_hcd(ehci)->self.controller, + ehci_to_hcd(ehci)->self.sysdev, sizeof (struct ehci_itd), 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); @@ -189,7 +189,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags) /* SITD for full/low speed split ISO transfers */ ehci->sitd_pool = dma_pool_create ("ehci_sitd", - ehci_to_hcd(ehci)->self.controller, + ehci_to_hcd(ehci)->self.sysdev, sizeof (struct ehci_sitd), 32 /* byte alignment (for hw parts) */, 4096 /* can't cross 4K */); @@ -199,7 +199,7 @@ static int ehci_mem_init (struct ehci_hcd *ehci, gfp_t flags) /* Hardware periodic table */ ehci->periodic = (__le32 *) - dma_alloc_coherent (ehci_to_hcd(ehci)->self.controller, + dma_alloc_coherent(ehci_to_hcd(ehci)->self.sysdev, ehci->periodic_size * sizeof(__le32), &ehci->periodic_dma, flags); if (ehci->periodic == NULL) { diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index ee8d5faa01947cd6d2c8cf708273c0521762a797..1aec87ec68df6768090581ccac4c836dbee651dd 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -47,6 +47,18 @@ #define USB_PHY_IVREF_CTRL 0x440 #define USB_PHY_TST_GRP_CTRL 0x450 +#define USB_SBUSCFG 0x90 + +/* BAWR = BARD = 3 : Align read/write bursts packets larger than 128 bytes */ +#define USB_SBUSCFG_BAWR_ALIGN_128B (0x3 << 6) +#define USB_SBUSCFG_BARD_ALIGN_128B (0x3 << 3) +/* AHBBRST = 3 : Align AHB Burst to INCR16 (64 bytes) */ +#define USB_SBUSCFG_AHBBRST_INCR16 (0x3 << 0) + +#define USB_SBUSCFG_DEF_VAL (USB_SBUSCFG_BAWR_ALIGN_128B \ + | USB_SBUSCFG_BARD_ALIGN_128B \ + | USB_SBUSCFG_AHBBRST_INCR16) + #define DRIVER_DESC "EHCI orion driver" #define hcd_to_orion_priv(h) ((struct orion_ehci_hcd *)hcd_to_ehci(h)->priv) @@ -151,8 +163,31 @@ ehci_orion_conf_mbus_windows(struct usb_hcd *hcd, } } +static int ehci_orion_drv_reset(struct usb_hcd *hcd) +{ + struct device *dev = hcd->self.controller; + int ret; + + ret = ehci_setup(hcd); + if (ret) + return ret; + + /* + * For SoC without hlock, need to program sbuscfg value to guarantee + * AHB master's burst would not overrun or underrun FIFO. + * + * sbuscfg reg has to be set after usb controller reset, otherwise + * the value would be override to 0. + */ + if (of_device_is_compatible(dev->of_node, "marvell,armada-3700-ehci")) + wrl(USB_SBUSCFG, USB_SBUSCFG_DEF_VAL); + + return ret; +} + static const struct ehci_driver_overrides orion_overrides __initconst = { .extra_priv_size = sizeof(struct orion_ehci_hcd), + .reset = ehci_orion_drv_reset, }; static int ehci_orion_drv_probe(struct platform_device *pdev) @@ -310,6 +345,7 @@ static int ehci_orion_drv_remove(struct platform_device *pdev) static const struct of_device_id ehci_orion_dt_ids[] = { { .compatible = "marvell,orion-ehci", }, + { .compatible = "marvell,armada-3700-ehci", }, {}, }; MODULE_DEVICE_TABLE(of, ehci_orion_dt_ids); diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c index a268d9e8d6cfb17b214278e23c44267cf8c06e33..f1908ea9fbd863f9d693d6a852e154e72087770c 100644 --- a/drivers/usb/host/ehci-platform.c +++ b/drivers/usb/host/ehci-platform.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "ehci.h" @@ -220,6 +221,9 @@ static int ehci_platform_probe(struct platform_device *dev) if (IS_ERR(priv->phys[phy_num])) { err = PTR_ERR(priv->phys[phy_num]); goto err_put_hcd; + } else if (!hcd->phy) { + /* Avoiding phy_get() in usb_add_hcd() */ + hcd->phy = priv->phys[phy_num]; } } @@ -297,6 +301,7 @@ static int ehci_platform_probe(struct platform_device *dev) goto err_power; device_wakeup_enable(hcd->self.controller); + device_enable_async_suspend(hcd->self.controller); platform_set_drvdata(dev, hcd); return err; @@ -370,6 +375,7 @@ static int ehci_platform_resume(struct device *dev) struct usb_ehci_pdata *pdata = dev_get_platdata(dev); struct platform_device *pdev = to_platform_device(dev); struct ehci_platform_priv *priv = hcd_to_ehci_priv(hcd); + struct device *companion_dev; if (pdata->power_on) { int err = pdata->power_on(pdev); @@ -377,6 +383,12 @@ static int ehci_platform_resume(struct device *dev) return err; } + companion_dev = usb_of_get_companion_dev(hcd->self.controller); + if (companion_dev) { + device_pm_wait_for_dev(hcd->self.controller, companion_dev); + put_device(companion_dev); + } + ehci_resume(hcd, priv->reset_on_resume); return 0; } diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index 1c5b34b74860f1c3aed374f2cfcd0668b1124bcf..ced08dc229ad02ef394816b657e7d5343b1d2e84 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -5047,7 +5047,7 @@ static int fotg210_run(struct usb_hcd *hcd) /* * hcc_params controls whether fotg210->regs->segment must (!!!) * be used; it constrains QH/ITD/SITD and QTD locations. - * pci_pool consistent memory always uses segment zero. + * dma_pool consistent memory always uses segment zero. * streaming mappings for I/O buffers, like pci_map_single(), * can return segments above 4GB, if the device allows. * diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index b6daf2e6998936021e590a444633dbe28e100878..44924824fa414a28dfbf10a715b658e6b493a462 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -231,7 +231,8 @@ static int ohci_urb_enqueue ( /* Start up the I/O watchdog timer, if it's not running */ if (!timer_pending(&ohci->io_watchdog) && - list_empty(&ohci->eds_in_use)) { + list_empty(&ohci->eds_in_use) && + !(ohci->flags & OHCI_QUIRK_QEMU)) { ohci->prev_frame_no = ohci_frame_no(ohci); mod_timer(&ohci->io_watchdog, jiffies + IO_WATCHDOG_DELAY); @@ -994,7 +995,7 @@ static void ohci_stop (struct usb_hcd *hcd) /*-------------------------------------------------------------------------*/ -#if defined(CONFIG_PM) || defined(CONFIG_PCI) +#if defined(CONFIG_PM) || defined(CONFIG_USB_PCI) /* must not be called from interrupt context */ int ohci_restart(struct ohci_hcd *ohci) diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index bb1509675727b374586d61917920578cc7631a45..a84aebe9b0a97880ef1f83a17b483e7586f190b1 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -164,6 +164,15 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) return 0; } +static int ohci_quirk_qemu(struct usb_hcd *hcd) +{ + struct ohci_hcd *ohci = hcd_to_ohci(hcd); + + ohci->flags |= OHCI_QUIRK_QEMU; + ohci_dbg(ohci, "enabled qemu quirk\n"); + return 0; +} + /* List of quirks for OHCI */ static const struct pci_device_id ohci_pci_quirks[] = { { @@ -214,6 +223,13 @@ static const struct pci_device_id ohci_pci_quirks[] = { PCI_DEVICE(PCI_VENDOR_ID_ATI, 0x4399), .driver_data = (unsigned long)ohci_quirk_amd700, }, + { + .vendor = PCI_VENDOR_ID_APPLE, + .device = 0x003f, + .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, + .subdevice = PCI_SUBDEVICE_ID_QEMU, + .driver_data = (unsigned long)ohci_quirk_qemu, + }, /* FIXME for some of the early AMD 760 southbridges, OHCI * won't work at all. blacklist them. diff --git a/drivers/usb/host/ohci-platform.c b/drivers/usb/host/ohci-platform.c index 898b74086c1292d1c63110795557155962adc1ba..6368fce43197c9bdfe80d5fd5718eedbea50e9fb 100644 --- a/drivers/usb/host/ohci-platform.c +++ b/drivers/usb/host/ohci-platform.c @@ -183,6 +183,9 @@ static int ohci_platform_probe(struct platform_device *dev) if (IS_ERR(priv->phys[phy_num])) { err = PTR_ERR(priv->phys[phy_num]); goto err_put_hcd; + } else if (!hcd->phy) { + /* Avoiding phy_get() in usb_add_hcd() */ + hcd->phy = priv->phys[phy_num]; } } diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index 37f1725e7a469c45fa035a46ed9827a5b92bc6c7..12742d002d2dc421c61dc25352ff687247b1b3cf 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -418,6 +418,7 @@ struct ohci_hcd { #define OHCI_QUIRK_AMD_PLL 0x200 /* AMD PLL quirk*/ #define OHCI_QUIRK_AMD_PREFETCH 0x400 /* pre-fetch for ISO transfer */ #define OHCI_QUIRK_GLOBAL_SUSPEND 0x800 /* must suspend ports */ +#define OHCI_QUIRK_QEMU 0x1000 /* relax timing expectations */ // there are also chip quirks/bugs in init logic @@ -438,7 +439,7 @@ struct ohci_hcd { }; -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI static inline int quirk_nec(struct ohci_hcd *ohci) { return ohci->flags & OHCI_QUIRK_NEC; diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c index bcf531c44c70cf220c9ab268cdbed1b13144c91e..ed20fb34c897f5ce0fe4f004f1dd35ebb9c35e36 100644 --- a/drivers/usb/host/oxu210hp-hcd.c +++ b/drivers/usb/host/oxu210hp-hcd.c @@ -2708,7 +2708,7 @@ static int oxu_run(struct usb_hcd *hcd) /* hcc_params controls whether oxu->regs->segment must (!!!) * be used; it constrains QH/ITD/SITD and QTD locations. - * pci_pool consistent memory always uses segment zero. + * dma_pool consistent memory always uses segment zero. * streaming mappings for I/O buffers, like pci_map_single(), * can return segments above 4GB, if the device allows. * diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index c622ddf21c94e61f6c8089cdc493088f33973599..0222195bd5b0e2a954ec8d2e841720b577c7282a 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -1,7 +1,7 @@ #ifndef __LINUX_USB_PCI_QUIRKS_H #define __LINUX_USB_PCI_QUIRKS_H -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI void uhci_reset_hc(struct pci_dev *pdev, unsigned long base); int uhci_check_and_reset_hc(struct pci_dev *pdev, unsigned long base); int usb_amd_find_chipset_info(void); @@ -21,6 +21,6 @@ static inline void usb_amd_quirk_pll_enable(void) {} static inline void usb_amd_dev_put(void) {} static inline void usb_disable_xhci_ports(struct pci_dev *xhci_pdev) {} static inline void sb800_prefetch(struct device *dev, int on) {} -#endif /* CONFIG_PCI */ +#endif /* CONFIG_USB_PCI */ #endif /* __LINUX_USB_PCI_QUIRKS_H */ diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index bfa7fa3d2eea0143b89298b3c722f9627af98fce..7bf78be1fd32503571df68999197008c85b64879 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -1269,7 +1269,7 @@ static void set_td_timer(struct r8a66597 *r8a66597, struct r8a66597_td *td) time = 30; break; default: - time = 300; + time = 50; break; } @@ -1785,6 +1785,7 @@ static void r8a66597_td_timer(unsigned long _r8a66597) pipe = td->pipe; pipe_stop(r8a66597, pipe); + /* Select a different address or endpoint */ new_td = td; do { list_move_tail(&new_td->queue, @@ -1794,7 +1795,8 @@ static void r8a66597_td_timer(unsigned long _r8a66597) new_td = td; break; } - } while (td != new_td && td->address == new_td->address); + } while (td != new_td && td->address == new_td->address && + td->pipe->info.epnum == new_td->pipe->info.epnum); start_transfer(r8a66597, new_td); diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 683098afa93ea150ad9888875b6e11ad2d5d33be..94b150196d4f5d575ba5775aae1a29f112df0f55 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -837,7 +837,7 @@ static int uhci_count_ports(struct usb_hcd *hcd) static const char hcd_name[] = "uhci_hcd"; -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI #include "uhci-pci.c" #define PCI_DRIVER uhci_pci_driver #endif diff --git a/drivers/usb/host/uhci-hcd.h b/drivers/usb/host/uhci-hcd.h index 6f986d82472d7daf2fad8ff14bac8b201d3391fb..7fa318a3091d6556a1e9bb46fa3eda0f4ba39625 100644 --- a/drivers/usb/host/uhci-hcd.h +++ b/drivers/usb/host/uhci-hcd.h @@ -530,7 +530,7 @@ static inline void uhci_writeb(const struct uhci_hcd *uhci, u8 val, int reg) #else /* Support non-PCI host controllers */ -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI /* Support PCI and non-PCI host controllers */ #define uhci_has_pci_registers(u) ((u)->io_addr != 0) #else diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c index 2b4a00fa735dfef5c51b4a9f583e5783e87ad957..2c83b37ae8f26168c1f09b5471285241c12f41f4 100644 --- a/drivers/usb/host/xhci-dbg.c +++ b/drivers/usb/host/xhci-dbg.c @@ -254,157 +254,6 @@ void xhci_print_registers(struct xhci_hcd *xhci) xhci_print_ports(xhci); } -void xhci_print_trb_offsets(struct xhci_hcd *xhci, union xhci_trb *trb) -{ - int i; - for (i = 0; i < 4; i++) - xhci_dbg(xhci, "Offset 0x%x = 0x%x\n", - i*4, trb->generic.field[i]); -} - -/** - * Debug a transfer request block (TRB). - */ -void xhci_debug_trb(struct xhci_hcd *xhci, union xhci_trb *trb) -{ - u64 address; - u32 type = le32_to_cpu(trb->link.control) & TRB_TYPE_BITMASK; - - switch (type) { - case TRB_TYPE(TRB_LINK): - xhci_dbg(xhci, "Link TRB:\n"); - xhci_print_trb_offsets(xhci, trb); - - address = le64_to_cpu(trb->link.segment_ptr); - xhci_dbg(xhci, "Next ring segment DMA address = 0x%llx\n", address); - - xhci_dbg(xhci, "Interrupter target = 0x%x\n", - GET_INTR_TARGET(le32_to_cpu(trb->link.intr_target))); - xhci_dbg(xhci, "Cycle bit = %u\n", - le32_to_cpu(trb->link.control) & TRB_CYCLE); - xhci_dbg(xhci, "Toggle cycle bit = %u\n", - le32_to_cpu(trb->link.control) & LINK_TOGGLE); - xhci_dbg(xhci, "No Snoop bit = %u\n", - le32_to_cpu(trb->link.control) & TRB_NO_SNOOP); - break; - case TRB_TYPE(TRB_TRANSFER): - address = le64_to_cpu(trb->trans_event.buffer); - /* - * FIXME: look at flags to figure out if it's an address or if - * the data is directly in the buffer field. - */ - xhci_dbg(xhci, "DMA address or buffer contents= %llu\n", address); - break; - case TRB_TYPE(TRB_COMPLETION): - address = le64_to_cpu(trb->event_cmd.cmd_trb); - xhci_dbg(xhci, "Command TRB pointer = %llu\n", address); - xhci_dbg(xhci, "Completion status = %u\n", - GET_COMP_CODE(le32_to_cpu(trb->event_cmd.status))); - xhci_dbg(xhci, "Flags = 0x%x\n", - le32_to_cpu(trb->event_cmd.flags)); - break; - default: - xhci_dbg(xhci, "Unknown TRB with TRB type ID %u\n", - (unsigned int) type>>10); - xhci_print_trb_offsets(xhci, trb); - break; - } -} - -/** - * Debug a segment with an xHCI ring. - * - * @return The Link TRB of the segment, or NULL if there is no Link TRB - * (which is a bug, since all segments must have a Link TRB). - * - * Prints out all TRBs in the segment, even those after the Link TRB. - * - * XXX: should we print out TRBs that the HC owns? As long as we don't - * write, that should be fine... We shouldn't expect that the memory pointed to - * by the TRB is valid at all. Do we care about ones the HC owns? Probably, - * for HC debugging. - */ -void xhci_debug_segment(struct xhci_hcd *xhci, struct xhci_segment *seg) -{ - int i; - u64 addr = seg->dma; - union xhci_trb *trb = seg->trbs; - - for (i = 0; i < TRBS_PER_SEGMENT; i++) { - trb = &seg->trbs[i]; - xhci_dbg(xhci, "@%016llx %08x %08x %08x %08x\n", addr, - lower_32_bits(le64_to_cpu(trb->link.segment_ptr)), - upper_32_bits(le64_to_cpu(trb->link.segment_ptr)), - le32_to_cpu(trb->link.intr_target), - le32_to_cpu(trb->link.control)); - addr += sizeof(*trb); - } -} - -void xhci_dbg_ring_ptrs(struct xhci_hcd *xhci, struct xhci_ring *ring) -{ - xhci_dbg(xhci, "Ring deq = %p (virt), 0x%llx (dma)\n", - ring->dequeue, - (unsigned long long)xhci_trb_virt_to_dma(ring->deq_seg, - ring->dequeue)); - xhci_dbg(xhci, "Ring deq updated %u times\n", - ring->deq_updates); - xhci_dbg(xhci, "Ring enq = %p (virt), 0x%llx (dma)\n", - ring->enqueue, - (unsigned long long)xhci_trb_virt_to_dma(ring->enq_seg, - ring->enqueue)); - xhci_dbg(xhci, "Ring enq updated %u times\n", - ring->enq_updates); -} - -/** - * Debugging for an xHCI ring, which is a queue broken into multiple segments. - * - * Print out each segment in the ring. Check that the DMA address in - * each link segment actually matches the segment's stored DMA address. - * Check that the link end bit is only set at the end of the ring. - * Check that the dequeue and enqueue pointers point to real data in this ring - * (not some other ring). - */ -void xhci_debug_ring(struct xhci_hcd *xhci, struct xhci_ring *ring) -{ - /* FIXME: Throw an error if any segment doesn't have a Link TRB */ - struct xhci_segment *seg; - struct xhci_segment *first_seg = ring->first_seg; - xhci_debug_segment(xhci, first_seg); - - if (!ring->enq_updates && !ring->deq_updates) { - xhci_dbg(xhci, " Ring has not been updated\n"); - return; - } - for (seg = first_seg->next; seg != first_seg; seg = seg->next) - xhci_debug_segment(xhci, seg); -} - -void xhci_dbg_ep_rings(struct xhci_hcd *xhci, - unsigned int slot_id, unsigned int ep_index, - struct xhci_virt_ep *ep) -{ - int i; - struct xhci_ring *ring; - - if (ep->ep_state & EP_HAS_STREAMS) { - for (i = 1; i < ep->stream_info->num_streams; i++) { - ring = ep->stream_info->stream_rings[i]; - xhci_dbg(xhci, "Dev %d endpoint %d stream ID %d:\n", - slot_id, ep_index, i); - xhci_debug_segment(xhci, ring->deq_seg); - } - } else { - ring = ep->ring; - if (!ring) - return; - xhci_dbg(xhci, "Dev %d endpoint ring %d:\n", - slot_id, ep_index); - xhci_debug_segment(xhci, ring->deq_seg); - } -} - void xhci_dbg_erst(struct xhci_hcd *xhci, struct xhci_erst *erst) { u64 addr = erst->erst_dma_addr; @@ -434,166 +283,13 @@ void xhci_dbg_cmd_ptrs(struct xhci_hcd *xhci) upper_32_bits(val)); } -/* Print the last 32 bytes for 64-byte contexts */ -static void dbg_rsvd64(struct xhci_hcd *xhci, u64 *ctx, dma_addr_t dma) -{ - int i; - for (i = 0; i < 4; i++) { - xhci_dbg(xhci, "@%p (virt) @%08llx " - "(dma) %#08llx - rsvd64[%d]\n", - &ctx[4 + i], (unsigned long long)dma, - ctx[4 + i], i); - dma += 8; - } -} - char *xhci_get_slot_state(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx) { struct xhci_slot_ctx *slot_ctx = xhci_get_slot_ctx(xhci, ctx); + int state = GET_SLOT_STATE(le32_to_cpu(slot_ctx->dev_state)); - switch (GET_SLOT_STATE(le32_to_cpu(slot_ctx->dev_state))) { - case SLOT_STATE_ENABLED: - return "enabled/disabled"; - case SLOT_STATE_DEFAULT: - return "default"; - case SLOT_STATE_ADDRESSED: - return "addressed"; - case SLOT_STATE_CONFIGURED: - return "configured"; - default: - return "reserved"; - } -} - -static void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx) -{ - /* Fields are 32 bits wide, DMA addresses are in bytes */ - int field_size = 32 / 8; - int i; - - struct xhci_slot_ctx *slot_ctx = xhci_get_slot_ctx(xhci, ctx); - dma_addr_t dma = ctx->dma + - ((unsigned long)slot_ctx - (unsigned long)ctx->bytes); - int csz = HCC_64BYTE_CONTEXT(xhci->hcc_params); - - xhci_dbg(xhci, "Slot Context:\n"); - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - dev_info\n", - &slot_ctx->dev_info, - (unsigned long long)dma, slot_ctx->dev_info); - dma += field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - dev_info2\n", - &slot_ctx->dev_info2, - (unsigned long long)dma, slot_ctx->dev_info2); - dma += field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - tt_info\n", - &slot_ctx->tt_info, - (unsigned long long)dma, slot_ctx->tt_info); - dma += field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - dev_state\n", - &slot_ctx->dev_state, - (unsigned long long)dma, slot_ctx->dev_state); - dma += field_size; - for (i = 0; i < 4; i++) { - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - rsvd[%d]\n", - &slot_ctx->reserved[i], (unsigned long long)dma, - slot_ctx->reserved[i], i); - dma += field_size; - } - - if (csz) - dbg_rsvd64(xhci, (u64 *)slot_ctx, dma); -} - -static void xhci_dbg_ep_ctx(struct xhci_hcd *xhci, - struct xhci_container_ctx *ctx, - unsigned int last_ep) -{ - int i, j; - int last_ep_ctx = 31; - /* Fields are 32 bits wide, DMA addresses are in bytes */ - int field_size = 32 / 8; - int csz = HCC_64BYTE_CONTEXT(xhci->hcc_params); - - if (last_ep < 31) - last_ep_ctx = last_ep + 1; - for (i = 0; i < last_ep_ctx; i++) { - unsigned int epaddr = xhci_get_endpoint_address(i); - struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, ctx, i); - dma_addr_t dma = ctx->dma + - ((unsigned long)ep_ctx - (unsigned long)ctx->bytes); - - xhci_dbg(xhci, "%s Endpoint %02d Context (ep_index %02d):\n", - usb_endpoint_out(epaddr) ? "OUT" : "IN", - epaddr & USB_ENDPOINT_NUMBER_MASK, i); - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - ep_info\n", - &ep_ctx->ep_info, - (unsigned long long)dma, ep_ctx->ep_info); - dma += field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - ep_info2\n", - &ep_ctx->ep_info2, - (unsigned long long)dma, ep_ctx->ep_info2); - dma += field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08llx - deq\n", - &ep_ctx->deq, - (unsigned long long)dma, ep_ctx->deq); - dma += 2*field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - tx_info\n", - &ep_ctx->tx_info, - (unsigned long long)dma, ep_ctx->tx_info); - dma += field_size; - for (j = 0; j < 3; j++) { - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - rsvd[%d]\n", - &ep_ctx->reserved[j], - (unsigned long long)dma, - ep_ctx->reserved[j], j); - dma += field_size; - } - - if (csz) - dbg_rsvd64(xhci, (u64 *)ep_ctx, dma); - } -} - -void xhci_dbg_ctx(struct xhci_hcd *xhci, - struct xhci_container_ctx *ctx, - unsigned int last_ep) -{ - int i; - /* Fields are 32 bits wide, DMA addresses are in bytes */ - int field_size = 32 / 8; - dma_addr_t dma = ctx->dma; - int csz = HCC_64BYTE_CONTEXT(xhci->hcc_params); - - if (ctx->type == XHCI_CTX_TYPE_INPUT) { - struct xhci_input_control_ctx *ctrl_ctx = - xhci_get_input_control_ctx(ctx); - if (!ctrl_ctx) { - xhci_warn(xhci, "Could not get input context, bad type.\n"); - return; - } - - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - drop flags\n", - &ctrl_ctx->drop_flags, (unsigned long long)dma, - ctrl_ctx->drop_flags); - dma += field_size; - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - add flags\n", - &ctrl_ctx->add_flags, (unsigned long long)dma, - ctrl_ctx->add_flags); - dma += field_size; - for (i = 0; i < 6; i++) { - xhci_dbg(xhci, "@%p (virt) @%08llx (dma) %#08x - rsvd2[%d]\n", - &ctrl_ctx->rsvd2[i], (unsigned long long)dma, - ctrl_ctx->rsvd2[i], i); - dma += field_size; - } - - if (csz) - dbg_rsvd64(xhci, (u64 *)ctrl_ctx, dma); - } - - xhci_dbg_slot_ctx(xhci, ctx); - xhci_dbg_ep_ctx(xhci, ctx, last_ep); + return xhci_slot_state_string(state); } void xhci_dbg_trace(struct xhci_hcd *xhci, void (*trace)(struct va_format *), diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 3bddeaa1e2d768feebb99c5e091de947562b799c..0dde49c35dd23d858feb2d537840b568fd1c4643 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -392,10 +392,8 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) trace_xhci_stop_device(virt_dev); cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO); - if (!cmd) { - xhci_dbg(xhci, "Couldn't allocate command structure.\n"); + if (!cmd) return -ENOMEM; - } spin_lock_irqsave(&xhci->lock, flags); for (i = LAST_EP_INDEX; i > 0; i--) { @@ -421,7 +419,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) wait_for_completion(cmd->completion); if (cmd->status == COMP_COMMAND_ABORTED || - cmd->status == COMP_STOPPED) { + cmd->status == COMP_COMMAND_RING_STOPPED) { xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n"); ret = -ETIME; } @@ -540,6 +538,119 @@ static int xhci_get_ports(struct usb_hcd *hcd, __le32 __iomem ***port_array) return max_ports; } +static __le32 __iomem *xhci_get_port_io_addr(struct usb_hcd *hcd, int index) +{ + __le32 __iomem **port_array; + + xhci_get_ports(hcd, &port_array); + return port_array[index]; +} + +/* + * xhci_set_port_power() must be called with xhci->lock held. + * It will release and re-aquire the lock while calling ACPI + * method. + */ +static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, + u16 index, bool on, unsigned long *flags) +{ + __le32 __iomem *addr; + u32 temp; + + addr = xhci_get_port_io_addr(hcd, index); + temp = readl(addr); + temp = xhci_port_state_to_neutral(temp); + if (on) { + /* Power on */ + writel(temp | PORT_POWER, addr); + temp = readl(addr); + xhci_dbg(xhci, "set port power, actual port %d status = 0x%x\n", + index, temp); + } else { + /* Power off */ + writel(temp & ~PORT_POWER, addr); + } + + spin_unlock_irqrestore(&xhci->lock, *flags); + temp = usb_acpi_power_manageable(hcd->self.root_hub, + index); + if (temp) + usb_acpi_set_power_state(hcd->self.root_hub, + index, on); + spin_lock_irqsave(&xhci->lock, *flags); +} + +static void xhci_port_set_test_mode(struct xhci_hcd *xhci, + u16 test_mode, u16 wIndex) +{ + u32 temp; + __le32 __iomem *addr; + + /* xhci only supports test mode for usb2 ports, i.e. xhci->main_hcd */ + addr = xhci_get_port_io_addr(xhci->main_hcd, wIndex); + temp = readl(addr + PORTPMSC); + temp |= test_mode << PORT_TEST_MODE_SHIFT; + writel(temp, addr + PORTPMSC); + xhci->test_mode = test_mode; + if (test_mode == TEST_FORCE_EN) + xhci_start(xhci); +} + +static int xhci_enter_test_mode(struct xhci_hcd *xhci, + u16 test_mode, u16 wIndex, unsigned long *flags) +{ + int i, retval; + + /* Disable all Device Slots */ + xhci_dbg(xhci, "Disable all slots\n"); + for (i = 1; i <= HCS_MAX_SLOTS(xhci->hcs_params1); i++) { + retval = xhci_disable_slot(xhci, NULL, i); + if (retval) + xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n", + i, retval); + } + /* Put all ports to the Disable state by clear PP */ + xhci_dbg(xhci, "Disable all port (PP = 0)\n"); + /* Power off USB3 ports*/ + for (i = 0; i < xhci->num_usb3_ports; i++) + xhci_set_port_power(xhci, xhci->shared_hcd, i, false, flags); + /* Power off USB2 ports*/ + for (i = 0; i < xhci->num_usb2_ports; i++) + xhci_set_port_power(xhci, xhci->main_hcd, i, false, flags); + /* Stop the controller */ + xhci_dbg(xhci, "Stop controller\n"); + retval = xhci_halt(xhci); + if (retval) + return retval; + /* Disable runtime PM for test mode */ + pm_runtime_forbid(xhci_to_hcd(xhci)->self.controller); + /* Set PORTPMSC.PTC field to enter selected test mode */ + /* Port is selected by wIndex. port_id = wIndex + 1 */ + xhci_dbg(xhci, "Enter Test Mode: %d, Port_id=%d\n", + test_mode, wIndex + 1); + xhci_port_set_test_mode(xhci, test_mode, wIndex); + return retval; +} + +static int xhci_exit_test_mode(struct xhci_hcd *xhci) +{ + int retval; + + if (!xhci->test_mode) { + xhci_err(xhci, "Not in test mode, do nothing.\n"); + return 0; + } + if (xhci->test_mode == TEST_FORCE_EN && + !(xhci->xhc_state & XHCI_STATE_HALTED)) { + retval = xhci_halt(xhci); + if (retval) + return retval; + } + pm_runtime_allow(xhci_to_hcd(xhci)->self.controller); + xhci->test_mode = 0; + return xhci_reset(xhci); +} + void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array, int port_id, u32 link_state) { @@ -895,6 +1006,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 link_state = 0; u16 wake_mask = 0; u16 timeout = 0; + u16 test_mode = 0; max_ports = xhci_get_ports(hcd, &port_array); bus_state = &xhci->bus_state[hcd_index(hcd)]; @@ -935,7 +1047,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; wIndex--; temp = readl(port_array[wIndex]); - if (temp == 0xffffffff) { + if (temp == ~(u32)0) { + xhci_hc_died(xhci); retval = -ENODEV; break; } @@ -968,6 +1081,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, link_state = (wIndex & 0xff00) >> 3; if (wValue == USB_PORT_FEAT_REMOTE_WAKE_MASK) wake_mask = wIndex & 0xff00; + if (wValue == USB_PORT_FEAT_TEST) + test_mode = (wIndex & 0xff00) >> 8; /* The MSB of wIndex is the U1/U2 timeout */ timeout = (wIndex & 0xff00) >> 8; wIndex &= 0xff; @@ -975,7 +1090,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; wIndex--; temp = readl(port_array[wIndex]); - if (temp == 0xffffffff) { + if (temp == ~(u32)0) { + xhci_hc_died(xhci); retval = -ENODEV; break; } @@ -1092,18 +1208,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, * However, hub_wq will ignore the roothub events until * the roothub is registered. */ - writel(temp | PORT_POWER, port_array[wIndex]); - - temp = readl(port_array[wIndex]); - xhci_dbg(xhci, "set port power, actual port %d status = 0x%x\n", wIndex, temp); - - spin_unlock_irqrestore(&xhci->lock, flags); - temp = usb_acpi_power_manageable(hcd->self.root_hub, - wIndex); - if (temp) - usb_acpi_set_power_state(hcd->self.root_hub, - wIndex, true); - spin_lock_irqsave(&xhci->lock, flags); + xhci_set_port_power(xhci, hcd, wIndex, true, &flags); break; case USB_PORT_FEAT_RESET: temp = (temp | PORT_RESET); @@ -1142,6 +1247,15 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, temp |= PORT_U2_TIMEOUT(timeout); writel(temp, port_array[wIndex] + PORTPMSC); break; + case USB_PORT_FEAT_TEST: + /* 4.19.6 Port Test Modes (USB2 Test Mode) */ + if (hcd->speed != HCD_USB2) + goto error; + if (test_mode > TEST_FORCE_EN || test_mode < TEST_J) + goto error; + retval = xhci_enter_test_mode(xhci, test_mode, wIndex, + &flags); + break; default: goto error; } @@ -1153,7 +1267,8 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, goto error; wIndex--; temp = readl(port_array[wIndex]); - if (temp == 0xffffffff) { + if (temp == ~(u32)0) { + xhci_hc_died(xhci); retval = -ENODEV; break; } @@ -1207,15 +1322,10 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, port_array[wIndex], temp); break; case USB_PORT_FEAT_POWER: - writel(temp & ~PORT_POWER, port_array[wIndex]); - - spin_unlock_irqrestore(&xhci->lock, flags); - temp = usb_acpi_power_manageable(hcd->self.root_hub, - wIndex); - if (temp) - usb_acpi_set_power_state(hcd->self.root_hub, - wIndex, false); - spin_lock_irqsave(&xhci->lock, flags); + xhci_set_port_power(xhci, hcd, wIndex, false, &flags); + break; + case USB_PORT_FEAT_TEST: + retval = xhci_exit_test_mode(xhci); break; default: goto error; @@ -1269,7 +1379,8 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) /* For each port, did anything change? If so, set that bit in buf. */ for (i = 0; i < max_ports; i++) { temp = readl(port_array[i]); - if (temp == 0xffffffff) { + if (temp == ~(u32)0) { + xhci_hc_died(xhci); retval = -ENODEV; break; } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index ba1853f4e407a9558af25e6573ced5a7ec6d01f6..fddf2731f798ea3a05a5b18bae7749c064eb4af7 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -56,7 +56,7 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci, } if (max_packet) { - seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA); + seg->bounce_buf = kzalloc(max_packet, flags); if (!seg->bounce_buf) { dma_pool_free(xhci->segment_pool, seg->trbs, dma); kfree(seg); @@ -288,6 +288,8 @@ void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring) if (!ring) return; + trace_xhci_ring_free(ring); + if (ring->first_seg) { if (ring->type == TYPE_STREAM) xhci_remove_stream_mapping(ring); @@ -313,9 +315,6 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring, * handling ring expansion, set the cycle state equal to the old ring. */ ring->cycle_state = cycle_state; - /* Not necessary for new rings, but needed for re-initialized rings */ - ring->enq_updates = 0; - ring->deq_updates = 0; /* * Each segment has a link TRB, and leave an extra TRB for SW @@ -400,6 +399,7 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci, cpu_to_le32(LINK_TOGGLE); } xhci_initialize_ring_info(ring, cycle_state); + trace_xhci_ring_alloc(ring); return ring; fail: @@ -504,6 +504,7 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring, } xhci_link_rings(xhci, ring, first, last, num_segs); + trace_xhci_ring_expansion(ring); xhci_dbg_trace(xhci, trace_xhci_dbg_ring_expansion, "ring expansion succeed, now has %d segments", ring->num_segs); @@ -586,7 +587,7 @@ static void xhci_free_stream_ctx(struct xhci_hcd *xhci, unsigned int num_stream_ctxs, struct xhci_stream_ctx *stream_ctx, dma_addr_t dma) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; size_t size = sizeof(struct xhci_stream_ctx) * num_stream_ctxs; if (size > MEDIUM_STREAM_ARRAY_SIZE) @@ -614,7 +615,7 @@ static struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci, unsigned int num_stream_ctxs, dma_addr_t *dma, gfp_t mem_flags) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; size_t size = sizeof(struct xhci_stream_ctx) * num_stream_ctxs; if (size > MEDIUM_STREAM_ARRAY_SIZE) @@ -1502,6 +1503,17 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, */ max_esit_payload = xhci_get_max_esit_payload(udev, ep); interval = xhci_get_endpoint_interval(udev, ep); + + /* Periodic endpoint bInterval limit quirk */ + if (usb_endpoint_xfer_int(&ep->desc) || + usb_endpoint_xfer_isoc(&ep->desc)) { + if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) && + udev->speed >= USB_SPEED_HIGH && + interval >= 7) { + interval = 6; + } + } + mult = xhci_get_endpoint_mult(udev, ep); max_packet = usb_endpoint_maxp(&ep->desc); max_burst = xhci_get_endpoint_max_burst(udev, ep); @@ -1686,7 +1698,7 @@ void xhci_slot_copy(struct xhci_hcd *xhci, static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags) { int i; - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; int num_sp = HCS_MAX_SCRATCHPAD(xhci->hcs_params2); xhci_dbg_trace(xhci, trace_xhci_dbg_init, @@ -1709,36 +1721,27 @@ static int scratchpad_alloc(struct xhci_hcd *xhci, gfp_t flags) if (!xhci->scratchpad->sp_buffers) goto fail_sp3; - xhci->scratchpad->sp_dma_buffers = - kzalloc(sizeof(dma_addr_t) * num_sp, flags); - - if (!xhci->scratchpad->sp_dma_buffers) - goto fail_sp4; - xhci->dcbaa->dev_context_ptrs[0] = cpu_to_le64(xhci->scratchpad->sp_dma); for (i = 0; i < num_sp; i++) { dma_addr_t dma; - void *buf = dma_alloc_coherent(dev, xhci->page_size, &dma, + void *buf = dma_zalloc_coherent(dev, xhci->page_size, &dma, flags); if (!buf) - goto fail_sp5; + goto fail_sp4; xhci->scratchpad->sp_array[i] = dma; xhci->scratchpad->sp_buffers[i] = buf; - xhci->scratchpad->sp_dma_buffers[i] = dma; } return 0; - fail_sp5: + fail_sp4: for (i = i - 1; i >= 0; i--) { dma_free_coherent(dev, xhci->page_size, xhci->scratchpad->sp_buffers[i], - xhci->scratchpad->sp_dma_buffers[i]); + xhci->scratchpad->sp_array[i]); } - kfree(xhci->scratchpad->sp_dma_buffers); - fail_sp4: kfree(xhci->scratchpad->sp_buffers); fail_sp3: @@ -1758,7 +1761,7 @@ static void scratchpad_free(struct xhci_hcd *xhci) { int num_sp; int i; - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; if (!xhci->scratchpad) return; @@ -1768,9 +1771,8 @@ static void scratchpad_free(struct xhci_hcd *xhci) for (i = 0; i < num_sp; i++) { dma_free_coherent(dev, xhci->page_size, xhci->scratchpad->sp_buffers[i], - xhci->scratchpad->sp_dma_buffers[i]); + xhci->scratchpad->sp_array[i]); } - kfree(xhci->scratchpad->sp_dma_buffers); kfree(xhci->scratchpad->sp_buffers); dma_free_coherent(dev, num_sp * sizeof(u64), xhci->scratchpad->sp_array, @@ -1831,7 +1833,7 @@ void xhci_free_command(struct xhci_hcd *xhci, void xhci_mem_cleanup(struct xhci_hcd *xhci) { - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; int size; int i, j, num_ports; @@ -2117,11 +2119,12 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, { u32 temp, port_offset, port_count; int i; - u8 major_revision; + u8 major_revision, minor_revision; struct xhci_hub *rhub; temp = readl(addr); major_revision = XHCI_EXT_PORT_MAJOR(temp); + minor_revision = XHCI_EXT_PORT_MINOR(temp); if (major_revision == 0x03) { rhub = &xhci->usb3_rhub; @@ -2135,7 +2138,9 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, return; } rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp); - rhub->min_rev = XHCI_EXT_PORT_MINOR(temp); + + if (rhub->min_rev < minor_revision) + rhub->min_rev = minor_revision; /* Port offset and count in the third dword, see section 7.2 */ temp = readl(addr + 2); @@ -2305,10 +2310,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) /* Place limits on the number of roothub ports so that the hub * descriptors aren't longer than the USB core will allocate. */ - if (xhci->num_usb3_ports > 15) { + if (xhci->num_usb3_ports > USB_SS_MAXPORTS) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "Limiting USB 3.0 roothub ports to 15."); - xhci->num_usb3_ports = 15; + "Limiting USB 3.0 roothub ports to %u.", + USB_SS_MAXPORTS); + xhci->num_usb3_ports = USB_SS_MAXPORTS; } if (xhci->num_usb2_ports > USB_MAXCHILDREN) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, @@ -2373,7 +2379,7 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) { dma_addr_t dma; - struct device *dev = xhci_to_hcd(xhci)->self.controller; + struct device *dev = xhci_to_hcd(xhci)->self.sysdev; unsigned int val, val2; u64 val_64; struct xhci_segment *seg; @@ -2419,7 +2425,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) writel(val, &xhci->op_regs->config_reg); /* - * Section 5.4.8 - doorbell array must be + * xHCI section 5.4.6 - doorbell array must be * "physically contiguous and 64-byte (cache line) aligned". */ xhci->dcbaa = dma_alloc_coherent(dev, sizeof(*xhci->dcbaa), &dma, @@ -2480,7 +2486,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) (xhci->cmd_ring->first_seg->dma & (u64) ~CMD_RING_RSVD_BITS) | xhci->cmd_ring->cycle_state; xhci_dbg_trace(xhci, trace_xhci_dbg_init, - "// Setting command ring address to 0x%x", val); + "// Setting command ring address to 0x%016llx", val_64); xhci_write_64(xhci, val_64, &xhci->op_regs->cmd_ring); xhci_dbg_cmd_ptrs(xhci); @@ -2601,7 +2607,6 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) return 0; fail: - xhci_warn(xhci, "Couldn't initialize memory\n"); xhci_halt(xhci); xhci_reset(xhci); xhci_mem_cleanup(xhci); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index fc99f51d12e183603cd692592b6d77351131a1da..1bcf971141c09a69f3cd1674cca282bcc6ec8d46 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -52,6 +52,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 +#define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 static const char hcd_name[] = "xhci_hcd"; @@ -166,7 +167,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -175,7 +177,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; if (pdev->vendor == PCI_VENDOR_ID_ETRON && @@ -198,6 +201,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1042) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && + pdev->device == 0x1142) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + + if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7; if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6ed468fa7d5e593ca2e0fdabc6048fb7f2dd9f5d..c04144b25a673a2f2438854ea4183dd7cc0b0fab 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -54,6 +55,16 @@ static int xhci_priv_init_quirk(struct usb_hcd *hcd) return priv->init_quirk(hcd); } +static int xhci_priv_resume_quirk(struct usb_hcd *hcd) +{ + struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); + + if (!priv->resume_quirk) + return 0; + + return priv->resume_quirk(hcd); +} + static void xhci_plat_quirks(struct device *dev, struct xhci_hcd *xhci) { /* @@ -92,18 +103,21 @@ static const struct xhci_plat_priv xhci_plat_renesas_rcar_gen2 = { .firmware_name = XHCI_RCAR_FIRMWARE_NAME_V1, .init_quirk = xhci_rcar_init_quirk, .plat_start = xhci_rcar_start, + .resume_quirk = xhci_rcar_resume_quirk, }; static const struct xhci_plat_priv xhci_plat_renesas_rcar_gen3 = { .firmware_name = XHCI_RCAR_FIRMWARE_NAME_V2, .init_quirk = xhci_rcar_init_quirk, .plat_start = xhci_rcar_start, + .resume_quirk = xhci_rcar_resume_quirk, }; static const struct xhci_plat_priv xhci_plat_renesas_rcar_r8a7796 = { .firmware_name = XHCI_RCAR_FIRMWARE_NAME_V3, .init_quirk = xhci_rcar_init_quirk, .plat_start = xhci_rcar_start, + .resume_quirk = xhci_rcar_resume_quirk, }; static const struct of_device_id usb_xhci_of_match[] = { @@ -148,6 +162,7 @@ static int xhci_plat_probe(struct platform_device *pdev) { const struct of_device_id *match; const struct hc_driver *driver; + struct device *sysdev; struct xhci_hcd *xhci; struct resource *res; struct usb_hcd *hcd; @@ -162,26 +177,49 @@ static int xhci_plat_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENODEV; + return irq; + + /* + * sysdev must point to a device that is known to the system firmware + * or PCI hardware. We handle these three cases here: + * 1. xhci_plat comes from firmware + * 2. xhci_plat is child of a device from firmware (dwc3-plat) + * 3. xhci_plat is grandchild of a pci device (dwc3-pci) + */ + sysdev = &pdev->dev; + if (sysdev->parent && !sysdev->of_node && sysdev->parent->of_node) + sysdev = sysdev->parent; +#ifdef CONFIG_PCI + else if (sysdev->parent && sysdev->parent->parent && + sysdev->parent->parent->bus == &pci_bus_type) + sysdev = sysdev->parent->parent; +#endif /* Try to set 64-bit DMA first */ - if (!pdev->dev.dma_mask) + if (WARN_ON(!sysdev->dma_mask)) /* Platform did not initialize dma_mask */ - ret = dma_coerce_mask_and_coherent(&pdev->dev, + ret = dma_coerce_mask_and_coherent(sysdev, DMA_BIT_MASK(64)); else - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(sysdev, DMA_BIT_MASK(64)); /* If seting 64-bit DMA mask fails, fall back to 32-bit DMA mask */ if (ret) { - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(sysdev, DMA_BIT_MASK(32)); if (ret) return ret; } - hcd = usb_create_hcd(driver, &pdev->dev, dev_name(&pdev->dev)); - if (!hcd) - return -ENOMEM; + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); + + hcd = __usb_create_hcd(driver, sysdev, &pdev->dev, + dev_name(&pdev->dev), NULL); + if (!hcd) { + ret = -ENOMEM; + goto disable_runtime; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); hcd->regs = devm_ioremap_resource(&pdev->dev, res); @@ -222,20 +260,20 @@ static int xhci_plat_probe(struct platform_device *pdev) xhci->clk = clk; xhci->main_hcd = hcd; - xhci->shared_hcd = usb_create_shared_hcd(driver, &pdev->dev, + xhci->shared_hcd = __usb_create_hcd(driver, sysdev, &pdev->dev, dev_name(&pdev->dev), hcd); if (!xhci->shared_hcd) { ret = -ENOMEM; goto disable_clk; } - if (device_property_read_bool(&pdev->dev, "usb3-lpm-capable")) + if (device_property_read_bool(sysdev, "usb3-lpm-capable")) xhci->quirks |= XHCI_LPM_SUPPORT; if (device_property_read_bool(&pdev->dev, "quirk-broken-port-ped")) xhci->quirks |= XHCI_BROKEN_PORT_PED; - hcd->usb_phy = devm_usb_get_phy_by_phandle(&pdev->dev, "usb-phy", 0); + hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0); if (IS_ERR(hcd->usb_phy)) { ret = PTR_ERR(hcd->usb_phy); if (ret == -EPROBE_DEFER) @@ -258,6 +296,15 @@ static int xhci_plat_probe(struct platform_device *pdev) if (ret) goto dealloc_usb2_hcd; + device_enable_async_suspend(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + + /* + * Prevent runtime pm from being on as default, users should enable + * runtime pm using power/control in sysfs. + */ + pm_runtime_forbid(&pdev->dev); + return 0; @@ -277,6 +324,10 @@ static int xhci_plat_probe(struct platform_device *pdev) put_hcd: usb_put_hcd(hcd); +disable_runtime: + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return ret; } @@ -298,14 +349,17 @@ static int xhci_plat_remove(struct platform_device *dev) clk_disable_unprepare(clk); usb_put_hcd(hcd); + pm_runtime_set_suspended(&dev->dev); + pm_runtime_disable(&dev->dev); + return 0; } -#ifdef CONFIG_PM_SLEEP -static int xhci_plat_suspend(struct device *dev) +static int __maybe_unused xhci_plat_suspend(struct device *dev) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); + int ret; /* * xhci_suspend() needs `do_wakeup` to know whether host is allowed @@ -315,24 +369,53 @@ static int xhci_plat_suspend(struct device *dev) * reconsider this when xhci_plat_suspend enlarges its scope, e.g., * also applies to runtime suspend. */ - return xhci_suspend(xhci, device_may_wakeup(dev)); + ret = xhci_suspend(xhci, device_may_wakeup(dev)); + + if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk)) + clk_disable_unprepare(xhci->clk); + + return ret; } -static int xhci_plat_resume(struct device *dev) +static int __maybe_unused xhci_plat_resume(struct device *dev) { struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); + int ret; + + if (!device_may_wakeup(dev) && !IS_ERR(xhci->clk)) + clk_prepare_enable(xhci->clk); + + ret = xhci_priv_resume_quirk(hcd); + if (ret) + return ret; + + return xhci_resume(xhci, 0); +} + +static int __maybe_unused xhci_plat_runtime_suspend(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + + return xhci_suspend(xhci, true); +} + +static int __maybe_unused xhci_plat_runtime_resume(struct device *dev) +{ + struct usb_hcd *hcd = dev_get_drvdata(dev); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); return xhci_resume(xhci, 0); } static const struct dev_pm_ops xhci_plat_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(xhci_plat_suspend, xhci_plat_resume) + + SET_RUNTIME_PM_OPS(xhci_plat_runtime_suspend, + xhci_plat_runtime_resume, + NULL) }; -#define DEV_PM_OPS (&xhci_plat_pm_ops) -#else -#define DEV_PM_OPS NULL -#endif /* CONFIG_PM */ static const struct acpi_device_id usb_xhci_acpi_match[] = { /* XHCI-compliant USB Controller */ @@ -347,7 +430,7 @@ static struct platform_driver usb_xhci_driver = { .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", - .pm = DEV_PM_OPS, + .pm = &xhci_plat_pm_ops, .of_match_table = of_match_ptr(usb_xhci_of_match), .acpi_match_table = ACPI_PTR(usb_xhci_acpi_match), }, diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 9af0cb48053f27bd07cd3ad1d79b3a6815700fa4..29b227895b0732352765afa042819b4d0ae9c864 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -17,6 +17,7 @@ struct xhci_plat_priv { const char *firmware_name; void (*plat_start)(struct usb_hcd *); int (*init_quirk)(struct usb_hcd *); + int (*resume_quirk)(struct usb_hcd *); }; #define hcd_to_xhci_priv(h) ((struct xhci_plat_priv *)hcd_to_xhci(h)->priv) diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index d28df386e780bd04608ad199535ee326119b932c..07278228214b80066df2ea32a5828fdf1066f34d 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -198,3 +198,14 @@ int xhci_rcar_init_quirk(struct usb_hcd *hcd) return xhci_rcar_download_firmware(hcd); } + +int xhci_rcar_resume_quirk(struct usb_hcd *hcd) +{ + int ret; + + ret = xhci_rcar_download_firmware(hcd); + if (!ret) + xhci_rcar_start(hcd); + + return ret; +} diff --git a/drivers/usb/host/xhci-rcar.h b/drivers/usb/host/xhci-rcar.h index d2ffe20401cf9f372bbcc0be4a7443e0e3b92b60..d247951147a1b9b99772bbd0093667c3f5976634 100644 --- a/drivers/usb/host/xhci-rcar.h +++ b/drivers/usb/host/xhci-rcar.h @@ -18,6 +18,7 @@ #if IS_ENABLED(CONFIG_USB_XHCI_RCAR) void xhci_rcar_start(struct usb_hcd *hcd); int xhci_rcar_init_quirk(struct usb_hcd *hcd); +int xhci_rcar_resume_quirk(struct usb_hcd *hcd); #else static inline void xhci_rcar_start(struct usb_hcd *hcd) { @@ -27,5 +28,10 @@ static inline int xhci_rcar_init_quirk(struct usb_hcd *hcd) { return 0; } + +static inline int xhci_rcar_resume_quirk(struct usb_hcd *hcd) +{ + return 0; +} #endif #endif /* _XHCI_RCAR_H */ diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index a3309aa02993dfa79e52a2b93a87b9efa289d498..03f63f50afb6213d58b498f559021db28f8a4d5a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -167,8 +167,6 @@ static void next_trb(struct xhci_hcd *xhci, */ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring) { - ring->deq_updates++; - /* event ring doesn't have link trbs, check for last trb */ if (ring->type == TYPE_EVENT) { if (!last_trb_on_seg(ring->deq_seg, ring->dequeue)) { @@ -191,6 +189,9 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring) ring->deq_seg = ring->deq_seg->next; ring->dequeue = ring->deq_seg->trbs; } + + trace_xhci_inc_deq(ring); + return; } @@ -223,7 +224,6 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, ring->num_trbs_free--; next = ++(ring->enqueue); - ring->enq_updates++; /* Update the dequeue pointer further if that was a link TRB */ while (trb_is_link(next)) { @@ -259,6 +259,8 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, ring->enqueue = ring->enq_seg->trbs; next = ring->enqueue; } + + trace_xhci_inc_enq(ring); } /* @@ -321,7 +323,7 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if (i_cmd->status != COMP_COMMAND_ABORTED) continue; - i_cmd->status = COMP_STOPPED; + i_cmd->status = COMP_COMMAND_RING_STOPPED; xhci_dbg(xhci, "Turn aborted command %p to no-op\n", i_cmd->command_trb); @@ -359,21 +361,19 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); - /* Section 4.6.1.2 of xHCI 1.0 spec says software should - * time the completion od all xHCI commands, including - * the Command Abort operation. If software doesn't see - * CRR negated in a timely manner (e.g. longer than 5 - * seconds), then it should assume that the there are - * larger problems with the xHC and assert HCRST. + /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the + * completion of the Command Abort operation. If CRR is not negated in 5 + * seconds then driver handles it as if host died (-ENODEV). + * In the future we should distinguish between -ENODEV and -ETIMEDOUT + * and try to recover a -ETIMEDOUT with a host controller reset. */ ret = xhci_handshake(&xhci->op_regs->cmd_ring, CMD_RING_RUNNING, 0, 5 * 1000 * 1000); if (ret < 0) { - xhci_err(xhci, - "Stop command ring failed, maybe the host is dead\n"); - xhci->xhc_state |= XHCI_STATE_DYING; + xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret); xhci_halt(xhci); - return -ESHUTDOWN; + xhci_hc_died(xhci); + return ret; } /* * Writing the CMD_RING_ABORT bit should cause a cmd completion event, @@ -641,8 +641,8 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, xhci_urb_free_priv(urb_priv); usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock(&xhci->lock); - usb_hcd_giveback_urb(hcd, urb, status); trace_xhci_urb_giveback(urb); + usb_hcd_giveback_urb(hcd, urb, status); spin_lock(&xhci->lock); } @@ -689,6 +689,8 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, struct xhci_virt_ep *ep; struct xhci_td *cur_td = NULL; struct xhci_td *last_unlinked_td; + struct xhci_ep_ctx *ep_ctx; + struct xhci_virt_device *vdev; struct xhci_dequeue_state deq_state; @@ -702,6 +704,11 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, memset(&deq_state, 0, sizeof(deq_state)); ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3])); + + vdev = xhci->devs[slot_id]; + ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index); + trace_xhci_handle_cmd_stop_ep(ep_ctx); + ep = &xhci->devs[slot_id]->eps[ep_index]; last_unlinked_td = list_last_entry(&ep->cancelled_td_list, struct xhci_td, cancelled_td_list); @@ -866,6 +873,40 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci, } } +/* + * host controller died, register read returns 0xffffffff + * Complete pending commands, mark them ABORTED. + * URBs need to be given back as usb core might be waiting with device locks + * held for the URBs to finish during device disconnect, blocking host remove. + * + * Call with xhci->lock held. + * lock is relased and re-acquired while giving back urb. + */ +void xhci_hc_died(struct xhci_hcd *xhci) +{ + int i, j; + + if (xhci->xhc_state & XHCI_STATE_DYING) + return; + + xhci_err(xhci, "xHCI host controller not responding, assume dead\n"); + xhci->xhc_state |= XHCI_STATE_DYING; + + xhci_cleanup_command_queue(xhci); + + /* return any pending urbs, remove may be waiting for them */ + for (i = 0; i <= HCS_MAX_SLOTS(xhci->hcs_params1); i++) { + if (!xhci->devs[i]) + continue; + for (j = 0; j < 31; j++) + xhci_kill_endpoint_urbs(xhci, i, j); + } + + /* inform usb core hc died if PCI remove isn't already handling it */ + if (!(xhci->xhc_state & XHCI_STATE_REMOVING)) + usb_hc_died(xhci_to_hcd(xhci)); +} + /* Watchdog timer function for when a stop endpoint command fails to complete. * In this case, we assume the host controller is broken or dying or dead. The * host may still be completing some other events, so we have to be careful to @@ -887,7 +928,6 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) { struct xhci_hcd *xhci; struct xhci_virt_ep *ep; - int ret, i, j; unsigned long flags; ep = (struct xhci_virt_ep *) arg; @@ -904,52 +944,22 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) } xhci_warn(xhci, "xHCI host not responding to stop endpoint command.\n"); - xhci_warn(xhci, "Assuming host is dying, halting host.\n"); - /* Oops, HC is dead or dying or at least not responding to the stop - * endpoint command. - */ - - xhci->xhc_state |= XHCI_STATE_DYING; ep->ep_state &= ~EP_STOP_CMD_PENDING; - /* Disable interrupts from the host controller and start halting it */ - xhci_quiesce(xhci); - spin_unlock_irqrestore(&xhci->lock, flags); + xhci_halt(xhci); - ret = xhci_halt(xhci); + /* + * handle a stop endpoint cmd timeout as if host died (-ENODEV). + * In the future we could distinguish between -ENODEV and -ETIMEDOUT + * and try to recover a -ETIMEDOUT with a host controller reset + */ + xhci_hc_died(xhci); - spin_lock_irqsave(&xhci->lock, flags); - if (ret < 0) { - /* This is bad; the host is not responding to commands and it's - * not allowing itself to be halted. At least interrupts are - * disabled. If we call usb_hc_died(), it will attempt to - * disconnect all device drivers under this host. Those - * disconnect() methods will wait for all URBs to be unlinked, - * so we must complete them. - */ - xhci_warn(xhci, "Non-responsive xHCI host is not halting.\n"); - xhci_warn(xhci, "Completing active URBs anyway.\n"); - /* We could turn all TDs on the rings to no-ops. This won't - * help if the host has cached part of the ring, and is slow if - * we want to preserve the cycle bit. Skip it and hope the host - * doesn't touch the memory. - */ - } - for (i = 0; i < MAX_HC_SLOTS; i++) { - if (!xhci->devs[i]) - continue; - for (j = 0; j < 31; j++) - xhci_kill_endpoint_urbs(xhci, i, j); - } spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Calling usb_hc_died()"); - usb_hc_died(xhci_to_hcd(xhci)); xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "xHCI host controller is dead."); } - static void update_ring_for_set_deq_completion(struct xhci_hcd *xhci, struct xhci_virt_device *dev, struct xhci_ring *ep_ring, @@ -1029,6 +1039,8 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index); slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx); + trace_xhci_handle_cmd_set_deq(slot_ctx); + trace_xhci_handle_cmd_set_deq_ep(ep_ctx); if (cmd_comp_code != COMP_SUCCESS) { unsigned int ep_state; @@ -1099,9 +1111,15 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id, union xhci_trb *trb, u32 cmd_comp_code) { + struct xhci_virt_device *vdev; + struct xhci_ep_ctx *ep_ctx; unsigned int ep_index; ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3])); + vdev = xhci->devs[slot_id]; + ep_ctx = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index); + trace_xhci_handle_cmd_reset_ep(ep_ctx); + /* This command will only fail if the endpoint wasn't halted, * but we don't care. */ @@ -1114,11 +1132,11 @@ static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id, */ if (xhci->quirks & XHCI_RESET_EP_QUIRK) { struct xhci_command *command; + command = xhci_alloc_command(xhci, false, false, GFP_ATOMIC); - if (!command) { - xhci_warn(xhci, "WARN Cannot submit cfg ep: ENOMEM\n"); + if (!command) return; - } + xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "Queueing configure endpoint command"); xhci_queue_configure_endpoint(xhci, command, @@ -1143,10 +1161,15 @@ static void xhci_handle_cmd_enable_slot(struct xhci_hcd *xhci, int slot_id, static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) { struct xhci_virt_device *virt_dev; + struct xhci_slot_ctx *slot_ctx; virt_dev = xhci->devs[slot_id]; if (!virt_dev) return; + + slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->out_ctx); + trace_xhci_handle_cmd_disable_slot(slot_ctx); + if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); @@ -1158,6 +1181,7 @@ static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, { struct xhci_virt_device *virt_dev; struct xhci_input_control_ctx *ctrl_ctx; + struct xhci_ep_ctx *ep_ctx; unsigned int ep_index; unsigned int ep_state; u32 add_flags, drop_flags; @@ -1182,6 +1206,9 @@ static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, /* Input ctx add_flags are the endpoint index plus one */ ep_index = xhci_last_valid_endpoint(add_flags) - 1; + ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->out_ctx, ep_index); + trace_xhci_handle_cmd_config_ep(ep_ctx); + /* A usb_set_interface() call directly after clearing a halted * condition may race on this quirky hardware. Not worth * worrying about, since this is prototype hardware. Not sure @@ -1206,9 +1233,26 @@ static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, return; } +static void xhci_handle_cmd_addr_dev(struct xhci_hcd *xhci, int slot_id) +{ + struct xhci_virt_device *vdev; + struct xhci_slot_ctx *slot_ctx; + + vdev = xhci->devs[slot_id]; + slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx); + trace_xhci_handle_cmd_addr_dev(slot_ctx); +} + static void xhci_handle_cmd_reset_dev(struct xhci_hcd *xhci, int slot_id, struct xhci_event_cmd *event) { + struct xhci_virt_device *vdev; + struct xhci_slot_ctx *slot_ctx; + + vdev = xhci->devs[slot_id]; + slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx); + trace_xhci_handle_cmd_reset_dev(slot_ctx); + xhci_dbg(xhci, "Completed reset device command.\n"); if (!xhci->devs[slot_id]) xhci_warn(xhci, "Reset device command completion " @@ -1250,7 +1294,6 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci) void xhci_handle_command_timeout(struct work_struct *work) { struct xhci_hcd *xhci; - int ret; unsigned long flags; u64 hw_ring_state; @@ -1271,22 +1314,17 @@ void xhci_handle_command_timeout(struct work_struct *work) /* Make sure command ring is running before aborting it */ hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); + if (hw_ring_state == ~(u64)0) { + xhci_hc_died(xhci); + goto time_out_completed; + } + if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && (hw_ring_state & CMD_RING_RUNNING)) { /* Prevent new doorbell, and start command abort */ xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; xhci_dbg(xhci, "Command timeout\n"); - ret = xhci_abort_cmd_ring(xhci, flags); - if (unlikely(ret == -ESHUTDOWN)) { - xhci_err(xhci, "Abort command ring failed\n"); - xhci_cleanup_command_queue(xhci); - spin_unlock_irqrestore(&xhci->lock, flags); - usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); - xhci_dbg(xhci, "xHCI host controller is dead.\n"); - - return; - } - + xhci_abort_cmd_ring(xhci, flags); goto time_out_completed; } @@ -1342,7 +1380,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd_comp_code = GET_COMP_CODE(le32_to_cpu(event->status)); /* If CMD ring stopped we own the trbs between enqueue and dequeue */ - if (cmd_comp_code == COMP_STOPPED) { + if (cmd_comp_code == COMP_COMMAND_RING_STOPPED) { complete_all(&xhci->cmd_ring_stop_completion); return; } @@ -1384,6 +1422,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, case TRB_EVAL_CONTEXT: break; case TRB_ADDR_DEV: + xhci_handle_cmd_addr_dev(xhci, slot_id); break; case TRB_STOP_RING: WARN_ON(slot_id != TRB_TO_SLOT_ID( @@ -1397,8 +1436,8 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, break; case TRB_CMD_NOOP: /* Is this an aborted command turned to NO-OP? */ - if (cmd->status == COMP_STOPPED) - cmd_comp_code = COMP_STOPPED; + if (cmd->status == COMP_COMMAND_RING_STOPPED) + cmd_comp_code = COMP_COMMAND_RING_STOPPED; break; case TRB_RESET_EP: WARN_ON(slot_id != TRB_TO_SLOT_ID( @@ -2243,7 +2282,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags)); xdev = xhci->devs[slot_id]; if (!xdev) { - xhci_err(xhci, "ERROR Transfer event pointed to bad slot\n"); + xhci_err(xhci, "ERROR Transfer event pointed to bad slot %u\n", + slot_id); xhci_err(xhci, "@%016llx %08x %08x %08x %08x\n", (unsigned long long) xhci_trb_virt_to_dma( xhci->event_ring->deq_seg, @@ -2252,8 +2292,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, upper_32_bits(le64_to_cpu(event->buffer)), le32_to_cpu(event->transfer_len), le32_to_cpu(event->flags)); - xhci_dbg(xhci, "Event ring:\n"); - xhci_debug_segment(xhci, xhci->event_ring->deq_seg); return -ENODEV; } @@ -2263,8 +2301,9 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index); if (!ep_ring || GET_EP_CTX_STATE(ep_ctx) == EP_STATE_DISABLED) { - xhci_err(xhci, "ERROR Transfer event for disabled endpoint " - "or incorrect stream ring\n"); + xhci_err(xhci, + "ERROR Transfer event for disabled endpoint slot %u ep %u or incorrect stream ring\n", + slot_id, ep_index); xhci_err(xhci, "@%016llx %08x %08x %08x %08x\n", (unsigned long long) xhci_trb_virt_to_dma( xhci->event_ring->deq_seg, @@ -2273,8 +2312,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, upper_32_bits(le64_to_cpu(event->buffer)), le32_to_cpu(event->transfer_len), le32_to_cpu(event->flags)); - xhci_dbg(xhci, "Event ring:\n"); - xhci_debug_segment(xhci, xhci->event_ring->deq_seg); return -ENODEV; } @@ -2298,45 +2335,62 @@ static int handle_tx_event(struct xhci_hcd *xhci, trb_comp_code = COMP_SHORT_PACKET; else xhci_warn_ratelimited(xhci, - "WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk?\n"); + "WARN Successful completion on short TX for slot %u ep %u: needs XHCI_TRUST_TX_LENGTH quirk?\n", + slot_id, ep_index); case COMP_SHORT_PACKET: break; case COMP_STOPPED: - xhci_dbg(xhci, "Stopped on Transfer TRB\n"); + xhci_dbg(xhci, "Stopped on Transfer TRB for slot %u ep %u\n", + slot_id, ep_index); break; case COMP_STOPPED_LENGTH_INVALID: - xhci_dbg(xhci, "Stopped on No-op or Link TRB\n"); + xhci_dbg(xhci, + "Stopped on No-op or Link TRB for slot %u ep %u\n", + slot_id, ep_index); break; case COMP_STOPPED_SHORT_PACKET: - xhci_dbg(xhci, "Stopped with short packet transfer detected\n"); + xhci_dbg(xhci, + "Stopped with short packet transfer detected for slot %u ep %u\n", + slot_id, ep_index); break; case COMP_STALL_ERROR: - xhci_dbg(xhci, "Stalled endpoint\n"); + xhci_dbg(xhci, "Stalled endpoint for slot %u ep %u\n", slot_id, + ep_index); ep->ep_state |= EP_HALTED; status = -EPIPE; break; case COMP_TRB_ERROR: - xhci_warn(xhci, "WARN: TRB error on endpoint\n"); + xhci_warn(xhci, + "WARN: TRB error for slot %u ep %u on endpoint\n", + slot_id, ep_index); status = -EILSEQ; break; case COMP_SPLIT_TRANSACTION_ERROR: case COMP_USB_TRANSACTION_ERROR: - xhci_dbg(xhci, "Transfer error on endpoint\n"); + xhci_dbg(xhci, "Transfer error for slot %u ep %u on endpoint\n", + slot_id, ep_index); status = -EPROTO; break; case COMP_BABBLE_DETECTED_ERROR: - xhci_dbg(xhci, "Babble error on endpoint\n"); + xhci_dbg(xhci, "Babble error for slot %u ep %u on endpoint\n", + slot_id, ep_index); status = -EOVERFLOW; break; case COMP_DATA_BUFFER_ERROR: - xhci_warn(xhci, "WARN: HC couldn't access mem fast enough\n"); + xhci_warn(xhci, + "WARN: HC couldn't access mem fast enough for slot %u ep %u\n", + slot_id, ep_index); status = -ENOSR; break; case COMP_BANDWIDTH_OVERRUN_ERROR: - xhci_warn(xhci, "WARN: bandwidth overrun event on endpoint\n"); + xhci_warn(xhci, + "WARN: bandwidth overrun event for slot %u ep %u on endpoint\n", + slot_id, ep_index); break; case COMP_ISOCH_BUFFER_OVERRUN: - xhci_warn(xhci, "WARN: buffer overrun event on endpoint\n"); + xhci_warn(xhci, + "WARN: buffer overrun event for slot %u ep %u on endpoint", + slot_id, ep_index); break; case COMP_RING_UNDERRUN: /* @@ -2360,7 +2414,9 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_index); goto cleanup; case COMP_INCOMPATIBLE_DEVICE_ERROR: - xhci_warn(xhci, "WARN: detect an incompatible device"); + xhci_warn(xhci, + "WARN: detect an incompatible device for slot %u ep %u", + slot_id, ep_index); status = -EPROTO; break; case COMP_MISSED_SERVICE_ERROR: @@ -2371,19 +2427,24 @@ static int handle_tx_event(struct xhci_hcd *xhci, * short transfer when process the ep_ring next time. */ ep->skip = true; - xhci_dbg(xhci, "Miss service interval error, set skip flag\n"); + xhci_dbg(xhci, + "Miss service interval error for slot %u ep %u, set skip flag\n", + slot_id, ep_index); goto cleanup; case COMP_NO_PING_RESPONSE_ERROR: ep->skip = true; - xhci_dbg(xhci, "No Ping response error, Skip one Isoc TD\n"); + xhci_dbg(xhci, + "No Ping response error for slot %u ep %u, Skip one Isoc TD\n", + slot_id, ep_index); goto cleanup; default: if (xhci_is_vendor_info_code(xhci, trb_comp_code)) { status = 0; break; } - xhci_warn(xhci, "ERROR Unknown event condition %u, HC probably busted\n", - trb_comp_code); + xhci_warn(xhci, + "ERROR Unknown event condition %u for slot %u ep %u , HC probably busted\n", + trb_comp_code, slot_id, ep_index); goto cleanup; } @@ -2402,15 +2463,11 @@ static int handle_tx_event(struct xhci_hcd *xhci, xhci_warn(xhci, "WARN Event TRB for slot %d ep %d with no TDs queued?\n", TRB_TO_SLOT_ID(le32_to_cpu(event->flags)), ep_index); - xhci_dbg(xhci, "Event TRB with TRB type ID %u\n", - (le32_to_cpu(event->flags) & - TRB_TYPE_BITMASK)>>10); - xhci_print_trb_offsets(xhci, (union xhci_trb *) event); } if (ep->skip) { ep->skip = false; - xhci_dbg(xhci, "td_list is empty while skip " - "flag set. Clear skip flag.\n"); + xhci_dbg(xhci, "td_list is empty while skip flag set. Clear skip flag for slot %u ep %u.\n", + slot_id, ep_index); } goto cleanup; } @@ -2418,8 +2475,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, /* We've skipped all the TDs on the ep ring when ep->skip set */ if (ep->skip && td_num == 0) { ep->skip = false; - xhci_dbg(xhci, "All tds on the ep_ring skipped. " - "Clear skip flag.\n"); + xhci_dbg(xhci, "All tds on the ep_ring skipped. Clear skip flag for slot %u ep %u.\n", + slot_id, ep_index); goto cleanup; } @@ -2478,7 +2535,9 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_ring->last_td_was_short = false; if (ep->skip) { - xhci_dbg(xhci, "Found td. Clear skip flag.\n"); + xhci_dbg(xhci, + "Found td. Clear skip flag for slot %u ep %u.\n", + slot_id, ep_index); ep->skip = false; } @@ -2495,7 +2554,9 @@ static int handle_tx_event(struct xhci_hcd *xhci, * the TD. */ if (trb_is_noop(ep_trb)) { - xhci_dbg(xhci, "ep_trb is a no-op TRB. Skip it\n"); + xhci_dbg(xhci, + "ep_trb is a no-op TRB. Skip it for slot %u ep %u\n", + slot_id, ep_index); goto cleanup; } @@ -2616,14 +2677,16 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) struct xhci_hcd *xhci = hcd_to_xhci(hcd); union xhci_trb *event_ring_deq; irqreturn_t ret = IRQ_NONE; + unsigned long flags; dma_addr_t deq; u64 temp_64; u32 status; - spin_lock(&xhci->lock); + spin_lock_irqsave(&xhci->lock, flags); /* Check if the xHC generated the interrupt, or the irq is shared */ status = readl(&xhci->op_regs->status); - if (status == 0xffffffff) { + if (status == ~(u32)0) { + xhci_hc_died(xhci); ret = IRQ_HANDLED; goto out; } @@ -2645,12 +2708,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) */ status |= STS_EINT; writel(status, &xhci->op_regs->status); - /* FIXME when MSI-X is supported and there are multiple vectors */ - /* Clear the MSI-X event interrupt status */ - if (hcd->irq) { + if (!hcd->msi_enabled) { u32 irq_pending; - /* Acknowledge the PCI interrupt */ irq_pending = readl(&xhci->ir_set->irq_pending); irq_pending |= IMAN_IP; writel(irq_pending, &xhci->ir_set->irq_pending); @@ -2695,7 +2755,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) ret = IRQ_HANDLED; out: - spin_unlock(&xhci->lock); + spin_unlock_irqrestore(&xhci->lock, flags); return ret; } @@ -3942,10 +4002,8 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci, /* This function gets called from contexts where it cannot sleep */ cmd = xhci_alloc_command(xhci, false, false, GFP_ATOMIC); - if (!cmd) { - xhci_warn(xhci, "WARN Cannot submit Set TR Deq Ptr: ENOMEM\n"); + if (!cmd) return; - } ep->queued_deq_seg = deq_state->new_deq_seg; ep->queued_deq_ptr = deq_state->new_deq_ptr; diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 1ac2cdf8eece9542b551a525a338b67d2e8b30c0..8ce96de10e8a1cb0adbb1eb61b7abe3e5b084573 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -231,6 +231,7 @@ DECLARE_EVENT_CLASS(xhci_log_urb, __field(int, epnum) __field(int, dir_in) __field(int, type) + __field(int, slot_id) ), TP_fast_assign( __entry->urb = urb; @@ -245,8 +246,9 @@ DECLARE_EVENT_CLASS(xhci_log_urb, __entry->epnum = usb_endpoint_num(&urb->ep->desc); __entry->dir_in = usb_endpoint_dir_in(&urb->ep->desc); __entry->type = usb_endpoint_type(&urb->ep->desc); + __entry->slot_id = urb->dev->slot_id; ), - TP_printk("ep%d%s-%s: urb %p pipe %u length %d/%d sgs %d/%d stream %d flags %08x", + TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x", __entry->epnum, __entry->dir_in ? "in" : "out", ({ char *s; switch (__entry->type) { @@ -264,8 +266,8 @@ DECLARE_EVENT_CLASS(xhci_log_urb, break; default: s = "UNKNOWN"; - } s; }), __entry->urb, __entry->pipe, __entry->actual, - __entry->length, __entry->num_mapped_sgs, + } s; }), __entry->urb, __entry->pipe, __entry->slot_id, + __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream, __entry->flags ) ); @@ -285,6 +287,172 @@ DEFINE_EVENT(xhci_log_urb, xhci_urb_dequeue, TP_ARGS(urb) ); +DECLARE_EVENT_CLASS(xhci_log_ep_ctx, + TP_PROTO(struct xhci_ep_ctx *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(u32, info) + __field(u32, info2) + __field(u64, deq) + __field(u32, tx_info) + ), + TP_fast_assign( + __entry->info = le32_to_cpu(ctx->ep_info); + __entry->info2 = le32_to_cpu(ctx->ep_info2); + __entry->deq = le64_to_cpu(ctx->deq); + __entry->tx_info = le32_to_cpu(ctx->tx_info); + ), + TP_printk("%s", xhci_decode_ep_context(__entry->info, + __entry->info2, __entry->deq, __entry->tx_info) + ) +); + +DEFINE_EVENT(xhci_log_ep_ctx, xhci_handle_cmd_stop_ep, + TP_PROTO(struct xhci_ep_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_ep_ctx, xhci_handle_cmd_set_deq_ep, + TP_PROTO(struct xhci_ep_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_ep_ctx, xhci_handle_cmd_reset_ep, + TP_PROTO(struct xhci_ep_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_ep_ctx, xhci_handle_cmd_config_ep, + TP_PROTO(struct xhci_ep_ctx *ctx), + TP_ARGS(ctx) +); + +DECLARE_EVENT_CLASS(xhci_log_slot_ctx, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(u32, info) + __field(u32, info2) + __field(u32, tt_info) + __field(u32, state) + ), + TP_fast_assign( + __entry->info = le32_to_cpu(ctx->dev_info); + __entry->info2 = le32_to_cpu(ctx->dev_info2); + __entry->tt_info = le64_to_cpu(ctx->tt_info); + __entry->state = le32_to_cpu(ctx->dev_state); + ), + TP_printk("%s", xhci_decode_slot_context(__entry->info, + __entry->info2, __entry->tt_info, + __entry->state) + ) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_alloc_dev, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_free_dev, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_handle_cmd_disable_slot, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_discover_or_reset_device, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_setup_device_slot, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_handle_cmd_addr_dev, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_handle_cmd_reset_dev, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DEFINE_EVENT(xhci_log_slot_ctx, xhci_handle_cmd_set_deq, + TP_PROTO(struct xhci_slot_ctx *ctx), + TP_ARGS(ctx) +); + +DECLARE_EVENT_CLASS(xhci_log_ring, + TP_PROTO(struct xhci_ring *ring), + TP_ARGS(ring), + TP_STRUCT__entry( + __field(u32, type) + __field(void *, ring) + __field(dma_addr_t, enq) + __field(dma_addr_t, deq) + __field(dma_addr_t, enq_seg) + __field(dma_addr_t, deq_seg) + __field(unsigned int, num_segs) + __field(unsigned int, stream_id) + __field(unsigned int, cycle_state) + __field(unsigned int, num_trbs_free) + __field(unsigned int, bounce_buf_len) + ), + TP_fast_assign( + __entry->ring = ring; + __entry->type = ring->type; + __entry->num_segs = ring->num_segs; + __entry->stream_id = ring->stream_id; + __entry->enq_seg = ring->enq_seg->dma; + __entry->deq_seg = ring->deq_seg->dma; + __entry->cycle_state = ring->cycle_state; + __entry->num_trbs_free = ring->num_trbs_free; + __entry->bounce_buf_len = ring->bounce_buf_len; + __entry->enq = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); + __entry->deq = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); + ), + TP_printk("%s %p: enq %pad(%pad) deq %pad(%pad) segs %d stream %d free_trbs %d bounce %d cycle %d", + xhci_ring_type_string(__entry->type), __entry->ring, + &__entry->enq, &__entry->enq_seg, + &__entry->deq, &__entry->deq_seg, + __entry->num_segs, + __entry->stream_id, + __entry->num_trbs_free, + __entry->bounce_buf_len, + __entry->cycle_state + ) +); + +DEFINE_EVENT(xhci_log_ring, xhci_ring_alloc, + TP_PROTO(struct xhci_ring *ring), + TP_ARGS(ring) +); + +DEFINE_EVENT(xhci_log_ring, xhci_ring_free, + TP_PROTO(struct xhci_ring *ring), + TP_ARGS(ring) +); + +DEFINE_EVENT(xhci_log_ring, xhci_ring_expansion, + TP_PROTO(struct xhci_ring *ring), + TP_ARGS(ring) +); + +DEFINE_EVENT(xhci_log_ring, xhci_inc_enq, + TP_PROTO(struct xhci_ring *ring), + TP_ARGS(ring) +); + +DEFINE_EVENT(xhci_log_ring, xhci_inc_deq, + TP_PROTO(struct xhci_ring *ring), + TP_ARGS(ring) +); #endif /* __XHCI_TRACE_H */ /* this part must be outside header guard */ diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 953fd8f62df0787b0479286c12513656f141614f..30f47d92a6104ef52ae5e782e1f305fa0a517cad 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -125,7 +125,7 @@ int xhci_halt(struct xhci_hcd *xhci) /* * Set the run bit and wait for the host to be running. */ -static int xhci_start(struct xhci_hcd *xhci) +int xhci_start(struct xhci_hcd *xhci) { u32 temp; int ret; @@ -216,31 +216,21 @@ int xhci_reset(struct xhci_hcd *xhci) return ret; } -#ifdef CONFIG_PCI -static int xhci_free_msi(struct xhci_hcd *xhci) -{ - int i; - - if (!xhci->msix_entries) - return -EINVAL; - - for (i = 0; i < xhci->msix_count; i++) - if (xhci->msix_entries[i].vector) - free_irq(xhci->msix_entries[i].vector, - xhci_to_hcd(xhci)); - return 0; -} +#ifdef CONFIG_USB_PCI /* * Set up MSI */ static int xhci_setup_msi(struct xhci_hcd *xhci) { int ret; + /* + * TODO:Check with MSI Soc for sysdev + */ struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); - ret = pci_enable_msi(pdev); - if (ret) { + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, "failed to allocate MSI entry"); return ret; @@ -251,34 +241,12 @@ static int xhci_setup_msi(struct xhci_hcd *xhci) if (ret) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, "disable MSI interrupt"); - pci_disable_msi(pdev); + pci_free_irq_vectors(pdev); } return ret; } -/* - * Free IRQs - * free all IRQs request - */ -static void xhci_free_irq(struct xhci_hcd *xhci) -{ - struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller); - int ret; - - /* return if using legacy interrupt */ - if (xhci_to_hcd(xhci)->irq > 0) - return; - - ret = xhci_free_msi(xhci); - if (!ret) - return; - if (pdev->irq > 0) - free_irq(pdev->irq, xhci_to_hcd(xhci)); - - return; -} - /* * Set up MSI-X */ @@ -298,28 +266,17 @@ static int xhci_setup_msix(struct xhci_hcd *xhci) xhci->msix_count = min(num_online_cpus() + 1, HCS_MAX_INTRS(xhci->hcs_params1)); - xhci->msix_entries = - kmalloc((sizeof(struct msix_entry))*xhci->msix_count, - GFP_KERNEL); - if (!xhci->msix_entries) - return -ENOMEM; - - for (i = 0; i < xhci->msix_count; i++) { - xhci->msix_entries[i].entry = i; - xhci->msix_entries[i].vector = 0; - } - - ret = pci_enable_msix_exact(pdev, xhci->msix_entries, xhci->msix_count); - if (ret) { + ret = pci_alloc_irq_vectors(pdev, xhci->msix_count, xhci->msix_count, + PCI_IRQ_MSIX); + if (ret < 0) { xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Failed to enable MSI-X"); - goto free_entries; + return ret; } for (i = 0; i < xhci->msix_count; i++) { - ret = request_irq(xhci->msix_entries[i].vector, - xhci_msi_irq, - 0, "xhci_hcd", xhci_to_hcd(xhci)); + ret = request_irq(pci_irq_vector(pdev, i), xhci_msi_irq, 0, + "xhci_hcd", xhci_to_hcd(xhci)); if (ret) goto disable_msix; } @@ -329,11 +286,9 @@ static int xhci_setup_msix(struct xhci_hcd *xhci) disable_msix: xhci_dbg_trace(xhci, trace_xhci_dbg_init, "disable MSI-X interrupt"); - xhci_free_irq(xhci); - pci_disable_msix(pdev); -free_entries: - kfree(xhci->msix_entries); - xhci->msix_entries = NULL; + while (--i >= 0) + free_irq(pci_irq_vector(pdev, i), xhci_to_hcd(xhci)); + pci_free_irq_vectors(pdev); return ret; } @@ -346,27 +301,33 @@ static void xhci_cleanup_msix(struct xhci_hcd *xhci) if (xhci->quirks & XHCI_PLAT) return; - xhci_free_irq(xhci); + /* return if using legacy interrupt */ + if (hcd->irq > 0) + return; + + if (hcd->msix_enabled) { + int i; - if (xhci->msix_entries) { - pci_disable_msix(pdev); - kfree(xhci->msix_entries); - xhci->msix_entries = NULL; + for (i = 0; i < xhci->msix_count; i++) + free_irq(pci_irq_vector(pdev, i), xhci_to_hcd(xhci)); } else { - pci_disable_msi(pdev); + free_irq(pci_irq_vector(pdev, 0), xhci_to_hcd(xhci)); } + pci_free_irq_vectors(pdev); hcd->msix_enabled = 0; - return; } static void __maybe_unused xhci_msix_sync_irqs(struct xhci_hcd *xhci) { - int i; + struct usb_hcd *hcd = xhci_to_hcd(xhci); + + if (hcd->msix_enabled) { + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + int i; - if (xhci->msix_entries) { for (i = 0; i < xhci->msix_count; i++) - synchronize_irq(xhci->msix_entries[i].vector); + synchronize_irq(pci_irq_vector(pdev, i)); } } @@ -398,9 +359,10 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) /* fall back to msi*/ ret = xhci_setup_msi(xhci); - if (!ret) - /* hcd->irq is 0, we have MSI */ + if (!ret) { + hcd->msi_enabled = 1; return 0; + } if (!pdev->irq) { xhci_err(xhci, "No msi-x/msi found and no IRQ in BIOS\n"); @@ -539,7 +501,7 @@ static int xhci_all_ports_seen_u0(struct xhci_hcd *xhci) * device contexts (?), set up a command ring segment (or two?), create event * ring (one for now). */ -int xhci_init(struct usb_hcd *hcd) +static int xhci_init(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); int retval = 0; @@ -619,16 +581,10 @@ int xhci_run(struct usb_hcd *hcd) if (ret) return ret; - xhci_dbg(xhci, "Command ring memory map follows:\n"); - xhci_debug_ring(xhci, xhci->cmd_ring); - xhci_dbg_ring_ptrs(xhci, xhci->cmd_ring); xhci_dbg_cmd_ptrs(xhci); xhci_dbg(xhci, "ERST memory map follows:\n"); xhci_dbg_erst(xhci, &xhci->erst); - xhci_dbg(xhci, "Event ring:\n"); - xhci_debug_ring(xhci, xhci->event_ring); - xhci_dbg_ring_ptrs(xhci, xhci->event_ring); temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); temp_64 &= ~ERST_PTR_MASK; xhci_dbg_trace(xhci, trace_xhci_dbg_init, @@ -661,9 +617,11 @@ int xhci_run(struct usb_hcd *hcd) if (xhci->quirks & XHCI_NEC_HOST) { struct xhci_command *command; + command = xhci_alloc_command(xhci, false, false, GFP_KERNEL); if (!command) return -ENOMEM; + xhci_queue_vendor_command(xhci, command, 0, 0, 0, TRB_TYPE(TRB_NEC_GET_FW)); } @@ -682,28 +640,28 @@ EXPORT_SYMBOL_GPL(xhci_run); * Disable device contexts, disable IRQs, and quiesce the HC. * Reset the HC, finish any completed transactions, and cleanup memory. */ -void xhci_stop(struct usb_hcd *hcd) +static void xhci_stop(struct usb_hcd *hcd) { u32 temp; struct xhci_hcd *xhci = hcd_to_xhci(hcd); mutex_lock(&xhci->mutex); - if (!(xhci->xhc_state & XHCI_STATE_HALTED)) { - spin_lock_irq(&xhci->lock); - - xhci->xhc_state |= XHCI_STATE_HALTED; - xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; - xhci_halt(xhci); - xhci_reset(xhci); - spin_unlock_irq(&xhci->lock); - } - + /* Only halt host and free memory after both hcds are removed */ if (!usb_hcd_is_primary_hcd(hcd)) { + /* usb core will free this hcd shortly, unset pointer */ + xhci->shared_hcd = NULL; mutex_unlock(&xhci->mutex); return; } + spin_lock_irq(&xhci->lock); + xhci->xhc_state |= XHCI_STATE_HALTED; + xhci->cmd_ring_state = CMD_RING_STATE_STOPPED; + xhci_halt(xhci); + xhci_reset(xhci); + spin_unlock_irq(&xhci->lock); + xhci_cleanup_msix(xhci); /* Deleting Compliance Mode Recovery Timer */ @@ -721,7 +679,7 @@ void xhci_stop(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Disabling event ring interrupts"); temp = readl(&xhci->op_regs->status); - writel(temp & ~STS_EINT, &xhci->op_regs->status); + writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); temp = readl(&xhci->ir_set->irq_pending); writel(ER_IRQ_DISABLE(temp), &xhci->ir_set->irq_pending); xhci_print_ir_set(xhci, 0); @@ -743,12 +701,12 @@ void xhci_stop(struct usb_hcd *hcd) * * This will only ever be called with the main usb_hcd (the USB3 roothub). */ -void xhci_shutdown(struct usb_hcd *hcd) +static void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); if (xhci->quirks & XHCI_SPURIOUS_REBOOT) - usb_disable_xhci_ports(to_pci_dev(hcd->self.controller)); + usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); spin_lock_irq(&xhci->lock); xhci_halt(xhci); @@ -765,7 +723,7 @@ void xhci_shutdown(struct usb_hcd *hcd) /* Yet another workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - pci_set_power_state(to_pci_dev(hcd->self.controller), PCI_D3hot); + pci_set_power_state(to_pci_dev(hcd->self.sysdev), PCI_D3hot); } #ifdef CONFIG_PM @@ -1054,7 +1012,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "// Disabling event ring interrupts\n"); temp = readl(&xhci->op_regs->status); - writel(temp & ~STS_EINT, &xhci->op_regs->status); + writel((temp & ~0x1fff) | STS_EINT, &xhci->op_regs->status); temp = readl(&xhci->ir_set->irq_pending); writel(ER_IRQ_DISABLE(temp), &xhci->ir_set->irq_pending); xhci_print_ir_set(xhci, 0); @@ -1176,7 +1134,7 @@ unsigned int xhci_get_endpoint_address(unsigned int ep_index) * endpoint index to create a bitmask. The slot context is bit 0, endpoint 0 is * bit 1, etc. */ -unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc) +static unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc) { return 1 << (xhci_get_endpoint_index(desc) + 1); } @@ -1185,7 +1143,7 @@ unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc) * endpoint index to create a bitmask. The slot context is bit 0, endpoint 0 is * bit 1, etc. */ -unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index) +static unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index) { return 1 << (ep_index + 1); } @@ -1306,11 +1264,6 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id, ctrl_ctx->add_flags = cpu_to_le32(EP0_FLAG); ctrl_ctx->drop_flags = 0; - xhci_dbg(xhci, "Slot %d input context\n", slot_id); - xhci_dbg_ctx(xhci, command->in_ctx, ep_index); - xhci_dbg(xhci, "Slot %d output context\n", slot_id); - xhci_dbg_ctx(xhci, out_ctx, ep_index); - ret = xhci_configure_endpoint(xhci, urb->dev, command, true, false); @@ -1329,7 +1282,7 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id, * non-error returns are a promise to giveback() the urb later * we drop ownership so next owner (or urb unlink) can get it */ -int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) +static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); unsigned long flags; @@ -1465,7 +1418,7 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) * Note that this function can be called in any context, or so says * usb_hcd_unlink_urb() */ -int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) +static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) { unsigned long flags; int ret, i; @@ -1501,10 +1454,16 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) if (!ep || !ep_ring) goto err_giveback; + /* If xHC is dead take it down and return ALL URBs in xhci_hc_died() */ temp = readl(&xhci->op_regs->status); - if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) { + if (temp == ~(u32)0 || xhci->xhc_state & XHCI_STATE_DYING) { + xhci_hc_died(xhci); + goto done; + } + + if (xhci->xhc_state & XHCI_STATE_HALTED) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "HW died, freeing TD."); + "HC halted, freeing TD manually."); for (i = urb_priv->num_tds_done; i < urb_priv->num_tds; i++) { @@ -1576,7 +1535,7 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * disabled, so there's no need for mutual exclusion to protect * the xhci->devs[slot_id] structure. */ -int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, +static int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep) { struct xhci_hcd *xhci; @@ -1659,7 +1618,7 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, * configuration or alt setting is installed in the device, so there's no need * for mutual exclusion to protect the xhci->devs[slot_id] structure. */ -int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, +static int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep) { struct xhci_hcd *xhci; @@ -1805,7 +1764,7 @@ static int xhci_configure_endpoint_result(struct xhci_hcd *xhci, switch (*cmd_status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for configure endpoint command\n"); ret = -ETIME; break; @@ -1852,11 +1811,10 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci, struct usb_device *udev, u32 *cmd_status) { int ret; - struct xhci_virt_device *virt_dev = xhci->devs[udev->slot_id]; switch (*cmd_status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for evaluate context command\n"); ret = -ETIME; break; @@ -1873,7 +1831,6 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci, case COMP_CONTEXT_STATE_ERROR: dev_warn(&udev->dev, "WARN: invalid context state for evaluate context command.\n"); - xhci_dbg_ctx(xhci, virt_dev->out_ctx, 1); ret = -EINVAL; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: @@ -2330,7 +2287,7 @@ static unsigned int xhci_get_ss_bw_consumed(struct xhci_bw_info *ep_bw) } -void xhci_drop_ep_from_interval_table(struct xhci_hcd *xhci, +static void xhci_drop_ep_from_interval_table(struct xhci_hcd *xhci, struct xhci_bw_info *ep_bw, struct xhci_interval_bw_table *bw_table, struct usb_device *udev, @@ -2595,6 +2552,12 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, return -EINVAL; spin_lock_irqsave(&xhci->lock, flags); + + if (xhci->xhc_state & XHCI_STATE_DYING) { + spin_unlock_irqrestore(&xhci->lock, flags); + return -ESHUTDOWN; + } + virt_dev = xhci->devs[udev->slot_id]; ctrl_ctx = xhci_get_input_control_ctx(command->in_ctx); @@ -2689,7 +2652,7 @@ static void xhci_check_bw_drop_ep_streams(struct xhci_hcd *xhci, * else should be touching the xhci->devs[slot_id] structure, so we * don't need to take the xhci->lock for manipulating that. */ -int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { int i; int ret = 0; @@ -2746,9 +2709,6 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) break; } } - xhci_dbg(xhci, "New Input Control Context:\n"); - xhci_dbg_ctx(xhci, virt_dev->in_ctx, - LAST_CTX_TO_EP_NUM(le32_to_cpu(slot_ctx->dev_info))); ret = xhci_configure_endpoint(xhci, udev, command, false, false); @@ -2756,10 +2716,6 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) /* Callee should call reset_bandwidth() */ goto command_cleanup; - xhci_dbg(xhci, "Output context after successful config ep cmd:\n"); - xhci_dbg_ctx(xhci, virt_dev->out_ctx, - LAST_CTX_TO_EP_NUM(le32_to_cpu(slot_ctx->dev_info))); - /* Free any rings that were dropped, but not changed. */ for (i = 1; i < 31; i++) { if ((le32_to_cpu(ctrl_ctx->drop_flags) & (1 << (i + 1))) && @@ -2793,7 +2749,7 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) return ret; } -void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +static void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci; struct xhci_virt_device *virt_dev; @@ -2826,9 +2782,6 @@ static void xhci_setup_input_ctx_for_config_ep(struct xhci_hcd *xhci, ctrl_ctx->drop_flags = cpu_to_le32(drop_flags); xhci_slot_copy(xhci, in_ctx, out_ctx); ctrl_ctx->add_flags |= cpu_to_le32(SLOT_FLAG); - - xhci_dbg(xhci, "Input Context:\n"); - xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags)); } static void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci, @@ -2919,7 +2872,7 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, * Context: in_interrupt */ -void xhci_endpoint_reset(struct usb_hcd *hcd, +static void xhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep) { struct xhci_hcd *xhci; @@ -3095,7 +3048,7 @@ static u32 xhci_calculate_no_streams_bitmask(struct xhci_hcd *xhci, * hardware or endpoints claim they can't support the number of requested * stream IDs. */ -int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, +static int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, unsigned int num_streams, gfp_t mem_flags) { @@ -3129,10 +3082,9 @@ int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, } config_cmd = xhci_alloc_command(xhci, true, true, mem_flags); - if (!config_cmd) { - xhci_dbg(xhci, "Could not allocate xHCI command structure.\n"); + if (!config_cmd) return -ENOMEM; - } + ctrl_ctx = xhci_get_input_control_ctx(config_cmd->in_ctx); if (!ctrl_ctx) { xhci_warn(xhci, "%s: Could not get input context, bad type.\n", @@ -3259,7 +3211,7 @@ int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, * Modify the endpoint context state, submit a configure endpoint command, * and free all endpoint rings for streams if that completes successfully. */ -int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev, +static int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint **eps, unsigned int num_eps, gfp_t mem_flags) { @@ -3391,7 +3343,8 @@ void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci, * re-initialization during S3/S4. In this case, call xhci_alloc_dev() to * re-allocate the device. */ -int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_discover_or_reset_device(struct usb_hcd *hcd, + struct usb_device *udev) { int ret, i; unsigned long flags; @@ -3443,6 +3396,8 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) SLOT_STATE_DISABLED) return 0; + trace_xhci_discover_or_reset_device(slot_ctx); + xhci_dbg(xhci, "Resetting device with slot ID %u\n", slot_id); /* Allocate the command structure that holds the struct completion. * Assume we're in process context, since the normal device reset @@ -3478,7 +3433,7 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) ret = reset_device_cmd->status; switch (ret) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout waiting for reset device command\n"); ret = -ETIME; goto command_cleanup; @@ -3539,9 +3494,6 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) } /* If necessary, update the number of active TTs on this root port */ xhci_update_tt_active_eps(xhci, virt_dev, old_active_eps); - - xhci_dbg(xhci, "Output context after successful reset device cmd:\n"); - xhci_dbg_ctx(xhci, virt_dev->out_ctx, last_freed_endpoint); ret = 0; command_cleanup: @@ -3554,12 +3506,11 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) * disconnected, and all traffic has been stopped and the endpoints have been * disabled. Free any HC data structures associated with that device. */ -void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) +static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct xhci_virt_device *virt_dev; - unsigned long flags; - u32 state; + struct xhci_slot_ctx *slot_ctx; int i, ret; struct xhci_command *command; @@ -3587,6 +3538,8 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) } virt_dev = xhci->devs[udev->slot_id]; + slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->out_ctx); + trace_xhci_free_dev(slot_ctx); /* Stop any wayward timer functions (which may grab the lock) */ for (i = 0; i < 31; i++) { @@ -3594,30 +3547,50 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } + xhci_disable_slot(xhci, command, udev->slot_id); + /* + * Event command completion handler will free any data structures + * associated with the slot. XXX Can free sleep? + */ +} + +int xhci_disable_slot(struct xhci_hcd *xhci, struct xhci_command *command, + u32 slot_id) +{ + unsigned long flags; + u32 state; + int ret = 0; + struct xhci_virt_device *virt_dev; + + virt_dev = xhci->devs[slot_id]; + if (!virt_dev) + return -EINVAL; + if (!command) + command = xhci_alloc_command(xhci, false, false, GFP_KERNEL); + if (!command) + return -ENOMEM; + spin_lock_irqsave(&xhci->lock, flags); /* Don't disable the slot if the host controller is dead. */ state = readl(&xhci->op_regs->status); if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING) || (xhci->xhc_state & XHCI_STATE_HALTED)) { - xhci_free_virt_device(xhci, udev->slot_id); + xhci_free_virt_device(xhci, slot_id); spin_unlock_irqrestore(&xhci->lock, flags); kfree(command); - return; + return ret; } - if (xhci_queue_slot_control(xhci, command, TRB_DISABLE_SLOT, - udev->slot_id)) { + ret = xhci_queue_slot_control(xhci, command, TRB_DISABLE_SLOT, + slot_id); + if (ret) { spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "FIXME: allocate a command ring segment\n"); - return; + return ret; } xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); - - /* - * Event command completion handler will free any data structures - * associated with the slot. XXX Can free sleep? - */ + return ret; } /* @@ -3650,6 +3623,8 @@ static int xhci_reserve_host_control_ep_resources(struct xhci_hcd *xhci) int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_virt_device *vdev; + struct xhci_slot_ctx *slot_ctx; unsigned long flags; int ret, slot_id; struct xhci_command *command; @@ -3705,6 +3680,10 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_warn(xhci, "Could not allocate xHCI USB device data structures\n"); goto disable_slot; } + vdev = xhci->devs[slot_id]; + slot_ctx = xhci_get_slot_ctx(xhci, vdev->out_ctx); + trace_xhci_alloc_dev(slot_ctx); + udev->slot_id = slot_id; #ifndef CONFIG_USB_DEFAULT_PERSIST @@ -3724,15 +3703,10 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) disable_slot: /* Disable slot, if we can do it without mem alloc */ - spin_lock_irqsave(&xhci->lock, flags); kfree(command->completion); command->completion = NULL; command->status = 0; - if (!xhci_queue_slot_control(xhci, command, TRB_DISABLE_SLOT, - udev->slot_id)) - xhci_ring_cmd_db(xhci); - spin_unlock_irqrestore(&xhci->lock, flags); - return 0; + return xhci_disable_slot(xhci, command, udev->slot_id); } /* @@ -3779,9 +3753,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, ret = -EINVAL; goto out; } + slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->out_ctx); + trace_xhci_setup_device_slot(slot_ctx); if (setup == SETUP_CONTEXT_ONLY) { - slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->out_ctx); if (GET_SLOT_STATE(le32_to_cpu(slot_ctx->dev_state)) == SLOT_STATE_DEFAULT) { xhci_dbg(xhci, "Slot already in default state\n"); @@ -3818,8 +3793,6 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, ctrl_ctx->add_flags = cpu_to_le32(SLOT_FLAG | EP0_FLAG); ctrl_ctx->drop_flags = 0; - xhci_dbg(xhci, "Slot ID %d Input Context:\n", udev->slot_id); - xhci_dbg_ctx(xhci, virt_dev->in_ctx, 2); trace_xhci_address_ctx(xhci, virt_dev->in_ctx, le32_to_cpu(slot_ctx->dev_info) >> 27); @@ -3845,7 +3818,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, */ switch (command->status) { case COMP_COMMAND_ABORTED: - case COMP_STOPPED: + case COMP_COMMAND_RING_STOPPED: xhci_warn(xhci, "Timeout while waiting for setup device command\n"); ret = -ETIME; break; @@ -3872,8 +3845,6 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, xhci_err(xhci, "ERROR: unexpected setup %s command completion code 0x%x.\n", act, command->status); - xhci_dbg(xhci, "Slot ID %d Output Context:\n", udev->slot_id); - xhci_dbg_ctx(xhci, virt_dev->out_ctx, 2); trace_xhci_address_ctx(xhci, virt_dev->out_ctx, 1); ret = -EINVAL; break; @@ -3892,17 +3863,12 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, xhci_dbg_trace(xhci, trace_xhci_dbg_address, "Output Context DMA address = %#08llx", (unsigned long long)virt_dev->out_ctx->dma); - xhci_dbg(xhci, "Slot ID %d Input Context:\n", udev->slot_id); - xhci_dbg_ctx(xhci, virt_dev->in_ctx, 2); trace_xhci_address_ctx(xhci, virt_dev->in_ctx, le32_to_cpu(slot_ctx->dev_info) >> 27); - xhci_dbg(xhci, "Slot ID %d Output Context:\n", udev->slot_id); - xhci_dbg_ctx(xhci, virt_dev->out_ctx, 2); /* * USB core uses address 1 for the roothubs, so we add one to the * address given back to us by the HC. */ - slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->out_ctx); trace_xhci_address_ctx(xhci, virt_dev->out_ctx, le32_to_cpu(slot_ctx->dev_info) >> 27); /* Zero the input context control for later use */ @@ -3921,12 +3887,12 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, return ret; } -int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) { return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ADDRESS); } -int xhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) { return xhci_setup_device(hcd, udev, SETUP_CONTEXT_ONLY); } @@ -4003,14 +3969,10 @@ static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci, xhci_dbg_trace(xhci, trace_xhci_dbg_context_change, "Set up evaluate context for LPM MEL change."); - xhci_dbg(xhci, "Slot %u Input Context:\n", udev->slot_id); - xhci_dbg_ctx(xhci, command->in_ctx, 0); /* Issue and wait for the evaluate context command. */ ret = xhci_configure_endpoint(xhci, udev, command, true, true); - xhci_dbg(xhci, "Slot %u Output Context:\n", udev->slot_id); - xhci_dbg_ctx(xhci, virt_dev->out_ctx, 0); if (!ret) { spin_lock_irqsave(&xhci->lock, flags); @@ -4083,7 +4045,7 @@ static int xhci_calculate_usb2_hw_lpm_params(struct usb_device *udev) return PORT_BESLD(besld) | PORT_L1_TIMEOUT(l1) | PORT_HIRDM(hirdm); } -int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, +static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, struct usb_device *udev, int enable) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -4207,7 +4169,7 @@ static int xhci_check_usb2_port_capability(struct xhci_hcd *xhci, int port, return 0; } -int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); int portnum = udev->portnum - 1; @@ -4616,7 +4578,7 @@ static int calculate_max_exit_latency(struct usb_device *udev, } /* Returns the USB3 hub-encoded value for the U1/U2 timeout. */ -int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, +static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { struct xhci_hcd *xhci; @@ -4647,7 +4609,7 @@ int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, return hub_encoded_timeout; } -int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, +static int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { struct xhci_hcd *xhci; @@ -4663,24 +4625,24 @@ int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, } #else /* CONFIG_PM */ -int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, +static int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, struct usb_device *udev, int enable) { return 0; } -int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev) +static int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev) { return 0; } -int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, +static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { return USB3_LPM_DISABLED; } -int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, +static int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { return 0; @@ -4692,7 +4654,7 @@ int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, /* Once a hub descriptor is fetched for a device, we need to update the xHC's * internal data structures for the device. */ -int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, +static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -4713,11 +4675,11 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, xhci_warn(xhci, "Cannot update hub desc for unknown device.\n"); return -EINVAL; } + config_cmd = xhci_alloc_command(xhci, true, true, mem_flags); - if (!config_cmd) { - xhci_dbg(xhci, "Could not allocate xHCI command structure.\n"); + if (!config_cmd) return -ENOMEM; - } + ctrl_ctx = xhci_get_input_control_ctx(config_cmd->in_ctx); if (!ctrl_ctx) { xhci_warn(xhci, "%s: Could not get input context, bad type.\n", @@ -4778,8 +4740,6 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, xhci_dbg(xhci, "Set up %s for hub device.\n", (xhci->hci_version > 0x95) ? "configure endpoint" : "evaluate context"); - xhci_dbg(xhci, "Slot %u Input Context:\n", hdev->slot_id); - xhci_dbg_ctx(xhci, config_cmd->in_ctx, 0); /* Issue and wait for the configure endpoint or * evaluate context command. @@ -4791,14 +4751,11 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ret = xhci_configure_endpoint(xhci, hdev, config_cmd, true, false); - xhci_dbg(xhci, "Slot %u Output Context:\n", hdev->slot_id); - xhci_dbg_ctx(xhci, vdev->out_ctx, 0); - xhci_free_command(xhci, config_cmd); return ret; } -int xhci_get_frame(struct usb_hcd *hcd) +static int xhci_get_frame(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); /* EHCI mods by the periodic size. Why? */ @@ -4808,7 +4765,11 @@ int xhci_get_frame(struct usb_hcd *hcd) int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) { struct xhci_hcd *xhci; - struct device *dev = hcd->self.controller; + /* + * TODO: Check with DWC3 clients for sysdev according to + * quirks + */ + struct device *dev = hcd->self.sysdev; int retval; /* Accept arbitrarily long scatter-gather lists */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index da3eb695fe5407a6e8fc5dbab8ac5b97a54e32bd..73a28a986d5e408f14e9b526d0d66ad18072d77d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -425,6 +425,7 @@ struct xhci_op_regs { #define PORT_L1DS_MASK (0xff << 8) #define PORT_L1DS(p) (((p) & 0xff) << 8) #define PORT_HLE (1 << 16) +#define PORT_TEST_MODE_SHIFT 28 /* USB3 Protocol PORTLI Port Link Information */ #define PORT_RX_LANES(p) (((p) >> 16) & 0xf) @@ -617,6 +618,7 @@ struct xhci_slot_ctx { #define ROUTE_STRING_MASK (0xfffff) /* Device speed - values defined by PORTSC Device Speed field - 20:23 */ #define DEV_SPEED (0xf << 20) +#define GET_DEV_SPEED(n) (((n) & DEV_SPEED) >> 20) /* bit 24 reserved */ /* Is this LS/FS device connected through a HS hub? - bit 25 */ #define DEV_MTT (0x1 << 25) @@ -637,6 +639,7 @@ struct xhci_slot_ctx { #define DEVINFO_TO_ROOT_HUB_PORT(p) (((p) >> 16) & 0xff) /* Maximum number of ports under a hub device */ #define XHCI_MAX_PORTS(p) (((p) & 0xff) << 24) +#define DEVINFO_TO_MAX_PORTS(p) (((p) & (0xff << 24)) >> 24) /* tt_info bitmasks */ /* @@ -651,6 +654,7 @@ struct xhci_slot_ctx { */ #define TT_PORT (0xff << 8) #define TT_THINK_TIME(p) (((p) & 0x3) << 16) +#define GET_TT_THINK_TIME(p) (((p) & (0x3 << 16)) >> 16) /* dev_state bitmasks */ /* USB device address - assigned by the HC */ @@ -1562,10 +1566,8 @@ struct xhci_ring { struct xhci_segment *last_seg; union xhci_trb *enqueue; struct xhci_segment *enq_seg; - unsigned int enq_updates; union xhci_trb *dequeue; struct xhci_segment *deq_seg; - unsigned int deq_updates; struct list_head td_list; /* * Write the cycle state into the TRB cycle field to give ownership of @@ -1604,7 +1606,6 @@ struct xhci_scratchpad { u64 *sp_array; dma_addr_t sp_dma; void **sp_buffers; - dma_addr_t *sp_dma_buffers; }; struct urb_priv { @@ -1722,7 +1723,6 @@ struct xhci_hcd { int page_shift; /* msi-x vectors */ int msix_count; - struct msix_entry *msix_entries; /* optional clock */ struct clk *clk; /* data structures */ @@ -1818,6 +1818,7 @@ struct xhci_hcd { #define XHCI_MISSING_CAS (1 << 24) /* For controller with a broken Port Disable implementation */ #define XHCI_BROKEN_PORT_PED (1 << 25) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) unsigned int num_active_eps; unsigned int limit_active_eps; @@ -1843,6 +1844,7 @@ struct xhci_hcd { /* Compliance Mode Recovery Data */ struct timer_list comp_mode_recovery_timer; u32 port_status_u0; + u16 test_mode; /* Compliance Mode Timer Triggered every 2 seconds */ #define COMP_MODE_RCVRY_MSECS 2000 @@ -1918,19 +1920,10 @@ void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num); void xhci_print_registers(struct xhci_hcd *xhci); void xhci_dbg_regs(struct xhci_hcd *xhci); void xhci_print_run_regs(struct xhci_hcd *xhci); -void xhci_print_trb_offsets(struct xhci_hcd *xhci, union xhci_trb *trb); -void xhci_debug_trb(struct xhci_hcd *xhci, union xhci_trb *trb); -void xhci_debug_segment(struct xhci_hcd *xhci, struct xhci_segment *seg); -void xhci_debug_ring(struct xhci_hcd *xhci, struct xhci_ring *ring); void xhci_dbg_erst(struct xhci_hcd *xhci, struct xhci_erst *erst); void xhci_dbg_cmd_ptrs(struct xhci_hcd *xhci); -void xhci_dbg_ring_ptrs(struct xhci_hcd *xhci, struct xhci_ring *ring); -void xhci_dbg_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, unsigned int last_ep); char *xhci_get_slot_state(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx); -void xhci_dbg_ep_rings(struct xhci_hcd *xhci, - unsigned int slot_id, unsigned int ep_index, - struct xhci_virt_ep *ep); void xhci_dbg_trace(struct xhci_hcd *xhci, void (*trace)(struct va_format *), const char *fmt, ...); @@ -1944,16 +1937,8 @@ void xhci_copy_ep0_dequeue_into_input_ctx(struct xhci_hcd *xhci, struct usb_device *udev); unsigned int xhci_get_endpoint_index(struct usb_endpoint_descriptor *desc); unsigned int xhci_get_endpoint_address(unsigned int ep_index); -unsigned int xhci_get_endpoint_flag(struct usb_endpoint_descriptor *desc); -unsigned int xhci_get_endpoint_flag_from_index(unsigned int ep_index); unsigned int xhci_last_valid_endpoint(u32 added_ctxs); void xhci_endpoint_zero(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_host_endpoint *ep); -void xhci_drop_ep_from_interval_table(struct xhci_hcd *xhci, - struct xhci_bw_info *ep_bw, - struct xhci_interval_bw_table *bw_table, - struct usb_device *udev, - struct xhci_virt_ep *virt_ep, - struct xhci_tt_bw_info *tt_info); void xhci_update_tt_active_eps(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, int old_active_eps); @@ -2010,53 +1995,25 @@ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, int usec); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); +int xhci_start(struct xhci_hcd *xhci); int xhci_reset(struct xhci_hcd *xhci); -int xhci_init(struct usb_hcd *hcd); int xhci_run(struct usb_hcd *hcd); -void xhci_stop(struct usb_hcd *hcd); -void xhci_shutdown(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_init_driver(struct hc_driver *drv, const struct xhci_driver_overrides *over); +int xhci_disable_slot(struct xhci_hcd *xhci, + struct xhci_command *command, u32 slot_id); -#ifdef CONFIG_PM int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup); int xhci_resume(struct xhci_hcd *xhci, bool hibernated); -#else -#define xhci_suspend NULL -#define xhci_resume NULL -#endif -int xhci_get_frame(struct usb_hcd *hcd); irqreturn_t xhci_irq(struct usb_hcd *hcd); irqreturn_t xhci_msi_irq(int irq, void *hcd); int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev); -void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev); int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags); -int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev, - struct usb_host_endpoint **eps, unsigned int num_eps, - unsigned int num_streams, gfp_t mem_flags); -int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev, - struct usb_host_endpoint **eps, unsigned int num_eps, - gfp_t mem_flags); -int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev); -int xhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev); -int xhci_update_device(struct usb_hcd *hcd, struct usb_device *udev); -int xhci_set_usb2_hardware_lpm(struct usb_hcd *hcd, - struct usb_device *udev, int enable); -int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, - struct usb_tt *tt, gfp_t mem_flags); -int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags); -int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status); -int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); -int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); -void xhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep); -int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev); -int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); -void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); @@ -2100,9 +2057,6 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci, struct xhci_dequeue_state *deq_state); void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, unsigned int ep_index, struct xhci_td *td); -void xhci_queue_config_ep_quirk(struct xhci_hcd *xhci, - unsigned int slot_id, unsigned int ep_index, - struct xhci_dequeue_state *deq_state); void xhci_stop_endpoint_command_watchdog(unsigned long arg); void xhci_handle_command_timeout(struct work_struct *work); @@ -2113,16 +2067,13 @@ void xhci_cleanup_command_queue(struct xhci_hcd *xhci); /* xHCI roothub code */ void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array, int port_id, u32 link_state); -int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, - struct usb_device *udev, enum usb3_link_state state); -int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, - struct usb_device *udev, enum usb3_link_state state); void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array, int port_id, u32 port_bit); int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength); int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); +void xhci_hc_died(struct xhci_hcd *xhci); #ifdef CONFIG_PM int xhci_bus_suspend(struct usb_hcd *hcd); @@ -2153,6 +2104,22 @@ static inline struct xhci_ring *xhci_urb_to_transfer_ring(struct xhci_hcd *xhci, urb->stream_id); } +static inline char *xhci_slot_state_string(u32 state) +{ + switch (state) { + case SLOT_STATE_ENABLED: + return "enabled/disabled"; + case SLOT_STATE_DEFAULT: + return "default"; + case SLOT_STATE_ADDRESSED: + return "addressed"; + case SLOT_STATE_CONFIGURED: + return "configured"; + default: + return "reserved"; + } +} + static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, u32 field3) { @@ -2162,14 +2129,12 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, switch (type) { case TRB_LINK: sprintf(str, - "TRB %08x%08x status '%s' len %d slot %d ep %d type '%s' flags %c:%c", - field1, field0, - xhci_trb_comp_code_string(GET_COMP_CODE(field2)), - EVENT_TRB_LEN(field2), TRB_TO_SLOT_ID(field3), - /* Macro decrements 1, maybe it shouldn't?!? */ - TRB_TO_EP_INDEX(field3) + 1, - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), - field3 & EVENT_DATA ? 'E' : 'e', + "LINK %08x%08x intr %d type '%s' flags %c:%c:%c:%c", + field1, field0, GET_INTR_TARGET(field2), + xhci_trb_type_string(type), + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_TC ? 'T' : 't', field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_TRANSFER: @@ -2187,37 +2152,52 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, EVENT_TRB_LEN(field2), TRB_TO_SLOT_ID(field3), /* Macro decrements 1, maybe it shouldn't?!? */ TRB_TO_EP_INDEX(field3) + 1, - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field3 & EVENT_DATA ? 'E' : 'e', field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_SETUP: - sprintf(str, - "bRequestType %02x bRequest %02x wValue %02x%02x wIndex %02x%02x wLength %d length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c:%c:%c:%c:%c", - field0 & 0xff, - (field0 & 0xff00) >> 8, - (field0 & 0xff000000) >> 24, - (field0 & 0xff0000) >> 16, - (field1 & 0xff00) >> 8, - field1 & 0xff, - (field1 & 0xff000000) >> 16 | - (field1 & 0xff0000) >> 16, - TRB_LEN(field2), GET_TD_SIZE(field2), - GET_INTR_TARGET(field2), - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), - field3 & TRB_BEI ? 'B' : 'b', - field3 & TRB_IDT ? 'I' : 'i', - field3 & TRB_IOC ? 'I' : 'i', - field3 & TRB_CHAIN ? 'C' : 'c', - field3 & TRB_NO_SNOOP ? 'S' : 's', - field3 & TRB_ISP ? 'I' : 'i', - field3 & TRB_ENT ? 'E' : 'e', - field3 & TRB_CYCLE ? 'C' : 'c'); + sprintf(str, "bRequestType %02x bRequest %02x wValue %02x%02x wIndex %02x%02x wLength %d length %d TD size %d intr %d type '%s' flags %c:%c:%c", + field0 & 0xff, + (field0 & 0xff00) >> 8, + (field0 & 0xff000000) >> 24, + (field0 & 0xff0000) >> 16, + (field1 & 0xff00) >> 8, + field1 & 0xff, + (field1 & 0xff000000) >> 16 | + (field1 & 0xff0000) >> 16, + TRB_LEN(field2), GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + xhci_trb_type_string(type), + field3 & TRB_IDT ? 'I' : 'i', + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CYCLE ? 'C' : 'c'); break; - case TRB_NORMAL: case TRB_DATA: + sprintf(str, "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c:%c:%c:%c", + field1, field0, TRB_LEN(field2), GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + xhci_trb_type_string(type), + field3 & TRB_IDT ? 'I' : 'i', + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_NO_SNOOP ? 'S' : 's', + field3 & TRB_ISP ? 'I' : 'i', + field3 & TRB_ENT ? 'E' : 'e', + field3 & TRB_CYCLE ? 'C' : 'c'); + break; case TRB_STATUS: + sprintf(str, "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c", + field1, field0, TRB_LEN(field2), GET_TD_SIZE(field2), + GET_INTR_TARGET(field2), + xhci_trb_type_string(type), + field3 & TRB_IOC ? 'I' : 'i', + field3 & TRB_CHAIN ? 'C' : 'c', + field3 & TRB_ENT ? 'E' : 'e', + field3 & TRB_CYCLE ? 'C' : 'c'); + break; + case TRB_NORMAL: case TRB_ISOC: case TRB_EVENT_DATA: case TRB_TR_NOOP: @@ -2225,7 +2205,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, "Buffer %08x%08x length %d TD size %d intr %d type '%s' flags %c:%c:%c:%c:%c:%c:%c:%c", field1, field0, TRB_LEN(field2), GET_TD_SIZE(field2), GET_INTR_TARGET(field2), - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field3 & TRB_BEI ? 'B' : 'b', field3 & TRB_IDT ? 'I' : 'i', field3 & TRB_IOC ? 'I' : 'i', @@ -2240,21 +2220,21 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_ENABLE_SLOT: sprintf(str, "%s: flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_DISABLE_SLOT: case TRB_NEG_BANDWIDTH: sprintf(str, "%s: slot %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_ADDR_DEV: sprintf(str, "%s: ctx %08x%08x slot %d flags %c:%c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_SLOT_ID(field3), field3 & TRB_BSR ? 'B' : 'b', @@ -2263,7 +2243,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_CONFIG_EP: sprintf(str, "%s: ctx %08x%08x slot %d flags %c:%c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_SLOT_ID(field3), field3 & TRB_DC ? 'D' : 'd', @@ -2272,7 +2252,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_EVAL_CONTEXT: sprintf(str, "%s: ctx %08x%08x slot %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_SLOT_ID(field3), field3 & TRB_CYCLE ? 'C' : 'c'); @@ -2280,7 +2260,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_RESET_EP: sprintf(str, "%s: ctx %08x%08x slot %d ep %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_SLOT_ID(field3), /* Macro decrements 1, maybe it shouldn't?!? */ @@ -2290,7 +2270,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_STOP_RING: sprintf(str, "%s: slot %d sp %d ep %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), TRB_TO_SUSPEND_PORT(field3), /* Macro decrements 1, maybe it shouldn't?!? */ @@ -2300,7 +2280,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_SET_DEQ: sprintf(str, "%s: deq %08x%08x stream %d slot %d ep %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_STREAM_ID(field2), TRB_TO_SLOT_ID(field3), @@ -2311,14 +2291,14 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_RESET_DEV: sprintf(str, "%s: slot %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_FORCE_EVENT: sprintf(str, "%s: event %08x%08x vf intr %d vf id %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_VF_INTR_TARGET(field2), TRB_TO_VF_ID(field3), @@ -2327,14 +2307,14 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_SET_LT: sprintf(str, "%s: belt %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), TRB_TO_BELT(field3), field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_GET_BW: sprintf(str, "%s: ctx %08x%08x slot %d speed %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field1, field0, TRB_TO_SLOT_ID(field3), TRB_TO_DEV_SPEED(field3), @@ -2343,7 +2323,7 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, case TRB_FORCE_HEADER: sprintf(str, "%s: info %08x%08x%08x pkt type %d roothub port %d flags %c", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field2, field1, field0 & 0xffffffe0, TRB_TO_PACKET_TYPE(field0), TRB_TO_ROOTHUB_PORT(field3), @@ -2352,12 +2332,155 @@ static inline const char *xhci_decode_trb(u32 field0, u32 field1, u32 field2, default: sprintf(str, "type '%s' -> raw %08x %08x %08x %08x", - xhci_trb_type_string(TRB_FIELD_TO_TYPE(field3)), + xhci_trb_type_string(type), field0, field1, field2, field3); } return str; } +static inline const char *xhci_decode_slot_context(u32 info, u32 info2, + u32 tt_info, u32 state) +{ + static char str[1024]; + u32 speed; + u32 hub; + u32 mtt; + int ret = 0; + + speed = info & DEV_SPEED; + hub = info & DEV_HUB; + mtt = info & DEV_MTT; + + ret = sprintf(str, "RS %05x %s%s%s Ctx Entries %d MEL %d us Port# %d/%d", + info & ROUTE_STRING_MASK, + ({ char *s; + switch (speed) { + case SLOT_SPEED_FS: + s = "full-speed"; + break; + case SLOT_SPEED_LS: + s = "low-speed"; + break; + case SLOT_SPEED_HS: + s = "high-speed"; + break; + case SLOT_SPEED_SS: + s = "super-speed"; + break; + case SLOT_SPEED_SSP: + s = "super-speed plus"; + break; + default: + s = "UNKNOWN speed"; + } s; }), + mtt ? " multi-TT" : "", + hub ? " Hub" : "", + (info & LAST_CTX_MASK) >> 27, + info2 & MAX_EXIT, + DEVINFO_TO_ROOT_HUB_PORT(info2), + DEVINFO_TO_MAX_PORTS(info2)); + + ret += sprintf(str + ret, " [TT Slot %d Port# %d TTT %d Intr %d] Addr %d State %s", + tt_info & TT_SLOT, (tt_info & TT_PORT) >> 8, + GET_TT_THINK_TIME(tt_info), GET_INTR_TARGET(tt_info), + state & DEV_ADDR_MASK, + xhci_slot_state_string(GET_SLOT_STATE(state))); + + return str; +} + +static inline const char *xhci_ep_state_string(u8 state) +{ + switch (state) { + case EP_STATE_DISABLED: + return "disabled"; + case EP_STATE_RUNNING: + return "running"; + case EP_STATE_HALTED: + return "halted"; + case EP_STATE_STOPPED: + return "stopped"; + case EP_STATE_ERROR: + return "error"; + default: + return "INVALID"; + } +} + +static inline const char *xhci_ep_type_string(u8 type) +{ + switch (type) { + case ISOC_OUT_EP: + return "Isoc OUT"; + case BULK_OUT_EP: + return "Bulk OUT"; + case INT_OUT_EP: + return "Int OUT"; + case CTRL_EP: + return "Ctrl"; + case ISOC_IN_EP: + return "Isoc IN"; + case BULK_IN_EP: + return "Bulk IN"; + case INT_IN_EP: + return "Int IN"; + default: + return "INVALID"; + } +} + +static inline const char *xhci_decode_ep_context(u32 info, u32 info2, u64 deq, + u32 tx_info) +{ + static char str[1024]; + int ret; + + u32 esit; + u16 maxp; + u16 avg; + + u8 max_pstr; + u8 ep_state; + u8 interval; + u8 ep_type; + u8 burst; + u8 cerr; + u8 mult; + u8 lsa; + u8 hid; + + esit = EP_MAX_ESIT_PAYLOAD_HI(info) << 16 | + EP_MAX_ESIT_PAYLOAD_LO(tx_info); + + ep_state = info & EP_STATE_MASK; + max_pstr = info & EP_MAXPSTREAMS_MASK; + interval = CTX_TO_EP_INTERVAL(info); + mult = CTX_TO_EP_MULT(info) + 1; + lsa = info & EP_HAS_LSA; + + cerr = (info2 & (3 << 1)) >> 1; + ep_type = CTX_TO_EP_TYPE(info2); + hid = info2 & (1 << 7); + burst = CTX_TO_MAX_BURST(info2); + maxp = MAX_PACKET_DECODED(info2); + + avg = EP_AVG_TRB_LENGTH(tx_info); + + ret = sprintf(str, "State %s mult %d max P. Streams %d %s", + xhci_ep_state_string(ep_state), mult, + max_pstr, lsa ? "LSA " : ""); + + ret += sprintf(str + ret, "interval %d us max ESIT payload %d CErr %d ", + (1 << interval) * 125, esit, cerr); + + ret += sprintf(str + ret, "Type %s %sburst %d maxp %d deq %016llx ", + xhci_ep_type_string(ep_type), hid ? "HID" : "", + burst, maxp, deq); + + ret += sprintf(str + ret, "avg trb len %d", avg); + + return str; +} #endif /* __LINUX_XHCI_HCD_H */ diff --git a/drivers/usb/isp1760/isp1760-if.c b/drivers/usb/isp1760/isp1760-if.c index 79205b31e4a9d5caf01d5c2e06f0349805cb9846..bc68bbab7fa170796e06efc11080c67598bcc32b 100644 --- a/drivers/usb/isp1760/isp1760-if.c +++ b/drivers/usb/isp1760/isp1760-if.c @@ -21,11 +21,11 @@ #include "isp1760-core.h" #include "isp1760-regs.h" -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI #include #endif -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI static int isp1761_pci_init(struct pci_dev *dev) { resource_size_t mem_start; @@ -286,7 +286,7 @@ static int __init isp1760_init(void) ret = platform_driver_register(&isp1760_plat_driver); if (!ret) any_ret = 0; -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI ret = pci_register_driver(&isp1761_pci_driver); if (!ret) any_ret = 0; @@ -301,7 +301,7 @@ module_init(isp1760_init); static void __exit isp1760_exit(void) { platform_driver_unregister(&isp1760_plat_driver); -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI pci_unregister_driver(&isp1761_pci_driver); #endif isp1760_deinit_kmem_cache(); diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index db9a9e6ff6bee9c74840b7f5734b086795a090fa..dfd54ea4808faa1efbd98921887cf29b3bd3f1d3 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -655,24 +655,15 @@ static int adu_probe(struct usb_interface *interface, { struct usb_device *udev = interface_to_usbdev(interface); struct adu_device *dev = NULL; - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - int retval = -ENODEV; + int retval = -ENOMEM; int in_end_size; int out_end_size; - int i; - - if (udev == NULL) { - dev_err(&interface->dev, "udev is NULL.\n"); - goto exit; - } + int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct adu_device), GFP_KERNEL); - if (!dev) { - retval = -ENOMEM; - goto exit; - } + if (!dev) + return -ENOMEM; mutex_init(&dev->mtx); spin_lock_init(&dev->buflock); @@ -680,24 +671,13 @@ static int adu_probe(struct usb_interface *interface, init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); - iface_desc = &interface->altsetting[0]; - - /* set up the endpoint information */ - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(endpoint)) - dev->interrupt_in_endpoint = endpoint; - - if (usb_endpoint_is_int_out(endpoint)) - dev->interrupt_out_endpoint = endpoint; - } - if (dev->interrupt_in_endpoint == NULL) { - dev_err(&interface->dev, "interrupt in endpoint not found\n"); - goto error; - } - if (dev->interrupt_out_endpoint == NULL) { - dev_err(&interface->dev, "interrupt out endpoint not found\n"); + res = usb_find_common_endpoints_reverse(&interface->altsetting[0], + NULL, NULL, + &dev->interrupt_in_endpoint, + &dev->interrupt_out_endpoint); + if (res) { + dev_err(&interface->dev, "interrupt endpoints not found\n"); + retval = res; goto error; } @@ -705,10 +685,8 @@ static int adu_probe(struct usb_interface *interface, out_end_size = usb_endpoint_maxp(dev->interrupt_out_endpoint); dev->read_buffer_primary = kmalloc((4 * in_end_size), GFP_KERNEL); - if (!dev->read_buffer_primary) { - retval = -ENOMEM; + if (!dev->read_buffer_primary) goto error; - } /* debug code prime the buffer */ memset(dev->read_buffer_primary, 'a', in_end_size); @@ -717,10 +695,8 @@ static int adu_probe(struct usb_interface *interface, memset(dev->read_buffer_primary + (3 * in_end_size), 'd', in_end_size); dev->read_buffer_secondary = kmalloc((4 * in_end_size), GFP_KERNEL); - if (!dev->read_buffer_secondary) { - retval = -ENOMEM; + if (!dev->read_buffer_secondary) goto error; - } /* debug code prime the buffer */ memset(dev->read_buffer_secondary, 'e', in_end_size); @@ -748,6 +724,7 @@ static int adu_probe(struct usb_interface *interface, if (!usb_string(udev, udev->descriptor.iSerialNumber, dev->serial_number, sizeof(dev->serial_number))) { dev_err(&interface->dev, "Could not retrieve serial number\n"); + retval = -EIO; goto error; } dev_dbg(&interface->dev,"serial_number=%s", dev->serial_number); @@ -770,8 +747,8 @@ static int adu_probe(struct usb_interface *interface, dev_info(&interface->dev, "ADU%d %s now attached to /dev/usb/adutux%d\n", le16_to_cpu(udev->descriptor.idProduct), dev->serial_number, (dev->minor - ADU_MINOR_BASE)); -exit: - return retval; + + return 0; error: adu_delete(dev); diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index da5ff401a35489614db878afadf8cc8bbdffc852..8efdc500e790b5b5d15705f0a211b7e69fe05532 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -212,28 +212,21 @@ static int appledisplay_probe(struct usb_interface *iface, struct backlight_properties props; struct appledisplay *pdata; struct usb_device *udev = interface_to_usbdev(iface); - struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int int_in_endpointAddr = 0; - int i, retval = -ENOMEM, brightness; + int retval, brightness; char bl_name[20]; /* set up the endpoint information */ /* use only the first interrupt-in endpoint */ - iface_desc = iface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - endpoint = &iface_desc->endpoint[i].desc; - if (!int_in_endpointAddr && usb_endpoint_is_int_in(endpoint)) { - /* we found an interrupt in endpoint */ - int_in_endpointAddr = endpoint->bEndpointAddress; - break; - } - } - if (!int_in_endpointAddr) { + retval = usb_find_int_in_endpoint(iface->cur_altsetting, &endpoint); + if (retval) { dev_err(&iface->dev, "Could not find int-in endpoint\n"); - return -EIO; + return retval; } + int_in_endpointAddr = endpoint->bEndpointAddress; + /* allocate memory for our device state and initialize it */ pdata = kzalloc(sizeof(struct appledisplay), GFP_KERNEL); if (!pdata) { diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index aa350dc9eb25c2db5a785fbd5147229d0cf06963..15d4e64d3b65277df6e9e5ac79454a9320d9d108 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -117,28 +117,26 @@ static int chaoskey_probe(struct usb_interface *interface, { struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *altsetting = interface->cur_altsetting; - int i; - int in_ep = -1; + struct usb_endpoint_descriptor *epd; + int in_ep; struct chaoskey *dev; int result = -ENOMEM; int size; + int res; usb_dbg(interface, "probe %s-%s", udev->product, udev->serial); /* Find the first bulk IN endpoint and its packet size */ - for (i = 0; i < altsetting->desc.bNumEndpoints; i++) { - if (usb_endpoint_is_bulk_in(&altsetting->endpoint[i].desc)) { - in_ep = usb_endpoint_num(&altsetting->endpoint[i].desc); - size = usb_endpoint_maxp(&altsetting->endpoint[i].desc); - break; - } + res = usb_find_bulk_in_endpoint(altsetting, &epd); + if (res) { + usb_dbg(interface, "no IN endpoint found"); + return res; } + in_ep = usb_endpoint_num(epd); + size = usb_endpoint_maxp(epd); + /* Validate endpoint and size */ - if (in_ep == -1) { - usb_dbg(interface, "no IN endpoint found"); - return -ENODEV; - } if (size <= 0) { usb_dbg(interface, "invalid size (%d)", size); return -ENODEV; @@ -194,7 +192,7 @@ static int chaoskey_probe(struct usb_interface *interface, dev->in_ep = in_ep; - if (udev->descriptor.idVendor != ALEA_VENDOR_ID) + if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) dev->reads_started = 1; dev->size = size; diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 01a9373b7e18a16fa0b8bc35625fffbe0eefc15b..8291499d058178a27b3b951427e8ef5cf2fcb907 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -2700,10 +2700,8 @@ static int ftdi_elan_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - size_t buffer_size; - int i; - int retval = -ENOMEM; + struct usb_endpoint_descriptor *bulk_in, *bulk_out; + int retval; struct usb_ftdi *ftdi; ftdi = kzalloc(sizeof(struct usb_ftdi), GFP_KERNEL); @@ -2720,31 +2718,25 @@ static int ftdi_elan_probe(struct usb_interface *interface, ftdi->interface = interface; mutex_init(&ftdi->u132_lock); ftdi->expected = 4; + iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - if (!ftdi->bulk_in_endpointAddr && - usb_endpoint_is_bulk_in(endpoint)) { - buffer_size = usb_endpoint_maxp(endpoint); - ftdi->bulk_in_size = buffer_size; - ftdi->bulk_in_endpointAddr = endpoint->bEndpointAddress; - ftdi->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL); - if (!ftdi->bulk_in_buffer) { - retval = -ENOMEM; - goto error; - } - } - if (!ftdi->bulk_out_endpointAddr && - usb_endpoint_is_bulk_out(endpoint)) { - ftdi->bulk_out_endpointAddr = - endpoint->bEndpointAddress; - } - } - if (!(ftdi->bulk_in_endpointAddr && ftdi->bulk_out_endpointAddr)) { + retval = usb_find_common_endpoints(iface_desc, + &bulk_in, &bulk_out, NULL, NULL); + if (retval) { dev_err(&ftdi->udev->dev, "Could not find both bulk-in and bulk-out endpoints\n"); - retval = -ENODEV; goto error; } + + ftdi->bulk_in_size = usb_endpoint_maxp(bulk_in); + ftdi->bulk_in_endpointAddr = bulk_in->bEndpointAddress; + ftdi->bulk_in_buffer = kmalloc(ftdi->bulk_in_size, GFP_KERNEL); + if (!ftdi->bulk_in_buffer) { + retval = -ENOMEM; + goto error; + } + + ftdi->bulk_out_endpointAddr = bulk_out->bEndpointAddress; + dev_info(&ftdi->udev->dev, "interface %d has I=%02X O=%02X\n", iface_desc->desc.bInterfaceNumber, ftdi->bulk_in_endpointAddr, ftdi->bulk_out_endpointAddr); diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 502bfe30a077a20120616af74b378c62bc2ec6d9..81fcbf024c65837cb38f03ebed5124dfba1e40c2 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -360,26 +360,22 @@ static int idmouse_probe(struct usb_interface *interface, dev->interface = interface; /* set up the endpoint information - use only the first bulk-in endpoint */ - endpoint = &iface_desc->endpoint[0].desc; - if (!dev->bulk_in_endpointAddr && usb_endpoint_is_bulk_in(endpoint)) { - /* we found a bulk in endpoint */ - dev->orig_bi_size = usb_endpoint_maxp(endpoint); - dev->bulk_in_size = 0x200; /* works _much_ faster */ - dev->bulk_in_endpointAddr = endpoint->bEndpointAddress; - dev->bulk_in_buffer = - kmalloc(IMGSIZE + dev->bulk_in_size, GFP_KERNEL); - - if (!dev->bulk_in_buffer) { - idmouse_delete(dev); - return -ENOMEM; - } + result = usb_find_bulk_in_endpoint(iface_desc, &endpoint); + if (result) { + dev_err(&interface->dev, "Unable to find bulk-in endpoint.\n"); + idmouse_delete(dev); + return result; } - if (!(dev->bulk_in_endpointAddr)) { - dev_err(&interface->dev, "Unable to find bulk-in endpoint.\n"); + dev->orig_bi_size = usb_endpoint_maxp(endpoint); + dev->bulk_in_size = 0x200; /* works _much_ faster */ + dev->bulk_in_endpointAddr = endpoint->bEndpointAddress; + dev->bulk_in_buffer = kmalloc(IMGSIZE + dev->bulk_in_size, GFP_KERNEL); + if (!dev->bulk_in_buffer) { idmouse_delete(dev); - return -ENODEV; + return -ENOMEM; } + /* allow device read, write and ioctl */ dev->present = 1; diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 37c63cb39714b86ada39d3232565ef1595ed71c6..83b05a287b0c58f2b0b29f91fcddd0f307510b88 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -554,7 +554,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, info.revision = le16_to_cpu(dev->udev->descriptor.bcdDevice); /* 0==UNKNOWN, 1==LOW(usb1.1) ,2=FULL(usb1.1), 3=HIGH(usb2.0) */ - info.speed = le16_to_cpu(dev->udev->speed); + info.speed = dev->udev->speed; info.if_num = dev->interface->cur_altsetting->desc.bInterfaceNumber; info.report_size = dev->report_size; @@ -756,9 +756,8 @@ static int iowarrior_probe(struct usb_interface *interface, struct usb_device *udev = interface_to_usbdev(interface); struct iowarrior *dev = NULL; struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - int i; int retval = -ENOMEM; + int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct iowarrior), GFP_KERNEL); @@ -781,27 +780,19 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); - /* set up the endpoint information */ - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(endpoint)) - dev->int_in_endpoint = endpoint; - if (usb_endpoint_is_int_out(endpoint)) - /* this one will match for the IOWarrior56 only */ - dev->int_out_endpoint = endpoint; - } - - if (!dev->int_in_endpoint) { + res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint); + if (res) { dev_err(&interface->dev, "no interrupt-in endpoint found\n"); - retval = -ENODEV; + retval = res; goto error; } if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { - if (!dev->int_out_endpoint) { + res = usb_find_last_int_out_endpoint(iface_desc, + &dev->int_out_endpoint); + if (res) { dev_err(&interface->dev, "no interrupt-out endpoint found\n"); - retval = -ENODEV; + retval = res; goto error; } } diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 3bc5356832db777424111b74cefbcfe2e77ce4e4..9d9487c66f87a21e4a62b88250cdbc97e1e92313 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -122,13 +122,13 @@ MODULE_SUPPORTED_DEVICE("LD USB Devices"); * avoid racing conditions and get better performance of the driver. */ static int ring_buffer_size = 128; -module_param(ring_buffer_size, int, 0); +module_param(ring_buffer_size, int, 0000); MODULE_PARM_DESC(ring_buffer_size, "Read ring buffer size in reports"); /* The write_buffer can contain more than one interrupt out transfer. */ static int write_buffer_size = 10; -module_param(write_buffer_size, int, 0); +module_param(write_buffer_size, int, 0000); MODULE_PARM_DESC(write_buffer_size, "Write buffer size in reports"); /* As of kernel version 2.6.4 ehci-hcd uses an @@ -141,30 +141,30 @@ MODULE_PARM_DESC(write_buffer_size, "Write buffer size in reports"); * or set to 1 to use the standard interval from the endpoint descriptors. */ static int min_interrupt_in_interval = 2; -module_param(min_interrupt_in_interval, int, 0); +module_param(min_interrupt_in_interval, int, 0000); MODULE_PARM_DESC(min_interrupt_in_interval, "Minimum interrupt in interval in ms"); static int min_interrupt_out_interval = 2; -module_param(min_interrupt_out_interval, int, 0); +module_param(min_interrupt_out_interval, int, 0000); MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in ms"); /* Structure to hold all of our device specific stuff */ struct ld_usb { struct mutex mutex; /* locks this structure */ - struct usb_interface* intf; /* save off the usb interface pointer */ + struct usb_interface *intf; /* save off the usb interface pointer */ int open_count; /* number of times this port has been opened */ - char* ring_buffer; + char *ring_buffer; unsigned int ring_head; unsigned int ring_tail; wait_queue_head_t read_wait; wait_queue_head_t write_wait; - char* interrupt_in_buffer; - struct usb_endpoint_descriptor* interrupt_in_endpoint; - struct urb* interrupt_in_urb; + char *interrupt_in_buffer; + struct usb_endpoint_descriptor *interrupt_in_endpoint; + struct urb *interrupt_in_urb; int interrupt_in_interval; size_t interrupt_in_endpoint_size; int interrupt_in_running; @@ -172,9 +172,9 @@ struct ld_usb { int buffer_overflow; spinlock_t rbsl; - char* interrupt_out_buffer; - struct usb_endpoint_descriptor* interrupt_out_endpoint; - struct urb* interrupt_out_urb; + char *interrupt_out_buffer; + struct usb_endpoint_descriptor *interrupt_out_endpoint; + struct urb *interrupt_out_urb; int interrupt_out_interval; size_t interrupt_out_endpoint_size; int interrupt_out_busy; @@ -244,7 +244,7 @@ static void ld_usb_interrupt_in_callback(struct urb *urb) if (urb->actual_length > 0) { next_ring_head = (dev->ring_head+1) % ring_buffer_size; if (next_ring_head != dev->ring_tail) { - actual_buffer = (size_t*)(dev->ring_buffer + dev->ring_head*(sizeof(size_t)+dev->interrupt_in_endpoint_size)); + actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_head * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); /* actual_buffer gets urb->actual_length + interrupt_in_buffer */ *actual_buffer = urb->actual_length; memcpy(actual_buffer+1, dev->interrupt_in_buffer, urb->actual_length); @@ -483,7 +483,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, } /* actual_buffer contains actual_length + interrupt_in_buffer */ - actual_buffer = (size_t*)(dev->ring_buffer + dev->ring_tail*(sizeof(size_t)+dev->interrupt_in_endpoint_size)); + actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n", @@ -561,7 +561,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size); if (bytes_to_write < count) - dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n",count-bytes_to_write); + dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n", count-bytes_to_write); dev_dbg(&dev->intf->dev, "%s: count = %zd, bytes_to_write = %zd\n", __func__, count, bytes_to_write); @@ -650,10 +650,9 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id * struct usb_device *udev = interface_to_usbdev(intf); struct ld_usb *dev = NULL; struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; char *buffer; - int i; int retval = -ENOMEM; + int res; /* allocate memory for our device state and initialize it */ @@ -681,21 +680,17 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id * iface_desc = intf->cur_altsetting; - /* set up the endpoint information */ - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(endpoint)) - dev->interrupt_in_endpoint = endpoint; - - if (usb_endpoint_is_int_out(endpoint)) - dev->interrupt_out_endpoint = endpoint; - } - if (dev->interrupt_in_endpoint == NULL) { + res = usb_find_last_int_in_endpoint(iface_desc, + &dev->interrupt_in_endpoint); + if (res) { dev_err(&intf->dev, "Interrupt in endpoint not found\n"); + retval = res; goto error; } - if (dev->interrupt_out_endpoint == NULL) + + res = usb_find_last_int_out_endpoint(iface_desc, + &dev->interrupt_out_endpoint); + if (res) dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n"); dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint); diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 322a042d6e59ab55ec10e9fb8961a3080ec8f6f9..0782ac6f5edfaa0542dfe173bd6969c8bba77372 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -317,9 +317,16 @@ static int tower_open (struct inode *inode, struct file *file) int subminor; int retval = 0; struct usb_interface *interface; - struct tower_reset_reply reset_reply; + struct tower_reset_reply *reset_reply; int result; + reset_reply = kmalloc(sizeof(*reset_reply), GFP_KERNEL); + + if (!reset_reply) { + retval = -ENOMEM; + goto exit; + } + nonseekable_open(inode, file); subminor = iminor(inode); @@ -364,8 +371,8 @@ static int tower_open (struct inode *inode, struct file *file) USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, - &reset_reply, - sizeof(reset_reply), + reset_reply, + sizeof(*reset_reply), 1000); if (result < 0) { dev_err(&dev->udev->dev, @@ -406,6 +413,7 @@ static int tower_open (struct inode *inode, struct file *file) mutex_unlock(&dev->lock); exit: + kfree(reset_reply); return retval; } @@ -806,10 +814,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device struct device *idev = &interface->dev; struct usb_device *udev = interface_to_usbdev(interface); struct lego_usb_tower *dev = NULL; - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor* endpoint; - struct tower_get_version_reply get_version_reply; - int i; + struct tower_get_version_reply *get_version_reply = NULL; int retval = -ENOMEM; int result; @@ -846,25 +851,13 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device dev->interrupt_out_urb = NULL; dev->interrupt_out_busy = 0; - iface_desc = interface->cur_altsetting; - - /* set up the endpoint information */ - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_xfer_int(endpoint)) { - if (usb_endpoint_dir_in(endpoint)) - dev->interrupt_in_endpoint = endpoint; - else - dev->interrupt_out_endpoint = endpoint; - } - } - if(dev->interrupt_in_endpoint == NULL) { - dev_err(idev, "interrupt in endpoint not found\n"); - goto error; - } - if (dev->interrupt_out_endpoint == NULL) { - dev_err(idev, "interrupt out endpoint not found\n"); + result = usb_find_common_endpoints_reverse(interface->cur_altsetting, + NULL, NULL, + &dev->interrupt_in_endpoint, + &dev->interrupt_out_endpoint); + if (result) { + dev_err(idev, "interrupt endpoints not found\n"); + retval = result; goto error; } @@ -886,6 +879,13 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device dev->interrupt_in_interval = interrupt_in_interval ? interrupt_in_interval : dev->interrupt_in_endpoint->bInterval; dev->interrupt_out_interval = interrupt_out_interval ? interrupt_out_interval : dev->interrupt_out_endpoint->bInterval; + get_version_reply = kmalloc(sizeof(*get_version_reply), GFP_KERNEL); + + if (!get_version_reply) { + retval = -ENOMEM; + goto error; + } + /* get the firmware version and log it */ result = usb_control_msg (udev, usb_rcvctrlpipe(udev, 0), @@ -893,18 +893,19 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_DEVICE, 0, 0, - &get_version_reply, - sizeof(get_version_reply), + get_version_reply, + sizeof(*get_version_reply), 1000); if (result < 0) { dev_err(idev, "LEGO USB Tower get version control request failed\n"); retval = result; goto error; } - dev_info(&interface->dev, "LEGO USB Tower firmware version is %d.%d " - "build %d\n", get_version_reply.major, - get_version_reply.minor, - le16_to_cpu(get_version_reply.build_no)); + dev_info(&interface->dev, + "LEGO USB Tower firmware version is %d.%d build %d\n", + get_version_reply->major, + get_version_reply->minor, + le16_to_cpu(get_version_reply->build_no)); /* we can register the device now, as it is ready */ usb_set_intfdata (interface, dev); @@ -925,9 +926,11 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device USB_MAJOR, dev->minor); exit: + kfree(get_version_reply); return retval; error: + kfree(get_version_reply); tower_delete(dev); return retval; } diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c index d3d12475326663def2ce1f389f06f040d2a56126..2142132a1f82ee93881f3f938fa5f69352d2648d 100644 --- a/drivers/usb/misc/lvstest.c +++ b/drivers/usb/misc/lvstest.c @@ -193,7 +193,7 @@ static ssize_t u2_timeout_store(struct device *dev, return ret; } - if (val < 0 || val > 127) + if (val > 127) return -EINVAL; ret = lvs_rh_set_port_feature(hdev, lvs->portnum | (val << 8), @@ -222,7 +222,7 @@ static ssize_t u1_timeout_store(struct device *dev, return ret; } - if (val < 0 || val > 127) + if (val > 127) return -EINVAL; ret = lvs_rh_set_port_feature(hdev, lvs->portnum | (val << 8), @@ -367,10 +367,9 @@ static int lvs_rh_probe(struct usb_interface *intf, hdev = interface_to_usbdev(intf); desc = intf->cur_altsetting; - if (desc->desc.bNumEndpoints < 1) - return -ENODEV; - - endpoint = &desc->endpoint[0].desc; + ret = usb_find_int_in_endpoint(desc, &endpoint); + if (ret) + return ret; /* valid only for SS root hub */ if (hdev->descriptor.bDeviceProtocol != USB_HUB_PR_SS || hdev->parent) { @@ -433,6 +432,7 @@ static void lvs_rh_disconnect(struct usb_interface *intf) struct lvs_rh *lvs = usb_get_intfdata(intf); sysfs_remove_group(&intf->dev.kobj, &lvs_attr_group); + usb_poison_urb(lvs->urb); /* used in scheduled work */ flush_work(&lvs->rh_work); usb_free_urb(lvs->urb); } diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c index 4b5777ec1501fe37a82c3bd3b23b35d8ab96ec79..f019d80ca9e40451a8a3bbca0671b95935a7798f 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ b/drivers/usb/misc/sisusbvga/sisusb_con.c @@ -816,7 +816,7 @@ sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb, mutex_unlock(&sisusb->lock); - return 1; + return true; } /* Interface routine */ @@ -973,7 +973,7 @@ sisusbcon_set_origin(struct vc_data *c) mutex_unlock(&sisusb->lock); - return 1; + return true; } /* Interface routine */ diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c index 9f48419abc46b97ebdb562fa4a0b606e7223d67e..0f5ad896c7e3161186ea2540994841d6bfaa5b3d 100644 --- a/drivers/usb/misc/usblcd.c +++ b/drivers/usb/misc/usblcd.c @@ -313,16 +313,15 @@ static int lcd_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_lcd *dev = NULL; - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - size_t buffer_size; + struct usb_endpoint_descriptor *bulk_in, *bulk_out; int i; - int retval = -ENOMEM; + int retval; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) - goto error; + return -ENOMEM; + kref_init(&dev->kref); sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES); init_usb_anchor(&dev->submitted); @@ -338,33 +337,24 @@ static int lcd_probe(struct usb_interface *interface, /* set up the endpoint information */ /* use only the first bulk-in and bulk-out endpoints */ - iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (!dev->bulk_in_endpointAddr && - usb_endpoint_is_bulk_in(endpoint)) { - /* we found a bulk in endpoint */ - buffer_size = usb_endpoint_maxp(endpoint); - dev->bulk_in_size = buffer_size; - dev->bulk_in_endpointAddr = endpoint->bEndpointAddress; - dev->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL); - if (!dev->bulk_in_buffer) - goto error; - } - - if (!dev->bulk_out_endpointAddr && - usb_endpoint_is_bulk_out(endpoint)) { - /* we found a bulk out endpoint */ - dev->bulk_out_endpointAddr = endpoint->bEndpointAddress; - } - } - if (!(dev->bulk_in_endpointAddr && dev->bulk_out_endpointAddr)) { + retval = usb_find_common_endpoints(interface->cur_altsetting, + &bulk_in, &bulk_out, NULL, NULL); + if (retval) { dev_err(&interface->dev, "Could not find both bulk-in and bulk-out endpoints\n"); goto error; } + dev->bulk_in_size = usb_endpoint_maxp(bulk_in); + dev->bulk_in_endpointAddr = bulk_in->bEndpointAddress; + dev->bulk_in_buffer = kmalloc(dev->bulk_in_size, GFP_KERNEL); + if (!dev->bulk_in_buffer) { + retval = -ENOMEM; + goto error; + } + + dev->bulk_out_endpointAddr = bulk_out->bEndpointAddress; + /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); @@ -390,8 +380,7 @@ static int lcd_probe(struct usb_interface *interface, return 0; error: - if (dev) - kref_put(&dev->kref, lcd_delete); + kref_put(&dev->kref, lcd_delete); return retval; } diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 17c0810682579a7fbba5e3fcef1b377519b8f0d8..eee82ca55b7b383bc507c39af7e64bb66a63c6a8 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -124,6 +124,20 @@ static struct usb_device *testdev_to_usbdev(struct usbtest_dev *test) /*-------------------------------------------------------------------------*/ +static inline void endpoint_update(int edi, + struct usb_host_endpoint **in, + struct usb_host_endpoint **out, + struct usb_host_endpoint *e) +{ + if (edi) { + if (!*in) + *in = e; + } else { + if (!*out) + *out = e; + } +} + static int get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) { @@ -151,46 +165,26 @@ get_endpoints(struct usbtest_dev *dev, struct usb_interface *intf) */ for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { struct usb_host_endpoint *e; + int edi; e = alt->endpoint + ep; + edi = usb_endpoint_dir_in(&e->desc); + switch (usb_endpoint_type(&e->desc)) { case USB_ENDPOINT_XFER_BULK: - break; + endpoint_update(edi, &in, &out, e); + continue; case USB_ENDPOINT_XFER_INT: if (dev->info->intr) - goto try_intr; + endpoint_update(edi, &int_in, &int_out, e); + continue; case USB_ENDPOINT_XFER_ISOC: if (dev->info->iso) - goto try_iso; + endpoint_update(edi, &iso_in, &iso_out, e); /* FALLTHROUGH */ default: continue; } - if (usb_endpoint_dir_in(&e->desc)) { - if (!in) - in = e; - } else { - if (!out) - out = e; - } - continue; -try_intr: - if (usb_endpoint_dir_in(&e->desc)) { - if (!int_in) - int_in = e; - } else { - if (!int_out) - int_out = e; - } - continue; -try_iso: - if (usb_endpoint_dir_in(&e->desc)) { - if (!iso_in) - iso_in = e; - } else { - if (!iso_out) - iso_out = e; - } } if ((in && out) || iso_in || iso_out || int_in || int_out) goto found; diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 07014cad6dbe357bca938561eaf976dbbee902d4..5947373700a197a4e62bb62bbf5940c33271d610 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -689,7 +689,7 @@ static int uss720_probe(struct usb_interface *intf, { struct usb_device *usbdev = usb_get_dev(interface_to_usbdev(intf)); struct usb_host_interface *interface; - struct usb_host_endpoint *endpoint; + struct usb_endpoint_descriptor *epd; struct parport_uss720_private *priv; struct parport *pp; unsigned char reg; @@ -745,9 +745,11 @@ static int uss720_probe(struct usb_interface *intf, get_1284_register(pp, 0, ®, GFP_KERNEL); dev_dbg(&intf->dev, "reg: %7ph\n", priv->reg); - endpoint = &interface->endpoint[2]; - dev_dbg(&intf->dev, "epaddr %d interval %d\n", - endpoint->desc.bEndpointAddress, endpoint->desc.bInterval); + i = usb_find_last_int_in_endpoint(interface, &epd); + if (!i) { + dev_dbg(&intf->dev, "epaddr %d interval %d\n", + epd->bEndpointAddress, epd->bInterval); + } parport_announce_port(pp); usb_set_intfdata(intf, pp); diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 54e53ac4c08fd7c47ebf1d271083d2313b92ade0..58abdf28620a537e37184eec548d72ea57fb5dba 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -195,8 +195,8 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int retval = -ENOMEM; - int i; DEFINE_WAIT(wait); + int res; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); @@ -212,20 +212,14 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ /* set up the endpoint information */ iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_int_in(endpoint)) { - dev->int_in_endpointAddr = endpoint->bEndpointAddress; - break; - } - } - if (!dev->int_in_endpointAddr) { - retval = -ENODEV; + res = usb_find_int_in_endpoint(iface_desc, &endpoint); + if (res) { dev_err(&interface->dev, "Could not find endpoints\n"); + retval = res; goto error; } + dev->int_in_endpointAddr = endpoint->bEndpointAddress; /* allocate control URB */ dev->cntl_urb = usb_alloc_urb(0, GFP_KERNEL); diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c index 1a8987e7c5b0648c031fb5aece274a4939fc2ead..11a0d3b84c5eaef31217fd0645edf197ed2f29da 100644 --- a/drivers/usb/mtu3/mtu3_dr.c +++ b/drivers/usb/mtu3/mtu3_dr.c @@ -223,25 +223,25 @@ static int ssusb_extcon_register(struct otg_switch_mtk *otg_sx) return 0; otg_sx->vbus_nb.notifier_call = ssusb_vbus_notifier; - ret = extcon_register_notifier(edev, EXTCON_USB, + ret = devm_extcon_register_notifier(ssusb->dev, edev, EXTCON_USB, &otg_sx->vbus_nb); if (ret < 0) dev_err(ssusb->dev, "failed to register notifier for USB\n"); otg_sx->id_nb.notifier_call = ssusb_id_notifier; - ret = extcon_register_notifier(edev, EXTCON_USB_HOST, + ret = devm_extcon_register_notifier(ssusb->dev, edev, EXTCON_USB_HOST, &otg_sx->id_nb); if (ret < 0) dev_err(ssusb->dev, "failed to register notifier for USB-HOST\n"); dev_dbg(ssusb->dev, "EXTCON_USB: %d, EXTCON_USB_HOST: %d\n", - extcon_get_cable_state_(edev, EXTCON_USB), - extcon_get_cable_state_(edev, EXTCON_USB_HOST)); + extcon_get_state(edev, EXTCON_USB), + extcon_get_state(edev, EXTCON_USB_HOST)); /* default as host, switch to device mode if needed */ - if (extcon_get_cable_state_(edev, EXTCON_USB_HOST) == false) + if (extcon_get_state(edev, EXTCON_USB_HOST) == false) ssusb_set_mailbox(otg_sx, MTU3_ID_FLOAT); - if (extcon_get_cable_state_(edev, EXTCON_USB) == true) + if (extcon_get_state(edev, EXTCON_USB) == true) ssusb_set_mailbox(otg_sx, MTU3_VBUS_VALID); return 0; @@ -367,13 +367,6 @@ void ssusb_otg_switch_exit(struct ssusb_mtk *ssusb) cancel_delayed_work(&otg_sx->extcon_reg_dwork); - if (otg_sx->edev) { - extcon_unregister_notifier(otg_sx->edev, - EXTCON_USB, &otg_sx->vbus_nb); - extcon_unregister_notifier(otg_sx->edev, - EXTCON_USB_HOST, &otg_sx->id_nb); - } - if (otg_sx->manual_drd_enabled) ssusb_debugfs_exit(ssusb); } diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 72a2a504084897da62e91d476768d60ac954c51f..5506a9c03c1f64ab0ff6c6c038c2346ddbf46a10 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -160,8 +160,8 @@ config USB_TI_CPPI_DMA Enable DMA transfers when TI CPPI DMA is available. config USB_TI_CPPI41_DMA - bool 'TI CPPI 4.1 (AM335x)' - depends on ARCH_OMAP && DMADEVICES + bool 'TI CPPI 4.1' + depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX) && DMADEVICES select TI_CPPI41 config USB_TUSB_OMAP_DMA diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index c4fabe952ca6a34dd3cb5c1f938e9ecc80c470ba..a13bd362504399a88aba8d8f688d66f0fefecb7f 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -582,9 +582,10 @@ cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx) maxpacket = length; n_bds = 1; } else { - n_bds = length / maxpacket; - if (!length || (length % maxpacket)) - n_bds++; + if (length) + n_bds = DIV_ROUND_UP(length, maxpacket); + else + n_bds = 1; n_bds = min(n_bds, (unsigned) NUM_TXCHAN_BD); length = min(n_bds * maxpacket, length); } @@ -790,9 +791,7 @@ cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket) n_bds = 0xffff / maxpacket; length = n_bds * maxpacket; } else { - n_bds = length / maxpacket; - if (length % maxpacket) - n_bds++; + n_bds = DIV_ROUND_UP(length, maxpacket); } if (n_bds == 1) onepacket = 1; diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index d79c288ccbf2db0fcb08f04ad0678cad343503b4..df88123274ca722dee54fdf311fdcda0044ba517 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -456,12 +457,41 @@ static inline u8 get_vbus_power(struct device *dev) return current_uA / 1000 / 2; } +#ifdef CONFIG_USB_TI_CPPI41_DMA +static void da8xx_dma_controller_callback(struct dma_controller *c) +{ + struct musb *musb = c->musb; + void __iomem *reg_base = musb->ctrl_base; + + musb_writel(reg_base, DA8XX_USB_END_OF_INTR_REG, 0); +} + +static struct dma_controller * +da8xx_dma_controller_create(struct musb *musb, void __iomem *base) +{ + struct dma_controller *controller; + + controller = cppi41_dma_controller_create(musb, base); + if (IS_ERR_OR_NULL(controller)) + return controller; + + controller->dma_callback = da8xx_dma_controller_callback; + + return controller; +} +#endif + static const struct musb_platform_ops da8xx_ops = { - .quirks = MUSB_INDEXED_EP | MUSB_PRESERVE_SESSION, + .quirks = MUSB_INDEXED_EP | MUSB_PRESERVE_SESSION | + MUSB_DMA_CPPI41 | MUSB_DA8XX, .init = da8xx_musb_init, .exit = da8xx_musb_exit, .fifo_mode = 2, +#ifdef CONFIG_USB_TI_CPPI41_DMA + .dma_init = da8xx_dma_controller_create, + .dma_exit = cppi41_dma_controller_destroy, +#endif .enable = da8xx_musb_enable, .disable = da8xx_musb_disable, @@ -483,6 +513,12 @@ static const struct musb_hdrc_config da8xx_config = { .multipoint = 1, }; +static struct of_dev_auxdata da8xx_auxdata_lookup[] = { + OF_DEV_AUXDATA("ti,da830-cppi41", 0x01e01000, "cppi41-dmaengine", + NULL), + {} +}; + static int da8xx_probe(struct platform_device *pdev) { struct resource musb_resources[2]; @@ -533,6 +569,11 @@ static int da8xx_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, glue); + ret = of_platform_populate(pdev->dev.of_node, NULL, + da8xx_auxdata_lookup, &pdev->dev); + if (ret) + return ret; + memset(musb_resources, 0x00, sizeof(*musb_resources) * ARRAY_SIZE(musb_resources)); diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 0c3664ab705eed549e73d53dea13976066182ee1..870da18f5077cd762ae826c84c84e67ab1a8862e 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2332,7 +2332,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) setup_timer(&musb->otg_timer, musb_otg_timer_func, (unsigned long) musb); /* attach to the IRQ */ - if (request_irq(nIrq, musb->isr, 0, dev_name(dev), musb)) { + if (request_irq(nIrq, musb->isr, IRQF_SHARED, dev_name(dev), musb)) { dev_err(dev, "request_irq %d failed!\n", nIrq); status = -ENODEV; goto fail3; diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 5b708be6d1d1d4293e04cc36bc4403ba1bff17e7..3e98d4268a6478a36348b7144dd38fe49167670b 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -172,6 +172,7 @@ struct musb_io; */ struct musb_platform_ops { +#define MUSB_DA8XX BIT(8) #define MUSB_PRESERVE_SESSION BIT(7) #define MUSB_DMA_UX500 BIT(6) #define MUSB_DMA_CPPI41 BIT(5) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index 355655f8a3fbc9c4e3d8acd03eefa31a5736fb2f..e7c8b1b8bf2296229b2eef3343811e6cbbc9b317 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -571,6 +571,10 @@ static int cppi41_dma_channel_abort(struct dma_channel *channel) } } + /* DA8xx Advisory 2.3.27: wait 250 ms before to start the teardown */ + if (musb->io.quirks & MUSB_DA8XX) + mdelay(250); + tdbit = 1 << cppi41_channel->port_num; if (is_tx) tdbit <<= 16; diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 9c7ee26ef388062bdc5e1f1fb2097fc010882950..bc6a9be2ccc55696cb703f04b97e3d0fb08ca59c 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -245,6 +245,11 @@ static int dsps_check_status(struct musb *musb, void *unused) dsps_mod_timer_optional(glue); break; case OTG_STATE_A_WAIT_BCON: + /* keep VBUS on for host-only mode */ + if (musb->port_mode == MUSB_PORT_MODE_HOST) { + dsps_mod_timer_optional(glue); + break; + } musb_writeb(musb->mregs, MUSB_DEVCTL, 0); skip_session = 1; /* fall */ diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index ac3a4952abb4b290b019ac59fbddd7004b50d949..dbe617a735d8400576e53a84e488ea4ff9fc8466 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2780,10 +2780,11 @@ int musb_host_setup(struct musb *musb, int power_budget) int ret; struct usb_hcd *hcd = musb->hcd; - MUSB_HST_MODE(musb); - musb->xceiv->otg->default_a = 1; - musb->xceiv->otg->state = OTG_STATE_A_IDLE; - + if (musb->port_mode == MUSB_PORT_MODE_HOST) { + MUSB_HST_MODE(musb); + musb->xceiv->otg->default_a = 1; + musb->xceiv->otg->state = OTG_STATE_A_IDLE; + } otg_set_host(musb->xceiv->otg, &hcd->self); hcd->self.otg_port = 1; musb->xceiv->otg->host = &hcd->self; diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index 8b43c4b99f045325fd5c7160ee28cca73f8738af..7870b37e0ea5c6b5a1767e9469193d791107071c 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -219,6 +219,7 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz, u32 dma_remaining; int src_burst, dst_burst; u16 csr; + u32 psize; int ch; s8 dmareq; s8 sync_dev; @@ -390,15 +391,19 @@ static int tusb_omap_dma_program(struct dma_channel *channel, u16 packet_sz, if (chdat->tx) { /* Send transfer_packet_sz packets at a time */ - musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, - chdat->transfer_packet_sz); + psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET); + psize &= ~0x7ff; + psize |= chdat->transfer_packet_sz; + musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize); musb_writel(ep_conf, TUSB_EP_TX_OFFSET, TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len)); } else { /* Receive transfer_packet_sz packets at a time */ - musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, - chdat->transfer_packet_sz << 16); + psize = musb_readl(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET); + psize &= ~(0x7ff << 16); + psize |= (chdat->transfer_packet_sz << 16); + musb_writel(ep_conf, TUSB_EP_MAX_PACKET_SIZE_OFFSET, psize); musb_writel(ep_conf, TUSB_EP_RX_OFFSET, TUSB_EP_CONFIG_XFR_SIZE(chdat->transfer_len)); diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 61cef7511a506af48e3aa2165e0ac992b9cf4d8d..3006f569c068106559b64d15c8acdb0c53e8c10c 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -74,13 +74,6 @@ config AM335X_PHY_USB This driver provides PHY support for that phy which part for the AM335x SoC. -config SAMSUNG_USBPHY - tristate - help - Enable this to support Samsung USB phy helper driver for Samsung SoCs. - This driver provides common interface to interact, for Samsung USB 2.0 PHY - driver and later for Samsung USB 3.0 PHY driver. - config TWL6030_USB tristate "TWL6030 USB Transceiver Driver" depends on TWL4030_CORE && OMAP_USB2 && USB_MUSB_OMAP2PLUS diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile index b433e5d89be4bddac3af69d87dbaea1ec67126cf..e7c9ca8cafb01ee9482910c4c7c2fa545b4f7f3b 100644 --- a/drivers/usb/phy/Makefile +++ b/drivers/usb/phy/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_TAHVO_USB) += phy-tahvo.o obj-$(CONFIG_AM335X_CONTROL_USB) += phy-am335x-control.o obj-$(CONFIG_AM335X_PHY_USB) += phy-am335x.o obj-$(CONFIG_OMAP_OTG) += phy-omap-otg.o -obj-$(CONFIG_SAMSUNG_USBPHY) += phy-samsung-usb.o obj-$(CONFIG_TWL6030_USB) += phy-twl6030-usb.o obj-$(CONFIG_USB_EHCI_TEGRA) += phy-tegra-usb.o obj-$(CONFIG_USB_GPIO_VBUS) += phy-gpio-vbus-usb.o diff --git a/drivers/usb/phy/phy-fsl-usb.c b/drivers/usb/phy/phy-fsl-usb.c index 392ab422163c236084f73252fe8577f386c1d7c1..cf8f40ae6e017943c9ae97e50fca17d50304d51e 100644 --- a/drivers/usb/phy/phy-fsl-usb.c +++ b/drivers/usb/phy/phy-fsl-usb.c @@ -44,6 +44,13 @@ #include "phy-fsl-usb.h" +#ifdef VERBOSE +#define VDBG(fmt, args...) pr_debug("[%s] " fmt, \ + __func__, ## args) +#else +#define VDBG(stuff...) do {} while (0) +#endif + #define DRIVER_VERSION "Rev. 1.55" #define DRIVER_AUTHOR "Jerry Huang/Li Yang" #define DRIVER_DESC "Freescale USB OTG Transceiver Driver" diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c index 80a9845cd93fba16e50ae7797606b7b9b925c887..569c2200ba42a915902385ed61f71bd8e0f124cc 100644 --- a/drivers/usb/serial/aircable.c +++ b/drivers/usb/serial/aircable.c @@ -29,12 +29,6 @@ * is any other control code, I will simply check for the first * one. * - * The driver registers himself with the USB-serial core and the USB Core. I had - * to implement a probe function against USB-serial, because other way, the - * driver was attaching himself to both interfaces. I have tried with different - * configurations of usb_serial_driver with out exit, only the probe function - * could handle this correctly. - * * I have taken some info from a Greg Kroah-Hartman article: * http://www.linuxjournal.com/article/6573 * And from Linux Device Driver Kit CD, which is a great work, the authors taken @@ -93,30 +87,17 @@ static int aircable_prepare_write_buffer(struct usb_serial_port *port, return count + HCI_HEADER_LENGTH; } -static int aircable_probe(struct usb_serial *serial, - const struct usb_device_id *id) +static int aircable_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { - struct usb_host_interface *iface_desc = serial->interface-> - cur_altsetting; - struct usb_endpoint_descriptor *endpoint; - int num_bulk_out = 0; - int i; - - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_bulk_out(endpoint)) { - dev_dbg(&serial->dev->dev, - "found bulk out on endpoint %d\n", i); - ++num_bulk_out; - } - } - - if (num_bulk_out == 0) { - dev_dbg(&serial->dev->dev, "Invalid interface, discarding\n"); + /* Ignore the first interface, which has no bulk endpoints. */ + if (epds->num_bulk_out == 0) { + dev_dbg(&serial->interface->dev, + "ignoring interface with no bulk-out endpoints\n"); return -ENODEV; } - return 0; + return 1; } static int aircable_process_packet(struct usb_serial_port *port, @@ -164,9 +145,8 @@ static struct usb_serial_driver aircable_device = { .name = "aircable", }, .id_table = id_table, - .num_ports = 1, .bulk_out_size = HCI_COMPLETE_FRAME, - .probe = aircable_probe, + .calc_num_ports = aircable_calc_num_ports, .process_read_urb = aircable_process_read_urb, .prepare_write_buffer = aircable_prepare_write_buffer, .throttle = usb_serial_generic_throttle, diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c index 2779e59c30f1e3532d036b37c97f799d963c52fb..0adbd38b4eeaef03ab7f4230158bd37cdb918a01 100644 --- a/drivers/usb/serial/ark3116.c +++ b/drivers/usb/serial/ark3116.c @@ -122,19 +122,6 @@ static inline int calc_divisor(int bps) return (12000000 + 2*bps) / (4*bps); } -static int ark3116_attach(struct usb_serial *serial) -{ - /* make sure we have our end-points */ - if (serial->num_bulk_in == 0 || - serial->num_bulk_out == 0 || - serial->num_interrupt_in == 0) { - dev_err(&serial->interface->dev, "missing endpoint\n"); - return -ENODEV; - } - - return 0; -} - static int ark3116_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -671,7 +658,9 @@ static struct usb_serial_driver ark3116_device = { }, .id_table = id_table, .num_ports = 1, - .attach = ark3116_attach, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .port_probe = ark3116_port_probe, .port_remove = ark3116_port_remove, .set_termios = ark3116_set_termios, diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 80260b08398b2e55833c65d21e0be70cc43ccf01..47fbd9f0c0c7a8508f23b562f4a220dbd19bc33f 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -50,7 +50,6 @@ #define CYBERJACK_PRODUCT_ID 0x0100 /* Function prototypes */ -static int cyberjack_attach(struct usb_serial *serial); static int cyberjack_port_probe(struct usb_serial_port *port); static int cyberjack_port_remove(struct usb_serial_port *port); static int cyberjack_open(struct tty_struct *tty, @@ -78,7 +77,7 @@ static struct usb_serial_driver cyberjack_device = { .description = "Reiner SCT Cyberjack USB card reader", .id_table = id_table, .num_ports = 1, - .attach = cyberjack_attach, + .num_bulk_out = 1, .port_probe = cyberjack_port_probe, .port_remove = cyberjack_port_remove, .open = cyberjack_open, @@ -102,14 +101,6 @@ struct cyberjack_private { short wrsent; /* Data already sent */ }; -static int cyberjack_attach(struct usb_serial *serial) -{ - if (serial->num_bulk_out < serial->num_ports) - return -ENODEV; - - return 0; -} - static int cyberjack_port_probe(struct usb_serial_port *port) { struct cyberjack_private *priv; diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 6537d3ca2797d8573236578e3088f6dbce1ce1b5..2ce39af32cfa614c2b7ccd26d2df1581cfc89c16 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -273,6 +273,8 @@ static struct usb_serial_driver digi_acceleport_2_device = { .description = "Digi 2 port USB adapter", .id_table = id_table_2, .num_ports = 3, + .num_bulk_in = 4, + .num_bulk_out = 4, .open = digi_open, .close = digi_close, .dtr_rts = digi_dtr_rts, @@ -302,6 +304,8 @@ static struct usb_serial_driver digi_acceleport_4_device = { .description = "Digi 4 port USB adapter", .id_table = id_table_4, .num_ports = 4, + .num_bulk_in = 5, + .num_bulk_out = 5, .open = digi_open, .close = digi_close, .write = digi_write, @@ -1251,27 +1255,8 @@ static int digi_port_init(struct usb_serial_port *port, unsigned port_num) static int digi_startup(struct usb_serial *serial) { - struct device *dev = &serial->interface->dev; struct digi_serial *serial_priv; int ret; - int i; - - /* check whether the device has the expected number of endpoints */ - if (serial->num_port_pointers < serial->type->num_ports + 1) { - dev_err(dev, "OOB endpoints missing\n"); - return -ENODEV; - } - - for (i = 0; i < serial->type->num_ports + 1 ; i++) { - if (!serial->port[i]->read_urb) { - dev_err(dev, "bulk-in endpoint missing\n"); - return -ENODEV; - } - if (!serial->port[i]->write_urb) { - dev_err(dev, "bulk-out endpoint missing\n"); - return -ENODEV; - } - } serial_priv = kzalloc(sizeof(*serial_priv), GFP_KERNEL); if (!serial_priv) diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c index 22f23a429a95cb6a1cdeaca445a92035fd3e98e8..3d616a2a9f963a96e73ae865651d40e868aeb4c9 100644 --- a/drivers/usb/serial/f81534.c +++ b/drivers/usb/serial/f81534.c @@ -611,20 +611,30 @@ static int f81534_find_config_idx(struct usb_serial *serial, u8 *index) * The f81534_calc_num_ports() will run to "new style" with checking * F81534_PORT_UNAVAILABLE section. */ -static int f81534_calc_num_ports(struct usb_serial *serial) +static int f81534_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { + struct device *dev = &serial->interface->dev; + int size_bulk_in = usb_endpoint_maxp(epds->bulk_in[0]); + int size_bulk_out = usb_endpoint_maxp(epds->bulk_out[0]); u8 setting[F81534_CUSTOM_DATA_SIZE]; u8 setting_idx; u8 num_port = 0; int status; size_t i; + if (size_bulk_out != F81534_WRITE_BUFFER_SIZE || + size_bulk_in != F81534_MAX_RECEIVE_BLOCK_SIZE) { + dev_err(dev, "unsupported endpoint max packet size\n"); + return -ENODEV; + } + /* Check had custom setting */ status = f81534_find_config_idx(serial, &setting_idx); if (status) { dev_err(&serial->interface->dev, "%s: find idx failed: %d\n", __func__, status); - return 0; + return status; } /* @@ -640,7 +650,7 @@ static int f81534_calc_num_ports(struct usb_serial *serial) dev_err(&serial->interface->dev, "%s: get custom data failed: %d\n", __func__, status); - return 0; + return status; } dev_dbg(&serial->interface->dev, @@ -656,7 +666,7 @@ static int f81534_calc_num_ports(struct usb_serial *serial) dev_err(&serial->interface->dev, "%s: read failed: %d\n", __func__, status); - return 0; + return status; } dev_dbg(&serial->interface->dev, "%s: read default config\n", @@ -671,12 +681,24 @@ static int f81534_calc_num_ports(struct usb_serial *serial) ++num_port; } - if (num_port) - return num_port; + if (!num_port) { + dev_warn(&serial->interface->dev, + "no config found, assuming 4 ports\n"); + num_port = 4; /* Nothing found, oldest version IC */ + } + + /* + * Setup bulk-out endpoint multiplexing. All ports share the same + * bulk-out endpoint. + */ + BUILD_BUG_ON(ARRAY_SIZE(epds->bulk_out) < F81534_NUM_PORT); + + for (i = 1; i < num_port; ++i) + epds->bulk_out[i] = epds->bulk_out[0]; - dev_warn(&serial->interface->dev, "%s: Read Failed. default 4 ports\n", - __func__); - return 4; /* Nothing found, oldest version IC */ + epds->num_bulk_out = num_port; + + return num_port; } static void f81534_set_termios(struct tty_struct *tty, @@ -1067,96 +1089,6 @@ static void f81534_write_usb_callback(struct urb *urb) } } -static int f81534_setup_ports(struct usb_serial *serial) -{ - struct usb_serial_port *port; - u8 port0_out_address; - int buffer_size; - size_t i; - - /* - * In our system architecture, we had 2 or 4 serial ports, - * but only get 1 set of bulk in/out endpoints. - * - * The usb-serial subsystem will generate port 0 data, - * but port 1/2/3 will not. It's will generate write URB and buffer - * by following code and use the port0 read URB for read operation. - */ - for (i = 1; i < serial->num_ports; ++i) { - port0_out_address = serial->port[0]->bulk_out_endpointAddress; - buffer_size = serial->port[0]->bulk_out_size; - port = serial->port[i]; - - if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) - return -ENOMEM; - - port->bulk_out_size = buffer_size; - port->bulk_out_endpointAddress = port0_out_address; - - port->write_urbs[0] = usb_alloc_urb(0, GFP_KERNEL); - if (!port->write_urbs[0]) - return -ENOMEM; - - port->bulk_out_buffers[0] = kzalloc(buffer_size, GFP_KERNEL); - if (!port->bulk_out_buffers[0]) - return -ENOMEM; - - usb_fill_bulk_urb(port->write_urbs[0], serial->dev, - usb_sndbulkpipe(serial->dev, - port0_out_address), - port->bulk_out_buffers[0], buffer_size, - serial->type->write_bulk_callback, port); - - port->write_urb = port->write_urbs[0]; - port->bulk_out_buffer = port->bulk_out_buffers[0]; - } - - return 0; -} - -static int f81534_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - struct usb_endpoint_descriptor *endpoint; - struct usb_host_interface *iface_desc; - struct device *dev; - int num_bulk_in = 0; - int num_bulk_out = 0; - int size_bulk_in = 0; - int size_bulk_out = 0; - int i; - - dev = &serial->interface->dev; - iface_desc = serial->interface->cur_altsetting; - - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_bulk_in(endpoint)) { - ++num_bulk_in; - size_bulk_in = usb_endpoint_maxp(endpoint); - } - - if (usb_endpoint_is_bulk_out(endpoint)) { - ++num_bulk_out; - size_bulk_out = usb_endpoint_maxp(endpoint); - } - } - - if (num_bulk_in != 1 || num_bulk_out != 1) { - dev_err(dev, "expected endpoints not found\n"); - return -ENODEV; - } - - if (size_bulk_out != F81534_WRITE_BUFFER_SIZE || - size_bulk_in != F81534_MAX_RECEIVE_BLOCK_SIZE) { - dev_err(dev, "unsupported endpoint max packet size\n"); - return -ENODEV; - } - - return 0; -} - static int f81534_attach(struct usb_serial *serial) { struct f81534_serial_private *serial_priv; @@ -1173,10 +1105,6 @@ static int f81534_attach(struct usb_serial *serial) mutex_init(&serial_priv->urb_mutex); - status = f81534_setup_ports(serial); - if (status) - return status; - /* Check had custom setting */ status = f81534_find_config_idx(serial, &serial_priv->setting_idx); if (status) { @@ -1380,12 +1308,13 @@ static struct usb_serial_driver f81534_device = { }, .description = DRIVER_DESC, .id_table = f81534_id_table, + .num_bulk_in = 1, + .num_bulk_out = 1, .open = f81534_open, .close = f81534_close, .write = f81534_write, .tx_empty = f81534_tx_empty, .calc_num_ports = f81534_calc_num_ports, - .probe = f81534_probe, .attach = f81534_attach, .port_probe = f81534_port_probe, .dtr_rts = f81534_dtr_rts, diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c540de15aad2c7d301543aef202b3560fb2f7399..aba74f817dc6594064241013b92e31138e587dce 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -809,10 +809,10 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, - { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID), - .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, - { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID), - .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(OLIMEX_VID, OLIMEX_ARM_USB_TINY_H_PID, 1) }, { USB_DEVICE(FIC_VID, FIC_NEO1973_DEBUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_OOCDLINK_PID), @@ -873,6 +873,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_AND_INTERFACE_INFO(MICROCHIP_VID, MICROCHIP_USB_BOARD_PID, USB_CLASS_VENDOR_SPEC, USB_SUBCLASS_VENDOR_SPEC, 0x00) }, + { USB_DEVICE_INTERFACE_NUMBER(ACTEL_VID, MICROSEMI_ARROW_SF2PLUS_BOARD_PID, 2) }, { USB_DEVICE(JETI_VID, JETI_SPC1201_PID) }, { USB_DEVICE(MARVELL_VID, MARVELL_SHEEVAPLUG_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, @@ -1406,6 +1407,9 @@ static int write_latency_timer(struct usb_serial_port *port) int rv; int l = priv->latency; + if (priv->chip_type == SIO || priv->chip_type == FT8U232AM) + return -EINVAL; + if (priv->flags & ASYNC_LOW_LATENCY) l = 1; @@ -1422,7 +1426,7 @@ static int write_latency_timer(struct usb_serial_port *port) return rv; } -static int read_latency_timer(struct usb_serial_port *port) +static int _read_latency_timer(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; @@ -1440,11 +1444,10 @@ static int read_latency_timer(struct usb_serial_port *port) 0, priv->interface, buf, 1, WDR_TIMEOUT); if (rv < 1) { - dev_err(&port->dev, "Unable to read latency timer: %i\n", rv); if (rv >= 0) rv = -EIO; } else { - priv->latency = buf[0]; + rv = buf[0]; } kfree(buf); @@ -1452,6 +1455,25 @@ static int read_latency_timer(struct usb_serial_port *port) return rv; } +static int read_latency_timer(struct usb_serial_port *port) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + int rv; + + if (priv->chip_type == SIO || priv->chip_type == FT8U232AM) + return -EINVAL; + + rv = _read_latency_timer(port); + if (rv < 0) { + dev_err(&port->dev, "Unable to read latency timer: %i\n", rv); + return rv; + } + + priv->latency = rv; + + return 0; +} + static int get_serial_info(struct usb_serial_port *port, struct serial_struct __user *retinfo) { @@ -1505,9 +1527,9 @@ static int set_serial_info(struct tty_struct *tty, (new_serial.flags & ASYNC_FLAGS)); priv->custom_divisor = new_serial.custom_divisor; +check_and_exit: write_latency_timer(port); -check_and_exit: if ((old_priv.flags & ASYNC_SPD_MASK) != (priv->flags & ASYNC_SPD_MASK)) { if ((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) @@ -1603,9 +1625,19 @@ static void ftdi_determine_type(struct usb_serial_port *port) priv->baud_base = 12000000 / 16; } else if (version < 0x400) { /* Assume it's an FT8U232AM (or FT8U245AM) */ - /* (It might be a BM because of the iSerialNumber bug, - * but it will still work as an AM device.) */ priv->chip_type = FT8U232AM; + /* + * It might be a BM type because of the iSerialNumber bug. + * If iSerialNumber==0 and the latency timer is readable, + * assume it is BM type. + */ + if (udev->descriptor.iSerialNumber == 0 && + _read_latency_timer(port) >= 0) { + dev_dbg(&port->dev, + "%s: has latency timer so not an AM type\n", + __func__); + priv->chip_type = FT232BM; + } } else if (version < 0x600) { /* Assume it's an FT232BM (or FT245BM) */ priv->chip_type = FT232BM; @@ -1685,9 +1717,12 @@ static ssize_t latency_timer_store(struct device *dev, { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); - int v = simple_strtoul(valbuf, NULL, 10); + u8 v; int rv; + if (kstrtou8(valbuf, 10, &v)) + return -EINVAL; + priv->latency = v; rv = write_latency_timer(port); if (rv < 0) @@ -1704,10 +1739,13 @@ static ssize_t store_event_char(struct device *dev, struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); struct usb_device *udev = port->serial->dev; - int v = simple_strtoul(valbuf, NULL, 10); + unsigned int v; int rv; - dev_dbg(&port->dev, "%s: setting event char = %i\n", __func__, v); + if (kstrtouint(valbuf, 0, &v) || v >= 0x200) + return -EINVAL; + + dev_dbg(&port->dev, "%s: setting event char = 0x%03x\n", __func__, v); rv = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 48ee04c94a7541ad6c5f9023be107246207c56fd..4fcf1cecb6d721ad366666eec49774bcf9d00808 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -873,9 +873,17 @@ #define FIC_VID 0x1457 #define FIC_NEO1973_DEBUG_PID 0x5118 +/* + * Actel / Microsemi + */ +#define ACTEL_VID 0x1514 +#define MICROSEMI_ARROW_SF2PLUS_BOARD_PID 0x2008 + /* Olimex */ #define OLIMEX_VID 0x15BA #define OLIMEX_ARM_USB_OCD_PID 0x0003 +#define OLIMEX_ARM_USB_TINY_PID 0x0004 +#define OLIMEX_ARM_USB_TINY_H_PID 0x002a #define OLIMEX_ARM_USB_OCD_H_PID 0x002b /* diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 49ce2be90fa00e5b4305784f2532e34069752072..35cb8c0e584fc19f4550e24eb7b35fed3819bcc4 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -37,13 +37,41 @@ MODULE_PARM_DESC(product, "User specified USB idProduct"); static struct usb_device_id generic_device_ids[2]; /* Initially all zeroes. */ -struct usb_serial_driver usb_serial_generic_device = { +static int usb_serial_generic_probe(struct usb_serial *serial, + const struct usb_device_id *id) +{ + struct device *dev = &serial->interface->dev; + + dev_info(dev, "The \"generic\" usb-serial driver is only for testing and one-off prototypes.\n"); + dev_info(dev, "Tell linux-usb@vger.kernel.org to add your device to a proper driver.\n"); + + return 0; +} + +static int usb_serial_generic_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) +{ + struct device *dev = &serial->interface->dev; + int num_ports; + + num_ports = max(epds->num_bulk_in, epds->num_bulk_out); + + if (num_ports == 0) { + dev_err(dev, "device has no bulk endpoints\n"); + return -ENODEV; + } + + return num_ports; +} + +static struct usb_serial_driver usb_serial_generic_device = { .driver = { .owner = THIS_MODULE, .name = "generic", }, .id_table = generic_device_ids, - .num_ports = 1, + .probe = usb_serial_generic_probe, + .calc_num_ports = usb_serial_generic_calc_num_ports, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, .resume = usb_serial_generic_resume, diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index bb7673e80a57aeeba501cd7dc4628394c491f1d0..bdf8bd814a9aaf89d646545cac01da659ab475b6 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -1544,11 +1544,6 @@ static void edge_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { struct edgeport_port *edge_port = usb_get_serial_port_data(port); - unsigned int cflag; - - cflag = tty->termios.c_cflag; - dev_dbg(&port->dev, "%s - clfag %08x iflag %08x\n", __func__, tty->termios.c_cflag, tty->termios.c_iflag); - dev_dbg(&port->dev, "%s - old clfag %08x old iflag %08x\n", __func__, old_termios->c_cflag, old_termios->c_iflag); if (edge_port == NULL) return; @@ -2844,14 +2839,9 @@ static int edge_startup(struct usb_serial *serial) bool interrupt_in_found; bool bulk_in_found; bool bulk_out_found; - static __u32 descriptor[3] = { EDGE_COMPATIBILITY_MASK0, - EDGE_COMPATIBILITY_MASK1, - EDGE_COMPATIBILITY_MASK2 }; - - if (serial->num_bulk_in < 1 || serial->num_interrupt_in < 1) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } + static const __u32 descriptor[3] = { EDGE_COMPATIBILITY_MASK0, + EDGE_COMPATIBILITY_MASK1, + EDGE_COMPATIBILITY_MASK2 }; dev = serial->dev; @@ -3120,6 +3110,9 @@ static struct usb_serial_driver edgeport_2port_device = { .description = "Edgeport 2 port adapter", .id_table = edgeport_2port_id_table, .num_ports = 2, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, @@ -3152,6 +3145,9 @@ static struct usb_serial_driver edgeport_4port_device = { .description = "Edgeport 4 port adapter", .id_table = edgeport_4port_id_table, .num_ports = 4, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, @@ -3184,6 +3180,9 @@ static struct usb_serial_driver edgeport_8port_device = { .description = "Edgeport 8 port adapter", .id_table = edgeport_8port_id_table, .num_ports = 8, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, @@ -3216,6 +3215,9 @@ static struct usb_serial_driver epic_device = { .description = "EPiC device", .id_table = Epic_port_id_table, .num_ports = 1, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index a76b95d32157871f5e2964b629784a7642da8480..6cefb9cb133d7bad8678e9b9400b077afc75474e 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1933,13 +1933,6 @@ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) if (edge_serial->num_ports_open == 0) { /* we are the first port to open, post the interrupt urb */ urb = edge_serial->serial->port[0]->interrupt_in_urb; - if (!urb) { - dev_err(&port->dev, - "%s - no interrupt urb present, exiting\n", - __func__); - status = -EINVAL; - goto release_es_lock; - } urb->context = edge_serial; status = usb_submit_urb(urb, GFP_KERNEL); if (status) { @@ -1959,12 +1952,6 @@ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) /* start up our bulk read urb */ urb = port->read_urb; - if (!urb) { - dev_err(&port->dev, "%s - no read urb present, exiting\n", - __func__); - status = -EINVAL; - goto unlink_int_urb; - } edge_port->ep_read_urb_state = EDGE_READ_URB_RUNNING; urb->context = edge_port; status = usb_submit_urb(urb, GFP_KERNEL); @@ -2349,8 +2336,11 @@ static void change_port_settings(struct tty_struct *tty, if (!baud) { /* pick a default, any default... */ baud = 9600; - } else + } else { + /* Avoid a zero divisor. */ + baud = min(baud, 461550); tty_encode_baud_rate(tty, baud, baud); + } edge_port->baud_rate = baud; config->wBaudRate = (__u16)((461550L + baud/2) / baud); @@ -2385,14 +2375,6 @@ static void edge_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { struct edgeport_port *edge_port = usb_get_serial_port_data(port); - unsigned int cflag; - - cflag = tty->termios.c_cflag; - - dev_dbg(&port->dev, "%s - clfag %08x iflag %08x\n", __func__, - tty->termios.c_cflag, tty->termios.c_iflag); - dev_dbg(&port->dev, "%s - old clfag %08x old iflag %08x\n", __func__, - old_termios->c_cflag, old_termios->c_iflag); if (edge_port == NULL) return; @@ -2544,19 +2526,31 @@ static void edge_heartbeat_work(struct work_struct *work) edge_heartbeat_schedule(serial); } -static int edge_startup(struct usb_serial *serial) +static int edge_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { - struct edgeport_serial *edge_serial; - int status; - u16 product_id; + struct device *dev = &serial->interface->dev; + unsigned char num_ports = serial->type->num_ports; /* Make sure we have the required endpoints when in download mode. */ if (serial->interface->cur_altsetting->desc.bNumEndpoints > 1) { - if (serial->num_bulk_in < serial->num_ports || - serial->num_bulk_out < serial->num_ports) + if (epds->num_bulk_in < num_ports || + epds->num_bulk_out < num_ports || + epds->num_interrupt_in < 1) { + dev_err(dev, "required endpoints missing\n"); return -ENODEV; + } } + return num_ports; +} + +static int edge_startup(struct usb_serial *serial) +{ + struct edgeport_serial *edge_serial; + int status; + u16 product_id; + /* create our private serial structure */ edge_serial = kzalloc(sizeof(struct edgeport_serial), GFP_KERNEL); if (!edge_serial) @@ -2736,11 +2730,13 @@ static struct usb_serial_driver edgeport_1port_device = { .description = "Edgeport TI 1 port adapter", .id_table = edgeport_1port_id_table, .num_ports = 1, + .num_bulk_out = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, .unthrottle = edge_unthrottle, .attach = edge_startup, + .calc_num_ports = edge_calc_num_ports, .disconnect = edge_disconnect, .release = edge_release, .port_probe = edge_port_probe, @@ -2773,11 +2769,13 @@ static struct usb_serial_driver edgeport_2port_device = { .description = "Edgeport TI 2 port adapter", .id_table = edgeport_2port_id_table, .num_ports = 2, + .num_bulk_out = 1, .open = edge_open, .close = edge_close, .throttle = edge_throttle, .unthrottle = edge_unthrottle, .attach = edge_startup, + .calc_num_ports = edge_calc_num_ports, .disconnect = edge_disconnect, .release = edge_release, .port_probe = edge_port_probe, diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index ec1b8f2c11837fecd39468c1e053880d72da9f94..cde0dcdce9c4d1402b69076c2a6a228b36963833 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -33,7 +33,8 @@ static int initial_wait; /* Function prototypes for an ipaq */ static int ipaq_open(struct tty_struct *tty, struct usb_serial_port *port); -static int ipaq_calc_num_ports(struct usb_serial *serial); +static int ipaq_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds); static int ipaq_startup(struct usb_serial *serial); static const struct usb_device_id ipaq_id_table[] = { @@ -550,42 +551,38 @@ static int ipaq_open(struct tty_struct *tty, return usb_serial_generic_open(tty, port); } -static int ipaq_calc_num_ports(struct usb_serial *serial) +static int ipaq_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { /* - * some devices have 3 endpoints, the 3rd of which - * must be ignored as it would make the core - * create a second port which oopses when used + * Some of the devices in ipaq_id_table[] are composite, and we + * shouldn't bind to all the interfaces. This test will rule out + * some obviously invalid possibilities. */ - int ipaq_num_ports = 1; - - dev_dbg(&serial->dev->dev, "%s - numberofendpoints: %d\n", __func__, - (int)serial->interface->cur_altsetting->desc.bNumEndpoints); + if (epds->num_bulk_in == 0 || epds->num_bulk_out == 0) + return -ENODEV; /* - * a few devices have 4 endpoints, seemingly Yakuma devices, - * and we need the second pair, so let them have 2 ports - * - * TODO: can we drop port 1 ? + * A few devices have four endpoints, seemingly Yakuma devices, and + * we need the second pair. */ - if (serial->interface->cur_altsetting->desc.bNumEndpoints > 3) { - ipaq_num_ports = 2; + if (epds->num_bulk_in > 1 && epds->num_bulk_out > 1) { + epds->bulk_in[0] = epds->bulk_in[1]; + epds->bulk_out[0] = epds->bulk_out[1]; } - return ipaq_num_ports; -} + /* + * Other devices have 3 endpoints, but we only use the first bulk in + * and out endpoints. + */ + epds->num_bulk_in = 1; + epds->num_bulk_out = 1; + return 1; +} static int ipaq_startup(struct usb_serial *serial) { - /* Some of the devices in ipaq_id_table[] are composite, and we - * shouldn't bind to all the interfaces. This test will rule out - * some obviously invalid possibilities. - */ - if (serial->num_bulk_in < serial->num_ports || - serial->num_bulk_out < serial->num_ports) - return -ENODEV; - if (serial->dev->actconfig->desc.bConfigurationValue != 1) { /* * FIXME: HP iPaq rx3715, possibly others, have 1 config that @@ -597,10 +594,6 @@ static int ipaq_startup(struct usb_serial *serial) return -ENODEV; } - dev_dbg(&serial->dev->dev, - "%s - iPAQ module configured for %d ports\n", __func__, - serial->num_ports); - return usb_reset_configuration(serial->dev); } diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 73956d48a0c57bb5566fb0c1b47a581e438bb4a1..f9734a96d51681d689c5028e6777fec03c401d5a 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -197,6 +197,7 @@ static u8 ir_xbof_change(u8 xbof) static int ir_startup(struct usb_serial *serial) { struct usb_irda_cs_descriptor *irda_desc; + int rates; irda_desc = irda_usb_find_class_desc(serial, 0); if (!irda_desc) { @@ -205,18 +206,20 @@ static int ir_startup(struct usb_serial *serial) return -ENODEV; } + rates = le16_to_cpu(irda_desc->wBaudRate); + dev_dbg(&serial->dev->dev, "%s - Baud rates supported:%s%s%s%s%s%s%s%s%s\n", __func__, - (irda_desc->wBaudRate & USB_IRDA_BR_2400) ? " 2400" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_9600) ? " 9600" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_19200) ? " 19200" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_38400) ? " 38400" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_57600) ? " 57600" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_115200) ? " 115200" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_576000) ? " 576000" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_1152000) ? " 1152000" : "", - (irda_desc->wBaudRate & USB_IRDA_BR_4000000) ? " 4000000" : ""); + (rates & USB_IRDA_BR_2400) ? " 2400" : "", + (rates & USB_IRDA_BR_9600) ? " 9600" : "", + (rates & USB_IRDA_BR_19200) ? " 19200" : "", + (rates & USB_IRDA_BR_38400) ? " 38400" : "", + (rates & USB_IRDA_BR_57600) ? " 57600" : "", + (rates & USB_IRDA_BR_115200) ? " 115200" : "", + (rates & USB_IRDA_BR_576000) ? " 576000" : "", + (rates & USB_IRDA_BR_1152000) ? " 1152000" : "", + (rates & USB_IRDA_BR_4000000) ? " 4000000" : ""); switch (irda_desc->bmAdditionalBOFs) { case USB_IRDA_AB_48: diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 030390f37b0a8e4822a58b9e1e3fa412736ad451..18fc992a245fcd8c700aea392ed2a1fcf9d584ea 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -68,16 +68,6 @@ struct iuu_private { u32 clk; }; -static int iuu_attach(struct usb_serial *serial) -{ - unsigned char num_ports = serial->num_ports; - - if (serial->num_bulk_in < num_ports || serial->num_bulk_out < num_ports) - return -ENODEV; - - return 0; -} - static int iuu_port_probe(struct usb_serial_port *port) { struct iuu_private *priv; @@ -598,9 +588,8 @@ static void read_buf_callback(struct urb *urb) } dev_dbg(&port->dev, "%s - %i chars to write\n", __func__, urb->actual_length); - if (data == NULL) - dev_dbg(&port->dev, "%s - data is NULL !!!\n", __func__); - if (urb->actual_length && data) { + + if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } @@ -665,10 +654,8 @@ static void iuu_uart_read_callback(struct urb *urb) /* error stop all */ return; } - if (data == NULL) - dev_dbg(&port->dev, "%s - data is NULL !!!\n", __func__); - if (urb->actual_length == 1 && data != NULL) + if (urb->actual_length == 1) len = (int) data[0]; if (urb->actual_length > 1) { @@ -1183,6 +1170,8 @@ static struct usb_serial_driver iuu_device = { }, .id_table = id_table, .num_ports = 1, + .num_bulk_in = 1, + .num_bulk_out = 1, .bulk_in_size = 512, .bulk_out_size = 512, .open = iuu_open, @@ -1193,7 +1182,6 @@ static struct usb_serial_driver iuu_device = { .tiocmset = iuu_tiocmset, .set_termios = iuu_set_termios, .init_termios = iuu_init_termios, - .attach = iuu_attach, .port_probe = iuu_port_probe, .port_remove = iuu_port_remove, }; diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index d2dab2a341b827d558eea480858b791344fd38b6..196908dd25a1e8f6591643a0149f4d83c46f3c62 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -708,19 +708,6 @@ MODULE_FIRMWARE("keyspan_pda/keyspan_pda.fw"); MODULE_FIRMWARE("keyspan_pda/xircom_pgs.fw"); #endif -static int keyspan_pda_attach(struct usb_serial *serial) -{ - unsigned char num_ports = serial->num_ports; - - if (serial->num_bulk_out < num_ports || - serial->num_interrupt_in < num_ports) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } - - return 0; -} - static int keyspan_pda_port_probe(struct usb_serial_port *port) { @@ -784,6 +771,8 @@ static struct usb_serial_driver keyspan_pda_device = { .description = "Keyspan PDA", .id_table = id_table_std, .num_ports = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .dtr_rts = keyspan_pda_dtr_rts, .open = keyspan_pda_open, .close = keyspan_pda_close, @@ -798,7 +787,6 @@ static struct usb_serial_driver keyspan_pda_device = { .break_ctl = keyspan_pda_break_ctl, .tiocmget = keyspan_pda_tiocmget, .tiocmset = keyspan_pda_tiocmset, - .attach = keyspan_pda_attach, .port_probe = keyspan_pda_port_probe, .port_remove = keyspan_pda_port_remove, }; diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 813035f51fe73a4e0de69ce76e6c3329ba4d376c..3024b9b253605160a5f5e7038159bcc47ec4d1ea 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -51,7 +51,6 @@ /* Function prototypes */ -static int kobil_attach(struct usb_serial *serial); static int kobil_port_probe(struct usb_serial_port *probe); static int kobil_port_remove(struct usb_serial_port *probe); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port); @@ -87,7 +86,7 @@ static struct usb_serial_driver kobil_device = { .description = "KOBIL USB smart card terminal", .id_table = id_table, .num_ports = 1, - .attach = kobil_attach, + .num_interrupt_out = 1, .port_probe = kobil_port_probe, .port_remove = kobil_port_remove, .ioctl = kobil_ioctl, @@ -115,16 +114,6 @@ struct kobil_private { }; -static int kobil_attach(struct usb_serial *serial) -{ - if (serial->num_interrupt_out < serial->num_ports) { - dev_err(&serial->interface->dev, "missing interrupt-out endpoint\n"); - return -ENODEV; - } - - return 0; -} - static int kobil_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index edbc81f205c253f41f7afa2d3375ef7818e0ae86..70f346f1aa86f4a7c9eb880cd725cd0d2e6fb1b5 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -189,7 +189,7 @@ static int mct_u232_set_baud_rate(struct tty_struct *tty, return -ENOMEM; divisor = mct_u232_calculate_baud_rate(serial, value, &speed); - put_unaligned_le32(cpu_to_le32(divisor), buf); + put_unaligned_le32(divisor, buf); rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_BAUD_RATE_REQUEST, MCT_U232_SET_REQUEST_TYPE, diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index f075121c6e32329d136935d2d1d56ae8b49c784f..a453965f9e9a3153ffcbb28d4a6257a4db094cd3 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -973,11 +973,24 @@ static void mos7720_bulk_out_data_callback(struct urb *urb) tty_port_tty_wakeup(&mos7720_port->port->port); } -static int mos77xx_calc_num_ports(struct usb_serial *serial) +static int mos77xx_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); - if (product == MOSCHIP_DEVICE_ID_7715) + + if (product == MOSCHIP_DEVICE_ID_7715) { + /* + * The 7715 uses the first bulk in/out endpoint pair for the + * parallel port, and the second for the serial port. We swap + * the endpoint descriptors here so that the the first and + * only registered port structure uses the serial-port + * endpoints. + */ + swap(epds->bulk_in[0], epds->bulk_in[1]); + swap(epds->bulk_out[0], epds->bulk_out[1]); + return 1; + } return 2; } @@ -1395,7 +1408,7 @@ struct divisor_table_entry { /* Define table of divisors for moschip 7720 hardware * * These assume a 3.6864MHz crystal, the standard /16, and * * MCR.7 = 0. */ -static struct divisor_table_entry divisor_table[] = { +static const struct divisor_table_entry divisor_table[] = { { 50, 2304}, { 110, 1047}, /* 2094.545455 => 230450 => .0217 % over */ { 134, 857}, /* 1713.011152 => 230398.5 => .00065% under */ @@ -1675,7 +1688,6 @@ static void mos7720_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { int status; - unsigned int cflag; struct usb_serial *serial; struct moschip_port *mos7720_port; @@ -1691,16 +1703,6 @@ static void mos7720_set_termios(struct tty_struct *tty, return; } - dev_dbg(&port->dev, "setting termios - ASPIRE\n"); - - cflag = tty->termios.c_cflag; - - dev_dbg(&port->dev, "%s - cflag %08x iflag %08x\n", __func__, - tty->termios.c_cflag, RELEVANT_IFLAG(tty->termios.c_iflag)); - - dev_dbg(&port->dev, "%s - old cflag %08x old iflag %08x\n", __func__, - old_termios->c_cflag, RELEVANT_IFLAG(old_termios->c_iflag)); - /* change the port settings to the new ones specified */ change_port_settings(tty, mos7720_port, old_termios); @@ -1900,54 +1902,24 @@ static int mos7720_startup(struct usb_serial *serial) u16 product; int ret_val; - if (serial->num_bulk_in < 2 || serial->num_bulk_out < 2) { - dev_err(&serial->interface->dev, "missing bulk endpoints\n"); - return -ENODEV; - } - product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; - /* - * The 7715 uses the first bulk in/out endpoint pair for the parallel - * port, and the second for the serial port. Because the usbserial core - * assumes both pairs are serial ports, we must engage in a bit of - * subterfuge and swap the pointers for ports 0 and 1 in order to make - * port 0 point to the serial port. However, both moschip devices use a - * single interrupt-in endpoint for both ports (as mentioned a little - * further down), and this endpoint was assigned to port 0. So after - * the swap, we must copy the interrupt endpoint elements from port 1 - * (as newly assigned) to port 0, and null out port 1 pointers. - */ - if (product == MOSCHIP_DEVICE_ID_7715) { - struct usb_serial_port *tmp = serial->port[0]; - serial->port[0] = serial->port[1]; - serial->port[1] = tmp; - serial->port[0]->interrupt_in_urb = tmp->interrupt_in_urb; - serial->port[0]->interrupt_in_buffer = tmp->interrupt_in_buffer; - serial->port[0]->interrupt_in_endpointAddress = - tmp->interrupt_in_endpointAddress; - serial->port[1]->interrupt_in_urb = NULL; - serial->port[1]->interrupt_in_buffer = NULL; - - if (serial->port[0]->interrupt_in_urb) { - struct urb *urb = serial->port[0]->interrupt_in_urb; - - urb->complete = mos7715_interrupt_callback; - } - } - /* setting configuration feature to one */ usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); -#ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT if (product == MOSCHIP_DEVICE_ID_7715) { + struct urb *urb = serial->port[0]->interrupt_in_urb; + + urb->complete = mos7715_interrupt_callback; + +#ifdef CONFIG_USB_SERIAL_MOS7715_PARPORT ret_val = mos7715_parport_init(serial); if (ret_val < 0) return ret_val; - } #endif + } /* start the interrupt urb */ ret_val = usb_submit_urb(serial->port[0]->interrupt_in_urb, GFP_KERNEL); if (ret_val) { @@ -2039,6 +2011,9 @@ static struct usb_serial_driver moschip7720_2port_driver = { }, .description = "Moschip 2 port adapter", .id_table = id_table, + .num_bulk_in = 2, + .num_bulk_out = 2, + .num_interrupt_in = 1, .calc_num_ports = mos77xx_calc_num_ports, .open = mos7720_open, .close = mos7720_close, diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 3821c53fcee9ec2c269ce562ef25d7a67ed34965..e8669aae14b3504d5e8cf2a6db25900edd66e269 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1868,7 +1868,6 @@ static void mos7840_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { int status; - unsigned int cflag; struct usb_serial *serial; struct moschip_port *mos7840_port; @@ -1890,15 +1889,6 @@ static void mos7840_set_termios(struct tty_struct *tty, return; } - dev_dbg(&port->dev, "%s", "setting termios - \n"); - - cflag = tty->termios.c_cflag; - - dev_dbg(&port->dev, "%s - clfag %08x iflag %08x\n", __func__, - tty->termios.c_cflag, RELEVANT_IFLAG(tty->termios.c_iflag)); - dev_dbg(&port->dev, "%s - old clfag %08x old iflag %08x\n", __func__, - old_termios->c_cflag, RELEVANT_IFLAG(old_termios->c_iflag)); - /* change the port settings to the new ones specified */ mos7840_change_port_settings(tty, mos7840_port, old_termios); @@ -2104,26 +2094,27 @@ static int mos7840_probe(struct usb_serial *serial, return 0; } -static int mos7840_calc_num_ports(struct usb_serial *serial) +static int mos7840_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { int device_type = (unsigned long)usb_get_serial_data(serial); - int mos7840_num_ports; + int num_ports; - mos7840_num_ports = (device_type >> 4) & 0x000F; + num_ports = (device_type >> 4) & 0x000F; - return mos7840_num_ports; -} + /* + * num_ports is currently never zero as device_type is one of + * MOSCHIP_DEVICE_ID_78{1,2,4}0. + */ + if (num_ports == 0) + return -ENODEV; -static int mos7840_attach(struct usb_serial *serial) -{ - if (serial->num_bulk_in < serial->num_ports || - serial->num_bulk_out < serial->num_ports || - serial->num_interrupt_in < 1) { + if (epds->num_bulk_in < num_ports || epds->num_bulk_out < num_ports) { dev_err(&serial->interface->dev, "missing endpoints\n"); return -ENODEV; } - return 0; + return num_ports; } static int mos7840_port_probe(struct usb_serial_port *port) @@ -2384,7 +2375,7 @@ static struct usb_serial_driver moschip7840_4port_device = { }, .description = DRIVER_DESC, .id_table = id_table, - .num_ports = 4, + .num_interrupt_in = 1, .open = mos7840_open, .close = mos7840_close, .write = mos7840_write, @@ -2401,7 +2392,6 @@ static struct usb_serial_driver moschip7840_4port_device = { .tiocmset = mos7840_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, - .attach = mos7840_attach, .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index c88215a0fa3d8d0eaa22be4e873cf59d2525e481..3aef091fe88b6b69540e012cc0842dcdb8e77e59 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -946,20 +946,39 @@ static void mxuport_set_termios(struct tty_struct *tty, * Determine how many ports this device has dynamically. It will be * called after the probe() callback is called, but before attach(). */ -static int mxuport_calc_num_ports(struct usb_serial *serial) +static int mxuport_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { unsigned long features = (unsigned long)usb_get_serial_data(serial); + int num_ports; + int i; - if (features & MX_UPORT_2_PORT) - return 2; - if (features & MX_UPORT_4_PORT) - return 4; - if (features & MX_UPORT_8_PORT) - return 8; - if (features & MX_UPORT_16_PORT) - return 16; + if (features & MX_UPORT_2_PORT) { + num_ports = 2; + } else if (features & MX_UPORT_4_PORT) { + num_ports = 4; + } else if (features & MX_UPORT_8_PORT) { + num_ports = 8; + } else if (features & MX_UPORT_16_PORT) { + num_ports = 16; + } else { + dev_warn(&serial->interface->dev, + "unknown device, assuming two ports\n"); + num_ports = 2; + } - return 0; + /* + * Setup bulk-out endpoint multiplexing. All ports share the same + * bulk-out endpoint. + */ + BUILD_BUG_ON(ARRAY_SIZE(epds->bulk_out) < 16); + + for (i = 1; i < num_ports; ++i) + epds->bulk_out[i] = epds->bulk_out[0]; + + epds->num_bulk_out = num_ports; + + return num_ports; } /* Get the version of the firmware currently running. */ @@ -1142,102 +1161,11 @@ static int mxuport_port_probe(struct usb_serial_port *port) port->port_number); } -static int mxuport_alloc_write_urb(struct usb_serial *serial, - struct usb_serial_port *port, - struct usb_serial_port *port0, - int j) -{ - struct usb_device *dev = interface_to_usbdev(serial->interface); - - set_bit(j, &port->write_urbs_free); - port->write_urbs[j] = usb_alloc_urb(0, GFP_KERNEL); - if (!port->write_urbs[j]) - return -ENOMEM; - - port->bulk_out_buffers[j] = kmalloc(port0->bulk_out_size, GFP_KERNEL); - if (!port->bulk_out_buffers[j]) - return -ENOMEM; - - usb_fill_bulk_urb(port->write_urbs[j], dev, - usb_sndbulkpipe(dev, port->bulk_out_endpointAddress), - port->bulk_out_buffers[j], - port->bulk_out_size, - serial->type->write_bulk_callback, - port); - return 0; -} - - -static int mxuport_alloc_write_urbs(struct usb_serial *serial, - struct usb_serial_port *port, - struct usb_serial_port *port0) -{ - int j; - int ret; - - for (j = 0; j < ARRAY_SIZE(port->write_urbs); ++j) { - ret = mxuport_alloc_write_urb(serial, port, port0, j); - if (ret) - return ret; - } - return 0; -} - - static int mxuport_attach(struct usb_serial *serial) { struct usb_serial_port *port0 = serial->port[0]; struct usb_serial_port *port1 = serial->port[1]; - struct usb_serial_port *port; int err; - int i; - int j; - - /* - * Throw away all but the first allocated write URBs so we can - * set them up again to fit the multiplexing scheme. - */ - for (i = 1; i < serial->num_bulk_out; ++i) { - port = serial->port[i]; - for (j = 0; j < ARRAY_SIZE(port->write_urbs); ++j) { - usb_free_urb(port->write_urbs[j]); - kfree(port->bulk_out_buffers[j]); - port->write_urbs[j] = NULL; - port->bulk_out_buffers[j] = NULL; - } - port->write_urbs_free = 0; - } - - /* - * All write data is sent over the first bulk out endpoint, - * with an added header to indicate the port. Allocate URBs - * for each port to the first bulk out endpoint. - */ - for (i = 1; i < serial->num_ports; ++i) { - port = serial->port[i]; - port->bulk_out_size = port0->bulk_out_size; - port->bulk_out_endpointAddress = - port0->bulk_out_endpointAddress; - - err = mxuport_alloc_write_urbs(serial, port, port0); - if (err) - return err; - - port->write_urb = port->write_urbs[0]; - port->bulk_out_buffer = port->bulk_out_buffers[0]; - - /* - * Ensure each port has a fifo. The framework only - * allocates a fifo to ports with a bulk out endpoint, - * where as we need one for every port. - */ - if (!kfifo_initialized(&port->write_fifo)) { - err = kfifo_alloc(&port->write_fifo, PAGE_SIZE, - GFP_KERNEL); - if (err) - return err; - } - } /* * All data from the ports is received on the first bulk in @@ -1366,7 +1294,8 @@ static struct usb_serial_driver mxuport_device = { }, .description = "MOXA UPort", .id_table = mxuport_idtable, - .num_ports = 0, + .num_bulk_in = 2, + .num_bulk_out = 1, .probe = mxuport_probe, .port_probe = mxuport_port_probe, .attach = mxuport_attach, diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index dd706953b4660905cc5abf5e85477bfb6bdb4149..efcd7feed6f4d1c882627736de30253ca2858da6 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -1,6 +1,8 @@ /* * USB ZyXEL omni.net LCD PLUS driver * + * Copyright (C) 2013,2017 Johan Hovold + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version * 2 as published by the Free Software Foundation. @@ -32,12 +34,10 @@ /* function prototypes */ static void omninet_process_read_urb(struct urb *urb); -static void omninet_write_bulk_callback(struct urb *urb); -static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, - const unsigned char *buf, int count); -static int omninet_write_room(struct tty_struct *tty); -static void omninet_disconnect(struct usb_serial *serial); -static int omninet_attach(struct usb_serial *serial); +static int omninet_prepare_write_buffer(struct usb_serial_port *port, + void *buf, size_t count); +static int omninet_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds); static int omninet_port_probe(struct usb_serial_port *port); static int omninet_port_remove(struct usb_serial_port *port); @@ -55,15 +55,12 @@ static struct usb_serial_driver zyxel_omninet_device = { }, .description = "ZyXEL - omni.net lcd plus usb", .id_table = id_table, - .num_ports = 1, - .attach = omninet_attach, + .num_bulk_out = 2, + .calc_num_ports = omninet_calc_num_ports, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, - .write = omninet_write, - .write_room = omninet_write_room, - .write_bulk_callback = omninet_write_bulk_callback, .process_read_urb = omninet_process_read_urb, - .disconnect = omninet_disconnect, + .prepare_write_buffer = omninet_prepare_write_buffer, }; static struct usb_serial_driver * const serial_drivers[] = { @@ -104,15 +101,14 @@ struct omninet_data { __u8 od_outseq; /* Sequence number for bulk_out URBs */ }; -static int omninet_attach(struct usb_serial *serial) +static int omninet_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { - /* The second bulk-out endpoint is used for writing. */ - if (serial->num_bulk_out < 2) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } + /* We need only the second bulk-out for our single-port device. */ + epds->bulk_out[0] = epds->bulk_out[1]; + epds->num_bulk_out = 1; - return 0; + return 1; } static int omninet_port_probe(struct usb_serial_port *port) @@ -159,96 +155,24 @@ static void omninet_process_read_urb(struct urb *urb) tty_flip_buffer_push(&port->port); } -static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, - const unsigned char *buf, int count) +static int omninet_prepare_write_buffer(struct usb_serial_port *port, + void *buf, size_t count) { - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport = serial->port[1]; - struct omninet_data *od = usb_get_serial_port_data(port); - struct omninet_header *header = (struct omninet_header *) - wport->write_urb->transfer_buffer; - - int result; - - if (count == 0) { - dev_dbg(&port->dev, "%s - write request of 0 bytes\n", __func__); - return 0; - } - - if (!test_and_clear_bit(0, &port->write_urbs_free)) { - dev_dbg(&port->dev, "%s - already writing\n", __func__); - return 0; - } - - count = (count > OMNINET_PAYLOADSIZE) ? OMNINET_PAYLOADSIZE : count; - - memcpy(wport->write_urb->transfer_buffer + OMNINET_HEADERLEN, - buf, count); - - usb_serial_debug_data(&port->dev, __func__, count, - wport->write_urb->transfer_buffer); - - header->oh_seq = od->od_outseq++; - header->oh_len = count; - header->oh_xxx = 0x03; - header->oh_pad = 0x00; - - /* send the data out the bulk port, always 64 bytes */ - wport->write_urb->transfer_buffer_length = OMNINET_BULKOUTSIZE; - - result = usb_submit_urb(wport->write_urb, GFP_ATOMIC); - if (result) { - set_bit(0, &wport->write_urbs_free); - dev_err_console(port, - "%s - failed submitting write urb, error %d\n", - __func__, result); - } else - result = count; - - return result; -} + struct omninet_header *header = buf; + count = min_t(size_t, count, OMNINET_PAYLOADSIZE); -static int omninet_write_room(struct tty_struct *tty) -{ - struct usb_serial_port *port = tty->driver_data; - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport = serial->port[1]; - - int room = 0; /* Default: no room */ - - if (test_bit(0, &wport->write_urbs_free)) - room = wport->bulk_out_size - OMNINET_HEADERLEN; - - dev_dbg(&port->dev, "%s - returns %d\n", __func__, room); - - return room; -} - -static void omninet_write_bulk_callback(struct urb *urb) -{ -/* struct omninet_header *header = (struct omninet_header *) - urb->transfer_buffer; */ - struct usb_serial_port *port = urb->context; - int status = urb->status; - - set_bit(0, &port->write_urbs_free); - if (status) { - dev_dbg(&port->dev, "%s - nonzero write bulk status received: %d\n", - __func__, status); - return; - } + count = kfifo_out_locked(&port->write_fifo, buf + OMNINET_HEADERLEN, + count, &port->lock); - usb_serial_port_softint(port); -} - - -static void omninet_disconnect(struct usb_serial *serial) -{ - struct usb_serial_port *wport = serial->port[1]; + header->oh_seq = od->od_outseq++; + header->oh_len = count; + header->oh_xxx = 0x03; + header->oh_pad = 0x00; - usb_kill_urb(wport->write_urb); + /* always 64 bytes */ + return OMNINET_BULKOUTSIZE; } module_usb_serial_driver(serial_drivers, id_table); diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index 3937b9c3cc691b778cfb7a310a6e5400ee53d095..58657d64678ba06eff913acec8c7584a19502db9 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -367,16 +367,6 @@ static int opticon_ioctl(struct tty_struct *tty, return -ENOIOCTLCMD; } -static int opticon_startup(struct usb_serial *serial) -{ - if (!serial->num_bulk_in) { - dev_err(&serial->dev->dev, "no bulk in endpoint\n"); - return -ENODEV; - } - - return 0; -} - static int opticon_port_probe(struct usb_serial_port *port) { struct opticon_private *priv; @@ -408,8 +398,8 @@ static struct usb_serial_driver opticon_device = { }, .id_table = id_table, .num_ports = 1, + .num_bulk_in = 1, .bulk_in_size = 256, - .attach = opticon_startup, .port_probe = opticon_port_probe, .port_remove = opticon_port_remove, .open = opticon_open, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index af67a0de6b5d475d2be95952ddfb2354546a0fbb..3bf61acfc26b9cfdee5496feeb64bdbcfd58015c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -281,6 +281,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 +#define TELIT_PRODUCT_ME910 0x1100 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -640,6 +641,11 @@ static const struct option_blacklist_info simcom_sim7100e_blacklist = { .reserved = BIT(5) | BIT(6), }; +static const struct option_blacklist_info telit_me910_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(1) | BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1235,6 +1241,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), + .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index b8bf52bf7a944d5fddd6976f90a63db3cd6807e5..b11eead469eee85860119eda06db9aecec5e1838 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -134,7 +134,6 @@ static int oti6858_chars_in_buffer(struct tty_struct *tty); static int oti6858_tiocmget(struct tty_struct *tty); static int oti6858_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); -static int oti6858_attach(struct usb_serial *serial); static int oti6858_port_probe(struct usb_serial_port *port); static int oti6858_port_remove(struct usb_serial_port *port); @@ -146,6 +145,9 @@ static struct usb_serial_driver oti6858_device = { }, .id_table = id_table, .num_ports = 1, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 1, .open = oti6858_open, .close = oti6858_close, .write = oti6858_write, @@ -159,7 +161,6 @@ static struct usb_serial_driver oti6858_device = { .write_bulk_callback = oti6858_write_bulk_callback, .write_room = oti6858_write_room, .chars_in_buffer = oti6858_chars_in_buffer, - .attach = oti6858_attach, .port_probe = oti6858_port_probe, .port_remove = oti6858_port_remove, }; @@ -326,20 +327,6 @@ static void send_data(struct work_struct *work) usb_serial_port_softint(port); } -static int oti6858_attach(struct usb_serial *serial) -{ - unsigned char num_ports = serial->num_ports; - - if (serial->num_bulk_in < num_ports || - serial->num_bulk_out < num_ports || - serial->num_interrupt_in < num_ports) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } - - return 0; -} - static int oti6858_port_probe(struct usb_serial_port *port) { struct oti6858_private *priv; diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index ca69eb42071bcb1ed5d01706698e2289d5757b03..c9ebefd8f35fdbe5491627f4df89b80c87aeabad 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -33,9 +33,11 @@ #define PL2303_QUIRK_UART_STATE_IDX0 BIT(0) #define PL2303_QUIRK_LEGACY BIT(1) +#define PL2303_QUIRK_ENDPOINT_HACK BIT(2) static const struct usb_device_id id_table[] = { - { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID), + .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ2) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_DCU11) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_RSAQ3) }, @@ -48,7 +50,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, - { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID), + .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, @@ -68,7 +71,8 @@ static const struct usb_device_id id_table[] = { .driver_info = PL2303_QUIRK_UART_STATE_IDX0 }, { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_X75), .driver_info = PL2303_QUIRK_UART_STATE_IDX0 }, - { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_EF81) }, + { USB_DEVICE(SIEMENS_VENDOR_ID, SIEMENS_PRODUCT_ID_EF81), + .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_ID_S81) }, /* Benq/Siemens S81 */ { USB_DEVICE(SYNTECH_VENDOR_ID, SYNTECH_PRODUCT_ID) }, { USB_DEVICE(NOKIA_CA42_VENDOR_ID, NOKIA_CA42_PRODUCT_ID) }, @@ -78,7 +82,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(SPEEDDRAGON_VENDOR_ID, SPEEDDRAGON_PRODUCT_ID) }, { USB_DEVICE(DATAPILOT_U2_VENDOR_ID, DATAPILOT_U2_PRODUCT_ID) }, { USB_DEVICE(BELKIN_VENDOR_ID, BELKIN_PRODUCT_ID) }, - { USB_DEVICE(ALCOR_VENDOR_ID, ALCOR_PRODUCT_ID) }, + { USB_DEVICE(ALCOR_VENDOR_ID, ALCOR_PRODUCT_ID), + .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(WS002IN_VENDOR_ID, WS002IN_PRODUCT_ID) }, { USB_DEVICE(COREGA_VENDOR_ID, COREGA_PRODUCT_ID) }, { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, @@ -218,20 +223,68 @@ static int pl2303_probe(struct usb_serial *serial, return 0; } +/* + * Use interrupt endpoint from first interface if available. + * + * This is needed due to the looney way its endpoints are set up. + */ +static int pl2303_endpoint_hack(struct usb_serial *serial, + struct usb_serial_endpoints *epds) +{ + struct usb_interface *interface = serial->interface; + struct usb_device *dev = serial->dev; + struct device *ddev = &interface->dev; + struct usb_host_interface *iface_desc; + struct usb_endpoint_descriptor *endpoint; + unsigned int i; + + if (interface == dev->actconfig->interface[0]) + return 0; + + /* check out the endpoints of the other interface */ + iface_desc = dev->actconfig->interface[0]->cur_altsetting; + + for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { + endpoint = &iface_desc->endpoint[i].desc; + + if (!usb_endpoint_is_int_in(endpoint)) + continue; + + dev_dbg(ddev, "found interrupt in on separate interface\n"); + if (epds->num_interrupt_in < ARRAY_SIZE(epds->interrupt_in)) + epds->interrupt_in[epds->num_interrupt_in++] = endpoint; + } + + return 0; +} + +static int pl2303_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) +{ + unsigned long quirks = (unsigned long)usb_get_serial_data(serial); + struct device *dev = &serial->interface->dev; + int ret; + + if (quirks & PL2303_QUIRK_ENDPOINT_HACK) { + ret = pl2303_endpoint_hack(serial, epds); + if (ret) + return ret; + } + + if (epds->num_interrupt_in < 1) { + dev_err(dev, "required interrupt-in endpoint missing\n"); + return -ENODEV; + } + + return 1; +} + static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; - unsigned char num_ports = serial->num_ports; enum pl2303_type type = TYPE_01; unsigned char *buf; - if (serial->num_bulk_in < num_ports || - serial->num_bulk_out < num_ports || - serial->num_interrupt_in < num_ports) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } - spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; @@ -938,7 +991,9 @@ static struct usb_serial_driver pl2303_device = { .name = "pl2303", }, .id_table = id_table, - .num_ports = 1, + .num_bulk_in = 1, + .num_bulk_out = 1, + .num_interrupt_in = 0, /* see pl2303_calc_num_ports */ .bulk_in_size = 256, .bulk_out_size = 256, .open = pl2303_open, @@ -954,6 +1009,7 @@ static struct usb_serial_driver pl2303_device = { .process_read_urb = pl2303_process_read_urb, .read_int_callback = pl2303_read_int_callback, .probe = pl2303_probe, + .calc_num_ports = pl2303_calc_num_ports, .attach = pl2303_startup, .release = pl2303_release, .port_probe = pl2303_port_probe, diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 38b3f0d8cd580f2366136003934d00a475b1d7f1..fd509ed6cf7065725c097ab1bc4eac84359be1f6 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ + {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index fdbb904d153f735b83c007137eaa42d5bf76a6b4..60e17d1444c39fc50f4fbd85a579e967ba823775 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -246,7 +246,8 @@ static inline int update_mctrl(struct qt2_port_private *port_priv, return status; } -static int qt2_calc_num_ports(struct usb_serial *serial) +static int qt2_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { struct qt2_device_detail d; int i; @@ -600,7 +601,6 @@ static void qt2_process_read_urb(struct urb *urb) escapeflag = true; break; case QT2_CONTROL_ESCAPE: - tty_buffer_request_room(&port->port, 2); tty_insert_flip_string(&port->port, ch, 2); i += 2; escapeflag = true; @@ -615,8 +615,7 @@ static void qt2_process_read_urb(struct urb *urb) continue; } - tty_buffer_request_room(&port->port, 1); - tty_insert_flip_string(&port->port, ch, 1); + tty_insert_flip_char(&port->port, *ch, TTY_NORMAL); } tty_flip_buffer_push(&port->port); diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 465e851b281585b3b7445177868a061cecfd3c15..4c4ac4705ac03f8d850a653fe91e35b78bfe25b6 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -85,7 +85,8 @@ static int sierra_vsc_set_nmea(struct usb_device *udev, __u16 enable) USB_CTRL_SET_TIMEOUT); /* int timeout */ } -static int sierra_calc_num_ports(struct usb_serial *serial) +static int sierra_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { int num_ports = 0; u8 ifnum, numendpoints; diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index ddfd787c461cb5367c1f932b3edffb42c0812832..5167b6564c8b4574016aa48d068bb290e3ade06e 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -154,19 +154,6 @@ static int spcp8x5_probe(struct usb_serial *serial, return 0; } -static int spcp8x5_attach(struct usb_serial *serial) -{ - unsigned char num_ports = serial->num_ports; - - if (serial->num_bulk_in < num_ports || - serial->num_bulk_out < num_ports) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } - - return 0; -} - static int spcp8x5_port_probe(struct usb_serial_port *port) { const struct usb_device_id *id = usb_get_serial_data(port->serial); @@ -488,6 +475,8 @@ static struct usb_serial_driver spcp8x5_device = { }, .id_table = id_table, .num_ports = 1, + .num_bulk_in = 1, + .num_bulk_out = 1, .open = spcp8x5_open, .dtr_rts = spcp8x5_dtr_rts, .carrier_raised = spcp8x5_carrier_raised, @@ -496,7 +485,6 @@ static struct usb_serial_driver spcp8x5_device = { .tiocmget = spcp8x5_tiocmget, .tiocmset = spcp8x5_tiocmset, .probe = spcp8x5_probe, - .attach = spcp8x5_attach, .port_probe = spcp8x5_port_probe, .port_remove = spcp8x5_port_remove, }; diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index 37f3ad15ed06a5f6c1cf9db54ca0beef801d523f..0d1727232d0c1b2168804a557a71a24c2dcce247 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -147,16 +147,6 @@ static void symbol_unthrottle(struct tty_struct *tty) } } -static int symbol_startup(struct usb_serial *serial) -{ - if (!serial->num_interrupt_in) { - dev_err(&serial->dev->dev, "no interrupt-in endpoint\n"); - return -ENODEV; - } - - return 0; -} - static int symbol_port_probe(struct usb_serial_port *port) { struct symbol_private *priv; @@ -188,7 +178,7 @@ static struct usb_serial_driver symbol_device = { }, .id_table = id_table, .num_ports = 1, - .attach = symbol_startup, + .num_interrupt_in = 1, .port_probe = symbol_port_probe, .port_remove = symbol_port_remove, .open = symbol_open, diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 3107bf5d1c966bbe35f4b0c84af803fcf0c0308e..8fc3854e5e6967dc32f1a04e5ba95a304f10b683 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -427,6 +427,7 @@ static struct usb_serial_driver ti_1port_device = { .description = "TI USB 3410 1 port adapter", .id_table = ti_id_table_3410, .num_ports = 1, + .num_bulk_out = 1, .attach = ti_startup, .release = ti_release, .port_probe = ti_port_probe, @@ -459,6 +460,7 @@ static struct usb_serial_driver ti_2port_device = { .description = "TI USB 5052 2 port adapter", .id_table = ti_id_table_5052, .num_ports = 2, + .num_bulk_out = 1, .attach = ti_startup, .release = ti_release, .port_probe = ti_port_probe, @@ -927,7 +929,6 @@ static void ti_set_termios(struct tty_struct *tty, { struct ti_port *tport = usb_get_serial_port_data(port); struct ti_uart_config *config; - tcflag_t cflag, iflag; int baud; int status; int port_number = port->port_number; @@ -935,13 +936,6 @@ static void ti_set_termios(struct tty_struct *tty, u16 wbaudrate; u16 wflags = 0; - cflag = tty->termios.c_cflag; - iflag = tty->termios.c_iflag; - - dev_dbg(&port->dev, "%s - cflag %08x, iflag %08x\n", __func__, cflag, iflag); - dev_dbg(&port->dev, "%s - old clfag %08x, old iflag %08x\n", __func__, - old_termios->c_cflag, old_termios->c_iflag); - config = kmalloc(sizeof(*config), GFP_KERNEL); if (!config) return; diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 4a037b4a79cf3168cb45d60d1b6488ac21681570..c7ca95f64edc2b178dec37f852dbb90333efe637 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -38,7 +38,6 @@ #include #include #include -#include "pl2303.h" #define DRIVER_AUTHOR "Greg Kroah-Hartman " #define DRIVER_DESC "USB Serial Driver core" @@ -710,6 +709,39 @@ static const struct tty_port_operations serial_port_ops = { .shutdown = serial_port_shutdown, }; +static void find_endpoints(struct usb_serial *serial, + struct usb_serial_endpoints *epds) +{ + struct device *dev = &serial->interface->dev; + struct usb_host_interface *iface_desc; + struct usb_endpoint_descriptor *epd; + unsigned int i; + + BUILD_BUG_ON(ARRAY_SIZE(epds->bulk_in) < USB_MAXENDPOINTS / 2); + BUILD_BUG_ON(ARRAY_SIZE(epds->bulk_out) < USB_MAXENDPOINTS / 2); + BUILD_BUG_ON(ARRAY_SIZE(epds->interrupt_in) < USB_MAXENDPOINTS / 2); + BUILD_BUG_ON(ARRAY_SIZE(epds->interrupt_out) < USB_MAXENDPOINTS / 2); + + iface_desc = serial->interface->cur_altsetting; + for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { + epd = &iface_desc->endpoint[i].desc; + + if (usb_endpoint_is_bulk_in(epd)) { + dev_dbg(dev, "found bulk in on endpoint %u\n", i); + epds->bulk_in[epds->num_bulk_in++] = epd; + } else if (usb_endpoint_is_bulk_out(epd)) { + dev_dbg(dev, "found bulk out on endpoint %u\n", i); + epds->bulk_out[epds->num_bulk_out++] = epd; + } else if (usb_endpoint_is_int_in(epd)) { + dev_dbg(dev, "found interrupt in on endpoint %u\n", i); + epds->interrupt_in[epds->num_interrupt_in++] = epd; + } else if (usb_endpoint_is_int_out(epd)) { + dev_dbg(dev, "found interrupt out on endpoint %u\n", i); + epds->interrupt_out[epds->num_interrupt_out++] = epd; + } + } +} + static int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { @@ -717,23 +749,15 @@ static int usb_serial_probe(struct usb_interface *interface, struct usb_device *dev = interface_to_usbdev(interface); struct usb_serial *serial = NULL; struct usb_serial_port *port; - struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; - struct usb_endpoint_descriptor *interrupt_in_endpoint[MAX_NUM_PORTS]; - struct usb_endpoint_descriptor *interrupt_out_endpoint[MAX_NUM_PORTS]; - struct usb_endpoint_descriptor *bulk_in_endpoint[MAX_NUM_PORTS]; - struct usb_endpoint_descriptor *bulk_out_endpoint[MAX_NUM_PORTS]; + struct usb_serial_endpoints *epds; struct usb_serial_driver *type = NULL; int retval; int buffer_size; int i; int j; - int num_interrupt_in = 0; - int num_interrupt_out = 0; - int num_bulk_in = 0; - int num_bulk_out = 0; int num_ports = 0; - int max_endpoints; + unsigned char max_endpoints; mutex_lock(&table_lock); type = search_serial_device(interface); @@ -752,8 +776,8 @@ static int usb_serial_probe(struct usb_interface *interface, serial = create_serial(dev, interface, type); if (!serial) { - module_put(type->driver.owner); - return -ENOMEM; + retval = -ENOMEM; + goto err_put_module; } /* if this device type has a probe function, call it */ @@ -765,129 +789,48 @@ static int usb_serial_probe(struct usb_interface *interface, if (retval) { dev_dbg(ddev, "sub driver rejected device\n"); - usb_serial_put(serial); - module_put(type->driver.owner); - return retval; + goto err_put_serial; } } /* descriptor matches, let's find the endpoints needed */ - /* check out the endpoints */ - iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (usb_endpoint_is_bulk_in(endpoint)) { - /* we found a bulk in endpoint */ - dev_dbg(ddev, "found bulk in on endpoint %d\n", i); - if (num_bulk_in < MAX_NUM_PORTS) { - bulk_in_endpoint[num_bulk_in] = endpoint; - ++num_bulk_in; - } - } - - if (usb_endpoint_is_bulk_out(endpoint)) { - /* we found a bulk out endpoint */ - dev_dbg(ddev, "found bulk out on endpoint %d\n", i); - if (num_bulk_out < MAX_NUM_PORTS) { - bulk_out_endpoint[num_bulk_out] = endpoint; - ++num_bulk_out; - } - } - - if (usb_endpoint_is_int_in(endpoint)) { - /* we found a interrupt in endpoint */ - dev_dbg(ddev, "found interrupt in on endpoint %d\n", i); - if (num_interrupt_in < MAX_NUM_PORTS) { - interrupt_in_endpoint[num_interrupt_in] = - endpoint; - ++num_interrupt_in; - } - } - - if (usb_endpoint_is_int_out(endpoint)) { - /* we found an interrupt out endpoint */ - dev_dbg(ddev, "found interrupt out on endpoint %d\n", i); - if (num_interrupt_out < MAX_NUM_PORTS) { - interrupt_out_endpoint[num_interrupt_out] = - endpoint; - ++num_interrupt_out; - } - } + epds = kzalloc(sizeof(*epds), GFP_KERNEL); + if (!epds) { + retval = -ENOMEM; + goto err_put_serial; } -#if IS_ENABLED(CONFIG_USB_SERIAL_PL2303) - /* BEGIN HORRIBLE HACK FOR PL2303 */ - /* this is needed due to the looney way its endpoints are set up */ - if (((le16_to_cpu(dev->descriptor.idVendor) == PL2303_VENDOR_ID) && - (le16_to_cpu(dev->descriptor.idProduct) == PL2303_PRODUCT_ID)) || - ((le16_to_cpu(dev->descriptor.idVendor) == ATEN_VENDOR_ID) && - (le16_to_cpu(dev->descriptor.idProduct) == ATEN_PRODUCT_ID)) || - ((le16_to_cpu(dev->descriptor.idVendor) == ALCOR_VENDOR_ID) && - (le16_to_cpu(dev->descriptor.idProduct) == ALCOR_PRODUCT_ID)) || - ((le16_to_cpu(dev->descriptor.idVendor) == SIEMENS_VENDOR_ID) && - (le16_to_cpu(dev->descriptor.idProduct) == SIEMENS_PRODUCT_ID_EF81))) { - if (interface != dev->actconfig->interface[0]) { - /* check out the endpoints of the other interface*/ - iface_desc = dev->actconfig->interface[0]->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_int_in(endpoint)) { - /* we found a interrupt in endpoint */ - dev_dbg(ddev, "found interrupt in for Prolific device on separate interface\n"); - if (num_interrupt_in < MAX_NUM_PORTS) { - interrupt_in_endpoint[num_interrupt_in] = endpoint; - ++num_interrupt_in; - } - } - } - } + find_endpoints(serial, epds); - /* Now make sure the PL-2303 is configured correctly. - * If not, give up now and hope this hack will work - * properly during a later invocation of usb_serial_probe - */ - if (num_bulk_in == 0 || num_bulk_out == 0) { - dev_info(ddev, "PL-2303 hack: descriptors matched but endpoints did not\n"); - usb_serial_put(serial); - module_put(type->driver.owner); - return -ENODEV; - } - } - /* END HORRIBLE HACK FOR PL2303 */ -#endif - -#ifdef CONFIG_USB_SERIAL_GENERIC - if (type == &usb_serial_generic_device) { - num_ports = num_bulk_out; - if (num_ports == 0) { - dev_err(ddev, "Generic device with no bulk out, not allowed.\n"); - usb_serial_put(serial); - module_put(type->driver.owner); - return -EIO; - } - dev_info(ddev, "The \"generic\" usb-serial driver is only for testing and one-off prototypes.\n"); - dev_info(ddev, "Tell linux-usb@vger.kernel.org to add your device to a proper driver.\n"); + if (epds->num_bulk_in < type->num_bulk_in || + epds->num_bulk_out < type->num_bulk_out || + epds->num_interrupt_in < type->num_interrupt_in || + epds->num_interrupt_out < type->num_interrupt_out) { + dev_err(ddev, "required endpoints missing\n"); + retval = -ENODEV; + goto err_free_epds; } -#endif - if (!num_ports) { - /* if this device type has a calc_num_ports function, call it */ - if (type->calc_num_ports) - num_ports = type->calc_num_ports(serial); - if (!num_ports) - num_ports = type->num_ports; + + if (type->calc_num_ports) { + retval = type->calc_num_ports(serial, epds); + if (retval < 0) + goto err_free_epds; + num_ports = retval; } + if (!num_ports) + num_ports = type->num_ports; + if (num_ports > MAX_NUM_PORTS) { dev_warn(ddev, "too many ports requested: %d\n", num_ports); num_ports = MAX_NUM_PORTS; } - serial->num_ports = num_ports; - serial->num_bulk_in = num_bulk_in; - serial->num_bulk_out = num_bulk_out; - serial->num_interrupt_in = num_interrupt_in; - serial->num_interrupt_out = num_interrupt_out; + serial->num_ports = (unsigned char)num_ports; + serial->num_bulk_in = epds->num_bulk_in; + serial->num_bulk_out = epds->num_bulk_out; + serial->num_interrupt_in = epds->num_interrupt_in; + serial->num_interrupt_out = epds->num_interrupt_out; /* found all that we need */ dev_info(ddev, "%s converter detected\n", type->description); @@ -895,10 +838,10 @@ static int usb_serial_probe(struct usb_interface *interface, /* create our ports, we need as many as the max endpoints */ /* we don't use num_ports here because some devices have more endpoint pairs than ports */ - max_endpoints = max(num_bulk_in, num_bulk_out); - max_endpoints = max(max_endpoints, num_interrupt_in); - max_endpoints = max(max_endpoints, num_interrupt_out); - max_endpoints = max(max_endpoints, (int)serial->num_ports); + max_endpoints = max(epds->num_bulk_in, epds->num_bulk_out); + max_endpoints = max(max_endpoints, epds->num_interrupt_in); + max_endpoints = max(max_endpoints, epds->num_interrupt_out); + max_endpoints = max(max_endpoints, serial->num_ports); serial->num_port_pointers = max_endpoints; dev_dbg(ddev, "setting up %d port structure(s)\n", max_endpoints); @@ -923,8 +866,8 @@ static int usb_serial_probe(struct usb_interface *interface, } /* set up the endpoint information */ - for (i = 0; i < num_bulk_in; ++i) { - endpoint = bulk_in_endpoint[i]; + for (i = 0; i < epds->num_bulk_in; ++i) { + endpoint = epds->bulk_in[i]; port = serial->port[i]; buffer_size = max_t(int, serial->type->bulk_in_size, usb_endpoint_maxp(endpoint)); @@ -952,8 +895,8 @@ static int usb_serial_probe(struct usb_interface *interface, port->bulk_in_buffer = port->bulk_in_buffers[0]; } - for (i = 0; i < num_bulk_out; ++i) { - endpoint = bulk_out_endpoint[i]; + for (i = 0; i < epds->num_bulk_out; ++i) { + endpoint = epds->bulk_out[i]; port = serial->port[i]; if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) goto probe_error; @@ -985,8 +928,8 @@ static int usb_serial_probe(struct usb_interface *interface, } if (serial->type->read_int_callback) { - for (i = 0; i < num_interrupt_in; ++i) { - endpoint = interrupt_in_endpoint[i]; + for (i = 0; i < epds->num_interrupt_in; ++i) { + endpoint = epds->interrupt_in[i]; port = serial->port[i]; port->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_in_urb) @@ -1005,13 +948,13 @@ static int usb_serial_probe(struct usb_interface *interface, serial->type->read_int_callback, port, endpoint->bInterval); } - } else if (num_interrupt_in) { + } else if (epds->num_interrupt_in) { dev_dbg(ddev, "The device claims to support interrupt in transfers, but read_int_callback is not defined\n"); } if (serial->type->write_int_callback) { - for (i = 0; i < num_interrupt_out; ++i) { - endpoint = interrupt_out_endpoint[i]; + for (i = 0; i < epds->num_interrupt_out; ++i) { + endpoint = epds->interrupt_out[i]; port = serial->port[i]; port->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!port->interrupt_out_urb) @@ -1031,7 +974,7 @@ static int usb_serial_probe(struct usb_interface *interface, serial->type->write_int_callback, port, endpoint->bInterval); } - } else if (num_interrupt_out) { + } else if (epds->num_interrupt_out) { dev_dbg(ddev, "The device claims to support interrupt out transfers, but write_int_callback is not defined\n"); } @@ -1053,12 +996,6 @@ static int usb_serial_probe(struct usb_interface *interface, serial->attached = 1; } - /* Avoid race with tty_open and serial_install by setting the - * disconnected flag and not clearing it until all ports have been - * registered. - */ - serial->disconnected = 1; - if (allocate_minors(serial, num_ports)) { dev_err(ddev, "No more free serial minor numbers\n"); goto probe_error; @@ -1076,18 +1013,23 @@ static int usb_serial_probe(struct usb_interface *interface, dev_err(ddev, "Error registering port device, continuing\n"); } - serial->disconnected = 0; - if (num_ports > 0) usb_serial_console_init(serial->port[0]->minor); exit: + kfree(epds); module_put(type->driver.owner); return 0; probe_error: + retval = -EIO; +err_free_epds: + kfree(epds); +err_put_serial: usb_serial_put(serial); +err_put_module: module_put(type->driver.owner); - return -EIO; + + return retval; } static void usb_serial_disconnect(struct usb_interface *interface) diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index ca2fa5bbe17e1a9406079a89571e8dd31a083c9a..12f4c5a91e628eaf043a5d103d86a0838164da96 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -17,7 +17,7 @@ #define USB_DEBUG_MAX_PACKET_SIZE 8 #define USB_DEBUG_BRK_SIZE 8 -static char USB_DEBUG_BRK[USB_DEBUG_BRK_SIZE] = { +static const char USB_DEBUG_BRK[USB_DEBUG_BRK_SIZE] = { 0x00, 0xff, 0x01, @@ -32,7 +32,18 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0525, 0x127a) }, { }, }; -MODULE_DEVICE_TABLE(usb, id_table); + +static const struct usb_device_id dbc_id_table[] = { + { USB_DEVICE(0x1d6b, 0x0004) }, + { }, +}; + +static const struct usb_device_id id_table_combined[] = { + { USB_DEVICE(0x0525, 0x127a) }, + { USB_DEVICE(0x1d6b, 0x0004) }, + { }, +}; +MODULE_DEVICE_TABLE(usb, id_table_combined); /* This HW really does not support a serial break, so one will be * emulated when ever the break state is set to true. @@ -71,9 +82,20 @@ static struct usb_serial_driver debug_device = { .process_read_urb = usb_debug_process_read_urb, }; +static struct usb_serial_driver dbc_device = { + .driver = { + .owner = THIS_MODULE, + .name = "xhci_dbc", + }, + .id_table = dbc_id_table, + .num_ports = 1, + .break_ctl = usb_debug_break_ctl, + .process_read_urb = usb_debug_process_read_urb, +}; + static struct usb_serial_driver * const serial_drivers[] = { - &debug_device, NULL + &debug_device, &dbc_device, NULL }; -module_usb_serial_driver(serial_drivers, id_table); +module_usb_serial_driver(serial_drivers, id_table_combined); MODULE_LICENSE("GPL"); diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 337a0be89fcf0767f67731a9b0a50700e1df9318..9f3317a940ef6a8d88767eaa919bb4b61f6f13b6 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -40,11 +40,12 @@ static int visor_open(struct tty_struct *tty, struct usb_serial_port *port); static void visor_close(struct usb_serial_port *port); static int visor_probe(struct usb_serial *serial, const struct usb_device_id *id); -static int visor_calc_num_ports(struct usb_serial *serial); +static int visor_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds); +static int clie_5_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds); static void visor_read_int_callback(struct urb *urb); static int clie_3_5_startup(struct usb_serial *serial); -static int treo_attach(struct usb_serial *serial); -static int clie_5_attach(struct usb_serial *serial); static int palm_os_3_probe(struct usb_serial *serial, const struct usb_device_id *id); static int palm_os_4_probe(struct usb_serial *serial, @@ -174,7 +175,6 @@ static struct usb_serial_driver handspring_device = { .close = visor_close, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, - .attach = treo_attach, .probe = visor_probe, .calc_num_ports = visor_calc_num_ports, .read_int_callback = visor_read_int_callback, @@ -189,14 +189,14 @@ static struct usb_serial_driver clie_5_device = { .description = "Sony Clie 5.0", .id_table = clie_id_5_table, .num_ports = 2, + .num_bulk_out = 2, .bulk_out_size = 256, .open = visor_open, .close = visor_close, .throttle = usb_serial_generic_throttle, .unthrottle = usb_serial_generic_unthrottle, - .attach = clie_5_attach, .probe = visor_probe, - .calc_num_ports = visor_calc_num_ports, + .calc_num_ports = clie_5_calc_num_ports, .read_int_callback = visor_read_int_callback, }; @@ -466,16 +466,60 @@ static int visor_probe(struct usb_serial *serial, return retval; } -static int visor_calc_num_ports(struct usb_serial *serial) +static int visor_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) { + unsigned int vid = le16_to_cpu(serial->dev->descriptor.idVendor); int num_ports = (int)(long)(usb_get_serial_data(serial)); if (num_ports) usb_set_serial_data(serial, NULL); + /* + * Only swap the bulk endpoints for the Handspring devices with + * interrupt in endpoints, which for now are the Treo devices. + */ + if (!(vid == HANDSPRING_VENDOR_ID || vid == KYOCERA_VENDOR_ID) || + epds->num_interrupt_in == 0) + goto out; + + if (epds->num_bulk_in < 2 || epds->num_interrupt_in < 2) { + dev_err(&serial->interface->dev, "missing endpoints\n"); + return -ENODEV; + } + + /* + * It appears that Treos and Kyoceras want to use the + * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint, + * so let's swap the 1st and 2nd bulk in and interrupt endpoints. + * Note that swapping the bulk out endpoints would break lots of + * apps that want to communicate on the second port. + */ + swap(epds->bulk_in[0], epds->bulk_in[1]); + swap(epds->interrupt_in[0], epds->interrupt_in[1]); +out: return num_ports; } +static int clie_5_calc_num_ports(struct usb_serial *serial, + struct usb_serial_endpoints *epds) +{ + /* + * TH55 registers 2 ports. + * Communication in from the UX50/TH55 uses the first bulk-in + * endpoint, while communication out to the UX50/TH55 uses the second + * bulk-out endpoint. + */ + + /* + * FIXME: Should we swap the descriptors instead of using the same + * bulk-out endpoint for both ports? + */ + epds->bulk_out[0] = epds->bulk_out[1]; + + return serial->type->num_ports; +} + static int clie_3_5_startup(struct usb_serial *serial) { struct device *dev = &serial->dev->dev; @@ -531,94 +575,6 @@ static int clie_3_5_startup(struct usb_serial *serial) return result; } -static int treo_attach(struct usb_serial *serial) -{ - struct usb_serial_port *swap_port; - - /* Only do this endpoint hack for the Handspring devices with - * interrupt in endpoints, which for now are the Treo devices. */ - if (!((le16_to_cpu(serial->dev->descriptor.idVendor) - == HANDSPRING_VENDOR_ID) || - (le16_to_cpu(serial->dev->descriptor.idVendor) - == KYOCERA_VENDOR_ID)) || - (serial->num_interrupt_in == 0)) - return 0; - - if (serial->num_bulk_in < 2 || serial->num_interrupt_in < 2) { - dev_err(&serial->interface->dev, "missing endpoints\n"); - return -ENODEV; - } - - /* - * It appears that Treos and Kyoceras want to use the - * 1st bulk in endpoint to communicate with the 2nd bulk out endpoint, - * so let's swap the 1st and 2nd bulk in and interrupt endpoints. - * Note that swapping the bulk out endpoints would break lots of - * apps that want to communicate on the second port. - */ -#define COPY_PORT(dest, src) \ - do { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(src->read_urbs); ++i) { \ - dest->read_urbs[i] = src->read_urbs[i]; \ - dest->read_urbs[i]->context = dest; \ - dest->bulk_in_buffers[i] = src->bulk_in_buffers[i]; \ - } \ - dest->read_urb = src->read_urb; \ - dest->bulk_in_endpointAddress = src->bulk_in_endpointAddress;\ - dest->bulk_in_buffer = src->bulk_in_buffer; \ - dest->bulk_in_size = src->bulk_in_size; \ - dest->interrupt_in_urb = src->interrupt_in_urb; \ - dest->interrupt_in_urb->context = dest; \ - dest->interrupt_in_endpointAddress = \ - src->interrupt_in_endpointAddress;\ - dest->interrupt_in_buffer = src->interrupt_in_buffer; \ - } while (0); - - swap_port = kmalloc(sizeof(*swap_port), GFP_KERNEL); - if (!swap_port) - return -ENOMEM; - COPY_PORT(swap_port, serial->port[0]); - COPY_PORT(serial->port[0], serial->port[1]); - COPY_PORT(serial->port[1], swap_port); - kfree(swap_port); - - return 0; -} - -static int clie_5_attach(struct usb_serial *serial) -{ - struct usb_serial_port *port; - unsigned int pipe; - int j; - - /* TH55 registers 2 ports. - Communication in from the UX50/TH55 uses bulk_in_endpointAddress - from port 0. Communication out to the UX50/TH55 uses - bulk_out_endpointAddress from port 1 - - Lets do a quick and dirty mapping - */ - - /* some sanity check */ - if (serial->num_bulk_out < 2) { - dev_err(&serial->interface->dev, "missing bulk out endpoints\n"); - return -ENODEV; - } - - /* port 0 now uses the modified endpoint Address */ - port = serial->port[0]; - port->bulk_out_endpointAddress = - serial->port[1]->bulk_out_endpointAddress; - - pipe = usb_sndbulkpipe(serial->dev, port->bulk_out_endpointAddress); - for (j = 0; j < ARRAY_SIZE(port->write_urbs); ++j) - port->write_urbs[j]->pipe = pipe; - - return 0; -} - module_usb_serial_driver(serial_drivers, id_table_combined); MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 5ab65eb1dacc8b5aa2fdf5152df2a381180683c2..55cebc1e6fec1d6a2664e2d796c49e2af728af75 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -80,8 +80,6 @@ static int whiteheat_firmware_download(struct usb_serial *serial, static int whiteheat_firmware_attach(struct usb_serial *serial); /* function prototypes for the Connect Tech WhiteHEAT serial converter */ -static int whiteheat_probe(struct usb_serial *serial, - const struct usb_device_id *id); static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_release(struct usb_serial *serial); static int whiteheat_port_probe(struct usb_serial_port *port); @@ -118,7 +116,8 @@ static struct usb_serial_driver whiteheat_device = { .description = "Connect Tech - WhiteHEAT", .id_table = id_table_std, .num_ports = 4, - .probe = whiteheat_probe, + .num_bulk_in = 5, + .num_bulk_out = 5, .attach = whiteheat_attach, .release = whiteheat_release, .port_probe = whiteheat_port_probe, @@ -221,33 +220,6 @@ static int whiteheat_firmware_attach(struct usb_serial *serial) * Connect Tech's White Heat serial driver functions *****************************************************************************/ -static int whiteheat_probe(struct usb_serial *serial, - const struct usb_device_id *id) -{ - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - size_t num_bulk_in = 0; - size_t num_bulk_out = 0; - size_t min_num_bulk; - unsigned int i; - - iface_desc = serial->interface->cur_altsetting; - - for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { - endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_bulk_in(endpoint)) - ++num_bulk_in; - if (usb_endpoint_is_bulk_out(endpoint)) - ++num_bulk_out; - } - - min_num_bulk = COMMAND_PORT + 1; - if (num_bulk_in < min_num_bulk || num_bulk_out < min_num_bulk) - return -ENODEV; - - return 0; -} - static int whiteheat_attach(struct usb_serial *serial) { struct usb_serial_port *command_port; diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 369f3c24815a1462f405f1a6bc3f2cbdd034e21b..44af719194b239f760c77919bf7cf5bad19ed9d7 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -446,6 +446,10 @@ struct ms_lib_ctrl { #define SD_BLOCK_LEN 9 struct ene_ub6250_info { + + /* I/O bounce buffer */ + u8 *bbuf; + /* for 6250 code */ struct SD_STATUS SD_Status; struct MS_STATUS MS_Status; @@ -493,8 +497,11 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag); static void ene_ub6250_info_destructor(void *extra) { + struct ene_ub6250_info *info = (struct ene_ub6250_info *) extra; + if (!extra) return; + kfree(info->bbuf); } static int ene_send_scsi_cmd(struct us_data *us, u8 fDir, void *buf, int use_sg) @@ -860,8 +867,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, u8 PageNum, u32 *PageBuf, struct ms_lib_type_extdat *ExtraDat) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; + struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; int result; - u8 ExtBuf[4]; u32 bn = PhyBlockAddr * 0x20 + PageNum; result = ene_load_bincode(us, MS_RW_PATTERN); @@ -901,7 +909,7 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, bcb->CDB[2] = (unsigned char)(PhyBlockAddr>>16); bcb->CDB[6] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -910,9 +918,9 @@ static int ms_read_readpage(struct us_data *us, u32 PhyBlockAddr, ExtraDat->status0 = 0x10; /* Not yet,fireware support */ ExtraDat->status1 = 0x00; /* Not yet,fireware support */ - ExtraDat->ovrflg = ExtBuf[0]; - ExtraDat->mngflg = ExtBuf[1]; - ExtraDat->logadr = memstick_logaddr(ExtBuf[2], ExtBuf[3]); + ExtraDat->ovrflg = bbuf[0]; + ExtraDat->mngflg = bbuf[1]; + ExtraDat->logadr = memstick_logaddr(bbuf[2], bbuf[3]); return USB_STOR_TRANSPORT_GOOD; } @@ -1332,8 +1340,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, u8 PageNum, struct ms_lib_type_extdat *ExtraDat) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; + struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; int result; - u8 ExtBuf[4]; memset(bcb, 0, sizeof(struct bulk_cb_wrap)); bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN); @@ -1347,7 +1356,7 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, bcb->CDB[2] = (unsigned char)(PhyBlock>>16); bcb->CDB[6] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &ExtBuf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; @@ -1355,9 +1364,9 @@ static int ms_lib_read_extra(struct us_data *us, u32 PhyBlock, ExtraDat->intr = 0x80; /* Not yet, waiting for fireware support */ ExtraDat->status0 = 0x10; /* Not yet, waiting for fireware support */ ExtraDat->status1 = 0x00; /* Not yet, waiting for fireware support */ - ExtraDat->ovrflg = ExtBuf[0]; - ExtraDat->mngflg = ExtBuf[1]; - ExtraDat->logadr = memstick_logaddr(ExtBuf[2], ExtBuf[3]); + ExtraDat->ovrflg = bbuf[0]; + ExtraDat->mngflg = bbuf[1]; + ExtraDat->logadr = memstick_logaddr(bbuf[2], bbuf[3]); return USB_STOR_TRANSPORT_GOOD; } @@ -1556,9 +1565,9 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st) u16 PhyBlock, newblk, i; u16 LogStart, LogEnde; struct ms_lib_type_extdat extdat; - u8 buf[0x200]; u32 count = 0, index = 0; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; for (PhyBlock = 0; PhyBlock < info->MS_Lib.NumberOfPhyBlock;) { ms_lib_phy_to_log_range(PhyBlock, &LogStart, &LogEnde); @@ -1572,14 +1581,16 @@ static int ms_lib_scan_logicalblocknumber(struct us_data *us, u16 btBlk1st) } if (count == PhyBlock) { - ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, &buf); + ms_lib_read_extrablock(us, PhyBlock, 0, 0x80, + bbuf); count += 0x80; } index = (PhyBlock % 0x80) * 4; - extdat.ovrflg = buf[index]; - extdat.mngflg = buf[index+1]; - extdat.logadr = memstick_logaddr(buf[index+2], buf[index+3]); + extdat.ovrflg = bbuf[index]; + extdat.mngflg = bbuf[index+1]; + extdat.logadr = memstick_logaddr(bbuf[index+2], + bbuf[index+3]); if ((extdat.ovrflg & MS_REG_OVR_BKST) != MS_REG_OVR_BKST_OK) { ms_lib_setacquired_errorblock(us, PhyBlock); @@ -2062,9 +2073,9 @@ static int ene_ms_init(struct us_data *us) { struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; int result; - u8 buf[0x200]; u16 MSP_BlockSize, MSP_UserAreaBlocks; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; printk(KERN_INFO "transport --- ENE_MSInit\n"); @@ -2083,13 +2094,13 @@ static int ene_ms_init(struct us_data *us) bcb->CDB[0] = 0xF1; bcb->CDB[1] = 0x01; - result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) { printk(KERN_ERR "Execution MS Init Code Fail !!\n"); return USB_STOR_TRANSPORT_ERROR; } /* the same part to test ENE */ - info->MS_Status = *(struct MS_STATUS *)&buf[0]; + info->MS_Status = *(struct MS_STATUS *) bbuf; if (info->MS_Status.Insert && info->MS_Status.Ready) { printk(KERN_INFO "Insert = %x\n", info->MS_Status.Insert); @@ -2098,15 +2109,15 @@ static int ene_ms_init(struct us_data *us) printk(KERN_INFO "IsMSPHG = %x\n", info->MS_Status.IsMSPHG); printk(KERN_INFO "WtP= %x\n", info->MS_Status.WtP); if (info->MS_Status.IsMSPro) { - MSP_BlockSize = (buf[6] << 8) | buf[7]; - MSP_UserAreaBlocks = (buf[10] << 8) | buf[11]; + MSP_BlockSize = (bbuf[6] << 8) | bbuf[7]; + MSP_UserAreaBlocks = (bbuf[10] << 8) | bbuf[11]; info->MSP_TotalBlock = MSP_BlockSize * MSP_UserAreaBlocks; } else { ms_card_init(us); /* Card is MS (to ms.c)*/ } usb_stor_dbg(us, "MS Init Code OK !!\n"); } else { - usb_stor_dbg(us, "MS Card Not Ready --- %x\n", buf[0]); + usb_stor_dbg(us, "MS Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; } @@ -2116,9 +2127,9 @@ static int ene_ms_init(struct us_data *us) static int ene_sd_init(struct us_data *us) { int result; - u8 buf[0x200]; struct bulk_cb_wrap *bcb = (struct bulk_cb_wrap *) us->iobuf; struct ene_ub6250_info *info = (struct ene_ub6250_info *) us->extra; + u8 *bbuf = info->bbuf; usb_stor_dbg(us, "transport --- ENE_SDInit\n"); /* SD Init Part-1 */ @@ -2152,17 +2163,17 @@ static int ene_sd_init(struct us_data *us) bcb->Flags = US_BULK_FLAG_IN; bcb->CDB[0] = 0xF1; - result = ene_send_scsi_cmd(us, FDIR_READ, &buf, 0); + result = ene_send_scsi_cmd(us, FDIR_READ, bbuf, 0); if (result != USB_STOR_XFER_GOOD) { usb_stor_dbg(us, "Execution SD Init Code Fail !!\n"); return USB_STOR_TRANSPORT_ERROR; } - info->SD_Status = *(struct SD_STATUS *)&buf[0]; + info->SD_Status = *(struct SD_STATUS *) bbuf; if (info->SD_Status.Insert && info->SD_Status.Ready) { struct SD_STATUS *s = &info->SD_Status; - ene_get_card_status(us, (unsigned char *)&buf); + ene_get_card_status(us, bbuf); usb_stor_dbg(us, "Insert = %x\n", s->Insert); usb_stor_dbg(us, "Ready = %x\n", s->Ready); usb_stor_dbg(us, "IsMMC = %x\n", s->IsMMC); @@ -2170,7 +2181,7 @@ static int ene_sd_init(struct us_data *us) usb_stor_dbg(us, "HiSpeed = %x\n", s->HiSpeed); usb_stor_dbg(us, "WtP = %x\n", s->WtP); } else { - usb_stor_dbg(us, "SD Card Not Ready --- %x\n", buf[0]); + usb_stor_dbg(us, "SD Card Not Ready --- %x\n", bbuf[0]); return USB_STOR_TRANSPORT_ERROR; } return USB_STOR_TRANSPORT_GOOD; @@ -2180,13 +2191,15 @@ static int ene_sd_init(struct us_data *us) static int ene_init(struct us_data *us) { int result; - u8 misc_reg03 = 0; + u8 misc_reg03; struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); + u8 *bbuf = info->bbuf; - result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03); + result = ene_get_card_type(us, REG_CARD_STATUS, bbuf); if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; + misc_reg03 = bbuf[0]; if (misc_reg03 & 0x01) { if (!info->SD_Status.Ready) { result = ene_sd_init(us); @@ -2303,8 +2316,9 @@ static int ene_ub6250_probe(struct usb_interface *intf, const struct usb_device_id *id) { int result; - u8 misc_reg03 = 0; + u8 misc_reg03; struct us_data *us; + struct ene_ub6250_info *info; result = usb_stor_probe1(&us, intf, id, (id - ene_ub6250_usb_ids) + ene_ub6250_unusual_dev_list, @@ -2313,11 +2327,16 @@ static int ene_ub6250_probe(struct usb_interface *intf, return result; /* FIXME: where should the code alloc extra buf ? */ - if (!us->extra) { - us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); - if (!us->extra) - return -ENOMEM; - us->extra_destructor = ene_ub6250_info_destructor; + us->extra = kzalloc(sizeof(struct ene_ub6250_info), GFP_KERNEL); + if (!us->extra) + return -ENOMEM; + us->extra_destructor = ene_ub6250_info_destructor; + + info = (struct ene_ub6250_info *)(us->extra); + info->bbuf = kmalloc(512, GFP_KERNEL); + if (!info->bbuf) { + kfree(us->extra); + return -ENOMEM; } us->transport_name = "ene_ub6250"; @@ -2329,12 +2348,13 @@ static int ene_ub6250_probe(struct usb_interface *intf, return result; /* probe card type */ - result = ene_get_card_type(us, REG_CARD_STATUS, &misc_reg03); + result = ene_get_card_type(us, REG_CARD_STATUS, info->bbuf); if (result != USB_STOR_XFER_GOOD) { usb_stor_disconnect(intf); return USB_STOR_TRANSPORT_ERROR; } + misc_reg03 = info->bbuf[0]; if (!(misc_reg03 & 0x01)) { pr_info("ums_eneub6250: This driver only supports SD/MS cards. " "It does not support SM cards.\n"); diff --git a/drivers/usb/storage/karma.c b/drivers/usb/storage/karma.c index f9d407f0b50868d2c576f2e57ddae64de0a26aad..b05ba4929f00e7f9487306e7ecebce87b86b295e 100644 --- a/drivers/usb/storage/karma.c +++ b/drivers/usb/storage/karma.c @@ -105,7 +105,7 @@ static struct us_unusual_dev karma_unusual_dev_list[] = { */ static int rio_karma_send_command(char cmd, struct us_data *us) { - int result, partial; + int result; unsigned long timeout; static unsigned char seq = 1; struct karma_data *data = (struct karma_data *) us->extra; @@ -119,12 +119,12 @@ static int rio_karma_send_command(char cmd, struct us_data *us) timeout = jiffies + msecs_to_jiffies(6000); for (;;) { result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, - us->iobuf, RIO_SEND_LEN, &partial); + us->iobuf, RIO_SEND_LEN, NULL); if (result != USB_STOR_XFER_GOOD) goto err; result = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, - data->recv, RIO_RECV_LEN, &partial); + data->recv, RIO_RECV_LEN, NULL); if (result != USB_STOR_XFER_GOOD) goto err; diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 9129f6cb823074a555a90f74611dac3f0164a60d..5a70c33ef0e0645d1c97a59021a6b400776dd473 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -42,7 +42,7 @@ * - a patch that adds the entry for your device, including your * email address right above the entry (plus maybe a brief * explanation of the reason for the entry), - * - a copy of /proc/bus/usb/devices with your device plugged in + * - a copy of /sys/kernel/debug/usb/devices with your device plugged in * running with this patch. * Send your submission to either Phil Dibowitz or * Alan Stern , and don't forget to CC: the @@ -176,7 +176,7 @@ UNUSUAL_DEV( 0x0420, 0x0001, 0x0100, 0x0100, /* * Reported by Andrew Nayenko - * Updated for new firmware by Phillip Potter + * Updated for new firmware by Phillip Potter */ UNUSUAL_DEV( 0x0421, 0x0019, 0x0592, 0x0610, "Nokia", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 615bea08ec0aeb1788155e067ac4f999a737f9a0..06615934fed1cc537694b03d023b613c95aea9f1 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -737,13 +737,11 @@ static void get_protocol(struct us_data *us) /* Get the pipe settings */ static int get_pipes(struct us_data *us) { - struct usb_host_interface *altsetting = - us->pusb_intf->cur_altsetting; - int i; - struct usb_endpoint_descriptor *ep; - struct usb_endpoint_descriptor *ep_in = NULL; - struct usb_endpoint_descriptor *ep_out = NULL; - struct usb_endpoint_descriptor *ep_int = NULL; + struct usb_host_interface *alt = us->pusb_intf->cur_altsetting; + struct usb_endpoint_descriptor *ep_in; + struct usb_endpoint_descriptor *ep_out; + struct usb_endpoint_descriptor *ep_int; + int res; /* * Find the first endpoint of each type we need. @@ -751,28 +749,16 @@ static int get_pipes(struct us_data *us) * An optional interrupt-in is OK (necessary for CBI protocol). * We will ignore any others. */ - for (i = 0; i < altsetting->desc.bNumEndpoints; i++) { - ep = &altsetting->endpoint[i].desc; - - if (usb_endpoint_xfer_bulk(ep)) { - if (usb_endpoint_dir_in(ep)) { - if (!ep_in) - ep_in = ep; - } else { - if (!ep_out) - ep_out = ep; - } - } - - else if (usb_endpoint_is_int_in(ep)) { - if (!ep_int) - ep_int = ep; - } + res = usb_find_common_endpoints(alt, &ep_in, &ep_out, NULL, NULL); + if (res) { + usb_stor_dbg(us, "bulk endpoints not found\n"); + return res; } - if (!ep_in || !ep_out || (us->protocol == USB_PR_CBI && !ep_int)) { - usb_stor_dbg(us, "Endpoint sanity check failed! Rejecting dev.\n"); - return -EIO; + res = usb_find_int_in_endpoint(alt, &ep_int); + if (res && us->protocol == USB_PR_CBI) { + usb_stor_dbg(us, "interrupt endpoint not found\n"); + return res; } /* Calculate and store the pipe values */ diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..dfcfe459b7cf5605d4a00abc78e402c6e2468445 --- /dev/null +++ b/drivers/usb/typec/Kconfig @@ -0,0 +1,22 @@ + +menu "USB Power Delivery and Type-C drivers" + +config TYPEC + tristate + +config TYPEC_WCOVE + tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver" + depends on ACPI + depends on INTEL_SOC_PMIC + depends on INTEL_PMC_IPC + depends on BXT_WC_PMIC_OPREGION + select TYPEC + help + This driver adds support for USB Type-C detection on Intel Broxton + platforms that have Intel Whiskey Cove PMIC. The driver can detect the + role and cable orientation. + + To compile this driver as module, choose M here: the module will be + called typec_wcove + +endmenu diff --git a/drivers/usb/typec/Makefile b/drivers/usb/typec/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b9cb862221af39f36b28701576cec2011fccb845 --- /dev/null +++ b/drivers/usb/typec/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_TYPEC) += typec.o +obj-$(CONFIG_TYPEC_WCOVE) += typec_wcove.o diff --git a/drivers/usb/typec/typec.c b/drivers/usb/typec/typec.c new file mode 100644 index 0000000000000000000000000000000000000000..89e540bb7ff3a688621223eb7a9fd4252f21404b --- /dev/null +++ b/drivers/usb/typec/typec.c @@ -0,0 +1,1262 @@ +/* + * USB Type-C Connector Class + * + * Copyright (C) 2017, Intel Corporation + * Author: Heikki Krogerus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +struct typec_mode { + int index; + u32 vdo; + char *desc; + enum typec_port_type roles; + + struct typec_altmode *alt_mode; + + unsigned int active:1; + + char group_name[6]; + struct attribute_group group; + struct attribute *attrs[5]; + struct device_attribute vdo_attr; + struct device_attribute desc_attr; + struct device_attribute active_attr; + struct device_attribute roles_attr; +}; + +struct typec_altmode { + struct device dev; + u16 svid; + int n_modes; + struct typec_mode modes[ALTMODE_MAX_MODES]; + const struct attribute_group *mode_groups[ALTMODE_MAX_MODES]; +}; + +struct typec_plug { + struct device dev; + enum typec_plug_index index; +}; + +struct typec_cable { + struct device dev; + enum typec_plug_type type; + struct usb_pd_identity *identity; + unsigned int active:1; +}; + +struct typec_partner { + struct device dev; + unsigned int usb_pd:1; + struct usb_pd_identity *identity; + enum typec_accessory accessory; +}; + +struct typec_port { + unsigned int id; + struct device dev; + + int prefer_role; + enum typec_data_role data_role; + enum typec_role pwr_role; + enum typec_role vconn_role; + enum typec_pwr_opmode pwr_opmode; + + const struct typec_capability *cap; +}; + +#define to_typec_port(_dev_) container_of(_dev_, struct typec_port, dev) +#define to_typec_plug(_dev_) container_of(_dev_, struct typec_plug, dev) +#define to_typec_cable(_dev_) container_of(_dev_, struct typec_cable, dev) +#define to_typec_partner(_dev_) container_of(_dev_, struct typec_partner, dev) +#define to_altmode(_dev_) container_of(_dev_, struct typec_altmode, dev) + +static const struct device_type typec_partner_dev_type; +static const struct device_type typec_cable_dev_type; +static const struct device_type typec_plug_dev_type; +static const struct device_type typec_port_dev_type; + +#define is_typec_partner(_dev_) (_dev_->type == &typec_partner_dev_type) +#define is_typec_cable(_dev_) (_dev_->type == &typec_cable_dev_type) +#define is_typec_plug(_dev_) (_dev_->type == &typec_plug_dev_type) +#define is_typec_port(_dev_) (_dev_->type == &typec_port_dev_type) + +static DEFINE_IDA(typec_index_ida); +static struct class *typec_class; + +/* Common attributes */ + +static const char * const typec_accessory_modes[] = { + [TYPEC_ACCESSORY_NONE] = "none", + [TYPEC_ACCESSORY_AUDIO] = "analog_audio", + [TYPEC_ACCESSORY_DEBUG] = "debug", +}; + +static struct usb_pd_identity *get_pd_identity(struct device *dev) +{ + if (is_typec_partner(dev)) { + struct typec_partner *partner = to_typec_partner(dev); + + return partner->identity; + } else if (is_typec_cable(dev)) { + struct typec_cable *cable = to_typec_cable(dev); + + return cable->identity; + } + return NULL; +} + +static ssize_t id_header_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct usb_pd_identity *id = get_pd_identity(dev); + + return sprintf(buf, "0x%08x\n", id->id_header); +} +static DEVICE_ATTR_RO(id_header); + +static ssize_t cert_stat_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct usb_pd_identity *id = get_pd_identity(dev); + + return sprintf(buf, "0x%08x\n", id->cert_stat); +} +static DEVICE_ATTR_RO(cert_stat); + +static ssize_t product_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct usb_pd_identity *id = get_pd_identity(dev); + + return sprintf(buf, "0x%08x\n", id->product); +} +static DEVICE_ATTR_RO(product); + +static struct attribute *usb_pd_id_attrs[] = { + &dev_attr_id_header.attr, + &dev_attr_cert_stat.attr, + &dev_attr_product.attr, + NULL +}; + +static const struct attribute_group usb_pd_id_group = { + .name = "identity", + .attrs = usb_pd_id_attrs, +}; + +static const struct attribute_group *usb_pd_id_groups[] = { + &usb_pd_id_group, + NULL, +}; + +static void typec_report_identity(struct device *dev) +{ + sysfs_notify(&dev->kobj, "identity", "id_header"); + sysfs_notify(&dev->kobj, "identity", "cert_stat"); + sysfs_notify(&dev->kobj, "identity", "product"); +} + +/* ------------------------------------------------------------------------- */ +/* Alternate Modes */ + +/** + * typec_altmode_update_active - Report Enter/Exit mode + * @alt: Handle to the alternate mode + * @mode: Mode index + * @active: True when the mode has been entered + * + * If a partner or cable plug executes Enter/Exit Mode command successfully, the + * drivers use this routine to report the updated state of the mode. + */ +void typec_altmode_update_active(struct typec_altmode *alt, int mode, + bool active) +{ + struct typec_mode *m = &alt->modes[mode]; + char dir[6]; + + if (m->active == active) + return; + + m->active = active; + snprintf(dir, sizeof(dir), "mode%d", mode); + sysfs_notify(&alt->dev.kobj, dir, "active"); + kobject_uevent(&alt->dev.kobj, KOBJ_CHANGE); +} +EXPORT_SYMBOL_GPL(typec_altmode_update_active); + +/** + * typec_altmode2port - Alternate Mode to USB Type-C port + * @alt: The Alternate Mode + * + * Returns handle to the port that a cable plug or partner with @alt is + * connected to. + */ +struct typec_port *typec_altmode2port(struct typec_altmode *alt) +{ + if (is_typec_plug(alt->dev.parent)) + return to_typec_port(alt->dev.parent->parent->parent); + if (is_typec_partner(alt->dev.parent)) + return to_typec_port(alt->dev.parent->parent); + if (is_typec_port(alt->dev.parent)) + return to_typec_port(alt->dev.parent); + + return NULL; +} +EXPORT_SYMBOL_GPL(typec_altmode2port); + +static ssize_t +typec_altmode_vdo_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct typec_mode *mode = container_of(attr, struct typec_mode, + vdo_attr); + + return sprintf(buf, "0x%08x\n", mode->vdo); +} + +static ssize_t +typec_altmode_desc_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct typec_mode *mode = container_of(attr, struct typec_mode, + desc_attr); + + return sprintf(buf, "%s\n", mode->desc ? mode->desc : ""); +} + +static ssize_t +typec_altmode_active_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct typec_mode *mode = container_of(attr, struct typec_mode, + active_attr); + + return sprintf(buf, "%s\n", mode->active ? "yes" : "no"); +} + +static ssize_t +typec_altmode_active_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_mode *mode = container_of(attr, struct typec_mode, + active_attr); + struct typec_port *port = typec_altmode2port(mode->alt_mode); + bool activate; + int ret; + + if (!port->cap->activate_mode) + return -EOPNOTSUPP; + + ret = kstrtobool(buf, &activate); + if (ret) + return ret; + + ret = port->cap->activate_mode(port->cap, mode->index, activate); + if (ret) + return ret; + + return size; +} + +static ssize_t +typec_altmode_roles_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct typec_mode *mode = container_of(attr, struct typec_mode, + roles_attr); + ssize_t ret; + + switch (mode->roles) { + case TYPEC_PORT_DFP: + ret = sprintf(buf, "source\n"); + break; + case TYPEC_PORT_UFP: + ret = sprintf(buf, "sink\n"); + break; + case TYPEC_PORT_DRP: + default: + ret = sprintf(buf, "source sink\n"); + break; + } + return ret; +} + +static void typec_init_modes(struct typec_altmode *alt, + struct typec_mode_desc *desc, bool is_port) +{ + int i; + + for (i = 0; i < alt->n_modes; i++, desc++) { + struct typec_mode *mode = &alt->modes[i]; + + /* Not considering the human readable description critical */ + mode->desc = kstrdup(desc->desc, GFP_KERNEL); + if (desc->desc && !mode->desc) + dev_err(&alt->dev, "failed to copy mode%d desc\n", i); + + mode->alt_mode = alt; + mode->vdo = desc->vdo; + mode->roles = desc->roles; + mode->index = desc->index; + sprintf(mode->group_name, "mode%d", desc->index); + + sysfs_attr_init(&mode->vdo_attr.attr); + mode->vdo_attr.attr.name = "vdo"; + mode->vdo_attr.attr.mode = 0444; + mode->vdo_attr.show = typec_altmode_vdo_show; + + sysfs_attr_init(&mode->desc_attr.attr); + mode->desc_attr.attr.name = "description"; + mode->desc_attr.attr.mode = 0444; + mode->desc_attr.show = typec_altmode_desc_show; + + sysfs_attr_init(&mode->active_attr.attr); + mode->active_attr.attr.name = "active"; + mode->active_attr.attr.mode = 0644; + mode->active_attr.show = typec_altmode_active_show; + mode->active_attr.store = typec_altmode_active_store; + + mode->attrs[0] = &mode->vdo_attr.attr; + mode->attrs[1] = &mode->desc_attr.attr; + mode->attrs[2] = &mode->active_attr.attr; + + /* With ports, list the roles that the mode is supported with */ + if (is_port) { + sysfs_attr_init(&mode->roles_attr.attr); + mode->roles_attr.attr.name = "supported_roles"; + mode->roles_attr.attr.mode = 0444; + mode->roles_attr.show = typec_altmode_roles_show; + + mode->attrs[3] = &mode->roles_attr.attr; + } + + mode->group.attrs = mode->attrs; + mode->group.name = mode->group_name; + + alt->mode_groups[i] = &mode->group; + } +} + +static ssize_t svid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct typec_altmode *alt = to_altmode(dev); + + return sprintf(buf, "%04x\n", alt->svid); +} +static DEVICE_ATTR_RO(svid); + +static struct attribute *typec_altmode_attrs[] = { + &dev_attr_svid.attr, + NULL +}; +ATTRIBUTE_GROUPS(typec_altmode); + +static void typec_altmode_release(struct device *dev) +{ + struct typec_altmode *alt = to_altmode(dev); + int i; + + for (i = 0; i < alt->n_modes; i++) + kfree(alt->modes[i].desc); + kfree(alt); +} + +static const struct device_type typec_altmode_dev_type = { + .name = "typec_alternate_mode", + .groups = typec_altmode_groups, + .release = typec_altmode_release, +}; + +static struct typec_altmode * +typec_register_altmode(struct device *parent, struct typec_altmode_desc *desc) +{ + struct typec_altmode *alt; + int ret; + + alt = kzalloc(sizeof(*alt), GFP_KERNEL); + if (!alt) + return NULL; + + alt->svid = desc->svid; + alt->n_modes = desc->n_modes; + typec_init_modes(alt, desc->modes, is_typec_port(parent)); + + alt->dev.parent = parent; + alt->dev.groups = alt->mode_groups; + alt->dev.type = &typec_altmode_dev_type; + dev_set_name(&alt->dev, "svid-%04x", alt->svid); + + ret = device_register(&alt->dev); + if (ret) { + dev_err(parent, "failed to register alternate mode (%d)\n", + ret); + put_device(&alt->dev); + return NULL; + } + + return alt; +} + +/** + * typec_unregister_altmode - Unregister Alternate Mode + * @alt: The alternate mode to be unregistered + * + * Unregister device created with typec_partner_register_altmode(), + * typec_plug_register_altmode() or typec_port_register_altmode(). + */ +void typec_unregister_altmode(struct typec_altmode *alt) +{ + if (alt) + device_unregister(&alt->dev); +} +EXPORT_SYMBOL_GPL(typec_unregister_altmode); + +/* ------------------------------------------------------------------------- */ +/* Type-C Partners */ + +static ssize_t accessory_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct typec_partner *p = to_typec_partner(dev); + + return sprintf(buf, "%s\n", typec_accessory_modes[p->accessory]); +} +static DEVICE_ATTR_RO(accessory_mode); + +static ssize_t supports_usb_power_delivery_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct typec_partner *p = to_typec_partner(dev); + + return sprintf(buf, "%s\n", p->usb_pd ? "yes" : "no"); +} +static DEVICE_ATTR_RO(supports_usb_power_delivery); + +static struct attribute *typec_partner_attrs[] = { + &dev_attr_accessory_mode.attr, + &dev_attr_supports_usb_power_delivery.attr, + NULL +}; +ATTRIBUTE_GROUPS(typec_partner); + +static void typec_partner_release(struct device *dev) +{ + struct typec_partner *partner = to_typec_partner(dev); + + kfree(partner); +} + +static const struct device_type typec_partner_dev_type = { + .name = "typec_partner", + .groups = typec_partner_groups, + .release = typec_partner_release, +}; + +/** + * typec_partner_set_identity - Report result from Discover Identity command + * @partner: The partner updated identity values + * + * This routine is used to report that the result of Discover Identity USB power + * delivery command has become available. + */ +int typec_partner_set_identity(struct typec_partner *partner) +{ + if (!partner->identity) + return -EINVAL; + + typec_report_identity(&partner->dev); + return 0; +} +EXPORT_SYMBOL_GPL(typec_partner_set_identity); + +/** + * typec_partner_register_altmode - Register USB Type-C Partner Alternate Mode + * @partner: USB Type-C Partner that supports the alternate mode + * @desc: Description of the alternate mode + * + * This routine is used to register each alternate mode individually that + * @partner has listed in response to Discover SVIDs command. The modes for a + * SVID listed in response to Discover Modes command need to be listed in an + * array in @desc. + * + * Returns handle to the alternate mode on success or NULL on failure. + */ +struct typec_altmode * +typec_partner_register_altmode(struct typec_partner *partner, + struct typec_altmode_desc *desc) +{ + return typec_register_altmode(&partner->dev, desc); +} +EXPORT_SYMBOL_GPL(typec_partner_register_altmode); + +/** + * typec_register_partner - Register a USB Type-C Partner + * @port: The USB Type-C Port the partner is connected to + * @desc: Description of the partner + * + * Registers a device for USB Type-C Partner described in @desc. + * + * Returns handle to the partner on success or NULL on failure. + */ +struct typec_partner *typec_register_partner(struct typec_port *port, + struct typec_partner_desc *desc) +{ + struct typec_partner *partner; + int ret; + + partner = kzalloc(sizeof(*partner), GFP_KERNEL); + if (!partner) + return NULL; + + partner->usb_pd = desc->usb_pd; + partner->accessory = desc->accessory; + + if (desc->identity) { + /* + * Creating directory for the identity only if the driver is + * able to provide data to it. + */ + partner->dev.groups = usb_pd_id_groups; + partner->identity = desc->identity; + } + + partner->dev.class = typec_class; + partner->dev.parent = &port->dev; + partner->dev.type = &typec_partner_dev_type; + dev_set_name(&partner->dev, "%s-partner", dev_name(&port->dev)); + + ret = device_register(&partner->dev); + if (ret) { + dev_err(&port->dev, "failed to register partner (%d)\n", ret); + put_device(&partner->dev); + return NULL; + } + + return partner; +} +EXPORT_SYMBOL_GPL(typec_register_partner); + +/** + * typec_unregister_partner - Unregister a USB Type-C Partner + * @partner: The partner to be unregistered + * + * Unregister device created with typec_register_partner(). + */ +void typec_unregister_partner(struct typec_partner *partner) +{ + if (partner) + device_unregister(&partner->dev); +} +EXPORT_SYMBOL_GPL(typec_unregister_partner); + +/* ------------------------------------------------------------------------- */ +/* Type-C Cable Plugs */ + +static void typec_plug_release(struct device *dev) +{ + struct typec_plug *plug = to_typec_plug(dev); + + kfree(plug); +} + +static const struct device_type typec_plug_dev_type = { + .name = "typec_plug", + .release = typec_plug_release, +}; + +/** + * typec_plug_register_altmode - Register USB Type-C Cable Plug Alternate Mode + * @plug: USB Type-C Cable Plug that supports the alternate mode + * @desc: Description of the alternate mode + * + * This routine is used to register each alternate mode individually that @plug + * has listed in response to Discover SVIDs command. The modes for a SVID that + * the plug lists in response to Discover Modes command need to be listed in an + * array in @desc. + * + * Returns handle to the alternate mode on success or NULL on failure. + */ +struct typec_altmode * +typec_plug_register_altmode(struct typec_plug *plug, + struct typec_altmode_desc *desc) +{ + return typec_register_altmode(&plug->dev, desc); +} +EXPORT_SYMBOL_GPL(typec_plug_register_altmode); + +/** + * typec_register_plug - Register a USB Type-C Cable Plug + * @cable: USB Type-C Cable with the plug + * @desc: Description of the cable plug + * + * Registers a device for USB Type-C Cable Plug described in @desc. A USB Type-C + * Cable Plug represents a plug with electronics in it that can response to USB + * Power Delivery SOP Prime or SOP Double Prime packages. + * + * Returns handle to the cable plug on success or NULL on failure. + */ +struct typec_plug *typec_register_plug(struct typec_cable *cable, + struct typec_plug_desc *desc) +{ + struct typec_plug *plug; + char name[8]; + int ret; + + plug = kzalloc(sizeof(*plug), GFP_KERNEL); + if (!plug) + return NULL; + + sprintf(name, "plug%d", desc->index); + + plug->index = desc->index; + plug->dev.class = typec_class; + plug->dev.parent = &cable->dev; + plug->dev.type = &typec_plug_dev_type; + dev_set_name(&plug->dev, "%s-%s", dev_name(cable->dev.parent), name); + + ret = device_register(&plug->dev); + if (ret) { + dev_err(&cable->dev, "failed to register plug (%d)\n", ret); + put_device(&plug->dev); + return NULL; + } + + return plug; +} +EXPORT_SYMBOL_GPL(typec_register_plug); + +/** + * typec_unregister_plug - Unregister a USB Type-C Cable Plug + * @plug: The cable plug to be unregistered + * + * Unregister device created with typec_register_plug(). + */ +void typec_unregister_plug(struct typec_plug *plug) +{ + if (plug) + device_unregister(&plug->dev); +} +EXPORT_SYMBOL_GPL(typec_unregister_plug); + +/* Type-C Cables */ + +static ssize_t +type_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct typec_cable *cable = to_typec_cable(dev); + + return sprintf(buf, "%s\n", cable->active ? "active" : "passive"); +} +static DEVICE_ATTR_RO(type); + +static const char * const typec_plug_types[] = { + [USB_PLUG_NONE] = "unknown", + [USB_PLUG_TYPE_A] = "type-a", + [USB_PLUG_TYPE_B] = "type-b", + [USB_PLUG_TYPE_C] = "type-c", + [USB_PLUG_CAPTIVE] = "captive", +}; + +static ssize_t plug_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct typec_cable *cable = to_typec_cable(dev); + + return sprintf(buf, "%s\n", typec_plug_types[cable->type]); +} +static DEVICE_ATTR_RO(plug_type); + +static struct attribute *typec_cable_attrs[] = { + &dev_attr_type.attr, + &dev_attr_plug_type.attr, + NULL +}; +ATTRIBUTE_GROUPS(typec_cable); + +static void typec_cable_release(struct device *dev) +{ + struct typec_cable *cable = to_typec_cable(dev); + + kfree(cable); +} + +static const struct device_type typec_cable_dev_type = { + .name = "typec_cable", + .groups = typec_cable_groups, + .release = typec_cable_release, +}; + +/** + * typec_cable_set_identity - Report result from Discover Identity command + * @cable: The cable updated identity values + * + * This routine is used to report that the result of Discover Identity USB power + * delivery command has become available. + */ +int typec_cable_set_identity(struct typec_cable *cable) +{ + if (!cable->identity) + return -EINVAL; + + typec_report_identity(&cable->dev); + return 0; +} +EXPORT_SYMBOL_GPL(typec_cable_set_identity); + +/** + * typec_register_cable - Register a USB Type-C Cable + * @port: The USB Type-C Port the cable is connected to + * @desc: Description of the cable + * + * Registers a device for USB Type-C Cable described in @desc. The cable will be + * parent for the optional cable plug devises. + * + * Returns handle to the cable on success or NULL on failure. + */ +struct typec_cable *typec_register_cable(struct typec_port *port, + struct typec_cable_desc *desc) +{ + struct typec_cable *cable; + int ret; + + cable = kzalloc(sizeof(*cable), GFP_KERNEL); + if (!cable) + return NULL; + + cable->type = desc->type; + cable->active = desc->active; + + if (desc->identity) { + /* + * Creating directory for the identity only if the driver is + * able to provide data to it. + */ + cable->dev.groups = usb_pd_id_groups; + cable->identity = desc->identity; + } + + cable->dev.class = typec_class; + cable->dev.parent = &port->dev; + cable->dev.type = &typec_cable_dev_type; + dev_set_name(&cable->dev, "%s-cable", dev_name(&port->dev)); + + ret = device_register(&cable->dev); + if (ret) { + dev_err(&port->dev, "failed to register cable (%d)\n", ret); + put_device(&cable->dev); + return NULL; + } + + return cable; +} +EXPORT_SYMBOL_GPL(typec_register_cable); + +/** + * typec_unregister_cable - Unregister a USB Type-C Cable + * @cable: The cable to be unregistered + * + * Unregister device created with typec_register_cable(). + */ +void typec_unregister_cable(struct typec_cable *cable) +{ + if (cable) + device_unregister(&cable->dev); +} +EXPORT_SYMBOL_GPL(typec_unregister_cable); + +/* ------------------------------------------------------------------------- */ +/* USB Type-C ports */ + +static const char * const typec_roles[] = { + [TYPEC_SINK] = "sink", + [TYPEC_SOURCE] = "source", +}; + +static const char * const typec_data_roles[] = { + [TYPEC_DEVICE] = "device", + [TYPEC_HOST] = "host", +}; + +static ssize_t +preferred_role_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_port *port = to_typec_port(dev); + int role; + int ret; + + if (port->cap->type != TYPEC_PORT_DRP) { + dev_dbg(dev, "Preferred role only supported with DRP ports\n"); + return -EOPNOTSUPP; + } + + if (!port->cap->try_role) { + dev_dbg(dev, "Setting preferred role not supported\n"); + return -EOPNOTSUPP; + } + + role = sysfs_match_string(typec_roles, buf); + if (role < 0) { + if (sysfs_streq(buf, "none")) + role = TYPEC_NO_PREFERRED_ROLE; + else + return -EINVAL; + } + + ret = port->cap->try_role(port->cap, role); + if (ret) + return ret; + + port->prefer_role = role; + return size; +} + +static ssize_t +preferred_role_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct typec_port *port = to_typec_port(dev); + + if (port->cap->type != TYPEC_PORT_DRP) + return 0; + + if (port->prefer_role < 0) + return 0; + + return sprintf(buf, "%s\n", typec_roles[port->prefer_role]); +} +static DEVICE_ATTR_RW(preferred_role); + +static ssize_t data_role_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_port *port = to_typec_port(dev); + int ret; + + if (port->cap->type != TYPEC_PORT_DRP) { + dev_dbg(dev, "data role swap only supported with DRP ports\n"); + return -EOPNOTSUPP; + } + + if (!port->cap->dr_set) { + dev_dbg(dev, "data role swapping not supported\n"); + return -EOPNOTSUPP; + } + + ret = sysfs_match_string(typec_data_roles, buf); + if (ret < 0) + return ret; + + ret = port->cap->dr_set(port->cap, ret); + if (ret) + return ret; + + return size; +} + +static ssize_t data_role_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct typec_port *port = to_typec_port(dev); + + if (port->cap->type == TYPEC_PORT_DRP) + return sprintf(buf, "%s\n", port->data_role == TYPEC_HOST ? + "[host] device" : "host [device]"); + + return sprintf(buf, "[%s]\n", typec_data_roles[port->data_role]); +} +static DEVICE_ATTR_RW(data_role); + +static ssize_t power_role_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_port *port = to_typec_port(dev); + int ret = size; + + if (!port->cap->pd_revision) { + dev_dbg(dev, "USB Power Delivery not supported\n"); + return -EOPNOTSUPP; + } + + if (!port->cap->pr_set) { + dev_dbg(dev, "power role swapping not supported\n"); + return -EOPNOTSUPP; + } + + if (port->pwr_opmode != TYPEC_PWR_MODE_PD) { + dev_dbg(dev, "partner unable to swap power role\n"); + return -EIO; + } + + ret = sysfs_match_string(typec_roles, buf); + if (ret < 0) + return ret; + + ret = port->cap->pr_set(port->cap, ret); + if (ret) + return ret; + + return size; +} + +static ssize_t power_role_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct typec_port *port = to_typec_port(dev); + + if (port->cap->type == TYPEC_PORT_DRP) + return sprintf(buf, "%s\n", port->pwr_role == TYPEC_SOURCE ? + "[source] sink" : "source [sink]"); + + return sprintf(buf, "[%s]\n", typec_roles[port->pwr_role]); +} +static DEVICE_ATTR_RW(power_role); + +static const char * const typec_pwr_opmodes[] = { + [TYPEC_PWR_MODE_USB] = "default", + [TYPEC_PWR_MODE_1_5A] = "1.5A", + [TYPEC_PWR_MODE_3_0A] = "3.0A", + [TYPEC_PWR_MODE_PD] = "usb_power_delivery", +}; + +static ssize_t power_operation_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct typec_port *port = to_typec_port(dev); + + return sprintf(buf, "%s\n", typec_pwr_opmodes[port->pwr_opmode]); +} +static DEVICE_ATTR_RO(power_operation_mode); + +static ssize_t vconn_source_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct typec_port *port = to_typec_port(dev); + bool source; + int ret; + + if (!port->cap->pd_revision) { + dev_dbg(dev, "VCONN swap depends on USB Power Delivery\n"); + return -EOPNOTSUPP; + } + + if (!port->cap->vconn_set) { + dev_dbg(dev, "VCONN swapping not supported\n"); + return -EOPNOTSUPP; + } + + ret = kstrtobool(buf, &source); + if (ret) + return ret; + + ret = port->cap->vconn_set(port->cap, (enum typec_role)source); + if (ret) + return ret; + + return size; +} + +static ssize_t vconn_source_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct typec_port *port = to_typec_port(dev); + + return sprintf(buf, "%s\n", + port->vconn_role == TYPEC_SOURCE ? "yes" : "no"); +} +static DEVICE_ATTR_RW(vconn_source); + +static ssize_t supported_accessory_modes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct typec_port *port = to_typec_port(dev); + ssize_t ret = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(port->cap->accessory); i++) { + if (port->cap->accessory[i]) + ret += sprintf(buf + ret, "%s ", + typec_accessory_modes[port->cap->accessory[i]]); + } + + if (!ret) + return sprintf(buf, "none\n"); + + buf[ret - 1] = '\n'; + + return ret; +} +static DEVICE_ATTR_RO(supported_accessory_modes); + +static ssize_t usb_typec_revision_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct typec_port *port = to_typec_port(dev); + u16 rev = port->cap->revision; + + return sprintf(buf, "%d.%d\n", (rev >> 8) & 0xff, (rev >> 4) & 0xf); +} +static DEVICE_ATTR_RO(usb_typec_revision); + +static ssize_t usb_power_delivery_revision_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct typec_port *p = to_typec_port(dev); + + return sprintf(buf, "%d\n", (p->cap->pd_revision >> 8) & 0xff); +} +static DEVICE_ATTR_RO(usb_power_delivery_revision); + +static struct attribute *typec_attrs[] = { + &dev_attr_data_role.attr, + &dev_attr_power_operation_mode.attr, + &dev_attr_power_role.attr, + &dev_attr_preferred_role.attr, + &dev_attr_supported_accessory_modes.attr, + &dev_attr_usb_power_delivery_revision.attr, + &dev_attr_usb_typec_revision.attr, + &dev_attr_vconn_source.attr, + NULL, +}; +ATTRIBUTE_GROUPS(typec); + +static int typec_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + int ret; + + ret = add_uevent_var(env, "TYPEC_PORT=%s", dev_name(dev)); + if (ret) + dev_err(dev, "failed to add uevent TYPEC_PORT\n"); + + return ret; +} + +static void typec_release(struct device *dev) +{ + struct typec_port *port = to_typec_port(dev); + + ida_simple_remove(&typec_index_ida, port->id); + kfree(port); +} + +static const struct device_type typec_port_dev_type = { + .name = "typec_port", + .groups = typec_groups, + .uevent = typec_uevent, + .release = typec_release, +}; + +/* --------------------------------------- */ +/* Driver callbacks to report role updates */ + +/** + * typec_set_data_role - Report data role change + * @port: The USB Type-C Port where the role was changed + * @role: The new data role + * + * This routine is used by the port drivers to report data role changes. + */ +void typec_set_data_role(struct typec_port *port, enum typec_data_role role) +{ + if (port->data_role == role) + return; + + port->data_role = role; + sysfs_notify(&port->dev.kobj, NULL, "data_role"); + kobject_uevent(&port->dev.kobj, KOBJ_CHANGE); +} +EXPORT_SYMBOL_GPL(typec_set_data_role); + +/** + * typec_set_pwr_role - Report power role change + * @port: The USB Type-C Port where the role was changed + * @role: The new data role + * + * This routine is used by the port drivers to report power role changes. + */ +void typec_set_pwr_role(struct typec_port *port, enum typec_role role) +{ + if (port->pwr_role == role) + return; + + port->pwr_role = role; + sysfs_notify(&port->dev.kobj, NULL, "power_role"); + kobject_uevent(&port->dev.kobj, KOBJ_CHANGE); +} +EXPORT_SYMBOL_GPL(typec_set_pwr_role); + +/** + * typec_set_pwr_role - Report VCONN source change + * @port: The USB Type-C Port which VCONN role changed + * @role: Source when @port is sourcing VCONN, or Sink when it's not + * + * This routine is used by the port drivers to report if the VCONN source is + * changes. + */ +void typec_set_vconn_role(struct typec_port *port, enum typec_role role) +{ + if (port->vconn_role == role) + return; + + port->vconn_role = role; + sysfs_notify(&port->dev.kobj, NULL, "vconn_source"); + kobject_uevent(&port->dev.kobj, KOBJ_CHANGE); +} +EXPORT_SYMBOL_GPL(typec_set_vconn_role); + +/** + * typec_set_pwr_opmode - Report changed power operation mode + * @port: The USB Type-C Port where the mode was changed + * @opmode: New power operation mode + * + * This routine is used by the port drivers to report changed power operation + * mode in @port. The modes are USB (default), 1.5A, 3.0A as defined in USB + * Type-C specification, and "USB Power Delivery" when the power levels are + * negotiated with methods defined in USB Power Delivery specification. + */ +void typec_set_pwr_opmode(struct typec_port *port, + enum typec_pwr_opmode opmode) +{ + if (port->pwr_opmode == opmode) + return; + + port->pwr_opmode = opmode; + sysfs_notify(&port->dev.kobj, NULL, "power_operation_mode"); + kobject_uevent(&port->dev.kobj, KOBJ_CHANGE); +} +EXPORT_SYMBOL_GPL(typec_set_pwr_opmode); + +/* --------------------------------------- */ + +/** + * typec_port_register_altmode - Register USB Type-C Port Alternate Mode + * @port: USB Type-C Port that supports the alternate mode + * @desc: Description of the alternate mode + * + * This routine is used to register an alternate mode that @port is capable of + * supporting. + * + * Returns handle to the alternate mode on success or NULL on failure. + */ +struct typec_altmode * +typec_port_register_altmode(struct typec_port *port, + struct typec_altmode_desc *desc) +{ + return typec_register_altmode(&port->dev, desc); +} +EXPORT_SYMBOL_GPL(typec_port_register_altmode); + +/** + * typec_register_port - Register a USB Type-C Port + * @parent: Parent device + * @cap: Description of the port + * + * Registers a device for USB Type-C Port described in @cap. + * + * Returns handle to the port on success or NULL on failure. + */ +struct typec_port *typec_register_port(struct device *parent, + const struct typec_capability *cap) +{ + struct typec_port *port; + int role; + int ret; + int id; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return NULL; + + id = ida_simple_get(&typec_index_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + kfree(port); + return NULL; + } + + if (cap->type == TYPEC_PORT_DFP) + role = TYPEC_SOURCE; + else if (cap->type == TYPEC_PORT_UFP) + role = TYPEC_SINK; + else + role = cap->prefer_role; + + if (role == TYPEC_SOURCE) { + port->data_role = TYPEC_HOST; + port->pwr_role = TYPEC_SOURCE; + port->vconn_role = TYPEC_SOURCE; + } else { + port->data_role = TYPEC_DEVICE; + port->pwr_role = TYPEC_SINK; + port->vconn_role = TYPEC_SINK; + } + + port->id = id; + port->cap = cap; + port->prefer_role = cap->prefer_role; + + port->dev.class = typec_class; + port->dev.parent = parent; + port->dev.fwnode = cap->fwnode; + port->dev.type = &typec_port_dev_type; + dev_set_name(&port->dev, "port%d", id); + + ret = device_register(&port->dev); + if (ret) { + dev_err(parent, "failed to register port (%d)\n", ret); + put_device(&port->dev); + return NULL; + } + + return port; +} +EXPORT_SYMBOL_GPL(typec_register_port); + +/** + * typec_unregister_port - Unregister a USB Type-C Port + * @port: The port to be unregistered + * + * Unregister device created with typec_register_port(). + */ +void typec_unregister_port(struct typec_port *port) +{ + if (port) + device_unregister(&port->dev); +} +EXPORT_SYMBOL_GPL(typec_unregister_port); + +static int __init typec_init(void) +{ + typec_class = class_create(THIS_MODULE, "typec"); + return PTR_ERR_OR_ZERO(typec_class); +} +subsys_initcall(typec_init); + +static void __exit typec_exit(void) +{ + class_destroy(typec_class); + ida_destroy(&typec_index_ida); +} +module_exit(typec_exit); + +MODULE_AUTHOR("Heikki Krogerus "); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("USB Type-C Connector Class"); diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c new file mode 100644 index 0000000000000000000000000000000000000000..d5a7b21fa3f198ef43598d170cd61f7494ac4826 --- /dev/null +++ b/drivers/usb/typec/typec_wcove.c @@ -0,0 +1,377 @@ +/** + * typec_wcove.c - WhiskeyCove PMIC USB Type-C PHY driver + * + * Copyright (C) 2017 Intel Corporation + * Author: Heikki Krogerus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +/* Register offsets */ +#define WCOVE_CHGRIRQ0 0x4e09 +#define WCOVE_PHYCTRL 0x5e07 + +#define USBC_CONTROL1 0x7001 +#define USBC_CONTROL2 0x7002 +#define USBC_CONTROL3 0x7003 +#define USBC_CC1_CTRL 0x7004 +#define USBC_CC2_CTRL 0x7005 +#define USBC_STATUS1 0x7007 +#define USBC_STATUS2 0x7008 +#define USBC_STATUS3 0x7009 +#define USBC_IRQ1 0x7015 +#define USBC_IRQ2 0x7016 +#define USBC_IRQMASK1 0x7017 +#define USBC_IRQMASK2 0x7018 + +/* Register bits */ + +#define USBC_CONTROL1_MODE_DRP(r) (((r) & ~0x7) | 4) + +#define USBC_CONTROL2_UNATT_SNK BIT(0) +#define USBC_CONTROL2_UNATT_SRC BIT(1) +#define USBC_CONTROL2_DIS_ST BIT(2) + +#define USBC_CONTROL3_PD_DIS BIT(1) + +#define USBC_CC_CTRL_VCONN_EN BIT(1) + +#define USBC_STATUS1_DET_ONGOING BIT(6) +#define USBC_STATUS1_RSLT(r) ((r) & 0xf) +#define USBC_RSLT_NOTHING 0 +#define USBC_RSLT_SRC_DEFAULT 1 +#define USBC_RSLT_SRC_1_5A 2 +#define USBC_RSLT_SRC_3_0A 3 +#define USBC_RSLT_SNK 4 +#define USBC_RSLT_DEBUG_ACC 5 +#define USBC_RSLT_AUDIO_ACC 6 +#define USBC_RSLT_UNDEF 15 +#define USBC_STATUS1_ORIENT(r) (((r) >> 4) & 0x3) +#define USBC_ORIENT_NORMAL 1 +#define USBC_ORIENT_REVERSE 2 + +#define USBC_STATUS2_VBUS_REQ BIT(5) + +#define USBC_IRQ1_ADCDONE1 BIT(2) +#define USBC_IRQ1_OVERTEMP BIT(1) +#define USBC_IRQ1_SHORT BIT(0) + +#define USBC_IRQ2_CC_CHANGE BIT(7) +#define USBC_IRQ2_RX_PD BIT(6) +#define USBC_IRQ2_RX_HR BIT(5) +#define USBC_IRQ2_RX_CR BIT(4) +#define USBC_IRQ2_TX_SUCCESS BIT(3) +#define USBC_IRQ2_TX_FAIL BIT(2) + +#define USBC_IRQMASK1_ALL (USBC_IRQ1_ADCDONE1 | USBC_IRQ1_OVERTEMP | \ + USBC_IRQ1_SHORT) + +#define USBC_IRQMASK2_ALL (USBC_IRQ2_CC_CHANGE | USBC_IRQ2_RX_PD | \ + USBC_IRQ2_RX_HR | USBC_IRQ2_RX_CR | \ + USBC_IRQ2_TX_SUCCESS | USBC_IRQ2_TX_FAIL) + +struct wcove_typec { + struct mutex lock; /* device lock */ + struct device *dev; + struct regmap *regmap; + struct typec_port *port; + struct typec_capability cap; + struct typec_partner *partner; +}; + +enum wcove_typec_func { + WCOVE_FUNC_DRIVE_VBUS = 1, + WCOVE_FUNC_ORIENTATION, + WCOVE_FUNC_ROLE, + WCOVE_FUNC_DRIVE_VCONN, +}; + +enum wcove_typec_orientation { + WCOVE_ORIENTATION_NORMAL, + WCOVE_ORIENTATION_REVERSE, +}; + +enum wcove_typec_role { + WCOVE_ROLE_HOST, + WCOVE_ROLE_DEVICE, +}; + +static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49, + 0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37); + +static int wcove_typec_func(struct wcove_typec *wcove, + enum wcove_typec_func func, int param) +{ + union acpi_object *obj; + union acpi_object tmp; + union acpi_object argv4 = ACPI_INIT_DSM_ARGV4(1, &tmp); + + tmp.type = ACPI_TYPE_INTEGER; + tmp.integer.value = param; + + obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func, + &argv4); + if (!obj) { + dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__); + return -EIO; + } + + ACPI_FREE(obj); + return 0; +} + +static irqreturn_t wcove_typec_irq(int irq, void *data) +{ + enum typec_role role = TYPEC_SINK; + struct typec_partner_desc partner; + struct wcove_typec *wcove = data; + unsigned int cc1_ctrl; + unsigned int cc2_ctrl; + unsigned int cc_irq1; + unsigned int cc_irq2; + unsigned int status1; + unsigned int status2; + int ret; + + mutex_lock(&wcove->lock); + + ret = regmap_read(wcove->regmap, USBC_IRQ1, &cc_irq1); + if (ret) + goto err; + + ret = regmap_read(wcove->regmap, USBC_IRQ2, &cc_irq2); + if (ret) + goto err; + + ret = regmap_read(wcove->regmap, USBC_STATUS1, &status1); + if (ret) + goto err; + + ret = regmap_read(wcove->regmap, USBC_STATUS2, &status2); + if (ret) + goto err; + + ret = regmap_read(wcove->regmap, USBC_CC1_CTRL, &cc1_ctrl); + if (ret) + goto err; + + ret = regmap_read(wcove->regmap, USBC_CC2_CTRL, &cc2_ctrl); + if (ret) + goto err; + + if (cc_irq1) { + if (cc_irq1 & USBC_IRQ1_OVERTEMP) + dev_err(wcove->dev, "VCONN Switch Over Temperature!\n"); + if (cc_irq1 & USBC_IRQ1_SHORT) + dev_err(wcove->dev, "VCONN Switch Short Circuit!\n"); + ret = regmap_write(wcove->regmap, USBC_IRQ1, cc_irq1); + if (ret) + goto err; + } + + if (cc_irq2) { + ret = regmap_write(wcove->regmap, USBC_IRQ2, cc_irq2); + if (ret) + goto err; + /* + * Ignoring any PD communication interrupts until the PD support + * is available + */ + if (cc_irq2 & ~USBC_IRQ2_CC_CHANGE) { + dev_WARN(wcove->dev, "USB PD handling missing\n"); + goto err; + } + } + + if (status1 & USBC_STATUS1_DET_ONGOING) + goto out; + + if (USBC_STATUS1_RSLT(status1) == USBC_RSLT_NOTHING) { + if (wcove->partner) { + typec_unregister_partner(wcove->partner); + wcove->partner = NULL; + } + + wcove_typec_func(wcove, WCOVE_FUNC_ORIENTATION, + WCOVE_ORIENTATION_NORMAL); + + /* This makes sure the device controller is disconnected */ + wcove_typec_func(wcove, WCOVE_FUNC_ROLE, WCOVE_ROLE_HOST); + + /* Port to default role */ + typec_set_data_role(wcove->port, TYPEC_DEVICE); + typec_set_pwr_role(wcove->port, TYPEC_SINK); + typec_set_pwr_opmode(wcove->port, TYPEC_PWR_MODE_USB); + + goto out; + } + + if (wcove->partner) + goto out; + + switch (USBC_STATUS1_ORIENT(status1)) { + case USBC_ORIENT_NORMAL: + wcove_typec_func(wcove, WCOVE_FUNC_ORIENTATION, + WCOVE_ORIENTATION_NORMAL); + break; + case USBC_ORIENT_REVERSE: + wcove_typec_func(wcove, WCOVE_FUNC_ORIENTATION, + WCOVE_ORIENTATION_REVERSE); + default: + break; + } + + memset(&partner, 0, sizeof(partner)); + + switch (USBC_STATUS1_RSLT(status1)) { + case USBC_RSLT_SRC_DEFAULT: + typec_set_pwr_opmode(wcove->port, TYPEC_PWR_MODE_USB); + break; + case USBC_RSLT_SRC_1_5A: + typec_set_pwr_opmode(wcove->port, TYPEC_PWR_MODE_1_5A); + break; + case USBC_RSLT_SRC_3_0A: + typec_set_pwr_opmode(wcove->port, TYPEC_PWR_MODE_3_0A); + break; + case USBC_RSLT_SNK: + role = TYPEC_SOURCE; + break; + case USBC_RSLT_DEBUG_ACC: + partner.accessory = TYPEC_ACCESSORY_DEBUG; + break; + case USBC_RSLT_AUDIO_ACC: + partner.accessory = TYPEC_ACCESSORY_AUDIO; + break; + default: + dev_WARN(wcove->dev, "%s Undefined result\n", __func__); + goto err; + } + + if (role == TYPEC_SINK) { + wcove_typec_func(wcove, WCOVE_FUNC_ROLE, WCOVE_ROLE_DEVICE); + typec_set_data_role(wcove->port, TYPEC_DEVICE); + typec_set_pwr_role(wcove->port, TYPEC_SINK); + } else { + wcove_typec_func(wcove, WCOVE_FUNC_ROLE, WCOVE_ROLE_HOST); + typec_set_pwr_role(wcove->port, TYPEC_SOURCE); + typec_set_data_role(wcove->port, TYPEC_HOST); + } + + wcove->partner = typec_register_partner(wcove->port, &partner); + if (!wcove->partner) + dev_err(wcove->dev, "failed register partner\n"); +out: + /* If either CC pins is requesting VCONN, we turn it on */ + if ((cc1_ctrl & USBC_CC_CTRL_VCONN_EN) || + (cc2_ctrl & USBC_CC_CTRL_VCONN_EN)) + wcove_typec_func(wcove, WCOVE_FUNC_DRIVE_VCONN, true); + else + wcove_typec_func(wcove, WCOVE_FUNC_DRIVE_VCONN, false); + + /* Relying on the FSM to know when we need to drive VBUS. */ + wcove_typec_func(wcove, WCOVE_FUNC_DRIVE_VBUS, + !!(status2 & USBC_STATUS2_VBUS_REQ)); +err: + /* REVISIT: Clear WhiskeyCove CHGR Type-C interrupt */ + regmap_write(wcove->regmap, WCOVE_CHGRIRQ0, BIT(5)); + + mutex_unlock(&wcove->lock); + return IRQ_HANDLED; +} + +static int wcove_typec_probe(struct platform_device *pdev) +{ + struct intel_soc_pmic *pmic = dev_get_drvdata(pdev->dev.parent); + struct wcove_typec *wcove; + unsigned int val; + int ret; + + wcove = devm_kzalloc(&pdev->dev, sizeof(*wcove), GFP_KERNEL); + if (!wcove) + return -ENOMEM; + + mutex_init(&wcove->lock); + wcove->dev = &pdev->dev; + wcove->regmap = pmic->regmap; + + ret = regmap_irq_get_virq(pmic->irq_chip_data_level2, + platform_get_irq(pdev, 0)); + if (ret < 0) + return ret; + + ret = devm_request_threaded_irq(&pdev->dev, ret, NULL, + wcove_typec_irq, IRQF_ONESHOT, + "wcove_typec", wcove); + if (ret) + return ret; + + if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) { + dev_err(&pdev->dev, "Missing _DSM functions\n"); + return -ENODEV; + } + + wcove->cap.type = TYPEC_PORT_DRP; + wcove->cap.revision = USB_TYPEC_REV_1_1; + wcove->cap.prefer_role = TYPEC_NO_PREFERRED_ROLE; + + /* Make sure the PD PHY is disabled until USB PD is available */ + regmap_read(wcove->regmap, USBC_CONTROL3, &val); + regmap_write(wcove->regmap, USBC_CONTROL3, val | USBC_CONTROL3_PD_DIS); + + /* DRP mode without accessory support */ + regmap_read(wcove->regmap, USBC_CONTROL1, &val); + regmap_write(wcove->regmap, USBC_CONTROL1, USBC_CONTROL1_MODE_DRP(val)); + + wcove->port = typec_register_port(&pdev->dev, &wcove->cap); + if (!wcove->port) + return -ENODEV; + + /* Unmask everything */ + regmap_read(wcove->regmap, USBC_IRQMASK1, &val); + regmap_write(wcove->regmap, USBC_IRQMASK1, val & ~USBC_IRQMASK1_ALL); + regmap_read(wcove->regmap, USBC_IRQMASK2, &val); + regmap_write(wcove->regmap, USBC_IRQMASK2, val & ~USBC_IRQMASK2_ALL); + + platform_set_drvdata(pdev, wcove); + return 0; +} + +static int wcove_typec_remove(struct platform_device *pdev) +{ + struct wcove_typec *wcove = platform_get_drvdata(pdev); + unsigned int val; + + /* Mask everything */ + regmap_read(wcove->regmap, USBC_IRQMASK1, &val); + regmap_write(wcove->regmap, USBC_IRQMASK1, val | USBC_IRQMASK1_ALL); + regmap_read(wcove->regmap, USBC_IRQMASK2, &val); + regmap_write(wcove->regmap, USBC_IRQMASK2, val | USBC_IRQMASK2_ALL); + + typec_unregister_partner(wcove->partner); + typec_unregister_port(wcove->port); + return 0; +} + +static struct platform_driver wcove_typec_driver = { + .driver = { + .name = "bxt_wcove_usbc", + }, + .probe = wcove_typec_probe, + .remove = wcove_typec_remove, +}; + +module_platform_driver(wcove_typec_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("WhiskeyCove PMIC USB Type-C PHY driver"); +MODULE_ALIAS("platform:bxt_wcove_usbc"); diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index 5133a0792eb0418391f74e4f56486731dc7e62fc..bb0bd732e29ab7369fb3865a4682a596b5f4459a 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -491,16 +491,14 @@ static int skel_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_skel *dev; - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - size_t buffer_size; - int i; - int retval = -ENOMEM; + struct usb_endpoint_descriptor *bulk_in, *bulk_out; + int retval; /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) - goto error; + return -ENOMEM; + kref_init(&dev->kref); sema_init(&dev->limit_sem, WRITES_IN_FLIGHT); mutex_init(&dev->io_mutex); @@ -513,36 +511,29 @@ static int skel_probe(struct usb_interface *interface, /* set up the endpoint information */ /* use only the first bulk-in and bulk-out endpoints */ - iface_desc = interface->cur_altsetting; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (!dev->bulk_in_endpointAddr && - usb_endpoint_is_bulk_in(endpoint)) { - /* we found a bulk in endpoint */ - buffer_size = usb_endpoint_maxp(endpoint); - dev->bulk_in_size = buffer_size; - dev->bulk_in_endpointAddr = endpoint->bEndpointAddress; - dev->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL); - if (!dev->bulk_in_buffer) - goto error; - dev->bulk_in_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!dev->bulk_in_urb) - goto error; - } - - if (!dev->bulk_out_endpointAddr && - usb_endpoint_is_bulk_out(endpoint)) { - /* we found a bulk out endpoint */ - dev->bulk_out_endpointAddr = endpoint->bEndpointAddress; - } - } - if (!(dev->bulk_in_endpointAddr && dev->bulk_out_endpointAddr)) { + retval = usb_find_common_endpoints(interface->cur_altsetting, + &bulk_in, &bulk_out, NULL, NULL); + if (retval) { dev_err(&interface->dev, "Could not find both bulk-in and bulk-out endpoints\n"); goto error; } + dev->bulk_in_size = usb_endpoint_maxp(bulk_in); + dev->bulk_in_endpointAddr = bulk_in->bEndpointAddress; + dev->bulk_in_buffer = kmalloc(dev->bulk_in_size, GFP_KERNEL); + if (!dev->bulk_in_buffer) { + retval = -ENOMEM; + goto error; + } + dev->bulk_in_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->bulk_in_urb) { + retval = -ENOMEM; + goto error; + } + + dev->bulk_out_endpointAddr = bulk_out->bEndpointAddress; + /* save our data pointer in this interface device */ usb_set_intfdata(interface, dev); @@ -563,9 +554,9 @@ static int skel_probe(struct usb_interface *interface, return 0; error: - if (dev) - /* this frees allocated memory */ - kref_put(&dev->kref, skel_delete); + /* this frees allocated memory */ + kref_put(&dev->kref, skel_delete); + return retval; } diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index e4cb9f0625e8f0b1afdf9fdf591430780d68eac2..0585078638db7ebcf72a40fc9f9ecdf3ea362512 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -235,14 +235,19 @@ static int vhci_hub_status(struct usb_hcd *hcd, char *buf) static inline void hub_descriptor(struct usb_hub_descriptor *desc) { + int width; + memset(desc, 0, sizeof(*desc)); desc->bDescriptorType = USB_DT_HUB; - desc->bDescLength = 9; desc->wHubCharacteristics = cpu_to_le16( HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM); + desc->bNbrPorts = VHCI_HC_PORTS; - desc->u.hs.DeviceRemovable[0] = 0xff; - desc->u.hs.DeviceRemovable[1] = 0xff; + BUILD_BUG_ON(VHCI_HC_PORTS > USB_MAXCHILDREN); + width = desc->bNbrPorts / 8 + 1; + desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * width; + memset(&desc->u.hs.DeviceRemovable[0], 0, width); + memset(&desc->u.hs.DeviceRemovable[width], 0xff, width); } static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, @@ -430,36 +435,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, return retval; } -static struct vhci_device *get_vdev(struct usb_device *udev) -{ - struct platform_device *pdev; - struct usb_hcd *hcd; - struct vhci_hcd *vhci; - int pdev_nr, rhport; - - if (!udev) - return NULL; - - for (pdev_nr = 0; pdev_nr < vhci_num_controllers; pdev_nr++) { - pdev = *(vhci_pdevs + pdev_nr); - if (pdev == NULL) - continue; - hcd = platform_get_drvdata(pdev); - if (hcd == NULL) - continue; - vhci = hcd_to_vhci(hcd); - for (rhport = 0; rhport < VHCI_HC_PORTS; rhport++) { - if (vhci->vdev[rhport].udev == udev) - return &vhci->vdev[rhport]; - } - } - - return NULL; -} - -static void vhci_tx_urb(struct urb *urb) +static void vhci_tx_urb(struct urb *urb, struct vhci_device *vdev) { - struct vhci_device *vdev = get_vdev(urb->dev); struct vhci_priv *priv; struct vhci_hcd *vhci; unsigned long flags; @@ -601,7 +578,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, } out: - vhci_tx_urb(urb); + vhci_tx_urb(urb, vdev); spin_unlock_irqrestore(&vhci->lock, flags); return 0; diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 6345e85822a42457f11c607effb3fbb66ce8c89f..a50cf45e530f7f0dcc76c111da56d919b9f1f1bf 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -341,6 +341,7 @@ int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size) static int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) { + struct usb_device *udev = interface_to_usbdev(iface); struct i1480_usb *i1480_usb; struct i1480 *i1480; struct device *dev = &iface->dev; @@ -352,8 +353,8 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) iface->cur_altsetting->desc.bInterfaceNumber); goto error; } - if (iface->num_altsetting > 1 - && interface_to_usbdev(iface)->descriptor.idProduct == 0xbabe) { + if (iface->num_altsetting > 1 && + le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) { /* Need altsetting #1 [HW QUIRK] or EP1 won't work */ result = usb_set_interface(interface_to_usbdev(iface), 0, 1); if (result < 0) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index cf3de91fbfe7a522456d00bff9005df9a3662ed4..63112c36ab2de129e41b70e2c9db77e17399a781 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -198,6 +198,11 @@ static long tce_iommu_register_pages(struct tce_container *container, return ret; tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL); + if (!tcemem) { + mm_iommu_put(container->mm, mem); + return -ENOMEM; + } + tcemem->mem = mem; list_add(&tcemem->next, &container->prereg_list); @@ -680,7 +685,7 @@ static void tce_iommu_free_table(struct tce_container *container, unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; tce_iommu_userspace_view_free(tbl, container->mm); - tbl->it_ops->free(tbl); + iommu_tce_table_put(tbl); decrement_locked_vm(container->mm, pages); } @@ -1335,8 +1340,16 @@ static int tce_iommu_attach_group(void *iommu_data, if (!table_group->ops || !table_group->ops->take_ownership || !table_group->ops->release_ownership) { + if (container->v2) { + ret = -EPERM; + goto unlock_exit; + } ret = tce_iommu_take_ownership(container, table_group); } else { + if (!container->v2) { + ret = -EPERM; + goto unlock_exit; + } ret = tce_iommu_take_ownership_ddw(container, table_group); if (!tce_groups_attached(container) && !container->tables[0]) container->def_window_pending = true; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 32d2633092a37edf64ec4b9d2afc9fa4f12ea77d..8549cb111627f0b4bc64956df7c2b1e945bbfdc2 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -246,69 +246,46 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn) return ret; } -struct vwork { - struct mm_struct *mm; - long npage; - struct work_struct work; -}; - -/* delayed decrement/increment for locked_vm */ -static void vfio_lock_acct_bg(struct work_struct *work) +static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) { - struct vwork *vwork = container_of(work, struct vwork, work); - struct mm_struct *mm; - - mm = vwork->mm; - down_write(&mm->mmap_sem); - mm->locked_vm += vwork->npage; - up_write(&mm->mmap_sem); - mmput(mm); - kfree(vwork); -} - -static void vfio_lock_acct(struct task_struct *task, long npage) -{ - struct vwork *vwork; struct mm_struct *mm; bool is_current; + int ret; if (!npage) - return; + return 0; is_current = (task->mm == current->mm); mm = is_current ? task->mm : get_task_mm(task); if (!mm) - return; /* process exited */ + return -ESRCH; /* process exited */ - if (down_write_trylock(&mm->mmap_sem)) { - mm->locked_vm += npage; - up_write(&mm->mmap_sem); - if (!is_current) - mmput(mm); - return; - } + ret = down_write_killable(&mm->mmap_sem); + if (!ret) { + if (npage > 0) { + if (lock_cap ? !*lock_cap : + !has_capability(task, CAP_IPC_LOCK)) { + unsigned long limit; + + limit = task_rlimit(task, + RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if (mm->locked_vm + npage > limit) + ret = -ENOMEM; + } + } + + if (!ret) + mm->locked_vm += npage; - if (is_current) { - mm = get_task_mm(task); - if (!mm) - return; + up_write(&mm->mmap_sem); } - /* - * Couldn't get mmap_sem lock, so must setup to update - * mm->locked_vm later. If locked_vm were atomic, we - * wouldn't need this silliness - */ - vwork = kmalloc(sizeof(struct vwork), GFP_KERNEL); - if (WARN_ON(!vwork)) { + if (!is_current) mmput(mm); - return; - } - INIT_WORK(&vwork->work, vfio_lock_acct_bg); - vwork->mm = mm; - vwork->npage = npage; - schedule_work(&vwork->work); + + return ret; } /* @@ -403,10 +380,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, * first page and all consecutive pages with the same locking. */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, - long npage, unsigned long *pfn_base) + long npage, unsigned long *pfn_base, + bool lock_cap, unsigned long limit) { - unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - bool lock_cap = capable(CAP_IPC_LOCK); + unsigned long pfn = 0; long ret, pinned = 0, lock_acct = 0; bool rsvd; dma_addr_t iova = vaddr - dma->vaddr + dma->iova; @@ -442,8 +419,6 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, /* Lock all the consecutive pages from pfn_base */ for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - unsigned long pfn = 0; - ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); if (ret) break; @@ -460,14 +435,25 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, put_pfn(pfn, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, limit << PAGE_SHIFT); - break; + ret = -ENOMEM; + goto unpin_out; } lock_acct++; } } out: - vfio_lock_acct(current, lock_acct); + ret = vfio_lock_acct(current, lock_acct, &lock_cap); + +unpin_out: + if (ret) { + if (!rsvd) { + for (pfn = *pfn_base ; pinned ; pfn++, pinned--) + put_pfn(pfn, dma->prot); + } + + return ret; + } return pinned; } @@ -488,7 +474,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, } if (do_accounting) - vfio_lock_acct(dma->task, locked - unlocked); + vfio_lock_acct(dma->task, locked - unlocked, NULL); return unlocked; } @@ -496,37 +482,26 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, unsigned long *pfn_base, bool do_accounting) { - unsigned long limit; - bool lock_cap = has_capability(dma->task, CAP_IPC_LOCK); struct mm_struct *mm; int ret; - bool rsvd; mm = get_task_mm(dma->task); if (!mm) return -ENODEV; ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); - if (ret) - goto pin_page_exit; - - rsvd = is_invalid_reserved_pfn(*pfn_base); - limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (!rsvd && !lock_cap && mm->locked_vm + 1 > limit) { - put_pfn(*pfn_base, dma->prot); - pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK (%ld) exceeded\n", - __func__, dma->task->comm, task_pid_nr(dma->task), - limit << PAGE_SHIFT); - ret = -ENOMEM; - goto pin_page_exit; + if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { + ret = vfio_lock_acct(dma->task, 1, NULL); + if (ret) { + put_pfn(*pfn_base, dma->prot); + if (ret == -ENOMEM) + pr_warn("%s: Task %s (%d) RLIMIT_MEMLOCK " + "(%ld) exceeded\n", __func__, + dma->task->comm, task_pid_nr(dma->task), + task_rlimit(dma->task, RLIMIT_MEMLOCK)); + } } - if (!rsvd && do_accounting) - vfio_lock_acct(dma->task, 1); - ret = 1; - -pin_page_exit: mmput(mm); return ret; } @@ -543,7 +518,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, unlocked = vfio_iova_put_vfio_pfn(dma, vpfn); if (do_accounting) - vfio_lock_acct(dma->task, -unlocked); + vfio_lock_acct(dma->task, -unlocked, NULL); return unlocked; } @@ -606,10 +581,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, remote_vaddr = dma->vaddr + iova - dma->iova; ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], do_accounting); - if (ret <= 0) { - WARN_ON(!ret); + if (ret) goto pin_unwind; - } ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); if (ret) { @@ -740,7 +713,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->iommu_mapped = false; if (do_accounting) { - vfio_lock_acct(dma->task, -unlocked); + vfio_lock_acct(dma->task, -unlocked, NULL); return 0; } return unlocked; @@ -951,13 +924,15 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, unsigned long vaddr = dma->vaddr; size_t size = map_size; long npage; - unsigned long pfn; + unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + bool lock_cap = capable(CAP_IPC_LOCK); int ret = 0; while (size) { /* Pin a contiguous chunk of memory */ npage = vfio_pin_pages_remote(dma, vaddr + dma->size, - size >> PAGE_SHIFT, &pfn); + size >> PAGE_SHIFT, &pfn, + lock_cap, limit); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1067,6 +1042,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, { struct vfio_domain *d; struct rb_node *n; + unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + bool lock_cap = capable(CAP_IPC_LOCK); int ret; /* Arbitrarily pick the first domain in the list for lookups */ @@ -1113,7 +1090,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, npage = vfio_pin_pages_remote(dma, vaddr, n >> PAGE_SHIFT, - &pfn); + &pfn, lock_cap, + limit); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1382,7 +1360,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) if (!is_invalid_reserved_pfn(vpfn->pfn)) locked++; } - vfio_lock_acct(dma->task, locked - unlocked); + vfio_lock_acct(dma->task, locked - unlocked, NULL); } } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9b519897cc17b8f31236bd51bfa283fbf5ada319..f61f852d6cfd114978036208bf2b0a5c841231d4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -817,12 +817,9 @@ static int vhost_net_open(struct inode *inode, struct file *f) struct vhost_virtqueue **vqs; int i; - n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!n) { - n = vmalloc(sizeof *n); - if (!n) - return -ENOMEM; - } + n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT); + if (!n) + return -ENOMEM; vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); if (!vqs) { kvfree(n); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f0ba362d4c101aa970a12943d1e69944d54058b7..042030e5a0357ad698181f817e6a75ca7c3087e3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -534,18 +534,9 @@ long vhost_dev_set_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_set_owner); -static void *vhost_kvzalloc(unsigned long size) -{ - void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - - if (!n) - n = vzalloc(size); - return n; -} - struct vhost_umem *vhost_dev_reset_owner_prepare(void) { - return vhost_kvzalloc(sizeof(struct vhost_umem)); + return kvzalloc(sizeof(struct vhost_umem), GFP_KERNEL); } EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); @@ -1276,7 +1267,7 @@ EXPORT_SYMBOL_GPL(vhost_vq_access_ok); static struct vhost_umem *vhost_umem_alloc(void) { - struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem)); + struct vhost_umem *umem = kvzalloc(sizeof(*umem), GFP_KERNEL); if (!umem) return NULL; @@ -1302,7 +1293,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EOPNOTSUPP; if (mem.nregions > max_mem_regions) return -E2BIG; - newmem = vhost_kvzalloc(size + mem.nregions * sizeof(*m->regions)); + newmem = kvzalloc(size + mem.nregions * sizeof(*m->regions), GFP_KERNEL); if (!newmem) return -ENOMEM; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 44eed8eb0725b25e3c9765e19387e7c338ab9bbb..3acef3c5d8edeca06d09e4b5d7e4ddeed0c8936c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -176,6 +176,11 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, restart_tx = true; } + /* Deliver to monitoring devices all correctly transmitted + * packets. + */ + virtio_transport_deliver_tap_pkt(pkt); + virtio_transport_free_pkt(pkt); } if (added) @@ -383,6 +388,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) len = pkt->len; + /* Deliver to monitoring devices all received packets */ + virtio_transport_deliver_tap_pkt(pkt); + /* Only accept correctly addressed packets */ if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) virtio_transport_recv_pkt(pkt); @@ -500,12 +508,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) /* This struct is large and allocation could fail, fall back to vmalloc * if there is no other way. */ - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!vsock) { - vsock = vmalloc(sizeof(*vsock)); - if (!vsock) - return -ENOMEM; - } + vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT); + if (!vsock) + return -ENOMEM; vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL); if (!vqs) { diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 9ad3c17d645689b79e56364377a71671eb4aac23..445b2c230b56ade5695c9e074ed9548b1a77caff 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_VGASTATE) += vgastate.o obj-$(CONFIG_HDMI) += hdmi.o obj-$(CONFIG_VT) += console/ +obj-$(CONFIG_FB_STI) += console/ obj-$(CONFIG_LOGO) += logo/ obj-y += backlight/ diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 5ffa4b4e26c0e874d3f63068456b3d72d8b4beb5..4e1d2ad50ba142950e0048d986ab7e0df2eafece 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -460,6 +460,13 @@ config BACKLIGHT_BD6107 help If you have a Rohm BD6107 say Y to enable the backlight driver. +config BACKLIGHT_ARCXCNN + tristate "Backlight driver for the Arctic Sands ARCxCnnnn family" + depends on I2C + help + If you have an ARCxCnnnn family backlight say Y to enable + the backlight driver. + endif # BACKLIGHT_CLASS_DEVICE endif # BACKLIGHT_LCD_SUPPORT diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 16ec534cff3044209adbae3495c19d97fee3ef73..8905129691e84a2fe42acd625f32b0a01772097c 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -55,3 +55,4 @@ obj-$(CONFIG_BACKLIGHT_SKY81452) += sky81452-backlight.o obj-$(CONFIG_BACKLIGHT_TOSA) += tosa_bl.o obj-$(CONFIG_BACKLIGHT_TPS65217) += tps65217_bl.o obj-$(CONFIG_BACKLIGHT_WM831X) += wm831x_bl.o +obj-$(CONFIG_BACKLIGHT_ARCXCNN) += arcxcnn_bl.o diff --git a/drivers/video/backlight/arcxcnn_bl.c b/drivers/video/backlight/arcxcnn_bl.c new file mode 100644 index 0000000000000000000000000000000000000000..dec790de72ff4a8496c28be145c834353e06cebc --- /dev/null +++ b/drivers/video/backlight/arcxcnn_bl.c @@ -0,0 +1,419 @@ +/* + * Backlight driver for ArcticSand ARC_X_C_0N_0N Devices + * + * Copyright 2016 ArcticSand, Inc. + * Author : Brian Dodge + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include + +enum arcxcnn_chip_id { + ARC2C0608 +}; + +/** + * struct arcxcnn_platform_data + * @name : Backlight driver name (NULL will use default) + * @initial_brightness : initial value of backlight brightness + * @leden : initial LED string enables, upper bit is global on/off + * @led_config_0 : fading speed (period between intensity steps) + * @led_config_1 : misc settings, see datasheet + * @dim_freq : pwm dimming frequency if in pwm mode + * @comp_config : misc config, see datasheet + * @filter_config : RC/PWM filter config, see datasheet + * @trim_config : full scale current trim, see datasheet + */ +struct arcxcnn_platform_data { + const char *name; + u16 initial_brightness; + u8 leden; + u8 led_config_0; + u8 led_config_1; + u8 dim_freq; + u8 comp_config; + u8 filter_config; + u8 trim_config; +}; + +#define ARCXCNN_CMD 0x00 /* Command Register */ +#define ARCXCNN_CMD_STDBY 0x80 /* I2C Standby */ +#define ARCXCNN_CMD_RESET 0x40 /* Reset */ +#define ARCXCNN_CMD_BOOST 0x10 /* Boost */ +#define ARCXCNN_CMD_OVP_MASK 0x0C /* --- Over Voltage Threshold */ +#define ARCXCNN_CMD_OVP_XXV 0x0C /* Over Voltage Threshold */ +#define ARCXCNN_CMD_OVP_20V 0x08 /* 20v Over Voltage Threshold */ +#define ARCXCNN_CMD_OVP_24V 0x04 /* 24v Over Voltage Threshold */ +#define ARCXCNN_CMD_OVP_31V 0x00 /* 31.4v Over Voltage Threshold */ +#define ARCXCNN_CMD_EXT_COMP 0x01 /* part (0) or full (1) ext. comp */ + +#define ARCXCNN_CONFIG 0x01 /* Configuration */ +#define ARCXCNN_STATUS1 0x02 /* Status 1 */ +#define ARCXCNN_STATUS2 0x03 /* Status 2 */ +#define ARCXCNN_FADECTRL 0x04 /* Fading Control */ +#define ARCXCNN_ILED_CONFIG 0x05 /* ILED Configuration */ +#define ARCXCNN_ILED_DIM_PWM 0x00 /* config dim mode pwm */ +#define ARCXCNN_ILED_DIM_INT 0x04 /* config dim mode internal */ +#define ARCXCNN_LEDEN 0x06 /* LED Enable Register */ +#define ARCXCNN_LEDEN_ISETEXT 0x80 /* Full-scale current set extern */ +#define ARCXCNN_LEDEN_MASK 0x3F /* LED string enables mask */ +#define ARCXCNN_LEDEN_BITS 0x06 /* Bits of LED string enables */ +#define ARCXCNN_LEDEN_LED1 0x01 +#define ARCXCNN_LEDEN_LED2 0x02 +#define ARCXCNN_LEDEN_LED3 0x04 +#define ARCXCNN_LEDEN_LED4 0x08 +#define ARCXCNN_LEDEN_LED5 0x10 +#define ARCXCNN_LEDEN_LED6 0x20 + +#define ARCXCNN_WLED_ISET_LSB 0x07 /* LED ISET LSB (in upper nibble) */ +#define ARCXCNN_WLED_ISET_LSB_SHIFT 0x04 /* ISET LSB Left Shift */ +#define ARCXCNN_WLED_ISET_MSB 0x08 /* LED ISET MSB (8 bits) */ + +#define ARCXCNN_DIMFREQ 0x09 +#define ARCXCNN_COMP_CONFIG 0x0A +#define ARCXCNN_FILT_CONFIG 0x0B +#define ARCXCNN_IMAXTUNE 0x0C +#define ARCXCNN_ID_MSB 0x1E +#define ARCXCNN_ID_LSB 0x1F + +#define MAX_BRIGHTNESS 4095 +#define INIT_BRIGHT 60 + +struct arcxcnn { + struct i2c_client *client; + struct backlight_device *bl; + struct device *dev; + struct arcxcnn_platform_data *pdata; +}; + +static int arcxcnn_update_field(struct arcxcnn *lp, u8 reg, u8 mask, u8 data) +{ + int ret; + u8 tmp; + + ret = i2c_smbus_read_byte_data(lp->client, reg); + if (ret < 0) { + dev_err(lp->dev, "failed to read 0x%.2x\n", reg); + return ret; + } + + tmp = (u8)ret; + tmp &= ~mask; + tmp |= data & mask; + + return i2c_smbus_write_byte_data(lp->client, reg, tmp); +} + +static int arcxcnn_set_brightness(struct arcxcnn *lp, u32 brightness) +{ + int ret; + u8 val; + + /* lower nibble of brightness goes in upper nibble of LSB register */ + val = (brightness & 0xF) << ARCXCNN_WLED_ISET_LSB_SHIFT; + ret = i2c_smbus_write_byte_data(lp->client, + ARCXCNN_WLED_ISET_LSB, val); + if (ret < 0) + return ret; + + /* remaining 8 bits of brightness go in MSB register */ + val = (brightness >> 4); + return i2c_smbus_write_byte_data(lp->client, + ARCXCNN_WLED_ISET_MSB, val); +} + +static int arcxcnn_bl_update_status(struct backlight_device *bl) +{ + struct arcxcnn *lp = bl_get_data(bl); + u32 brightness = bl->props.brightness; + int ret; + + if (bl->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK)) + brightness = 0; + + ret = arcxcnn_set_brightness(lp, brightness); + if (ret) + return ret; + + /* set power-on/off/save modes */ + return arcxcnn_update_field(lp, ARCXCNN_CMD, ARCXCNN_CMD_STDBY, + (bl->props.power == 0) ? 0 : ARCXCNN_CMD_STDBY); +} + +static const struct backlight_ops arcxcnn_bl_ops = { + .options = BL_CORE_SUSPENDRESUME, + .update_status = arcxcnn_bl_update_status, +}; + +static int arcxcnn_backlight_register(struct arcxcnn *lp) +{ + struct backlight_properties *props; + const char *name = lp->pdata->name ? : "arctic_bl"; + + props = devm_kzalloc(lp->dev, sizeof(*props), GFP_KERNEL); + if (!props) + return -ENOMEM; + + props->type = BACKLIGHT_PLATFORM; + props->max_brightness = MAX_BRIGHTNESS; + + if (lp->pdata->initial_brightness > props->max_brightness) + lp->pdata->initial_brightness = props->max_brightness; + + props->brightness = lp->pdata->initial_brightness; + + lp->bl = devm_backlight_device_register(lp->dev, name, lp->dev, lp, + &arcxcnn_bl_ops, props); + return PTR_ERR_OR_ZERO(lp->bl); +} + +static void arcxcnn_parse_dt(struct arcxcnn *lp) +{ + struct device *dev = lp->dev; + struct device_node *node = dev->of_node; + u32 prog_val, num_entry, entry, sources[ARCXCNN_LEDEN_BITS]; + int ret; + + /* device tree entry isn't required, defaults are OK */ + if (!node) + return; + + ret = of_property_read_string(node, "label", &lp->pdata->name); + if (ret < 0) + lp->pdata->name = NULL; + + ret = of_property_read_u32(node, "default-brightness", &prog_val); + if (ret == 0) + lp->pdata->initial_brightness = prog_val; + + ret = of_property_read_u32(node, "arc,led-config-0", &prog_val); + if (ret == 0) + lp->pdata->led_config_0 = (u8)prog_val; + + ret = of_property_read_u32(node, "arc,led-config-1", &prog_val); + if (ret == 0) + lp->pdata->led_config_1 = (u8)prog_val; + + ret = of_property_read_u32(node, "arc,dim-freq", &prog_val); + if (ret == 0) + lp->pdata->dim_freq = (u8)prog_val; + + ret = of_property_read_u32(node, "arc,comp-config", &prog_val); + if (ret == 0) + lp->pdata->comp_config = (u8)prog_val; + + ret = of_property_read_u32(node, "arc,filter-config", &prog_val); + if (ret == 0) + lp->pdata->filter_config = (u8)prog_val; + + ret = of_property_read_u32(node, "arc,trim-config", &prog_val); + if (ret == 0) + lp->pdata->trim_config = (u8)prog_val; + + ret = of_property_count_u32_elems(node, "led-sources"); + if (ret < 0) { + lp->pdata->leden = ARCXCNN_LEDEN_MASK; /* all on is default */ + } else { + num_entry = ret; + if (num_entry > ARCXCNN_LEDEN_BITS) + num_entry = ARCXCNN_LEDEN_BITS; + + ret = of_property_read_u32_array(node, "led-sources", sources, + num_entry); + if (ret < 0) { + dev_err(dev, "led-sources node is invalid.\n"); + return; + } + + lp->pdata->leden = 0; + + /* for each enable in source, set bit in led enable */ + for (entry = 0; entry < num_entry; entry++) { + u8 onbit = 1 << sources[entry]; + + lp->pdata->leden |= onbit; + } + } +} + +static int arcxcnn_probe(struct i2c_client *cl, const struct i2c_device_id *id) +{ + struct arcxcnn *lp; + int ret; + + if (!i2c_check_functionality(cl->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -EIO; + + lp = devm_kzalloc(&cl->dev, sizeof(*lp), GFP_KERNEL); + if (!lp) + return -ENOMEM; + + lp->client = cl; + lp->dev = &cl->dev; + lp->pdata = dev_get_platdata(&cl->dev); + + /* reset the device */ + ret = i2c_smbus_write_byte_data(lp->client, + ARCXCNN_CMD, ARCXCNN_CMD_RESET); + if (ret) + goto probe_err; + + if (!lp->pdata) { + lp->pdata = devm_kzalloc(lp->dev, + sizeof(*lp->pdata), GFP_KERNEL); + if (!lp->pdata) + return -ENOMEM; + + /* Setup defaults based on power-on defaults */ + lp->pdata->name = NULL; + lp->pdata->initial_brightness = INIT_BRIGHT; + lp->pdata->leden = ARCXCNN_LEDEN_MASK; + + lp->pdata->led_config_0 = i2c_smbus_read_byte_data( + lp->client, ARCXCNN_FADECTRL); + + lp->pdata->led_config_1 = i2c_smbus_read_byte_data( + lp->client, ARCXCNN_ILED_CONFIG); + /* insure dim mode is not default pwm */ + lp->pdata->led_config_1 |= ARCXCNN_ILED_DIM_INT; + + lp->pdata->dim_freq = i2c_smbus_read_byte_data( + lp->client, ARCXCNN_DIMFREQ); + + lp->pdata->comp_config = i2c_smbus_read_byte_data( + lp->client, ARCXCNN_COMP_CONFIG); + + lp->pdata->filter_config = i2c_smbus_read_byte_data( + lp->client, ARCXCNN_FILT_CONFIG); + + lp->pdata->trim_config = i2c_smbus_read_byte_data( + lp->client, ARCXCNN_IMAXTUNE); + + if (IS_ENABLED(CONFIG_OF)) + arcxcnn_parse_dt(lp); + } + + i2c_set_clientdata(cl, lp); + + /* constrain settings to what is possible */ + if (lp->pdata->initial_brightness > MAX_BRIGHTNESS) + lp->pdata->initial_brightness = MAX_BRIGHTNESS; + + /* set initial brightness */ + ret = arcxcnn_set_brightness(lp, lp->pdata->initial_brightness); + if (ret) + goto probe_err; + + /* set other register values directly */ + ret = i2c_smbus_write_byte_data(lp->client, ARCXCNN_FADECTRL, + lp->pdata->led_config_0); + if (ret) + goto probe_err; + + ret = i2c_smbus_write_byte_data(lp->client, ARCXCNN_ILED_CONFIG, + lp->pdata->led_config_1); + if (ret) + goto probe_err; + + ret = i2c_smbus_write_byte_data(lp->client, ARCXCNN_DIMFREQ, + lp->pdata->dim_freq); + if (ret) + goto probe_err; + + ret = i2c_smbus_write_byte_data(lp->client, ARCXCNN_COMP_CONFIG, + lp->pdata->comp_config); + if (ret) + goto probe_err; + + ret = i2c_smbus_write_byte_data(lp->client, ARCXCNN_FILT_CONFIG, + lp->pdata->filter_config); + if (ret) + goto probe_err; + + ret = i2c_smbus_write_byte_data(lp->client, ARCXCNN_IMAXTUNE, + lp->pdata->trim_config); + if (ret) + goto probe_err; + + /* set initial LED Enables */ + arcxcnn_update_field(lp, ARCXCNN_LEDEN, + ARCXCNN_LEDEN_MASK, lp->pdata->leden); + + ret = arcxcnn_backlight_register(lp); + if (ret) + goto probe_register_err; + + backlight_update_status(lp->bl); + + return 0; + +probe_register_err: + dev_err(lp->dev, + "failed to register backlight.\n"); + +probe_err: + dev_err(lp->dev, + "failure ret: %d\n", ret); + return ret; +} + +static int arcxcnn_remove(struct i2c_client *cl) +{ + struct arcxcnn *lp = i2c_get_clientdata(cl); + + /* disable all strings (ignore errors) */ + i2c_smbus_write_byte_data(lp->client, + ARCXCNN_LEDEN, 0x00); + /* reset the device (ignore errors) */ + i2c_smbus_write_byte_data(lp->client, + ARCXCNN_CMD, ARCXCNN_CMD_RESET); + + lp->bl->props.brightness = 0; + + backlight_update_status(lp->bl); + + return 0; +} + +static const struct of_device_id arcxcnn_dt_ids[] = { + { .compatible = "arc,arc2c0608" }, + { } +}; +MODULE_DEVICE_TABLE(of, arcxcnn_dt_ids); + +static const struct i2c_device_id arcxcnn_ids[] = { + {"arc2c0608", ARC2C0608}, + { } +}; +MODULE_DEVICE_TABLE(i2c, arcxcnn_ids); + +static struct i2c_driver arcxcnn_driver = { + .driver = { + .name = "arcxcnn_bl", + .of_match_table = of_match_ptr(arcxcnn_dt_ids), + }, + .probe = arcxcnn_probe, + .remove = arcxcnn_remove, + .id_table = arcxcnn_ids, +}; +module_i2c_driver(arcxcnn_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Brian Dodge "); +MODULE_DESCRIPTION("ARCXCNN Backlight driver"); diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index d7efcb632f7d9dde08b0a494c455725b05d55af0..002f1ce22bd02032062924b19d645ebc25302c93 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -297,14 +297,15 @@ static int pwm_backlight_probe(struct platform_device *pdev) } /* - * If the GPIO is configured as input, change the direction to output - * and set the GPIO as active. + * If the GPIO is not known to be already configured as output, that + * is, if gpiod_get_direction returns either GPIOF_DIR_IN or -EINVAL, + * change the direction to output and set the GPIO as active. * Do not force the GPIO to active when it was already output as it * could cause backlight flickering or we would enable the backlight too * early. Leave the decision of the initial backlight state for later. */ if (pb->enable_gpio && - gpiod_get_direction(pb->enable_gpio) == GPIOF_DIR_IN) + gpiod_get_direction(pb->enable_gpio) != GPIOF_DIR_OUT) gpiod_direction_output(pb->enable_gpio, 1); pb->power_supply = devm_regulator_get(&pdev->dev, "power"); diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index 5b71bd905a6066ee53e3615d82b2d889a031d10c..2111d06f8c81a5260235befb029d59901e7bde28 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -6,7 +6,7 @@ menu "Console display driver support" config VGA_CONSOLE bool "VGA text console" if EXPERT || !X86 - depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && \ + depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC && !FRV && \ !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && !CRIS && \ (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \ !ARM64 && !ARC && !MICROBLAZE && !OPENRISC diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 922e4eaed9c5b5278572a79eb1cfc14e267a8bd5..5c6696bb56da89d1ca86f00f16c3c408303ca5ad 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -689,8 +689,6 @@ config FB_STI select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT - select STI_CONSOLE - select VT default y ---help--- STI refers to the HP "Standard Text Interface" which is a set of diff --git a/drivers/video/fbdev/acornfb.c b/drivers/video/fbdev/acornfb.c index fb75b7e5a19ab52748e4a1980d56046e15716908..0c325b4da61de876d998463389ef040373be8645 100644 --- a/drivers/video/fbdev/acornfb.c +++ b/drivers/video/fbdev/acornfb.c @@ -101,7 +101,7 @@ extern unsigned int vram_size; /* set by setup.c */ #ifdef HAS_VIDC20 #include -#define MAX_SIZE 2*1024*1024 +#define MAX_SIZE (2*1024*1024) /* VIDC20 has a different set of rules from the VIDC: * hcr : must be multiple of 4 @@ -162,7 +162,7 @@ static void acornfb_set_timing(struct fb_info *info) if (memcmp(¤t_vidc, &vidc, sizeof(vidc))) { current_vidc = vidc; - vidc_writel(VIDC20_CTRL| vidc.control); + vidc_writel(VIDC20_CTRL | vidc.control); vidc_writel(0xd0000000 | vidc.pll_ctl); vidc_writel(0x80000000 | vidc.h_cycle); vidc_writel(0x81000000 | vidc.h_sync_width); @@ -297,7 +297,7 @@ acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, pal.p = 0; vidc_writel(0x10000000); for (i = 0; i < 256; i += 1) { - pal.vidc20.red = current_par.palette[ i & 31].vidc20.red; + pal.vidc20.red = current_par.palette[i & 31].vidc20.red; pal.vidc20.green = current_par.palette[(i >> 1) & 31].vidc20.green; pal.vidc20.blue = current_par.palette[(i >> 2) & 31].vidc20.blue; vidc_writel(pal.p); @@ -1043,8 +1043,7 @@ static int acornfb_probe(struct platform_device *dev) base = dma_alloc_wc(current_par.dev, size, &handle, GFP_KERNEL); if (base == NULL) { - printk(KERN_ERR "acornfb: unable to allocate screen " - "memory\n"); + printk(KERN_ERR "acornfb: unable to allocate screen memory\n"); return -ENOMEM; } @@ -1103,8 +1102,7 @@ static int acornfb_probe(struct platform_device *dev) v_sync = h_sync / (fb_info.var.yres + fb_info.var.upper_margin + fb_info.var.lower_margin + fb_info.var.vsync_len); - printk(KERN_INFO "Acornfb: %dkB %cRAM, %s, using %dx%d, " - "%d.%03dkHz, %dHz\n", + printk(KERN_INFO "Acornfb: %dkB %cRAM, %s, using %dx%d, %d.%03dkHz, %dHz\n", fb_info.fix.smem_len / 1024, current_par.using_vram ? 'V' : 'D', VIDC_NAME, fb_info.var.xres, fb_info.var.yres, diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 0fab92c628280359ec79023b545ff893c12a2d6c..ffc2c33c6cef520a9aec6473b1b5fd1af96039fd 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -881,8 +881,8 @@ static int clcdfb_of_dma_setup(struct clcd_fb *fb) if (err) return err; - framesize = fb->panel->mode.xres * fb->panel->mode.yres * - fb->panel->bpp / 8; + framesize = PAGE_ALIGN(fb->panel->mode.xres * fb->panel->mode.yres * + fb->panel->bpp / 8); fb->fb.screen_base = dma_alloc_coherent(&fb->dev->dev, framesize, &dma, GFP_KERNEL); if (!fb->fb.screen_base) diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c index 1928cb2b5386fad37b55f1e6995cfaef88b979e2..7e87d0d616581dbf2e58045b424198524d28b3f4 100644 --- a/drivers/video/fbdev/arcfb.c +++ b/drivers/video/fbdev/arcfb.c @@ -645,17 +645,17 @@ module_param(nosplash, uint, 0); MODULE_PARM_DESC(nosplash, "Disable doing the splash screen"); module_param(arcfb_enable, uint, 0); MODULE_PARM_DESC(arcfb_enable, "Enable communication with Arc board"); -module_param(dio_addr, ulong, 0); +module_param_hw(dio_addr, ulong, ioport, 0); MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480"); -module_param(cio_addr, ulong, 0); +module_param_hw(cio_addr, ulong, ioport, 0); MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400"); -module_param(c2io_addr, ulong, 0); +module_param_hw(c2io_addr, ulong, ioport, 0); MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408"); module_param(splashval, ulong, 0); MODULE_PARM_DESC(splashval, "Splash pattern: 0xFF is black, 0x00 is green"); module_param(tuhold, ulong, 0); MODULE_PARM_DESC(tuhold, "Time to hold between strobing data to Arc board"); -module_param(irq, uint, 0); +module_param_hw(irq, uint, irq, 0); MODULE_PARM_DESC(irq, "IRQ for the Arc board"); module_init(arcfb_init); diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c index 218339a4edaac46e5b223d07f12f335b8966c66f..6b4c7872b37519f9fe6238d4fa505d76e14d4c92 100644 --- a/drivers/video/fbdev/aty/radeon_base.c +++ b/drivers/video/fbdev/aty/radeon_base.c @@ -2453,8 +2453,8 @@ static int radeonfb_pci_register(struct pci_dev *pdev, err |= sysfs_create_bin_file(&rinfo->pdev->dev.kobj, &edid2_attr); if (err) - pr_warning("%s() Creating sysfs files failed, continuing\n", - __func__); + pr_warn("%s() Creating sysfs files failed, continuing\n", + __func__); /* save current mode regs before we switch into the new one * so we can restore this upon __exit diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index 62c0cf79674fec39137fe39552288b26e23773e5..41d7979d81c53df3d459d39939798659386444a8 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -1048,7 +1048,7 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE; i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) - if (PIXEL_CLOCK) + if (PIXEL_CLOCK != 0) edt[num++] = block - edid; /* Yikes, EDID data is totally useless */ @@ -1073,9 +1073,9 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) { int idx = svd[i - specs->modedb_len - num]; if (!idx || idx >= ARRAY_SIZE(cea_modes)) { - pr_warning("Reserved SVD code %d\n", idx); + pr_warn("Reserved SVD code %d\n", idx); } else if (!cea_modes[idx].xres) { - pr_warning("Unimplemented SVD code %d\n", idx); + pr_warn("Unimplemented SVD code %d\n", idx); } else { memcpy(&m[i], cea_modes + idx, sizeof(m[i])); pr_debug("Adding SVD #%d: %ux%u@%u\n", idx, diff --git a/drivers/video/fbdev/i810/i810_main.c b/drivers/video/fbdev/i810/i810_main.c index 483ab2592d0c056e9920834303b722105e5eca16..2488baab7c89253b4606faf3b5ae3877939b9201 100644 --- a/drivers/video/fbdev/i810/i810_main.c +++ b/drivers/video/fbdev/i810/i810_main.c @@ -81,7 +81,7 @@ static u32 voffset; static int i810fb_cursor(struct fb_info *info, struct fb_cursor *cursor); static int i810fb_init_pci(struct pci_dev *dev, const struct pci_device_id *entry); -static void __exit i810fb_remove_pci(struct pci_dev *dev); +static void i810fb_remove_pci(struct pci_dev *dev); static int i810fb_resume(struct pci_dev *dev); static int i810fb_suspend(struct pci_dev *dev, pm_message_t state); @@ -128,7 +128,7 @@ static struct pci_driver i810fb_driver = { .name = "i810fb", .id_table = i810fb_pci_tbl, .probe = i810fb_init_pci, - .remove = __exit_p(i810fb_remove_pci), + .remove = i810fb_remove_pci, .suspend = i810fb_suspend, .resume = i810fb_resume, }; @@ -2123,7 +2123,7 @@ static void i810fb_release_resource(struct fb_info *info, } -static void __exit i810fb_remove_pci(struct pci_dev *dev) +static void i810fb_remove_pci(struct pci_dev *dev) { struct fb_info *info = pci_get_drvdata(dev); struct i810fb_par *par = info->par; diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index 1b0faadb30801921d74231b6fc788fac5b67d9cd..c166e0725be5dab13e9a685a93ea7ea9c23a3351 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -117,6 +117,9 @@ #define IMXFB_LSCR1_DEFAULT 0x00120300 +#define LCDC_LAUSCR 0x80 +#define LAUSCR_AUS_MODE (1<<31) + /* Used fb-mode. Can be set on kernel command line, therefore file-static. */ static const char *fb_mode; @@ -158,6 +161,7 @@ struct imxfb_info { dma_addr_t dbar2; u_int pcr; + u_int lauscr; u_int pwmr; u_int lscr1; u_int dmacr; @@ -422,6 +426,11 @@ static int imxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) pcr |= imxfb_mode->pcr & ~(0x3f | (7 << 25)); fbi->pcr = pcr; + /* + * The LCDC AUS Mode Control Register does not exist on imx1. + */ + if (!is_imx1_fb(fbi) && imxfb_mode->aus_mode) + fbi->lauscr = LAUSCR_AUS_MODE; /* * Copy the RGB parameters for this display @@ -638,6 +647,9 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf if (fbi->dmacr) writel(fbi->dmacr, fbi->regs + LCDC_DMACR); + if (fbi->lauscr) + writel(fbi->lauscr, fbi->regs + LCDC_LAUSCR); + return 0; } @@ -734,6 +746,11 @@ static int imxfb_of_read_mode(struct device *dev, struct device_node *np, imxfb_mode->bpp = bpp; imxfb_mode->pcr = pcr; + /* + * fsl,aus-mode is optional + */ + imxfb_mode->aus_mode = of_property_read_bool(np, "fsl,aus-mode"); + return 0; } diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c index ff2a5d2023e1f88078c90b56746303783cbd7429..6b444400a86c2252c7f0c165869113f8393ca1a3 100644 --- a/drivers/video/fbdev/intelfb/intelfbdrv.c +++ b/drivers/video/fbdev/intelfb/intelfbdrv.c @@ -934,7 +934,7 @@ static __inline__ int var_to_refresh(const struct fb_var_screeninfo *var) } /*************************************************************** - * Various intialisation functions * + * Various initialisation functions * ***************************************************************/ static void get_initial_mode(struct intelfb_info *dinfo) diff --git a/drivers/video/fbdev/n411.c b/drivers/video/fbdev/n411.c index 053deacad7cc12d3415e7e5d3b6c0b55e91e22ea..a3677313396e21fa78c266ff5df7a972ccc360a1 100644 --- a/drivers/video/fbdev/n411.c +++ b/drivers/video/fbdev/n411.c @@ -193,11 +193,11 @@ module_exit(n411_exit); module_param(nosplash, uint, 0); MODULE_PARM_DESC(nosplash, "Disable doing the splash screen"); -module_param(dio_addr, ulong, 0); +module_param_hw(dio_addr, ulong, ioport, 0); MODULE_PARM_DESC(dio_addr, "IO address for data, eg: 0x480"); -module_param(cio_addr, ulong, 0); +module_param_hw(cio_addr, ulong, ioport, 0); MODULE_PARM_DESC(cio_addr, "IO address for control, eg: 0x400"); -module_param(c2io_addr, ulong, 0); +module_param_hw(c2io_addr, ulong, ioport, 0); MODULE_PARM_DESC(c2io_addr, "IO address for secondary control, eg: 0x408"); module_param(splashval, ulong, 0); MODULE_PARM_DESC(splashval, "Splash pattern: 0x00 is black, 0x01 is white"); diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c index c81f150589e1f8a2c4a9c342d305ef2684f121cb..df9e6ebcfad5f8d37048e3db8901406e96177c1a 100644 --- a/drivers/video/fbdev/omap/lcd_mipid.c +++ b/drivers/video/fbdev/omap/lcd_mipid.c @@ -174,7 +174,7 @@ static void hw_guard_wait(struct mipid_device *md) { unsigned long wait = md->hw_guard_end - jiffies; - if ((long)wait > 0 && wait <= md->hw_guard_wait) { + if ((long)wait > 0 && time_before_eq(wait, md->hw_guard_wait)) { set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(wait); } diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss.c b/drivers/video/fbdev/omap2/omapfb/dss/dss.c index 47d7f69ad9ad858a75595922f032341d319f442d..48c6500c24e1f4b05a93d4a274c4a366cb8c0d6f 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dss.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dss.c @@ -941,11 +941,13 @@ static int dss_init_features(struct platform_device *pdev) return 0; } +static void dss_uninit_ports(struct platform_device *pdev); + static int dss_init_ports(struct platform_device *pdev) { struct device_node *parent = pdev->dev.of_node; struct device_node *port; - int r; + int r, ret = 0; if (parent == NULL) return 0; @@ -972,17 +974,21 @@ static int dss_init_ports(struct platform_device *pdev) switch (port_type) { case OMAP_DISPLAY_TYPE_DPI: - dpi_init_port(pdev, port); + ret = dpi_init_port(pdev, port); break; case OMAP_DISPLAY_TYPE_SDI: - sdi_init_port(pdev, port); + ret = sdi_init_port(pdev, port); break; default: break; } - } while ((port = omapdss_of_get_next_port(parent, port)) != NULL); + } while (!ret && + (port = omapdss_of_get_next_port(parent, port)) != NULL); - return 0; + if (ret) + dss_uninit_ports(pdev); + + return ret; } static void dss_uninit_ports(struct platform_device *pdev) diff --git a/drivers/video/fbdev/pmag-aa-fb.c b/drivers/video/fbdev/pmag-aa-fb.c index ffe2dd482f840b51e83d3180bc327c1aefdc029e..39922f072db4f89ed5212a8e509cb2e2ca86c2d8 100644 --- a/drivers/video/fbdev/pmag-aa-fb.c +++ b/drivers/video/fbdev/pmag-aa-fb.c @@ -247,7 +247,7 @@ static int pmagaafb_probe(struct device *dev) return err; } -static int __exit pmagaafb_remove(struct device *dev) +static int pmagaafb_remove(struct device *dev) { struct tc_dev *tdev = to_tc_dev(dev); struct fb_info *info = dev_get_drvdata(dev); @@ -280,7 +280,7 @@ static struct tc_driver pmagaafb_driver = { .name = "pmagaafb", .bus = &tc_bus_type, .probe = pmagaafb_probe, - .remove = __exit_p(pmagaafb_remove), + .remove = pmagaafb_remove, }, }; diff --git a/drivers/video/fbdev/pmag-ba-fb.c b/drivers/video/fbdev/pmag-ba-fb.c index df02fb4b7fd1c2fe6363ef5173ed516479811da1..1fd02f40708eb79740db54a292ca87e8e24cab9c 100644 --- a/drivers/video/fbdev/pmag-ba-fb.c +++ b/drivers/video/fbdev/pmag-ba-fb.c @@ -235,7 +235,7 @@ static int pmagbafb_probe(struct device *dev) return err; } -static int __exit pmagbafb_remove(struct device *dev) +static int pmagbafb_remove(struct device *dev) { struct tc_dev *tdev = to_tc_dev(dev); struct fb_info *info = dev_get_drvdata(dev); @@ -270,7 +270,7 @@ static struct tc_driver pmagbafb_driver = { .name = "pmagbafb", .bus = &tc_bus_type, .probe = pmagbafb_probe, - .remove = __exit_p(pmagbafb_remove), + .remove = pmagbafb_remove, }, }; diff --git a/drivers/video/fbdev/pmagb-b-fb.c b/drivers/video/fbdev/pmagb-b-fb.c index a7a179a0bb33f2d85beb0301945375dad45e8f79..46e96c4515060f0c58ade6ff892f711ad39aa0a6 100644 --- a/drivers/video/fbdev/pmagb-b-fb.c +++ b/drivers/video/fbdev/pmagb-b-fb.c @@ -353,7 +353,7 @@ static int pmagbbfb_probe(struct device *dev) return err; } -static int __exit pmagbbfb_remove(struct device *dev) +static int pmagbbfb_remove(struct device *dev) { struct tc_dev *tdev = to_tc_dev(dev); struct fb_info *info = dev_get_drvdata(dev); @@ -388,7 +388,7 @@ static struct tc_driver pmagbbfb_driver = { .name = "pmagbbfb", .bus = &tc_bus_type, .probe = pmagbbfb_probe, - .remove = __exit_p(pmagbbfb_remove), + .remove = pmagbbfb_remove, }, }; diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c index ef73f14d7ba00d4b185739d65006cb50ebea7580..b21a89b03fb474a4a1c5b8821ad8861cd55b7e35 100644 --- a/drivers/video/fbdev/pxafb.c +++ b/drivers/video/fbdev/pxafb.c @@ -645,7 +645,7 @@ static void overlay1fb_disable(struct pxafb_layer *ofb) lcd_writel(ofb->fbi, FBR1, ofb->fbi->fdadr[DMA_OV1] | 0x3); if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0) - pr_warning("%s: timeout disabling overlay1\n", __func__); + pr_warn("%s: timeout disabling overlay1\n", __func__); lcd_writel(ofb->fbi, LCCR5, lccr5); } @@ -710,7 +710,7 @@ static void overlay2fb_disable(struct pxafb_layer *ofb) lcd_writel(ofb->fbi, FBR4, ofb->fbi->fdadr[DMA_OV2_Cr] | 0x3); if (wait_for_completion_timeout(&ofb->branch_done, 1 * HZ) == 0) - pr_warning("%s: timeout disabling overlay2\n", __func__); + pr_warn("%s: timeout disabling overlay2\n", __func__); } static struct pxafb_layer_ops ofb_ops[] = { @@ -1187,8 +1187,7 @@ int pxafb_smart_flush(struct fb_info *info) lcd_writel(fbi, LCCR0, fbi->reg_lccr0 | LCCR0_ENB); if (wait_for_completion_timeout(&fbi->command_done, HZ/2) == 0) { - pr_warning("%s: timeout waiting for command done\n", - __func__); + pr_warn("%s: timeout waiting for command done\n", __func__); ret = -ETIMEDOUT; } diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c index d80bc8a3200fa9c8435bc0b606a4bae0f02728f1..67e314fdd9471513b911d07d33c0ff3c13e25a05 100644 --- a/drivers/video/fbdev/sm501fb.c +++ b/drivers/video/fbdev/sm501fb.c @@ -1600,6 +1600,7 @@ static int sm501fb_start(struct sm501fb_info *info, info->fbmem = ioremap(res->start, resource_size(res)); if (info->fbmem == NULL) { dev_err(dev, "cannot remap framebuffer\n"); + ret = -ENXIO; goto err_mem_res; } diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index ec2e7e3536859cae3294d484c5bb56b5330c6db5..449fceaf79d5505a0e2c409adeb968dbaad4413c 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -1646,8 +1646,9 @@ static int ufx_usb_probe(struct usb_interface *interface, dev_dbg(dev->gdev, "%s %s - serial #%s\n", usbdev->manufacturer, usbdev->product, usbdev->serial); dev_dbg(dev->gdev, "vid_%04x&pid_%04x&rev_%04x driver's ufx_data struct at %p\n", - usbdev->descriptor.idVendor, usbdev->descriptor.idProduct, - usbdev->descriptor.bcdDevice, dev); + le16_to_cpu(usbdev->descriptor.idVendor), + le16_to_cpu(usbdev->descriptor.idProduct), + le16_to_cpu(usbdev->descriptor.bcdDevice), dev); dev_dbg(dev->gdev, "console enable=%d\n", console); dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio); diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index e9c2f7ba3c8e6c34382b6b0b58a7d65e41e8292d..05ef657235df2eb72c68e8a892d70e3edadd17d8 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1105,8 +1105,8 @@ static int dlfb_ops_blank(int blank_mode, struct fb_info *info) char *bufptr; struct urb *urb; - pr_info("/dev/fb%d FB_BLANK mode %d --> %d\n", - info->node, dev->blank_mode, blank_mode); + pr_debug("/dev/fb%d FB_BLANK mode %d --> %d\n", + info->node, dev->blank_mode, blank_mode); if ((dev->blank_mode == FB_BLANK_POWERDOWN) && (blank_mode != FB_BLANK_POWERDOWN)) { @@ -1487,15 +1487,25 @@ static struct device_attribute fb_device_attrs[] = { static int dlfb_select_std_channel(struct dlfb_data *dev) { int ret; - u8 set_def_chn[] = { 0x57, 0xCD, 0xDC, 0xA7, + void *buf; + static const u8 set_def_chn[] = { + 0x57, 0xCD, 0xDC, 0xA7, 0x1C, 0x88, 0x5E, 0x15, 0x60, 0xFE, 0xC6, 0x97, 0x16, 0x3D, 0x47, 0xF2 }; + buf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL); + + if (!buf) + return -ENOMEM; + ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), NR_USB_REQUEST_CHANNEL, (USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0, - set_def_chn, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT); + buf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT); + + kfree(buf); + return ret; } @@ -1603,8 +1613,9 @@ static int dlfb_usb_probe(struct usb_interface *interface, pr_info("%s %s - serial #%s\n", usbdev->manufacturer, usbdev->product, usbdev->serial); pr_info("vid_%04x&pid_%04x&rev_%04x driver's dlfb_data struct at %p\n", - usbdev->descriptor.idVendor, usbdev->descriptor.idProduct, - usbdev->descriptor.bcdDevice, dev); + le16_to_cpu(usbdev->descriptor.idVendor), + le16_to_cpu(usbdev->descriptor.idProduct), + le16_to_cpu(usbdev->descriptor.bcdDevice), dev); pr_info("console enable=%d\n", console); pr_info("fb_defio enable=%d\n", fb_defio); pr_info("shadow enable=%d\n", shadow); diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c index 1c1e95a0b8faa04006c857158c79ac11bc904d96..ce4c4729a5e8c30ea1222b584e159b4a45c24b48 100644 --- a/drivers/video/fbdev/vermilion/vermilion.c +++ b/drivers/video/fbdev/vermilion/vermilion.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c index f9718f012aae9be85a9568a2ce7be0740f08f5f6..badee04ef496cefd02b8f3f2faebb21fafb839b5 100644 --- a/drivers/video/fbdev/via/viafbdev.c +++ b/drivers/video/fbdev/via/viafbdev.c @@ -1630,16 +1630,14 @@ static void viafb_init_proc(struct viafb_shared *shared) } static void viafb_remove_proc(struct viafb_shared *shared) { - struct proc_dir_entry *viafb_entry = shared->proc_entry, - *iga1_entry = shared->iga1_proc_entry, - *iga2_entry = shared->iga2_proc_entry; + struct proc_dir_entry *viafb_entry = shared->proc_entry; if (!viafb_entry) return; - remove_proc_entry("output_devices", iga2_entry); + remove_proc_entry("output_devices", shared->iga2_proc_entry); remove_proc_entry("iga2", viafb_entry); - remove_proc_entry("output_devices", iga1_entry); + remove_proc_entry("output_devices", shared->iga1_proc_entry); remove_proc_entry("iga1", viafb_entry); remove_proc_entry("supported_output_devices", viafb_entry); diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c index 3ee309c50b2d015fb3d463dd88f6b99253beea12..46f63960fa9e6aa2913be8b804c1de24ce1245be 100644 --- a/drivers/video/fbdev/xen-fbfront.c +++ b/drivers/video/fbdev/xen-fbfront.c @@ -18,6 +18,8 @@ * frame buffer. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -380,10 +382,18 @@ static int xenfb_probe(struct xenbus_device *dev, video[KPARAM_MEM] = val; } + video[KPARAM_WIDTH] = xenbus_read_unsigned(dev->otherend, "width", + video[KPARAM_WIDTH]); + video[KPARAM_HEIGHT] = xenbus_read_unsigned(dev->otherend, "height", + video[KPARAM_HEIGHT]); + /* If requested res does not fit in available memory, use default */ fb_size = video[KPARAM_MEM] * 1024 * 1024; if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8 > fb_size) { + pr_warn("display parameters %d,%d,%d invalid, use defaults\n", + video[KPARAM_MEM], video[KPARAM_WIDTH], + video[KPARAM_HEIGHT]); video[KPARAM_WIDTH] = XENFB_WIDTH; video[KPARAM_HEIGHT] = XENFB_HEIGHT; fb_size = XENFB_DEFAULT_FB_LEN; diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c index b6bc4a0bda2a120a30cdb6a18f66fff9d7de0862..4d50bfd13e7c9f58f9f8f472627d6b7051e1e922 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -34,7 +34,7 @@ static int __init fb_logo_late_init(void) return 0; } -late_initcall(fb_logo_late_init); +late_initcall_sync(fb_logo_late_init); /* logo's are marked __initdata. Use __ref to tell * modpost that it is intended that this function uses data diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 150ce2abf6c8f193b4e3b5c9fa66001cd709f39e..d3eca879a0a85474f858e79b759d8b4920ba9709 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -243,11 +243,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list)); /* Get the physical addresses of the source buffer */ - down_read(¤t->mm->mmap_sem); - num_pinned = get_user_pages(param.local_vaddr - lb_offset, - num_pages, (param.source == -1) ? 0 : FOLL_WRITE, - pages, NULL); - up_read(¤t->mm->mmap_sem); + num_pinned = get_user_pages_unlocked(param.local_vaddr - lb_offset, + num_pages, pages, (param.source == -1) ? 0 : FOLL_WRITE); if (num_pinned != num_pages) { /* get_user_pages() failed */ diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 34adf9b9c0538815db33f62ed842de49be5222e7..22caf808bfaba97e45662de0f4150632bcb85d2f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -418,8 +418,7 @@ static int init_vqs(struct virtio_balloon *vb) * optionally stat. */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, - NULL); + err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); if (err) return err; @@ -664,6 +663,12 @@ static int virtballoon_restore(struct virtio_device *vdev) } #endif +static int virtballoon_validate(struct virtio_device *vdev) +{ + __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); + return 0; +} + static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, @@ -676,6 +681,7 @@ static struct virtio_driver virtio_balloon_driver = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtballoon_validate, .probe = virtballoon_probe, .remove = virtballoon_remove, .config_changed = virtballoon_changed, diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 79f1293cda9327a051feef083871bdbc62038496..3a0468f2ceb08a3c4cd3eac033477b8049ebd89d 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -173,8 +173,7 @@ static int virtinput_init_vqs(struct virtio_input *vi) static const char * const names[] = { "events", "status" }; int err; - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, - NULL); + err = virtio_find_vqs(vi->vdev, 2, vqs, cbs, names, NULL); if (err) return err; vi->evt = vqs[0]; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 78343b8f9034b35ea7d18e6f8a5b3e3df4bae9e0..74dc7170fd351e02d1732357b0705f66c507f7ce 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -351,7 +351,7 @@ static void vm_del_vqs(struct virtio_device *vdev) static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), - const char *name) + const char *name, bool ctx) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_vq_info *info; @@ -388,7 +388,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, /* Create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, - true, true, vm_notify, callback, name); + true, true, ctx, vm_notify, callback, name); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; @@ -447,6 +447,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], + const bool *ctx, struct irq_affinity *desc) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); @@ -459,7 +460,8 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, return err; for (i = 0; i < nvqs; ++i) { - vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]); + vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { vm_del_vqs(vdev); return PTR_ERR(vqs[i]); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 698d5d06fa039ca1a27b151a3dcf5d322784e3f0..007a4f3660862e1aa6e9a16207ae54bbbab61d58 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -172,6 +172,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -183,7 +184,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, if (!info) return ERR_PTR(-ENOMEM); - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, + vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; @@ -274,6 +275,7 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], bool per_vq_vectors, + const bool *ctx, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -315,6 +317,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, else msix_vec = VP_MSIX_VQ_VECTOR; vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false, msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -345,7 +348,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], const bool *ctx) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; @@ -367,6 +370,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, continue; } vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + ctx ? ctx[i] : false, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -383,20 +387,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc) + const char * const names[], const bool *ctx, + struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc); if (!err) return 0; /* Finally fall back to regular interrupts. */ - return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); + return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx); } const char *vp_bus_name(struct virtio_device *vdev) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index e96334aec1e0d70842d1a9fc53462ab728be87c0..135ee3cf7175881a8259a192ba102978196687c7 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -102,6 +102,7 @@ struct virtio_pci_device { unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec); void (*del_vq)(struct virtio_pci_vq_info *info); @@ -131,7 +132,8 @@ void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc); + const char * const names[], const bool *ctx, + struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 4bfa48fb1324660f82ae6272d2e1ecc33522ba3e..2780886e8ba3d393ba4847366983c2ab56a7ed3e 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -116,6 +116,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec) { struct virtqueue *vq; @@ -135,7 +136,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, - true, false, vp_notify, callback, name); + true, false, ctx, + vp_notify, callback, name); if (!vq) return ERR_PTR(-ENOMEM); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 8978f109d2d79828e5b0c12649debc481dfacd7f..2555d80f6eec4b4a78860b46f453092051b50a24 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -297,6 +297,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, + bool ctx, u16 msix_vec) { struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common; @@ -328,7 +329,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, - true, true, vp_notify, callback, name); + true, true, ctx, + vp_notify, callback, name); if (!vq) return ERR_PTR(-ENOMEM); @@ -387,12 +389,14 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc) + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); if (rc) return rc; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 409aeaa49246a0edd7c6da07ca38b58c3f876109..5e1b548828e60745ba87581d2b9bcb6380b092f8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -263,6 +263,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, unsigned int out_sgs, unsigned int in_sgs, void *data, + void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -275,6 +276,7 @@ static inline int virtqueue_add(struct virtqueue *_vq, START_USE(vq); BUG_ON(data == NULL); + BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { END_USE(vq); @@ -389,6 +391,8 @@ static inline int virtqueue_add(struct virtqueue *_vq, vq->desc_state[head].data = data; if (indirect) vq->desc_state[head].indir_desc = desc; + if (ctx) + vq->desc_state[head].indir_desc = ctx; /* Put entry in available array (but don't update avail->idx until they * do sync). */ @@ -461,7 +465,8 @@ int virtqueue_add_sgs(struct virtqueue *_vq, for (sg = sgs[i]; sg; sg = sg_next(sg)) total_sg++; } - return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); + return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, + data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); @@ -483,7 +488,7 @@ int virtqueue_add_outbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 1, 0, data, gfp); + return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); @@ -505,10 +510,34 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp) { - return virtqueue_add(vq, &sg, num, 0, 1, data, gfp); + return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); +/** + * virtqueue_add_inbuf_ctx - expose input buffers to other end + * @vq: the struct virtqueue we're talking about. + * @sg: scatterlist (must be well-formed and terminated!) + * @num: the number of entries in @sg writable by other side + * @data: the token identifying the buffer. + * @ctx: extra context for the token + * @gfp: how to do memory allocations (if necessary). + * + * Caller must ensure we don't call this with other virtqueue operations + * at the same time (except where noted). + * + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). + */ +int virtqueue_add_inbuf_ctx(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + void *ctx, + gfp_t gfp) +{ + return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); +} +EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); + /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @vq: the struct virtqueue @@ -598,7 +627,8 @@ bool virtqueue_kick(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(virtqueue_kick); -static void detach_buf(struct vring_virtqueue *vq, unsigned int head) +static void detach_buf(struct vring_virtqueue *vq, unsigned int head, + void **ctx) { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); @@ -622,10 +652,15 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) /* Plus final descriptor */ vq->vq.num_free++; - /* Free the indirect table, if any, now that it's unmapped. */ - if (vq->desc_state[head].indir_desc) { + if (vq->indirect) { struct vring_desc *indir_desc = vq->desc_state[head].indir_desc; - u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); + u32 len; + + /* Free the indirect table, if any, now that it's unmapped. */ + if (!indir_desc) + return; + + len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len); BUG_ON(!(vq->vring.desc[head].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); @@ -634,8 +669,10 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) for (j = 0; j < len / sizeof(struct vring_desc); j++) vring_unmap_one(vq, &indir_desc[j]); - kfree(vq->desc_state[head].indir_desc); + kfree(indir_desc); vq->desc_state[head].indir_desc = NULL; + } else if (ctx) { + *ctx = vq->desc_state[head].indir_desc; } } @@ -660,7 +697,8 @@ static inline bool more_used(const struct vring_virtqueue *vq) * Returns NULL if there are no used buffers, or the "data" token * handed to virtqueue_add_*(). */ -void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) +void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, + void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; @@ -698,7 +736,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) /* detach_buf clears data, so grab it now. */ ret = vq->desc_state[i].data; - detach_buf(vq, i); + detach_buf(vq, i, ctx); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before @@ -715,8 +753,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) END_USE(vq); return ret; } -EXPORT_SYMBOL_GPL(virtqueue_get_buf); +EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); +void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) +{ + return virtqueue_get_buf_ctx(_vq, len, NULL); +} +EXPORT_SYMBOL_GPL(virtqueue_get_buf); /** * virtqueue_disable_cb - disable callbacks * @vq: the struct virtqueue we're talking about. @@ -878,7 +921,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) continue; /* detach_buf clears data, so grab it now. */ buf = vq->desc_state[i].data; - detach_buf(vq, i); + detach_buf(vq, i, NULL); vq->avail_idx_shadow--; vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow); END_USE(vq); @@ -916,6 +959,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring vring, struct virtio_device *vdev, bool weak_barriers, + bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) @@ -950,7 +994,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->last_add_time_valid = false; #endif - vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && + !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ @@ -1019,6 +1064,7 @@ struct virtqueue *vring_create_virtqueue( struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, + bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) @@ -1058,7 +1104,7 @@ struct virtqueue *vring_create_virtqueue( queue_size_in_bytes = vring_size(num, vring_align); vring_init(&vring, num, queue, vring_align); - vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, + vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, notify, callback, name); if (!vq) { vring_free_queue(vdev, queue_size_in_bytes, queue, @@ -1079,6 +1125,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool context, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), @@ -1086,7 +1133,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, { struct vring vring; vring_init(&vring, num, pages, vring_align); - return __vring_new_virtqueue(index, vring, vdev, weak_barriers, + return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, notify, callback, name); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c index 0035cf79760a512563540d58535987bac70aa35c..6a3ead42aba876cef534b8e9acc34b30cf6cef3a 100644 --- a/drivers/vme/vme.c +++ b/drivers/vme/vme.c @@ -76,9 +76,16 @@ static struct vme_bridge *find_bridge(struct vme_resource *resource) } } -/* +/** + * vme_free_consistent - Allocate contiguous memory. + * @resource: Pointer to VME resource. + * @size: Size of allocation required. + * @dma: Pointer to variable to store physical address of allocation. + * * Allocate a contiguous block of memory for use by the driver. This is used to * create the buffers for the slave windows. + * + * Return: Virtual address of allocation on success, NULL on failure. */ void *vme_alloc_consistent(struct vme_resource *resource, size_t size, dma_addr_t *dma) @@ -111,8 +118,14 @@ void *vme_alloc_consistent(struct vme_resource *resource, size_t size, } EXPORT_SYMBOL(vme_alloc_consistent); -/* - * Free previously allocated contiguous block of memory. +/** + * vme_free_consistent - Free previously allocated memory. + * @resource: Pointer to VME resource. + * @size: Size of allocation to free. + * @vaddr: Virtual address of allocation. + * @dma: Physical address of allocation. + * + * Free previously allocated block of contiguous memory. */ void vme_free_consistent(struct vme_resource *resource, size_t size, void *vaddr, dma_addr_t dma) @@ -145,6 +158,16 @@ void vme_free_consistent(struct vme_resource *resource, size_t size, } EXPORT_SYMBOL(vme_free_consistent); +/** + * vme_get_size - Helper function returning size of a VME window + * @resource: Pointer to VME slave or master resource. + * + * Determine the size of the VME window provided. This is a helper + * function, wrappering the call to vme_master_get or vme_slave_get + * depending on the type of window resource handed to it. + * + * Return: Size of the window on success, zero on failure. + */ size_t vme_get_size(struct vme_resource *resource) { int enabled, retval; @@ -259,9 +282,16 @@ static u32 vme_get_aspace(int am) return 0; } -/* - * Request a slave image with specific attributes, return some unique - * identifier. +/** + * vme_slave_request - Request a VME slave window resource. + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * @address: Required VME address space. + * @cycle: Required VME data transfer cycle type. + * + * Request use of a VME window resource capable of being set for the requested + * address space and data transfer cycle. + * + * Return: Pointer to VME resource on success, NULL on failure. */ struct vme_resource *vme_slave_request(struct vme_dev *vdev, u32 address, u32 cycle) @@ -327,6 +357,23 @@ struct vme_resource *vme_slave_request(struct vme_dev *vdev, u32 address, } EXPORT_SYMBOL(vme_slave_request); +/** + * vme_slave_set - Set VME slave window configuration. + * @resource: Pointer to VME slave resource. + * @enabled: State to which the window should be configured. + * @vme_base: Base address for the window. + * @size: Size of the VME window. + * @buf_base: Based address of buffer used to provide VME slave window storage. + * @aspace: VME address space for the VME window. + * @cycle: VME data transfer cycle type for the VME window. + * + * Set configuration for provided VME slave window. + * + * Return: Zero on success, -EINVAL if operation is not supported on this + * device, if an invalid resource has been provided or invalid + * attributes are provided. Hardware specific errors may also be + * returned. + */ int vme_slave_set(struct vme_resource *resource, int enabled, unsigned long long vme_base, unsigned long long size, dma_addr_t buf_base, u32 aspace, u32 cycle) @@ -362,6 +409,21 @@ int vme_slave_set(struct vme_resource *resource, int enabled, } EXPORT_SYMBOL(vme_slave_set); +/** + * vme_slave_get - Retrieve VME slave window configuration. + * @resource: Pointer to VME slave resource. + * @enabled: Pointer to variable for storing state. + * @vme_base: Pointer to variable for storing window base address. + * @size: Pointer to variable for storing window size. + * @buf_base: Pointer to variable for storing slave buffer base address. + * @aspace: Pointer to variable for storing VME address space. + * @cycle: Pointer to variable for storing VME data transfer cycle type. + * + * Return configuration for provided VME slave window. + * + * Return: Zero on success, -EINVAL if operation is not supported on this + * device or if an invalid resource has been provided. + */ int vme_slave_get(struct vme_resource *resource, int *enabled, unsigned long long *vme_base, unsigned long long *size, dma_addr_t *buf_base, u32 *aspace, u32 *cycle) @@ -386,6 +448,12 @@ int vme_slave_get(struct vme_resource *resource, int *enabled, } EXPORT_SYMBOL(vme_slave_get); +/** + * vme_slave_free - Free VME slave window + * @resource: Pointer to VME slave resource. + * + * Free the provided slave resource so that it may be reallocated. + */ void vme_slave_free(struct vme_resource *resource) { struct vme_slave_resource *slave_image; @@ -415,9 +483,17 @@ void vme_slave_free(struct vme_resource *resource) } EXPORT_SYMBOL(vme_slave_free); -/* - * Request a master image with specific attributes, return some unique - * identifier. +/** + * vme_master_request - Request a VME master window resource. + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * @address: Required VME address space. + * @cycle: Required VME data transfer cycle type. + * @dwidth: Required VME data transfer width. + * + * Request use of a VME window resource capable of being set for the requested + * address space, data transfer cycle and width. + * + * Return: Pointer to VME resource on success, NULL on failure. */ struct vme_resource *vme_master_request(struct vme_dev *vdev, u32 address, u32 cycle, u32 dwidth) @@ -486,6 +562,23 @@ struct vme_resource *vme_master_request(struct vme_dev *vdev, u32 address, } EXPORT_SYMBOL(vme_master_request); +/** + * vme_master_set - Set VME master window configuration. + * @resource: Pointer to VME master resource. + * @enabled: State to which the window should be configured. + * @vme_base: Base address for the window. + * @size: Size of the VME window. + * @aspace: VME address space for the VME window. + * @cycle: VME data transfer cycle type for the VME window. + * @dwidth: VME data transfer width for the VME window. + * + * Set configuration for provided VME master window. + * + * Return: Zero on success, -EINVAL if operation is not supported on this + * device, if an invalid resource has been provided or invalid + * attributes are provided. Hardware specific errors may also be + * returned. + */ int vme_master_set(struct vme_resource *resource, int enabled, unsigned long long vme_base, unsigned long long size, u32 aspace, u32 cycle, u32 dwidth) @@ -522,6 +615,21 @@ int vme_master_set(struct vme_resource *resource, int enabled, } EXPORT_SYMBOL(vme_master_set); +/** + * vme_master_get - Retrieve VME master window configuration. + * @resource: Pointer to VME master resource. + * @enabled: Pointer to variable for storing state. + * @vme_base: Pointer to variable for storing window base address. + * @size: Pointer to variable for storing window size. + * @aspace: Pointer to variable for storing VME address space. + * @cycle: Pointer to variable for storing VME data transfer cycle type. + * @dwidth: Pointer to variable for storing VME data transfer width. + * + * Return configuration for provided VME master window. + * + * Return: Zero on success, -EINVAL if operation is not supported on this + * device or if an invalid resource has been provided. + */ int vme_master_get(struct vme_resource *resource, int *enabled, unsigned long long *vme_base, unsigned long long *size, u32 *aspace, u32 *cycle, u32 *dwidth) @@ -546,8 +654,20 @@ int vme_master_get(struct vme_resource *resource, int *enabled, } EXPORT_SYMBOL(vme_master_get); -/* - * Read data out of VME space into a buffer. +/** + * vme_master_write - Read data from VME space into a buffer. + * @resource: Pointer to VME master resource. + * @buf: Pointer to buffer where data should be transferred. + * @count: Number of bytes to transfer. + * @offset: Offset into VME master window at which to start transfer. + * + * Perform read of count bytes of data from location on VME bus which maps into + * the VME master window at offset to buf. + * + * Return: Number of bytes read, -EINVAL if resource is not a VME master + * resource or read operation is not supported. -EFAULT returned if + * invalid offset is provided. Hardware specific errors may also be + * returned. */ ssize_t vme_master_read(struct vme_resource *resource, void *buf, size_t count, loff_t offset) @@ -583,8 +703,20 @@ ssize_t vme_master_read(struct vme_resource *resource, void *buf, size_t count, } EXPORT_SYMBOL(vme_master_read); -/* - * Write data out to VME space from a buffer. +/** + * vme_master_write - Write data out to VME space from a buffer. + * @resource: Pointer to VME master resource. + * @buf: Pointer to buffer holding data to transfer. + * @count: Number of bytes to transfer. + * @offset: Offset into VME master window at which to start transfer. + * + * Perform write of count bytes of data from buf to location on VME bus which + * maps into the VME master window at offset. + * + * Return: Number of bytes written, -EINVAL if resource is not a VME master + * resource or write operation is not supported. -EFAULT returned if + * invalid offset is provided. Hardware specific errors may also be + * returned. */ ssize_t vme_master_write(struct vme_resource *resource, void *buf, size_t count, loff_t offset) @@ -619,8 +751,24 @@ ssize_t vme_master_write(struct vme_resource *resource, void *buf, } EXPORT_SYMBOL(vme_master_write); -/* - * Perform RMW cycle to provided location. +/** + * vme_master_rmw - Perform read-modify-write cycle. + * @resource: Pointer to VME master resource. + * @mask: Bits to be compared and swapped in operation. + * @compare: Bits to be compared with data read from offset. + * @swap: Bits to be swapped in data read from offset. + * @offset: Offset into VME master window at which to perform operation. + * + * Perform read-modify-write cycle on provided location: + * - Location on VME bus is read. + * - Bits selected by mask are compared with compare. + * - Where a selected bit matches that in compare and are selected in swap, + * the bit is swapped. + * - Result written back to location on VME bus. + * + * Return: Bytes written on success, -EINVAL if resource is not a VME master + * resource or RMW operation is not supported. Hardware specific + * errors may also be returned. */ unsigned int vme_master_rmw(struct vme_resource *resource, unsigned int mask, unsigned int compare, unsigned int swap, loff_t offset) @@ -644,6 +792,17 @@ unsigned int vme_master_rmw(struct vme_resource *resource, unsigned int mask, } EXPORT_SYMBOL(vme_master_rmw); +/** + * vme_master_mmap - Mmap region of VME master window. + * @resource: Pointer to VME master resource. + * @vma: Pointer to definition of user mapping. + * + * Memory map a region of the VME master window into user space. + * + * Return: Zero on success, -EINVAL if resource is not a VME master + * resource or -EFAULT if map exceeds window size. Other generic mmap + * errors may also be returned. + */ int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma) { struct vme_master_resource *image; @@ -670,6 +829,12 @@ int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma) } EXPORT_SYMBOL(vme_master_mmap); +/** + * vme_master_free - Free VME master window + * @resource: Pointer to VME master resource. + * + * Free the provided master resource so that it may be reallocated. + */ void vme_master_free(struct vme_resource *resource) { struct vme_master_resource *master_image; @@ -699,9 +864,15 @@ void vme_master_free(struct vme_resource *resource) } EXPORT_SYMBOL(vme_master_free); -/* - * Request a DMA controller with specific attributes, return some unique - * identifier. +/** + * vme_dma_request - Request a DMA controller. + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * @route: Required src/destination combination. + * + * Request a VME DMA controller with capability to perform transfers bewteen + * requested source/destination combination. + * + * Return: Pointer to VME DMA resource on success, NULL on failure. */ struct vme_resource *vme_dma_request(struct vme_dev *vdev, u32 route) { @@ -768,8 +939,15 @@ struct vme_resource *vme_dma_request(struct vme_dev *vdev, u32 route) } EXPORT_SYMBOL(vme_dma_request); -/* - * Start new list +/** + * vme_new_dma_list - Create new VME DMA list. + * @resource: Pointer to VME DMA resource. + * + * Create a new VME DMA list. It is the responsibility of the user to free + * the list once it is no longer required with vme_dma_list_free(). + * + * Return: Pointer to new VME DMA list, NULL on allocation failure or invalid + * VME DMA resource. */ struct vme_dma_list *vme_new_dma_list(struct vme_resource *resource) { @@ -796,8 +974,16 @@ struct vme_dma_list *vme_new_dma_list(struct vme_resource *resource) } EXPORT_SYMBOL(vme_new_dma_list); -/* - * Create "Pattern" type attributes +/** + * vme_dma_pattern_attribute - Create "Pattern" type VME DMA list attribute. + * @pattern: Value to use used as pattern + * @type: Type of pattern to be written. + * + * Create VME DMA list attribute for pattern generation. It is the + * responsibility of the user to free used attributes using + * vme_dma_free_attribute(). + * + * Return: Pointer to VME DMA attribute, NULL on failure. */ struct vme_dma_attr *vme_dma_pattern_attribute(u32 pattern, u32 type) { @@ -831,8 +1017,15 @@ struct vme_dma_attr *vme_dma_pattern_attribute(u32 pattern, u32 type) } EXPORT_SYMBOL(vme_dma_pattern_attribute); -/* - * Create "PCI" type attributes +/** + * vme_dma_pci_attribute - Create "PCI" type VME DMA list attribute. + * @address: PCI base address for DMA transfer. + * + * Create VME DMA list attribute pointing to a location on PCI for DMA + * transfers. It is the responsibility of the user to free used attributes + * using vme_dma_free_attribute(). + * + * Return: Pointer to VME DMA attribute, NULL on failure. */ struct vme_dma_attr *vme_dma_pci_attribute(dma_addr_t address) { @@ -869,8 +1062,18 @@ struct vme_dma_attr *vme_dma_pci_attribute(dma_addr_t address) } EXPORT_SYMBOL(vme_dma_pci_attribute); -/* - * Create "VME" type attributes +/** + * vme_dma_vme_attribute - Create "VME" type VME DMA list attribute. + * @address: VME base address for DMA transfer. + * @aspace: VME address space to use for DMA transfer. + * @cycle: VME bus cycle to use for DMA transfer. + * @dwidth: VME data width to use for DMA transfer. + * + * Create VME DMA list attribute pointing to a location on the VME bus for DMA + * transfers. It is the responsibility of the user to free used attributes + * using vme_dma_free_attribute(). + * + * Return: Pointer to VME DMA attribute, NULL on failure. */ struct vme_dma_attr *vme_dma_vme_attribute(unsigned long long address, u32 aspace, u32 cycle, u32 dwidth) @@ -908,8 +1111,12 @@ struct vme_dma_attr *vme_dma_vme_attribute(unsigned long long address, } EXPORT_SYMBOL(vme_dma_vme_attribute); -/* - * Free attribute +/** + * vme_dma_free_attribute - Free DMA list attribute. + * @attributes: Pointer to DMA list attribute. + * + * Free VME DMA list attribute. VME DMA list attributes can be safely freed + * once vme_dma_list_add() has returned. */ void vme_dma_free_attribute(struct vme_dma_attr *attributes) { @@ -918,6 +1125,23 @@ void vme_dma_free_attribute(struct vme_dma_attr *attributes) } EXPORT_SYMBOL(vme_dma_free_attribute); +/** + * vme_dma_list_add - Add enty to a VME DMA list. + * @list: Pointer to VME list. + * @src: Pointer to DMA list attribute to use as source. + * @dest: Pointer to DMA list attribute to use as destination. + * @count: Number of bytes to transfer. + * + * Add an entry to the provided VME DMA list. Entry requires pointers to source + * and destination DMA attributes and a count. + * + * Please note, the attributes supported as source and destinations for + * transfers are hardware dependent. + * + * Return: Zero on success, -EINVAL if operation is not supported on this + * device or if the link list has already been submitted for execution. + * Hardware specific errors also possible. + */ int vme_dma_list_add(struct vme_dma_list *list, struct vme_dma_attr *src, struct vme_dma_attr *dest, size_t count) { @@ -942,6 +1166,16 @@ int vme_dma_list_add(struct vme_dma_list *list, struct vme_dma_attr *src, } EXPORT_SYMBOL(vme_dma_list_add); +/** + * vme_dma_list_exec - Queue a VME DMA list for execution. + * @list: Pointer to VME list. + * + * Queue the provided VME DMA list for execution. The call will return once the + * list has been executed. + * + * Return: Zero on success, -EINVAL if operation is not supported on this + * device. Hardware specific errors also possible. + */ int vme_dma_list_exec(struct vme_dma_list *list) { struct vme_bridge *bridge = list->parent->parent; @@ -962,6 +1196,15 @@ int vme_dma_list_exec(struct vme_dma_list *list) } EXPORT_SYMBOL(vme_dma_list_exec); +/** + * vme_dma_list_free - Free a VME DMA list. + * @list: Pointer to VME list. + * + * Free the provided DMA list and all its entries. + * + * Return: Zero on success, -EINVAL on invalid VME resource, -EBUSY if resource + * is still in use. Hardware specific errors also possible. + */ int vme_dma_list_free(struct vme_dma_list *list) { struct vme_bridge *bridge = list->parent->parent; @@ -994,6 +1237,15 @@ int vme_dma_list_free(struct vme_dma_list *list) } EXPORT_SYMBOL(vme_dma_list_free); +/** + * vme_dma_free - Free a VME DMA resource. + * @resource: Pointer to VME DMA resource. + * + * Free the provided DMA resource so that it may be reallocated. + * + * Return: Zero on success, -EINVAL on invalid VME resource, -EBUSY if resource + * is still active. + */ int vme_dma_free(struct vme_resource *resource) { struct vme_dma_resource *ctrlr; @@ -1099,6 +1351,22 @@ void vme_irq_handler(struct vme_bridge *bridge, int level, int statid) } EXPORT_SYMBOL(vme_irq_handler); +/** + * vme_irq_request - Request a specific VME interrupt. + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * @level: Interrupt priority being requested. + * @statid: Interrupt vector being requested. + * @callback: Pointer to callback function called when VME interrupt/vector + * received. + * @priv_data: Generic pointer that will be passed to the callback function. + * + * Request callback to be attached as a handler for VME interrupts with provided + * level and statid. + * + * Return: Zero on success, -EINVAL on invalid vme device, level or if the + * function is not supported, -EBUSY if the level/statid combination is + * already in use. Hardware specific errors also possible. + */ int vme_irq_request(struct vme_dev *vdev, int level, int statid, void (*callback)(int, int, void *), void *priv_data) @@ -1142,6 +1410,14 @@ int vme_irq_request(struct vme_dev *vdev, int level, int statid, } EXPORT_SYMBOL(vme_irq_request); +/** + * vme_irq_free - Free a VME interrupt. + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * @level: Interrupt priority of interrupt being freed. + * @statid: Interrupt vector of interrupt being freed. + * + * Remove previously attached callback from VME interrupt priority/vector. + */ void vme_irq_free(struct vme_dev *vdev, int level, int statid) { struct vme_bridge *bridge; @@ -1177,6 +1453,18 @@ void vme_irq_free(struct vme_dev *vdev, int level, int statid) } EXPORT_SYMBOL(vme_irq_free); +/** + * vme_irq_generate - Generate VME interrupt. + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * @level: Interrupt priority at which to assert the interrupt. + * @statid: Interrupt vector to associate with the interrupt. + * + * Generate a VME interrupt of the provided level and with the provided + * statid. + * + * Return: Zero on success, -EINVAL on invalid vme device, level or if the + * function is not supported. Hardware specific errors also possible. + */ int vme_irq_generate(struct vme_dev *vdev, int level, int statid) { struct vme_bridge *bridge; @@ -1201,8 +1489,15 @@ int vme_irq_generate(struct vme_dev *vdev, int level, int statid) } EXPORT_SYMBOL(vme_irq_generate); -/* - * Request the location monitor, return resource or NULL +/** + * vme_lm_request - Request a VME location monitor + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * + * Allocate a location monitor resource to the driver. A location monitor + * allows the driver to monitor accesses to a contiguous number of + * addresses on the VME bus. + * + * Return: Pointer to a VME resource on success or NULL on failure. */ struct vme_resource *vme_lm_request(struct vme_dev *vdev) { @@ -1218,7 +1513,7 @@ struct vme_resource *vme_lm_request(struct vme_dev *vdev) goto err_bus; } - /* Loop through DMA resources */ + /* Loop through LM resources */ list_for_each(lm_pos, &bridge->lm_resources) { lm = list_entry(lm_pos, struct vme_lm_resource, list); @@ -1264,6 +1559,17 @@ struct vme_resource *vme_lm_request(struct vme_dev *vdev) } EXPORT_SYMBOL(vme_lm_request); +/** + * vme_lm_count - Determine number of VME Addresses monitored + * @resource: Pointer to VME location monitor resource. + * + * The number of contiguous addresses monitored is hardware dependent. + * Return the number of contiguous addresses monitored by the + * location monitor. + * + * Return: Count of addresses monitored or -EINVAL when provided with an + * invalid location monitor resource. + */ int vme_lm_count(struct vme_resource *resource) { struct vme_lm_resource *lm; @@ -1279,6 +1585,20 @@ int vme_lm_count(struct vme_resource *resource) } EXPORT_SYMBOL(vme_lm_count); +/** + * vme_lm_set - Configure location monitor + * @resource: Pointer to VME location monitor resource. + * @lm_base: Base address to monitor. + * @aspace: VME address space to monitor. + * @cycle: VME bus cycle type to monitor. + * + * Set the base address, address space and cycle type of accesses to be + * monitored by the location monitor. + * + * Return: Zero on success, -EINVAL when provided with an invalid location + * monitor resource or function is not supported. Hardware specific + * errors may also be returned. + */ int vme_lm_set(struct vme_resource *resource, unsigned long long lm_base, u32 aspace, u32 cycle) { @@ -1301,6 +1621,20 @@ int vme_lm_set(struct vme_resource *resource, unsigned long long lm_base, } EXPORT_SYMBOL(vme_lm_set); +/** + * vme_lm_get - Retrieve location monitor settings + * @resource: Pointer to VME location monitor resource. + * @lm_base: Pointer used to output the base address monitored. + * @aspace: Pointer used to output the address space monitored. + * @cycle: Pointer used to output the VME bus cycle type monitored. + * + * Retrieve the base address, address space and cycle type of accesses to + * be monitored by the location monitor. + * + * Return: Zero on success, -EINVAL when provided with an invalid location + * monitor resource or function is not supported. Hardware specific + * errors may also be returned. + */ int vme_lm_get(struct vme_resource *resource, unsigned long long *lm_base, u32 *aspace, u32 *cycle) { @@ -1323,6 +1657,21 @@ int vme_lm_get(struct vme_resource *resource, unsigned long long *lm_base, } EXPORT_SYMBOL(vme_lm_get); +/** + * vme_lm_attach - Provide callback for location monitor address + * @resource: Pointer to VME location monitor resource. + * @monitor: Offset to which callback should be attached. + * @callback: Pointer to callback function called when triggered. + * @data: Generic pointer that will be passed to the callback function. + * + * Attach a callback to the specificed offset into the location monitors + * monitored addresses. A generic pointer is provided to allow data to be + * passed to the callback when called. + * + * Return: Zero on success, -EINVAL when provided with an invalid location + * monitor resource or function is not supported. Hardware specific + * errors may also be returned. + */ int vme_lm_attach(struct vme_resource *resource, int monitor, void (*callback)(void *), void *data) { @@ -1345,6 +1694,18 @@ int vme_lm_attach(struct vme_resource *resource, int monitor, } EXPORT_SYMBOL(vme_lm_attach); +/** + * vme_lm_detach - Remove callback for location monitor address + * @resource: Pointer to VME location monitor resource. + * @monitor: Offset to which callback should be removed. + * + * Remove the callback associated with the specificed offset into the + * location monitors monitored addresses. + * + * Return: Zero on success, -EINVAL when provided with an invalid location + * monitor resource or function is not supported. Hardware specific + * errors may also be returned. + */ int vme_lm_detach(struct vme_resource *resource, int monitor) { struct vme_bridge *bridge = find_bridge(resource); @@ -1366,6 +1727,18 @@ int vme_lm_detach(struct vme_resource *resource, int monitor) } EXPORT_SYMBOL(vme_lm_detach); +/** + * vme_lm_free - Free allocated VME location monitor + * @resource: Pointer to VME location monitor resource. + * + * Free allocation of a VME location monitor. + * + * WARNING: This function currently expects that any callbacks that have + * been attached to the location monitor have been removed. + * + * Return: Zero on success, -EINVAL when provided with an invalid location + * monitor resource. + */ void vme_lm_free(struct vme_resource *resource) { struct vme_lm_resource *lm; @@ -1392,6 +1765,16 @@ void vme_lm_free(struct vme_resource *resource) } EXPORT_SYMBOL(vme_lm_free); +/** + * vme_slot_num - Retrieve slot ID + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * + * Retrieve the slot ID associated with the provided VME device. + * + * Return: The slot ID on success, -EINVAL if VME bridge cannot be determined + * or the function is not supported. Hardware specific errors may also + * be returned. + */ int vme_slot_num(struct vme_dev *vdev) { struct vme_bridge *bridge; @@ -1411,6 +1794,15 @@ int vme_slot_num(struct vme_dev *vdev) } EXPORT_SYMBOL(vme_slot_num); +/** + * vme_bus_num - Retrieve bus number + * @vdev: Pointer to VME device struct vme_dev assigned to driver instance. + * + * Retrieve the bus enumeration associated with the provided VME device. + * + * Return: The bus number on success, -EINVAL if VME bridge cannot be + * determined. + */ int vme_bus_num(struct vme_dev *vdev) { struct vme_bridge *bridge; @@ -1556,6 +1948,15 @@ static int __vme_register_driver(struct vme_driver *drv, unsigned int ndevs) return err; } +/** + * vme_register_driver - Register a VME driver + * @drv: Pointer to VME driver structure to register. + * @ndevs: Maximum number of devices to allow to be enumerated. + * + * Register a VME device driver with the VME subsystem. + * + * Return: Zero on success, error value on registration failure. + */ int vme_register_driver(struct vme_driver *drv, unsigned int ndevs) { int err; @@ -1576,6 +1977,12 @@ int vme_register_driver(struct vme_driver *drv, unsigned int ndevs) } EXPORT_SYMBOL(vme_register_driver); +/** + * vme_unregister_driver - Unregister a VME driver + * @drv: Pointer to VME driver structure to unregister. + * + * Unregister a VME device driver from the VME subsystem. + */ void vme_unregister_driver(struct vme_driver *drv) { struct vme_dev *dev, *dev_tmp; diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c index 3749db8b43969253c2089419f980d5553ca140c2..97a676bf5989eba00781b1a2c6387eff4b9d53d8 100644 --- a/drivers/w1/masters/matrox_w1.c +++ b/drivers/w1/masters/matrox_w1.c @@ -36,7 +36,6 @@ #include "../w1.h" #include "../w1_int.h" -#include "../w1_log.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Evgeniy Polyakov "); @@ -157,9 +156,6 @@ static int matrox_w1_probe(struct pci_dev *pdev, const struct pci_device_id *ent struct matrox_device *dev; int err; - assert(pdev != NULL); - assert(ent != NULL); - if (pdev->vendor != PCI_VENDOR_ID_MATROX || pdev->device != PCI_DEVICE_ID_MATROX_G400) return -ENODEV; @@ -224,8 +220,6 @@ static void matrox_w1_remove(struct pci_dev *pdev) { struct matrox_device *dev = pci_get_drvdata(pdev); - assert(dev != NULL); - if (dev->found) { w1_remove_master_device(dev->bus_master); iounmap(dev->virt_addr); diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig index 0ef9f2663dbd1f81b5f08b1b667c06303f7fa46b..fb68465908f21371a31f0656d101996d568cb26c 100644 --- a/drivers/w1/slaves/Kconfig +++ b/drivers/w1/slaves/Kconfig @@ -86,6 +86,12 @@ config W1_SLAVE_DS2433_CRC Each block has 30 bytes of data and a two byte CRC16. Full block writes are only allowed if the CRC is valid. +config W1_SLAVE_DS2438 + tristate "DS2438 Smart Battery Monitor 0x26 family support" + help + Say Y here if you want to use a 1-wire + DS2438 Smart Battery Monitor device support + config W1_SLAVE_DS2760 tristate "Dallas 2760 battery monitor chip (HP iPAQ & others)" help diff --git a/drivers/w1/slaves/Makefile b/drivers/w1/slaves/Makefile index b4a358955ef9b89a2bca762be87f3f2f0d78497d..54c63e4203024d2dcc34dd69f31665f19f01aa9e 100644 --- a/drivers/w1/slaves/Makefile +++ b/drivers/w1/slaves/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_W1_SLAVE_DS2406) += w1_ds2406.o obj-$(CONFIG_W1_SLAVE_DS2423) += w1_ds2423.o obj-$(CONFIG_W1_SLAVE_DS2431) += w1_ds2431.o obj-$(CONFIG_W1_SLAVE_DS2433) += w1_ds2433.o +obj-$(CONFIG_W1_SLAVE_DS2438) += w1_ds2438.o obj-$(CONFIG_W1_SLAVE_DS2760) += w1_ds2760.o obj-$(CONFIG_W1_SLAVE_DS2780) += w1_ds2780.o obj-$(CONFIG_W1_SLAVE_DS2781) += w1_ds2781.o diff --git a/drivers/w1/slaves/w1_ds2438.c b/drivers/w1/slaves/w1_ds2438.c new file mode 100644 index 0000000000000000000000000000000000000000..5ededb4965e143ab774d2705b2ad1e6b0d4fa51b --- /dev/null +++ b/drivers/w1/slaves/w1_ds2438.c @@ -0,0 +1,390 @@ +/* + * 1-Wire implementation for the ds2438 chip + * + * Copyright (c) 2017 Mariusz Bialonczyk + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include +#include +#include + +#include "../w1.h" +#include "../w1_family.h" + +#define W1_DS2438_RETRIES 3 + +/* Memory commands */ +#define W1_DS2438_READ_SCRATCH 0xBE +#define W1_DS2438_WRITE_SCRATCH 0x4E +#define W1_DS2438_COPY_SCRATCH 0x48 +#define W1_DS2438_RECALL_MEMORY 0xB8 +/* Register commands */ +#define W1_DS2438_CONVERT_TEMP 0x44 +#define W1_DS2438_CONVERT_VOLTAGE 0xB4 + +#define DS2438_PAGE_SIZE 8 +#define DS2438_ADC_INPUT_VAD 0 +#define DS2438_ADC_INPUT_VDD 1 +#define DS2438_MAX_CONVERSION_TIME 10 /* ms */ + +/* Page #0 definitions */ +#define DS2438_STATUS_REG 0x00 /* Status/Configuration Register */ +#define DS2438_STATUS_IAD (1 << 0) /* Current A/D Control Bit */ +#define DS2438_STATUS_CA (1 << 1) /* Current Accumulator Configuration */ +#define DS2438_STATUS_EE (1 << 2) /* Current Accumulator Shadow Selector bit */ +#define DS2438_STATUS_AD (1 << 3) /* Voltage A/D Input Select Bit */ +#define DS2438_STATUS_TB (1 << 4) /* Temperature Busy Flag */ +#define DS2438_STATUS_NVB (1 << 5) /* Nonvolatile Memory Busy Flag */ +#define DS2438_STATUS_ADB (1 << 6) /* A/D Converter Busy Flag */ + +#define DS2438_TEMP_LSB 0x01 +#define DS2438_TEMP_MSB 0x02 +#define DS2438_VOLTAGE_LSB 0x03 +#define DS2438_VOLTAGE_MSB 0x04 +#define DS2438_CURRENT_LSB 0x05 +#define DS2438_CURRENT_MSB 0x06 +#define DS2438_THRESHOLD 0x07 + +int w1_ds2438_get_page(struct w1_slave *sl, int pageno, u8 *buf) +{ + unsigned int retries = W1_DS2438_RETRIES; + u8 w1_buf[2]; + u8 crc; + size_t count; + + while (retries--) { + crc = 0; + + if (w1_reset_select_slave(sl)) + continue; + w1_buf[0] = W1_DS2438_RECALL_MEMORY; + w1_buf[1] = 0x00; + w1_write_block(sl->master, w1_buf, 2); + + if (w1_reset_select_slave(sl)) + continue; + w1_buf[0] = W1_DS2438_READ_SCRATCH; + w1_buf[1] = 0x00; + w1_write_block(sl->master, w1_buf, 2); + + count = w1_read_block(sl->master, buf, DS2438_PAGE_SIZE + 1); + if (count == DS2438_PAGE_SIZE + 1) { + crc = w1_calc_crc8(buf, DS2438_PAGE_SIZE); + + /* check for correct CRC */ + if ((u8)buf[DS2438_PAGE_SIZE] == crc) + return 0; + } + } + return -1; +} + +int w1_ds2438_get_temperature(struct w1_slave *sl, int16_t *temperature) +{ + unsigned int retries = W1_DS2438_RETRIES; + u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; + unsigned int tm = DS2438_MAX_CONVERSION_TIME; + unsigned long sleep_rem; + int ret; + + mutex_lock(&sl->master->bus_mutex); + + while (retries--) { + if (w1_reset_select_slave(sl)) + continue; + w1_write_8(sl->master, W1_DS2438_CONVERT_TEMP); + + mutex_unlock(&sl->master->bus_mutex); + sleep_rem = msleep_interruptible(tm); + if (sleep_rem != 0) { + ret = -1; + goto post_unlock; + } + + if (mutex_lock_interruptible(&sl->master->bus_mutex) != 0) { + ret = -1; + goto post_unlock; + } + + break; + } + + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { + *temperature = (((int16_t) w1_buf[DS2438_TEMP_MSB]) << 8) | ((uint16_t) w1_buf[DS2438_TEMP_LSB]); + ret = 0; + } else + ret = -1; + + mutex_unlock(&sl->master->bus_mutex); + +post_unlock: + return ret; +} + +int w1_ds2438_change_config_bit(struct w1_slave *sl, u8 mask, u8 value) +{ + unsigned int retries = W1_DS2438_RETRIES; + u8 w1_buf[3]; + u8 status; + int perform_write = 0; + + while (retries--) { + if (w1_reset_select_slave(sl)) + continue; + w1_buf[0] = W1_DS2438_RECALL_MEMORY; + w1_buf[1] = 0x00; + w1_write_block(sl->master, w1_buf, 2); + + if (w1_reset_select_slave(sl)) + continue; + w1_buf[0] = W1_DS2438_READ_SCRATCH; + w1_buf[1] = 0x00; + w1_write_block(sl->master, w1_buf, 2); + + /* reading one byte of result */ + status = w1_read_8(sl->master); + + /* if bit0=1, set a value to a mask for easy compare */ + if (value) + value = mask; + + if ((status & mask) == value) + return 0; /* already set as requested */ + else { + /* changing bit */ + status ^= mask; + perform_write = 1; + } + break; + } + + if (perform_write) { + retries = W1_DS2438_RETRIES; + while (retries--) { + if (w1_reset_select_slave(sl)) + continue; + w1_buf[0] = W1_DS2438_WRITE_SCRATCH; + w1_buf[1] = 0x00; + w1_buf[2] = status; + w1_write_block(sl->master, w1_buf, 3); + + if (w1_reset_select_slave(sl)) + continue; + w1_buf[0] = W1_DS2438_COPY_SCRATCH; + w1_buf[1] = 0x00; + w1_write_block(sl->master, w1_buf, 2); + + return 0; + } + } + return -1; +} + +uint16_t w1_ds2438_get_voltage(struct w1_slave *sl, int adc_input, uint16_t *voltage) +{ + unsigned int retries = W1_DS2438_RETRIES; + u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; + unsigned int tm = DS2438_MAX_CONVERSION_TIME; + unsigned long sleep_rem; + int ret; + + mutex_lock(&sl->master->bus_mutex); + + if (w1_ds2438_change_config_bit(sl, DS2438_STATUS_AD, adc_input)) { + ret = -1; + goto pre_unlock; + } + + while (retries--) { + if (w1_reset_select_slave(sl)) + continue; + w1_write_8(sl->master, W1_DS2438_CONVERT_VOLTAGE); + + mutex_unlock(&sl->master->bus_mutex); + sleep_rem = msleep_interruptible(tm); + if (sleep_rem != 0) { + ret = -1; + goto post_unlock; + } + + if (mutex_lock_interruptible(&sl->master->bus_mutex) != 0) { + ret = -1; + goto post_unlock; + } + + break; + } + + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { + *voltage = (((uint16_t) w1_buf[DS2438_VOLTAGE_MSB]) << 8) | ((uint16_t) w1_buf[DS2438_VOLTAGE_LSB]); + ret = 0; + } else + ret = -1; + +pre_unlock: + mutex_unlock(&sl->master->bus_mutex); + +post_unlock: + return ret; +} + +static ssize_t iad_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + + if (count != 1 || off != 0) + return -EFAULT; + + mutex_lock(&sl->master->bus_mutex); + + if (w1_ds2438_change_config_bit(sl, DS2438_STATUS_IAD, *buf & 0x01) == 0) + ret = 1; + else + ret = -EIO; + + mutex_unlock(&sl->master->bus_mutex); + + return ret; +} + +static ssize_t page0_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + mutex_lock(&sl->master->bus_mutex); + + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { + memcpy(buf, &w1_buf, DS2438_PAGE_SIZE); + ret = DS2438_PAGE_SIZE; + } else + ret = -EIO; + + mutex_unlock(&sl->master->bus_mutex); + + return ret; +} + +static ssize_t temperature_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + ssize_t c = PAGE_SIZE; + int16_t temp; + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + if (w1_ds2438_get_temperature(sl, &temp) == 0) { + c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", temp); + ret = PAGE_SIZE - c; + } else + ret = -EIO; + + return ret; +} + +static ssize_t vad_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + ssize_t c = PAGE_SIZE; + uint16_t voltage; + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VAD, &voltage) == 0) { + c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage); + ret = PAGE_SIZE - c; + } else + ret = -EIO; + + return ret; +} + +static ssize_t vdd_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + ssize_t c = PAGE_SIZE; + uint16_t voltage; + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VDD, &voltage) == 0) { + c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage); + ret = PAGE_SIZE - c; + } else + ret = -EIO; + + return ret; +} + +static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, NULL, iad_write, 1); +static BIN_ATTR_RO(page0, DS2438_PAGE_SIZE); +static BIN_ATTR_RO(temperature, 0/* real length varies */); +static BIN_ATTR_RO(vad, 0/* real length varies */); +static BIN_ATTR_RO(vdd, 0/* real length varies */); + +static struct bin_attribute *w1_ds2438_bin_attrs[] = { + &bin_attr_iad, + &bin_attr_page0, + &bin_attr_temperature, + &bin_attr_vad, + &bin_attr_vdd, + NULL, +}; + +static const struct attribute_group w1_ds2438_group = { + .bin_attrs = w1_ds2438_bin_attrs, +}; + +static const struct attribute_group *w1_ds2438_groups[] = { + &w1_ds2438_group, + NULL, +}; + +static struct w1_family_ops w1_ds2438_fops = { + .groups = w1_ds2438_groups, +}; + +static struct w1_family w1_ds2438_family = { + .fid = W1_FAMILY_DS2438, + .fops = &w1_ds2438_fops, +}; +module_w1_family(w1_ds2438_family); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mariusz Bialonczyk "); +MODULE_DESCRIPTION("1-wire driver for Maxim/Dallas DS2438 Smart Battery Monitor"); +MODULE_ALIAS("w1-family-" __stringify(W1_FAMILY_DS2438)); diff --git a/drivers/w1/slaves/w1_ds2760.h b/drivers/w1/slaves/w1_ds2760.h index 58e774141568eb11846fab47ae6e9c822993d673..24168c94eeaef0f8265de85f161fef0914d2eb05 100644 --- a/drivers/w1/slaves/w1_ds2760.h +++ b/drivers/w1/slaves/w1_ds2760.h @@ -24,11 +24,13 @@ #define DS2760_DATA_SIZE 0x40 #define DS2760_PROTECTION_REG 0x00 + #define DS2760_STATUS_REG 0x01 - #define DS2760_STATUS_IE (1 << 2) - #define DS2760_STATUS_SWEN (1 << 3) - #define DS2760_STATUS_RNAOP (1 << 4) - #define DS2760_STATUS_PMOD (1 << 5) +#define DS2760_STATUS_IE (1 << 2) +#define DS2760_STATUS_SWEN (1 << 3) +#define DS2760_STATUS_RNAOP (1 << 4) +#define DS2760_STATUS_PMOD (1 << 5) + #define DS2760_EEPROM_REG 0x07 #define DS2760_SPECIAL_FEATURE_REG 0x08 #define DS2760_VOLTAGE_MSB 0x0c diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 90a3d9338fd282a5bc1c8a678c4a505544b86f70..8511d1685db9e8fb3c1dca0d2832ae641353ad4c 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -29,7 +29,6 @@ #include #include "w1.h" -#include "w1_log.h" #include "w1_int.h" #include "w1_family.h" #include "w1_netlink.h" diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h index c4a6b257a367ffc9e4ce0cf108623ca052c4d84e..869a3ff87d2952ef062ca401d63d947ab7621475 100644 --- a/drivers/w1/w1_family.h +++ b/drivers/w1/w1_family.h @@ -29,6 +29,7 @@ #define W1_COUNTER_DS2423 0x1D #define W1_THERM_DS1822 0x22 #define W1_EEPROM_DS2433 0x23 +#define W1_FAMILY_DS2438 0x26 #define W1_THERM_DS18B20 0x28 #define W1_FAMILY_DS2408 0x29 #define W1_EEPROM_DS2431 0x2D diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 2cae7b29bb5fb5b72803b9d82d65d636b130bacc..1072a2e620bb16f7546c4c0d89da27b998a3fed3 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -22,7 +22,6 @@ #include #include "w1.h" -#include "w1_log.h" #include "w1_netlink.h" #include "w1_int.h" diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c index de8bebc278967552222d025dc143054af040a8af..1134e6b1eb023b26ea2f978c046d7796be5f69f6 100644 --- a/drivers/w1/w1_io.c +++ b/drivers/w1/w1_io.c @@ -19,7 +19,6 @@ #include #include "w1.h" -#include "w1_log.h" static int w1_delay_parm = 1; module_param_named(delay_coef, w1_delay_parm, int, 0); diff --git a/drivers/w1/w1_log.h b/drivers/w1/w1_log.h deleted file mode 100644 index dd1422b6afbb11a9fb2376621dff049378c4fc42..0000000000000000000000000000000000000000 --- a/drivers/w1/w1_log.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2004 Evgeniy Polyakov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __W1_LOG_H -#define __W1_LOG_H - -#define DEBUG - -#ifdef W1_DEBUG -# define assert(expr) do {} while (0) -#else -# define assert(expr) \ - if(unlikely(!(expr))) { \ - pr_err("Assertion failed! %s,%s,%s,line=%d\n", \ - #expr, __FILE__, __func__, __LINE__); \ - } -#endif - -#endif /* __W1_LOG_H */ - diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 49e520ca79c5395215701d507c8323391cc6ce93..027950f997d12cb91642ebedf8b18e3242b798c1 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -18,13 +18,10 @@ #include #include "w1.h" -#include "w1_log.h" #include "w1_netlink.h" #if defined(CONFIG_W1_CON) && (defined(CONFIG_CONNECTOR) || (defined(CONFIG_CONNECTOR_MODULE) && defined(CONFIG_W1_MODULE))) -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) - /* Bundle together everything required to process a request in one memory * allocation. */ @@ -598,7 +595,7 @@ static void w1_cn_callback(struct cn_msg *cn, struct netlink_skb_parms *nsp) sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd)); } - reply_size = MIN(CONNECTOR_MAX_MSG_SIZE, reply_size); + reply_size = min(CONNECTOR_MAX_MSG_SIZE, reply_size); /* allocate space for the block, a copy of the original message, * one node per cmd to point into the original message, diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 52a70ee6014fa866762042f78b87f3a4c7c7e794..8b9049dac0948db6cc0fca2f25c9f4e5f5cae6bd 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -452,7 +452,7 @@ config DAVINCI_WATCHDOG config ORION_WATCHDOG tristate "Orion watchdog" - depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || COMPILE_TEST + depends on ARCH_ORION5X || ARCH_DOVE || MACH_DOVE || ARCH_MVEBU || (COMPILE_TEST && !ARCH_EBSA110) depends on ARM select WATCHDOG_CORE help diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index 6fce17d5b9f1a27f94e711d0e69d9844c920a91d..a5775dfd8d5fc226e31ca59abb172f504f7efcda 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -304,6 +304,8 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) if (!wdt) return -ENOMEM; + spin_lock_init(&wdt->lock); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt->base = devm_ioremap_resource(dev, res); if (IS_ERR(wdt->base)) @@ -316,7 +318,6 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) return ret; } - spin_lock_init(&wdt->lock); platform_set_drvdata(pdev, wdt); watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt); bcm_kona_wdt_wdd.parent = &pdev->dev; diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c index 8d61e8bfe60b1a418eab489e966652840dedccff..86e0b5d2e7616690007792833e96cc9086ee0144 100644 --- a/drivers/watchdog/cadence_wdt.c +++ b/drivers/watchdog/cadence_wdt.c @@ -49,7 +49,7 @@ /* Counter maximum value */ #define CDNS_WDT_COUNTER_MAX 0xFFF -static int wdt_timeout = CDNS_WDT_DEFAULT_TIMEOUT; +static int wdt_timeout; static int nowayout = WATCHDOG_NOWAYOUT; module_param(wdt_timeout, int, 0); diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c index 6d03e8e30f8bf4369cfaa6cecde6cb2a6dccb03c..6c3f78e45c265da4b6f9cfefdb8af16a96b30874 100644 --- a/drivers/watchdog/cpu5wdt.c +++ b/drivers/watchdog/cpu5wdt.c @@ -289,7 +289,7 @@ MODULE_DESCRIPTION("sma cpu5 watchdog driver"); MODULE_SUPPORTED_DEVICE("sma cpu5 watchdog"); MODULE_LICENSE("GPL"); -module_param(port, int, 0); +module_param_hw(port, int, ioport, 0); MODULE_PARM_DESC(port, "base address of watchdog card, default is 0x91"); module_param(verbose, int, 0); diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c index 23ee53240c4c1858f7d8aa0f7530d43dcc44f650..38e96712264f92648304ba8282a2e4ba8be90319 100644 --- a/drivers/watchdog/eurotechwdt.c +++ b/drivers/watchdog/eurotechwdt.c @@ -97,9 +97,9 @@ MODULE_PARM_DESC(nowayout, #define WDT_TIMER_CFG 0xf3 -module_param(io, int, 0); +module_param_hw(io, int, ioport, 0); MODULE_PARM_DESC(io, "Eurotech WDT io port (default=0x3f0)"); -module_param(irq, int, 0); +module_param_hw(irq, int, irq, 0); MODULE_PARM_DESC(irq, "Eurotech WDT irq (default=10)"); module_param(ev, charp, 0); MODULE_PARM_DESC(ev, "Eurotech WDT event type (default is `int')"); diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 70c7194e2810dff05ddc0535dc5d8d7579ab7e77..67fbe35ce7cfe5438f18b0ffb4ebdbfdc057b8fe 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #endif /* CONFIG_HPWDT_NMI_DECODING */ #include #include diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 3d0abc0d59b4f176d62ce30ebc236471cde051d2..c4f65873bfa453b1385d2a1371ba10c741df2be9 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -106,6 +106,10 @@ struct iTCO_wdt_private { struct pci_dev *pci_dev; /* whether or not the watchdog has been suspended */ bool suspended; + /* no reboot API private data */ + void *no_reboot_priv; + /* no reboot update function pointer */ + int (*update_no_reboot_bit)(void *p, bool set); }; /* module parameters */ @@ -170,46 +174,68 @@ static inline u32 no_reboot_bit(struct iTCO_wdt_private *p) return enable_bit; } -static void iTCO_wdt_set_NO_REBOOT_bit(struct iTCO_wdt_private *p) +static int update_no_reboot_bit_def(void *priv, bool set) { - u32 val32; + return 0; +} - /* Set the NO_REBOOT bit: this disables reboots */ - if (p->iTCO_version >= 2) { - val32 = readl(p->gcs_pmc); - val32 |= no_reboot_bit(p); - writel(val32, p->gcs_pmc); - } else if (p->iTCO_version == 1) { - pci_read_config_dword(p->pci_dev, 0xd4, &val32); +static int update_no_reboot_bit_pci(void *priv, bool set) +{ + struct iTCO_wdt_private *p = priv; + u32 val32 = 0, newval32 = 0; + + pci_read_config_dword(p->pci_dev, 0xd4, &val32); + if (set) val32 |= no_reboot_bit(p); - pci_write_config_dword(p->pci_dev, 0xd4, val32); - } + else + val32 &= ~no_reboot_bit(p); + pci_write_config_dword(p->pci_dev, 0xd4, val32); + pci_read_config_dword(p->pci_dev, 0xd4, &newval32); + + /* make sure the update is successful */ + if (val32 != newval32) + return -EIO; + + return 0; } -static int iTCO_wdt_unset_NO_REBOOT_bit(struct iTCO_wdt_private *p) +static int update_no_reboot_bit_mem(void *priv, bool set) { - u32 enable_bit = no_reboot_bit(p); - u32 val32 = 0; + struct iTCO_wdt_private *p = priv; + u32 val32 = 0, newval32 = 0; - /* Unset the NO_REBOOT bit: this enables reboots */ - if (p->iTCO_version >= 2) { - val32 = readl(p->gcs_pmc); - val32 &= ~enable_bit; - writel(val32, p->gcs_pmc); + val32 = readl(p->gcs_pmc); + if (set) + val32 |= no_reboot_bit(p); + else + val32 &= ~no_reboot_bit(p); + writel(val32, p->gcs_pmc); + newval32 = readl(p->gcs_pmc); - val32 = readl(p->gcs_pmc); - } else if (p->iTCO_version == 1) { - pci_read_config_dword(p->pci_dev, 0xd4, &val32); - val32 &= ~enable_bit; - pci_write_config_dword(p->pci_dev, 0xd4, val32); + /* make sure the update is successful */ + if (val32 != newval32) + return -EIO; - pci_read_config_dword(p->pci_dev, 0xd4, &val32); + return 0; +} + +static void iTCO_wdt_no_reboot_bit_setup(struct iTCO_wdt_private *p, + struct itco_wdt_platform_data *pdata) +{ + if (pdata->update_no_reboot_bit) { + p->update_no_reboot_bit = pdata->update_no_reboot_bit; + p->no_reboot_priv = pdata->no_reboot_priv; + return; } - if (val32 & enable_bit) - return -EIO; + if (p->iTCO_version >= 2) + p->update_no_reboot_bit = update_no_reboot_bit_mem; + else if (p->iTCO_version == 1) + p->update_no_reboot_bit = update_no_reboot_bit_pci; + else + p->update_no_reboot_bit = update_no_reboot_bit_def; - return 0; + p->no_reboot_priv = p; } static int iTCO_wdt_start(struct watchdog_device *wd_dev) @@ -222,7 +248,7 @@ static int iTCO_wdt_start(struct watchdog_device *wd_dev) iTCO_vendor_pre_start(p->smi_res, wd_dev->timeout); /* disable chipset's NO_REBOOT bit */ - if (iTCO_wdt_unset_NO_REBOOT_bit(p)) { + if (p->update_no_reboot_bit(p->no_reboot_priv, false)) { spin_unlock(&p->io_lock); pr_err("failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n"); return -EIO; @@ -263,7 +289,7 @@ static int iTCO_wdt_stop(struct watchdog_device *wd_dev) val = inw(TCO1_CNT(p)); /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ - iTCO_wdt_set_NO_REBOOT_bit(p); + p->update_no_reboot_bit(p->no_reboot_priv, true); spin_unlock(&p->io_lock); @@ -280,16 +306,15 @@ static int iTCO_wdt_ping(struct watchdog_device *wd_dev) iTCO_vendor_pre_keepalive(p->smi_res, wd_dev->timeout); + /* Reset the timeout status bit so that the timer + * needs to count down twice again before rebooting */ + outw(0x0008, TCO1_STS(p)); /* write 1 to clear bit */ + /* Reload the timer by writing to the TCO Timer Counter register */ - if (p->iTCO_version >= 2) { + if (p->iTCO_version >= 2) outw(0x01, TCO_RLD(p)); - } else if (p->iTCO_version == 1) { - /* Reset the timeout status bit so that the timer - * needs to count down twice again before rebooting */ - outw(0x0008, TCO1_STS(p)); /* write 1 to clear bit */ - + else if (p->iTCO_version == 1) outb(0x01, TCO_RLD(p)); - } spin_unlock(&p->io_lock); return 0; @@ -302,11 +327,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t) unsigned char val8; unsigned int tmrval; - tmrval = seconds_to_ticks(p, t); - - /* For TCO v1 the timer counts down twice before rebooting */ - if (p->iTCO_version == 1) - tmrval /= 2; + /* The timer counts down twice before rebooting */ + tmrval = seconds_to_ticks(p, t) / 2; /* from the specs: */ /* "Values of 0h-3h are ignored and should not be attempted" */ @@ -359,6 +381,8 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev) spin_lock(&p->io_lock); val16 = inw(TCO_RLD(p)); val16 &= 0x3ff; + if (!(inw(TCO1_STS(p)) & 0x0008)) + val16 += (inw(TCOv2_TMR(p)) & 0x3ff); spin_unlock(&p->io_lock); time_left = ticks_to_seconds(p, val16); @@ -428,11 +452,13 @@ static int iTCO_wdt_probe(struct platform_device *pdev) p->iTCO_version = pdata->version; p->pci_dev = to_pci_dev(dev->parent); + iTCO_wdt_no_reboot_bit_setup(p, pdata); + /* * Get the Memory-Mapped GCS or PMC register, we need it for the * NO_REBOOT flag (TCO v2 and v3). */ - if (p->iTCO_version >= 2) { + if (p->iTCO_version >= 2 && !pdata->update_no_reboot_bit) { p->gcs_pmc_res = platform_get_resource(pdev, IORESOURCE_MEM, ICH_RES_MEM_GCS_PMC); @@ -442,14 +468,14 @@ static int iTCO_wdt_probe(struct platform_device *pdev) } /* Check chipset's NO_REBOOT bit */ - if (iTCO_wdt_unset_NO_REBOOT_bit(p) && + if (p->update_no_reboot_bit(p->no_reboot_priv, false) && iTCO_vendor_check_noreboot_on()) { pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n"); return -ENODEV; /* Cannot reset NO_REBOOT bit */ } /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ - iTCO_wdt_set_NO_REBOOT_bit(p); + p->update_no_reboot_bit(p->no_reboot_priv, true); /* The TCO logic uses the TCO_EN bit in the SMI_EN register */ if (!devm_request_region(dev, p->smi_res->start, diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c index 9f15dd9435d1a4efe5dd69fbab25363be026fe46..06a892e36a8d889ab1c6936b3209d8cd61a2f8c1 100644 --- a/drivers/watchdog/pc87413_wdt.c +++ b/drivers/watchdog/pc87413_wdt.c @@ -579,7 +579,7 @@ MODULE_AUTHOR("Marcus Junker "); MODULE_DESCRIPTION("PC87413 WDT driver"); MODULE_LICENSE("GPL"); -module_param(io, int, 0); +module_param_hw(io, int, ioport, 0); MODULE_PARM_DESC(io, MODNAME " I/O port (default: " __MODULE_STRING(IO_DEFAULT) ")."); diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 99ebf6ea3de648d881de4ef1f609d1f4d29f5ca0..5615f40139246a19de1be39df6f6ca60b7a705b9 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -630,6 +630,9 @@ static int usb_pcwd_probe(struct usb_interface *interface, return -ENODEV; } + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* check out the endpoint: it has to be Interrupt & IN */ endpoint = &iface_desc->endpoint[0].desc; diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c index f709962018ac260107dd2c419c1fbcdfafd4599b..362fd229786df6cea1600108a9060b89b800f475 100644 --- a/drivers/watchdog/sama5d4_wdt.c +++ b/drivers/watchdog/sama5d4_wdt.c @@ -6,6 +6,7 @@ * Licensed under GPLv2. */ +#include #include #include #include @@ -29,6 +30,7 @@ struct sama5d4_wdt { struct watchdog_device wdd; void __iomem *reg_base; u32 mr; + unsigned long last_ping; }; static int wdt_timeout = WDT_DEFAULT_TIMEOUT; @@ -44,11 +46,34 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +#define wdt_enabled (!(wdt->mr & AT91_WDT_WDDIS)) + #define wdt_read(wdt, field) \ readl_relaxed((wdt)->reg_base + (field)) -#define wdt_write(wtd, field, val) \ - writel_relaxed((val), (wdt)->reg_base + (field)) +/* 4 slow clock periods is 4/32768 = 122.07µs*/ +#define WDT_DELAY usecs_to_jiffies(123) + +static void wdt_write(struct sama5d4_wdt *wdt, u32 field, u32 val) +{ + /* + * WDT_CR and WDT_MR must not be modified within three slow clock + * periods following a restart of the watchdog performed by a write + * access in WDT_CR. + */ + while (time_before(jiffies, wdt->last_ping + WDT_DELAY)) + usleep_range(30, 125); + writel_relaxed(val, wdt->reg_base + field); + wdt->last_ping = jiffies; +} + +static void wdt_write_nosleep(struct sama5d4_wdt *wdt, u32 field, u32 val) +{ + if (time_before(jiffies, wdt->last_ping + WDT_DELAY)) + udelay(123); + writel_relaxed(val, wdt->reg_base + field); + wdt->last_ping = jiffies; +} static int sama5d4_wdt_start(struct watchdog_device *wdd) { @@ -89,7 +114,16 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd, wdt->mr &= ~AT91_WDT_WDD; wdt->mr |= AT91_WDT_SET_WDV(value); wdt->mr |= AT91_WDT_SET_WDD(value); - wdt_write(wdt, AT91_WDT_MR, wdt->mr); + + /* + * WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When + * setting the WDDIS bit, and while it is set, the fields WDV and WDD + * must not be modified. + * If the watchdog is enabled, then the timeout can be updated. Else, + * wait that the user enables it. + */ + if (wdt_enabled) + wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS); wdd->timeout = timeout; @@ -145,23 +179,21 @@ static int of_sama5d4_wdt_init(struct device_node *np, struct sama5d4_wdt *wdt) static int sama5d4_wdt_init(struct sama5d4_wdt *wdt) { - struct watchdog_device *wdd = &wdt->wdd; - u32 value = WDT_SEC2TICKS(wdd->timeout); u32 reg; - /* - * Because the fields WDV and WDD must not be modified when the WDDIS - * bit is set, so clear the WDDIS bit before writing the WDT_MR. + * When booting and resuming, the bootloader may have changed the + * watchdog configuration. + * If the watchdog is already running, we can safely update it. + * Else, we have to disable it properly. */ - reg = wdt_read(wdt, AT91_WDT_MR); - reg &= ~AT91_WDT_WDDIS; - wdt_write(wdt, AT91_WDT_MR, reg); - - wdt->mr |= AT91_WDT_SET_WDD(value); - wdt->mr |= AT91_WDT_SET_WDV(value); - - wdt_write(wdt, AT91_WDT_MR, wdt->mr); - + if (wdt_enabled) { + wdt_write_nosleep(wdt, AT91_WDT_MR, wdt->mr); + } else { + reg = wdt_read(wdt, AT91_WDT_MR); + if (!(reg & AT91_WDT_WDDIS)) + wdt_write_nosleep(wdt, AT91_WDT_MR, + reg | AT91_WDT_WDDIS); + } return 0; } @@ -172,6 +204,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) struct resource *res; void __iomem *regs; u32 irq = 0; + u32 timeout; int ret; wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); @@ -184,6 +217,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) wdd->ops = &sama5d4_wdt_ops; wdd->min_timeout = MIN_WDT_TIMEOUT; wdd->max_timeout = MAX_WDT_TIMEOUT; + wdt->last_ping = jiffies; watchdog_set_drvdata(wdd, wdt); @@ -221,6 +255,11 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) return ret; } + timeout = WDT_SEC2TICKS(wdd->timeout); + + wdt->mr |= AT91_WDT_SET_WDD(timeout); + wdt->mr |= AT91_WDT_SET_WDV(timeout); + ret = sama5d4_wdt_init(wdt); if (ret) return ret; @@ -263,9 +302,7 @@ static int sama5d4_wdt_resume(struct device *dev) { struct sama5d4_wdt *wdt = dev_get_drvdata(dev); - wdt_write(wdt, AT91_WDT_MR, wdt->mr & ~AT91_WDT_WDDIS); - if (wdt->mr & AT91_WDT_WDDIS) - wdt_write(wdt, AT91_WDT_MR, wdt->mr); + sama5d4_wdt_init(wdt); return 0; } diff --git a/drivers/watchdog/sc1200wdt.c b/drivers/watchdog/sc1200wdt.c index 131193a7acdfd0bb07660095e0ff57c500eed6cb..b34d3d5ba632398e357261bd5e666b9e9efc2157 100644 --- a/drivers/watchdog/sc1200wdt.c +++ b/drivers/watchdog/sc1200wdt.c @@ -88,7 +88,7 @@ MODULE_PARM_DESC(isapnp, "When set to 0 driver ISA PnP support will be disabled"); #endif -module_param(io, int, 0); +module_param_hw(io, int, ioport, 0); MODULE_PARM_DESC(io, "io port"); module_param(timeout, int, 0); MODULE_PARM_DESC(timeout, "range is 0-255 minutes, default is 1"); diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c index e0206b5b7d8955526f9598a6cc012fcdfd090873..e481fbbc4ae706b601dd64e21adcb58a5483c1f0 100644 --- a/drivers/watchdog/wdt.c +++ b/drivers/watchdog/wdt.c @@ -78,9 +78,9 @@ static int irq = 11; static DEFINE_SPINLOCK(wdt_lock); -module_param(io, int, 0); +module_param_hw(io, int, ioport, 0); MODULE_PARM_DESC(io, "WDT io port (default=0x240)"); -module_param(irq, int, 0); +module_param_hw(irq, int, irq, 0); MODULE_PARM_DESC(irq, "WDT irq (default=11)"); /* Support for the Fan Tachometer on the WDT501-P */ diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c index 48b2c058b00910ffee5a313812b5cb4d0ee135c5..bc7addc2dc06d0f9ca4bfa09abdbedfb4252090f 100644 --- a/drivers/watchdog/wdt_pci.c +++ b/drivers/watchdog/wdt_pci.c @@ -332,7 +332,7 @@ static irqreturn_t wdtpci_interrupt(int irq, void *dev_id) pr_crit("Would Reboot\n"); #else pr_crit("Initiating system reboot\n"); - emergency_restart(NULL); + emergency_restart(); #endif #else pr_crit("Reset in 5ms\n"); diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c index e290d5a13a6d5fa621f650ef03f3b8d7f00f7bb5..c98252733c3069dc5528691cecdee279c8f35c5a 100644 --- a/drivers/watchdog/zx2967_wdt.c +++ b/drivers/watchdog/zx2967_wdt.c @@ -211,10 +211,8 @@ static int zx2967_wdt_probe(struct platform_device *pdev) base = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt->reg_base = devm_ioremap_resource(dev, base); - if (IS_ERR(wdt->reg_base)) { - dev_err(dev, "ioremap failed\n"); + if (IS_ERR(wdt->reg_base)) return PTR_ERR(wdt->reg_base); - } zx2967_wdt_reset_sysctrl(dev); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a6d4378eb8d9fc8aeea4fe1961f5c0f030ce2060..50dcb68d8070756ef8e4a67af8200c9d5e4c8c20 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -709,6 +709,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) } EXPORT_SYMBOL(free_xenballooned_pages); +#ifdef CONFIG_XEN_PV static void __init balloon_add_region(unsigned long start_pfn, unsigned long pages) { @@ -732,19 +733,22 @@ static void __init balloon_add_region(unsigned long start_pfn, balloon_stats.total_pages += extra_pfn_end - start_pfn; } +#endif static int __init balloon_init(void) { - int i; - if (!xen_domain()) return -ENODEV; pr_info("Initialising balloon driver\n"); +#ifdef CONFIG_XEN_PV balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn) : get_num_physpages(); +#else + balloon_stats.current_pages = get_num_physpages(); +#endif balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -761,14 +765,20 @@ static int __init balloon_init(void) register_sysctl_table(xen_root); #endif - /* - * Initialize the balloon with pages from the extra memory - * regions (see arch/x86/xen/setup.c). - */ - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) - if (xen_extra_mem[i].n_pfns) - balloon_add_region(xen_extra_mem[i].start_pfn, - xen_extra_mem[i].n_pfns); +#ifdef CONFIG_XEN_PV + { + int i; + + /* + * Initialize the balloon with pages from the extra memory + * regions (see arch/x86/xen/setup.c). + */ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) + if (xen_extra_mem[i].n_pfns) + balloon_add_region(xen_extra_mem[i].start_pfn, + xen_extra_mem[i].n_pfns); + } +#endif return 0; } diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index 22f71ffd340677cd8c0dae3c06614041e373c1b9..9243a9051078229172daea01e0d621ac089ac050 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -263,3 +264,20 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, return efi_data(op).status; } EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); + +void xen_efi_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + switch (reset_type) { + case EFI_RESET_COLD: + case EFI_RESET_WARM: + xen_reboot(SHUTDOWN_reboot); + break; + case EFI_RESET_SHUTDOWN: + xen_reboot(SHUTDOWN_poweroff); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL_GPL(xen_efi_reset_system); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6a53577772c92127aa25dac3cd7bc7fe18fe09d9..b52852f38cff9d07e3326e8659113d9fdb2bba39 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1312,6 +1312,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (!VALID_EVTCHN(evtchn)) return -1; + if (!xen_support_evtchn_rebind()) + return -1; + /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; bind_vcpu.vcpu = xen_vcpu_nr(tcpu); @@ -1646,14 +1649,20 @@ void xen_callback_vector(void) int rc; uint64_t callback_via; - callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); - rc = xen_set_callback_via(callback_via); - BUG_ON(rc); - pr_info("Xen HVM callback vector for event delivery is enabled\n"); - /* in the restore case the vector has already been allocated */ - if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, - xen_hvm_callback_vector); + if (xen_have_vector_callback) { + callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); + rc = xen_set_callback_via(callback_via); + if (rc) { + pr_err("Request for Xen HVM callback vector failed\n"); + xen_have_vector_callback = 0; + return; + } + pr_info("Xen HVM callback vector for event delivery is enabled\n"); + /* in the restore case the vector has already been allocated */ + if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, + xen_hvm_callback_vector); + } } #else void xen_callback_vector(void) {} diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 6890897a6f30edab0c0d87926942463f9b481aa9..10f1ef5826595ae53e927243bcdfa5f66c4121c6 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -87,18 +87,6 @@ struct user_evtchn { bool enabled; }; -static evtchn_port_t *evtchn_alloc_ring(unsigned int size) -{ - evtchn_port_t *ring; - size_t s = size * sizeof(*ring); - - ring = kmalloc(s, GFP_KERNEL); - if (!ring) - ring = vmalloc(s); - - return ring; -} - static void evtchn_free_ring(evtchn_port_t *ring) { kvfree(ring); @@ -334,7 +322,7 @@ static int evtchn_resize_ring(struct per_user_data *u) else new_size = 2 * u->ring_size; - new_ring = evtchn_alloc_ring(new_size); + new_ring = kvmalloc(new_size * sizeof(*new_ring), GFP_KERNEL); if (!new_ring) return -ENOMEM; diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 2a165cc8a43cd6768529ffe48126c172e5b9f7df..1275df83070f2186388cdf09aab3bbf1facf1326 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -90,8 +90,10 @@ static int xen_allocate_irq(struct pci_dev *pdev) static int platform_pci_resume(struct pci_dev *pdev) { int err; - if (!xen_pv_domain()) + + if (xen_have_vector_callback) return 0; + err = xen_set_callback_via(callback_via); if (err) { dev_err(&pdev->dev, "platform_pci_resume failure!\n"); @@ -137,15 +139,7 @@ static int platform_pci_probe(struct pci_dev *pdev, platform_mmio = mmio_addr; platform_mmiolen = mmio_len; - - /* - * Xen HVM guests always use the vector callback mechanism. - * L1 Dom0 in a nested Xen environment is a PV guest inside in an - * HVM environment. It needs the platform-pci driver to get - * notifications from L0 Xen, but it cannot use the vector callback - * as it is not exported by L1 Xen. - */ - if (xen_pv_domain()) { + if (!xen_have_vector_callback) { ret = xen_allocate_irq(pdev); if (ret) { dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 7a92a5e1d40c6f17227936ee2b6925286deab511..feca75b07fddce01e6e121542d3e0b74d321f85f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -362,8 +362,8 @@ static int mmap_batch_fn(void *data, int nr, void *state) st->global_error = 1; } } - st->va += PAGE_SIZE * nr; - st->index += nr; + st->va += XEN_PAGE_SIZE * nr; + st->index += nr / XEN_PFN_PER_PAGE; return 0; } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index e8cef1ad0fe31e0139903399d70730c7eafdc399..8dab0d3dc1726b0f8b19e06b26208969fc33a0ea 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -693,8 +693,8 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long attrs) { #if defined(CONFIG_ARM) || defined(CONFIG_ARM64) - if (__generic_dma_ops(dev)->mmap) - return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, + if (xen_get_dma_ops(dev)->mmap) + return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); #endif return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); @@ -711,7 +711,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, unsigned long attrs) { #if defined(CONFIG_ARM) || defined(CONFIG_ARM64) - if (__generic_dma_ops(dev)->get_sgtable) { + if (xen_get_dma_ops(dev)->get_sgtable) { #if 0 /* * This check verifies that the page belongs to the current domain and @@ -721,7 +721,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); BUG_ON (!page_is_ram(bfn)); #endif - return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, + return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, handle, size, attrs); } #endif diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 328c3987b112dcf84fe40005d936bfbfed601f91..967f069385d0cf1fa9933cdeb0f8804dc0324e06 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -44,14 +44,14 @@ static const struct file_operations capabilities_file_ops = { static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr xenfs_files[] = { + static const struct tree_descr xenfs_files[] = { [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, {""}, }; - static struct tree_descr xenfs_init_files[] = { + static const struct tree_descr xenfs_init_files[] = { [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c index eacae1434b73fd7472e2d81b5b86ebc214f3e6af..fa23b7366b98ccbea916e05ad1d810e4c5ad0a50 100644 --- a/drivers/zorro/zorro-driver.c +++ b/drivers/zorro/zorro-driver.c @@ -14,6 +14,8 @@ #include #include +#include "zorro.h" + /** * zorro_match_device - Tell if a Zorro device structure has a matching @@ -161,12 +163,13 @@ static int zorro_uevent(struct device *dev, struct kobj_uevent_env *env) } struct bus_type zorro_bus_type = { - .name = "zorro", - .dev_name = "zorro", - .match = zorro_bus_match, - .uevent = zorro_uevent, - .probe = zorro_device_probe, - .remove = zorro_device_remove, + .name = "zorro", + .dev_name = "zorro", + .dev_groups = zorro_device_attribute_groups, + .match = zorro_bus_match, + .uevent = zorro_uevent, + .probe = zorro_device_probe, + .remove = zorro_device_remove, }; EXPORT_SYMBOL(zorro_bus_type); diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c index 9282dbf5abdba6b01e059a45e7fc9895002dbd2d..3d34dba9bb2dd488ffe509792940e51336c1852e 100644 --- a/drivers/zorro/zorro-sysfs.c +++ b/drivers/zorro/zorro-sysfs.c @@ -23,33 +23,33 @@ /* show configuration fields */ #define zorro_config_attr(name, field, format_string) \ -static ssize_t \ -show_##name(struct device *dev, struct device_attribute *attr, char *buf) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ struct zorro_dev *z; \ \ z = to_zorro_dev(dev); \ return sprintf(buf, format_string, z->field); \ } \ -static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL); +static DEVICE_ATTR_RO(name); zorro_config_attr(id, id, "0x%08x\n"); zorro_config_attr(type, rom.er_Type, "0x%02x\n"); zorro_config_attr(slotaddr, slotaddr, "0x%04x\n"); zorro_config_attr(slotsize, slotsize, "0x%04x\n"); -static ssize_t -show_serial(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct zorro_dev *z; z = to_zorro_dev(dev); return sprintf(buf, "0x%08x\n", be32_to_cpu(z->rom.er_SerialNumber)); } +static DEVICE_ATTR_RO(serial); -static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); - -static ssize_t zorro_show_resource(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t resource_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct zorro_dev *z = to_zorro_dev(dev); @@ -58,8 +58,27 @@ static ssize_t zorro_show_resource(struct device *dev, struct device_attribute * (unsigned long)zorro_resource_end(z), zorro_resource_flags(z)); } +static DEVICE_ATTR_RO(resource); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct zorro_dev *z = to_zorro_dev(dev); -static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL); + return sprintf(buf, ZORRO_DEVICE_MODALIAS_FMT "\n", z->id); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *zorro_device_attrs[] = { + &dev_attr_id.attr, + &dev_attr_type.attr, + &dev_attr_serial.attr, + &dev_attr_slotaddr.attr, + &dev_attr_slotsize.attr, + &dev_attr_resource.attr, + &dev_attr_modalias.attr, + NULL +}; static ssize_t zorro_read_config(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -88,32 +107,17 @@ static struct bin_attribute zorro_config_attr = { .read = zorro_read_config, }; -static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct zorro_dev *z = to_zorro_dev(dev); - - return sprintf(buf, ZORRO_DEVICE_MODALIAS_FMT "\n", z->id); -} - -static DEVICE_ATTR(modalias, S_IRUGO, modalias_show, NULL); +static struct bin_attribute *zorro_device_bin_attrs[] = { + &zorro_config_attr, + NULL +}; -int zorro_create_sysfs_dev_files(struct zorro_dev *z) -{ - struct device *dev = &z->dev; - int error; - - /* current configuration's attributes */ - if ((error = device_create_file(dev, &dev_attr_id)) || - (error = device_create_file(dev, &dev_attr_type)) || - (error = device_create_file(dev, &dev_attr_serial)) || - (error = device_create_file(dev, &dev_attr_slotaddr)) || - (error = device_create_file(dev, &dev_attr_slotsize)) || - (error = device_create_file(dev, &dev_attr_resource)) || - (error = device_create_file(dev, &dev_attr_modalias)) || - (error = sysfs_create_bin_file(&dev->kobj, &zorro_config_attr))) - return error; - - return 0; -} +static const struct attribute_group zorro_device_attr_group = { + .attrs = zorro_device_attrs, + .bin_attrs = zorro_device_bin_attrs, +}; +const struct attribute_group *zorro_device_attribute_groups[] = { + &zorro_device_attr_group, + NULL +}; diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c index d295d9878dff7594980481843ee9688442cee1f4..cc1b1ac57d61e8b75ffde0ae2ea9c8f3a66f04be 100644 --- a/drivers/zorro/zorro.c +++ b/drivers/zorro/zorro.c @@ -197,9 +197,6 @@ static int __init amiga_zorro_probe(struct platform_device *pdev) put_device(&z->dev); continue; } - error = zorro_create_sysfs_dev_files(z); - if (error) - dev_err(&z->dev, "Error creating sysfs files\n"); } /* Mark all available Zorro II memory */ diff --git a/drivers/zorro/zorro.h b/drivers/zorro/zorro.h index 34119fb4e5601603edbd139563f539b3548963c9..4f805c01cfbc017a01f012263f031a8411ac2486 100644 --- a/drivers/zorro/zorro.h +++ b/drivers/zorro/zorro.h @@ -5,5 +5,4 @@ extern void zorro_name_device(struct zorro_dev *z); static inline void zorro_name_device(struct zorro_dev *dev) { } #endif -extern int zorro_create_sysfs_dev_files(struct zorro_dev *z); - +extern const struct attribute_group *zorro_device_attribute_groups[]; diff --git a/firmware/Makefile b/firmware/Makefile index e297e1b52636dadb9bf3f3acb2102c079e73e0e6..fa3e81c2a97b269f2065a45b8045d4944427c1d1 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -176,7 +176,8 @@ quiet_cmd_fwbin = MK_FW $@ wordsize_deps := $(wildcard include/config/64bit.h include/config/32bit.h \ include/config/ppc32.h include/config/ppc64.h \ include/config/superh32.h include/config/superh64.h \ - include/config/x86_32.h include/config/x86_64.h) + include/config/x86_32.h include/config/x86_64.h \ + firmware/Makefile) $(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps) $(call cmd,fwbin,$(patsubst %.gen.S,%,$@)) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index a89f3cfe3c7d7fba5341ee5adac4a3a1ecd8adec..c202930086edb6fd22e0645cdc5264071af28d1f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -333,10 +333,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, goto err_names; init_rwsem(&v9ses->rename_sem); - rc = bdi_setup_and_register(&v9ses->bdi, "9p"); - if (rc) - goto err_names; - v9ses->uid = INVALID_UID; v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; @@ -345,7 +341,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, if (IS_ERR(v9ses->clnt)) { rc = PTR_ERR(v9ses->clnt); p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n"); - goto err_bdi; + goto err_names; } v9ses->flags = V9FS_ACCESS_USER; @@ -415,8 +411,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, err_clnt: p9_client_destroy(v9ses->clnt); -err_bdi: - bdi_destroy(&v9ses->bdi); err_names: kfree(v9ses->uname); kfree(v9ses->aname); @@ -445,8 +439,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) kfree(v9ses->uname); kfree(v9ses->aname); - bdi_destroy(&v9ses->bdi); - spin_lock(&v9fs_sessionlist_lock); list_del(&v9ses->slist); spin_unlock(&v9fs_sessionlist_lock); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 443d12e020436f2e7af92333ae89d3f068dc614a..76eaf49abd3aea3b56644f3a025ccbc48b89bae8 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -114,7 +114,6 @@ struct v9fs_session_info { kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ - struct backing_dev_info bdi; struct rw_semaphore rename_sem; }; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index de3ed862919691344e5080c855403d4236512cd8..a0965fb587a5f7321b7f428b018d88307d471163 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -72,10 +72,12 @@ static int v9fs_set_super(struct super_block *s, void *data) * */ -static void +static int v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, int flags, void *data) { + int ret; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize_bits = fls(v9ses->maxdata - 1); sb->s_blocksize = 1 << sb->s_blocksize_bits; @@ -85,7 +87,11 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, sb->s_xattr = v9fs_xattr_handlers; } else sb->s_op = &v9fs_super_ops; - sb->s_bdi = &v9ses->bdi; + + ret = super_setup_bdi(sb); + if (ret) + return ret; + if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE; @@ -99,6 +105,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, #endif save_mount_options(sb, data); + return 0; } /** @@ -138,7 +145,9 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, retval = PTR_ERR(sb); goto clunk_fid; } - v9fs_fill_super(sb, v9ses, flags, data); + retval = v9fs_fill_super(sb, v9ses, flags, data); + if (retval) + goto release_sb; if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) sb->s_d_op = &v9fs_cached_dentry_operations; diff --git a/fs/Kconfig b/fs/Kconfig index 83eab52fb3f69a75aa06a9f2a31760a384508f41..b0e42b6a96b97e37b03b7b61e9def73e814f421e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -39,6 +39,7 @@ config FS_DAX depends on MMU depends on !(ARM || MIPS || SPARC) select FS_IOMAP + select DAX help Direct Access (DAX) can be used on memory-backed block devices. If the block device supports DAX and the filesystem supports DAX, diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 2f8bab390d1379731ce889ff2b1beec5ddf91a53..773749be8290cb782324d341865861a2e64d199a 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include "amigaffs.h" #include #include @@ -173,7 +173,7 @@ extern int affs_link(struct dentry *olddentry, struct inode *dir, struct dentry *dentry); extern int affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); -extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, +extern int affs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); diff --git a/include/linux/amigaffs.h b/fs/affs/amigaffs.h similarity index 100% rename from include/linux/amigaffs.h rename to fs/affs/amigaffs.h diff --git a/fs/affs/dir.c b/fs/affs/dir.c index f1e7294381c5aa48a386a3a7c0db7f4832c568ec..591ecd7f3063731d0eebe1435714e0be3949d455 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -35,7 +35,7 @@ const struct inode_operations affs_dir_inode_operations = { .symlink = affs_symlink, .mkdir = affs_mkdir, .rmdir = affs_rmdir, - .rename = affs_rename, + .rename = affs_rename2, .setattr = affs_notify_change, }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 0deec9cc2362c23fae1e25e88b12ff7769d823e2..196ee7f6fdc40e45235178b55abbc54461fbbb49 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -499,7 +499,7 @@ affs_getemptyblk_ino(struct inode *inode, int block) } static int -affs_do_readpage_ofs(struct page *page, unsigned to) +affs_do_readpage_ofs(struct page *page, unsigned to, int create) { struct inode *inode = page->mapping->host; struct super_block *sb = inode->i_sb; @@ -518,7 +518,7 @@ affs_do_readpage_ofs(struct page *page, unsigned to) boff = tmp % bsize; while (pos < to) { - bh = affs_bread_ino(inode, bidx, 0); + bh = affs_bread_ino(inode, bidx, create); if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - pos); @@ -620,7 +620,7 @@ affs_readpage_ofs(struct file *file, struct page *page) memset(page_address(page) + to, 0, PAGE_SIZE - to); } - err = affs_do_readpage_ofs(page, to); + err = affs_do_readpage_ofs(page, to, 0); if (!err) SetPageUptodate(page); unlock_page(page); @@ -657,7 +657,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping return 0; /* XXX: inefficient but safe in the face of short writes */ - err = affs_do_readpage_ofs(page, PAGE_SIZE); + err = affs_do_readpage_ofs(page, PAGE_SIZE, 1); if (err) { unlock_page(page); put_page(page); @@ -679,7 +679,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, int written; from = pos & (PAGE_SIZE - 1); - to = pos + len; + to = from + len; /* * XXX: not sure if this can handle short copies (len < copied), but * we don't have to, because the page should always be uptodate here, diff --git a/fs/affs/inode.c b/fs/affs/inode.c index abcc59899229c68165b9bcf0830885ecf707ecac..fd4ef3c40e4054ba9f23dec2198b650ef53e4cae 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -140,6 +140,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino) inode->i_fop = &affs_file_operations; break; case ST_SOFTLINK: + inode->i_size = strlen((char *)AFFS_HEAD(bh)->table); inode->i_mode |= S_IFLNK; inode_nohighmem(inode); inode->i_op = &affs_symlink_inode_operations; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 96dd1d09a2735676f51b94ff9390bbc114cf8367..46d3ace6761d7df89dcbbc598991a41561e16518 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -365,6 +365,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) symname++; } *p = 0; + inode->i_size = i + 1; mark_buffer_dirty_inode(bh, inode); affs_brelse(bh); mark_inode_dirty(inode); @@ -393,21 +394,14 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return affs_add_entry(dir, inode, dentry, ST_LINKFILE); } -int +static int affs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) + struct inode *new_dir, struct dentry *new_dentry) { struct super_block *sb = old_dir->i_sb; struct buffer_head *bh = NULL; int retval; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__, - old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry); - retval = affs_check_name(new_dentry->d_name.name, new_dentry->d_name.len, affs_nofilenametruncate(old_dentry)); @@ -447,6 +441,76 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } +static int +affs_xrename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + + struct super_block *sb = old_dir->i_sb; + struct buffer_head *bh_old = NULL; + struct buffer_head *bh_new = NULL; + int retval; + + bh_old = affs_bread(sb, d_inode(old_dentry)->i_ino); + if (!bh_old) + return -EIO; + + bh_new = affs_bread(sb, d_inode(new_dentry)->i_ino); + if (!bh_new) + return -EIO; + + /* Remove old header from its parent directory. */ + affs_lock_dir(old_dir); + retval = affs_remove_hash(old_dir, bh_old); + affs_unlock_dir(old_dir); + if (retval) + goto done; + + /* Remove new header from its parent directory. */ + affs_lock_dir(new_dir); + retval = affs_remove_hash(new_dir, bh_new); + affs_unlock_dir(new_dir); + if (retval) + goto done; + + /* Insert old into the new directory with the new name. */ + affs_copy_name(AFFS_TAIL(sb, bh_old)->name, new_dentry); + affs_fix_checksum(sb, bh_old); + affs_lock_dir(new_dir); + retval = affs_insert_hash(new_dir, bh_old); + affs_unlock_dir(new_dir); + + /* Insert new into the old directory with the old name. */ + affs_copy_name(AFFS_TAIL(sb, bh_new)->name, old_dentry); + affs_fix_checksum(sb, bh_new); + affs_lock_dir(old_dir); + retval = affs_insert_hash(old_dir, bh_new); + affs_unlock_dir(old_dir); +done: + mark_buffer_dirty_inode(bh_old, new_dir); + mark_buffer_dirty_inode(bh_new, old_dir); + affs_brelse(bh_old); + affs_brelse(bh_new); + return retval; +} + +int affs_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + pr_debug("%s(old=%lu,\"%pd\" to new=%lu,\"%pd\")\n", __func__, + old_dir->i_ino, old_dentry, new_dir->i_ino, new_dentry); + + if (flags & RENAME_EXCHANGE) + return affs_xrename(old_dir, old_dentry, new_dir, new_dentry); + + return affs_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static struct dentry *affs_get_parent(struct dentry *child) { struct inode *parent; @@ -477,11 +541,6 @@ static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino, if (IS_ERR(inode)) return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - return inode; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a6901360fb81d435bf47a85b781a89a1056fd900..393672997cc23d4e5688dc77421ae81b0df95b48 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -318,7 +318,6 @@ struct afs_volume { unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */ struct afs_server *servers[8]; /* servers on which volume resides (ordered) */ struct rw_semaphore server_sem; /* lock for accessing current server */ - struct backing_dev_info bdi; }; /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 8f76b13d55494bddec9e81203c0734a0f6d811d7..d5990eb160bdf49a3a916c0195d04fbe521ef2b2 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -419,7 +419,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, call->state = AFS_CALL_COMPLETE; if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, - -ret, "KSD"); + ret, "KSD"); } else { abort_code = 0; offset = 0; @@ -478,12 +478,12 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code, -ret, "KNC"); + abort_code, ret, "KNC"); goto save_error; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code, -ret, "KIV"); + abort_code, ret, "KIV"); goto save_error; case -ENODATA: case -EBADMSG: @@ -493,7 +493,7 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code, EBADMSG, "KUM"); + abort_code, -EBADMSG, "KUM"); goto save_error; } } @@ -754,7 +754,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT, ENOMEM, "KOO"); + RX_USER_ABORT, -ENOMEM, "KOO"); default: _leave(" [error]"); return; @@ -792,7 +792,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT, ENOMEM, "KOO"); + RX_USER_ABORT, -ENOMEM, "KOO"); } _leave(" [error]"); } diff --git a/fs/afs/super.c b/fs/afs/super.c index fbdb022b75a27be11b5699203f8c04e32c499cf6..c79633e5cfd80ce6754650dfb1a26890cf035205 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -319,7 +319,10 @@ static int afs_fill_super(struct super_block *sb, sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; - sb->s_bdi = &as->volume->bdi; + ret = super_setup_bdi(sb); + if (ret) + return ret; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id)); /* allocate the root inode and dentry */ diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 546f9d01710b5b66706331968edc557306ed848e..db73d6dad02b5f8f549cca6ccacdabaa9d458f63 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -106,11 +106,6 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) volume->cell = params->cell; volume->vid = vlocation->vldb.vid[params->type]; - volume->bdi.ra_pages = VM_MAX_READAHEAD*1024/PAGE_SIZE; - ret = bdi_setup_and_register(&volume->bdi, "afs"); - if (ret) - goto error_bdi; - init_rwsem(&volume->server_sem); /* look up all the applicable server records */ @@ -156,8 +151,6 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) return ERR_PTR(ret); error_discard: - bdi_destroy(&volume->bdi); -error_bdi: up_write(¶ms->cell->vl_sem); for (loop = volume->nservers - 1; loop >= 0; loop--) @@ -207,7 +200,6 @@ void afs_put_volume(struct afs_volume *volume) for (loop = volume->nservers - 1; loop >= 0; loop--) afs_put_server(volume->servers[loop]); - bdi_destroy(&volume->bdi); kfree(volume); _leave(" [destroyed]"); diff --git a/fs/autofs4/Kconfig b/fs/autofs4/Kconfig index 1204d6384d39b9c574d17f863a26db574da9a02c..44727bf18297dfb9434b25bea2af72bfcbb8c2cc 100644 --- a/fs/autofs4/Kconfig +++ b/fs/autofs4/Kconfig @@ -7,7 +7,7 @@ config AUTOFS4_FS automounter (amd), which is a pure user space daemon. To use the automounter you need the user-space tools from - ; you also + ; you also want to answer Y to "NFS file system support", below. To compile this support as a module, choose M here: the module will be diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index c500e954debba1b60a515697276d79c7bd145cdc..63e7c4760bfb468852fb457fd8c2116492bb2a63 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -58,6 +58,7 @@ static struct dentry *befs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); static struct dentry *befs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); +static struct dentry *befs_get_parent(struct dentry *child); static const struct super_operations befs_sops = { .alloc_inode = befs_alloc_inode, /* allocate a new inode */ @@ -93,6 +94,7 @@ static const struct address_space_operations befs_symlink_aops = { static const struct export_operations befs_export_operations = { .fh_to_dentry = befs_fh_to_dentry, .fh_to_parent = befs_fh_to_parent, + .get_parent = befs_get_parent, }; /* @@ -667,6 +669,19 @@ static struct dentry *befs_fh_to_parent(struct super_block *sb, befs_nfs_get_inode); } +static struct dentry *befs_get_parent(struct dentry *child) +{ + struct inode *parent; + struct befs_inode_info *befs_ino = BEFS_I(d_inode(child)); + + parent = befs_iget(child->d_sb, + (unsigned long)befs_ino->i_parent.start); + if (IS_ERR(parent)) + return ERR_CAST(parent); + + return d_obtain_alias(parent); +} + enum { Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, }; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index f2deec0a62f0c613349d6f7d94bd6077d2cceb96..25e312cb60716caef70a66f12eb391f8aa234005 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -1,7 +1,7 @@ /* * fs/bfs/inode.c * BFS superblock and inode operations. - * Copyright (C) 1999-2006 Tigran Aivazian + * Copyright (C) 1999-2006 Tigran Aivazian * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. * * Made endianness-clean by Andrew Stribblehill , 2005. @@ -19,7 +19,7 @@ #include #include "bfs.h" -MODULE_AUTHOR("Tigran Aivazian "); +MODULE_AUTHOR("Tigran Aivazian "); MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux"); MODULE_LICENSE("GPL"); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index bee1a36bc2ec4e0be1343c11bb07da13bbcb384f..f4718098ac31885b64ebaba7c03af5fe75515092 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -818,7 +818,7 @@ static const struct super_operations s_ops = { static int bm_fill_super(struct super_block *sb, void *data, int silent) { int err; - static struct tree_descr bm_files[] = { + static const struct tree_descr bm_files[] = { [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO}, [3] = {"register", &bm_register_operations, S_IWUSR}, /* last one */ {""} diff --git a/fs/block_dev.c b/fs/block_dev.c index 2eca00ec43706bb78955cd221a24593942625d57..519599dddd3692ee373a9eb00d95d5757556ad42 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -103,12 +104,11 @@ void invalidate_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; - if (mapping->nrpages == 0) - return; - - invalidate_bh_lrus(); - lru_add_drain_all(); /* make sure all lru add caches are flushed */ - invalidate_mapping_pages(mapping, 0, -1); + if (mapping->nrpages) { + invalidate_bh_lrus(); + lru_add_drain_all(); /* make sure all lru add caches are flushed */ + invalidate_mapping_pages(mapping, 0, -1); + } /* 99% of the time, we don't need to flush the cleancache on the bdev. * But, for the strange corners, lets be cautious */ @@ -717,120 +717,6 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL_GPL(bdev_write_page); -/** - * bdev_direct_access() - Get the address for directly-accessibly memory - * @bdev: The device containing the memory - * @dax: control and output parameters for ->direct_access - * - * If a block device is made up of directly addressable memory, this function - * will tell the caller the PFN and the address of the memory. The address - * may be directly dereferenced within the kernel without the need to call - * ioremap(), kmap() or similar. The PFN is suitable for inserting into - * page tables. - * - * Return: negative errno if an error occurs, otherwise the number of bytes - * accessible at this address. - */ -long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) -{ - sector_t sector = dax->sector; - long avail, size = dax->size; - const struct block_device_operations *ops = bdev->bd_disk->fops; - - /* - * The device driver is allowed to sleep, in order to make the - * memory directly accessible. - */ - might_sleep(); - - if (size < 0) - return size; - if (!blk_queue_dax(bdev_get_queue(bdev)) || !ops->direct_access) - return -EOPNOTSUPP; - if ((sector + DIV_ROUND_UP(size, 512)) > - part_nr_sects_read(bdev->bd_part)) - return -ERANGE; - sector += get_start_sect(bdev); - if (sector % (PAGE_SIZE / 512)) - return -EINVAL; - avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn, size); - if (!avail) - return -ERANGE; - if (avail > 0 && avail & ~PAGE_MASK) - return -ENXIO; - return min(avail, size); -} -EXPORT_SYMBOL_GPL(bdev_direct_access); - -/** - * bdev_dax_supported() - Check if the device supports dax for filesystem - * @sb: The superblock of the device - * @blocksize: The block size of the device - * - * This is a library function for filesystems to check if the block device - * can be mounted with dax option. - * - * Return: negative errno if unsupported, 0 if supported. - */ -int bdev_dax_supported(struct super_block *sb, int blocksize) -{ - struct blk_dax_ctl dax = { - .sector = 0, - .size = PAGE_SIZE, - }; - int err; - - if (blocksize != PAGE_SIZE) { - vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax"); - return -EINVAL; - } - - err = bdev_direct_access(sb->s_bdev, &dax); - if (err < 0) { - switch (err) { - case -EOPNOTSUPP: - vfs_msg(sb, KERN_ERR, - "error: device does not support dax"); - break; - case -EINVAL: - vfs_msg(sb, KERN_ERR, - "error: unaligned partition for dax"); - break; - default: - vfs_msg(sb, KERN_ERR, - "error: dax access failed (%d)", err); - } - return err; - } - - return 0; -} -EXPORT_SYMBOL_GPL(bdev_dax_supported); - -/** - * bdev_dax_capable() - Return if the raw device is capable for dax - * @bdev: The device for raw block device access - */ -bool bdev_dax_capable(struct block_device *bdev) -{ - struct blk_dax_ctl dax = { - .size = PAGE_SIZE, - }; - - if (!IS_ENABLED(CONFIG_FS_DAX)) - return false; - - dax.sector = 0; - if (bdev_direct_access(bdev, &dax) < 0) - return false; - - dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512); - if (bdev_direct_access(bdev, &dax) < 0) - return false; - - return true; -} - /* * pseudo-fs */ @@ -885,6 +771,8 @@ static void bdev_evict_inode(struct inode *inode) spin_lock(&bdev_lock); list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); + /* Detach inode from wb early as bdi_put() may free bdi->wb */ + inode_detach_wb(inode); if (bdev->bd_bdi != &noop_backing_dev_info) { bdi_put(bdev->bd_bdi); bdev->bd_bdi = &noop_backing_dev_info; @@ -1451,7 +1339,6 @@ int revalidate_disk(struct gendisk *disk) if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - blk_integrity_revalidate(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; @@ -1556,8 +1443,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; - if (bdev->bd_bdi == &noop_backing_dev_info) - bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); if (!partno) { ret = -ENXIO; @@ -1622,6 +1507,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); } + + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { if (bdev->bd_contains == bdev) { ret = 0; @@ -1653,8 +1541,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; - bdi_put(bdev->bd_bdi); - bdev->bd_bdi = &noop_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; @@ -1876,12 +1762,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) kill_bdev(bdev); bdev_write_inode(bdev); - /* - * Detaching bdev inode from its wb in __destroy_inode() - * is too late: the queue which embeds its bdi (along with - * root wb) can be gone as soon as we put_disk() below. - */ - inode_detach_wb(bdev->bd_inode); } if (bdev->bd_contains == bdev) { if (disk->fops->release) @@ -2074,7 +1954,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, loff_t len) { struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct request_queue *q = bdev_get_queue(bdev); struct address_space *mapping; loff_t end = start + len - 1; loff_t isize; @@ -2110,18 +1989,13 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, - GFP_KERNEL, false); + GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: - /* Only punch if the device can do zeroing discard. */ - if (!blk_queue_discard(q) || !q->limits.discard_zeroes_data) - return -EOPNOTSUPP; - error = blkdev_issue_discard(bdev, start >> 9, len >> 9, - GFP_KERNEL, 0); + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, + GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); break; case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: - if (!blk_queue_discard(q)) - return -EOPNOTSUPP; error = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); break; diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7699e16784d313459181c746d0b8c30d468e23a7..24865da63d8fdfd2a979429a426b42fe8781e0aa 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -26,6 +26,11 @@ #include "delayed-ref.h" #include "locking.h" +enum merge_mode { + MERGE_IDENTICAL_KEYS = 1, + MERGE_IDENTICAL_PARENTS, +}; + /* Just an arbitrary number so we can be sure this happened */ #define BACKREF_FOUND_SHARED 6 @@ -533,7 +538,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, * slot==nritems. In that case, go to the next leaf before we continue. */ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - if (time_seq == (u64)-1) + if (time_seq == SEQ_LAST) ret = btrfs_next_leaf(root, path); else ret = btrfs_next_old_leaf(root, path, time_seq); @@ -577,7 +582,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eie = NULL; } next: - if (time_seq == (u64)-1) + if (time_seq == SEQ_LAST) ret = btrfs_next_item(root, path); else ret = btrfs_next_old_item(root, path, time_seq); @@ -629,7 +634,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, if (path->search_commit_root) root_level = btrfs_header_level(root->commit_root); - else if (time_seq == (u64)-1) + else if (time_seq == SEQ_LAST) root_level = btrfs_header_level(root->node); else root_level = btrfs_old_root_level(root, time_seq); @@ -640,7 +645,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, } path->lowest_level = level; - if (time_seq == (u64)-1) + if (time_seq == SEQ_LAST) ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, 0, 0); else @@ -809,14 +814,12 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, /* * merge backrefs and adjust counts accordingly * - * mode = 1: merge identical keys, if key is set - * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. - * additionally, we could even add a key range for the blocks we - * looked into to merge even more (-> replace unresolved refs by those - * having a parent). - * mode = 2: merge identical parents + * FIXME: For MERGE_IDENTICAL_KEYS, if we add more keys in __add_prelim_ref + * then we can merge more here. Additionally, we could even add a key + * range for the blocks we looked into to merge even more (-> replace + * unresolved refs by those having a parent). */ -static void __merge_refs(struct list_head *head, int mode) +static void __merge_refs(struct list_head *head, enum merge_mode mode) { struct __prelim_ref *pos1; @@ -829,7 +832,7 @@ static void __merge_refs(struct list_head *head, int mode) if (!ref_for_same_block(ref1, ref2)) continue; - if (mode == 1) { + if (mode == MERGE_IDENTICAL_KEYS) { if (!ref1->parent && ref2->parent) swap(ref1, ref2); } else { @@ -1196,7 +1199,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, * * NOTE: This can return values > 0 * - * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave + * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave * much like trans == NULL case, the difference only lies in it will not * commit root. * The special case is for qgroup to search roots in commit_transaction(). @@ -1243,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, path->skip_locking = 1; } - if (time_seq == (u64)-1) + if (time_seq == SEQ_LAST) path->skip_locking = 1; /* @@ -1273,9 +1276,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (trans && likely(trans->type != __TRANS_DUMMY) && - time_seq != (u64)-1) { + time_seq != SEQ_LAST) { #else - if (trans && time_seq != (u64)-1) { + if (trans && time_seq != SEQ_LAST) { #endif /* * look if there are updates for this ref queued and lock the @@ -1286,7 +1289,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); @@ -1374,7 +1377,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, if (ret) goto out; - __merge_refs(&prefs, 1); + __merge_refs(&prefs, MERGE_IDENTICAL_KEYS); ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, extent_item_pos, total_refs, @@ -1382,7 +1385,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, if (ret) goto out; - __merge_refs(&prefs, 2); + __merge_refs(&prefs, MERGE_IDENTICAL_PARENTS); while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0c6baaba0651ce10ba5e394ad82a03a917ced4e6..b8622e4d1744de68180f96036ad5ddbc3c195ca8 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -124,6 +124,13 @@ struct btrfs_inode { */ u64 delalloc_bytes; + /* + * Total number of bytes pending delalloc that fall within a file + * range that is either a hole or beyond EOF (and no prealloc extent + * exists in the range). This is always <= delalloc_bytes. + */ + u64 new_delalloc_bytes; + /* * total number of bytes pending defrag, used by stat to check whether * it needs COW. diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index c7721a6aa3bb5346cbd9103b4ee3e9f4528a8c4e..10e6b282d09d6e8d31b4ac8740d0119ccad3e786 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -44,7 +44,7 @@ struct compressed_bio { /* number of bios pending for this compressed extent */ - atomic_t pending_bios; + refcount_t pending_bios; /* the pages with the compressed data on them */ struct page **compressed_pages; @@ -161,7 +161,7 @@ static void end_compressed_bio_read(struct bio *bio) /* if there are more bios still pending for this compressed * extent, just exit */ - if (!atomic_dec_and_test(&cb->pending_bios)) + if (!refcount_dec_and_test(&cb->pending_bios)) goto out; inode = cb->inode; @@ -274,7 +274,7 @@ static void end_compressed_bio_write(struct bio *bio) /* if there are more bios still pending for this compressed * extent, just exit */ - if (!atomic_dec_and_test(&cb->pending_bios)) + if (!refcount_dec_and_test(&cb->pending_bios)) goto out; /* ok, we're the last bio for this extent, step one is to @@ -342,7 +342,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); if (!cb) return -ENOMEM; - atomic_set(&cb->pending_bios, 0); + refcount_set(&cb->pending_bios, 0); cb->errors = 0; cb->inode = inode; cb->start = start; @@ -363,7 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; - atomic_inc(&cb->pending_bios); + refcount_set(&cb->pending_bios, 1); /* create and submit bios for the compressed pages */ bytes_left = compressed_len; @@ -388,7 +388,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, * we inc the count. Otherwise, the cb might get * freed before we're done setting it up */ - atomic_inc(&cb->pending_bios); + refcount_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -607,7 +607,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (!cb) goto out; - atomic_set(&cb->pending_bios, 0); + refcount_set(&cb->pending_bios, 0); cb->errors = 0; cb->inode = inode; cb->mirror_num = mirror_num; @@ -656,7 +656,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_set_op_attrs (comp_bio, REQ_OP_READ, 0); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; - atomic_inc(&cb->pending_bios); + refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < nr_pages; pg_index++) { page = cb->compressed_pages[pg_index]; @@ -685,7 +685,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, * we inc the count. Otherwise, the cb might get * freed before we're done setting it up */ - atomic_inc(&cb->pending_bios); + refcount_inc(&cb->pending_bios); if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_lookup_bio_sums(inode, comp_bio, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7dc8844037e03b7cfb738d81af6c22b33ee2b31d..a3a75f1de002295c425f5957de7e9aba269a7682 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -567,7 +567,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, static noinline int tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int dst_slot, int src_slot, - int nr_items, gfp_t flags) + int nr_items) { struct tree_mod_elem *tm = NULL; struct tree_mod_elem **tm_list = NULL; @@ -578,11 +578,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, if (!tree_mod_need_log(fs_info, eb)) return 0; - tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags); + tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS); if (!tm_list) return -ENOMEM; - tm = kzalloc(sizeof(*tm), flags); + tm = kzalloc(sizeof(*tm), GFP_NOFS); if (!tm) { ret = -ENOMEM; goto free_tms; @@ -596,7 +596,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags); + MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); if (!tm_list[i]) { ret = -ENOMEM; goto free_tms; @@ -663,7 +663,7 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, static noinline int tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, struct extent_buffer *old_root, - struct extent_buffer *new_root, gfp_t flags, + struct extent_buffer *new_root, int log_removal) { struct tree_mod_elem *tm = NULL; @@ -678,14 +678,14 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal && btrfs_header_level(old_root) > 0) { nritems = btrfs_header_nritems(old_root); tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), - flags); + GFP_NOFS); if (!tm_list) { ret = -ENOMEM; goto free_tms; } for (i = 0; i < nritems; i++) { tm_list[i] = alloc_tree_mod_elem(old_root, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags); + MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); if (!tm_list[i]) { ret = -ENOMEM; goto free_tms; @@ -693,7 +693,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, } } - tm = kzalloc(sizeof(*tm), flags); + tm = kzalloc(sizeof(*tm), GFP_NOFS); if (!tm) { ret = -ENOMEM; goto free_tms; @@ -873,7 +873,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, { int ret; ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, - nr_items, GFP_NOFS); + nr_items); BUG_ON(ret < 0); } @@ -943,7 +943,7 @@ tree_mod_log_set_root_pointer(struct btrfs_root *root, { int ret; ret = tree_mod_log_insert_root(root->fs_info, root->node, - new_root_node, GFP_NOFS, log_removal); + new_root_node, log_removal); BUG_ON(ret < 0); } @@ -5392,13 +5392,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN); + tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); if (!tmp_buf) { - tmp_buf = vmalloc(fs_info->nodesize); - if (!tmp_buf) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } left_path->search_commit_root = 1; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c4115901d9064f68217c3be825f233d7d6872cf6..4f8f75d9e83916c059902196ecab2aa6bbdf4b4d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -39,6 +39,7 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -518,7 +519,7 @@ struct btrfs_caching_control { struct btrfs_work work; struct btrfs_block_group_cache *block_group; u64 progress; - atomic_t count; + refcount_t count; }; /* Once caching_thread() finds this much free space, it will wake up waiters. */ @@ -538,6 +539,14 @@ struct btrfs_io_ctl { unsigned check_crcs:1; }; +/* + * Tree to record all locked full stripes of a RAID5/6 block group + */ +struct btrfs_full_stripe_locks_tree { + struct rb_root root; + struct mutex lock; +}; + struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; @@ -648,6 +657,9 @@ struct btrfs_block_group_cache { * Protected by free_space_lock. */ int needs_free_space; + + /* Record locked full stripes for RAID5/6 block group */ + struct btrfs_full_stripe_locks_tree full_stripe_locks_root; }; /* delayed seq elem */ @@ -658,6 +670,8 @@ struct seq_list { #define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 } +#define SEQ_LAST ((u64)-1) + enum btrfs_orphan_cleanup_state { ORPHAN_CLEANUP_STARTED = 1, ORPHAN_CLEANUP_DONE = 2, @@ -702,6 +716,11 @@ struct btrfs_delayed_root; #define BTRFS_FS_BTREE_ERR 11 #define BTRFS_FS_LOG1_ERR 12 #define BTRFS_FS_LOG2_ERR 13 +/* + * Indicate that a whole-filesystem exclusive operation is running + * (device replace, resize, device add/delete, balance) + */ +#define BTRFS_FS_EXCL_OP 14 struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; @@ -810,7 +829,6 @@ struct btrfs_fs_info { struct btrfs_super_block *super_for_commit; struct super_block *sb; struct inode *btree_inode; - struct backing_dev_info bdi; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; @@ -1067,8 +1085,6 @@ struct btrfs_fs_info { /* device replace state */ struct btrfs_dev_replace dev_replace; - atomic_t mutually_exclusive_operation_running; - struct percpu_counter bio_counter; wait_queue_head_t replace_wait; @@ -1221,7 +1237,7 @@ struct btrfs_root { dev_t anon_dev; spinlock_t root_item_lock; - atomic_t refs; + refcount_t refs; struct mutex delalloc_mutex; spinlock_t delalloc_lock; @@ -2547,7 +2563,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes); static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info, unsigned num_items) { - return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; + return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; } /* @@ -2557,7 +2573,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info, static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info, unsigned num_items) { - return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; + return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; } int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, @@ -3647,6 +3663,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, struct btrfs_device *dev); int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct btrfs_scrub_progress *progress); +static inline void btrfs_init_full_stripe_locks_tree( + struct btrfs_full_stripe_locks_tree *locks_root) +{ + locks_root->root = RB_ROOT; + mutex_init(&locks_root->lock); +} /* dev-replace.c */ void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); @@ -3671,8 +3693,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); -int btree_readahead_hook(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int err); +int btree_readahead_hook(struct extent_buffer *eb, int err); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 1aff676f0e5b5b6c63efd32eee44958b40968e2a..8ae409b5a61d7186f050780beced1bf72cd572ca 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -52,7 +52,7 @@ static inline void btrfs_init_delayed_node( { delayed_node->root = root; delayed_node->inode_id = inode_id; - atomic_set(&delayed_node->refs, 0); + refcount_set(&delayed_node->refs, 0); delayed_node->ins_root = RB_ROOT; delayed_node->del_root = RB_ROOT; mutex_init(&delayed_node->mutex); @@ -81,7 +81,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( node = READ_ONCE(btrfs_inode->delayed_node); if (node) { - atomic_inc(&node->refs); + refcount_inc(&node->refs); return node; } @@ -89,14 +89,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( node = radix_tree_lookup(&root->delayed_nodes_tree, ino); if (node) { if (btrfs_inode->delayed_node) { - atomic_inc(&node->refs); /* can be accessed */ + refcount_inc(&node->refs); /* can be accessed */ BUG_ON(btrfs_inode->delayed_node != node); spin_unlock(&root->inode_lock); return node; } btrfs_inode->delayed_node = node; /* can be accessed and cached in the inode */ - atomic_add(2, &node->refs); + refcount_add(2, &node->refs); spin_unlock(&root->inode_lock); return node; } @@ -125,7 +125,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( btrfs_init_delayed_node(node, root, ino); /* cached in the btrfs inode and can be accessed */ - atomic_add(2, &node->refs); + refcount_set(&node->refs, 2); ret = radix_tree_preload(GFP_NOFS); if (ret) { @@ -166,7 +166,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root, } else { list_add_tail(&node->n_list, &root->node_list); list_add_tail(&node->p_list, &root->prepare_list); - atomic_inc(&node->refs); /* inserted into list */ + refcount_inc(&node->refs); /* inserted into list */ root->nodes++; set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags); } @@ -180,7 +180,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root, spin_lock(&root->lock); if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) { root->nodes--; - atomic_dec(&node->refs); /* not in the list */ + refcount_dec(&node->refs); /* not in the list */ list_del_init(&node->n_list); if (!list_empty(&node->p_list)) list_del_init(&node->p_list); @@ -201,7 +201,7 @@ static struct btrfs_delayed_node *btrfs_first_delayed_node( p = delayed_root->node_list.next; node = list_entry(p, struct btrfs_delayed_node, n_list); - atomic_inc(&node->refs); + refcount_inc(&node->refs); out: spin_unlock(&delayed_root->lock); @@ -228,7 +228,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node( p = node->n_list.next; next = list_entry(p, struct btrfs_delayed_node, n_list); - atomic_inc(&next->refs); + refcount_inc(&next->refs); out: spin_unlock(&delayed_root->lock); @@ -253,11 +253,11 @@ static void __btrfs_release_delayed_node( btrfs_dequeue_delayed_node(delayed_root, delayed_node); mutex_unlock(&delayed_node->mutex); - if (atomic_dec_and_test(&delayed_node->refs)) { + if (refcount_dec_and_test(&delayed_node->refs)) { bool free = false; struct btrfs_root *root = delayed_node->root; spin_lock(&root->inode_lock); - if (atomic_read(&delayed_node->refs) == 0) { + if (refcount_read(&delayed_node->refs) == 0) { radix_tree_delete(&root->delayed_nodes_tree, delayed_node->inode_id); free = true; @@ -286,7 +286,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( p = delayed_root->prepare_list.next; list_del_init(p); node = list_entry(p, struct btrfs_delayed_node, p_list); - atomic_inc(&node->refs); + refcount_inc(&node->refs); out: spin_unlock(&delayed_root->lock); @@ -308,7 +308,7 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) item->ins_or_del = 0; item->bytes_reserved = 0; item->delayed_node = NULL; - atomic_set(&item->refs, 1); + refcount_set(&item->refs, 1); } return item; } @@ -483,7 +483,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) { if (item) { __btrfs_remove_delayed_item(item); - if (atomic_dec_and_test(&item->refs)) + if (refcount_dec_and_test(&item->refs)) kfree(item); } } @@ -1600,14 +1600,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, mutex_lock(&delayed_node->mutex); item = __btrfs_first_delayed_insertion_item(delayed_node); while (item) { - atomic_inc(&item->refs); + refcount_inc(&item->refs); list_add_tail(&item->readdir_list, ins_list); item = __btrfs_next_delayed_item(item); } item = __btrfs_first_delayed_deletion_item(delayed_node); while (item) { - atomic_inc(&item->refs); + refcount_inc(&item->refs); list_add_tail(&item->readdir_list, del_list); item = __btrfs_next_delayed_item(item); } @@ -1621,7 +1621,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode, * insert/delete delayed items in this period. So we also needn't * requeue or dequeue this delayed node. */ - atomic_dec(&delayed_node->refs); + refcount_dec(&delayed_node->refs); return true; } @@ -1634,13 +1634,13 @@ void btrfs_readdir_put_delayed_items(struct inode *inode, list_for_each_entry_safe(curr, next, ins_list, readdir_list) { list_del(&curr->readdir_list); - if (atomic_dec_and_test(&curr->refs)) + if (refcount_dec_and_test(&curr->refs)) kfree(curr); } list_for_each_entry_safe(curr, next, del_list, readdir_list) { list_del(&curr->readdir_list); - if (atomic_dec_and_test(&curr->refs)) + if (refcount_dec_and_test(&curr->refs)) kfree(curr); } @@ -1667,7 +1667,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, list_del(&curr->readdir_list); ret = (curr->key.offset == index); - if (atomic_dec_and_test(&curr->refs)) + if (refcount_dec_and_test(&curr->refs)) kfree(curr); if (ret) @@ -1705,7 +1705,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, list_del(&curr->readdir_list); if (curr->key.offset < ctx->pos) { - if (atomic_dec_and_test(&curr->refs)) + if (refcount_dec_and_test(&curr->refs)) kfree(curr); continue; } @@ -1722,7 +1722,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, over = !dir_emit(ctx, name, name_len, location.objectid, d_type); - if (atomic_dec_and_test(&curr->refs)) + if (refcount_dec_and_test(&curr->refs)) kfree(curr); if (over) @@ -1963,7 +1963,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) inode_id = delayed_nodes[n - 1]->inode_id + 1; for (i = 0; i < n; i++) - atomic_inc(&delayed_nodes[i]->refs); + refcount_inc(&delayed_nodes[i]->refs); spin_unlock(&root->inode_lock); for (i = 0; i < n; i++) { diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 40327cc3b99a3bdcd517827652969299e1ce2a2b..c4189d4959343219eadf1db68d8aa5dd3f8d6ee4 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -26,7 +26,7 @@ #include #include #include - +#include #include "ctree.h" /* types of the delayed item */ @@ -67,7 +67,7 @@ struct btrfs_delayed_node { struct rb_root del_root; struct mutex mutex; struct btrfs_inode_item inode_item; - atomic_t refs; + refcount_t refs; u64 index_cnt; unsigned long flags; int count; @@ -80,7 +80,7 @@ struct btrfs_delayed_item { struct list_head readdir_list; /* used for readdir items */ u64 bytes_reserved; struct btrfs_delayed_node *delayed_node; - atomic_t refs; + refcount_t refs; int ins_or_del; u32 data_len; char data[0]; diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6eb80952efb3310ae55de9c2e234319abe14465a..be70d90dfee591953d53100b3c872d0efc5f324f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -164,7 +164,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, if (mutex_trylock(&head->mutex)) return 0; - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); mutex_lock(&head->mutex); @@ -590,7 +590,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; /* first set the basic ref node struct up */ - atomic_set(&ref->refs, 1); + refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = count_mod; @@ -682,7 +682,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; /* first set the basic ref node struct up */ - atomic_set(&ref->refs, 1); + refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = 1; @@ -739,7 +739,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, seq = atomic64_read(&fs_info->tree_mod_seq); /* first set the basic ref node struct up */ - atomic_set(&ref->refs, 1); + refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = 1; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 0e537f98f1a1c63c529c118baed1cac26efa194e..c0264ff01b53cfe9e2fa44a7cdd7e9352012f0ee 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -18,6 +18,8 @@ #ifndef __DELAYED_REF__ #define __DELAYED_REF__ +#include + /* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ @@ -53,7 +55,7 @@ struct btrfs_delayed_ref_node { u64 seq; /* ref count on this data structure */ - atomic_t refs; + refcount_t refs; /* * how many refs is this entry adding or deleting. For @@ -220,8 +222,8 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) { - WARN_ON(atomic_read(&ref->refs) == 0); - if (atomic_dec_and_test(&ref->refs)) { + WARN_ON(refcount_read(&ref->refs) == 0); + if (refcount_dec_and_test(&ref->refs)) { WARN_ON(ref->in_tree); switch (ref->type) { case BTRFS_TREE_BLOCK_REF_KEY: diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index e653921f05d93936581785553a8964124f5df1c0..5fe1ca8abc70577fe28ee6fd69899d11b618d479 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -546,8 +546,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&uuid_mutex); + btrfs_rm_dev_replace_blocked(fs_info); if (tgt_device) btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); + btrfs_rm_dev_replace_unblocked(fs_info); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return scrub_ret; @@ -665,7 +667,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: srcdev = dev_replace->srcdev; - args->status.progress_1000 = div_u64(dev_replace->cursor_left, + args->status.progress_1000 = div64_u64(dev_replace->cursor_left, div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); break; } @@ -784,8 +786,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) } btrfs_dev_replace_unlock(dev_replace, 1); - WARN_ON(atomic_xchg( - &fs_info->mutually_exclusive_operation_running, 1)); + WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)); task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl"); return PTR_ERR_OR_ZERO(task); } @@ -814,7 +815,7 @@ static int btrfs_dev_replace_kthread(void *data) (unsigned int)progress); } btrfs_dev_replace_continue_on_mount(fs_info); - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); return 0; } diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 60a750678a82b335ab7fd0e9432288d013c52e64..c24d615e3d7f60694d25d6a5c34f2dff45121ca7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_fs_info *fs_info, if (btrfs_dir_name_len(leaf, dir_item) > namelen) { btrfs_crit(fs_info, "invalid dir item name len: %u", - (unsigned)btrfs_dir_data_len(leaf, dir_item)); + (unsigned)btrfs_dir_name_len(leaf, dir_item)); return 1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eb1ee7b6f532b74409a6bb50fdb204cb2c8332a1..5f678dcb20e634d936c56f6bdcba0c86b624c21b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -762,7 +762,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(fs_info, eb, ret); + btree_readahead_hook(eb, ret); if (ret) { /* @@ -787,7 +787,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(eb->fs_info, eb, -EIO); + btree_readahead_hook(eb, -EIO); return -EIO; /* we fixed nothing */ } @@ -1340,7 +1340,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->log_writers, 0); atomic_set(&root->log_batch, 0); atomic_set(&root->orphan_inodes, 0); - atomic_set(&root->refs, 1); + refcount_set(&root->refs, 1); atomic_set(&root->will_be_snapshoted, 0); atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; @@ -1808,21 +1808,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) return ret; } -static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) -{ - int err; - - err = bdi_setup_and_register(bdi, "btrfs"); - if (err) - return err; - - bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; - bdi->congested_fn = btrfs_congested_fn; - bdi->congested_data = info; - bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; - return 0; -} - /* * called by the kthread helper functions to finally call the bio end_io * functions. This is where read checksum verification actually happens @@ -2601,16 +2586,10 @@ int open_ctree(struct super_block *sb, goto fail; } - ret = setup_bdi(fs_info, &fs_info->bdi); - if (ret) { - err = ret; - goto fail_srcu; - } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; - goto fail_bdi; + goto fail_srcu; } fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); @@ -2718,7 +2697,6 @@ int open_ctree(struct super_block *sb, sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); - sb->s_bdi = &fs_info->bdi; btrfs_init_btree_inode(fs_info); @@ -2915,9 +2893,12 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); - fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, - SZ_4M / PAGE_SIZE); + sb->s_bdi->congested_fn = btrfs_congested_fn; + sb->s_bdi->congested_data = fs_info; + sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); + sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); @@ -3285,8 +3266,6 @@ int open_ctree(struct super_block *sb, percpu_counter_destroy(&fs_info->delalloc_bytes); fail_dirty_metadata_bytes: percpu_counter_destroy(&fs_info->dirty_metadata_bytes); -fail_bdi: - bdi_destroy(&fs_info->bdi); fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: @@ -3488,10 +3467,12 @@ static int write_dev_supers(struct btrfs_device *device, * we fua the first super. The others we allow * to go down lazy. */ - if (i == 0) - ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh); - else + if (i == 0) { + ret = btrfsic_submit_bh(REQ_OP_WRITE, + REQ_SYNC | REQ_FUA, bh); + } else { ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + } if (ret) errors++; } @@ -3518,10 +3499,11 @@ static void btrfs_end_empty_barrier(struct bio *bio) */ static int write_dev_flush(struct btrfs_device *device, int wait) { + struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio; int ret = 0; - if (device->nobarriers) + if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return 0; if (wait) { @@ -3555,7 +3537,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; @@ -4007,7 +3989,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); percpu_counter_destroy(&fs_info->bio_counter); - bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); btrfs_free_stripe_hash_table(fs_info); @@ -4343,7 +4324,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, head = rb_entry(node, struct btrfs_delayed_ref_head, href_node); if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); mutex_lock(&head->mutex); @@ -4615,7 +4596,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) t = list_first_entry(&fs_info->trans_list, struct btrfs_transaction, list); if (t->state >= TRANS_STATE_COMMIT_START) { - atomic_inc(&t->use_count); + refcount_inc(&t->use_count); spin_unlock(&fs_info->trans_lock); btrfs_wait_for_commit(fs_info, t->transid); btrfs_put_transaction(t); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2e0ec29bfd69f04b4232b75754010594bd3d5f95..21f1ceb85b76737a67c1ffbc02cbd725b09fb510 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -101,14 +101,14 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); */ static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root) { - if (atomic_inc_not_zero(&root->refs)) + if (refcount_inc_not_zero(&root->refs)) return root; return NULL; } static inline void btrfs_put_fs_root(struct btrfs_root *root) { - if (atomic_dec_and_test(&root->refs)) + if (refcount_dec_and_test(&root->refs)) kfree(root); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be5477676cc829e4efe89349fc9b7df540fd0dff..33d979e9ea2a307802434f8425a6b3a33c5d59d6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -131,6 +131,16 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) if (atomic_dec_and_test(&cache->count)) { WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); + + /* + * If not empty, someone is still holding mutex of + * full_stripe_lock, which can only be released by caller. + * And it will definitely cause use-after-free when caller + * tries to release full stripe lock. + * + * No better way to resolve, but only to warn. + */ + WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root)); kfree(cache->free_space_ctl); kfree(cache); } @@ -316,14 +326,14 @@ get_caching_control(struct btrfs_block_group_cache *cache) } ctl = cache->caching_ctl; - atomic_inc(&ctl->count); + refcount_inc(&ctl->count); spin_unlock(&cache->lock); return ctl; } static void put_caching_control(struct btrfs_caching_control *ctl) { - if (atomic_dec_and_test(&ctl->count)) + if (refcount_dec_and_test(&ctl->count)) kfree(ctl); } @@ -599,7 +609,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, init_waitqueue_head(&caching_ctl->wait); caching_ctl->block_group = cache; caching_ctl->progress = cache->key.objectid; - atomic_set(&caching_ctl->count, 1); + refcount_set(&caching_ctl->count, 1); btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, caching_thread, NULL, NULL); @@ -620,7 +630,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, struct btrfs_caching_control *ctl; ctl = cache->caching_ctl; - atomic_inc(&ctl->count); + refcount_inc(&ctl->count); prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock(&cache->lock); @@ -707,7 +717,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, } down_write(&fs_info->commit_root_sem); - atomic_inc(&caching_ctl->count); + refcount_inc(&caching_ctl->count); list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); up_write(&fs_info->commit_root_sem); @@ -892,7 +902,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); @@ -2980,7 +2990,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref; ref = &head->node; - atomic_inc(&ref->refs); + refcount_inc(&ref->refs); spin_unlock(&delayed_refs->lock); /* @@ -3003,7 +3013,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, goto again; } out: - assert_qgroups_uptodate(trans); trans->can_flush_pending_bgs = can_flush_pending_bgs; return 0; } @@ -3057,7 +3066,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root, } if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); @@ -3443,7 +3452,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, /* * don't bother trying to write stuff out _if_ * a) we're not cached, - * b) we're with nospace_cache mount option. + * b) we're with nospace_cache mount option, + * c) we're with v2 space_cache (FREE_SPACE_TREE). */ dcs = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); @@ -3983,6 +3993,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, info->space_info_kobj, "%s", alloc_name(found->flags)); if (ret) { + percpu_counter_destroy(&found->total_bytes_pinned); kfree(found); return ret; } @@ -4834,7 +4845,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, spin_unlock(&delayed_rsv->lock); commit: - trans = btrfs_join_transaction(fs_info->fs_root); + trans = btrfs_join_transaction(fs_info->extent_root); if (IS_ERR(trans)) return -ENOSPC; @@ -4852,7 +4863,7 @@ static int flush_space(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 num_bytes, u64 orig_bytes, int state) { - struct btrfs_root *root = fs_info->fs_root; + struct btrfs_root *root = fs_info->extent_root; struct btrfs_trans_handle *trans; int nr; int ret = 0; @@ -5052,7 +5063,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, int flush_state = FLUSH_DELAYED_ITEMS_NR; spin_lock(&space_info->lock); - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root, + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root, space_info); if (!to_reclaim) { spin_unlock(&space_info->lock); @@ -9917,6 +9928,7 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, btrfs_init_free_space_ctl(cache); atomic_set(&cache->trimming, 0); mutex_init(&cache->free_space_lock); + btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root); return cache; } @@ -10416,7 +10428,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, &fs_info->caching_block_groups, list) if (ctl->block_group == block_group) { caching_ctl = ctl; - atomic_inc(&caching_ctl->count); + refcount_inc(&caching_ctl->count); break; } } @@ -10850,7 +10862,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, spin_lock(&fs_info->trans_lock); trans = fs_info->running_transaction; if (trans) - atomic_inc(&trans->use_count); + refcount_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); ret = find_free_dev_extent_start(trans, device, minlen, start, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 27fdb250b4467f65a8c6a42d06835f3bb3a36aec..d3619e01000551c42bf65bd6983801a0b04423e5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -68,7 +68,7 @@ void btrfs_leak_debug_check(void) pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n", state->start, state->end, state->state, extent_state_in_tree(state), - atomic_read(&state->refs)); + refcount_read(&state->refs)); list_del(&state->leak_list); kmem_cache_free(extent_state_cache, state); } @@ -238,7 +238,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) state->failrec = NULL; RB_CLEAR_NODE(&state->rb_node); btrfs_leak_debug_add(&state->leak_list, &states); - atomic_set(&state->refs, 1); + refcount_set(&state->refs, 1); init_waitqueue_head(&state->wq); trace_alloc_extent_state(state, mask, _RET_IP_); return state; @@ -248,7 +248,7 @@ void free_extent_state(struct extent_state *state) { if (!state) return; - if (atomic_dec_and_test(&state->refs)) { + if (refcount_dec_and_test(&state->refs)) { WARN_ON(extent_state_in_tree(state)); btrfs_leak_debug_del(&state->leak_list); trace_free_extent_state(state, _RET_IP_); @@ -641,7 +641,7 @@ static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (cached && extent_state_in_tree(cached) && cached->start <= start && cached->end > start) { if (clear) - atomic_dec(&cached->refs); + refcount_dec(&cached->refs); state = cached; goto hit_next; } @@ -793,7 +793,7 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (state->state & bits) { start = state->start; - atomic_inc(&state->refs); + refcount_inc(&state->refs); wait_on_state(tree, state); free_extent_state(state); goto again; @@ -834,7 +834,7 @@ static void cache_state_if_flags(struct extent_state *state, if (cached_ptr && !(*cached_ptr)) { if (!flags || (state->state & flags)) { *cached_ptr = state; - atomic_inc(&state->refs); + refcount_inc(&state->refs); } } } @@ -1538,7 +1538,7 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree, if (!found) { *start = state->start; *cached_state = state; - atomic_inc(&state->refs); + refcount_inc(&state->refs); } found++; *end = state->end; @@ -2004,16 +2004,11 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, u64 map_length = 0; u64 sector; struct btrfs_bio *bbio = NULL; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int ret; ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); BUG_ON(!mirror_num); - /* we can't repair anything in raid56 yet */ - if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num)) - return 0; - bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; @@ -2026,17 +2021,35 @@ int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, * read repair operation. */ btrfs_bio_counter_inc_blocked(fs_info); - ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, - &map_length, &bbio, mirror_num); - if (ret) { - btrfs_bio_counter_dec(fs_info); - bio_put(bio); - return -EIO; + if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) { + /* + * Note that we don't use BTRFS_MAP_WRITE because it's supposed + * to update all raid stripes, but here we just want to correct + * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad + * stripe's dev and sector. + */ + ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, + &map_length, &bbio, 0); + if (ret) { + btrfs_bio_counter_dec(fs_info); + bio_put(bio); + return -EIO; + } + ASSERT(bbio->mirror_num == 1); + } else { + ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, + &map_length, &bbio, mirror_num); + if (ret) { + btrfs_bio_counter_dec(fs_info); + bio_put(bio); + return -EIO; + } + BUG_ON(mirror_num != bbio->mirror_num); } - BUG_ON(mirror_num != bbio->mirror_num); - sector = bbio->stripes[mirror_num-1].physical >> 9; + + sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9; bio->bi_iter.bi_sector = sector; - dev = bbio->stripes[mirror_num-1].dev; + dev = bbio->stripes[bbio->mirror_num - 1].dev; btrfs_put_bbio(bbio); if (!dev || !dev->bdev || !dev->writeable) { btrfs_bio_counter_dec(fs_info); @@ -2445,7 +2458,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) if (!uptodate) { ClearPageUptodate(page); SetPageError(page); - ret = ret < 0 ? ret : -EIO; + ret = err < 0 ? err : -EIO; mapping_set_error(page->mapping, ret); } } @@ -2859,7 +2872,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, em = *em_cached; if (extent_map_in_tree(em) && start >= em->start && start < extent_map_end(em)) { - atomic_inc(&em->refs); + refcount_inc(&em->refs); return em; } @@ -2870,7 +2883,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0); if (em_cached && !IS_ERR_OR_NULL(em)) { BUG_ON(*em_cached); - atomic_inc(&em->refs); + refcount_inc(&em->refs); *em_cached = em; } return em; @@ -4364,6 +4377,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, return NULL; } +/* + * To cache previous fiemap extent + * + * Will be used for merging fiemap extent + */ +struct fiemap_cache { + u64 offset; + u64 phys; + u64 len; + u32 flags; + bool cached; +}; + +/* + * Helper to submit fiemap extent. + * + * Will try to merge current fiemap extent specified by @offset, @phys, + * @len and @flags with cached one. + * And only when we fails to merge, cached one will be submitted as + * fiemap extent. + * + * Return value is the same as fiemap_fill_next_extent(). + */ +static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache, + u64 offset, u64 phys, u64 len, u32 flags) +{ + int ret = 0; + + if (!cache->cached) + goto assign; + + /* + * Sanity check, extent_fiemap() should have ensured that new + * fiemap extent won't overlap with cahced one. + * Not recoverable. + * + * NOTE: Physical address can overlap, due to compression + */ + if (cache->offset + cache->len > offset) { + WARN_ON(1); + return -EINVAL; + } + + /* + * Only merges fiemap extents if + * 1) Their logical addresses are continuous + * + * 2) Their physical addresses are continuous + * So truly compressed (physical size smaller than logical size) + * extents won't get merged with each other + * + * 3) Share same flags except FIEMAP_EXTENT_LAST + * So regular extent won't get merged with prealloc extent + */ + if (cache->offset + cache->len == offset && + cache->phys + cache->len == phys && + (cache->flags & ~FIEMAP_EXTENT_LAST) == + (flags & ~FIEMAP_EXTENT_LAST)) { + cache->len += len; + cache->flags |= flags; + goto try_submit_last; + } + + /* Not mergeable, need to submit cached one */ + ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, + cache->len, cache->flags); + cache->cached = false; + if (ret) + return ret; +assign: + cache->cached = true; + cache->offset = offset; + cache->phys = phys; + cache->len = len; + cache->flags = flags; +try_submit_last: + if (cache->flags & FIEMAP_EXTENT_LAST) { + ret = fiemap_fill_next_extent(fieinfo, cache->offset, + cache->phys, cache->len, cache->flags); + cache->cached = false; + } + return ret; +} + +/* + * Sanity check for fiemap cache + * + * All fiemap cache should be submitted by emit_fiemap_extent() + * Iteration should be terminated either by last fiemap extent or + * fieinfo->fi_extents_max. + * So no cached fiemap should exist. + */ +static int check_fiemap_cache(struct btrfs_fs_info *fs_info, + struct fiemap_extent_info *fieinfo, + struct fiemap_cache *cache) +{ + int ret; + + if (!cache->cached) + return 0; + + /* Small and recoverbale problem, only to info developer */ +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(1); +#endif + btrfs_warn(fs_info, + "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x", + cache->offset, cache->phys, cache->len, cache->flags); + ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, + cache->len, cache->flags); + cache->cached = false; + if (ret > 0) + ret = 0; + return ret; +} + int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent) { @@ -4381,6 +4511,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct extent_state *cached_state = NULL; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; + struct fiemap_cache cache = { 0 }; int end = 0; u64 em_start = 0; u64 em_len = 0; @@ -4560,8 +4691,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= FIEMAP_EXTENT_LAST; end = 1; } - ret = fiemap_fill_next_extent(fieinfo, em_start, disko, - em_len, flags); + ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko, + em_len, flags); if (ret) { if (ret == 1) ret = 0; @@ -4569,6 +4700,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } } out_free: + if (!ret) + ret = check_fiemap_cache(root->fs_info, fieinfo, &cache); free_extent_map(em); out: btrfs_free_path(path); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 3e4fad4a909d110d9283f979ccb9dec9a48c607c..1eafa2f0ede370ae802bb882557b3d4ad5c26340 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -2,6 +2,7 @@ #define __EXTENTIO__ #include +#include #include "ulist.h" /* bits for the extent state */ @@ -14,14 +15,17 @@ #define EXTENT_DEFRAG (1U << 6) #define EXTENT_BOUNDARY (1U << 9) #define EXTENT_NODATASUM (1U << 10) -#define EXTENT_DO_ACCOUNTING (1U << 11) +#define EXTENT_CLEAR_META_RESV (1U << 11) #define EXTENT_FIRST_DELALLOC (1U << 12) #define EXTENT_NEED_WAIT (1U << 13) #define EXTENT_DAMAGED (1U << 14) #define EXTENT_NORESERVE (1U << 15) #define EXTENT_QGROUP_RESERVED (1U << 16) #define EXTENT_CLEAR_DATA_RESV (1U << 17) +#define EXTENT_DELALLOC_NEW (1U << 18) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \ + EXTENT_CLEAR_DATA_RESV) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) /* @@ -143,7 +147,7 @@ struct extent_state { /* ADD NEW ELEMENTS AFTER THIS */ wait_queue_head_t wq; - atomic_t refs; + refcount_t refs; unsigned state; struct io_failure_record *failrec; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 26f9ac719d20b4bff1a6b0a456ca45dd1752b4c7..69850155870c067d82768c67f3895a2e7a7c487d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -55,7 +55,7 @@ struct extent_map *alloc_extent_map(void) em->flags = 0; em->compress_type = BTRFS_COMPRESS_NONE; em->generation = 0; - atomic_set(&em->refs, 1); + refcount_set(&em->refs, 1); INIT_LIST_HEAD(&em->list); return em; } @@ -71,8 +71,8 @@ void free_extent_map(struct extent_map *em) { if (!em) return; - WARN_ON(atomic_read(&em->refs) == 0); - if (atomic_dec_and_test(&em->refs)) { + WARN_ON(refcount_read(&em->refs) == 0); + if (refcount_dec_and_test(&em->refs)) { WARN_ON(extent_map_in_tree(em)); WARN_ON(!list_empty(&em->list)); if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) @@ -322,7 +322,7 @@ static inline void setup_extent_mapping(struct extent_map_tree *tree, struct extent_map *em, int modified) { - atomic_inc(&em->refs); + refcount_inc(&em->refs); em->mod_start = em->start; em->mod_len = em->len; @@ -381,7 +381,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree, if (strict && !(end > em->start && start < extent_map_end(em))) return NULL; - atomic_inc(&em->refs); + refcount_inc(&em->refs); return em; } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index eb8b8fae036bc3c67ceea03220cdca503626546f..a67b2def54131f10326c71092f80f2cd2d706212 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -2,6 +2,7 @@ #define __EXTENTMAP__ #include +#include #define EXTENT_MAP_LAST_BYTE ((u64)-4) #define EXTENT_MAP_HOLE ((u64)-3) @@ -41,7 +42,7 @@ struct extent_map { */ struct map_lookup *map_lookup; }; - atomic_t refs; + refcount_t refs; unsigned int compress_type; struct list_head list; }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 520cb7230b2d2cb5ca798c0030fa446957799456..da1096eb1a406f648b1bb0c7f3ee1da0e3013646 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1404,6 +1404,47 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, } +static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode, + const u64 start, + const u64 len, + struct extent_state **cached_state) +{ + u64 search_start = start; + const u64 end = start + len - 1; + + while (search_start < end) { + const u64 search_len = end - search_start + 1; + struct extent_map *em; + u64 em_len; + int ret = 0; + + em = btrfs_get_extent(inode, NULL, 0, search_start, + search_len, 0); + if (IS_ERR(em)) + return PTR_ERR(em); + + if (em->block_start != EXTENT_MAP_HOLE) + goto next; + + em_len = em->len; + if (em->start < search_start) + em_len -= search_start - em->start; + if (em_len > search_len) + em_len = search_len; + + ret = set_extent_bit(&inode->io_tree, search_start, + search_start + em_len - 1, + EXTENT_DELALLOC_NEW, + NULL, cached_state, GFP_NOFS); +next: + search_start = extent_map_end(em); + free_extent_map(em); + if (ret) + return ret; + } + return 0; +} + /* * This function locks the extent and properly waits for data=ordered extents * to finish before allowing the pages to be modified if need. @@ -1432,8 +1473,11 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, + round_up(pos + write_bytes - start_pos, fs_info->sectorsize) - 1; - if (start_pos < inode->vfs_inode.i_size) { + if (start_pos < inode->vfs_inode.i_size || + (inode->flags & BTRFS_INODE_PREALLOC)) { struct btrfs_ordered_extent *ordered; + unsigned int clear_bits; + lock_extent_bits(&inode->io_tree, start_pos, last_pos, cached_state); ordered = btrfs_lookup_ordered_range(inode, start_pos, @@ -1454,11 +1498,19 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, } if (ordered) btrfs_put_ordered_extent(ordered); - + ret = btrfs_find_new_delalloc_bytes(inode, start_pos, + last_pos - start_pos + 1, + cached_state); + clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG; + if (ret) + clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED; clear_extent_bit(&inode->io_tree, start_pos, - last_pos, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, cached_state, GFP_NOFS); + last_pos, clear_bits, + (clear_bits & EXTENT_LOCKED) ? 1 : 0, + 0, cached_state, GFP_NOFS); + if (ret) + return ret; *lockstart = start_pos; *lockend = last_pos; ret = 1; @@ -2342,13 +2394,8 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) int ret = 0; em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0); - if (IS_ERR_OR_NULL(em)) { - if (!em) - ret = -ENOMEM; - else - ret = PTR_ERR(em); - return ret; - } + if (IS_ERR(em)) + return PTR_ERR(em); /* Hole or vacuum extent(only exists in no-hole mode) */ if (em->block_start == EXTENT_MAP_HOLE) { @@ -2835,11 +2882,8 @@ static long btrfs_fallocate(struct file *file, int mode, while (1) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, alloc_end - cur_offset, 0); - if (IS_ERR_OR_NULL(em)) { - if (!em) - ret = -ENOMEM; - else - ret = PTR_ERR(em); + if (IS_ERR(em)) { + ret = PTR_ERR(em); break; } last_byte = min(extent_map_end(em), alloc_end); @@ -2856,8 +2900,10 @@ static long btrfs_fallocate(struct file *file, int mode, } ret = btrfs_qgroup_reserve_data(inode, cur_offset, last_byte - cur_offset); - if (ret < 0) + if (ret < 0) { + free_extent_map(em); break; + } } else { /* * Do not need to reserve unwritten extent for this diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index da6841efac26b1be3509ad3e410c34e72b253a65..c5e6180cdb8c9250e3a525e311a42795c4831c32 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -355,7 +355,7 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) io_ctl->orig = io_ctl->cur; io_ctl->size = PAGE_SIZE; if (clear) - memset(io_ctl->cur, 0, PAGE_SIZE); + clear_page(io_ctl->cur); } static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index dd7fb22a955a6bff32be4632053bcd9086efe6e2..fc0bd84067582523b958db030a3439d1088f5832 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -167,8 +167,7 @@ static u8 *alloc_bitmap(u32 bitmap_size) if (mem) return mem; - return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, - PAGE_KERNEL); + return __vmalloc(bitmap_size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL); } int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index a97fdc156a03512bf36df40dd1b4278df845115f..baacc18668611b778b9270256a8962b439e4b78e 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length) { SHASH_DESC_ON_STACK(shash, tfm); u32 *ctx = (u32 *)shash_desc_ctx(shash); + u32 retval; int err; shash->tfm = tfm; @@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length) err = crypto_shash_update(shash, address, length); BUG_ON(err); - return *ctx; + retval = *ctx; + barrier_data(ctx); + return retval; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5e71f1ea3391b034dc8e6f55f62d82dbe76e9811..ef3c98c527c136c89b081f5fb0c4ebd414105cbe 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -115,6 +115,31 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, u64 ram_bytes, int compress_type, int type); +static void __endio_write_update_ordered(struct inode *inode, + const u64 offset, const u64 bytes, + const bool uptodate); + +/* + * Cleanup all submitted ordered extents in specified range to handle errors + * from the fill_dellaloc() callback. + * + * NOTE: caller must ensure that when an error happens, it can not call + * extent_clear_unlock_delalloc() to clear both the bits EXTENT_DO_ACCOUNTING + * and EXTENT_DELALLOC simultaneously, because that causes the reserved metadata + * to be released, which we want to happen only when finishing the ordered + * extent (btrfs_finish_ordered_io()). Also note that the caller of the + * fill_delalloc() callback already does proper cleanup for the first page of + * the range, that is, it invokes the callback writepage_end_io_hook() for the + * range of the first page. + */ +static inline void btrfs_cleanup_ordered_extents(struct inode *inode, + const u64 offset, + const u64 bytes) +{ + return __endio_write_update_ordered(inode, offset + PAGE_SIZE, + bytes - PAGE_SIZE, false); +} + static int btrfs_dirty_inode(struct inode *inode); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS @@ -547,7 +572,7 @@ static noinline void compress_file_range(struct inode *inode, } if (ret <= 0) { unsigned long clear_flags = EXTENT_DELALLOC | - EXTENT_DEFRAG; + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG; unsigned long page_error_op; clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; @@ -565,8 +590,10 @@ static noinline void compress_file_range(struct inode *inode, PAGE_SET_WRITEBACK | page_error_op | PAGE_END_WRITEBACK); - btrfs_free_reserved_data_space_noquota(inode, start, - end - start + 1); + if (ret == 0) + btrfs_free_reserved_data_space_noquota(inode, + start, + end - start + 1); goto free_pages_out; } } @@ -852,6 +879,7 @@ static noinline void submit_compressed_extents(struct inode *inode, async_extent->start + async_extent->ram_size - 1, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK | @@ -918,10 +946,13 @@ static noinline int cow_file_range(struct inode *inode, u64 num_bytes; unsigned long ram_size; u64 disk_num_bytes; - u64 cur_alloc_size; + u64 cur_alloc_size = 0; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; + unsigned clear_bits; + unsigned long page_ops; + bool extent_reserved = false; int ret = 0; if (btrfs_is_free_space_inode(BTRFS_I(inode))) { @@ -944,6 +975,7 @@ static noinline int cow_file_range(struct inode *inode, extent_clear_unlock_delalloc(inode, start, end, delalloc_end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); @@ -966,14 +998,14 @@ static noinline int cow_file_range(struct inode *inode, start + num_bytes - 1, 0); while (disk_num_bytes > 0) { - unsigned long op; - cur_alloc_size = disk_num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, fs_info->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; + cur_alloc_size = ins.offset; + extent_reserved = true; ram_size = ins.offset; em = create_io_em(inode, start, ins.offset, /* len */ @@ -988,7 +1020,6 @@ static noinline int cow_file_range(struct inode *inode, goto out_reserve; free_extent_map(em); - cur_alloc_size = ins.offset; ret = btrfs_add_ordered_extent(inode, start, ins.objectid, ram_size, cur_alloc_size, 0); if (ret) @@ -998,15 +1029,24 @@ static noinline int cow_file_range(struct inode *inode, BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); + /* + * Only drop cache here, and process as normal. + * + * We must not allow extent_clear_unlock_delalloc() + * at out_unlock label to free meta of this ordered + * extent, as its meta should be freed by + * btrfs_finish_ordered_io(). + * + * So we must continue until @start is increased to + * skip current ordered extent. + */ if (ret) - goto out_drop_extent_cache; + btrfs_drop_extent_cache(BTRFS_I(inode), start, + start + ram_size - 1, 0); } btrfs_dec_block_group_reservations(fs_info, ins.objectid); - if (disk_num_bytes < cur_alloc_size) - break; - /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits @@ -1014,18 +1054,30 @@ static noinline int cow_file_range(struct inode *inode, * Do set the Private2 bit so we know this page was properly * setup for writepage */ - op = unlock ? PAGE_UNLOCK : 0; - op |= PAGE_SET_PRIVATE2; + page_ops = unlock ? PAGE_UNLOCK : 0; + page_ops |= PAGE_SET_PRIVATE2; extent_clear_unlock_delalloc(inode, start, start + ram_size - 1, delalloc_end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC, - op); - disk_num_bytes -= cur_alloc_size; + page_ops); + if (disk_num_bytes < cur_alloc_size) + disk_num_bytes = 0; + else + disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; + extent_reserved = false; + + /* + * btrfs_reloc_clone_csums() error, since start is increased + * extent_clear_unlock_delalloc() at out_unlock label won't + * free metadata of current ordered extent, we're OK to exit. + */ + if (ret) + goto out_unlock; } out: return ret; @@ -1036,12 +1088,35 @@ static noinline int cow_file_range(struct inode *inode, btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); out_unlock: + clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV; + page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | + PAGE_END_WRITEBACK; + /* + * If we reserved an extent for our delalloc range (or a subrange) and + * failed to create the respective ordered extent, then it means that + * when we reserved the extent we decremented the extent's size from + * the data space_info's bytes_may_use counter and incremented the + * space_info's bytes_reserved counter by the same amount. We must make + * sure extent_clear_unlock_delalloc() does not try to decrement again + * the data space_info's bytes_may_use counter, therefore we do not pass + * it the flag EXTENT_CLEAR_DATA_RESV. + */ + if (extent_reserved) { + extent_clear_unlock_delalloc(inode, start, + start + cur_alloc_size, + start + cur_alloc_size, + locked_page, + clear_bits, + page_ops); + start += cur_alloc_size; + if (start >= end) + goto out; + } extent_clear_unlock_delalloc(inode, start, end, delalloc_end, locked_page, - EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | - EXTENT_DELALLOC | EXTENT_DEFRAG, - PAGE_UNLOCK | PAGE_CLEAR_DIRTY | - PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); + clear_bits | EXTENT_CLEAR_DATA_RESV, + page_ops); goto out; } @@ -1414,15 +1489,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, BUG_ON(ret); /* -ENOMEM */ if (root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID) { + BTRFS_DATA_RELOC_TREE_OBJECTID) + /* + * Error handled later, as we must prevent + * extent_clear_unlock_delalloc() in error handler + * from freeing metadata of created ordered extent. + */ ret = btrfs_reloc_clone_csums(inode, cur_offset, num_bytes); - if (ret) { - if (!nolock && nocow) - btrfs_end_write_no_snapshoting(root); - goto error; - } - } extent_clear_unlock_delalloc(inode, cur_offset, cur_offset + num_bytes - 1, end, @@ -1434,6 +1508,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); cur_offset = extent_end; + + /* + * btrfs_reloc_clone_csums() error, now we're OK to call error + * handler, as metadata for created ordered extent will only + * be freed by btrfs_finish_ordered_io(). + */ + if (ret) + goto error; if (cur_offset > end) break; } @@ -1509,6 +1591,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, ret = cow_file_range_async(inode, locked_page, start, end, page_started, nr_written); } + if (ret) + btrfs_cleanup_ordered_extents(inode, start, end - start + 1); return ret; } @@ -1693,6 +1777,14 @@ static void btrfs_set_bit_hook(struct inode *inode, btrfs_add_delalloc_inodes(root, inode); spin_unlock(&BTRFS_I(inode)->lock); } + + if (!(state->state & EXTENT_DELALLOC_NEW) && + (*bits & EXTENT_DELALLOC_NEW)) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 - + state->start; + spin_unlock(&BTRFS_I(inode)->lock); + } } /* @@ -1722,7 +1814,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { + } else if (!(*bits & EXTENT_CLEAR_META_RESV)) { spin_lock(&inode->lock); inode->outstanding_extents -= num_extents; spin_unlock(&inode->lock); @@ -1733,7 +1825,7 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, * don't need to call dellalloc_release_metadata if there is an * error. */ - if (*bits & EXTENT_DO_ACCOUNTING && + if (*bits & EXTENT_CLEAR_META_RESV && root != fs_info->tree_root) btrfs_delalloc_release_metadata(inode, len); @@ -1741,10 +1833,9 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, if (btrfs_is_testing(fs_info)) return; - if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID - && do_list && !(state->state & EXTENT_NORESERVE) - && (*bits & (EXTENT_DO_ACCOUNTING | - EXTENT_CLEAR_DATA_RESV))) + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && + do_list && !(state->state & EXTENT_NORESERVE) && + (*bits & EXTENT_CLEAR_DATA_RESV)) btrfs_free_reserved_data_space_noquota( &inode->vfs_inode, state->start, len); @@ -1759,6 +1850,14 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, btrfs_del_delalloc_inode(root, inode); spin_unlock(&inode->lock); } + + if ((state->state & EXTENT_DELALLOC_NEW) && + (*bits & EXTENT_DELALLOC_NEW)) { + spin_lock(&inode->lock); + ASSERT(inode->new_delalloc_bytes >= len); + inode->new_delalloc_bytes -= len; + spin_unlock(&inode->lock); + } } /* @@ -2791,6 +2890,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) u64 logical_len = ordered_extent->len; bool nolock; bool truncated = false; + bool range_locked = false; + bool clear_new_delalloc_bytes = false; + + if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && + !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags)) + clear_new_delalloc_bytes = true; nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); @@ -2839,13 +2945,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } + range_locked = true; lock_extent_bits(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, &cached_state); ret = test_range_bit(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, - EXTENT_DEFRAG, 1, cached_state); + EXTENT_DEFRAG, 0, cached_state); if (ret) { u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); if (0 && last_snapshot >= BTRFS_I(inode)->generation) @@ -2864,7 +2971,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; - goto out_unlock; + goto out; } trans->block_rsv = &fs_info->delalloc_block_rsv; @@ -2896,7 +3003,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) trans->transid); if (ret < 0) { btrfs_abort_transaction(trans, ret); - goto out_unlock; + goto out; } add_pending_csums(trans, inode, &ordered_extent->list); @@ -2905,14 +3012,26 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) { /* -ENOMEM or corruption */ btrfs_abort_transaction(trans, ret); - goto out_unlock; + goto out; } ret = 0; -out_unlock: - unlock_extent_cached(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, &cached_state, GFP_NOFS); out: + if (range_locked || clear_new_delalloc_bytes) { + unsigned int clear_bits = 0; + + if (range_locked) + clear_bits |= EXTENT_LOCKED; + if (clear_new_delalloc_bytes) + clear_bits |= EXTENT_DELALLOC_NEW; + clear_extent_bit(&BTRFS_I(inode)->io_tree, + ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, + clear_bits, + (clear_bits & EXTENT_LOCKED) ? 1 : 0, + 0, &cached_state, GFP_NOFS); + } + if (root != fs_info->tree_root) btrfs_delalloc_release_metadata(BTRFS_I(inode), ordered_extent->len); @@ -4401,9 +4520,17 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (extent_type != BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_num_bytes(leaf, fi); + + trace_btrfs_truncate_show_fi_regular( + BTRFS_I(inode), leaf, fi, + found_key.offset); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { item_end += btrfs_file_extent_inline_len(leaf, path->slots[0], fi); + + trace_btrfs_truncate_show_fi_inline( + BTRFS_I(inode), leaf, fi, path->slots[0], + found_key.offset); } item_end--; } @@ -4603,13 +4730,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, btrfs_free_path(path); - if (err == 0) { - /* only inline file may have last_size != new_size */ - if (new_size >= fs_info->sectorsize || - new_size > fs_info->max_inline) - ASSERT(last_size == new_size); - } - if (be_nice && bytes_deleted > SZ_32M) { unsigned long updates = trans->delayed_ref_updates; if (updates) { @@ -6735,7 +6855,6 @@ static noinline int uncompress_inline(struct btrfs_path *path, * * This also copies inline extents directly into the page. */ - struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, @@ -6835,11 +6954,18 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, found_type == BTRFS_FILE_EXTENT_PREALLOC) { extent_end = extent_start + btrfs_file_extent_num_bytes(leaf, item); + + trace_btrfs_get_extent_show_fi_regular(inode, leaf, item, + extent_start); } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size_t size; size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); + + trace_btrfs_get_extent_show_fi_inline(inode, leaf, item, + path->slots[0], + extent_start); } next: if (start >= extent_end) { @@ -7037,19 +7163,17 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, em = btrfs_get_extent(inode, page, pg_offset, start, len, create); if (IS_ERR(em)) return em; - if (em) { - /* - * if our em maps to - * - a hole or - * - a pre-alloc extent, - * there might actually be delalloc bytes behind it. - */ - if (em->block_start != EXTENT_MAP_HOLE && - !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) - return em; - else - hole_em = em; - } + /* + * If our em maps to: + * - a hole or + * - a pre-alloc extent, + * there might actually be delalloc bytes behind it. + */ + if (em->block_start != EXTENT_MAP_HOLE && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + return em; + else + hole_em = em; /* check to see if we've wrapped (len == -1 or similar) */ end = start + len; @@ -7359,8 +7483,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) int found = false; void **pagep = NULL; struct page *page = NULL; - int start_idx; - int end_idx; + unsigned long start_idx; + unsigned long end_idx; start_idx = start >> PAGE_SHIFT; @@ -8127,17 +8251,26 @@ static void btrfs_endio_direct_read(struct bio *bio) bio_put(bio); } -static void btrfs_endio_direct_write_update_ordered(struct inode *inode, - const u64 offset, - const u64 bytes, - const int uptodate) +static void __endio_write_update_ordered(struct inode *inode, + const u64 offset, const u64 bytes, + const bool uptodate) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ordered_extent *ordered = NULL; + struct btrfs_workqueue *wq; + btrfs_work_func_t func; u64 ordered_offset = offset; u64 ordered_bytes = bytes; int ret; + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + wq = fs_info->endio_freespace_worker; + func = btrfs_freespace_write_helper; + } else { + wq = fs_info->endio_write_workers; + func = btrfs_endio_write_helper; + } + again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, @@ -8146,9 +8279,8 @@ static void btrfs_endio_direct_write_update_ordered(struct inode *inode, if (!ret) goto out_test; - btrfs_init_work(&ordered->work, btrfs_endio_write_helper, - finish_ordered_fn, NULL, NULL); - btrfs_queue_work(fs_info->endio_write_workers, &ordered->work); + btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL); + btrfs_queue_work(wq, &ordered->work); out_test: /* * our bio might span multiple ordered extents. If we haven't @@ -8166,10 +8298,8 @@ static void btrfs_endio_direct_write(struct bio *bio) struct btrfs_dio_private *dip = bio->bi_private; struct bio *dio_bio = dip->dio_bio; - btrfs_endio_direct_write_update_ordered(dip->inode, - dip->logical_offset, - dip->bytes, - !bio->bi_error); + __endio_write_update_ordered(dip->inode, dip->logical_offset, + dip->bytes, !bio->bi_error); kfree(dip); @@ -8530,10 +8660,10 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, io_bio = NULL; } else { if (write) - btrfs_endio_direct_write_update_ordered(inode, + __endio_write_update_ordered(inode, file_offset, dio_bio->bi_iter.bi_size, - 0); + false); else unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, file_offset + dio_bio->bi_iter.bi_size - 1); @@ -8668,11 +8798,11 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) */ if (dio_data.unsubmitted_oe_range_start < dio_data.unsubmitted_oe_range_end) - btrfs_endio_direct_write_update_ordered(inode, + __endio_write_update_ordered(inode, dio_data.unsubmitted_oe_range_start, dio_data.unsubmitted_oe_range_end - dio_data.unsubmitted_oe_range_start, - 0); + false); } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, offset, count - (size_t)ret); @@ -8819,6 +8949,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, if (!inode_evicting) clear_extent_bit(tree, start, end, EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DELALLOC_NEW | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); @@ -8876,8 +9007,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | - EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, 1, 1, + EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | + EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, &cached_state, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); @@ -9248,6 +9379,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; + ei->new_delalloc_bytes = 0; ei->defrag_bytes = 0; ei->disk_i_size = 0; ei->flags = 0; @@ -9313,6 +9445,7 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(BTRFS_I(inode)->outstanding_extents); WARN_ON(BTRFS_I(inode)->reserved_extents); WARN_ON(BTRFS_I(inode)->delalloc_bytes); + WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); WARN_ON(BTRFS_I(inode)->csum_bytes); WARN_ON(BTRFS_I(inode)->defrag_bytes); @@ -9436,7 +9569,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat, stat->dev = BTRFS_I(inode)->root->anon_dev; spin_lock(&BTRFS_I(inode)->lock); - delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; + delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes; spin_unlock(&BTRFS_I(inode)->lock); stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + ALIGN(delalloc_bytes, blocksize)) >> 9; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dabfc7ac48a674db12252722c99509d9462dbe53..e176375f374f917be5c50a46ed1c94444d1949d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1504,7 +1504,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, if (ret) return ret; - if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { + if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { mnt_drop_write_file(file); return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } @@ -1619,7 +1619,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, kfree(vol_args); out: mutex_unlock(&fs_info->volume_mutex); - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); mnt_drop_write_file(file); return ret; } @@ -2661,7 +2661,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) + if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; mutex_lock(&fs_info->volume_mutex); @@ -2680,7 +2680,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) kfree(vol_args); out: mutex_unlock(&fs_info->volume_mutex); - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); return ret; } @@ -2708,7 +2708,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) return -EOPNOTSUPP; - if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { + if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out; } @@ -2721,7 +2721,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) ret = btrfs_rm_device(fs_info, vol_args->name, 0); } mutex_unlock(&fs_info->volume_mutex); - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); if (!ret) { if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) @@ -2752,7 +2752,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) if (ret) return ret; - if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { + if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; goto out_drop_write; } @@ -2772,7 +2772,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) btrfs_info(fs_info, "disk deleted %s", vol_args->name); kfree(vol_args); out: - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); out_drop_write: mnt_drop_write_file(file); @@ -3539,12 +3539,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u64 last_dest_end = destoff; ret = -ENOMEM; - buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN); - if (!buf) { - buf = vmalloc(fs_info->nodesize); - if (!buf) - return ret; - } + buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); + if (!buf) + return ret; path = btrfs_alloc_path(); if (!path) { @@ -4442,13 +4439,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info, ret = -EROFS; goto out; } - if (atomic_xchg( - &fs_info->mutually_exclusive_operation_running, 1)) { + if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; } else { ret = btrfs_dev_replace_by_ioctl(fs_info, p); - atomic_set( - &fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); } break; case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: @@ -4643,7 +4638,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) return ret; again: - if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { + if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); need_unlock = true; @@ -4689,7 +4684,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) } locked: - BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running)); + BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)); if (arg) { bargs = memdup_user(arg, sizeof(*bargs)); @@ -4745,11 +4740,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) do_balance: /* - * Ownership of bctl and mutually_exclusive_operation_running + * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP * goes to to btrfs_balance. bctl is freed in __cancel_balance, * or, if restriper was paused all the way until unmount, in - * free_fs_info. mutually_exclusive_operation_running is - * cleared in __cancel_balance. + * free_fs_info. The flag is cleared in __cancel_balance. */ need_unlock = false; @@ -4769,7 +4763,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); if (need_unlock) - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); out: mnt_drop_write_file(file); return ret; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9a46878ba60fa973562139f32629810b476ed453..7b40e2e7292a41a047b0f8e300304822b7c63e74 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -212,7 +212,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); /* one ref for the tree */ - atomic_set(&entry->refs, 1); + refcount_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); INIT_LIST_HEAD(&entry->list); INIT_LIST_HEAD(&entry->root_extent_list); @@ -358,7 +358,7 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, out: if (!ret && cached && entry) { *cached = entry; - atomic_inc(&entry->refs); + refcount_inc(&entry->refs); } spin_unlock_irqrestore(&tree->lock, flags); return ret == 0; @@ -425,7 +425,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, out: if (!ret && cached && entry) { *cached = entry; - atomic_inc(&entry->refs); + refcount_inc(&entry->refs); } spin_unlock_irqrestore(&tree->lock, flags); return ret == 0; @@ -456,7 +456,7 @@ void btrfs_get_logged_extents(struct btrfs_inode *inode, if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) continue; list_add(&ordered->log_list, logged_list); - atomic_inc(&ordered->refs); + refcount_inc(&ordered->refs); } spin_unlock_irq(&tree->lock); } @@ -565,7 +565,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); - if (atomic_dec_and_test(&entry->refs)) { + if (refcount_dec_and_test(&entry->refs)) { ASSERT(list_empty(&entry->log_list)); ASSERT(list_empty(&entry->trans_list)); ASSERT(list_empty(&entry->root_extent_list)); @@ -623,7 +623,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, spin_lock(&fs_info->trans_lock); trans = fs_info->running_transaction; if (trans) - atomic_inc(&trans->use_count); + refcount_inc(&trans->use_count); spin_unlock(&fs_info->trans_lock); ASSERT(trans); @@ -690,7 +690,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, list_move_tail(&ordered->root_extent_list, &root->ordered_extents); - atomic_inc(&ordered->refs); + refcount_inc(&ordered->refs); spin_unlock(&root->ordered_extent_lock); btrfs_init_work(&ordered->flush_work, @@ -870,7 +870,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, if (!offset_in_entry(entry, file_offset)) entry = NULL; if (entry) - atomic_inc(&entry->refs); + refcount_inc(&entry->refs); out: spin_unlock_irq(&tree->lock); return entry; @@ -911,7 +911,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range( } out: if (entry) - atomic_inc(&entry->refs); + refcount_inc(&entry->refs); spin_unlock_irq(&tree->lock); return entry; } @@ -948,7 +948,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) goto out; entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); - atomic_inc(&entry->refs); + refcount_inc(&entry->refs); out: spin_unlock_irq(&tree->lock); return entry; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 195c93b67fe002861153fb58f6506fa93deb0434..e0c1d5b8d859c95c9e870b8feb735c921a09d8f1 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -113,7 +113,7 @@ struct btrfs_ordered_extent { int compress_type; /* reference count */ - atomic_t refs; + refcount_t refs; /* the inode we belong to */ struct inode *inode; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a59801dc2a340bcd3c5a34af9ef7277061967377..deffbeb74a0be7499c42578efc51903c3737f3bf 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -47,50 +47,6 @@ * - check all ioctl parameters */ -/* - * one struct for each qgroup, organized in fs_info->qgroup_tree. - */ -struct btrfs_qgroup { - u64 qgroupid; - - /* - * state - */ - u64 rfer; /* referenced */ - u64 rfer_cmpr; /* referenced compressed */ - u64 excl; /* exclusive */ - u64 excl_cmpr; /* exclusive compressed */ - - /* - * limits - */ - u64 lim_flags; /* which limits are set */ - u64 max_rfer; - u64 max_excl; - u64 rsv_rfer; - u64 rsv_excl; - - /* - * reservation tracking - */ - u64 reserved; - - /* - * lists - */ - struct list_head groups; /* groups this group is member of */ - struct list_head members; /* groups that are members of this group */ - struct list_head dirty; /* dirty groups */ - struct rb_node node; /* tree of qgroups */ - - /* - * temp variables for accounting operations - * Refer to qgroup_shared_accounting() for details. - */ - u64 old_refcnt; - u64 new_refcnt; -}; - static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, int mod) { @@ -1042,9 +998,12 @@ static void report_reserved_underflow(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, u64 num_bytes) { - btrfs_warn(fs_info, +#ifdef CONFIG_BTRFS_DEBUG + WARN_ON(qgroup->reserved < num_bytes); + btrfs_debug(fs_info, "qgroup %llu reserved space underflow, have: %llu, to free: %llu", qgroup->qgroupid, qgroup->reserved, num_bytes); +#endif qgroup->reserved = 0; } /* @@ -1075,7 +1034,8 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, qgroup->excl += sign * num_bytes; qgroup->excl_cmpr += sign * num_bytes; if (sign > 0) { - if (WARN_ON(qgroup->reserved < num_bytes)) + trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes); + if (qgroup->reserved < num_bytes) report_reserved_underflow(fs_info, qgroup, num_bytes); else qgroup->reserved -= num_bytes; @@ -1100,7 +1060,9 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, WARN_ON(sign < 0 && qgroup->excl < num_bytes); qgroup->excl += sign * num_bytes; if (sign > 0) { - if (WARN_ON(qgroup->reserved < num_bytes)) + trace_qgroup_update_reserve(fs_info, qgroup, + -(s64)num_bytes); + if (qgroup->reserved < num_bytes) report_reserved_underflow(fs_info, qgroup, num_bytes); else @@ -2055,12 +2017,12 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, if (!ret) { /* - * Use (u64)-1 as time_seq to do special search, which + * Use SEQ_LAST as time_seq to do special search, which * doesn't lock tree or delayed_refs and search current * root. It's safe inside commit_transaction(). */ ret = btrfs_find_all_roots(trans, fs_info, - record->bytenr, (u64)-1, &new_roots); + record->bytenr, SEQ_LAST, &new_roots); if (ret < 0) goto cleanup; if (qgroup_to_skip) @@ -2367,6 +2329,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) struct btrfs_fs_info *fs_info = root->fs_info; u64 ref_root = root->root_key.objectid; int ret = 0; + int retried = 0; struct ulist_node *unode; struct ulist_iterator uiter; @@ -2375,7 +2338,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) if (num_bytes == 0) return 0; - +retry: spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; if (!quota_root) @@ -2402,6 +2365,27 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) qg = unode_aux_to_qgroup(unode); if (enforce && !qgroup_check_limits(qg, num_bytes)) { + /* + * Commit the tree and retry, since we may have + * deletions which would free up space. + */ + if (!retried && qg->reserved > 0) { + struct btrfs_trans_handle *trans; + + spin_unlock(&fs_info->qgroup_lock); + ret = btrfs_start_delalloc_inodes(root, 0); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, -1, 0, (u64)-1); + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + ret = btrfs_commit_transaction(trans); + if (ret) + return ret; + retried++; + goto retry; + } ret = -EDQUOT; goto out; } @@ -2424,6 +2408,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) qg = unode_aux_to_qgroup(unode); + trace_qgroup_update_reserve(fs_info, qg, num_bytes); qg->reserved += num_bytes; } @@ -2469,7 +2454,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, qg = unode_aux_to_qgroup(unode); - if (WARN_ON(qg->reserved < num_bytes)) + trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes); + if (qg->reserved < num_bytes) report_reserved_underflow(fs_info, qg, num_bytes); else qg->reserved -= num_bytes; @@ -2487,18 +2473,6 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->qgroup_lock); } -void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) -{ - if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) - return; - btrfs_err(trans->fs_info, - "qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x", - trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", - (u32)(trans->delayed_ref_elem.seq >> 32), - (u32)trans->delayed_ref_elem.seq); - BUG(); -} - /* * returns < 0 on error, 0 when more leafs are to be scanned. * returns 1 when done. @@ -2886,14 +2860,14 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, if (ret < 0) goto out; - if (free) { - btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, - BTRFS_I(inode)->root->objectid, - changeset.bytes_changed); + if (free) trace_op = QGROUP_FREE; - } trace_btrfs_qgroup_release_data(inode, start, len, changeset.bytes_changed, trace_op); + if (free) + btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, + BTRFS_I(inode)->root->objectid, + changeset.bytes_changed); out: ulist_release(&changeset.range_changed); return ret; @@ -2945,6 +2919,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, return 0; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); + trace_qgroup_meta_reserve(root, (s64)num_bytes); ret = qgroup_reserve(root, num_bytes, enforce); if (ret < 0) return ret; @@ -2964,6 +2939,7 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root) reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); if (reserved == 0) return; + trace_qgroup_meta_reserve(root, -(s64)reserved); btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); } @@ -2978,6 +2954,7 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); atomic64_sub(num_bytes, &root->qgroup_meta_rsv); + trace_qgroup_meta_reserve(root, -(s64)num_bytes); btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 26932a8a19930bc48ff14ad3c154a7326ce3a111..fe04d3f295c67f0b97b79095885878237260a274 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -61,6 +61,50 @@ struct btrfs_qgroup_extent_record { struct ulist *old_roots; }; +/* + * one struct for each qgroup, organized in fs_info->qgroup_tree. + */ +struct btrfs_qgroup { + u64 qgroupid; + + /* + * state + */ + u64 rfer; /* referenced */ + u64 rfer_cmpr; /* referenced compressed */ + u64 excl; /* exclusive */ + u64 excl_cmpr; /* exclusive compressed */ + + /* + * limits + */ + u64 lim_flags; /* which limits are set */ + u64 max_rfer; + u64 max_excl; + u64 rsv_rfer; + u64 rsv_excl; + + /* + * reservation tracking + */ + u64 reserved; + + /* + * lists + */ + struct list_head groups; /* groups this group is member of */ + struct list_head members; /* groups that are members of this group */ + struct list_head dirty; /* dirty groups */ + struct rb_node node; /* tree of qgroups */ + + /* + * temp variables for accounting operations + * Refer to qgroup_shared_accounting() for details. + */ + u64 old_refcnt; + u64 new_refcnt; +}; + /* * For qgroup event trace points only */ @@ -186,17 +230,12 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, struct btrfs_qgroup_inherit *inherit); void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes); -/* - * TODO: Add proper trace point for it, as btrfs_qgroup_free() is - * called by everywhere, can't provide good trace for delayed ref case. - */ static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info, u64 ref_root, u64 num_bytes) { - btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes); trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes); + btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes); } -void assert_qgroups_uptodate(struct btrfs_trans_handle *trans); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1571bf26dc077a0575b39977ead28e229d68550f..d8ea0eb76325e9b25d42dfa4a99c63918981aa2f 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -149,7 +149,7 @@ struct btrfs_raid_bio { int generic_bio_cnt; - atomic_t refs; + refcount_t refs; atomic_t stripes_pending; @@ -389,7 +389,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio) if (bio_list_empty(&rbio->bio_list)) { if (!list_empty(&rbio->hash_list)) { list_del_init(&rbio->hash_list); - atomic_dec(&rbio->refs); + refcount_dec(&rbio->refs); BUG_ON(!list_empty(&rbio->plug_list)); } } @@ -480,7 +480,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio) /* bump our ref if we were not in the list before */ if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags)) - atomic_inc(&rbio->refs); + refcount_inc(&rbio->refs); if (!list_empty(&rbio->stripe_cache)){ list_move(&rbio->stripe_cache, &table->stripe_cache); @@ -689,7 +689,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) test_bit(RBIO_CACHE_BIT, &cur->flags) && !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) { list_del_init(&cur->hash_list); - atomic_dec(&cur->refs); + refcount_dec(&cur->refs); steal_rbio(cur, rbio); cache_drop = cur; @@ -738,7 +738,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) } } lockit: - atomic_inc(&rbio->refs); + refcount_inc(&rbio->refs); list_add(&rbio->hash_list, &h->hash_list); out: spin_unlock_irqrestore(&h->lock, flags); @@ -784,7 +784,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) } list_del_init(&rbio->hash_list); - atomic_dec(&rbio->refs); + refcount_dec(&rbio->refs); /* * we use the plug list to hold all the rbios @@ -801,7 +801,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) list_del_init(&rbio->plug_list); list_add(&next->hash_list, &h->hash_list); - atomic_inc(&next->refs); + refcount_inc(&next->refs); spin_unlock(&rbio->bio_list_lock); spin_unlock_irqrestore(&h->lock, flags); @@ -843,8 +843,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) { int i; - WARN_ON(atomic_read(&rbio->refs) < 0); - if (!atomic_dec_and_test(&rbio->refs)) + if (!refcount_dec_and_test(&rbio->refs)) return; WARN_ON(!list_empty(&rbio->stripe_cache)); @@ -997,7 +996,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, rbio->stripe_npages = stripe_npages; rbio->faila = -1; rbio->failb = -1; - atomic_set(&rbio->refs, 1); + refcount_set(&rbio->refs, 1); atomic_set(&rbio->error, 0); atomic_set(&rbio->stripes_pending, 0); @@ -2118,6 +2117,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, struct btrfs_raid_bio *rbio; int ret; + if (generic_io) { + ASSERT(bbio->mirror_num == mirror_num); + btrfs_io_bio(bio)->mirror_num = mirror_num; + } + rbio = alloc_rbio(fs_info, bbio, stripe_len); if (IS_ERR(rbio)) { if (generic_io) @@ -2194,6 +2198,8 @@ static void read_rebuild_work(struct btrfs_work *work) /* * The following code is used to scrub/replace the parity stripe * + * Caller must have already increased bio_counter for getting @bbio. + * * Note: We need make sure all the pages that add into the scrub/replace * raid bio are correct and not be changed during the scrub/replace. That * is those pages just hold metadata or file data with checksum. @@ -2231,6 +2237,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, ASSERT(rbio->stripe_npages == stripe_nsectors); bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); + /* + * We have already increased bio_counter when getting bbio, record it + * so we can free it at rbio_orig_end_io(). + */ + rbio->generic_bio_cnt = 1; + return rbio; } @@ -2673,6 +2685,12 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, return NULL; } + /* + * When we get bbio, we have already increased bio_counter, record it + * so we can free it at rbio_orig_end_io() + */ + rbio->generic_bio_cnt = 1; + return rbio; } diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index e88bca87f5d275c7c25b2ee6279fe4cdc1482413..a17e775a4a89fca1e48214c5abb02756dd1c69d6 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -209,9 +209,9 @@ static void __readahead_hook(struct btrfs_fs_info *fs_info, return; } -int btree_readahead_hook(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int err) +int btree_readahead_hook(struct extent_buffer *eb, int err) { + struct btrfs_fs_info *fs_info = eb->fs_info; int ret = 0; struct reada_extent *re; @@ -235,10 +235,10 @@ int btree_readahead_hook(struct btrfs_fs_info *fs_info, return ret; } -static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, - struct btrfs_device *dev, u64 logical, +static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, struct btrfs_bio *bbio) { + struct btrfs_fs_info *fs_info = dev->fs_info; int ret; struct reada_zone *zone; struct btrfs_block_group_cache *cache = NULL; @@ -270,6 +270,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, if (!zone) return NULL; + ret = radix_tree_preload(GFP_KERNEL); + if (ret) { + kfree(zone); + return NULL; + } + zone->start = start; zone->end = end; INIT_LIST_HEAD(&zone->list); @@ -299,6 +305,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, zone = NULL; } spin_unlock(&fs_info->reada_lock); + radix_tree_preload_end(); return zone; } @@ -313,7 +320,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, struct btrfs_bio *bbio = NULL; struct btrfs_device *dev; struct btrfs_device *prev_dev; - u32 blocksize; u64 length; int real_stripes; int nzones = 0; @@ -334,7 +340,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!re) return NULL; - blocksize = fs_info->nodesize; re->logical = logical; re->top = *top; INIT_LIST_HEAD(&re->extctl); @@ -344,10 +349,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, /* * map block */ - length = blocksize; + length = fs_info->nodesize; ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &length, &bbio, 0); - if (ret || !bbio || length < blocksize) + if (ret || !bbio || length < fs_info->nodesize) goto error; if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { @@ -367,7 +372,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!dev->bdev) continue; - zone = reada_find_zone(fs_info, dev, logical, bbio); + zone = reada_find_zone(dev, logical, bbio); if (!zone) continue; @@ -386,6 +391,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, goto error; } + ret = radix_tree_preload(GFP_KERNEL); + if (ret) + goto error; + /* insert extent in reada_tree + all per-device trees, all or nothing */ btrfs_dev_replace_lock(&fs_info->dev_replace, 0); spin_lock(&fs_info->reada_lock); @@ -395,13 +404,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + radix_tree_preload_end(); goto error; } if (ret) { spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + radix_tree_preload_end(); goto error; } + radix_tree_preload_end(); prev_dev = NULL; dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( &fs_info->dev_replace); @@ -639,9 +651,9 @@ static int reada_pick_zone(struct btrfs_device *dev) return 1; } -static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, - struct btrfs_device *dev) +static int reada_start_machine_dev(struct btrfs_device *dev) { + struct btrfs_fs_info *fs_info = dev->fs_info; struct reada_extent *re = NULL; int mirror_num = 0; struct extent_buffer *eb = NULL; @@ -754,8 +766,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) list_for_each_entry(device, &fs_devices->devices, dev_list) { if (atomic_read(&device->reada_in_flight) < MAX_IN_FLIGHT) - enqueued += reada_start_machine_dev(fs_info, - device); + enqueued += reada_start_machine_dev(device); } mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a08224eab8b47111b2fc6f7f51ac33e6ca69814e..7d6bc308bf4308f653cc300c1354602ddd0df911 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -501,8 +501,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_root_item *item = &root->root_item; - struct timespec ct = current_fs_time(root->fs_info->sb); + struct timespec ct; + ktime_get_real_ts(&ct); spin_lock(&root->root_item_lock); btrfs_set_root_ctransid(item, trans->transid); btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b0251eb1239fce83226650be88c31122a9f108af..c7b45eb2403d09e94b2538dabcb5a1f0116c55dd 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -64,7 +64,7 @@ struct scrub_ctx; #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ struct scrub_recover { - atomic_t refs; + refcount_t refs; struct btrfs_bio *bbio; u64 map_length; }; @@ -112,7 +112,7 @@ struct scrub_block { struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; int page_count; atomic_t outstanding_pages; - atomic_t refs; /* free mem on transition to zero */ + refcount_t refs; /* free mem on transition to zero */ struct scrub_ctx *sctx; struct scrub_parity *sparity; struct { @@ -140,9 +140,9 @@ struct scrub_parity { int nsectors; - int stripe_len; + u64 stripe_len; - atomic_t refs; + refcount_t refs; struct list_head spages; @@ -202,7 +202,7 @@ struct scrub_ctx { * doesn't free the scrub context before or while the workers are * doing the wakeup() call. */ - atomic_t refs; + refcount_t refs; }; struct scrub_fixup_nodatasum { @@ -240,6 +240,13 @@ struct scrub_warning { struct btrfs_device *dev; }; +struct full_stripe_lock { + struct rb_node node; + u64 logical; + u64 refs; + struct mutex mutex; +}; + static void scrub_pending_bio_inc(struct scrub_ctx *sctx); static void scrub_pending_bio_dec(struct scrub_ctx *sctx); static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); @@ -305,7 +312,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx); static void scrub_pending_bio_inc(struct scrub_ctx *sctx) { - atomic_inc(&sctx->refs); + refcount_inc(&sctx->refs); atomic_inc(&sctx->bios_in_flight); } @@ -348,6 +355,222 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) scrub_pause_off(fs_info); } +/* + * Insert new full stripe lock into full stripe locks tree + * + * Return pointer to existing or newly inserted full_stripe_lock structure if + * everything works well. + * Return ERR_PTR(-ENOMEM) if we failed to allocate memory + * + * NOTE: caller must hold full_stripe_locks_root->lock before calling this + * function + */ +static struct full_stripe_lock *insert_full_stripe_lock( + struct btrfs_full_stripe_locks_tree *locks_root, + u64 fstripe_logical) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + struct full_stripe_lock *entry; + struct full_stripe_lock *ret; + + WARN_ON(!mutex_is_locked(&locks_root->lock)); + + p = &locks_root->root.rb_node; + while (*p) { + parent = *p; + entry = rb_entry(parent, struct full_stripe_lock, node); + if (fstripe_logical < entry->logical) { + p = &(*p)->rb_left; + } else if (fstripe_logical > entry->logical) { + p = &(*p)->rb_right; + } else { + entry->refs++; + return entry; + } + } + + /* Insert new lock */ + ret = kmalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return ERR_PTR(-ENOMEM); + ret->logical = fstripe_logical; + ret->refs = 1; + mutex_init(&ret->mutex); + + rb_link_node(&ret->node, parent, p); + rb_insert_color(&ret->node, &locks_root->root); + return ret; +} + +/* + * Search for a full stripe lock of a block group + * + * Return pointer to existing full stripe lock if found + * Return NULL if not found + */ +static struct full_stripe_lock *search_full_stripe_lock( + struct btrfs_full_stripe_locks_tree *locks_root, + u64 fstripe_logical) +{ + struct rb_node *node; + struct full_stripe_lock *entry; + + WARN_ON(!mutex_is_locked(&locks_root->lock)); + + node = locks_root->root.rb_node; + while (node) { + entry = rb_entry(node, struct full_stripe_lock, node); + if (fstripe_logical < entry->logical) + node = node->rb_left; + else if (fstripe_logical > entry->logical) + node = node->rb_right; + else + return entry; + } + return NULL; +} + +/* + * Helper to get full stripe logical from a normal bytenr. + * + * Caller must ensure @cache is a RAID56 block group. + */ +static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache, + u64 bytenr) +{ + u64 ret; + + /* + * Due to chunk item size limit, full stripe length should not be + * larger than U32_MAX. Just a sanity check here. + */ + WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX); + + /* + * round_down() can only handle power of 2, while RAID56 full + * stripe length can be 64KiB * n, so we need to manually round down. + */ + ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) * + cache->full_stripe_len + cache->key.objectid; + return ret; +} + +/* + * Lock a full stripe to avoid concurrency of recovery and read + * + * It's only used for profiles with parities (RAID5/6), for other profiles it + * does nothing. + * + * Return 0 if we locked full stripe covering @bytenr, with a mutex held. + * So caller must call unlock_full_stripe() at the same context. + * + * Return <0 if encounters error. + */ +static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr, + bool *locked_ret) +{ + struct btrfs_block_group_cache *bg_cache; + struct btrfs_full_stripe_locks_tree *locks_root; + struct full_stripe_lock *existing; + u64 fstripe_start; + int ret = 0; + + *locked_ret = false; + bg_cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!bg_cache) { + ASSERT(0); + return -ENOENT; + } + + /* Profiles not based on parity don't need full stripe lock */ + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) + goto out; + locks_root = &bg_cache->full_stripe_locks_root; + + fstripe_start = get_full_stripe_logical(bg_cache, bytenr); + + /* Now insert the full stripe lock */ + mutex_lock(&locks_root->lock); + existing = insert_full_stripe_lock(locks_root, fstripe_start); + mutex_unlock(&locks_root->lock); + if (IS_ERR(existing)) { + ret = PTR_ERR(existing); + goto out; + } + mutex_lock(&existing->mutex); + *locked_ret = true; +out: + btrfs_put_block_group(bg_cache); + return ret; +} + +/* + * Unlock a full stripe. + * + * NOTE: Caller must ensure it's the same context calling corresponding + * lock_full_stripe(). + * + * Return 0 if we unlock full stripe without problem. + * Return <0 for error + */ +static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr, + bool locked) +{ + struct btrfs_block_group_cache *bg_cache; + struct btrfs_full_stripe_locks_tree *locks_root; + struct full_stripe_lock *fstripe_lock; + u64 fstripe_start; + bool freeit = false; + int ret = 0; + + /* If we didn't acquire full stripe lock, no need to continue */ + if (!locked) + return 0; + + bg_cache = btrfs_lookup_block_group(fs_info, bytenr); + if (!bg_cache) { + ASSERT(0); + return -ENOENT; + } + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) + goto out; + + locks_root = &bg_cache->full_stripe_locks_root; + fstripe_start = get_full_stripe_logical(bg_cache, bytenr); + + mutex_lock(&locks_root->lock); + fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start); + /* Unpaired unlock_full_stripe() detected */ + if (!fstripe_lock) { + WARN_ON(1); + ret = -ENOENT; + mutex_unlock(&locks_root->lock); + goto out; + } + + if (fstripe_lock->refs == 0) { + WARN_ON(1); + btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow", + fstripe_lock->logical); + } else { + fstripe_lock->refs--; + } + + if (fstripe_lock->refs == 0) { + rb_erase(&fstripe_lock->node, &locks_root->root); + freeit = true; + } + mutex_unlock(&locks_root->lock); + + mutex_unlock(&fstripe_lock->mutex); + if (freeit) + kfree(fstripe_lock); +out: + btrfs_put_block_group(bg_cache); + return ret; +} + /* * used for workers that require transaction commits (i.e., for the * NOCOW case) @@ -356,7 +579,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) { struct btrfs_fs_info *fs_info = sctx->fs_info; - atomic_inc(&sctx->refs); + refcount_inc(&sctx->refs); /* * increment scrubs_running to prevent cancel requests from * completing as long as a worker is running. we must also @@ -447,7 +670,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) static void scrub_put_ctx(struct scrub_ctx *sctx) { - if (atomic_dec_and_test(&sctx->refs)) + if (refcount_dec_and_test(&sctx->refs)) scrub_free_ctx(sctx); } @@ -462,7 +685,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sctx = kzalloc(sizeof(*sctx), GFP_KERNEL); if (!sctx) goto nomem; - atomic_set(&sctx->refs, 1); + refcount_set(&sctx->refs, 1); sctx->is_dev_replace = is_dev_replace; sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO; sctx->curr = -1; @@ -857,12 +1080,14 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) static inline void scrub_get_recover(struct scrub_recover *recover) { - atomic_inc(&recover->refs); + refcount_inc(&recover->refs); } -static inline void scrub_put_recover(struct scrub_recover *recover) +static inline void scrub_put_recover(struct btrfs_fs_info *fs_info, + struct scrub_recover *recover) { - if (atomic_dec_and_test(&recover->refs)) { + if (refcount_dec_and_test(&recover->refs)) { + btrfs_bio_counter_dec(fs_info); btrfs_put_bbio(recover->bbio); kfree(recover); } @@ -892,6 +1117,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) int mirror_index; int page_num; int success; + bool full_stripe_locked; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -917,6 +1143,24 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) have_csum = sblock_to_check->pagev[0]->have_csum; dev = sblock_to_check->pagev[0]->dev; + /* + * For RAID5/6, race can happen for a different device scrub thread. + * For data corruption, Parity and Data threads will both try + * to recovery the data. + * Race can lead to doubly added csum error, or even unrecoverable + * error. + */ + ret = lock_full_stripe(fs_info, logical, &full_stripe_locked); + if (ret < 0) { + spin_lock(&sctx->stat_lock); + if (ret == -ENOMEM) + sctx->stat.malloc_errors++; + sctx->stat.read_errors++; + sctx->stat.uncorrectable_errors++; + spin_unlock(&sctx->stat_lock); + return ret; + } + if (sctx->is_dev_replace && !is_metadata && !have_csum) { sblocks_for_recheck = NULL; goto nodatasum_case; @@ -1241,7 +1485,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock->pagev[page_index]->sblock = NULL; recover = sblock->pagev[page_index]->recover; if (recover) { - scrub_put_recover(recover); + scrub_put_recover(fs_info, recover); sblock->pagev[page_index]->recover = NULL; } @@ -1251,6 +1495,9 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) kfree(sblocks_for_recheck); } + ret = unlock_full_stripe(fs_info, logical, full_stripe_locked); + if (ret < 0) + return ret; return 0; } @@ -1330,20 +1577,23 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, * with a length of PAGE_SIZE, each returned stripe * represents one mirror */ + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &mapped_length, &bbio, 0, 1); + logical, &mapped_length, &bbio); if (ret || !bbio || mapped_length < sublen) { btrfs_put_bbio(bbio); + btrfs_bio_counter_dec(fs_info); return -EIO; } recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); if (!recover) { btrfs_put_bbio(bbio); + btrfs_bio_counter_dec(fs_info); return -ENOMEM; } - atomic_set(&recover->refs, 1); + refcount_set(&recover->refs, 1); recover->bbio = bbio; recover->map_length = mapped_length; @@ -1365,7 +1615,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; spin_unlock(&sctx->stat_lock); - scrub_put_recover(recover); + scrub_put_recover(fs_info, recover); return -ENOMEM; } scrub_page_get(page); @@ -1407,7 +1657,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, scrub_get_recover(recover); page->recover = recover; } - scrub_put_recover(recover); + scrub_put_recover(fs_info, recover); length -= sublen; logical += sublen; page_index++; @@ -1497,14 +1747,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, bio_add_page(bio, page->page, PAGE_SIZE, 0); if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { - if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) + if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) { + page->io_error = 1; sblock->no_io_error_seen = 0; + } } else { bio->bi_iter.bi_sector = page->physical >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); - if (btrfsic_submit_bio_wait(bio)) + if (btrfsic_submit_bio_wait(bio)) { + page->io_error = 1; sblock->no_io_error_seen = 0; + } } bio_put(bio); @@ -1634,7 +1888,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, if (spage->io_error) { void *mapped_buffer = kmap_atomic(spage->page); - memset(mapped_buffer, 0, PAGE_SIZE); + clear_page(mapped_buffer); flush_dcache_page(spage->page); kunmap_atomic(mapped_buffer); } @@ -1998,12 +2252,12 @@ static int scrub_checksum_super(struct scrub_block *sblock) static void scrub_block_get(struct scrub_block *sblock) { - atomic_inc(&sblock->refs); + refcount_inc(&sblock->refs); } static void scrub_block_put(struct scrub_block *sblock) { - if (atomic_dec_and_test(&sblock->refs)) { + if (refcount_dec_and_test(&sblock->refs)) { int i; if (sblock->sparity) @@ -2187,8 +2441,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) int ret; int i; + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &length, &bbio, 0, 1); + &length, &bbio); if (ret || !bbio || !bbio->raid_map) goto bbio_out; @@ -2231,6 +2486,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) rbio_out: bio_put(bio); bbio_out: + btrfs_bio_counter_dec(fs_info); btrfs_put_bbio(bbio); spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2255,7 +2511,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, /* one ref inside this function, plus one for each page added to * a bio later on */ - atomic_set(&sblock->refs, 1); + refcount_set(&sblock->refs, 1); sblock->sctx = sctx; sblock->no_io_error_seen = 1; @@ -2385,7 +2641,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, unsigned long *bitmap, u64 start, u64 len) { - u32 offset; + u64 offset; int nsectors; int sectorsize = sparity->sctx->fs_info->sectorsize; @@ -2395,8 +2651,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, } start -= sparity->logic_start; - start = div_u64_rem(start, sparity->stripe_len, &offset); - offset /= sectorsize; + start = div64_u64_rem(start, sparity->stripe_len, &offset); + offset = div_u64(offset, sectorsize); nsectors = (int)len / sectorsize; if (offset + nsectors <= sparity->nsectors) { @@ -2555,7 +2811,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity, /* one ref inside this function, plus one for each page added to * a bio later on */ - atomic_set(&sblock->refs, 1); + refcount_set(&sblock->refs, 1); sblock->sctx = sctx; sblock->no_io_error_seen = 1; sblock->sparity = sparity; @@ -2694,7 +2950,7 @@ static int get_raid56_logic_offset(u64 physical, int num, for (i = 0; i < nr_data_stripes(map); i++) { *offset = last_offset + i * map->stripe_len; - stripe_nr = div_u64(*offset, map->stripe_len); + stripe_nr = div64_u64(*offset, map->stripe_len); stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); /* Work out the disk rotation on this stripe-set */ @@ -2765,7 +3021,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) struct btrfs_fs_info *fs_info = sctx->fs_info; struct bio *bio; struct btrfs_raid_bio *rbio; - struct scrub_page *spage; struct btrfs_bio *bbio = NULL; u64 length; int ret; @@ -2775,8 +3030,10 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) goto out; length = sparity->logic_end - sparity->logic_start; + + btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start, - &length, &bbio, 0, 1); + &length, &bbio); if (ret || !bbio || !bbio->raid_map) goto bbio_out; @@ -2795,9 +3052,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) if (!rbio) goto rbio_out; - list_for_each_entry(spage, &sparity->spages, list) - raid56_add_scrub_pages(rbio, spage->page, spage->logical); - scrub_pending_bio_inc(sctx); raid56_parity_submit_scrub_rbio(rbio); return; @@ -2805,6 +3059,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) rbio_out: bio_put(bio); bbio_out: + btrfs_bio_counter_dec(fs_info); btrfs_put_bbio(bbio); bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); @@ -2822,12 +3077,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors) static void scrub_parity_get(struct scrub_parity *sparity) { - atomic_inc(&sparity->refs); + refcount_inc(&sparity->refs); } static void scrub_parity_put(struct scrub_parity *sparity) { - if (!atomic_dec_and_test(&sparity->refs)) + if (!refcount_dec_and_test(&sparity->refs)) return; scrub_parity_check_and_repair(sparity); @@ -2879,7 +3134,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, sparity->scrub_dev = sdev; sparity->logic_start = logic_start; sparity->logic_end = logic_end; - atomic_set(&sparity->refs, 1); + refcount_set(&sparity->refs, 1); INIT_LIST_HEAD(&sparity->spages); sparity->dbitmap = sparity->bitmap; sparity->ebitmap = (void *)sparity->bitmap + bitmap_len; @@ -3098,7 +3353,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, physical = map->stripes[num].physical; offset = 0; - nstripes = div_u64(length, map->stripe_len); + nstripes = div64_u64(length, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { offset = map->stripe_len * num; increment = map->stripe_len * map->num_stripes; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a60d5bfb8a49e2bfc3faef10f4ea353a9da5f8b8..fc496a6f842a87d3544e80b3b9e063d57417f788 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5184,13 +5184,19 @@ static int is_extent_unchanged(struct send_ctx *sctx, while (key.offset < ekey->offset + left_len) { ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); right_type = btrfs_file_extent_type(eb, ei); - if (right_type != BTRFS_FILE_EXTENT_REG) { + if (right_type != BTRFS_FILE_EXTENT_REG && + right_type != BTRFS_FILE_EXTENT_INLINE) { ret = 0; goto out; } right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); - right_len = btrfs_file_extent_num_bytes(eb, ei); + if (right_type == BTRFS_FILE_EXTENT_INLINE) { + right_len = btrfs_file_extent_inline_len(eb, slot, ei); + right_len = PAGE_ALIGN(right_len); + } else { + right_len = btrfs_file_extent_num_bytes(eb, ei); + } right_offset = btrfs_file_extent_offset(eb, ei); right_gen = btrfs_file_extent_generation(eb, ei); @@ -5204,6 +5210,19 @@ static int is_extent_unchanged(struct send_ctx *sctx, goto out; } + /* + * We just wanted to see if when we have an inline extent, what + * follows it is a regular extent (wanted to check the above + * condition for inline extents too). This should normally not + * happen but it's possible for example when we have an inline + * compressed extent representing data with a size matching + * the page size (currently the same as sector size). + */ + if (right_type == BTRFS_FILE_EXTENT_INLINE) { + ret = 0; + goto out; + } + left_offset_fixed = left_offset; if (key.offset < ekey->offset) { /* Fix the right offset for 2a and 7. */ @@ -6360,22 +6379,16 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) sctx->clone_roots_cnt = arg->clone_sources_count; sctx->send_max_size = BTRFS_SEND_BUF_SIZE; - sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN); + sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL); if (!sctx->send_buf) { - sctx->send_buf = vmalloc(sctx->send_max_size); - if (!sctx->send_buf) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } - sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN); + sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL); if (!sctx->read_buf) { - sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); - if (!sctx->read_buf) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } sctx->pending_dir_moves = RB_ROOT; @@ -6396,13 +6409,10 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); if (arg->clone_sources_count) { - clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); + clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL); if (!clone_sources_tmp) { - clone_sources_tmp = vmalloc(alloc_size); - if (!clone_sources_tmp) { - ret = -ENOMEM; - goto out; - } + ret = -ENOMEM; + goto out; } ret = copy_from_user(clone_sources_tmp, arg->clone_sources, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9530a333d302c0c13c3e8463814b642a023afc23..4f1cdd5058f12b9970b5894828498624a28c77b5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1136,6 +1136,13 @@ static int btrfs_fill_super(struct super_block *sb, #endif sb->s_flags |= MS_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; + + err = super_setup_bdi(sb); + if (err) { + btrfs_err(fs_info, "super_setup_bdi failed"); + return err; + } + err = open_ctree(sb, fs_devices, (char *)data); if (err) { btrfs_err(fs_info, "open_ctree failed"); @@ -1788,8 +1795,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } if (fs_info->fs_devices->missing_devices > - fs_info->num_tolerated_disk_barrier_failures && - !(*flags & MS_RDONLY)) { + fs_info->num_tolerated_disk_barrier_failures) { btrfs_warn(fs_info, "too many missing devices, writeable remount is not allowed"); ret = -EACCES; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index ea272432c9305177a1059db321f8aae6f89a09f3..b18ab8f327a53dd10b09bee6169f34ecff5eb809 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -237,7 +237,6 @@ void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans) { memset(trans, 0, sizeof(*trans)); trans->transid = 1; - INIT_LIST_HEAD(&trans->qgroup_ref_list); trans->type = __TRANS_DUMMY; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 61b807de3e164e38877230cdfd1e9a545573f1be..2168654c90a1e6cab355d8270b23db68de0253c3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -60,8 +60,8 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { void btrfs_put_transaction(struct btrfs_transaction *transaction) { - WARN_ON(atomic_read(&transaction->use_count) == 0); - if (atomic_dec_and_test(&transaction->use_count)) { + WARN_ON(refcount_read(&transaction->use_count) == 0); + if (refcount_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); if (transaction->delayed_refs.pending_csums) @@ -207,7 +207,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, spin_unlock(&fs_info->trans_lock); return -EBUSY; } - atomic_inc(&cur_trans->use_count); + refcount_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); extwriter_counter_inc(cur_trans, type); spin_unlock(&fs_info->trans_lock); @@ -257,7 +257,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, * One for this trans handle, one so it will live on until we * commit the transaction. */ - atomic_set(&cur_trans->use_count, 2); + refcount_set(&cur_trans->use_count, 2); atomic_set(&cur_trans->pending_ordered, 0); cur_trans->flags = 0; cur_trans->start_time = get_seconds(); @@ -432,7 +432,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->trans_lock); cur_trans = fs_info->running_transaction; if (cur_trans && is_transaction_blocked(cur_trans)) { - atomic_inc(&cur_trans->use_count); + refcount_inc(&cur_trans->use_count); spin_unlock(&fs_info->trans_lock); wait_event(fs_info->transaction_wait, @@ -572,7 +572,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, h->type = type; h->can_flush_pending_bgs = true; - INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); smp_mb(); @@ -744,7 +743,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) list_for_each_entry(t, &fs_info->trans_list, list) { if (t->transid == transid) { cur_trans = t; - atomic_inc(&cur_trans->use_count); + refcount_inc(&cur_trans->use_count); ret = 0; break; } @@ -773,7 +772,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) if (t->state == TRANS_STATE_COMPLETED) break; cur_trans = t; - atomic_inc(&cur_trans->use_count); + refcount_inc(&cur_trans->use_count); break; } } @@ -917,7 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, wake_up_process(info->transaction_kthread); err = -EIO; } - assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); if (must_run_delayed_refs) { @@ -1839,7 +1837,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, /* take transaction reference */ cur_trans = trans->transaction; - atomic_inc(&cur_trans->use_count); + refcount_inc(&cur_trans->use_count); btrfs_end_transaction(trans); @@ -2015,7 +2013,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) spin_lock(&fs_info->trans_lock); if (cur_trans->state >= TRANS_STATE_COMMIT_START) { spin_unlock(&fs_info->trans_lock); - atomic_inc(&cur_trans->use_count); + refcount_inc(&cur_trans->use_count); ret = btrfs_end_transaction(trans); wait_for_commit(cur_trans); @@ -2035,7 +2033,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); if (prev_trans->state != TRANS_STATE_COMPLETED) { - atomic_inc(&prev_trans->use_count); + refcount_inc(&prev_trans->use_count); spin_unlock(&fs_info->trans_lock); wait_for_commit(prev_trans); @@ -2130,13 +2128,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) goto scrub_continue; } - /* Reocrd old roots for later qgroup accounting */ - ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); - if (ret) { - mutex_unlock(&fs_info->reloc_mutex); - goto scrub_continue; - } - /* * make sure none of the code above managed to slip in a * delayed item @@ -2178,6 +2169,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ btrfs_free_log_root_tree(trans, fs_info); + /* + * commit_fs_roots() can call btrfs_save_ino_cache(), which generates + * new delayed refs. Must handle them or qgroup can be wrong. + */ + ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1); + if (ret) { + mutex_unlock(&fs_info->tree_log_mutex); + mutex_unlock(&fs_info->reloc_mutex); + goto scrub_continue; + } + + ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); + if (ret) { + mutex_unlock(&fs_info->tree_log_mutex); + mutex_unlock(&fs_info->reloc_mutex); + goto scrub_continue; + } + /* * Since fs roots are all committed, we can get a quite accurate * new_roots. So let's do quota accounting. @@ -2223,7 +2232,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) switch_commit_roots(cur_trans, fs_info); - assert_qgroups_uptodate(trans); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); update_super_roots(fs_info); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 5dfb5590fff654a077fd95704682293bfc948a8f..c55e44560103b48e2a917701899c611732d8a013 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -18,6 +18,8 @@ #ifndef __BTRFS_TRANSACTION__ #define __BTRFS_TRANSACTION__ + +#include #include "btrfs_inode.h" #include "delayed-ref.h" #include "ctree.h" @@ -49,7 +51,7 @@ struct btrfs_transaction { * transaction can end */ atomic_t num_writers; - atomic_t use_count; + refcount_t use_count; atomic_t pending_ordered; unsigned long flags; @@ -125,8 +127,6 @@ struct btrfs_trans_handle { unsigned int type; struct btrfs_root *root; struct btrfs_fs_info *fs_info; - struct seq_list delayed_ref_elem; - struct list_head qgroup_ref_list; struct list_head new_bgs; }; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a59674c3e69efb76d27d6705b41ca76d94e82e15..ccfe9fe7754a8d4d80fd3e5b1f0a1d2f2118e4e6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4196,7 +4196,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, if (em->generation <= test_gen) continue; /* Need a ref to keep it from getting evicted from cache */ - atomic_inc(&em->refs); + refcount_inc(&em->refs); set_bit(EXTENT_FLAG_LOGGING, &em->flags); list_add_tail(&em->list, &extents); num++; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ab8a66d852f91cb04206361a551b8c57760c9c40..017b67daa3bbf375919019e5c089b94f97d3e2a5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -139,6 +139,11 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info); static void __btrfs_reset_dev_stats(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); +static int __btrfs_map_block(struct btrfs_fs_info *fs_info, + enum btrfs_map_op op, + u64 logical, u64 *length, + struct btrfs_bio **bbio_ret, + int mirror_num, int need_raid_map); DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -1008,14 +1013,13 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, q = bdev_get_queue(bdev); if (blk_queue_discard(q)) device->can_discard = 1; + if (!blk_queue_nonrot(q)) + fs_devices->rotating = 1; device->bdev = bdev; device->in_fs_metadata = 0; device->mode = flags; - if (!blk_queue_nonrot(bdev_get_queue(bdev))) - fs_devices->rotating = 1; - fs_devices->open_devices++; if (device->writeable && device->devid != BTRFS_DEV_REPLACE_DEVID) { @@ -2417,7 +2421,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path fs_info->free_chunk_space += device->total_bytes; spin_unlock(&fs_info->free_chunk_lock); - if (!blk_queue_nonrot(bdev_get_queue(bdev))) + if (!blk_queue_nonrot(q)) fs_info->fs_devices->rotating = 1; tmp = btrfs_super_total_bytes(fs_info->super_copy); @@ -2795,10 +2799,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, return ret; } +static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) +{ + struct extent_map_tree *em_tree; + struct extent_map *em; + + em_tree = &fs_info->mapping_tree.map_tree; + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, logical, length); + read_unlock(&em_tree->lock); + + if (!em) { + btrfs_crit(fs_info, "unable to find logical %llu length %llu", + logical, length); + return ERR_PTR(-EINVAL); + } + + if (em->start > logical || em->start + em->len < logical) { + btrfs_crit(fs_info, + "found a bad mapping, wanted %llu-%llu, found %llu-%llu", + logical, length, em->start, em->start + em->len); + free_extent_map(em); + return ERR_PTR(-EINVAL); + } + + /* callers are responsible for dropping em's ref. */ + return em; +} + int btrfs_remove_chunk(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 chunk_offset) { - struct extent_map_tree *em_tree; struct extent_map *em; struct map_lookup *map; u64 dev_extent_len = 0; @@ -2806,23 +2838,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, int i, ret = 0; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - em_tree = &fs_info->mapping_tree.map_tree; - - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, chunk_offset, 1); - read_unlock(&em_tree->lock); - - if (!em || em->start > chunk_offset || - em->start + em->len < chunk_offset) { + em = get_chunk_map(fs_info, chunk_offset, 1); + if (IS_ERR(em)) { /* * This is a logic error, but we don't want to just rely on the * user having built with ASSERT enabled, so if ASSERT doesn't * do anything we still error out. */ ASSERT(0); - if (em) - free_extent_map(em); - return -EINVAL; + return PTR_ERR(em); } map = em->map_lookup; mutex_lock(&fs_info->chunk_mutex); @@ -3736,7 +3760,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) if (ret) btrfs_handle_fs_error(fs_info, ret, NULL); - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); } /* Non-zero return value signifies invalidity */ @@ -3755,6 +3779,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs) { struct btrfs_fs_info *fs_info = bctl->fs_info; + u64 meta_target, data_target; u64 allowed; int mixed = 0; int ret; @@ -3851,11 +3876,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } while (read_seqretry(&fs_info->profiles_lock, seq)); - if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) < - btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) { + /* if we're not converting, the target field is uninitialized */ + meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->meta.target : fs_info->avail_metadata_alloc_bits; + data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->data.target : fs_info->avail_data_alloc_bits; + if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < + btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { btrfs_warn(fs_info, "metadata profile 0x%llx has lower redundancy than data profile 0x%llx", - bctl->meta.target, bctl->data.target); + meta_target, data_target); } if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { @@ -3910,7 +3940,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, __cancel_balance(fs_info); else { kfree(bctl); - atomic_set(&fs_info->mutually_exclusive_operation_running, 0); + clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); } return ret; } @@ -4000,7 +4030,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); - WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); + WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)); mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); @@ -4785,7 +4815,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = div_u64(stripe_size, dev_stripes); /* align to BTRFS_STRIPE_LEN */ - stripe_size = div_u64(stripe_size, raid_stripe_len); + stripe_size = div64_u64(stripe_size, raid_stripe_len); stripe_size *= raid_stripe_len; map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); @@ -4833,7 +4863,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ret = add_extent_mapping(em_tree, em, 0); if (!ret) { list_add_tail(&em->list, &trans->transaction->pending_chunks); - atomic_inc(&em->refs); + refcount_inc(&em->refs); } write_unlock(&em_tree->lock); if (ret) { @@ -4888,7 +4918,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_device *device; struct btrfs_chunk *chunk; struct btrfs_stripe *stripe; - struct extent_map_tree *em_tree; struct extent_map *em; struct map_lookup *map; size_t item_size; @@ -4897,24 +4926,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, int i = 0; int ret = 0; - em_tree = &fs_info->mapping_tree.map_tree; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size); - read_unlock(&em_tree->lock); - - if (!em) { - btrfs_crit(fs_info, "unable to find logical %Lu len %Lu", - chunk_offset, chunk_size); - return -EINVAL; - } - - if (em->start != chunk_offset || em->len != chunk_size) { - btrfs_crit(fs_info, - "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu", - chunk_offset, chunk_size, em->start, em->len); - free_extent_map(em); - return -EINVAL; - } + em = get_chunk_map(fs_info, chunk_offset, chunk_size); + if (IS_ERR(em)) + return PTR_ERR(em); map = em->map_lookup; item_size = btrfs_chunk_item_size(map->num_stripes); @@ -5055,15 +5069,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) { struct extent_map *em; struct map_lookup *map; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int readonly = 0; int miss_ndevs = 0; int i; - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - read_unlock(&map_tree->map_tree.lock); - if (!em) + em = get_chunk_map(fs_info, chunk_offset, 1); + if (IS_ERR(em)) return 1; map = em->map_lookup; @@ -5117,34 +5128,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) { - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; - struct extent_map_tree *em_tree = &map_tree->map_tree; int ret; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, logical, len); - read_unlock(&em_tree->lock); - - /* - * We could return errors for these cases, but that could get ugly and - * we'd probably do the same thing which is just not do anything else - * and exit, so return 1 so the callers don't try to use other copies. - */ - if (!em) { - btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical, - logical+len); - return 1; - } - - if (em->start > logical || em->start + em->len < logical) { - btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu", - logical, logical+len, em->start, - em->start + em->len); - free_extent_map(em); + em = get_chunk_map(fs_info, logical, len); + if (IS_ERR(em)) + /* + * We could return errors for these cases, but that could get + * ugly and we'd probably do the same thing which is just not do + * anything else and exit, so return 1 so the callers don't try + * to use other copies. + */ return 1; - } map = em->map_lookup; if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) @@ -5160,7 +5156,8 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) free_extent_map(em); btrfs_dev_replace_lock(&fs_info->dev_replace, 0); - if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) + if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) && + fs_info->dev_replace.tgtdev) ret++; btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); @@ -5173,15 +5170,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, { struct extent_map *em; struct map_lookup *map; - struct extent_map_tree *em_tree = &map_tree->map_tree; unsigned long len = fs_info->sectorsize; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, logical, len); - read_unlock(&em_tree->lock); - BUG_ON(!em); + em = get_chunk_map(fs_info, logical, len); + WARN_ON(IS_ERR(em)); - BUG_ON(em->start > logical || em->start + em->len < logical); map = em->map_lookup; if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) len = map->stripe_len * nr_data_stripes(map); @@ -5189,20 +5182,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, return len; } -int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len, int mirror_num) { struct extent_map *em; struct map_lookup *map; - struct extent_map_tree *em_tree = &map_tree->map_tree; int ret = 0; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, logical, len); - read_unlock(&em_tree->lock); - BUG_ON(!em); + em = get_chunk_map(fs_info, logical, len); + WARN_ON(IS_ERR(em)); - BUG_ON(em->start > logical || em->start + em->len < logical); map = em->map_lookup; if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) ret = 1; @@ -5295,25 +5284,353 @@ static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) GFP_NOFS|__GFP_NOFAIL); atomic_set(&bbio->error, 0); - atomic_set(&bbio->refs, 1); + refcount_set(&bbio->refs, 1); return bbio; } void btrfs_get_bbio(struct btrfs_bio *bbio) { - WARN_ON(!atomic_read(&bbio->refs)); - atomic_inc(&bbio->refs); + WARN_ON(!refcount_read(&bbio->refs)); + refcount_inc(&bbio->refs); } void btrfs_put_bbio(struct btrfs_bio *bbio) { if (!bbio) return; - if (atomic_dec_and_test(&bbio->refs)) + if (refcount_dec_and_test(&bbio->refs)) kfree(bbio); } +/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ +/* + * Please note that, discard won't be sent to target device of device + * replace. + */ +static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, + u64 logical, u64 length, + struct btrfs_bio **bbio_ret) +{ + struct extent_map *em; + struct map_lookup *map; + struct btrfs_bio *bbio; + u64 offset; + u64 stripe_nr; + u64 stripe_nr_end; + u64 stripe_end_offset; + u64 stripe_cnt; + u64 stripe_len; + u64 stripe_offset; + u64 num_stripes; + u32 stripe_index; + u32 factor = 0; + u32 sub_stripes = 0; + u64 stripes_per_dev = 0; + u32 remaining_stripes = 0; + u32 last_stripe = 0; + int ret = 0; + int i; + + /* discard always return a bbio */ + ASSERT(bbio_ret); + + em = get_chunk_map(fs_info, logical, length); + if (IS_ERR(em)) + return PTR_ERR(em); + + map = em->map_lookup; + /* we don't discard raid56 yet */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + ret = -EOPNOTSUPP; + goto out; + } + + offset = logical - em->start; + length = min_t(u64, em->len - offset, length); + + stripe_len = map->stripe_len; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + stripe_nr = div64_u64(offset, stripe_len); + + /* stripe_offset is the offset of this block in its stripe */ + stripe_offset = offset - stripe_nr * stripe_len; + + stripe_nr_end = round_up(offset + length, map->stripe_len); + stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len); + stripe_cnt = stripe_nr_end - stripe_nr; + stripe_end_offset = stripe_nr_end * map->stripe_len - + (offset + length); + /* + * after this, stripe_nr is the number of stripes on this + * device we have to walk to find the data, and stripe_index is + * the number of our device in the stripe array + */ + num_stripes = 1; + stripe_index = 0; + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID0) + sub_stripes = 1; + else + sub_stripes = map->sub_stripes; + + factor = map->num_stripes / sub_stripes; + num_stripes = min_t(u64, map->num_stripes, + sub_stripes * stripe_cnt); + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + stripe_index *= sub_stripes; + stripes_per_dev = div_u64_rem(stripe_cnt, factor, + &remaining_stripes); + div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); + last_stripe *= sub_stripes; + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { + num_stripes = map->num_stripes; + } else { + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, + &stripe_index); + } + + bbio = alloc_btrfs_bio(num_stripes, 0); + if (!bbio) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < num_stripes; i++) { + bbio->stripes[i].physical = + map->stripes[stripe_index].physical + + stripe_offset + stripe_nr * map->stripe_len; + bbio->stripes[i].dev = map->stripes[stripe_index].dev; + + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + bbio->stripes[i].length = stripes_per_dev * + map->stripe_len; + + if (i / sub_stripes < remaining_stripes) + bbio->stripes[i].length += + map->stripe_len; + + /* + * Special for the first stripe and + * the last stripe: + * + * |-------|...|-------| + * |----------| + * off end_off + */ + if (i < sub_stripes) + bbio->stripes[i].length -= + stripe_offset; + + if (stripe_index >= last_stripe && + stripe_index <= (last_stripe + + sub_stripes - 1)) + bbio->stripes[i].length -= + stripe_end_offset; + + if (i == sub_stripes - 1) + stripe_offset = 0; + } else { + bbio->stripes[i].length = length; + } + + stripe_index++; + if (stripe_index == map->num_stripes) { + stripe_index = 0; + stripe_nr++; + } + } + + *bbio_ret = bbio; + bbio->map_type = map->type; + bbio->num_stripes = num_stripes; +out: + free_extent_map(em); + return ret; +} + +/* + * In dev-replace case, for repair case (that's the only case where the mirror + * is selected explicitly when calling btrfs_map_block), blocks left of the + * left cursor can also be read from the target drive. + * + * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the + * array of stripes. + * For READ, it also needs to be supported using the same mirror number. + * + * If the requested block is not left of the left cursor, EIO is returned. This + * can happen because btrfs_num_copies() returns one more in the dev-replace + * case. + */ +static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, + u64 logical, u64 length, + u64 srcdev_devid, int *mirror_num, + u64 *physical) +{ + struct btrfs_bio *bbio = NULL; + int num_stripes; + int index_srcdev = 0; + int found = 0; + u64 physical_of_found = 0; + int i; + int ret = 0; + + ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, + logical, &length, &bbio, 0, 0); + if (ret) { + ASSERT(bbio == NULL); + return ret; + } + + num_stripes = bbio->num_stripes; + if (*mirror_num > num_stripes) { + /* + * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror, + * that means that the requested area is not left of the left + * cursor + */ + btrfs_put_bbio(bbio); + return -EIO; + } + + /* + * process the rest of the function using the mirror_num of the source + * drive. Therefore look it up first. At the end, patch the device + * pointer to the one of the target drive. + */ + for (i = 0; i < num_stripes; i++) { + if (bbio->stripes[i].dev->devid != srcdev_devid) + continue; + + /* + * In case of DUP, in order to keep it simple, only add the + * mirror with the lowest physical address + */ + if (found && + physical_of_found <= bbio->stripes[i].physical) + continue; + + index_srcdev = i; + found = 1; + physical_of_found = bbio->stripes[i].physical; + } + + btrfs_put_bbio(bbio); + + ASSERT(found); + if (!found) + return -EIO; + + *mirror_num = index_srcdev + 1; + *physical = physical_of_found; + return ret; +} + +static void handle_ops_on_dev_replace(enum btrfs_map_op op, + struct btrfs_bio **bbio_ret, + struct btrfs_dev_replace *dev_replace, + int *num_stripes_ret, int *max_errors_ret) +{ + struct btrfs_bio *bbio = *bbio_ret; + u64 srcdev_devid = dev_replace->srcdev->devid; + int tgtdev_indexes = 0; + int num_stripes = *num_stripes_ret; + int max_errors = *max_errors_ret; + int i; + + if (op == BTRFS_MAP_WRITE) { + int index_where_to_add; + + /* + * duplicate the write operations while the dev replace + * procedure is running. Since the copying of the old disk to + * the new disk takes place at run time while the filesystem is + * mounted writable, the regular write operations to the old + * disk have to be duplicated to go to the new disk as well. + * + * Note that device->missing is handled by the caller, and that + * the write to the old disk is already set up in the stripes + * array. + */ + index_where_to_add = num_stripes; + for (i = 0; i < num_stripes; i++) { + if (bbio->stripes[i].dev->devid == srcdev_devid) { + /* write to new disk, too */ + struct btrfs_bio_stripe *new = + bbio->stripes + index_where_to_add; + struct btrfs_bio_stripe *old = + bbio->stripes + i; + + new->physical = old->physical; + new->length = old->length; + new->dev = dev_replace->tgtdev; + bbio->tgtdev_map[i] = index_where_to_add; + index_where_to_add++; + max_errors++; + tgtdev_indexes++; + } + } + num_stripes = index_where_to_add; + } else if (op == BTRFS_MAP_GET_READ_MIRRORS) { + int index_srcdev = 0; + int found = 0; + u64 physical_of_found = 0; + + /* + * During the dev-replace procedure, the target drive can also + * be used to read data in case it is needed to repair a corrupt + * block elsewhere. This is possible if the requested area is + * left of the left cursor. In this area, the target drive is a + * full copy of the source drive. + */ + for (i = 0; i < num_stripes; i++) { + if (bbio->stripes[i].dev->devid == srcdev_devid) { + /* + * In case of DUP, in order to keep it simple, + * only add the mirror with the lowest physical + * address + */ + if (found && + physical_of_found <= + bbio->stripes[i].physical) + continue; + index_srcdev = i; + found = 1; + physical_of_found = bbio->stripes[i].physical; + } + } + if (found) { + struct btrfs_bio_stripe *tgtdev_stripe = + bbio->stripes + num_stripes; + + tgtdev_stripe->physical = physical_of_found; + tgtdev_stripe->length = + bbio->stripes[index_srcdev].length; + tgtdev_stripe->dev = dev_replace->tgtdev; + bbio->tgtdev_map[index_srcdev] = num_stripes; + + tgtdev_indexes++; + num_stripes++; + } + } + + *num_stripes_ret = num_stripes; + *max_errors_ret = max_errors; + bbio->num_tgtdevs = tgtdev_indexes; + *bbio_ret = bbio; +} + +static bool need_full_stripe(enum btrfs_map_op op) +{ + return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS); +} + static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, @@ -5322,14 +5639,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, { struct extent_map *em; struct map_lookup *map; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; - struct extent_map_tree *em_tree = &map_tree->map_tree; u64 offset; u64 stripe_offset; - u64 stripe_end_offset; u64 stripe_nr; - u64 stripe_nr_orig; - u64 stripe_nr_end; u64 stripe_len; u32 stripe_index; int i; @@ -5345,23 +5657,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, u64 physical_to_patch_in_first_stripe = 0; u64 raid56_full_stripe_start = (u64)-1; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, logical, *length); - read_unlock(&em_tree->lock); - - if (!em) { - btrfs_crit(fs_info, "unable to find logical %llu len %llu", - logical, *length); - return -EINVAL; - } + if (op == BTRFS_MAP_DISCARD) + return __btrfs_map_block_for_discard(fs_info, logical, + *length, bbio_ret); - if (em->start > logical || em->start + em->len < logical) { - btrfs_crit(fs_info, - "found a bad mapping, wanted %Lu, found %Lu-%Lu", - logical, em->start, em->start + em->len); - free_extent_map(em); - return -EINVAL; - } + em = get_chunk_map(fs_info, logical, *length); + if (IS_ERR(em)) + return PTR_ERR(em); map = em->map_lookup; offset = logical - em->start; @@ -5400,14 +5702,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, raid56_full_stripe_start *= full_stripe_len; } - if (op == BTRFS_MAP_DISCARD) { - /* we don't discard raid56 yet */ - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - ret = -EOPNOTSUPP; - goto out; - } - *length = min_t(u64, em->len - offset, *length); - } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { u64 max_len; /* For writes to RAID[56], allow a full stripeset across all disks. For other RAID types and for RAID[56] reads, just allow a single @@ -5438,105 +5733,28 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, btrfs_dev_replace_set_lock_blocking(dev_replace); if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && - op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && - op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { - /* - * in dev-replace case, for repair case (that's the only - * case where the mirror is selected explicitly when - * calling btrfs_map_block), blocks left of the left cursor - * can also be read from the target drive. - * For REQ_GET_READ_MIRRORS, the target drive is added as - * the last one to the array of stripes. For READ, it also - * needs to be supported using the same mirror number. - * If the requested block is not left of the left cursor, - * EIO is returned. This can happen because btrfs_num_copies() - * returns one more in the dev-replace case. - */ - u64 tmp_length = *length; - struct btrfs_bio *tmp_bbio = NULL; - int tmp_num_stripes; - u64 srcdev_devid = dev_replace->srcdev->devid; - int index_srcdev = 0; - int found = 0; - u64 physical_of_found = 0; - - ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &tmp_length, &tmp_bbio, 0, 0); - if (ret) { - WARN_ON(tmp_bbio != NULL); - goto out; - } - - tmp_num_stripes = tmp_bbio->num_stripes; - if (mirror_num > tmp_num_stripes) { - /* - * BTRFS_MAP_GET_READ_MIRRORS does not contain this - * mirror, that means that the requested area - * is not left of the left cursor - */ - ret = -EIO; - btrfs_put_bbio(tmp_bbio); - goto out; - } - - /* - * process the rest of the function using the mirror_num - * of the source drive. Therefore look it up first. - * At the end, patch the device pointer to the one of the - * target drive. - */ - for (i = 0; i < tmp_num_stripes; i++) { - if (tmp_bbio->stripes[i].dev->devid != srcdev_devid) - continue; - - /* - * In case of DUP, in order to keep it simple, only add - * the mirror with the lowest physical address - */ - if (found && - physical_of_found <= tmp_bbio->stripes[i].physical) - continue; - - index_srcdev = i; - found = 1; - physical_of_found = tmp_bbio->stripes[i].physical; - } - - btrfs_put_bbio(tmp_bbio); - - if (!found) { - WARN_ON(1); - ret = -EIO; + !need_full_stripe(op) && dev_replace->tgtdev != NULL) { + ret = get_extra_mirror_from_replace(fs_info, logical, *length, + dev_replace->srcdev->devid, + &mirror_num, + &physical_to_patch_in_first_stripe); + if (ret) goto out; - } - - mirror_num = index_srcdev + 1; - patch_the_first_stripe_for_dev_replace = 1; - physical_to_patch_in_first_stripe = physical_of_found; + else + patch_the_first_stripe_for_dev_replace = 1; } else if (mirror_num > map->num_stripes) { mirror_num = 0; } num_stripes = 1; stripe_index = 0; - stripe_nr_orig = stripe_nr; - stripe_nr_end = ALIGN(offset + *length, map->stripe_len); - stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); - stripe_end_offset = stripe_nr_end * map->stripe_len - - (offset + *length); - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - if (op == BTRFS_MAP_DISCARD) - num_stripes = min_t(u64, map->num_stripes, - stripe_nr_end - stripe_nr_orig); stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && - op != BTRFS_MAP_GET_READ_MIRRORS) + if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || - op == BTRFS_MAP_GET_READ_MIRRORS) + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -5549,8 +5767,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || - op == BTRFS_MAP_GET_READ_MIRRORS) { + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) { num_stripes = map->num_stripes; } else if (mirror_num) { stripe_index = mirror_num - 1; @@ -5566,10 +5783,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) num_stripes = map->sub_stripes; - else if (op == BTRFS_MAP_DISCARD) - num_stripes = min_t(u64, map->sub_stripes * - (stripe_nr_end - stripe_nr_orig), - map->num_stripes); else if (mirror_num) stripe_index += mirror_num - 1; else { @@ -5587,7 +5800,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ - stripe_nr = div_u64(raid56_full_stripe_start, + stripe_nr = div64_u64(raid56_full_stripe_start, stripe_len * nr_data_stripes(map)); /* RAID[56] write or recovery. Return all stripes */ @@ -5612,8 +5825,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, &stripe_index); - if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && - op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1) + if ((op != BTRFS_MAP_WRITE && + op != BTRFS_MAP_GET_READ_MIRRORS) && + mirror_num <= 1) mirror_num = 1; } } else { @@ -5635,8 +5849,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, } num_alloc_stripes = num_stripes; - if (dev_replace_is_ongoing) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) { + if (op == BTRFS_MAP_WRITE) num_alloc_stripes <<= 1; if (op == BTRFS_MAP_GET_READ_MIRRORS) num_alloc_stripes++; @@ -5648,14 +5862,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, ret = -ENOMEM; goto out; } - if (dev_replace_is_ongoing) + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); /* build raid_map */ - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && - need_raid_map && - ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) || - mirror_num > 1)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && + (need_full_stripe(op) || mirror_num > 1)) { u64 tmp; unsigned rot; @@ -5679,173 +5891,27 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, RAID6_Q_STRIPE; } - if (op == BTRFS_MAP_DISCARD) { - u32 factor = 0; - u32 sub_stripes = 0; - u64 stripes_per_dev = 0; - u32 remaining_stripes = 0; - u32 last_stripe = 0; - if (map->type & - (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { - if (map->type & BTRFS_BLOCK_GROUP_RAID0) - sub_stripes = 1; - else - sub_stripes = map->sub_stripes; - - factor = map->num_stripes / sub_stripes; - stripes_per_dev = div_u64_rem(stripe_nr_end - - stripe_nr_orig, - factor, - &remaining_stripes); - div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); - last_stripe *= sub_stripes; - } - - for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = - map->stripes[stripe_index].physical + - stripe_offset + stripe_nr * map->stripe_len; - bbio->stripes[i].dev = map->stripes[stripe_index].dev; - - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID10)) { - bbio->stripes[i].length = stripes_per_dev * - map->stripe_len; - - if (i / sub_stripes < remaining_stripes) - bbio->stripes[i].length += - map->stripe_len; - - /* - * Special for the first stripe and - * the last stripe: - * - * |-------|...|-------| - * |----------| - * off end_off - */ - if (i < sub_stripes) - bbio->stripes[i].length -= - stripe_offset; - - if (stripe_index >= last_stripe && - stripe_index <= (last_stripe + - sub_stripes - 1)) - bbio->stripes[i].length -= - stripe_end_offset; - - if (i == sub_stripes - 1) - stripe_offset = 0; - } else - bbio->stripes[i].length = *length; - - stripe_index++; - if (stripe_index == map->num_stripes) { - /* This could only happen for RAID0/10 */ - stripe_index = 0; - stripe_nr++; - } - } - } else { - for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = - map->stripes[stripe_index].physical + - stripe_offset + - stripe_nr * map->stripe_len; - bbio->stripes[i].dev = - map->stripes[stripe_index].dev; - stripe_index++; - } + for (i = 0; i < num_stripes; i++) { + bbio->stripes[i].physical = + map->stripes[stripe_index].physical + + stripe_offset + + stripe_nr * map->stripe_len; + bbio->stripes[i].dev = + map->stripes[stripe_index].dev; + stripe_index++; } - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) + if (need_full_stripe(op)) max_errors = btrfs_chunk_max_errors(map); if (bbio->raid_map) sort_parity_stripes(bbio, num_stripes); - tgtdev_indexes = 0; - if (dev_replace_is_ongoing && - (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) && - dev_replace->tgtdev != NULL) { - int index_where_to_add; - u64 srcdev_devid = dev_replace->srcdev->devid; - - /* - * duplicate the write operations while the dev replace - * procedure is running. Since the copying of the old disk - * to the new disk takes place at run time while the - * filesystem is mounted writable, the regular write - * operations to the old disk have to be duplicated to go - * to the new disk as well. - * Note that device->missing is handled by the caller, and - * that the write to the old disk is already set up in the - * stripes array. - */ - index_where_to_add = num_stripes; - for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid == srcdev_devid) { - /* write to new disk, too */ - struct btrfs_bio_stripe *new = - bbio->stripes + index_where_to_add; - struct btrfs_bio_stripe *old = - bbio->stripes + i; - - new->physical = old->physical; - new->length = old->length; - new->dev = dev_replace->tgtdev; - bbio->tgtdev_map[i] = index_where_to_add; - index_where_to_add++; - max_errors++; - tgtdev_indexes++; - } - } - num_stripes = index_where_to_add; - } else if (dev_replace_is_ongoing && - op == BTRFS_MAP_GET_READ_MIRRORS && - dev_replace->tgtdev != NULL) { - u64 srcdev_devid = dev_replace->srcdev->devid; - int index_srcdev = 0; - int found = 0; - u64 physical_of_found = 0; - - /* - * During the dev-replace procedure, the target drive can - * also be used to read data in case it is needed to repair - * a corrupt block elsewhere. This is possible if the - * requested area is left of the left cursor. In this area, - * the target drive is a full copy of the source drive. - */ - for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid == srcdev_devid) { - /* - * In case of DUP, in order to keep it - * simple, only add the mirror with the - * lowest physical address - */ - if (found && - physical_of_found <= - bbio->stripes[i].physical) - continue; - index_srcdev = i; - found = 1; - physical_of_found = bbio->stripes[i].physical; - } - } - if (found) { - struct btrfs_bio_stripe *tgtdev_stripe = - bbio->stripes + num_stripes; - - tgtdev_stripe->physical = physical_of_found; - tgtdev_stripe->length = - bbio->stripes[index_srcdev].length; - tgtdev_stripe->dev = dev_replace->tgtdev; - bbio->tgtdev_map[index_srcdev] = num_stripes; - - tgtdev_indexes++; - num_stripes++; - } + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && + need_full_stripe(op)) { + handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes, + &max_errors); } *bbio_ret = bbio; @@ -5853,7 +5919,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, bbio->num_stripes = num_stripes; bbio->max_errors = max_errors; bbio->mirror_num = mirror_num; - bbio->num_tgtdevs = tgtdev_indexes; /* * this is the case that REQ_READ && dev_replace_is_ongoing && @@ -5886,19 +5951,15 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, /* For Scrub/replace */ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, int mirror_num, - int need_raid_map) + struct btrfs_bio **bbio_ret) { - return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, - mirror_num, need_raid_map); + return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1); } int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len) { - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; - struct extent_map_tree *em_tree = &map_tree->map_tree; struct extent_map *em; struct map_lookup *map; u64 *buf; @@ -5908,24 +5969,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 rmap_len; int i, j, nr = 0; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, chunk_start, 1); - read_unlock(&em_tree->lock); - - if (!em) { - btrfs_err(fs_info, "couldn't find em for chunk %Lu", - chunk_start); + em = get_chunk_map(fs_info, chunk_start, 1); + if (IS_ERR(em)) return -EIO; - } - if (em->start != chunk_start) { - btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu", - em->start, chunk_start); - free_extent_map(em); - return -EIO; - } map = em->map_lookup; - length = em->len; rmap_len = map->stripe_len; @@ -5949,7 +5997,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, continue; stripe_nr = physical - map->stripes[i].physical; - stripe_nr = div_u64(stripe_nr, map->stripe_len); + stripe_nr = div64_u64(stripe_nr, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID10) { stripe_nr = stripe_nr * map->num_stripes + i; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 59be81206dd7b949683ad95c80fa561c022ad812..c7d0fbc915cabdee7cfec437e18f795ace77abbb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -123,7 +123,6 @@ struct btrfs_device { struct list_head resized_list; /* for sending down flush barriers */ - int nobarriers; struct bio *flush_bio; struct completion flush_wait; @@ -298,7 +297,7 @@ struct btrfs_bio; typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); struct btrfs_bio { - atomic_t refs; + refcount_t refs; atomic_t stripes_pending; struct btrfs_fs_info *fs_info; u64 map_type; /* get from map_lookup->type */ @@ -400,8 +399,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, struct btrfs_bio **bbio_ret, int mirror_num); int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, int mirror_num, - int need_raid_map); + struct btrfs_bio **bbio_ret); int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len); @@ -475,7 +473,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); -int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len, int mirror_num); unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, struct btrfs_mapping_tree *map_tree, diff --git a/fs/buffer.c b/fs/buffer.c index 9196f2a270daac4d8d4612be27a0bd8288657335..161be58c5cb0f738754b79d87eda879aa3bb9553 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -49,7 +49,6 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags, struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -1830,7 +1829,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); nr_underway++; } bh = next; @@ -1884,7 +1883,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc); + submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); nr_underway++; } bh = next; @@ -2379,8 +2378,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) goto out; err = pagecache_write_begin(NULL, mapping, size, 0, - AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND, - &page, &fsdata); + AOP_FLAG_CONT_EXPAND, &page, &fsdata); if (err) goto out; @@ -2415,9 +2413,8 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, } len = PAGE_SIZE - zerofrom; - err = pagecache_write_begin(file, mapping, curpos, len, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); + err = pagecache_write_begin(file, mapping, curpos, len, 0, + &page, &fsdata); if (err) goto out; zero_user(page, zerofrom, len); @@ -2449,9 +2446,8 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, } len = offset - zerofrom; - err = pagecache_write_begin(file, mapping, curpos, len, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); + err = pagecache_write_begin(file, mapping, curpos, len, 0, + &page, &fsdata); if (err) goto out; zero_user(page, zerofrom, len); @@ -3095,7 +3091,7 @@ void guard_bio_eod(int op, struct bio *bio) } static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags, struct writeback_control *wbc) + struct writeback_control *wbc) { struct bio *bio; @@ -3130,7 +3126,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; - bio->bi_flags |= bio_flags; /* Take care of bh's that straddle the end of the device */ guard_bio_eod(op, bio); @@ -3145,16 +3140,9 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, return 0; } -int _submit_bh(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags) +int submit_bh(int op, int op_flags, struct buffer_head *bh) { - return submit_bh_wbc(op, op_flags, bh, bio_flags, NULL); -} -EXPORT_SYMBOL_GPL(_submit_bh); - -int submit_bh(int op, int op_flags, struct buffer_head *bh) -{ - return submit_bh_wbc(op, op_flags, bh, 0, NULL); + return submit_bh_wbc(op, op_flags, bh, NULL); } EXPORT_SYMBOL(submit_bh); diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 987044bca1c27176ab4d6fca34e5787ae3f9404c..59cb307b15fbea58eca82535b7a7b30ec6db5409 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) } if (new_mode != old_mode) { + newattrs.ia_ctime = current_time(inode); newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE; ret = __ceph_setattr(inode, &newattrs); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1a3e1b40799a086037fe529e1b0ff02e302adb43..1e71e6ca5ddfb09466bee7d8721d064a4a846176 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -578,7 +578,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) writeback_stat = atomic_long_inc_return(&fsc->writeback_count); if (writeback_stat > CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) - set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); + set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); set_page_writeback(page); err = ceph_osdc_writepages(osdc, ceph_vino(inode), @@ -670,8 +670,12 @@ static void writepages_finish(struct ceph_osd_request *req) bool remove_page; dout("writepages_finish %p rc %d\n", inode, rc); - if (rc < 0) + if (rc < 0) { mapping_set_error(mapping, rc); + ceph_set_error_write(ci); + } else { + ceph_clear_error_write(ci); + } /* * We lost the cache cap, need to truncate the page before @@ -700,12 +704,9 @@ static void writepages_finish(struct ceph_osd_request *req) if (atomic_long_dec_return(&fsc->writeback_count) < CONGESTION_OFF_THRESH( fsc->mount_options->congestion_kb)) - clear_bdi_congested(&fsc->backing_dev_info, + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); - if (rc < 0) - SetPageError(page); - ceph_put_snap_context(page_snap_context(page)); page->private = 0; ClearPagePrivate(page); @@ -979,7 +980,7 @@ static int ceph_writepages_start(struct address_space *mapping, if (atomic_long_inc_return(&fsc->writeback_count) > CONGESTION_ON_THRESH( fsc->mount_options->congestion_kb)) { - set_bdi_congested(&fsc->backing_dev_info, + set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } @@ -1892,6 +1893,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false); wr_req->r_mtime = ci->vfs_inode.i_mtime; + wr_req->r_abort_on_full = true; err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false); if (!err) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 68c78be19d5b78ad181d15b6caf5cc075b6f9f99..a3ebb632294e7b90a315508c9b75671d2ce8ff6a 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1015,6 +1015,7 @@ static int send_cap_msg(struct cap_msg_args *arg) void *p; size_t extra_len; struct timespec zerotime = {0}; + struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" " seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu" @@ -1076,8 +1077,12 @@ static int send_cap_msg(struct cap_msg_args *arg) ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE); /* inline data size */ ceph_encode_32(&p, 0); - /* osd_epoch_barrier (version 5) */ - ceph_encode_32(&p, 0); + /* + * osd_epoch_barrier (version 5) + * The epoch_barrier is protected osdc->lock, so READ_ONCE here in + * case it was recently changed + */ + ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier)); /* oldest_flush_tid (version 6) */ ceph_encode_64(&p, arg->oldest_flush_tid); @@ -1389,7 +1394,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci, first_tid = cf->tid + 1; capsnap = container_of(cf, struct ceph_cap_snap, cap_flush); - atomic_inc(&capsnap->nref); + refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); dout("__flush_snaps %p capsnap %p tid %llu %s\n", @@ -2202,7 +2207,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty)); - atomic_inc(&capsnap->nref); + refcount_inc(&capsnap->nref); spin_unlock(&ci->i_ceph_lock); ret = __send_flush_snap(inode, session, capsnap, cap->mseq, @@ -3633,13 +3638,19 @@ void ceph_handle_caps(struct ceph_mds_session *session, p += inline_len; } + if (le16_to_cpu(msg->hdr.version) >= 5) { + struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; + u32 epoch_barrier; + + ceph_decode_32_safe(&p, end, epoch_barrier, bad); + ceph_osdc_update_epoch_barrier(osdc, epoch_barrier); + } + if (le16_to_cpu(msg->hdr.version) >= 8) { u64 flush_tid; u32 caller_uid, caller_gid; - u32 osd_epoch_barrier; u32 pool_ns_len; - /* version >= 5 */ - ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad); + /* version >= 6 */ ceph_decode_64_safe(&p, end, flush_tid, bad); /* version >= 7 */ diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f2ae393e2c31a2b3dbca7a5f81eeb5b81afa5e1b..4e2d112c982f4b12852ba7b8bad58327783bc3db 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -22,20 +22,19 @@ static int mdsmap_show(struct seq_file *s, void *p) { int i; struct ceph_fs_client *fsc = s->private; + struct ceph_mdsmap *mdsmap; if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) return 0; - seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); - seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); - seq_printf(s, "session_timeout %d\n", - fsc->mdsc->mdsmap->m_session_timeout); - seq_printf(s, "session_autoclose %d\n", - fsc->mdsc->mdsmap->m_session_autoclose); - for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { - struct ceph_entity_addr *addr = - &fsc->mdsc->mdsmap->m_info[i].addr; - int state = fsc->mdsc->mdsmap->m_info[i].state; - + mdsmap = fsc->mdsc->mdsmap; + seq_printf(s, "epoch %d\n", mdsmap->m_epoch); + seq_printf(s, "root %d\n", mdsmap->m_root); + seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds); + seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout); + seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose); + for (i = 0; i < mdsmap->m_num_mds; i++) { + struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; + int state = mdsmap->m_info[i].state; seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, ceph_pr_addr(&addr->in_addr), ceph_mds_state_name(state)); @@ -251,7 +250,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->backing_dev_info.dev)); + dev_name(fsc->sb->s_bdi->dev)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3e9ad501addfe92f171a40dffb93c65209819cbe..e071d23f61481bf23fc4407d72ea75c9d1ec2430 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_client *mdsc = fsc->mdsc; int i; int err; - u32 ftype; + unsigned frag = -1; struct ceph_mds_reply_info_parsed *rinfo; dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); @@ -341,7 +341,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* do we have the correct frag content buffered? */ if (need_send_readdir(fi, ctx->pos)) { struct ceph_mds_request *req; - unsigned frag; int op = ceph_snap(inode) == CEPH_SNAPDIR ? CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; @@ -352,8 +351,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) } if (is_hash_order(ctx->pos)) { - frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), - NULL, NULL); + /* fragtree isn't always accurate. choose frag + * based on previous reply when possible. */ + if (frag == (unsigned)-1) + frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), + NULL, NULL); } else { frag = fpos_frag(ctx->pos); } @@ -378,7 +380,11 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ceph_mdsc_put_request(req); return -ENOMEM; } + } else if (is_hash_order(ctx->pos)) { + req->r_args.readdir.offset_hash = + cpu_to_le32(fpos_hash(ctx->pos)); } + req->r_dir_release_cnt = fi->dir_release_count; req->r_dir_ordered_cnt = fi->dir_ordered_count; req->r_readdir_cache_idx = fi->readdir_cache_idx; @@ -476,6 +482,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; struct ceph_vino vino; ino_t ino; + u32 ftype; BUG_ON(rde->offset < ctx->pos); @@ -498,15 +505,17 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ctx->pos++; } + ceph_mdsc_put_request(fi->last_readdir); + fi->last_readdir = NULL; + if (fi->next_offset > 2) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + frag = fi->frag; goto more; } /* more frags? */ if (!ceph_frag_is_rightmost(fi->frag)) { - unsigned frag = ceph_frag_next(fi->frag); + frag = ceph_frag_next(fi->frag); if (is_hash_order(ctx->pos)) { loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), fi->next_offset, true); diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e8f11fa565c53ac58fddf402f6ade6320d47d490..7df550c13d7f3e25c2f0f4d6259bd92255a44f58 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) ceph_mdsc_put_request(req); if (!inode) return ERR_PTR(-ESTALE); + if (inode->i_nlink == 0) { + iput(inode); + return ERR_PTR(-ESTALE); + } } return d_obtain_alias(inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 26cc95421cca6e62ef10bfd32b18cf1e526b7c51..29308a80d66ffa118b9aec996874d65a952c29aa 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -13,6 +13,38 @@ #include "mds_client.h" #include "cache.h" +static __le32 ceph_flags_sys2wire(u32 flags) +{ + u32 wire_flags = 0; + + switch (flags & O_ACCMODE) { + case O_RDONLY: + wire_flags |= CEPH_O_RDONLY; + break; + case O_WRONLY: + wire_flags |= CEPH_O_WRONLY; + break; + case O_RDWR: + wire_flags |= CEPH_O_RDWR; + break; + } + +#define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; } + + ceph_sys2wire(O_CREAT); + ceph_sys2wire(O_EXCL); + ceph_sys2wire(O_TRUNC); + ceph_sys2wire(O_DIRECTORY); + ceph_sys2wire(O_NOFOLLOW); + +#undef ceph_sys2wire + + if (flags) + dout("unused open flags: %x", flags); + + return cpu_to_le32(wire_flags); +} + /* * Ceph file operations * @@ -74,12 +106,9 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, align = (unsigned long)(it->iov->iov_base + it->iov_offset) & (PAGE_SIZE - 1); npages = calc_pages_for(align, nbytes); - pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL); - if (!pages) { - pages = vmalloc(sizeof(*pages) * npages); - if (!pages) - return ERR_PTR(-ENOMEM); - } + pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL); + if (!pages) + return ERR_PTR(-ENOMEM); for (idx = 0; idx < npages; ) { size_t start; @@ -123,7 +152,7 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode) if (IS_ERR(req)) goto out; req->r_fmode = ceph_flags_to_mode(flags); - req->r_args.open.flags = cpu_to_le32(flags); + req->r_args.open.flags = ceph_flags_sys2wire(flags); req->r_args.open.mode = cpu_to_le32(create_mode); out: return req; @@ -192,7 +221,7 @@ int ceph_renew_caps(struct inode *inode) spin_lock(&ci->i_ceph_lock); wanted = __ceph_caps_file_wanted(ci); if (__ceph_is_any_real_caps(ci) && - (!(wanted & CEPH_CAP_ANY_WR) == 0 || ci->i_auth_cap)) { + (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) { int issued = __ceph_caps_issued(ci, NULL); spin_unlock(&ci->i_ceph_lock); dout("renew caps %p want %s issued %s updating mds_wanted\n", @@ -781,6 +810,7 @@ static void ceph_aio_retry_work(struct work_struct *work) req->r_callback = ceph_aio_complete_req; req->r_inode = inode; req->r_priv = aio_req; + req->r_abort_on_full = true; ret = ceph_osdc_start_request(req->r_osdc, req, false); out: @@ -1088,19 +1118,22 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, out: ceph_osdc_put_request(req); - if (ret == 0) { - pos += len; - written += len; - - if (pos > i_size_read(inode)) { - check_caps = ceph_inode_set_size(inode, pos); - if (check_caps) - ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_AUTHONLY, - NULL); - } - } else + if (ret != 0) { + ceph_set_error_write(ci); break; + } + + ceph_clear_error_write(ci); + pos += len; + written += len; + if (pos > i_size_read(inode)) { + check_caps = ceph_inode_set_size(inode, pos); + if (check_caps) + ceph_check_caps(ceph_inode(inode), + CHECK_CAPS_AUTHONLY, + NULL); + } + } if (ret != -EOLDSNAPC && written > 0) { @@ -1306,6 +1339,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) } retry_snap: + /* FIXME: not complete since it doesn't account for being at quota */ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; @@ -1327,7 +1361,8 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) || + (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) { struct ceph_snap_context *snapc; struct iov_iter data; inode_unlock(inode); @@ -1636,8 +1671,12 @@ static long ceph_fallocate(struct file *file, int mode, } size = i_size_read(inode); - if (!(mode & FALLOC_FL_KEEP_SIZE)) + if (!(mode & FALLOC_FL_KEEP_SIZE)) { endoff = offset + length; + ret = inode_newsize_ok(inode, endoff); + if (ret) + goto unlock; + } if (fi->fmode & CEPH_FILE_MODE_LAZY) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d449e1c03cbd791922148ad00c3d6d0f0e1599ce..4de6cdddf05928a29051e9996e75245732fee618 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1482,10 +1482,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) return readdir_prepopulate_inodes_only(req, session); - if (rinfo->hash_order && req->r_path2) { - last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, - req->r_path2, strlen(req->r_path2)); - last_hash = ceph_frag_value(last_hash); + if (rinfo->hash_order) { + if (req->r_path2) { + last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, + req->r_path2, + strlen(req->r_path2)); + last_hash = ceph_frag_value(last_hash); + } else if (rinfo->offset_hash) { + /* mds understands offset_hash */ + WARN_ON_ONCE(req->r_readdir_offset != 2); + last_hash = le32_to_cpu(rhead->args.readdir.offset_hash); + } } if (rinfo->dir_dir && @@ -1510,7 +1517,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, } if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 && - !(rinfo->hash_order && req->r_path2)) { + !(rinfo->hash_order && last_hash)) { /* note dir version at start of readdir so we can tell * if any dentries get dropped */ req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); @@ -2015,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) attr->ia_size > inode->i_size) { i_size_write(inode, attr->ia_size); inode->i_blocks = calc_inode_blocks(attr->ia_size); - inode->i_ctime = attr->ia_ctime; ci->i_reported_size = attr->ia_size; dirtied |= CEPH_CAP_FILE_EXCL; } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || @@ -2037,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, only ? "ctime only" : "ignored"); - inode->i_ctime = attr->ia_ctime; if (only) { /* * if kernel wants to dirty ctime but nothing else, @@ -2060,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (dirtied) { inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, &prealloc_cf); - inode->i_ctime = current_time(inode); + inode->i_ctime = attr->ia_ctime; } release &= issued; @@ -2071,11 +2076,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) if (inode_dirty_flags) __mark_inode_dirty(inode, inode_dirty_flags); - if (ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, attr->ia_mode); - if (err) - goto out_put; - } if (mask) { req->r_inode = inode; @@ -2083,19 +2083,18 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; + req->r_stamp = attr->ia_ctime; err = ceph_mdsc_do_request(mdsc, NULL, req); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, ceph_cap_string(dirtied), mask); - ceph_mdsc_put_request(req); - if (mask & CEPH_SETATTR_SIZE) - __ceph_do_pending_vmtruncate(inode); - ceph_free_cap_flush(prealloc_cf); - return err; -out_put: ceph_mdsc_put_request(req); ceph_free_cap_flush(prealloc_cf); + + if (err >= 0 && (mask & CEPH_SETATTR_SIZE)) + __ceph_do_pending_vmtruncate(inode); + return err; } @@ -2114,7 +2113,12 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; - return __ceph_setattr(inode, attr); + err = __ceph_setattr(inode, attr); + + if (err >= 0 && (attr->ia_valid & ATTR_MODE)) + err = posix_acl_chmod(inode, attr->ia_mode); + + return err; } /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c681762d76e66be1edf7004b1de8c13568ec6022..0c05df44cc6c8888d56d32f53d54fba109f8508c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -189,6 +189,7 @@ static int parse_reply_info_dir(void **p, void *end, info->dir_end = !!(flags & CEPH_READDIR_FRAG_END); info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE); info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER); + info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH); } if (num == 0) goto done; @@ -378,9 +379,9 @@ const char *ceph_session_state_name(int s) static struct ceph_mds_session *get_session(struct ceph_mds_session *s) { - if (atomic_inc_not_zero(&s->s_ref)) { + if (refcount_inc_not_zero(&s->s_ref)) { dout("mdsc get_session %p %d -> %d\n", s, - atomic_read(&s->s_ref)-1, atomic_read(&s->s_ref)); + refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref)); return s; } else { dout("mdsc get_session %p 0 -- FAIL", s); @@ -391,8 +392,8 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s) void ceph_put_mds_session(struct ceph_mds_session *s) { dout("mdsc put_session %p %d -> %d\n", s, - atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); - if (atomic_dec_and_test(&s->s_ref)) { + refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1); + if (refcount_dec_and_test(&s->s_ref)) { if (s->s_auth.authorizer) ceph_auth_destroy_authorizer(s->s_auth.authorizer); kfree(s); @@ -411,7 +412,7 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, return NULL; session = mdsc->sessions[mds]; dout("lookup_mds_session %p %d\n", session, - atomic_read(&session->s_ref)); + refcount_read(&session->s_ref)); get_session(session); return session; } @@ -441,7 +442,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; - if (mds >= mdsc->mdsmap->m_max_mds) + if (mds >= mdsc->mdsmap->m_num_mds) return ERR_PTR(-EINVAL); s = kzalloc(sizeof(*s), GFP_NOFS); @@ -466,7 +467,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_caps); s->s_nr_caps = 0; s->s_trim_caps = 0; - atomic_set(&s->s_ref, 1); + refcount_set(&s->s_ref, 1); INIT_LIST_HEAD(&s->s_waiting); INIT_LIST_HEAD(&s->s_unsafe); s->s_num_cap_releases = 0; @@ -494,7 +495,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, } mdsc->sessions[mds] = s; atomic_inc(&mdsc->num_sessions); - atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */ + refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); @@ -1004,7 +1005,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_session *ts; int i, mds = session->s_mds; - if (mds >= mdsc->mdsmap->m_max_mds) + if (mds >= mdsc->mdsmap->m_num_mds) return; mi = &mdsc->mdsmap->m_info[mds]; @@ -1551,9 +1552,15 @@ void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_msg *msg = NULL; struct ceph_mds_cap_release *head; struct ceph_mds_cap_item *item; + struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; struct ceph_cap *cap; LIST_HEAD(tmp_list); int num_cap_releases; + __le32 barrier, *cap_barrier; + + down_read(&osdc->lock); + barrier = cpu_to_le32(osdc->epoch_barrier); + up_read(&osdc->lock); spin_lock(&session->s_cap_lock); again: @@ -1571,7 +1578,11 @@ void ceph_send_cap_releases(struct ceph_mds_client *mdsc, head = msg->front.iov_base; head->num = cpu_to_le32(0); msg->front.iov_len = sizeof(*head); + + msg->hdr.version = cpu_to_le16(2); + msg->hdr.compat_version = cpu_to_le16(1); } + cap = list_first_entry(&tmp_list, struct ceph_cap, session_caps); list_del(&cap->session_caps); @@ -1589,6 +1600,11 @@ void ceph_send_cap_releases(struct ceph_mds_client *mdsc, ceph_put_cap(mdsc, cap); if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + // Append cap_barrier field + cap_barrier = msg->front.iov_base + msg->front.iov_len; + *cap_barrier = barrier; + msg->front.iov_len += sizeof(*cap_barrier); + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); dout("send_cap_releases mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); @@ -1604,6 +1620,11 @@ void ceph_send_cap_releases(struct ceph_mds_client *mdsc, spin_unlock(&session->s_cap_lock); if (msg) { + // Append cap_barrier field + cap_barrier = msg->front.iov_base + msg->front.iov_len; + *cap_barrier = barrier; + msg->front.iov_len += sizeof(*cap_barrier); + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); dout("send_cap_releases mds%d %p\n", session->s_mds, msg); ceph_con_send(&session->s_con, msg); @@ -1684,7 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); - req->r_stamp = current_fs_time(mdsc->fsc->sb); + req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran); req->r_op = op; req->r_direct_mode = mode; @@ -1991,7 +2012,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_pagelist) { struct ceph_pagelist *pagelist = req->r_pagelist; - atomic_inc(&pagelist->refcnt); + refcount_inc(&pagelist->refcnt); ceph_msg_data_add_pagelist(msg, pagelist); msg->hdr.data_len = cpu_to_le32(pagelist->length); } else { @@ -2638,8 +2659,10 @@ static void handle_session(struct ceph_mds_session *session, seq = le64_to_cpu(h->seq); mutex_lock(&mdsc->mutex); - if (op == CEPH_SESSION_CLOSE) + if (op == CEPH_SESSION_CLOSE) { + get_session(session); __unregister_session(mdsc, session); + } /* FIXME: this ttl calculation is generous */ session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose; mutex_unlock(&mdsc->mutex); @@ -2728,6 +2751,8 @@ static void handle_session(struct ceph_mds_session *session, kick_requests(mdsc, mds); mutex_unlock(&mdsc->mutex); } + if (op == CEPH_SESSION_CLOSE) + ceph_put_mds_session(session); return; bad: @@ -3107,7 +3132,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, dout("check_new_map new %u old %u\n", newmap->m_epoch, oldmap->m_epoch); - for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) { + for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) { if (mdsc->sessions[i] == NULL) continue; s = mdsc->sessions[i]; @@ -3121,15 +3146,33 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", ceph_session_state_name(s->s_state)); - if (i >= newmap->m_max_mds || + if (i >= newmap->m_num_mds || memcmp(ceph_mdsmap_get_addr(oldmap, i), ceph_mdsmap_get_addr(newmap, i), sizeof(struct ceph_entity_addr))) { if (s->s_state == CEPH_MDS_SESSION_OPENING) { /* the session never opened, just close it * out now */ + get_session(s); + __unregister_session(mdsc, s); __wake_requests(mdsc, &s->s_waiting); + ceph_put_mds_session(s); + } else if (i >= newmap->m_num_mds) { + /* force close session for stopped mds */ + get_session(s); __unregister_session(mdsc, s); + __wake_requests(mdsc, &s->s_waiting); + kick_requests(mdsc, i); + mutex_unlock(&mdsc->mutex); + + mutex_lock(&s->s_mutex); + cleanup_session_requests(mdsc, s); + remove_session_caps(s); + mutex_unlock(&s->s_mutex); + + ceph_put_mds_session(s); + + mutex_lock(&mdsc->mutex); } else { /* just close it */ mutex_unlock(&mdsc->mutex); @@ -3167,7 +3210,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, } } - for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) { + for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) { s = mdsc->sessions[i]; if (!s) continue; @@ -3881,7 +3924,7 @@ static struct ceph_connection *con_get(struct ceph_connection *con) struct ceph_mds_session *s = con->private; if (get_session(s)) { - dout("mdsc con_get %p ok (%d)\n", s, atomic_read(&s->s_ref)); + dout("mdsc con_get %p ok (%d)\n", s, refcount_read(&s->s_ref)); return con; } dout("mdsc con_get %p FAIL\n", s); @@ -3892,7 +3935,7 @@ static void con_put(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; - dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1); + dout("mdsc con_put %p (%d)\n", s, refcount_read(&s->s_ref) - 1); ceph_put_mds_session(s); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ac0475a2daa749d3d689956cc45a2913c955ca8f..db57ae98ed345e280358a2ac65e75ac6c71e7c2b 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -82,9 +83,10 @@ struct ceph_mds_reply_info_parsed { struct ceph_mds_reply_dirfrag *dir_dir; size_t dir_buf_size; int dir_nr; - bool dir_complete; bool dir_end; + bool dir_complete; bool hash_order; + bool offset_hash; struct ceph_mds_reply_dir_entry *dir_entries; }; @@ -104,10 +106,13 @@ struct ceph_mds_reply_info_parsed { /* * cap releases are batched and sent to the MDS en masse. + * + * Account for per-message overhead of mds_cap_release header + * and __le32 for osd epoch barrier trailing field. */ -#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE - \ +#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE - sizeof(u32) - \ sizeof(struct ceph_mds_cap_release)) / \ - sizeof(struct ceph_mds_cap_item)) + sizeof(struct ceph_mds_cap_item)) /* @@ -156,7 +161,7 @@ struct ceph_mds_session { unsigned long s_renew_requested; /* last time we sent a renew req */ u64 s_renew_seq; - atomic_t s_ref; + refcount_t s_ref; struct list_head s_waiting; /* waiting requests */ struct list_head s_unsafe; /* unsafe requests */ }; @@ -373,7 +378,7 @@ __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); static inline struct ceph_mds_session * ceph_get_mds_session(struct ceph_mds_session *s) { - atomic_inc(&s->s_ref); + refcount_inc(&s->s_ref); return s; } diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 5454e2327a5f77874d63344802709697ceb64de1..1a748cf88535bc80b8060be0cae2d4a92f85599f 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -22,11 +22,11 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) int i; /* special case for one mds */ - if (1 == m->m_max_mds && m->m_info[0].state > 0) + if (1 == m->m_num_mds && m->m_info[0].state > 0) return 0; /* count */ - for (i = 0; i < m->m_max_mds; i++) + for (i = 0; i < m->m_num_mds; i++) if (m->m_info[i].state > 0) n++; if (n == 0) @@ -135,8 +135,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_session_autoclose = ceph_decode_32(p); m->m_max_file_size = ceph_decode_64(p); m->m_max_mds = ceph_decode_32(p); + m->m_num_mds = m->m_max_mds; - m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); + m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS); if (m->m_info == NULL) goto nomem; @@ -207,9 +208,20 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_pr_addr(&addr.in_addr), ceph_mds_state_name(state)); - if (mds < 0 || mds >= m->m_max_mds || state <= 0) + if (mds < 0 || state <= 0) continue; + if (mds >= m->m_num_mds) { + int new_num = max(mds + 1, m->m_num_mds * 2); + void *new_m_info = krealloc(m->m_info, + new_num * sizeof(*m->m_info), + GFP_NOFS | __GFP_ZERO); + if (!new_m_info) + goto nomem; + m->m_info = new_m_info; + m->m_num_mds = new_num; + } + info = &m->m_info[mds]; info->global_id = global_id; info->state = state; @@ -229,6 +241,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) info->export_targets = NULL; } } + if (m->m_num_mds > m->m_max_mds) { + /* find max up mds */ + for (i = m->m_num_mds; i >= m->m_max_mds; i--) { + if (i == 0 || m->m_info[i-1].state > 0) + break; + } + m->m_num_mds = i; + } /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); @@ -270,12 +290,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) for (i = 0; i < n; i++) { s32 mds = ceph_decode_32(p); - if (mds >= 0 && mds < m->m_max_mds) { + if (mds >= 0 && mds < m->m_num_mds) { if (m->m_info[mds].laggy) num_laggy++; } } m->m_num_laggy = num_laggy; + + if (n > m->m_num_mds) { + void *new_m_info = krealloc(m->m_info, + n * sizeof(*m->m_info), + GFP_NOFS | __GFP_ZERO); + if (!new_m_info) + goto nomem; + m->m_info = new_m_info; + } + m->m_num_mds = n; } /* inc */ @@ -341,7 +371,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) { int i; - for (i = 0; i < m->m_max_mds; i++) + for (i = 0; i < m->m_num_mds; i++) kfree(m->m_info[i].export_targets); kfree(m->m_info); kfree(m->m_data_pg_pools); @@ -357,7 +387,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) return false; if (m->m_num_laggy > 0) return false; - for (i = 0; i < m->m_max_mds; i++) { + for (i = 0; i < m->m_num_mds; i++) { if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) nr_active++; } diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 8f8b41c2ef0f7d472afad0e1abcb4801ac11b221..dab5d6732345b218e12f9bf7d6fea1c5dac38d55 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -519,7 +519,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) capsnap->need_flush ? "" : "no_flush"); ihold(inode); - atomic_set(&capsnap->nref, 1); + refcount_set(&capsnap->nref, 1); INIT_LIST_HEAD(&capsnap->ci_item); capsnap->follows = old_snapc->seq; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 0ec8d0114e57ba80fdc46b1acdc9b7de7373e276..8d7918ce694a9c0d980641ab66b7be6d0aaa33a5 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -544,10 +544,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_options *opt) { struct ceph_fs_client *fsc; - const u64 supported_features = - CEPH_FEATURE_FLOCK | CEPH_FEATURE_DIRLAYOUTHASH | - CEPH_FEATURE_MDSENC | CEPH_FEATURE_MDS_INLINE_DATA; - const u64 required_features = 0; int page_count; size_t size; int err = -ENOMEM; @@ -556,8 +552,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, if (!fsc) return ERR_PTR(-ENOMEM); - fsc->client = ceph_create_client(opt, fsc, supported_features, - required_features); + fsc->client = ceph_create_client(opt, fsc); if (IS_ERR(fsc->client)) { err = PTR_ERR(fsc->client); goto fail; @@ -579,10 +574,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, atomic_long_set(&fsc->writeback_count, 0); - err = bdi_init(&fsc->backing_dev_info); - if (err < 0) - goto fail_client; - err = -ENOMEM; /* * The number of concurrent works can be high but they don't need @@ -590,7 +581,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, */ fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); if (fsc->wb_wq == NULL) - goto fail_bdi; + goto fail_client; fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); if (fsc->pg_inv_wq == NULL) goto fail_wb_wq; @@ -624,8 +615,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, destroy_workqueue(fsc->pg_inv_wq); fail_wb_wq: destroy_workqueue(fsc->wb_wq); -fail_bdi: - bdi_destroy(&fsc->backing_dev_info); fail_client: ceph_destroy_client(fsc->client); fail: @@ -643,8 +632,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) destroy_workqueue(fsc->pg_inv_wq); destroy_workqueue(fsc->trunc_wq); - bdi_destroy(&fsc->backing_dev_info); - mempool_destroy(fsc->wb_pagevec_pool); destroy_mount_options(fsc->mount_options); @@ -937,33 +924,32 @@ static int ceph_compare_super(struct super_block *sb, void *data) */ static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); -static int ceph_register_bdi(struct super_block *sb, - struct ceph_fs_client *fsc) +static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) { int err; + err = super_setup_bdi_name(sb, "ceph-%ld", + atomic_long_inc_return(&bdi_seq)); + if (err) + return err; + /* set ra_pages based on rasize mount option? */ if (fsc->mount_options->rasize >= PAGE_SIZE) - fsc->backing_dev_info.ra_pages = + sb->s_bdi->ra_pages = (fsc->mount_options->rasize + PAGE_SIZE - 1) >> PAGE_SHIFT; else - fsc->backing_dev_info.ra_pages = - VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; if (fsc->mount_options->rsize > fsc->mount_options->rasize && fsc->mount_options->rsize >= PAGE_SIZE) - fsc->backing_dev_info.io_pages = + sb->s_bdi->io_pages = (fsc->mount_options->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; else if (fsc->mount_options->rsize == 0) - fsc->backing_dev_info.io_pages = ULONG_MAX; + sb->s_bdi->io_pages = ULONG_MAX; - err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", - atomic_long_inc_return(&bdi_seq)); - if (!err) - sb->s_bdi = &fsc->backing_dev_info; - return err; + return 0; } static struct dentry *ceph_mount(struct file_system_type *fs_type, @@ -1018,7 +1004,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, dout("get_sb got existing client %p\n", fsc); } else { dout("get_sb using new client %p\n", fsc); - err = ceph_register_bdi(sb, fsc); + err = ceph_setup_bdi(sb, fsc); if (err < 0) { res = ERR_PTR(err); goto out_splat; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fe6b9cfc4013e63c8b6f6ea3b9b3bd1d4eb89364..a973acd8beaff67b5b0e67b1217c9bb7419c9982 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -92,8 +93,6 @@ struct ceph_fs_client { struct workqueue_struct *trunc_wq; atomic_long_t writeback_count; - struct backing_dev_info backing_dev_info; - #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_dentry_lru, *debugfs_caps; struct dentry *debugfs_congestion_kb; @@ -162,7 +161,7 @@ struct ceph_cap_flush { * data before flushing the snapped state (tracked here) back to the MDS. */ struct ceph_cap_snap { - atomic_t nref; + refcount_t nref; struct list_head ci_item; struct ceph_cap_flush cap_flush; @@ -191,7 +190,7 @@ struct ceph_cap_snap { static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) { - if (atomic_dec_and_test(&capsnap->nref)) { + if (refcount_dec_and_test(&capsnap->nref)) { if (capsnap->xattr_blob) ceph_buffer_put(capsnap->xattr_blob); kfree(capsnap); @@ -473,6 +472,32 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, #define CEPH_I_CAP_DROPPED (1 << 8) /* caps were forcibly dropped */ #define CEPH_I_KICK_FLUSH (1 << 9) /* kick flushing caps */ #define CEPH_I_FLUSH_SNAPS (1 << 10) /* need flush snapss */ +#define CEPH_I_ERROR_WRITE (1 << 11) /* have seen write errors */ + +/* + * We set the ERROR_WRITE bit when we start seeing write errors on an inode + * and then clear it when they start succeeding. Note that we do a lockless + * check first, and only take the lock if it looks like it needs to be changed. + * The write submission code just takes this as a hint, so we're not too + * worried if a few slip through in either direction. + */ +static inline void ceph_set_error_write(struct ceph_inode_info *ci) +{ + if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) { + spin_lock(&ci->i_ceph_lock); + ci->i_ceph_flags |= CEPH_I_ERROR_WRITE; + spin_unlock(&ci->i_ceph_lock); + } +} + +static inline void ceph_clear_error_write(struct ceph_inode_info *ci) +{ + if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) { + spin_lock(&ci->i_ceph_lock); + ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE; + spin_unlock(&ci->i_ceph_lock); + } +} static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, long long release_count, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index febc28f9e2c27648e1621531e90cefc482ed2e5d..75267cdd5dfd822b3290221b269cb0fde64a8e83 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -392,6 +392,7 @@ static int __set_xattr(struct ceph_inode_info *ci, if (update_xattr) { int err = 0; + if (xattr && (flags & XATTR_CREATE)) err = -EEXIST; else if (!xattr && (flags & XATTR_REPLACE)) @@ -399,12 +400,14 @@ static int __set_xattr(struct ceph_inode_info *ci, if (err) { kfree(name); kfree(val); + kfree(*newxattr); return err; } if (update_xattr < 0) { if (xattr) __remove_xattr(ci, xattr); kfree(name); + kfree(*newxattr); return 0; } } diff --git a/fs/char_dev.c b/fs/char_dev.c index 44a240c4bb658149c2d2b5b527af35fbf1a95d49..fb8507f521b265ba8feba385154cc908eb812f44 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -471,6 +471,85 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count) return 0; } +/** + * cdev_set_parent() - set the parent kobject for a char device + * @p: the cdev structure + * @kobj: the kobject to take a reference to + * + * cdev_set_parent() sets a parent kobject which will be referenced + * appropriately so the parent is not freed before the cdev. This + * should be called before cdev_add. + */ +void cdev_set_parent(struct cdev *p, struct kobject *kobj) +{ + WARN_ON(!kobj->state_initialized); + p->kobj.parent = kobj; +} + +/** + * cdev_device_add() - add a char device and it's corresponding + * struct device, linkink + * @dev: the device structure + * @cdev: the cdev structure + * + * cdev_device_add() adds the char device represented by @cdev to the system, + * just as cdev_add does. It then adds @dev to the system using device_add + * The dev_t for the char device will be taken from the struct device which + * needs to be initialized first. This helper function correctly takes a + * reference to the parent device so the parent will not get released until + * all references to the cdev are released. + * + * This helper uses dev->devt for the device number. If it is not set + * it will not add the cdev and it will be equivalent to device_add. + * + * This function should be used whenever the struct cdev and the + * struct device are members of the same structure whose lifetime is + * managed by the struct device. + * + * NOTE: Callers must assume that userspace was able to open the cdev and + * can call cdev fops callbacks at any time, even if this function fails. + */ +int cdev_device_add(struct cdev *cdev, struct device *dev) +{ + int rc = 0; + + if (dev->devt) { + cdev_set_parent(cdev, &dev->kobj); + + rc = cdev_add(cdev, dev->devt, 1); + if (rc) + return rc; + } + + rc = device_add(dev); + if (rc) + cdev_del(cdev); + + return rc; +} + +/** + * cdev_device_del() - inverse of cdev_device_add + * @dev: the device structure + * @cdev: the cdev structure + * + * cdev_device_del() is a helper function to call cdev_del and device_del. + * It should be used whenever cdev_device_add is used. + * + * If dev->devt is not set it will not remove the cdev and will be equivalent + * to device_del. + * + * NOTE: This guarantees that associated sysfs callbacks are not running + * or runnable, however any cdevs already open will remain and their fops + * will still be callable even after this function returns. + */ +void cdev_device_del(struct cdev *cdev, struct device *dev) +{ + device_del(dev); + if (dev->devt) + cdev_del(cdev); +} + static void cdev_unmap(dev_t dev, unsigned count) { kobj_unmap(cdev_map, dev, count); @@ -482,6 +561,10 @@ static void cdev_unmap(dev_t dev, unsigned count) * * cdev_del() removes @p from the system, possibly freeing the structure * itself. + * + * NOTE: This guarantees that cdev device will no longer be able to be + * opened, however any cdevs already open will remain and their fops will + * still be callable even after cdev_del returns. */ void cdev_del(struct cdev *p) { @@ -570,5 +653,8 @@ EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); +EXPORT_SYMBOL(cdev_set_parent); +EXPORT_SYMBOL(cdev_device_add); +EXPORT_SYMBOL(cdev_device_del); EXPORT_SYMBOL(__register_chrdev); EXPORT_SYMBOL(__unregister_chrdev); diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 07ed81cf1552e6ae97be753de0720741e3f80068..cbd216b572390ca76e481aabf9a311e4b7749d7c 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -68,7 +68,6 @@ struct cifs_sb_info { umode_t mnt_dir_mode; unsigned int mnt_cifs_flags; char *mountdata; /* options received at mount time or via DFS refs */ - struct backing_dev_info bdi; struct delayed_work prune_tlinks; struct rcu_head rcu; char *prepath; diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 02b071bf3732ac29808183974636e2ac9ca56ce6..a0b3e7d1be484fd3f026258577e18c165f738f0e 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_COLON: *target = ':'; break; + case SFM_DOUBLEQUOTE: + *target = '"'; + break; case SFM_ASTERISK: *target = '*'; break; @@ -418,6 +421,9 @@ static __le16 convert_to_sfm_char(char src_char, bool end_of_string) case ':': dest_char = cpu_to_le16(SFM_COLON); break; + case '"': + dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); + break; case '*': dest_char = cpu_to_le16(SFM_ASTERISK); break; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 3d7298cc0aeb357899dfcd9216d5b76f17cc64d9..8a79a34e66b8bee8cf9575eda76c77faae30862e 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -57,6 +57,7 @@ * not conflict (although almost does) with the mapping above. */ +#define SFM_DOUBLEQUOTE ((__u16) 0xF020) #define SFM_ASTERISK ((__u16) 0xF021) #define SFM_QUESTION ((__u16) 0xF025) #define SFM_COLON ((__u16) 0xF022) @@ -64,8 +65,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) -#define SFM_PERIOD ((__u16) 0xF028) -#define SFM_SPACE ((__u16) 0xF029) +#define SFM_SPACE ((__u16) 0xF028) +#define SFM_PERIOD ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 15bac390dff945d7fa5d785f666b2b0291fb9f65..b98436f5c7c74f9e773db0485bb9e80fcb43be99 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1135,20 +1135,19 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, u32 acllen = 0; int rc = 0; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); - struct cifs_tcon *tcon; + struct smb_version_operations *ops; cifs_dbg(NOISY, "converting ACL to mode for %s\n", path); if (IS_ERR(tlink)) return PTR_ERR(tlink); - tcon = tlink_tcon(tlink); - if (pfid && (tcon->ses->server->ops->get_acl_by_fid)) - pntsd = tcon->ses->server->ops->get_acl_by_fid(cifs_sb, pfid, - &acllen); - else if (tcon->ses->server->ops->get_acl) - pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, - &acllen); + ops = tlink_tcon(tlink)->ses->server->ops; + + if (pfid && (ops->get_acl_by_fid)) + pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen); + else if (ops->get_acl) + pntsd = ops->get_acl(cifs_sb, inode, path, &acllen); else { cifs_put_tlink(tlink); return -EOPNOTSUPP; @@ -1181,23 +1180,23 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); - struct cifs_tcon *tcon; + struct smb_version_operations *ops; if (IS_ERR(tlink)) return PTR_ERR(tlink); - tcon = tlink_tcon(tlink); + + ops = tlink_tcon(tlink)->ses->server->ops; cifs_dbg(NOISY, "set ACL from mode for %s\n", path); /* Get the security descriptor */ - if (tcon->ses->server->ops->get_acl == NULL) { + if (ops->get_acl == NULL) { cifs_put_tlink(tlink); return -EOPNOTSUPP; } - pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, - &secdesclen); + pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); @@ -1224,13 +1223,12 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); - if (tcon->ses->server->ops->set_acl == NULL) + if (ops->set_acl == NULL) rc = -EOPNOTSUPP; if (!rc) { /* Set the security descriptor */ - rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode, - path, aclflag); + rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } cifs_put_tlink(tlink); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 058ac9b36f0470a4d04478377cdc12a041df0db4..68abbb0db60898cc417ecfa03a8c11e5fe136c93 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -478,6 +478,7 @@ find_timestamp(struct cifs_ses *ses) unsigned char *blobptr; unsigned char *blobend; struct ntlmssp2_name *attrptr; + struct timespec ts; if (!ses->auth_key.len || !ses->auth_key.response) return 0; @@ -502,7 +503,8 @@ find_timestamp(struct cifs_ses *ses) blobptr += attrsize; /* advance attr value */ } - return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + ktime_get_real_ts(&ts); + return cpu_to_le64(cifs_UnixTimeToNT(ts)); } static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index dd3f5fabfdf6a35e25f9dae28020dcc9bba65007..9a1667e0e8d604ededf2a7ceb27ffcebf24b0ab9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "cifsfs.h" @@ -87,6 +88,7 @@ extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; +struct workqueue_struct *cifsoplockd_wq; __u32 cifs_lock_secret; /* @@ -138,7 +140,12 @@ cifs_read_super(struct super_block *sb) sb->s_magic = CIFS_MAGIC_NUMBER; sb->s_op = &cifs_super_ops; sb->s_xattr = cifs_xattr_handlers; - sb->s_bdi = &cifs_sb->bdi; + rc = super_setup_bdi(sb); + if (rc) + goto out_no_root; + /* tune readahead according to rsize */ + sb->s_bdi->ra_pages = cifs_sb->rsize / PAGE_SIZE; + sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ inode = cifs_root_iget(sb); @@ -1369,9 +1376,16 @@ init_cifs(void) goto out_clean_proc; } + cifsoplockd_wq = alloc_workqueue("cifsoplockd", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!cifsoplockd_wq) { + rc = -ENOMEM; + goto out_destroy_cifsiod_wq; + } + rc = cifs_fscache_register(); if (rc) - goto out_destroy_wq; + goto out_destroy_cifsoplockd_wq; rc = cifs_init_inodecache(); if (rc) @@ -1419,7 +1433,9 @@ init_cifs(void) cifs_destroy_inodecache(); out_unreg_fscache: cifs_fscache_unregister(); -out_destroy_wq: +out_destroy_cifsoplockd_wq: + destroy_workqueue(cifsoplockd_wq); +out_destroy_cifsiod_wq: destroy_workqueue(cifsiod_wq); out_clean_proc: cifs_proc_clean(); @@ -1442,6 +1458,7 @@ exit_cifs(void) cifs_destroy_mids(); cifs_destroy_inodecache(); cifs_fscache_unregister(); + destroy_workqueue(cifsoplockd_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 37f5a41cc50cc523cd76c790100398d4db9e80ca..bcc7d9acad64b3a59b9c966956accb2c51d9ac13 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -418,7 +418,7 @@ struct smb_version_operations { int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, const unsigned char *, const unsigned char *, char *, - size_t, const struct nls_table *, int); + size_t, struct cifs_sb_info *); int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, const char *, const void *, const __u16, const struct nls_table *, int); @@ -1115,6 +1115,23 @@ struct cifs_io_parms { struct cifs_tcon *tcon; }; +struct cifs_aio_ctx { + struct kref refcount; + struct list_head list; + struct mutex aio_mutex; + struct completion done; + struct iov_iter iter; + struct kiocb *iocb; + struct cifsFileInfo *cfile; + struct bio_vec *bv; + loff_t pos; + unsigned int npages; + ssize_t rc; + unsigned int len; + unsigned int total_len; + bool should_dirty; +}; + struct cifs_readdata; /* asynchronous read support */ @@ -1124,6 +1141,7 @@ struct cifs_readdata { struct completion done; struct cifsFileInfo *cfile; struct address_space *mapping; + struct cifs_aio_ctx *ctx; __u64 offset; unsigned int bytes; unsigned int got_bytes; @@ -1154,6 +1172,7 @@ struct cifs_writedata { enum writeback_sync_modes sync_mode; struct work_struct work; struct cifsFileInfo *cfile; + struct cifs_aio_ctx *ctx; __u64 offset; pid_t pid; unsigned int bytes; @@ -1683,6 +1702,7 @@ void cifs_oplock_break(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; +extern struct workqueue_struct *cifsoplockd_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_mid_poolp; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 97e5d236d26559806ca8bc278f7c248e0579ef77..6eb3147132e30c2225b610b01281111bb896c617 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -480,8 +480,7 @@ extern int CIFSSMBCopy(unsigned int xid, extern ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, const unsigned char *ea_name, char *EAData, - size_t bufsize, const struct nls_table *nls_codepage, - int remap_special_chars); + size_t bufsize, struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const char *ea_name, const void *ea_value, const __u16 ea_value_len, @@ -535,4 +534,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); +struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); +void cifs_aio_ctx_release(struct kref *refcount); +int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw); #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5d21f00ae341b4ec002eb33dd79c7a16a1e1dfa4..fbb0d4cbda413e5349ab97c8fdfe65435efdc6ed 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -478,14 +478,14 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr) * this requirement. */ int val, seconds, remain, result; - struct timespec ts, utc; - utc = CURRENT_TIME; + struct timespec ts; + unsigned long utc = ktime_get_real_seconds(); ts = cnvrtDosUnixTm(rsp->SrvTime.Date, rsp->SrvTime.Time, 0); cifs_dbg(FYI, "SrvTime %d sec since 1970 (utc: %d) diff: %d\n", - (int)ts.tv_sec, (int)utc.tv_sec, - (int)(utc.tv_sec - ts.tv_sec)); - val = (int)(utc.tv_sec - ts.tv_sec); + (int)ts.tv_sec, (int)utc, + (int)(utc - ts.tv_sec)); + val = (int)(utc - ts.tv_sec); seconds = abs(val); result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; remain = seconds % MIN_TZ_ADJ; @@ -697,9 +697,7 @@ cifs_echo_callback(struct mid_q_entry *mid) { struct TCP_Server_Info *server = mid->callback_data; - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, 1, CIFS_ECHO_OP); } @@ -718,6 +716,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server) if (rc) return rc; + if (server->capabilities & CAP_UNICODE) + smb->hdr.Flags2 |= SMBFLG2_UNICODE; + /* set up echo request */ smb->hdr.Tid = 0xffff; smb->hdr.WordCount = 1; @@ -1596,9 +1597,7 @@ cifs_readv_callback(struct mid_q_entry *mid) } queue_work(cifsiod_wq, &rdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, 1, 0); } @@ -2055,7 +2054,6 @@ cifs_writev_callback(struct mid_q_entry *mid) { struct cifs_writedata *wdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; unsigned int written; WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf; @@ -2092,9 +2090,7 @@ cifs_writev_callback(struct mid_q_entry *mid) } queue_work(cifsiod_wq, &wdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(tcon->ses->server, 1, 0); } @@ -6073,11 +6069,13 @@ ssize_t CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, const unsigned char *ea_name, char *EAData, size_t buf_size, - const struct nls_table *nls_codepage, int remap) + struct cifs_sb_info *cifs_sb) { /* BB assumes one setup word */ TRANSACTION2_QPI_REQ *pSMB = NULL; TRANSACTION2_QPI_RSP *pSMBr = NULL; + int remap = cifs_remap(cifs_sb); + struct nls_table *nls_codepage = cifs_sb->local_nls; int rc = 0; int bytes_returned; int list_len; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d82467cfb0e2df5ef7e655bc601f4f17f3b890b1..9365c0cf77adbadee2313776f1a9dc7b5468f0bb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -1945,9 +1946,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } if (!got_ip) { + int len; + const char *slash; + /* No ip= option specified? Try to get it from UNC */ - if (!cifs_convert_address(dstaddr, &vol->UNC[2], - strlen(&vol->UNC[2]))) { + /* Use the address part of the UNC. */ + slash = strchr(&vol->UNC[2], '\\'); + len = slash - &vol->UNC[2]; + if (!cifs_convert_address(dstaddr, &vol->UNC[2], len)) { pr_err("Unable to determine destination address.\n"); goto cifs_parse_mount_err; } @@ -2912,16 +2918,14 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; + bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) { - if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)) - return 0; - /* The prepath should be null terminated strings */ - if (strcmp(new->prepath, old->prepath)) - return 0; - + if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; - } + else if (!old_set && !new_set) + return 1; + return 0; } @@ -3692,10 +3696,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) int referral_walks_count = 0; #endif - rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs"); - if (rc) - return rc; - #ifdef CONFIG_CIFS_DFS_UPCALL try_mount_again: /* cleanup activities if we're chasing a referral */ @@ -3723,7 +3723,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) server = cifs_get_tcp_session(volume_info); if (IS_ERR(server)) { rc = PTR_ERR(server); - bdi_destroy(&cifs_sb->bdi); goto out; } if ((volume_info->max_credits < 20) || @@ -3780,9 +3779,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info); - /* tune readahead according to rsize */ - cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_SIZE; - remote_path_check: #ifdef CONFIG_CIFS_DFS_UPCALL /* @@ -3899,7 +3895,6 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) cifs_put_smb_ses(ses); else cifs_put_tcp_session(server, 0); - bdi_destroy(&cifs_sb->bdi); } out: @@ -4102,7 +4097,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); - bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb->mountdata); kfree(cifs_sb->prepath); call_rcu(&cifs_sb->rcu, delayed_free); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 21d4045357397de647b8fe0282ed1a436b7e3a7f..0fd081bd2a2f5d3fb4ed18fdcb7a1371cf9f5627 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -582,7 +582,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - down_read(&cinode->lock_sem); + down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); if (cinode->can_cache_brlcks) { /* can cache locks - no need to relock */ up_read(&cinode->lock_sem); @@ -2458,11 +2458,14 @@ cifs_uncached_writedata_release(struct kref *refcount) struct cifs_writedata *wdata = container_of(refcount, struct cifs_writedata, refcount); + kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); for (i = 0; i < wdata->nr_pages; i++) put_page(wdata->pages[i]); cifs_writedata_release(refcount); } +static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); + static void cifs_uncached_writev_complete(struct work_struct *work) { @@ -2478,7 +2481,8 @@ cifs_uncached_writev_complete(struct work_struct *work) spin_unlock(&inode->i_lock); complete(&wdata->done); - + collect_uncached_write_data(wdata->ctx); + /* the below call can possibly free the last ref to aio ctx */ kref_put(&wdata->refcount, cifs_uncached_writedata_release); } @@ -2527,7 +2531,8 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, static int cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, struct cifsFileInfo *open_file, - struct cifs_sb_info *cifs_sb, struct list_head *wdata_list) + struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, + struct cifs_aio_ctx *ctx) { int rc = 0; size_t cur_len; @@ -2595,6 +2600,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, wdata->pagesz = PAGE_SIZE; wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); wdata->credits = credits; + wdata->ctx = ctx; + kref_get(&ctx->refcount); if (!wdata->cfile->invalidHandle || !(rc = cifs_reopen_file(wdata->cfile, false))) @@ -2620,81 +2627,61 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, return rc; } -ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) { - struct file *file = iocb->ki_filp; - ssize_t total_written = 0; - struct cifsFileInfo *open_file; + struct cifs_writedata *wdata, *tmp; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; - struct cifs_writedata *wdata, *tmp; - struct list_head wdata_list; - struct iov_iter saved_from = *from; + struct dentry *dentry = ctx->cfile->dentry; + unsigned int i; int rc; - /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. - */ - - rc = generic_write_checks(iocb, from); - if (rc <= 0) - return rc; - - INIT_LIST_HEAD(&wdata_list); - cifs_sb = CIFS_FILE_SB(file); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_writev) - return -ENOSYS; + tcon = tlink_tcon(ctx->cfile->tlink); + cifs_sb = CIFS_SB(dentry->d_sb); - rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from, - open_file, cifs_sb, &wdata_list); + mutex_lock(&ctx->aio_mutex); - /* - * If at least one write was successfully sent, then discard any rc - * value from the later writes. If the other write succeeds, then - * we'll end up returning whatever was written. If it fails, then - * we'll get a new rc value from that. - */ - if (!list_empty(&wdata_list)) - rc = 0; + if (list_empty(&ctx->list)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + rc = ctx->rc; /* * Wait for and collect replies for any successful sends in order of - * increasing offset. Once an error is hit or we get a fatal signal - * while waiting, then return without waiting for any more replies. + * increasing offset. Once an error is hit, then return without waiting + * for any more replies. */ restart_loop: - list_for_each_entry_safe(wdata, tmp, &wdata_list, list) { + list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { if (!rc) { - /* FIXME: freezable too? */ - rc = wait_for_completion_killable(&wdata->done); - if (rc) - rc = -EINTR; - else if (wdata->result) + if (!try_wait_for_completion(&wdata->done)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + + if (wdata->result) rc = wdata->result; else - total_written += wdata->bytes; + ctx->total_len += wdata->bytes; /* resend call if it's a retryable error */ if (rc == -EAGAIN) { struct list_head tmp_list; - struct iov_iter tmp_from = saved_from; + struct iov_iter tmp_from = ctx->iter; INIT_LIST_HEAD(&tmp_list); list_del_init(&wdata->list); iov_iter_advance(&tmp_from, - wdata->offset - iocb->ki_pos); + wdata->offset - ctx->pos); rc = cifs_write_from_iter(wdata->offset, wdata->bytes, &tmp_from, - open_file, cifs_sb, &tmp_list); + ctx->cfile, cifs_sb, &tmp_list, + ctx); - list_splice(&tmp_list, &wdata_list); + list_splice(&tmp_list, &ctx->list); kref_put(&wdata->refcount, cifs_uncached_writedata_release); @@ -2705,12 +2692,111 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } + for (i = 0; i < ctx->npages; i++) + put_page(ctx->bv[i].bv_page); + + cifs_stats_bytes_written(tcon, ctx->total_len); + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); + + ctx->rc = (rc == 0) ? ctx->total_len : rc; + + mutex_unlock(&ctx->aio_mutex); + + if (ctx->iocb && ctx->iocb->ki_complete) + ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0); + else + complete(&ctx->done); +} + +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + ssize_t total_written = 0; + struct cifsFileInfo *cfile; + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + struct cifs_aio_ctx *ctx; + struct iov_iter saved_from = *from; + int rc; + + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. + */ + + rc = generic_write_checks(iocb, from); + if (rc <= 0) + return rc; + + cifs_sb = CIFS_FILE_SB(file); + cfile = file->private_data; + tcon = tlink_tcon(cfile->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + + ctx = cifs_aio_ctx_alloc(); + if (!ctx) + return -ENOMEM; + + ctx->cfile = cifsFileInfo_get(cfile); + + if (!is_sync_kiocb(iocb)) + ctx->iocb = iocb; + + ctx->pos = iocb->ki_pos; + + rc = setup_aio_ctx_iter(ctx, from, WRITE); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + /* grab a lock here due to read response handlers can access ctx */ + mutex_lock(&ctx->aio_mutex); + + rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from, + cfile, cifs_sb, &ctx->list, ctx); + + /* + * If at least one write was successfully sent, then discard any rc + * value from the later writes. If the other write succeeds, then + * we'll end up returning whatever was written. If it fails, then + * we'll get a new rc value from that. + */ + if (!list_empty(&ctx->list)) + rc = 0; + + mutex_unlock(&ctx->aio_mutex); + + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + if (!is_sync_kiocb(iocb)) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EIOCBQUEUED; + } + + rc = wait_for_completion_killable(&ctx->done); + if (rc) { + mutex_lock(&ctx->aio_mutex); + ctx->rc = rc = -EINTR; + total_written = ctx->total_len; + mutex_unlock(&ctx->aio_mutex); + } else { + rc = ctx->rc; + total_written = ctx->total_len; + } + + kref_put(&ctx->refcount, cifs_aio_ctx_release); + if (unlikely(!total_written)) return rc; iocb->ki_pos += total_written; - set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags); - cifs_stats_bytes_written(tcon, total_written); return total_written; } @@ -2859,6 +2945,7 @@ cifs_uncached_readdata_release(struct kref *refcount) struct cifs_readdata, refcount); unsigned int i; + kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); for (i = 0; i < rdata->nr_pages; i++) { put_page(rdata->pages[i]); rdata->pages[i] = NULL; @@ -2900,6 +2987,8 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) return remaining ? -EFAULT : 0; } +static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); + static void cifs_uncached_readv_complete(struct work_struct *work) { @@ -2907,6 +2996,8 @@ cifs_uncached_readv_complete(struct work_struct *work) struct cifs_readdata, work); complete(&rdata->done); + collect_uncached_read_data(rdata->ctx); + /* the below call can possibly free the last ref to aio ctx */ kref_put(&rdata->refcount, cifs_uncached_readdata_release); } @@ -2973,7 +3064,8 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server, static int cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, - struct cifs_sb_info *cifs_sb, struct list_head *rdata_list) + struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, + struct cifs_aio_ctx *ctx) { struct cifs_readdata *rdata; unsigned int npages, rsize, credits; @@ -3020,6 +3112,8 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, rdata->read_into_pages = cifs_uncached_read_into_pages; rdata->copy_into_pages = cifs_uncached_copy_into_pages; rdata->credits = credits; + rdata->ctx = ctx; + kref_get(&ctx->refcount); if (!rdata->cfile->invalidHandle || !(rc = cifs_reopen_file(rdata->cfile, true))) @@ -3042,50 +3136,37 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, return rc; } -ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +static void +collect_uncached_read_data(struct cifs_aio_ctx *ctx) { - struct file *file = iocb->ki_filp; - ssize_t rc; - size_t len; - ssize_t total_read = 0; - loff_t offset = iocb->ki_pos; + struct cifs_readdata *rdata, *tmp; + struct iov_iter *to = &ctx->iter; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; - struct cifsFileInfo *open_file; - struct cifs_readdata *rdata, *tmp; - struct list_head rdata_list; - - len = iov_iter_count(to); - if (!len) - return 0; - - INIT_LIST_HEAD(&rdata_list); - cifs_sb = CIFS_FILE_SB(file); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_readv) - return -ENOSYS; + unsigned int i; + int rc; - if ((file->f_flags & O_ACCMODE) == O_WRONLY) - cifs_dbg(FYI, "attempting read on write only file instance\n"); + tcon = tlink_tcon(ctx->cfile->tlink); + cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); - rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list); + mutex_lock(&ctx->aio_mutex); - /* if at least one read request send succeeded, then reset rc */ - if (!list_empty(&rdata_list)) - rc = 0; + if (list_empty(&ctx->list)) { + mutex_unlock(&ctx->aio_mutex); + return; + } - len = iov_iter_count(to); + rc = ctx->rc; /* the loop below should proceed in the order of increasing offsets */ again: - list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { + list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { if (!rc) { - /* FIXME: freezable sleep too? */ - rc = wait_for_completion_killable(&rdata->done); - if (rc) - rc = -EINTR; - else if (rdata->result == -EAGAIN) { + if (!try_wait_for_completion(&rdata->done)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + + if (rdata->result == -EAGAIN) { /* resend call if it's a retryable error */ struct list_head tmp_list; unsigned int got_bytes = rdata->got_bytes; @@ -3111,9 +3192,9 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) rdata->offset + got_bytes, rdata->bytes - got_bytes, rdata->cfile, cifs_sb, - &tmp_list); + &tmp_list, ctx); - list_splice(&tmp_list, &rdata_list); + list_splice(&tmp_list, &ctx->list); kref_put(&rdata->refcount, cifs_uncached_readdata_release); @@ -3131,14 +3212,110 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - total_read = len - iov_iter_count(to); + for (i = 0; i < ctx->npages; i++) { + if (ctx->should_dirty) + set_page_dirty(ctx->bv[i].bv_page); + put_page(ctx->bv[i].bv_page); + } + + ctx->total_len = ctx->len - iov_iter_count(to); - cifs_stats_bytes_read(tcon, total_read); + cifs_stats_bytes_read(tcon, ctx->total_len); /* mask nodata case */ if (rc == -ENODATA) rc = 0; + ctx->rc = (rc == 0) ? ctx->total_len : rc; + + mutex_unlock(&ctx->aio_mutex); + + if (ctx->iocb && ctx->iocb->ki_complete) + ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0); + else + complete(&ctx->done); +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + ssize_t rc; + size_t len; + ssize_t total_read = 0; + loff_t offset = iocb->ki_pos; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *cfile; + struct cifs_aio_ctx *ctx; + + len = iov_iter_count(to); + if (!len) + return 0; + + cifs_sb = CIFS_FILE_SB(file); + cfile = file->private_data; + tcon = tlink_tcon(cfile->tlink); + + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + cifs_dbg(FYI, "attempting read on write only file instance\n"); + + ctx = cifs_aio_ctx_alloc(); + if (!ctx) + return -ENOMEM; + + ctx->cfile = cifsFileInfo_get(cfile); + + if (!is_sync_kiocb(iocb)) + ctx->iocb = iocb; + + if (to->type & ITER_IOVEC) + ctx->should_dirty = true; + + rc = setup_aio_ctx_iter(ctx, to, READ); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + len = ctx->len; + + /* grab a lock here due to read response handlers can access ctx */ + mutex_lock(&ctx->aio_mutex); + + rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); + + /* if at least one read request send succeeded, then reset rc */ + if (!list_empty(&ctx->list)) + rc = 0; + + mutex_unlock(&ctx->aio_mutex); + + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + if (!is_sync_kiocb(iocb)) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EIOCBQUEUED; + } + + rc = wait_for_completion_killable(&ctx->done); + if (rc) { + mutex_lock(&ctx->aio_mutex); + ctx->rc = rc = -EINTR; + total_read = ctx->total_len; + mutex_unlock(&ctx->aio_mutex); + } else { + rc = ctx->rc; + total_read = ctx->total_len; + } + + kref_put(&ctx->refcount, cifs_aio_ctx_release); + if (total_read) { iocb->ki_pos += total_read; return total_read; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b261db34103ce929ed863e34753511ac94ec0355..4d1fcd76d022f6848c16294411d5a937e053e27a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -322,9 +322,9 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb) fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; fattr->cf_uid = cifs_sb->mnt_uid; fattr->cf_gid = cifs_sb->mnt_gid; - fattr->cf_atime = CURRENT_TIME; - fattr->cf_ctime = CURRENT_TIME; - fattr->cf_mtime = CURRENT_TIME; + ktime_get_real_ts(&fattr->cf_mtime); + fattr->cf_mtime = timespec_trunc(fattr->cf_mtime, sb->s_time_gran); + fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime; fattr->cf_nlink = 2; fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; } @@ -563,8 +563,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path, "SETFILEBITS", ea_value, 4 /* size of buf */, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); + cifs_sb); cifs_put_tlink(tlink); if (rc < 0) return (int)rc; @@ -586,9 +585,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, - struct cifs_sb_info *cifs_sb, bool adjust_tz, + struct super_block *sb, bool adjust_tz, bool symlink) { + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); memset(fattr, 0, sizeof(*fattr)); @@ -598,8 +598,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, if (info->LastAccessTime) fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); - else - fattr->cf_atime = CURRENT_TIME; + else { + ktime_get_real_ts(&fattr->cf_atime); + fattr->cf_atime = timespec_trunc(fattr->cf_atime, sb->s_time_gran); + } fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime); fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); @@ -659,7 +661,6 @@ cifs_get_file_info(struct file *filp) FILE_ALL_INFO find_data; struct cifs_fattr fattr; struct inode *inode = file_inode(filp); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsFileInfo *cfile = filp->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; @@ -671,7 +672,7 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false, + cifs_all_info_to_fattr(&fattr, &find_data, inode->i_sb, false, false); break; case -EREMOTE: @@ -753,7 +754,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } if (!rc) { - cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz, + cifs_all_info_to_fattr(&fattr, data, sb, adjust_tz, symlink); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); @@ -1363,9 +1364,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) cifs_inode = CIFS_I(inode); cifs_inode->time = 0; /* will force revalidate to get info when needed */ - inode->i_ctime = current_fs_time(sb); + inode->i_ctime = current_time(inode); } - dir->i_ctime = dir->i_mtime = current_fs_time(sb); + dir->i_ctime = dir->i_mtime = current_time(dir); cifs_inode = CIFS_I(dir); CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ unlink_out: @@ -1633,7 +1634,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifsInode->time = 0; d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime = - current_fs_time(inode->i_sb); + current_time(inode); rmdir_exit: kfree(full_path); @@ -1806,7 +1807,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = - target_dir->i_mtime = current_fs_time(source_dir->i_sb); + target_dir->i_mtime = current_time(source_dir); cifs_rename_exit: kfree(info_buf_source); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 265c45fe4ea5e5246f5304480c2b4b66bf9d6c73..76fb0917dc8c37405b4a481831ee6a695d1e265c 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -74,7 +74,8 @@ static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0, src_inode->i_size, 0); - + if (rc > 0) + rc = 0; out_fput: fdput(src_file); out_drop_write: @@ -208,10 +209,14 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EOPNOTSUPP; break; case CIFS_IOC_GET_MNT_INFO: + if (pSMBFile == NULL) + break; tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; case CIFS_ENUMERATE_SNAPSHOTS: + if (pSMBFile == NULL) + break; if (arg == 0) { rc = -EINVAL; goto cifs_ioc_exit; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index d3fb11529ed96054ac753a9e26898942bb708d56..b08531977daa4084f774c75de33204b2b6fa0902 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -167,13 +168,11 @@ cifs_buf_get(void) /* clear the first few header bytes */ /* for most paths, more is cleared in header_assemble */ - if (ret_buf) { - memset(ret_buf, 0, buf_size + 3); - atomic_inc(&bufAllocCount); + memset(ret_buf, 0, buf_size + 3); + atomic_inc(&bufAllocCount); #ifdef CONFIG_CIFS_STATS2 - atomic_inc(&totBufAllocCount); + atomic_inc(&totBufAllocCount); #endif /* CONFIG_CIFS_STATS2 */ - } return ret_buf; } @@ -201,15 +200,13 @@ cifs_small_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = mempool_alloc(cifs_sm_req_poolp, GFP_NOFS); - if (ret_buf) { /* No need to clear memory here, cleared in header assemble */ /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ - atomic_inc(&smBufAllocCount); + atomic_inc(&smBufAllocCount); #ifdef CONFIG_CIFS_STATS2 - atomic_inc(&totSmBufAllocCount); + atomic_inc(&totSmBufAllocCount); #endif /* CONFIG_CIFS_STATS2 */ - } return ret_buf; } @@ -492,7 +489,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &pCifsInode->flags); - queue_work(cifsiod_wq, + queue_work(cifsoplockd_wq, &netfile->oplock_break); netfile->oplock_break_cancelled = false; @@ -745,3 +742,122 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, } return rc; } + +struct cifs_aio_ctx * +cifs_aio_ctx_alloc(void) +{ + struct cifs_aio_ctx *ctx; + + ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + INIT_LIST_HEAD(&ctx->list); + mutex_init(&ctx->aio_mutex); + init_completion(&ctx->done); + kref_init(&ctx->refcount); + return ctx; +} + +void +cifs_aio_ctx_release(struct kref *refcount) +{ + struct cifs_aio_ctx *ctx = container_of(refcount, + struct cifs_aio_ctx, refcount); + + cifsFileInfo_put(ctx->cfile); + kvfree(ctx->bv); + kfree(ctx); +} + +#define CIFS_AIO_KMALLOC_LIMIT (1024 * 1024) + +int +setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw) +{ + ssize_t rc; + unsigned int cur_npages; + unsigned int npages = 0; + unsigned int i; + size_t len; + size_t count = iov_iter_count(iter); + unsigned int saved_len; + size_t start; + unsigned int max_pages = iov_iter_npages(iter, INT_MAX); + struct page **pages = NULL; + struct bio_vec *bv = NULL; + + if (iter->type & ITER_KVEC) { + memcpy(&ctx->iter, iter, sizeof(struct iov_iter)); + ctx->len = count; + iov_iter_advance(iter, count); + return 0; + } + + if (max_pages * sizeof(struct bio_vec) <= CIFS_AIO_KMALLOC_LIMIT) + bv = kmalloc_array(max_pages, sizeof(struct bio_vec), + GFP_KERNEL); + + if (!bv) { + bv = vmalloc(max_pages * sizeof(struct bio_vec)); + if (!bv) + return -ENOMEM; + } + + if (max_pages * sizeof(struct page *) <= CIFS_AIO_KMALLOC_LIMIT) + pages = kmalloc_array(max_pages, sizeof(struct page *), + GFP_KERNEL); + + if (!pages) { + pages = vmalloc(max_pages * sizeof(struct page *)); + if (!bv) { + kvfree(bv); + return -ENOMEM; + } + } + + saved_len = count; + + while (count && npages < max_pages) { + rc = iov_iter_get_pages(iter, pages, count, max_pages, &start); + if (rc < 0) { + cifs_dbg(VFS, "couldn't get user pages (rc=%zd)\n", rc); + break; + } + + if (rc > count) { + cifs_dbg(VFS, "get pages rc=%zd more than %zu\n", rc, + count); + break; + } + + iov_iter_advance(iter, rc); + count -= rc; + rc += start; + cur_npages = DIV_ROUND_UP(rc, PAGE_SIZE); + + if (npages + cur_npages > max_pages) { + cifs_dbg(VFS, "out of vec array capacity (%u vs %u)\n", + npages + cur_npages, max_pages); + break; + } + + for (i = 0; i < cur_npages; i++) { + len = rc > PAGE_SIZE ? PAGE_SIZE : rc; + bv[npages + i].bv_page = pages[i]; + bv[npages + i].bv_offset = start; + bv[npages + i].bv_len = len - start; + rc -= len; + start = 0; + } + + npages += cur_npages; + } + + kvfree(pages); + ctx->bv = bv; + ctx->len = saved_len - count; + ctx->npages = npages; + iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len); + return 0; +} diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index abae6dd2c6b998816db830f40934c86ea8c33fff..cc88f4f0325ef4003bd248c36ca96865ff1b4ddd 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -980,10 +980,10 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) cifs_dbg(VFS, "illegal hours %d\n", st->Hours); days = sd->Day; month = sd->Month; - if ((days > 31) || (month > 12)) { + if (days < 1 || days > 31 || month < 1 || month > 12) { cifs_dbg(VFS, "illegal date, month %d day: %d\n", month, days); - if (month > 12) - month = 12; + days = clamp(days, 1, 31); + month = clamp(month, 1, 12); } month -= 1; days += total_days_of_prev_months[month]; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index cc93ba4da9b592468f36d37b80777e88b8e400e6..27bc360c7ffd7e1081f907c5f080dc4ba439fbfc 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1015,6 +1015,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile) return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; } +static bool +cifs_can_echo(struct TCP_Server_Info *server) +{ + if (server->tcpStatus == CifsGood) + return true; + + return false; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1049,6 +1058,7 @@ struct smb_version_operations smb1_operations = { .get_dfs_refer = CIFSGetDFSRefer, .qfs_tcon = cifs_qfs_tcon, .is_path_accessible = cifs_is_path_accessible, + .can_echo = cifs_can_echo, .query_path_info = cifs_query_path_info, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 1a04b3a5beb164cb22c166f5c7ac03b65e70d00b..7b08a1446a7fbd08f2f245fa2dbb75261eb3f5b2 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -499,7 +499,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, else cfile->oplock_break_cancelled = true; - queue_work(cifsiod_wq, &cfile->oplock_break); + queue_work(cifsoplockd_wq, &cfile->oplock_break); kfree(lw); return true; } @@ -643,7 +643,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags); spin_unlock(&cfile->file_info_lock); - queue_work(cifsiod_wq, &cfile->oplock_break); + queue_work(cifsoplockd_wq, + &cfile->oplock_break); spin_unlock(&tcon->open_file_lock); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 152e37f2ad9213462a2ae439296a6edb49d653f9..c58691834eb2b74fa34f3fe2661ed3e211c4d22e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -942,6 +942,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, } if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) { rc = -ERANGE; + kfree(retbuf); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 02da648041fcd58d6e37431a60d596a5bfd4ca06..e4afdaae743f28cf2d1396faed88e7465f875d48 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include "smb2pdu.h" @@ -632,8 +633,12 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { - cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); - return -EIO; + cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n", + rsplen); + + /* relax check since Mac returns max bufsize allowed on ioctl */ + if (rsplen > CIFSMaxBufSize) + return -EIO; } /* check validate negotiate info response matches what we got earlier */ @@ -1235,15 +1240,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_exit; } - if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) + switch (rsp->ShareType) { + case SMB2_SHARE_TYPE_DISK: cifs_dbg(FYI, "connection to disk share\n"); - else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { + break; + case SMB2_SHARE_TYPE_PIPE: tcon->ipc = true; cifs_dbg(FYI, "connection to pipe share\n"); - } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { - tcon->print = true; + break; + case SMB2_SHARE_TYPE_PRINT: + tcon->ipc = true; cifs_dbg(FYI, "connection to printer\n"); - } else { + break; + default: cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType); rc = -EOPNOTSUPP; goto tcon_error_exit; @@ -1853,8 +1862,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, * than one credit. Windows typically sets this smaller, but for some * ioctls it may be useful to allow server to send more. No point * limiting what the server can send as long as fits in one credit + * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE + * (by default, note that it can be overridden to make max larger) + * in responses (except for read responses which can be bigger. + * We may want to bump this limit up */ - req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ + req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize); if (is_fsctl) req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); @@ -2164,9 +2177,7 @@ smb2_echo_callback(struct mid_q_entry *mid) if (mid->mid_state == MID_RESPONSE_RECEIVED) credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, credits_received, CIFS_ECHO_OP); } @@ -2424,9 +2435,7 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_fail_inc(tcon, SMB2_READ_HE); queue_work(cifsiod_wq, &rdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(server, credits_received, 0); } @@ -2585,7 +2594,6 @@ smb2_writev_callback(struct mid_q_entry *mid) { struct cifs_writedata *wdata = mid->callback_data; struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); - struct TCP_Server_Info *server = tcon->ses->server; unsigned int written; struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; unsigned int credits_received = 1; @@ -2625,9 +2633,7 @@ smb2_writev_callback(struct mid_q_entry *mid) cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); queue_work(cifsiod_wq, &wdata->work); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); add_credits(tcon->ses->server, credits_received, 0); } diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 506b67fc93d9611ea859cf738126fb2d67e52e16..c69ec96e92acb8613db1585d855fa54dce426a5d 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -538,23 +538,19 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, } temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); - if (temp == NULL) - return temp; - else { - memset(temp, 0, sizeof(struct mid_q_entry)); - temp->mid = le64_to_cpu(shdr->MessageId); - temp->pid = current->pid; - temp->command = shdr->Command; /* Always LE */ - temp->when_alloc = jiffies; - temp->server = server; - - /* - * The default is for the mid to be synchronous, so the - * default callback just wakes up the current task. - */ - temp->callback = cifs_wake_up_task; - temp->callback_data = current; - } + memset(temp, 0, sizeof(struct mid_q_entry)); + temp->mid = le64_to_cpu(shdr->MessageId); + temp->pid = current->pid; + temp->command = shdr->Command; /* Always LE */ + temp->when_alloc = jiffies; + temp->server = server; + + /* + * The default is for the mid to be synchronous, so the + * default callback just wakes up the current task. + */ + temp->callback = cifs_wake_up_task; + temp->callback_data = current; atomic_inc(&midCount); temp->mid_state = MID_REQUEST_ALLOCATED; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index f6e13a977fc8e907e3fa89fc0a5d14a4ab0eccd5..47a125ece11ea0d3e2daa0814c8cab28b4643bb1 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -55,26 +55,22 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) } temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); - if (temp == NULL) - return temp; - else { - memset(temp, 0, sizeof(struct mid_q_entry)); - temp->mid = get_mid(smb_buffer); - temp->pid = current->pid; - temp->command = cpu_to_le16(smb_buffer->Command); - cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); + memset(temp, 0, sizeof(struct mid_q_entry)); + temp->mid = get_mid(smb_buffer); + temp->pid = current->pid; + temp->command = cpu_to_le16(smb_buffer->Command); + cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ - /* when mid allocated can be before when sent */ - temp->when_alloc = jiffies; - temp->server = server; + /* when mid allocated can be before when sent */ + temp->when_alloc = jiffies; + temp->server = server; - /* - * The default is for the mid to be synchronous, so the - * default callback just wakes up the current task. - */ - temp->callback = cifs_wake_up_task; - temp->callback_data = current; - } + /* + * The default is for the mid to be synchronous, so the + * default callback just wakes up the current task. + */ + temp->callback = cifs_wake_up_task; + temp->callback_data = current; atomic_inc(&midCount); temp->mid_state = MID_REQUEST_ALLOCATED; @@ -98,7 +94,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) now = jiffies; /* commands taking longer than one second are indications that something is wrong, unless it is quite a slow link or server */ - if ((now - midEntry->when_alloc) > HZ) { + if (time_after(now, midEntry->when_alloc + HZ)) { if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) { pr_debug(" CIFS slow rsp: cmd %d mid %llu", midEntry->command, midEntry->mid); @@ -617,9 +613,7 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) } spin_unlock(&GlobalMid_Lock); - mutex_lock(&server->srv_mutex); DeleteMidQEntry(mid); - mutex_unlock(&server->srv_mutex); return rc; } diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 20af5187ba63cc14150185952a3f7cdf9526556d..3cb5c9e2d4e78f641549818fbbad7681b193854d 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -235,8 +235,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler, if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, - full_path, name, value, size, - cifs_sb->local_nls, cifs_remap(cifs_sb)); + full_path, name, value, size, cifs_sb); break; case XATTR_CIFS_ACL: { @@ -336,8 +335,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) if (pTcon->ses->server->ops->query_all_EAs) rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, - full_path, NULL, data, buf_size, - cifs_sb->local_nls, cifs_remap(cifs_sb)); + full_path, NULL, data, buf_size, cifs_sb); list_ea_exit: kfree(full_path); free_xid(xid); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2dea594da19968288586138a35c22a81151914e7..6058df380cc00ed0d4be8cb81a0a87369cb9aac3 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -183,10 +183,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) goto unlock_out; } - error = bdi_setup_and_register(&vc->bdi, "coda"); - if (error) - goto unlock_out; - vc->vc_sb = sb; mutex_unlock(&vc->vc_mutex); @@ -197,7 +193,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = CODA_SUPER_MAGIC; sb->s_op = &coda_super_operations; sb->s_d_op = &coda_dentry_operations; - sb->s_bdi = &vc->bdi; + + error = super_setup_bdi(sb); + if (error) + goto error; /* get root fid from Venus: this needs the root inode */ error = venus_rootfid(sb, &fid); @@ -228,7 +227,6 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) error: mutex_lock(&vc->vc_mutex); - bdi_destroy(&vc->bdi); vc->vc_sb = NULL; sb->s_fs_info = NULL; unlock_out: @@ -240,7 +238,6 @@ static void coda_put_super(struct super_block *sb) { struct venus_comm *vcp = coda_vcp(sb); mutex_lock(&vcp->vc_mutex); - bdi_destroy(&vcp->bdi); vcp->vc_sb = NULL; sb->s_fs_info = NULL; mutex_unlock(&vcp->vc_mutex); diff --git a/fs/compat.c b/fs/compat.c index c61b506f5bc94ba0dcac0d9a28a7ab1070ac3a66..190b38b39d9ed74efa5816e437232f86ee7070d4 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -15,555 +15,14 @@ * published by the Free Software Foundation. */ -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include - #include -#include -#include #include "internal.h" -/* - * Not all architectures have sys_utime, so implement this in terms - * of sys_utimes. - */ -COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, - struct compat_utimbuf __user *, t) -{ - struct timespec tv[2]; - - if (t) { - if (get_user(tv[0].tv_sec, &t->actime) || - get_user(tv[1].tv_sec, &t->modtime)) - return -EFAULT; - tv[0].tv_nsec = 0; - tv[1].tv_nsec = 0; - } - return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); -} - -COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) -{ - struct timespec tv[2]; - - if (t) { - if (compat_get_timespec(&tv[0], &t[0]) || - compat_get_timespec(&tv[1], &t[1])) - return -EFAULT; - - if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) - return 0; - } - return do_utimes(dfd, filename, t ? tv : NULL, flags); -} - -COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) -{ - struct timespec tv[2]; - - if (t) { - if (get_user(tv[0].tv_sec, &t[0].tv_sec) || - get_user(tv[0].tv_nsec, &t[0].tv_usec) || - get_user(tv[1].tv_sec, &t[1].tv_sec) || - get_user(tv[1].tv_nsec, &t[1].tv_usec)) - return -EFAULT; - if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 || - tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0) - return -EINVAL; - tv[0].tv_nsec *= 1000; - tv[1].tv_nsec *= 1000; - } - return do_utimes(dfd, filename, t ? tv : NULL, 0); -} - -COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) -{ - return compat_sys_futimesat(AT_FDCWD, filename, t); -} - -static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) -{ - struct compat_stat tmp; - - if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - memset(&tmp, 0, sizeof(tmp)); - tmp.st_dev = old_encode_dev(stat->dev); - tmp.st_ino = stat->ino; - if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) - return -EOVERFLOW; - tmp.st_mode = stat->mode; - tmp.st_nlink = stat->nlink; - if (tmp.st_nlink != stat->nlink) - return -EOVERFLOW; - SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); - SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); - tmp.st_rdev = old_encode_dev(stat->rdev); - if ((u64) stat->size > MAX_NON_LFS) - return -EOVERFLOW; - tmp.st_size = stat->size; - tmp.st_atime = stat->atime.tv_sec; - tmp.st_atime_nsec = stat->atime.tv_nsec; - tmp.st_mtime = stat->mtime.tv_sec; - tmp.st_mtime_nsec = stat->mtime.tv_nsec; - tmp.st_ctime = stat->ctime.tv_sec; - tmp.st_ctime_nsec = stat->ctime.tv_nsec; - tmp.st_blocks = stat->blocks; - tmp.st_blksize = stat->blksize; - return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; -} - -COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, - struct compat_stat __user *, statbuf) -{ - struct kstat stat; - int error; - - error = vfs_stat(filename, &stat); - if (error) - return error; - return cp_compat_stat(&stat, statbuf); -} - -COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, - struct compat_stat __user *, statbuf) -{ - struct kstat stat; - int error; - - error = vfs_lstat(filename, &stat); - if (error) - return error; - return cp_compat_stat(&stat, statbuf); -} - -#ifndef __ARCH_WANT_STAT64 -COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, - const char __user *, filename, - struct compat_stat __user *, statbuf, int, flag) -{ - struct kstat stat; - int error; - - error = vfs_fstatat(dfd, filename, &stat, flag); - if (error) - return error; - return cp_compat_stat(&stat, statbuf); -} -#endif - -COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, - struct compat_stat __user *, statbuf) -{ - struct kstat stat; - int error = vfs_fstat(fd, &stat); - - if (!error) - error = cp_compat_stat(&stat, statbuf); - return error; -} - -static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf) -{ - - if (sizeof ubuf->f_blocks == 4) { - if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | - kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) - return -EOVERFLOW; - /* f_files and f_ffree may be -1; it's okay - * to stuff that into 32 bits */ - if (kbuf->f_files != 0xffffffffffffffffULL - && (kbuf->f_files & 0xffffffff00000000ULL)) - return -EOVERFLOW; - if (kbuf->f_ffree != 0xffffffffffffffffULL - && (kbuf->f_ffree & 0xffffffff00000000ULL)) - return -EOVERFLOW; - } - if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || - __put_user(kbuf->f_type, &ubuf->f_type) || - __put_user(kbuf->f_bsize, &ubuf->f_bsize) || - __put_user(kbuf->f_blocks, &ubuf->f_blocks) || - __put_user(kbuf->f_bfree, &ubuf->f_bfree) || - __put_user(kbuf->f_bavail, &ubuf->f_bavail) || - __put_user(kbuf->f_files, &ubuf->f_files) || - __put_user(kbuf->f_ffree, &ubuf->f_ffree) || - __put_user(kbuf->f_namelen, &ubuf->f_namelen) || - __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || - __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || - __put_user(kbuf->f_frsize, &ubuf->f_frsize) || - __put_user(kbuf->f_flags, &ubuf->f_flags) || - __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) - return -EFAULT; - return 0; -} - -/* - * The following statfs calls are copies of code from fs/statfs.c and - * should be checked against those from time to time - */ -COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) -{ - struct kstatfs tmp; - int error = user_statfs(pathname, &tmp); - if (!error) - error = put_compat_statfs(buf, &tmp); - return error; -} - -COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) -{ - struct kstatfs tmp; - int error = fd_statfs(fd, &tmp); - if (!error) - error = put_compat_statfs(buf, &tmp); - return error; -} - -static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) -{ - if (sizeof(ubuf->f_bsize) == 4) { - if ((kbuf->f_type | kbuf->f_bsize | kbuf->f_namelen | - kbuf->f_frsize | kbuf->f_flags) & 0xffffffff00000000ULL) - return -EOVERFLOW; - /* f_files and f_ffree may be -1; it's okay - * to stuff that into 32 bits */ - if (kbuf->f_files != 0xffffffffffffffffULL - && (kbuf->f_files & 0xffffffff00000000ULL)) - return -EOVERFLOW; - if (kbuf->f_ffree != 0xffffffffffffffffULL - && (kbuf->f_ffree & 0xffffffff00000000ULL)) - return -EOVERFLOW; - } - if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || - __put_user(kbuf->f_type, &ubuf->f_type) || - __put_user(kbuf->f_bsize, &ubuf->f_bsize) || - __put_user(kbuf->f_blocks, &ubuf->f_blocks) || - __put_user(kbuf->f_bfree, &ubuf->f_bfree) || - __put_user(kbuf->f_bavail, &ubuf->f_bavail) || - __put_user(kbuf->f_files, &ubuf->f_files) || - __put_user(kbuf->f_ffree, &ubuf->f_ffree) || - __put_user(kbuf->f_namelen, &ubuf->f_namelen) || - __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || - __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || - __put_user(kbuf->f_frsize, &ubuf->f_frsize) || - __put_user(kbuf->f_flags, &ubuf->f_flags) || - __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) - return -EFAULT; - return 0; -} - -COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) -{ - struct kstatfs tmp; - int error; - - if (sz != sizeof(*buf)) - return -EINVAL; - - error = user_statfs(pathname, &tmp); - if (!error) - error = put_compat_statfs64(buf, &tmp); - return error; -} - -COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) -{ - struct kstatfs tmp; - int error; - - if (sz != sizeof(*buf)) - return -EINVAL; - - error = fd_statfs(fd, &tmp); - if (!error) - error = put_compat_statfs64(buf, &tmp); - return error; -} - -/* - * This is a copy of sys_ustat, just dealing with a structure layout. - * Given how simple this syscall is that apporach is more maintainable - * than the various conversion hacks. - */ -COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) -{ - struct compat_ustat tmp; - struct kstatfs sbuf; - int err = vfs_ustat(new_decode_dev(dev), &sbuf); - if (err) - return err; - - memset(&tmp, 0, sizeof(struct compat_ustat)); - tmp.f_tfree = sbuf.f_bfree; - tmp.f_tinode = sbuf.f_ffree; - if (copy_to_user(u, &tmp, sizeof(struct compat_ustat))) - return -EFAULT; - return 0; -} - -static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) -{ - if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || - __get_user(kfl->l_type, &ufl->l_type) || - __get_user(kfl->l_whence, &ufl->l_whence) || - __get_user(kfl->l_start, &ufl->l_start) || - __get_user(kfl->l_len, &ufl->l_len) || - __get_user(kfl->l_pid, &ufl->l_pid)) - return -EFAULT; - return 0; -} - -static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) -{ - if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || - __put_user(kfl->l_type, &ufl->l_type) || - __put_user(kfl->l_whence, &ufl->l_whence) || - __put_user(kfl->l_start, &ufl->l_start) || - __put_user(kfl->l_len, &ufl->l_len) || - __put_user(kfl->l_pid, &ufl->l_pid)) - return -EFAULT; - return 0; -} - -#ifndef HAVE_ARCH_GET_COMPAT_FLOCK64 -static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) -{ - if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || - __get_user(kfl->l_type, &ufl->l_type) || - __get_user(kfl->l_whence, &ufl->l_whence) || - __get_user(kfl->l_start, &ufl->l_start) || - __get_user(kfl->l_len, &ufl->l_len) || - __get_user(kfl->l_pid, &ufl->l_pid)) - return -EFAULT; - return 0; -} -#endif - -#ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64 -static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) -{ - if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || - __put_user(kfl->l_type, &ufl->l_type) || - __put_user(kfl->l_whence, &ufl->l_whence) || - __put_user(kfl->l_start, &ufl->l_start) || - __put_user(kfl->l_len, &ufl->l_len) || - __put_user(kfl->l_pid, &ufl->l_pid)) - return -EFAULT; - return 0; -} -#endif - -static unsigned int -convert_fcntl_cmd(unsigned int cmd) -{ - switch (cmd) { - case F_GETLK64: - return F_GETLK; - case F_SETLK64: - return F_SETLK; - case F_SETLKW64: - return F_SETLKW; - } - - return cmd; -} - -COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, - compat_ulong_t, arg) -{ - mm_segment_t old_fs; - struct flock f; - long ret; - unsigned int conv_cmd; - - switch (cmd) { - case F_GETLK: - case F_SETLK: - case F_SETLKW: - ret = get_compat_flock(&f, compat_ptr(arg)); - if (ret != 0) - break; - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_fcntl(fd, cmd, (unsigned long)&f); - set_fs(old_fs); - if (cmd == F_GETLK && ret == 0) { - /* GETLK was successful and we need to return the data... - * but it needs to fit in the compat structure. - * l_start shouldn't be too big, unless the original - * start + end is greater than COMPAT_OFF_T_MAX, in which - * case the app was asking for trouble, so we return - * -EOVERFLOW in that case. - * l_len could be too big, in which case we just truncate it, - * and only allow the app to see that part of the conflicting - * lock that might make sense to it anyway - */ - - if (f.l_start > COMPAT_OFF_T_MAX) - ret = -EOVERFLOW; - if (f.l_len > COMPAT_OFF_T_MAX) - f.l_len = COMPAT_OFF_T_MAX; - if (ret == 0) - ret = put_compat_flock(&f, compat_ptr(arg)); - } - break; - - case F_GETLK64: - case F_SETLK64: - case F_SETLKW64: - case F_OFD_GETLK: - case F_OFD_SETLK: - case F_OFD_SETLKW: - ret = get_compat_flock64(&f, compat_ptr(arg)); - if (ret != 0) - break; - old_fs = get_fs(); - set_fs(KERNEL_DS); - conv_cmd = convert_fcntl_cmd(cmd); - ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); - set_fs(old_fs); - if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { - /* need to return lock information - see above for commentary */ - if (f.l_start > COMPAT_LOFF_T_MAX) - ret = -EOVERFLOW; - if (f.l_len > COMPAT_LOFF_T_MAX) - f.l_len = COMPAT_LOFF_T_MAX; - if (ret == 0) - ret = put_compat_flock64(&f, compat_ptr(arg)); - } - break; - - default: - ret = sys_fcntl(fd, cmd, arg); - break; - } - return ret; -} - -COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, - compat_ulong_t, arg) -{ - switch (cmd) { - case F_GETLK64: - case F_SETLK64: - case F_SETLKW64: - case F_OFD_GETLK: - case F_OFD_SETLK: - case F_OFD_SETLKW: - return -EINVAL; - } - return compat_sys_fcntl64(fd, cmd, arg); -} - -/* A write operation does a read from user space and vice versa */ -#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) - -ssize_t compat_rw_copy_check_uvector(int type, - const struct compat_iovec __user *uvector, unsigned long nr_segs, - unsigned long fast_segs, struct iovec *fast_pointer, - struct iovec **ret_pointer) -{ - compat_ssize_t tot_len; - struct iovec *iov = *ret_pointer = fast_pointer; - ssize_t ret = 0; - int seg; - - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... - */ - if (nr_segs == 0) - goto out; - - ret = -EINVAL; - if (nr_segs > UIO_MAXIOV) - goto out; - if (nr_segs > fast_segs) { - ret = -ENOMEM; - iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); - if (iov == NULL) - goto out; - } - *ret_pointer = iov; - - ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) - goto out; - - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. - * - * In Linux, the total length is limited to MAX_RW_COUNT, there is - * no overflow possibility. - */ - tot_len = 0; - ret = -EINVAL; - for (seg = 0; seg < nr_segs; seg++) { - compat_uptr_t buf; - compat_ssize_t len; - - if (__get_user(len, &uvector->iov_len) || - __get_user(buf, &uvector->iov_base)) { - ret = -EFAULT; - goto out; - } - if (len < 0) /* size_t not fitting in compat_ssize_t .. */ - goto out; - if (type >= 0 && - !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { - ret = -EFAULT; - goto out; - } - if (len > MAX_RW_COUNT - tot_len) - len = MAX_RW_COUNT - tot_len; - tot_len += len; - iov->iov_base = compat_ptr(buf); - iov->iov_len = (compat_size_t) len; - uvector++; - iov++; - } - ret = tot_len; - -out: - return ret; -} - struct compat_ncp_mount_data { compat_int_t version; compat_uint_t ncp_fd; @@ -744,653 +203,3 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, out: return retval; } - -struct compat_old_linux_dirent { - compat_ulong_t d_ino; - compat_ulong_t d_offset; - unsigned short d_namlen; - char d_name[1]; -}; - -struct compat_readdir_callback { - struct dir_context ctx; - struct compat_old_linux_dirent __user *dirent; - int result; -}; - -static int compat_fillonedir(struct dir_context *ctx, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int d_type) -{ - struct compat_readdir_callback *buf = - container_of(ctx, struct compat_readdir_callback, ctx); - struct compat_old_linux_dirent __user *dirent; - compat_ulong_t d_ino; - - if (buf->result) - return -EINVAL; - d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { - buf->result = -EOVERFLOW; - return -EOVERFLOW; - } - buf->result++; - dirent = buf->dirent; - if (!access_ok(VERIFY_WRITE, dirent, - (unsigned long)(dirent->d_name + namlen + 1) - - (unsigned long)dirent)) - goto efault; - if ( __put_user(d_ino, &dirent->d_ino) || - __put_user(offset, &dirent->d_offset) || - __put_user(namlen, &dirent->d_namlen) || - __copy_to_user(dirent->d_name, name, namlen) || - __put_user(0, dirent->d_name + namlen)) - goto efault; - return 0; -efault: - buf->result = -EFAULT; - return -EFAULT; -} - -COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, - struct compat_old_linux_dirent __user *, dirent, unsigned int, count) -{ - int error; - struct fd f = fdget_pos(fd); - struct compat_readdir_callback buf = { - .ctx.actor = compat_fillonedir, - .dirent = dirent - }; - - if (!f.file) - return -EBADF; - - error = iterate_dir(f.file, &buf.ctx); - if (buf.result) - error = buf.result; - - fdput_pos(f); - return error; -} - -struct compat_linux_dirent { - compat_ulong_t d_ino; - compat_ulong_t d_off; - unsigned short d_reclen; - char d_name[1]; -}; - -struct compat_getdents_callback { - struct dir_context ctx; - struct compat_linux_dirent __user *current_dir; - struct compat_linux_dirent __user *previous; - int count; - int error; -}; - -static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct compat_linux_dirent __user * dirent; - struct compat_getdents_callback *buf = - container_of(ctx, struct compat_getdents_callback, ctx); - compat_ulong_t d_ino; - int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + - namlen + 2, sizeof(compat_long_t)); - - buf->error = -EINVAL; /* only used if we fail.. */ - if (reclen > buf->count) - return -EINVAL; - d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { - buf->error = -EOVERFLOW; - return -EOVERFLOW; - } - dirent = buf->previous; - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user(d_ino, &dirent->d_ino)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) - goto efault; - if (__put_user(d_type, (char __user *) dirent + reclen - 1)) - goto efault; - buf->previous = dirent; - dirent = (void __user *)dirent + reclen; - buf->current_dir = dirent; - buf->count -= reclen; - return 0; -efault: - buf->error = -EFAULT; - return -EFAULT; -} - -COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, - struct compat_linux_dirent __user *, dirent, unsigned int, count) -{ - struct fd f; - struct compat_linux_dirent __user * lastdirent; - struct compat_getdents_callback buf = { - .ctx.actor = compat_filldir, - .current_dir = dirent, - .count = count - }; - int error; - - if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; - - f = fdget_pos(fd); - if (!f.file) - return -EBADF; - - error = iterate_dir(f.file, &buf.ctx); - if (error >= 0) - error = buf.error; - lastdirent = buf.previous; - if (lastdirent) { - if (put_user(buf.ctx.pos, &lastdirent->d_off)) - error = -EFAULT; - else - error = count - buf.count; - } - fdput_pos(f); - return error; -} - -#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 - -struct compat_getdents_callback64 { - struct dir_context ctx; - struct linux_dirent64 __user *current_dir; - struct linux_dirent64 __user *previous; - int count; - int error; -}; - -static int compat_filldir64(struct dir_context *ctx, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int d_type) -{ - struct linux_dirent64 __user *dirent; - struct compat_getdents_callback64 *buf = - container_of(ctx, struct compat_getdents_callback64, ctx); - int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, - sizeof(u64)); - u64 off; - - buf->error = -EINVAL; /* only used if we fail.. */ - if (reclen > buf->count) - return -EINVAL; - dirent = buf->previous; - - if (dirent) { - if (signal_pending(current)) - return -EINTR; - if (__put_user_unaligned(offset, &dirent->d_off)) - goto efault; - } - dirent = buf->current_dir; - if (__put_user_unaligned(ino, &dirent->d_ino)) - goto efault; - off = 0; - if (__put_user_unaligned(off, &dirent->d_off)) - goto efault; - if (__put_user(reclen, &dirent->d_reclen)) - goto efault; - if (__put_user(d_type, &dirent->d_type)) - goto efault; - if (copy_to_user(dirent->d_name, name, namlen)) - goto efault; - if (__put_user(0, dirent->d_name + namlen)) - goto efault; - buf->previous = dirent; - dirent = (void __user *)dirent + reclen; - buf->current_dir = dirent; - buf->count -= reclen; - return 0; -efault: - buf->error = -EFAULT; - return -EFAULT; -} - -COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, - struct linux_dirent64 __user *, dirent, unsigned int, count) -{ - struct fd f; - struct linux_dirent64 __user * lastdirent; - struct compat_getdents_callback64 buf = { - .ctx.actor = compat_filldir64, - .current_dir = dirent, - .count = count - }; - int error; - - if (!access_ok(VERIFY_WRITE, dirent, count)) - return -EFAULT; - - f = fdget_pos(fd); - if (!f.file) - return -EBADF; - - error = iterate_dir(f.file, &buf.ctx); - if (error >= 0) - error = buf.error; - lastdirent = buf.previous; - if (lastdirent) { - typeof(lastdirent->d_off) d_off = buf.ctx.pos; - if (__put_user_unaligned(d_off, &lastdirent->d_off)) - error = -EFAULT; - else - error = count - buf.count; - } - fdput_pos(f); - return error; -} -#endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ - -/* - * Exactly like fs/open.c:sys_open(), except that it doesn't set the - * O_LARGEFILE flag. - */ -COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) -{ - return do_sys_open(AT_FDCWD, filename, flags, mode); -} - -/* - * Exactly like fs/open.c:sys_openat(), except that it doesn't set the - * O_LARGEFILE flag. - */ -COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) -{ - return do_sys_open(dfd, filename, flags, mode); -} - -#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) - -static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, - int timeval, int ret) -{ - struct timespec ts; - - if (!p) - return ret; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - - /* No update for zero timeout */ - if (!end_time->tv_sec && !end_time->tv_nsec) - return ret; - - ktime_get_ts(&ts); - ts = timespec_sub(*end_time, ts); - if (ts.tv_sec < 0) - ts.tv_sec = ts.tv_nsec = 0; - - if (timeval) { - struct compat_timeval rtv; - - rtv.tv_sec = ts.tv_sec; - rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; - - if (!copy_to_user(p, &rtv, sizeof(rtv))) - return ret; - } else { - struct compat_timespec rts; - - rts.tv_sec = ts.tv_sec; - rts.tv_nsec = ts.tv_nsec; - - if (!copy_to_user(p, &rts, sizeof(rts))) - return ret; - } - /* - * If an application puts its timeval in read-only memory, we - * don't want the Linux-specific update to the timeval to - * cause a fault after the select has completed - * successfully. However, because we're not updating the - * timeval, we can't restart the system call. - */ - -sticky: - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - return ret; -} - -/* - * Ooo, nasty. We need here to frob 32-bit unsigned longs to - * 64-bit unsigned longs. - */ -static -int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, - unsigned long *fdset) -{ - nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); - if (ufdset) { - unsigned long odd; - - if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) - return -EFAULT; - - odd = nr & 1UL; - nr &= ~1UL; - while (nr) { - unsigned long h, l; - if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) - return -EFAULT; - ufdset += 2; - *fdset++ = h << 32 | l; - nr -= 2; - } - if (odd && __get_user(*fdset, ufdset)) - return -EFAULT; - } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, this makes sure that - * actually happens. - */ - memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); - } - return 0; -} - -static -int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, - unsigned long *fdset) -{ - unsigned long odd; - nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); - - if (!ufdset) - return 0; - - odd = nr & 1UL; - nr &= ~1UL; - while (nr) { - unsigned long h, l; - l = *fdset++; - h = l >> 32; - if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) - return -EFAULT; - ufdset += 2; - nr -= 2; - } - if (odd && __put_user(*fdset, ufdset)) - return -EFAULT; - return 0; -} - - -/* - * This is a virtual copy of sys_select from fs/select.c and probably - * should be compared to it from time to time - */ - -/* - * We can actually return ERESTARTSYS instead of EINTR, but I'd - * like to be certain this leads to no problems. So I return - * EINTR just for safety. - * - * Update: ERESTARTSYS breaks at least the xview clock binary, so - * I'm trying ERESTARTNOHAND which restart only when you want to. - */ -int compat_core_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct timespec *end_time) -{ - fd_set_bits fds; - void *bits; - int size, max_fds, ret = -EINVAL; - struct fdtable *fdt; - long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; - - if (n < 0) - goto out_nofds; - - /* max_fds can increase, so grab it once to avoid race */ - rcu_read_lock(); - fdt = files_fdtable(current->files); - max_fds = fdt->max_fds; - rcu_read_unlock(); - if (n > max_fds) - n = max_fds; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. - */ - size = FDS_BYTES(n); - bits = stack_fds; - if (size > sizeof(stack_fds) / 6) { - bits = kmalloc(6 * size, GFP_KERNEL); - ret = -ENOMEM; - if (!bits) - goto out_nofds; - } - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - if ((ret = compat_get_fd_set(n, inp, fds.in)) || - (ret = compat_get_fd_set(n, outp, fds.out)) || - (ret = compat_get_fd_set(n, exp, fds.ex))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, end_time); - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - if (compat_set_fd_set(n, inp, fds.res_in) || - compat_set_fd_set(n, outp, fds.res_out) || - compat_set_fd_set(n, exp, fds.res_ex)) - ret = -EFAULT; -out: - if (bits != stack_fds) - kfree(bits); -out_nofds: - return ret; -} - -COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, - compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, - struct compat_timeval __user *, tvp) -{ - struct timespec end_time, *to = NULL; - struct compat_timeval tv; - int ret; - - if (tvp) { - if (copy_from_user(&tv, tvp, sizeof(tv))) - return -EFAULT; - - to = &end_time; - if (poll_select_set_timeout(to, - tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), - (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) - return -EINVAL; - } - - ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); - - return ret; -} - -struct compat_sel_arg_struct { - compat_ulong_t n; - compat_uptr_t inp; - compat_uptr_t outp; - compat_uptr_t exp; - compat_uptr_t tvp; -}; - -COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) -{ - struct compat_sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), - compat_ptr(a.exp), compat_ptr(a.tvp)); -} - -static long do_compat_pselect(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, - compat_size_t sigsetsize) -{ - compat_sigset_t ss32; - sigset_t ksigmask, sigsaved; - struct compat_timespec ts; - struct timespec end_time, *to = NULL; - int ret; - - if (tsp) { - if (copy_from_user(&ts, tsp, sizeof(ts))) - return -EFAULT; - - to = &end_time; - if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) - return -EINVAL; - } - - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) - return -EFAULT; - sigset_from_compat(&ksigmask, &ss32); - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); - - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} - -COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, - compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, - struct compat_timespec __user *, tsp, void __user *, sig) -{ - compat_size_t sigsetsize = 0; - compat_uptr_t up = 0; - - if (sig) { - if (!access_ok(VERIFY_READ, sig, - sizeof(compat_uptr_t)+sizeof(compat_size_t)) || - __get_user(up, (compat_uptr_t __user *)sig) || - __get_user(sigsetsize, - (compat_size_t __user *)(sig+sizeof(up)))) - return -EFAULT; - } - return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), - sigsetsize); -} - -COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, - unsigned int, nfds, struct compat_timespec __user *, tsp, - const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) -{ - compat_sigset_t ss32; - sigset_t ksigmask, sigsaved; - struct compat_timespec ts; - struct timespec end_time, *to = NULL; - int ret; - - if (tsp) { - if (copy_from_user(&ts, tsp, sizeof(ts))) - return -EFAULT; - - to = &end_time; - if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) - return -EINVAL; - } - - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) - return -EFAULT; - sigset_from_compat(&ksigmask, &ss32); - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_sys_poll(ufds, nfds, to); - - /* We can restart this syscall, usually */ - if (ret == -EINTR) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - ret = -ERESTARTNOHAND; - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); - - return ret; -} - -#ifdef CONFIG_FHANDLE -/* - * Exactly like fs/open.c:sys_open_by_handle_at(), except that it - * doesn't set the O_LARGEFILE flag. - */ -COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, - struct file_handle __user *, handle, int, flags) -{ - return do_handle_open(mountdirfd, handle, flags); -} -#endif diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 11d087b2b28edd79bf03b367d45bfd49bd210352..6116d5275a3ecc7f95d3e3009cca261f8b7c075d 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -833,7 +833,7 @@ static int compat_ioctl_preallocate(struct file *file, */ #define XFORM(i) (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff) -#define COMPATIBLE_IOCTL(cmd) XFORM(cmd), +#define COMPATIBLE_IOCTL(cmd) XFORM((u32)cmd), /* ioctl should not be warned about even if it's not implemented. Valid reasons to use this: - It is implemented with ->compat_ioctl on some device, but programs diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 8b2a994042ddeb2d1b8c00bbee3436e79fff52ad..a66f6624d89943997d1d35ead214aee85857e5d7 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item) } EXPORT_SYMBOL(config_item_get); +struct config_item *config_item_get_unless_zero(struct config_item *item) +{ + if (item && kref_get_unless_zero(&item->ci_kref)) + return item; + return NULL; +} +EXPORT_SYMBOL(config_item_get_unless_zero); + static void config_item_cleanup(struct config_item *item) { struct config_item_type *t = item->ci_type; diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index a6ab012a2c6acf9815bb8d4d1e29fb4568084b08..c8aabba502f6d7f019890bcdeba2fcdd7a56717f 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item, ret = -ENOMEM; sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { - sl->sl_target = config_item_get(item); spin_lock(&configfs_dirent_lock); if (target_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); - config_item_put(item); kfree(sl); return -ENOENT; } + sl->sl_target = config_item_get(item); list_add(&sl->sl_list, &target_sd->s_links); spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 37b49894c762344841117b2a0042c5e9ec8b7140..d1bb02b1ee589ecd68876ff240bf57e756a70466 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -159,6 +159,8 @@ static int fname_decrypt(struct inode *inode, static const char *lookup_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; +#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) + /** * digest_encode() - * @@ -230,11 +232,14 @@ EXPORT_SYMBOL(fscrypt_fname_encrypted_size); int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 ilen, struct fscrypt_str *crypto_str) { - unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen); + u32 olen = fscrypt_fname_encrypted_size(inode, ilen); + const u32 max_encoded_len = + max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), + 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); crypto_str->len = olen; - if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2) - olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2; + olen = max(olen, max_encoded_len); + /* * Allocated buffer can hold one more character to null-terminate the * string @@ -266,6 +271,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * The caller must have allocated sufficient memory for the @oname string. * + * If the key is available, we'll decrypt the disk name; otherwise, we'll encode + * it for presentation. Short names are directly base64-encoded, while long + * names are encoded in fscrypt_digested_name format. + * * Return: 0 on success, -errno on failure */ int fscrypt_fname_disk_to_usr(struct inode *inode, @@ -274,7 +283,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); - char buf[24]; + struct fscrypt_digested_name digested_name; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; @@ -289,20 +298,24 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, if (inode->i_crypt_info) return fname_decrypt(inode, iname, oname); - if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) { + if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { oname->len = digest_encode(iname->name, iname->len, oname->name); return 0; } if (hash) { - memcpy(buf, &hash, 4); - memcpy(buf + 4, &minor_hash, 4); + digested_name.hash = hash; + digested_name.minor_hash = minor_hash; } else { - memset(buf, 0, 8); + digested_name.hash = 0; + digested_name.minor_hash = 0; } - memcpy(buf + 8, iname->name + iname->len - 16, 16); + memcpy(digested_name.digest, + FSCRYPT_FNAME_DIGEST(iname->name, iname->len), + FSCRYPT_FNAME_DIGEST_SIZE); oname->name[0] = '_'; - oname->len = 1 + digest_encode(buf, 24, oname->name + 1); + oname->len = 1 + digest_encode((const char *)&digested_name, + sizeof(digested_name), oname->name + 1); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -336,10 +349,35 @@ int fscrypt_fname_usr_to_disk(struct inode *inode, } EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); +/** + * fscrypt_setup_filename() - prepare to search a possibly encrypted directory + * @dir: the directory that will be searched + * @iname: the user-provided filename being searched for + * @lookup: 1 if we're allowed to proceed without the key because it's + * ->lookup() or we're finding the dir_entry for deletion; 0 if we cannot + * proceed without the key because we're going to create the dir_entry. + * @fname: the filename information to be filled in + * + * Given a user-provided filename @iname, this function sets @fname->disk_name + * to the name that would be stored in the on-disk directory entry, if possible. + * If the directory is unencrypted this is simply @iname. Else, if we have the + * directory's encryption key, then @iname is the plaintext, so we encrypt it to + * get the disk_name. + * + * Else, for keyless @lookup operations, @iname is the presented ciphertext, so + * we decode it to get either the ciphertext disk_name (for short names) or the + * fscrypt_digested_name (for long names). Non-@lookup operations will be + * impossible in this case, so we fail them with ENOKEY. + * + * If successful, fscrypt_free_filename() must be called later to clean up. + * + * Return: 0 on success, -errno on failure + */ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - int ret = 0, bigname = 0; + int ret; + int digested; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; @@ -373,25 +411,37 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, * We don't have the key and we are doing a lookup; decode the * user-supplied name */ - if (iname->name[0] == '_') - bigname = 1; - if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43))) - return -ENOENT; + if (iname->name[0] == '_') { + if (iname->len != + 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))) + return -ENOENT; + digested = 1; + } else { + if (iname->len > + BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)) + return -ENOENT; + digested = 0; + } - fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); + fname->crypto_buf.name = + kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE, + sizeof(struct fscrypt_digested_name)), + GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = digest_decode(iname->name + bigname, iname->len - bigname, + ret = digest_decode(iname->name + digested, iname->len - digested, fname->crypto_buf.name); if (ret < 0) { ret = -ENOENT; goto errout; } fname->crypto_buf.len = ret; - if (bigname) { - memcpy(&fname->hash, fname->crypto_buf.name, 4); - memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4); + if (digested) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + fname->hash = n->hash; + fname->minor_hash = n->minor_hash; } else { fname->disk_name.name = fname->crypto_buf.name; fname->disk_name.len = fname->crypto_buf.len; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index e39696e644942a80d110035e2d49570840823af7..1e1f8a361b75479f3051cb924207f633cfbe8487 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -13,8 +13,6 @@ #include -#define FS_FNAME_CRYPTO_DIGEST_SIZE 32 - /* Encryption parameters */ #define FS_XTS_TWEAK_SIZE 16 #define FS_AES_128_ECB_KEY_SIZE 16 @@ -22,10 +20,6 @@ #define FS_AES_256_CBC_KEY_SIZE 32 #define FS_AES_256_CTS_KEY_SIZE 32 #define FS_AES_256_XTS_KEY_SIZE 64 -#define FS_MAX_KEY_SIZE 64 - -#define FS_KEY_DESC_PREFIX "fscrypt:" -#define FS_KEY_DESC_PREFIX_SIZE 8 #define FS_KEY_DERIVATION_NONCE_SIZE 16 @@ -51,13 +45,6 @@ struct fscrypt_context { #define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 -/* This is passed in from userspace into the kernel keyring */ -struct fscrypt_key { - u32 mode; - u8 raw[FS_MAX_KEY_SIZE]; - u32 size; -} __packed; - /* * A pointer to this structure is stored in the file system's in-core * representation of an inode. diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 8cdfddce2b34868f0cfe3f71da55d64187172a38..179e578b875b1abbc58fbb2d3fd634b3608243ad 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -183,9 +183,6 @@ int fscrypt_get_encryption_info(struct inode *inode) if (res) return res; - if (!inode->i_sb->s_cop->get_context) - return -EOPNOTSUPP; - res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { if (!fscrypt_dummy_context_enabled(inode) || diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 4908906d54d562263093cd5245fcb14e36d18b8e..210976e7a269ff0760cf81f6b1975632f9876862 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -34,9 +34,6 @@ static int create_encryption_context_from_policy(struct inode *inode, { struct fscrypt_context ctx; - if (!inode->i_sb->s_cop->set_context) - return -EOPNOTSUPP; - ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); @@ -87,8 +84,6 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) if (ret == -ENODATA) { if (!S_ISDIR(inode->i_mode)) ret = -ENOTDIR; - else if (!inode->i_sb->s_cop->empty_dir) - ret = -EOPNOTSUPP; else if (!inode->i_sb->s_cop->empty_dir(inode)) ret = -ENOTEMPTY; else @@ -118,8 +113,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) struct fscrypt_policy policy; int res; - if (!inode->i_sb->s_cop->get_context || - !inode->i_sb->s_cop->is_encrypted(inode)) + if (!inode->i_sb->s_cop->is_encrypted(inode)) return -ENODATA; res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); @@ -143,27 +137,61 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) } EXPORT_SYMBOL(fscrypt_ioctl_get_policy); +/** + * fscrypt_has_permitted_context() - is a file's encryption policy permitted + * within its directory? + * + * @parent: inode for parent directory + * @child: inode for file being looked up, opened, or linked into @parent + * + * Filesystems must call this before permitting access to an inode in a + * situation where the parent directory is encrypted (either before allowing + * ->lookup() to succeed, or for a regular file before allowing it to be opened) + * and before any operation that involves linking an inode into an encrypted + * directory, including link, rename, and cross rename. It enforces the + * constraint that within a given encrypted directory tree, all files use the + * same encryption policy. The pre-access check is needed to detect potentially + * malicious offline violations of this constraint, while the link and rename + * checks are needed to prevent online violations of this constraint. + * + * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail + * the filesystem operation with EPERM. + */ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { - struct fscrypt_info *parent_ci, *child_ci; + const struct fscrypt_operations *cops = parent->i_sb->s_cop; + const struct fscrypt_info *parent_ci, *child_ci; + struct fscrypt_context parent_ctx, child_ctx; int res; - if ((parent == NULL) || (child == NULL)) { - printk(KERN_ERR "parent %p child %p\n", parent, child); - BUG_ON(1); - } - /* No restrictions on file types which are never encrypted */ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && !S_ISLNK(child->i_mode)) return 1; - /* no restrictions if the parent directory is not encrypted */ - if (!parent->i_sb->s_cop->is_encrypted(parent)) + /* No restrictions if the parent directory is unencrypted */ + if (!cops->is_encrypted(parent)) return 1; - /* if the child directory is not encrypted, this is always a problem */ - if (!parent->i_sb->s_cop->is_encrypted(child)) + + /* Encrypted directories must not contain unencrypted files */ + if (!cops->is_encrypted(child)) return 0; + + /* + * Both parent and child are encrypted, so verify they use the same + * encryption policy. Compare the fscrypt_info structs if the keys are + * available, otherwise retrieve and compare the fscrypt_contexts. + * + * Note that the fscrypt_context retrieval will be required frequently + * when accessing an encrypted directory tree without the key. + * Performance-wise this is not a big deal because we already don't + * really optimize for file access without the key (to the extent that + * such access is even possible), given that any attempted access + * already causes a fscrypt_context retrieval and keyring search. + * + * In any case, if an unexpected error occurs, fall back to "forbidden". + */ + res = fscrypt_get_encryption_info(parent); if (res) return 0; @@ -172,17 +200,32 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) return 0; parent_ci = parent->i_crypt_info; child_ci = child->i_crypt_info; - if (!parent_ci && !child_ci) - return 1; - if (!parent_ci || !child_ci) + + if (parent_ci && child_ci) { + return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == + child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags); + } + + res = cops->get_context(parent, &parent_ctx, sizeof(parent_ctx)); + if (res != sizeof(parent_ctx)) return 0; - return (memcmp(parent_ci->ci_master_key, - child_ci->ci_master_key, - FS_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && - (parent_ci->ci_flags == child_ci->ci_flags)); + res = cops->get_context(child, &child_ctx, sizeof(child_ctx)); + if (res != sizeof(child_ctx)) + return 0; + + return memcmp(parent_ctx.master_key_descriptor, + child_ctx.master_key_descriptor, + FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ctx.contents_encryption_mode == + child_ctx.contents_encryption_mode) && + (parent_ctx.filenames_encryption_mode == + child_ctx.filenames_encryption_mode) && + (parent_ctx.flags == child_ctx.flags); } EXPORT_SYMBOL(fscrypt_has_permitted_context); @@ -202,9 +245,6 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child, struct fscrypt_info *ci; int res; - if (!parent->i_sb->s_cop->set_context) - return -EOPNOTSUPP; - res = fscrypt_get_encryption_info(parent); if (res < 0) return res; diff --git a/fs/dax.c b/fs/dax.c index 85abd741253d4b89a42c6c5f2b4dd7bcf456e843..2a6889b3585f068c73091d8895639b7e941d702a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -55,32 +55,6 @@ static int __init init_dax_wait_table(void) } fs_initcall(init_dax_wait_table); -static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax) -{ - struct request_queue *q = bdev->bd_queue; - long rc = -EIO; - - dax->addr = ERR_PTR(-EIO); - if (blk_queue_enter(q, true) != 0) - return rc; - - rc = bdev_direct_access(bdev, dax); - if (rc < 0) { - dax->addr = ERR_PTR(rc); - blk_queue_exit(q); - return rc; - } - return rc; -} - -static void dax_unmap_atomic(struct block_device *bdev, - const struct blk_dax_ctl *dax) -{ - if (IS_ERR(dax->addr)) - return; - blk_queue_exit(bdev->bd_queue); -} - static int dax_is_pmd_entry(void *entry) { return (unsigned long)entry & RADIX_DAX_PMD; @@ -101,26 +75,6 @@ static int dax_is_empty_entry(void *entry) return (unsigned long)entry & RADIX_DAX_EMPTY; } -struct page *read_dax_sector(struct block_device *bdev, sector_t n) -{ - struct page *page = alloc_pages(GFP_KERNEL, 0); - struct blk_dax_ctl dax = { - .size = PAGE_SIZE, - .sector = n & ~((((int) PAGE_SIZE) / 512) - 1), - }; - long rc; - - if (!page) - return ERR_PTR(-ENOMEM); - - rc = dax_map_atomic(bdev, &dax); - if (rc < 0) - return ERR_PTR(rc); - memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE); - dax_unmap_atomic(bdev, &dax); - return page; -} - /* * DAX radix tree locking */ @@ -506,35 +460,6 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) return ret; } -/* - * Invalidate exceptional DAX entry if easily possible. This handles DAX - * entries for invalidate_inode_pages() so we evict the entry only if we can - * do so without blocking. - */ -int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index) -{ - int ret = 0; - void *entry, **slot; - struct radix_tree_root *page_tree = &mapping->page_tree; - - spin_lock_irq(&mapping->tree_lock); - entry = __radix_tree_lookup(page_tree, index, NULL, &slot); - if (!entry || !radix_tree_exceptional_entry(entry) || - slot_locked(mapping, slot)) - goto out; - if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || - radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) - goto out; - radix_tree_delete(page_tree, index); - mapping->nrexceptional--; - ret = 1; -out: - spin_unlock_irq(&mapping->tree_lock); - if (ret) - dax_wake_mapping_entry_waiter(mapping, index, entry, true); - return ret; -} - /* * Invalidate exceptional DAX entry if it is clean. */ @@ -555,21 +480,25 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, static int dax_load_hole(struct address_space *mapping, void **entry, struct vm_fault *vmf) { + struct inode *inode = mapping->host; struct page *page; int ret; /* Hole page already exists? Return it... */ if (!radix_tree_exceptional_entry(*entry)) { page = *entry; - goto out; + goto finish_fault; } /* This will replace locked radix tree entry with a hole page */ page = find_or_create_page(mapping, vmf->pgoff, vmf->gfp_mask | __GFP_ZERO); - if (!page) - return VM_FAULT_OOM; - out: + if (!page) { + ret = VM_FAULT_OOM; + goto out; + } + +finish_fault: vmf->page = page; ret = finish_fault(vmf); vmf->page = NULL; @@ -577,26 +506,37 @@ static int dax_load_hole(struct address_space *mapping, void **entry, if (!ret) { /* Grab reference for PTE that is now referencing the page */ get_page(page); - return VM_FAULT_NOPAGE; + ret = VM_FAULT_NOPAGE; } +out: + trace_dax_load_hole(inode, vmf, ret); return ret; } -static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size, - struct page *to, unsigned long vaddr) +static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, + sector_t sector, size_t size, struct page *to, + unsigned long vaddr) { - struct blk_dax_ctl dax = { - .sector = sector, - .size = size, - }; - void *vto; - - if (dax_map_atomic(bdev, &dax) < 0) - return PTR_ERR(dax.addr); + void *vto, *kaddr; + pgoff_t pgoff; + pfn_t pfn; + long rc; + int id; + + rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); + if (rc) + return rc; + + id = dax_read_lock(); + rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); + if (rc < 0) { + dax_read_unlock(id); + return rc; + } vto = kmap_atomic(to); - copy_user_page(vto, (void __force *)dax.addr, vaddr, to); + copy_user_page(vto, (void __force *)kaddr, vaddr, to); kunmap_atomic(vto); - dax_unmap_atomic(bdev, &dax); + dax_read_unlock(id); return 0; } @@ -764,12 +704,16 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, } static int dax_writeback_one(struct block_device *bdev, - struct address_space *mapping, pgoff_t index, void *entry) + struct dax_device *dax_dev, struct address_space *mapping, + pgoff_t index, void *entry) { struct radix_tree_root *page_tree = &mapping->page_tree; - struct blk_dax_ctl dax; - void *entry2, **slot; - int ret = 0; + void *entry2, **slot, *kaddr; + long ret = 0, id; + sector_t sector; + pgoff_t pgoff; + size_t size; + pfn_t pfn; /* * A page got tagged dirty in DAX mapping? Something is seriously @@ -818,26 +762,29 @@ static int dax_writeback_one(struct block_device *bdev, * 'entry'. This allows us to flush for PMD_SIZE and not have to * worry about partial PMD writebacks. */ - dax.sector = dax_radix_sector(entry); - dax.size = PAGE_SIZE << dax_radix_order(entry); + sector = dax_radix_sector(entry); + size = PAGE_SIZE << dax_radix_order(entry); + + id = dax_read_lock(); + ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); + if (ret) + goto dax_unlock; /* - * We cannot hold tree_lock while calling dax_map_atomic() because it - * eventually calls cond_resched(). + * dax_direct_access() may sleep, so cannot hold tree_lock over + * its invocation. */ - ret = dax_map_atomic(bdev, &dax); - if (ret < 0) { - put_locked_mapping_entry(mapping, index, entry); - return ret; - } + ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn); + if (ret < 0) + goto dax_unlock; - if (WARN_ON_ONCE(ret < dax.size)) { + if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) { ret = -EIO; - goto unmap; + goto dax_unlock; } - dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(dax.pfn)); - wb_cache_pmem(dax.addr, dax.size); + dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); + wb_cache_pmem(kaddr, size); /* * After we have flushed the cache, we can clear the dirty tag. There * cannot be new dirty data in the pfn after the flush has completed as @@ -847,8 +794,9 @@ static int dax_writeback_one(struct block_device *bdev, spin_lock_irq(&mapping->tree_lock); radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); spin_unlock_irq(&mapping->tree_lock); - unmap: - dax_unmap_atomic(bdev, &dax); + trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); + dax_unlock: + dax_read_unlock(id); put_locked_mapping_entry(mapping, index, entry); return ret; @@ -869,6 +817,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct inode *inode = mapping->host; pgoff_t start_index, end_index; pgoff_t indices[PAGEVEC_SIZE]; + struct dax_device *dax_dev; struct pagevec pvec; bool done = false; int i, ret = 0; @@ -879,9 +828,15 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) return 0; + dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + if (!dax_dev) + return -EIO; + start_index = wbc->range_start >> PAGE_SHIFT; end_index = wbc->range_end >> PAGE_SHIFT; + trace_dax_writeback_range(inode, start_index, end_index); + tag_pages_for_writeback(mapping, start_index, end_index); pagevec_init(&pvec, 0); @@ -899,38 +854,50 @@ int dax_writeback_mapping_range(struct address_space *mapping, break; } - ret = dax_writeback_one(bdev, mapping, indices[i], - pvec.pages[i]); + ret = dax_writeback_one(bdev, dax_dev, mapping, + indices[i], pvec.pages[i]); if (ret < 0) - return ret; + goto out; } } - return 0; +out: + put_dax(dax_dev); + trace_dax_writeback_range_done(inode, start_index, end_index); + return (ret < 0 ? ret : 0); } EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); static int dax_insert_mapping(struct address_space *mapping, - struct block_device *bdev, sector_t sector, size_t size, - void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf) + struct block_device *bdev, struct dax_device *dax_dev, + sector_t sector, size_t size, void **entryp, + struct vm_area_struct *vma, struct vm_fault *vmf) { unsigned long vaddr = vmf->address; - struct blk_dax_ctl dax = { - .sector = sector, - .size = size, - }; - void *ret; void *entry = *entryp; + void *ret, *kaddr; + pgoff_t pgoff; + int id, rc; + pfn_t pfn; + + rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); + if (rc) + return rc; - if (dax_map_atomic(bdev, &dax) < 0) - return PTR_ERR(dax.addr); - dax_unmap_atomic(bdev, &dax); + id = dax_read_lock(); + rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); + if (rc < 0) { + dax_read_unlock(id); + return rc; + } + dax_read_unlock(id); - ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector, 0); + ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0); if (IS_ERR(ret)) return PTR_ERR(ret); *entryp = ret; - return vm_insert_mixed(vma, vaddr, dax.pfn); + trace_dax_insert_mapping(mapping->host, vmf, ret); + return vm_insert_mixed(vma, vaddr, pfn); } /** @@ -941,6 +908,7 @@ int dax_pfn_mkwrite(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; void *entry, **slot; pgoff_t index = vmf->pgoff; @@ -950,6 +918,7 @@ int dax_pfn_mkwrite(struct vm_fault *vmf) if (entry) put_unlocked_mapping_entry(mapping, index, entry); spin_unlock_irq(&mapping->tree_lock); + trace_dax_pfn_mkwrite_no_entry(inode, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); @@ -962,6 +931,7 @@ int dax_pfn_mkwrite(struct vm_fault *vmf) */ finish_mkwrite_fault(vmf); put_locked_mapping_entry(mapping, index, entry); + trace_dax_pfn_mkwrite(inode, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); @@ -979,24 +949,34 @@ static bool dax_range_is_aligned(struct block_device *bdev, return true; } -int __dax_zero_page_range(struct block_device *bdev, sector_t sector, - unsigned int offset, unsigned int length) +int __dax_zero_page_range(struct block_device *bdev, + struct dax_device *dax_dev, sector_t sector, + unsigned int offset, unsigned int size) { - struct blk_dax_ctl dax = { - .sector = sector, - .size = PAGE_SIZE, - }; - - if (dax_range_is_aligned(bdev, offset, length)) { - sector_t start_sector = dax.sector + (offset >> 9); + if (dax_range_is_aligned(bdev, offset, size)) { + sector_t start_sector = sector + (offset >> 9); return blkdev_issue_zeroout(bdev, start_sector, - length >> 9, GFP_NOFS, true); + size >> 9, GFP_NOFS, 0); } else { - if (dax_map_atomic(bdev, &dax) < 0) - return PTR_ERR(dax.addr); - clear_pmem(dax.addr + offset, length); - dax_unmap_atomic(bdev, &dax); + pgoff_t pgoff; + long rc, id; + void *kaddr; + pfn_t pfn; + + rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff); + if (rc) + return rc; + + id = dax_read_lock(); + rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, + &pfn); + if (rc < 0) { + dax_read_unlock(id); + return rc; + } + clear_pmem(kaddr + offset, size); + dax_read_unlock(id); } return 0; } @@ -1011,9 +991,12 @@ static loff_t dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap) { + struct block_device *bdev = iomap->bdev; + struct dax_device *dax_dev = iomap->dax_dev; struct iov_iter *iter = data; loff_t end = pos + length, done = 0; ssize_t ret = 0; + int id; if (iov_iter_rw(iter) == READ) { end = min(end, i_size_read(inode)); @@ -1032,40 +1015,48 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * into page tables. We have to tear down these mappings so that data * written by write(2) is visible in mmap. */ - if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) { + if (iomap->flags & IOMAP_F_NEW) { invalidate_inode_pages2_range(inode->i_mapping, pos >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); } + id = dax_read_lock(); while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); - struct blk_dax_ctl dax = { 0 }; + const size_t size = ALIGN(length + offset, PAGE_SIZE); + const sector_t sector = dax_iomap_sector(iomap, pos); ssize_t map_len; + pgoff_t pgoff; + void *kaddr; + pfn_t pfn; if (fatal_signal_pending(current)) { ret = -EINTR; break; } - dax.sector = dax_iomap_sector(iomap, pos); - dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK; - map_len = dax_map_atomic(iomap->bdev, &dax); + ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); + if (ret) + break; + + map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), + &kaddr, &pfn); if (map_len < 0) { ret = map_len; break; } - dax.addr += offset; + map_len = PFN_PHYS(map_len); + kaddr += offset; map_len -= offset; if (map_len > end - pos) map_len = end - pos; if (iov_iter_rw(iter) == WRITE) - map_len = copy_from_iter_pmem(dax.addr, map_len, iter); + map_len = copy_from_iter_pmem(kaddr, map_len, iter); else - map_len = copy_to_iter(dax.addr, map_len, iter); - dax_unmap_atomic(iomap->bdev, &dax); + map_len = copy_to_iter(kaddr, map_len, iter); if (map_len <= 0) { ret = map_len ? map_len : -EFAULT; break; @@ -1075,6 +1066,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, length -= map_len; done += map_len; } + dax_read_unlock(id); return done ? done : ret; } @@ -1142,34 +1134,50 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, int vmf_ret = 0; void *entry; + trace_dax_pte_fault(inode, vmf, vmf_ret); /* * Check whether offset isn't beyond end of file now. Caller is supposed * to hold locks serializing us with truncate / punch hole so this is * a reliable test. */ - if (pos >= i_size_read(inode)) - return VM_FAULT_SIGBUS; + if (pos >= i_size_read(inode)) { + vmf_ret = VM_FAULT_SIGBUS; + goto out; + } if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page) flags |= IOMAP_WRITE; + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); + if (IS_ERR(entry)) { + vmf_ret = dax_fault_return(PTR_ERR(entry)); + goto out; + } + + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PMD fault that overlaps with + * the PTE we need to set up. If so just return and the fault will be + * retried. + */ + if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { + vmf_ret = VM_FAULT_NOPAGE; + goto unlock_entry; + } + /* * Note that we don't bother to use iomap_apply here: DAX required * the file system block size to be equal the page size, which means * that we never have to deal with more than a single extent here. */ error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap); - if (error) - return dax_fault_return(error); - if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { - vmf_ret = dax_fault_return(-EIO); /* fs corruption? */ - goto finish_iomap; + if (error) { + vmf_ret = dax_fault_return(error); + goto unlock_entry; } - - entry = grab_mapping_entry(mapping, vmf->pgoff, 0); - if (IS_ERR(entry)) { - vmf_ret = dax_fault_return(PTR_ERR(entry)); - goto finish_iomap; + if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) { + error = -EIO; /* fs corruption? */ + goto error_finish_iomap; } sector = dax_iomap_sector(&iomap, pos); @@ -1181,8 +1189,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, clear_user_highpage(vmf->cow_page, vaddr); break; case IOMAP_MAPPED: - error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE, - vmf->cow_page, vaddr); + error = copy_user_dax(iomap.bdev, iomap.dax_dev, + sector, PAGE_SIZE, vmf->cow_page, vaddr); break; default: WARN_ON_ONCE(1); @@ -1191,13 +1199,13 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, } if (error) - goto error_unlock_entry; + goto error_finish_iomap; __SetPageUptodate(vmf->cow_page); vmf_ret = finish_fault(vmf); if (!vmf_ret) vmf_ret = VM_FAULT_DONE_COW; - goto unlock_entry; + goto finish_iomap; } switch (iomap.type) { @@ -1207,8 +1215,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT); major = VM_FAULT_MAJOR; } - error = dax_insert_mapping(mapping, iomap.bdev, sector, - PAGE_SIZE, &entry, vmf->vma, vmf); + error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev, + sector, PAGE_SIZE, &entry, vmf->vma, vmf); /* -EBUSY is fine, somebody else faulted on the same PTE */ if (error == -EBUSY) error = 0; @@ -1217,7 +1225,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, case IOMAP_HOLE: if (!(vmf->flags & FAULT_FLAG_WRITE)) { vmf_ret = dax_load_hole(mapping, &entry, vmf); - goto unlock_entry; + goto finish_iomap; } /*FALLTHRU*/ default: @@ -1226,10 +1234,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, break; } - error_unlock_entry: + error_finish_iomap: vmf_ret = dax_fault_return(error) | major; - unlock_entry: - put_locked_mapping_entry(mapping, vmf->pgoff, entry); finish_iomap: if (ops->iomap_end) { int copied = PAGE_SIZE; @@ -1244,6 +1250,10 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, */ ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap); } + unlock_entry: + put_locked_mapping_entry(mapping, vmf->pgoff, entry); + out: + trace_dax_pte_fault_done(inode, vmf, vmf_ret); return vmf_ret; } @@ -1258,41 +1268,48 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap, loff_t pos, void **entryp) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; + const sector_t sector = dax_iomap_sector(iomap, pos); + struct dax_device *dax_dev = iomap->dax_dev; struct block_device *bdev = iomap->bdev; struct inode *inode = mapping->host; - struct blk_dax_ctl dax = { - .sector = dax_iomap_sector(iomap, pos), - .size = PMD_SIZE, - }; - long length = dax_map_atomic(bdev, &dax); - void *ret = NULL; - - if (length < 0) /* dax_map_atomic() failed */ + const size_t size = PMD_SIZE; + void *ret = NULL, *kaddr; + long length = 0; + pgoff_t pgoff; + pfn_t pfn; + int id; + + if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0) goto fallback; - if (length < PMD_SIZE) - goto unmap_fallback; - if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) - goto unmap_fallback; - if (!pfn_t_devmap(dax.pfn)) - goto unmap_fallback; - - dax_unmap_atomic(bdev, &dax); - ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector, + id = dax_read_lock(); + length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn); + if (length < 0) + goto unlock_fallback; + length = PFN_PHYS(length); + + if (length < size) + goto unlock_fallback; + if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) + goto unlock_fallback; + if (!pfn_t_devmap(pfn)) + goto unlock_fallback; + dax_read_unlock(id); + + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, sector, RADIX_DAX_PMD); if (IS_ERR(ret)) goto fallback; *entryp = ret; - trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret); + trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret); return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, - dax.pfn, vmf->flags & FAULT_FLAG_WRITE); + pfn, vmf->flags & FAULT_FLAG_WRITE); - unmap_fallback: - dax_unmap_atomic(bdev, &dax); +unlock_fallback: + dax_read_unlock(id); fallback: - trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, - dax.pfn, ret); + trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret); return VM_FAULT_FALLBACK; } @@ -1381,6 +1398,28 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, if ((pgoff | PG_PMD_COLOUR) > max_pgoff) goto fallback; + /* + * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX + * PMD or a HZP entry. If it can't (because a 4k page is already in + * the tree, for instance), it will return -EEXIST and we just fall + * back to 4k entries. + */ + entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto fallback; + + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PTE fault that overlaps with + * the PMD we need to set up. If so just return and the fault will be + * retried. + */ + if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) && + !pmd_devmap(*vmf->pmd)) { + result = 0; + goto unlock_entry; + } + /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way @@ -1389,21 +1428,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pos = (loff_t)pgoff << PAGE_SHIFT; error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); if (error) - goto fallback; + goto unlock_entry; if (iomap.offset + iomap.length < pos + PMD_SIZE) goto finish_iomap; - /* - * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX - * PMD or a HZP entry. If it can't (because a 4k page is already in - * the tree, for instance), it will return -EEXIST and we just fall - * back to 4k entries. - */ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); - if (IS_ERR(entry)) - goto finish_iomap; - switch (iomap.type) { case IOMAP_MAPPED: result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry); @@ -1411,7 +1440,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, case IOMAP_UNWRITTEN: case IOMAP_HOLE: if (WARN_ON_ONCE(write)) - goto unlock_entry; + break; result = dax_pmd_load_hole(vmf, &iomap, &entry); break; default: @@ -1419,8 +1448,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, break; } - unlock_entry: - put_locked_mapping_entry(mapping, pgoff, entry); finish_iomap: if (ops->iomap_end) { int copied = PMD_SIZE; @@ -1436,6 +1463,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags, &iomap); } + unlock_entry: + put_locked_mapping_entry(mapping, pgoff, entry); fallback: if (result == VM_FAULT_FALLBACK) { split_huge_pmd(vma, vmf->pmd, vmf->address); diff --git a/fs/dcache.c b/fs/dcache.c index 95d71eda81420a506c5a1f4a4f5283b31310d5e4..a9f995f6859eb19ea3c5cddac19d86512ccf1d1e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -419,6 +419,8 @@ static void dentry_lru_add(struct dentry *dentry) { if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) d_lru_add(dentry); + else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) + dentry->d_flags |= DCACHE_REFERENCED; } /** @@ -779,8 +781,6 @@ void dput(struct dentry *dentry) goto kill_it; } - if (!(dentry->d_flags & DCACHE_REFERENCED)) - dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; @@ -1494,7 +1494,7 @@ static void check_and_drop(void *_data) { struct detach_data *data = _data; - if (!data->mountpoint && !data->select.found) + if (!data->mountpoint && list_empty(&data->select.dispose)) __d_drop(data->select.start); } @@ -1536,17 +1536,15 @@ void d_invalidate(struct dentry *dentry) d_walk(dentry, &data, detach_and_collect, check_and_drop); - if (data.select.found) + if (!list_empty(&data.select.dispose)) shrink_dentry_list(&data.select.dispose); + else if (!data.mountpoint) + return; if (data.mountpoint) { detach_mounts(data.mountpoint); dput(data.mountpoint); } - - if (!data.mountpoint && !data.select.found) - break; - cond_resched(); } } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 7fd4ec4bb21406b1d427f24ee7789c54b62c335b..e892ae7d89f8bc078848d8752affa1598a7e1e11 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -199,7 +199,7 @@ static const struct dentry_operations debugfs_dops = { static int debug_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr debug_files[] = {{""}}; + static const struct tree_descr debug_files[] = {{""}}; struct debugfs_fs_info *fsi; int err; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 95c1c8d3453922be63c7c0d13e2bd8acd18d7e12..9c351bf757b20e037f39aeadf0fa0ed12f963db6 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -349,7 +349,6 @@ struct ecryptfs_mount_crypt_stat { struct ecryptfs_sb_info { struct super_block *wsi_sb; struct ecryptfs_mount_crypt_stat mount_crypt_stat; - struct backing_dev_info bdi; }; /* file private data. */ diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 151872dcc1f402ef47273b6da96a5d15a3f251ba..9014479d01600be356b87c284c675ada55b6a723 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -519,12 +519,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs"); + rc = super_setup_bdi(s); if (rc) goto out1; ecryptfs_set_superblock_private(s, sbi); - s->s_bdi = &sbi->bdi; /* ->kill_sb() will take care of sbi after that point */ sbi = NULL; @@ -633,7 +632,6 @@ static void ecryptfs_kill_block_super(struct super_block *sb) if (!sb_info) return; ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); - bdi_destroy(&sb_info->bdi); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 341251421ced00ab1be854c4145cf408cfc93c2f..5420767c9b686a7a9db30bbf932e85071c83c9f6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -42,6 +42,7 @@ #include #include #include +#include /* * LOCKING: @@ -224,6 +225,11 @@ struct eventpoll { /* used to optimize loop detection check */ int visited; struct list_head visited_list_link; + +#ifdef CONFIG_NET_RX_BUSY_POLL + /* used to track busy poll napi_id */ + unsigned int napi_id; +#endif }; /* Wait structure used by the poll hooks */ @@ -384,6 +390,77 @@ static inline int ep_events_available(struct eventpoll *ep) return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; } +#ifdef CONFIG_NET_RX_BUSY_POLL +static bool ep_busy_loop_end(void *p, unsigned long start_time) +{ + struct eventpoll *ep = p; + + return ep_events_available(ep) || busy_loop_timeout(start_time); +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + +/* + * Busy poll if globally on and supporting sockets found && no events, + * busy loop will return if need_resched or ep_events_available. + * + * we must do our busy polling with irqs enabled + */ +static void ep_busy_loop(struct eventpoll *ep, int nonblock) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id = READ_ONCE(ep->napi_id); + + if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) + napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep); +#endif +} + +static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + if (ep->napi_id) + ep->napi_id = 0; +#endif +} + +/* + * Set epoll busy poll NAPI ID from sk. + */ +static inline void ep_set_busy_poll_napi_id(struct epitem *epi) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + struct eventpoll *ep; + unsigned int napi_id; + struct socket *sock; + struct sock *sk; + int err; + + if (!net_busy_loop_on()) + return; + + sock = sock_from_file(epi->ffd.file, &err); + if (!sock) + return; + + sk = sock->sk; + if (!sk) + return; + + napi_id = READ_ONCE(sk->sk_napi_id); + ep = epi->ep; + + /* Non-NAPI IDs can be rejected + * or + * Nothing to do if we already have this ID + */ + if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id) + return; + + /* record NAPI ID for use in next busy poll */ + ep->napi_id = napi_id; +#endif +} + /** * ep_call_nested - Perform a bound (possibly) nested call, by checking * that the recursion limit is not exceeded, and that @@ -1022,6 +1099,8 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k spin_lock_irqsave(&ep->lock, flags); + ep_set_busy_poll_napi_id(epi); + /* * If the event mask does not contain any poll(2) event, we consider the * descriptor to be disabled. This condition is likely the effect of the @@ -1363,6 +1442,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* We have to drop the new item inside our item list to keep track of it */ spin_lock_irqsave(&ep->lock, flags); + /* record NAPI ID of new item if present */ + ep_set_busy_poll_napi_id(epi); + /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); @@ -1637,9 +1719,20 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } fetch_events: + + if (!ep_events_available(ep)) + ep_busy_loop(ep, timed_out); + spin_lock_irqsave(&ep->lock, flags); if (!ep_events_available(ep)) { + /* + * Busy poll timed out. Drop NAPI ID for now, we can add + * it back in when we have moved a socket with a valid NAPI + * ID onto the ready list. + */ + ep_reset_busy_poll_napi_id(ep); + /* * We don't have any available event to return to the caller. * We need to sleep here, and we will be wake up by diff --git a/fs/exec.c b/fs/exec.c index 65145a3df065192345c66ebc311d464fe09a6f29..72934df6847150ba50dfbadad78fe10e01d2eadd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); + arch_setup_new_exec(); perf_event_exec(); __set_task_comm(current, kbasename(bprm->filename), true); diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 42f9a0a0c4caf09722bc69fa278d509a7b7f16b3..8eeb694332fe811e820ed342ebbecc25bb678a1f 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -405,8 +405,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de, int err; lock_page(page); - err = exofs_write_begin(NULL, page->mapping, pos, len, - AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + err = exofs_write_begin(NULL, page->mapping, pos, len, 0, &page, NULL); if (err) EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n", err); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 2e86086bc9403efe99a25ab0df439db0c714eb4e..5dc392404559b033445a38a0f25bfacfd41a5c9c 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -64,7 +64,6 @@ struct exofs_dev { * our extension to the in-memory superblock */ struct exofs_sb_info { - struct backing_dev_info bdi; /* register our bdi with VFS */ struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ int s_timeout; /* timeout for OSD operations */ uint64_t s_nextid; /* highest object ID used */ diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 1076a4233b3962e34df904f55f9923dbdec2b48f..819624cfc8da4c7bad401ee26611d69b7ff38f98 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -464,7 +464,6 @@ static void exofs_put_super(struct super_block *sb) sbi->one_comp.obj.partition); exofs_sysfs_sb_del(sbi); - bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); sb->s_fs_info = NULL; } @@ -809,8 +808,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) __sbi_read_stats(sbi); /* set up operation vectors */ - sbi->bdi.ra_pages = __ra_pages(&sbi->layout); - sb->s_bdi = &sbi->bdi; + ret = super_setup_bdi(sb); + if (ret) { + EXOFS_DBGMSG("Failed to super_setup_bdi\n"); + goto free_sbi; + } + sb->s_bdi->ra_pages = __ra_pages(&sbi->layout); sb->s_fs_info = sbi; sb->s_op = &exofs_sops; sb->s_export_op = &exofs_export_ops; @@ -836,14 +839,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - ret = bdi_setup_and_register(&sbi->bdi, "exofs"); - if (ret) { - EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); - dput(sb->s_root); - sb->s_root = NULL; - goto free_sbi; - } - exofs_sysfs_dbg_print(); _exofs_print_device("Mounting", opts->dev_name, ore_comp_dev(&sbi->oc, 0), diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 5e64de9c5093fa5d3a3e361305f8c648284c4e24..03f5ce1d3dbe1c334958c1de072ff88ce1cdeb8a 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -779,7 +779,6 @@ extern void ext2_evict_inode(struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); -extern void ext2_get_inode_flags(struct ext2_inode_info *); extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); @@ -796,7 +795,8 @@ void ext2_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ext2_msg(struct super_block *, const char *, const char *, ...); extern void ext2_update_dynamic_rev (struct super_block *sb); -extern void ext2_write_super (struct super_block *); +extern void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait); /* * Inodes and files operations diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 128cce5406455716172a88a50cbeddd6edf046f2..2dcbd56988843ccdfb017c9b14ca8f5b05a31b62 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -799,6 +799,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock, static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap) { + struct block_device *bdev; unsigned int blkbits = inode->i_blkbits; unsigned long first_block = offset >> blkbits; unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits; @@ -812,8 +813,13 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return ret; iomap->flags = 0; - iomap->bdev = inode->i_sb->s_bdev; + bdev = inode->i_sb->s_bdev; + iomap->bdev = bdev; iomap->offset = (u64)first_block << blkbits; + if (blk_queue_dax(bdev->bd_queue)) + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); + else + iomap->dax_dev = NULL; if (ret == 0) { iomap->type = IOMAP_HOLE; @@ -835,6 +841,7 @@ static int ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { + fs_put_dax(iomap->dax_dev); if (iomap->type == IOMAP_MAPPED && written < length && (flags & IOMAP_WRITE)) @@ -1384,25 +1391,6 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DAX; } -/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ -void ext2_get_inode_flags(struct ext2_inode_info *ei) -{ - unsigned int flags = ei->vfs_inode.i_flags; - - ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| - EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); - if (flags & S_SYNC) - ei->i_flags |= EXT2_SYNC_FL; - if (flags & S_APPEND) - ei->i_flags |= EXT2_APPEND_FL; - if (flags & S_IMMUTABLE) - ei->i_flags |= EXT2_IMMUTABLE_FL; - if (flags & S_NOATIME) - ei->i_flags |= EXT2_NOATIME_FL; - if (flags & S_DIRSYNC) - ei->i_flags |= EXT2_DIRSYNC_FL; -} - struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; @@ -1563,7 +1551,6 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); - ext2_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if (!(test_opt(sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); @@ -1615,7 +1602,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); spin_unlock(&EXT2_SB(sb)->s_lock); - ext2_write_super(sb); + ext2_sync_super(sb, EXT2_SB(sb)->s_es, 1); } } } diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 191e02b28ce8ff0329646313a7e8445b36ef7b93..087f122cca42063a796367364e09874581fba90d 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -29,7 +29,6 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case EXT2_IOC_GETFLAGS: - ext2_get_inode_flags(ei); flags = ei->i_flags & EXT2_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT2_IOC_SETFLAGS: { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9e25a71fe1a27e584ab3754f0a70b26e94f9a44c..9c2028b50e5c389ed6f86a89d9bf94960fcfc8cb 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -32,12 +32,12 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" -static void ext2_sync_super(struct super_block *sb, - struct ext2_super_block *es, int wait); +static void ext2_write_super(struct super_block *sb); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); @@ -123,13 +123,29 @@ void ext2_update_dynamic_rev(struct super_block *sb) */ } +#ifdef CONFIG_QUOTA +static int ext2_quota_off(struct super_block *sb, int type); + +static void ext2_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + ext2_quota_off(sb, type); +} +#else +static inline void ext2_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext2_put_super (struct super_block * sb) { int db_count; int i; struct ext2_sb_info *sbi = EXT2_SB(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext2_quota_off_umount(sb); if (sbi->s_mb_cache) { ext2_xattr_destroy_cache(sbi->s_mb_cache); @@ -314,10 +330,23 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); static struct dquot **ext2_get_dquots(struct inode *inode) { return EXT2_I(inode)->i_dquot; } + +static const struct quotactl_ops ext2_quotactl_ops = { + .quota_on = ext2_quota_on, + .quota_off = ext2_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; #endif static const struct super_operations ext2_sops = { @@ -1117,7 +1146,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &ext2_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif @@ -1194,8 +1223,8 @@ static void ext2_clear_super_error(struct super_block *sb) } } -static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, - int wait) +void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait) { ext2_clear_super_error(sb); spin_lock(&EXT2_SB(sb)->s_lock); @@ -1270,7 +1299,7 @@ static int ext2_unfreeze(struct super_block *sb) return 0; } -void ext2_write_super(struct super_block *sb) +static void ext2_write_super(struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) ext2_sync_fs(sb, 1); @@ -1548,6 +1577,51 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, return len - towrite; } +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + EXT2_I(inode)->i_flags |= EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int ext2_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + EXT2_I(inode)->i_flags &= ~(EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + #endif static struct file_system_type ext2_fs_type = { diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 354103f3490c3cba719b64a8d0f297200fde67e6..d9beca1653c597bcc9f48835e766332124f07cc3 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -4,11 +4,11 @@ obj-$(CONFIG_EXT4_FS) += ext4.o -ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ - mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ - xattr_trusted.o inline.o readpage.o sysfs.o +ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ + extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \ + indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \ + mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \ + super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index fd389935ecd1629f402128518f63fd7d851e2a84..3ec0e46de95fc8dece943c4d753eb401e6343cad 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -4,6 +4,7 @@ * Copyright (C) 2001-2003 Andreas Gruenbacher, */ +#include #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle_t *handle; int error, retries = 0; + error = dquot_initialize(inode); + if (error) + return error; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, ext4_jbd2_credits_xattr(inode)); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fb69ee2388dba0b83b4f29c57b37ec5cb0aefb34..32191548abed3b02eb107bfd5a23196f40200a70 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2356,17 +2356,16 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, void *buf, int buf_size, struct ext4_filename *fname, struct ext4_dir_entry_2 **dest_de); -int ext4_insert_dentry(struct inode *dir, - struct inode *inode, - struct ext4_dir_entry_2 *de, - int buf_size, - struct ext4_filename *fname); +void ext4_insert_dentry(struct inode *inode, + struct ext4_dir_entry_2 *de, + int buf_size, + struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { if (!ext4_has_feature_dir_index(inode->i_sb)) ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); } -static unsigned char ext4_filetype_table[] = { +static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; @@ -2477,7 +2476,6 @@ extern int ext4_truncate(struct inode *); extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); -extern void ext4_get_inode_flags(struct ext4_inode_info *); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); @@ -2525,7 +2523,6 @@ extern int ext4_search_dir(struct buffer_head *bh, int buf_size, struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir); extern int ext4_generic_delete_entry(handle_t *handle, @@ -3009,7 +3006,6 @@ extern int htree_inlinedir_to_tree(struct file *dir_file, int *has_inline_data); extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *has_inline_data); extern int ext4_delete_inline_entry(handle_t *handle, @@ -3051,7 +3047,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle, struct inode *inode, struct buffer_head *bh); #define S_SHIFT 12 -static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { +static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2a97dff87b961771383c33e97dc57997eb6586fb..3e36508610b796b6612cc1e670a0fc2c8c3c282a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; - struct ext4_extent zero_ex; + struct ext4_extent zero_ex1, zero_ex2; struct ext4_extent *ex, *abut_ex; ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth, map_len = map->m_len; int allocated = 0, max_zeroout = 0; int err = 0; - int split_flag = 0; + int split_flag = EXT4_EXT_DATA_VALID2; ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, @@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - zero_ex.ee_len = 0; + zero_ex1.ee_len = 0; + zero_ex2.ee_len = 0; trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); @@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (ext4_encrypted_inode(inode)) max_zeroout = 0; - /* If extent is less than s_max_zeroout_kb, zeroout directly */ - if (max_zeroout && (ee_len <= max_zeroout)) { - err = ext4_ext_zeroout(inode, ex); - if (err) - goto out; - zero_ex.ee_block = ex->ee_block; - zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)); - ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; - ext4_ext_mark_initialized(ex); - ext4_ext_try_to_merge(handle, inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - goto out; - } - /* - * four cases: + * five cases: * 1. split the extent into three extents. - * 2. split the extent into two extents, zeroout the first half. - * 3. split the extent into two extents, zeroout the second half. + * 2. split the extent into two extents, zeroout the head of the first + * extent. + * 3. split the extent into two extents, zeroout the tail of the second + * extent. * 4. split the extent into two extents with out zeroout. + * 5. no splitting needed, just possibly zeroout the head and / or the + * tail of the extent. */ split_map.m_lblk = map->m_lblk; split_map.m_len = map->m_len; - if (max_zeroout && (allocated > map->m_len)) { + if (max_zeroout && (allocated > split_map.m_len)) { if (allocated <= max_zeroout) { - /* case 3 */ - zero_ex.ee_block = - cpu_to_le32(map->m_lblk); - zero_ex.ee_len = cpu_to_le16(allocated); - ext4_ext_store_pblock(&zero_ex, - ext4_ext_pblock(ex) + map->m_lblk - ee_block); - err = ext4_ext_zeroout(inode, &zero_ex); + /* case 3 or 5 */ + zero_ex1.ee_block = + cpu_to_le32(split_map.m_lblk + + split_map.m_len); + zero_ex1.ee_len = + cpu_to_le16(allocated - split_map.m_len); + ext4_ext_store_pblock(&zero_ex1, + ext4_ext_pblock(ex) + split_map.m_lblk + + split_map.m_len - ee_block); + err = ext4_ext_zeroout(inode, &zero_ex1); if (err) goto out; - split_map.m_lblk = map->m_lblk; split_map.m_len = allocated; - } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) { - /* case 2 */ - if (map->m_lblk != ee_block) { - zero_ex.ee_block = ex->ee_block; - zero_ex.ee_len = cpu_to_le16(map->m_lblk - + } + if (split_map.m_lblk - ee_block + split_map.m_len < + max_zeroout) { + /* case 2 or 5 */ + if (split_map.m_lblk != ee_block) { + zero_ex2.ee_block = ex->ee_block; + zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk - ee_block); - ext4_ext_store_pblock(&zero_ex, + ext4_ext_store_pblock(&zero_ex2, ext4_ext_pblock(ex)); - err = ext4_ext_zeroout(inode, &zero_ex); + err = ext4_ext_zeroout(inode, &zero_ex2); if (err) goto out; } + split_map.m_len += split_map.m_lblk - ee_block; split_map.m_lblk = ee_block; - split_map.m_len = map->m_lblk - ee_block + map->m_len; allocated = map->m_len; } } @@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = 0; out: /* If we have gotten a failure, don't zero out status tree */ - if (!err) - err = ext4_zeroout_es(inode, &zero_ex); + if (!err) { + err = ext4_zeroout_es(inode, &zero_ex1); + if (!err) + err = ext4_zeroout_es(inode, &zero_ex2); + } return err ? err : allocated; } @@ -4883,6 +4877,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); if (file->f_flags & O_SYNC) ext4_handle_sync(handle); @@ -5569,6 +5565,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_handle_sync(handle); inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); @@ -5742,6 +5739,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index cefa9835f275d9b062ae9b13765ea743edb53f64..02ce7e7bbdf5ba4da0db50e50d8d4cab49cde4a3 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -257,6 +257,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { int result; + handle_t *handle = NULL; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -264,12 +265,24 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, if (write) { sb_start_pagefault(sb); file_update_time(vmf->vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, + EXT4_DATA_TRANS_BLOCKS(sb)); + } else { + down_read(&EXT4_I(inode)->i_mmap_sem); } - down_read(&EXT4_I(inode)->i_mmap_sem); - result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); - up_read(&EXT4_I(inode)->i_mmap_sem); - if (write) + if (!IS_ERR(handle)) + result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops); + else + result = VM_FAULT_SIGBUS; + if (write) { + if (!IS_ERR(handle)) + ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); + } else { + up_read(&EXT4_I(inode)->i_mmap_sem); + } return result; } @@ -461,57 +474,37 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, endoff = (loff_t)end_blk << blkbits; index = startoff >> PAGE_SHIFT; - end = endoff >> PAGE_SHIFT; + end = (endoff - 1) >> PAGE_SHIFT; pagevec_init(&pvec, 0); do { int i, num; unsigned long nr_pages; - num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); - if (nr_pages == 0) { - if (whence == SEEK_DATA) - break; - - BUG_ON(whence != SEEK_HOLE); - /* - * If this is the first time to go into the loop and - * offset is not beyond the end offset, it will be a - * hole at this offset - */ - if (lastoff == startoff || lastoff < endoff) - found = 1; + if (nr_pages == 0) break; - } - - /* - * If this is the first time to go into the loop and - * offset is smaller than the first page offset, it will be a - * hole at this offset. - */ - if (lastoff == startoff && whence == SEEK_HOLE && - lastoff < page_offset(pvec.pages[0])) { - found = 1; - break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; struct buffer_head *bh, *head; /* - * If the current offset is not beyond the end of given - * range, it will be a hole. + * If current offset is smaller than the page offset, + * there is a hole at this offset. */ - if (lastoff < endoff && whence == SEEK_HOLE && - page->index > end) { + if (whence == SEEK_HOLE && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { found = 1; *offset = lastoff; goto out; } + if (page->index > end) + goto out; + lock_page(page); if (unlikely(page->mapping != inode->i_mapping)) { @@ -551,20 +544,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); } - /* - * The no. of pages is less than our desired, that would be a - * hole in there. - */ - if (nr_pages < num && whence == SEEK_HOLE) { - found = 1; - *offset = lastoff; + /* The no. of pages is less than our desired, we are done. */ + if (nr_pages < num) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + if (whence == SEEK_HOLE && lastoff < endoff) { + found = 1; + *offset = lastoff; + } out: pagevec_release(&pvec); return found; diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c new file mode 100644 index 0000000000000000000000000000000000000000..b194360988371638272871cdd95ab650e58d295c --- /dev/null +++ b/fs/ext4/fsmap.c @@ -0,0 +1,722 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "ext4.h" +#include +#include "fsmap.h" +#include "mballoc.h" +#include +#include +#include + +/* Convert an ext4_fsmap to an fsmap. */ +void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest, + struct ext4_fsmap *src) +{ + dest->fmr_device = src->fmr_device; + dest->fmr_flags = src->fmr_flags; + dest->fmr_physical = src->fmr_physical << sb->s_blocksize_bits; + dest->fmr_owner = src->fmr_owner; + dest->fmr_offset = 0; + dest->fmr_length = src->fmr_length << sb->s_blocksize_bits; + dest->fmr_reserved[0] = 0; + dest->fmr_reserved[1] = 0; + dest->fmr_reserved[2] = 0; +} + +/* Convert an fsmap to an ext4_fsmap. */ +void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest, + struct fsmap *src) +{ + dest->fmr_device = src->fmr_device; + dest->fmr_flags = src->fmr_flags; + dest->fmr_physical = src->fmr_physical >> sb->s_blocksize_bits; + dest->fmr_owner = src->fmr_owner; + dest->fmr_length = src->fmr_length >> sb->s_blocksize_bits; +} + +/* getfsmap query state */ +struct ext4_getfsmap_info { + struct ext4_fsmap_head *gfi_head; + ext4_fsmap_format_t gfi_formatter; /* formatting fn */ + void *gfi_format_arg;/* format buffer */ + ext4_fsblk_t gfi_next_fsblk; /* next fsblock we expect */ + u32 gfi_dev; /* device id */ + ext4_group_t gfi_agno; /* bg number, if applicable */ + struct ext4_fsmap gfi_low; /* low rmap key */ + struct ext4_fsmap gfi_high; /* high rmap key */ + struct ext4_fsmap gfi_lastfree; /* free ext at end of last bg */ + struct list_head gfi_meta_list; /* fixed metadata list */ + bool gfi_last; /* last extent? */ +}; + +/* Associate a device with a getfsmap handler. */ +struct ext4_getfsmap_dev { + int (*gfd_fn)(struct super_block *sb, + struct ext4_fsmap *keys, + struct ext4_getfsmap_info *info); + u32 gfd_dev; +}; + +/* Compare two getfsmap device handlers. */ +static int ext4_getfsmap_dev_compare(const void *p1, const void *p2) +{ + const struct ext4_getfsmap_dev *d1 = p1; + const struct ext4_getfsmap_dev *d2 = p2; + + return d1->gfd_dev - d2->gfd_dev; +} + +/* Compare a record against our starting point */ +static bool ext4_getfsmap_rec_before_low_key(struct ext4_getfsmap_info *info, + struct ext4_fsmap *rec) +{ + return rec->fmr_physical < info->gfi_low.fmr_physical; +} + +/* + * Format a reverse mapping for getfsmap, having translated rm_startblock + * into the appropriate daddr units. + */ +static int ext4_getfsmap_helper(struct super_block *sb, + struct ext4_getfsmap_info *info, + struct ext4_fsmap *rec) +{ + struct ext4_fsmap fmr; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t rec_fsblk = rec->fmr_physical; + ext4_group_t agno; + ext4_grpblk_t cno; + int error; + + if (fatal_signal_pending(current)) + return -EINTR; + + /* + * Filter out records that start before our startpoint, if the + * caller requested that. + */ + if (ext4_getfsmap_rec_before_low_key(info, rec)) { + rec_fsblk += rec->fmr_length; + if (info->gfi_next_fsblk < rec_fsblk) + info->gfi_next_fsblk = rec_fsblk; + return EXT4_QUERY_RANGE_CONTINUE; + } + + /* Are we just counting mappings? */ + if (info->gfi_head->fmh_count == 0) { + if (rec_fsblk > info->gfi_next_fsblk) + info->gfi_head->fmh_entries++; + + if (info->gfi_last) + return EXT4_QUERY_RANGE_CONTINUE; + + info->gfi_head->fmh_entries++; + + rec_fsblk += rec->fmr_length; + if (info->gfi_next_fsblk < rec_fsblk) + info->gfi_next_fsblk = rec_fsblk; + return EXT4_QUERY_RANGE_CONTINUE; + } + + /* + * If the record starts past the last physical block we saw, + * then we've found a gap. Report the gap as being owned by + * whatever the caller specified is the missing owner. + */ + if (rec_fsblk > info->gfi_next_fsblk) { + if (info->gfi_head->fmh_entries >= info->gfi_head->fmh_count) + return EXT4_QUERY_RANGE_ABORT; + + ext4_get_group_no_and_offset(sb, info->gfi_next_fsblk, + &agno, &cno); + trace_ext4_fsmap_mapping(sb, info->gfi_dev, agno, + EXT4_C2B(sbi, cno), + rec_fsblk - info->gfi_next_fsblk, + EXT4_FMR_OWN_UNKNOWN); + + fmr.fmr_device = info->gfi_dev; + fmr.fmr_physical = info->gfi_next_fsblk; + fmr.fmr_owner = EXT4_FMR_OWN_UNKNOWN; + fmr.fmr_length = rec_fsblk - info->gfi_next_fsblk; + fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; + error = info->gfi_formatter(&fmr, info->gfi_format_arg); + if (error) + return error; + info->gfi_head->fmh_entries++; + } + + if (info->gfi_last) + goto out; + + /* Fill out the extent we found */ + if (info->gfi_head->fmh_entries >= info->gfi_head->fmh_count) + return EXT4_QUERY_RANGE_ABORT; + + ext4_get_group_no_and_offset(sb, rec_fsblk, &agno, &cno); + trace_ext4_fsmap_mapping(sb, info->gfi_dev, agno, EXT4_C2B(sbi, cno), + rec->fmr_length, rec->fmr_owner); + + fmr.fmr_device = info->gfi_dev; + fmr.fmr_physical = rec_fsblk; + fmr.fmr_owner = rec->fmr_owner; + fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; + fmr.fmr_length = rec->fmr_length; + error = info->gfi_formatter(&fmr, info->gfi_format_arg); + if (error) + return error; + info->gfi_head->fmh_entries++; + +out: + rec_fsblk += rec->fmr_length; + if (info->gfi_next_fsblk < rec_fsblk) + info->gfi_next_fsblk = rec_fsblk; + return EXT4_QUERY_RANGE_CONTINUE; +} + +static inline ext4_fsblk_t ext4_fsmap_next_pblk(struct ext4_fsmap *fmr) +{ + return fmr->fmr_physical + fmr->fmr_length; +} + +/* Transform a blockgroup's free record into a fsmap */ +static int ext4_getfsmap_datadev_helper(struct super_block *sb, + ext4_group_t agno, ext4_grpblk_t start, + ext4_grpblk_t len, void *priv) +{ + struct ext4_fsmap irec; + struct ext4_getfsmap_info *info = priv; + struct ext4_fsmap *p; + struct ext4_fsmap *tmp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t fsb; + ext4_fsblk_t fslen; + int error; + + fsb = (EXT4_C2B(sbi, start) + ext4_group_first_block_no(sb, agno)); + fslen = EXT4_C2B(sbi, len); + + /* If the retained free extent record is set... */ + if (info->gfi_lastfree.fmr_owner) { + /* ...and abuts this one, lengthen it and return. */ + if (ext4_fsmap_next_pblk(&info->gfi_lastfree) == fsb) { + info->gfi_lastfree.fmr_length += fslen; + return 0; + } + + /* + * There's a gap between the two free extents; emit the + * retained extent prior to merging the meta_list. + */ + error = ext4_getfsmap_helper(sb, info, &info->gfi_lastfree); + if (error) + return error; + info->gfi_lastfree.fmr_owner = 0; + } + + /* Merge in any relevant extents from the meta_list */ + list_for_each_entry_safe(p, tmp, &info->gfi_meta_list, fmr_list) { + if (p->fmr_physical + p->fmr_length <= info->gfi_next_fsblk) { + list_del(&p->fmr_list); + kfree(p); + } else if (p->fmr_physical < fsb) { + error = ext4_getfsmap_helper(sb, info, p); + if (error) + return error; + + list_del(&p->fmr_list); + kfree(p); + } + } + + irec.fmr_device = 0; + irec.fmr_physical = fsb; + irec.fmr_length = fslen; + irec.fmr_owner = EXT4_FMR_OWN_FREE; + irec.fmr_flags = 0; + + /* If this is a free extent at the end of a bg, buffer it. */ + if (ext4_fsmap_next_pblk(&irec) == + ext4_group_first_block_no(sb, agno + 1)) { + info->gfi_lastfree = irec; + return 0; + } + + /* Otherwise, emit it */ + return ext4_getfsmap_helper(sb, info, &irec); +} + +/* Execute a getfsmap query against the log device. */ +static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys, + struct ext4_getfsmap_info *info) +{ + journal_t *journal = EXT4_SB(sb)->s_journal; + struct ext4_fsmap irec; + + /* Set up search keys */ + info->gfi_low = keys[0]; + info->gfi_low.fmr_length = 0; + + memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high)); + + trace_ext4_fsmap_low_key(sb, info->gfi_dev, 0, + info->gfi_low.fmr_physical, + info->gfi_low.fmr_length, + info->gfi_low.fmr_owner); + + trace_ext4_fsmap_high_key(sb, info->gfi_dev, 0, + info->gfi_high.fmr_physical, + info->gfi_high.fmr_length, + info->gfi_high.fmr_owner); + + if (keys[0].fmr_physical > 0) + return 0; + + /* Fabricate an rmap entry for the external log device. */ + irec.fmr_physical = journal->j_blk_offset; + irec.fmr_length = journal->j_maxlen; + irec.fmr_owner = EXT4_FMR_OWN_LOG; + irec.fmr_flags = 0; + + return ext4_getfsmap_helper(sb, info, &irec); +} + +/* Helper to fill out an ext4_fsmap. */ +static inline int ext4_getfsmap_fill(struct list_head *meta_list, + ext4_fsblk_t fsb, ext4_fsblk_t len, + uint64_t owner) +{ + struct ext4_fsmap *fsm; + + fsm = kmalloc(sizeof(*fsm), GFP_NOFS); + if (!fsm) + return -ENOMEM; + fsm->fmr_device = 0; + fsm->fmr_flags = 0; + fsm->fmr_physical = fsb; + fsm->fmr_owner = owner; + fsm->fmr_length = len; + list_add_tail(&fsm->fmr_list, meta_list); + + return 0; +} + +/* + * This function returns the number of file system metadata blocks at + * the beginning of a block group, including the reserved gdt blocks. + */ +static unsigned int ext4_getfsmap_find_sb(struct super_block *sb, + ext4_group_t agno, + struct list_head *meta_list) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t fsb = ext4_group_first_block_no(sb, agno); + ext4_fsblk_t len; + unsigned long first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); + unsigned long metagroup = agno / EXT4_DESC_PER_BLOCK(sb); + int error; + + /* Record the superblock. */ + if (ext4_bg_has_super(sb, agno)) { + error = ext4_getfsmap_fill(meta_list, fsb, 1, EXT4_FMR_OWN_FS); + if (error) + return error; + fsb++; + } + + /* Record the group descriptors. */ + len = ext4_bg_num_gdb(sb, agno); + if (!len) + return 0; + error = ext4_getfsmap_fill(meta_list, fsb, len, + EXT4_FMR_OWN_GDT); + if (error) + return error; + fsb += len; + + /* Reserved GDT blocks */ + if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) { + len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + error = ext4_getfsmap_fill(meta_list, fsb, len, + EXT4_FMR_OWN_RESV_GDT); + if (error) + return error; + } + + return 0; +} + +/* Compare two fsmap items. */ +static int ext4_getfsmap_compare(void *priv, + struct list_head *a, + struct list_head *b) +{ + struct ext4_fsmap *fa; + struct ext4_fsmap *fb; + + fa = container_of(a, struct ext4_fsmap, fmr_list); + fb = container_of(b, struct ext4_fsmap, fmr_list); + if (fa->fmr_physical < fb->fmr_physical) + return -1; + else if (fa->fmr_physical > fb->fmr_physical) + return 1; + return 0; +} + +/* Merge adjacent extents of fixed metadata. */ +static void ext4_getfsmap_merge_fixed_metadata(struct list_head *meta_list) +{ + struct ext4_fsmap *p; + struct ext4_fsmap *prev = NULL; + struct ext4_fsmap *tmp; + + list_for_each_entry_safe(p, tmp, meta_list, fmr_list) { + if (!prev) { + prev = p; + continue; + } + + if (prev->fmr_owner == p->fmr_owner && + prev->fmr_physical + prev->fmr_length == p->fmr_physical) { + prev->fmr_length += p->fmr_length; + list_del(&p->fmr_list); + kfree(p); + } else + prev = p; + } +} + +/* Free a list of fixed metadata. */ +static void ext4_getfsmap_free_fixed_metadata(struct list_head *meta_list) +{ + struct ext4_fsmap *p; + struct ext4_fsmap *tmp; + + list_for_each_entry_safe(p, tmp, meta_list, fmr_list) { + list_del(&p->fmr_list); + kfree(p); + } +} + +/* Find all the fixed metadata in the filesystem. */ +int ext4_getfsmap_find_fixed_metadata(struct super_block *sb, + struct list_head *meta_list) +{ + struct ext4_group_desc *gdp; + ext4_group_t agno; + int error; + + INIT_LIST_HEAD(meta_list); + + /* Collect everything. */ + for (agno = 0; agno < EXT4_SB(sb)->s_groups_count; agno++) { + gdp = ext4_get_group_desc(sb, agno, NULL); + if (!gdp) { + error = -EFSCORRUPTED; + goto err; + } + + /* Superblock & GDT */ + error = ext4_getfsmap_find_sb(sb, agno, meta_list); + if (error) + goto err; + + /* Block bitmap */ + error = ext4_getfsmap_fill(meta_list, + ext4_block_bitmap(sb, gdp), 1, + EXT4_FMR_OWN_BLKBM); + if (error) + goto err; + + /* Inode bitmap */ + error = ext4_getfsmap_fill(meta_list, + ext4_inode_bitmap(sb, gdp), 1, + EXT4_FMR_OWN_INOBM); + if (error) + goto err; + + /* Inodes */ + error = ext4_getfsmap_fill(meta_list, + ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group, + EXT4_FMR_OWN_INODES); + if (error) + goto err; + } + + /* Sort the list */ + list_sort(NULL, meta_list, ext4_getfsmap_compare); + + /* Merge adjacent extents */ + ext4_getfsmap_merge_fixed_metadata(meta_list); + + return 0; +err: + ext4_getfsmap_free_fixed_metadata(meta_list); + return error; +} + +/* Execute a getfsmap query against the buddy bitmaps */ +static int ext4_getfsmap_datadev(struct super_block *sb, + struct ext4_fsmap *keys, + struct ext4_getfsmap_info *info) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t start_fsb; + ext4_fsblk_t end_fsb; + ext4_fsblk_t eofs; + ext4_group_t start_ag; + ext4_group_t end_ag; + ext4_grpblk_t first_cluster; + ext4_grpblk_t last_cluster; + int error = 0; + + eofs = ext4_blocks_count(sbi->s_es); + if (keys[0].fmr_physical >= eofs) + return 0; + if (keys[1].fmr_physical >= eofs) + keys[1].fmr_physical = eofs - 1; + start_fsb = keys[0].fmr_physical; + end_fsb = keys[1].fmr_physical; + + /* Determine first and last group to examine based on start and end */ + ext4_get_group_no_and_offset(sb, start_fsb, &start_ag, &first_cluster); + ext4_get_group_no_and_offset(sb, end_fsb, &end_ag, &last_cluster); + + /* + * Convert the fsmap low/high keys to bg based keys. Initialize + * low to the fsmap low key and max out the high key to the end + * of the bg. + */ + info->gfi_low = keys[0]; + info->gfi_low.fmr_physical = EXT4_C2B(sbi, first_cluster); + info->gfi_low.fmr_length = 0; + + memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high)); + + /* Assemble a list of all the fixed-location metadata. */ + error = ext4_getfsmap_find_fixed_metadata(sb, &info->gfi_meta_list); + if (error) + goto err; + + /* Query each bg */ + for (info->gfi_agno = start_ag; + info->gfi_agno <= end_ag; + info->gfi_agno++) { + /* + * Set the bg high key from the fsmap high key if this + * is the last bg that we're querying. + */ + if (info->gfi_agno == end_ag) { + info->gfi_high = keys[1]; + info->gfi_high.fmr_physical = EXT4_C2B(sbi, + last_cluster); + info->gfi_high.fmr_length = 0; + } + + trace_ext4_fsmap_low_key(sb, info->gfi_dev, info->gfi_agno, + info->gfi_low.fmr_physical, + info->gfi_low.fmr_length, + info->gfi_low.fmr_owner); + + trace_ext4_fsmap_high_key(sb, info->gfi_dev, info->gfi_agno, + info->gfi_high.fmr_physical, + info->gfi_high.fmr_length, + info->gfi_high.fmr_owner); + + error = ext4_mballoc_query_range(sb, info->gfi_agno, + EXT4_B2C(sbi, info->gfi_low.fmr_physical), + EXT4_B2C(sbi, info->gfi_high.fmr_physical), + ext4_getfsmap_datadev_helper, info); + if (error) + goto err; + + /* + * Set the bg low key to the start of the bg prior to + * moving on to the next bg. + */ + if (info->gfi_agno == start_ag) + memset(&info->gfi_low, 0, sizeof(info->gfi_low)); + } + + /* Do we have a retained free extent? */ + if (info->gfi_lastfree.fmr_owner) { + error = ext4_getfsmap_helper(sb, info, &info->gfi_lastfree); + if (error) + goto err; + } + + /* Report any gaps at the end of the bg */ + info->gfi_last = true; + error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster, 0, info); + if (error) + goto err; + +err: + ext4_getfsmap_free_fixed_metadata(&info->gfi_meta_list); + return error; +} + +/* Do we recognize the device? */ +static bool ext4_getfsmap_is_valid_device(struct super_block *sb, + struct ext4_fsmap *fm) +{ + if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || + fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev)) + return true; + if (EXT4_SB(sb)->journal_bdev && + fm->fmr_device == new_encode_dev(EXT4_SB(sb)->journal_bdev->bd_dev)) + return true; + return false; +} + +/* Ensure that the low key is less than the high key. */ +static bool ext4_getfsmap_check_keys(struct ext4_fsmap *low_key, + struct ext4_fsmap *high_key) +{ + if (low_key->fmr_device > high_key->fmr_device) + return false; + if (low_key->fmr_device < high_key->fmr_device) + return true; + + if (low_key->fmr_physical > high_key->fmr_physical) + return false; + if (low_key->fmr_physical < high_key->fmr_physical) + return true; + + if (low_key->fmr_owner > high_key->fmr_owner) + return false; + if (low_key->fmr_owner < high_key->fmr_owner) + return true; + + return false; +} + +#define EXT4_GETFSMAP_DEVS 2 +/* + * Get filesystem's extents as described in head, and format for + * output. Calls formatter to fill the user's buffer until all + * extents are mapped, until the passed-in head->fmh_count slots have + * been filled, or until the formatter short-circuits the loop, if it + * is tracking filled-in extents on its own. + * + * Key to Confusion + * ---------------- + * There are multiple levels of keys and counters at work here: + * _fsmap_head.fmh_keys -- low and high fsmap keys passed in; + * these reflect fs-wide block addrs. + * dkeys -- fmh_keys used to query each device; + * these are fmh_keys but w/ the low key + * bumped up by fmr_length. + * _getfsmap_info.gfi_next_fsblk-- next fs block we expect to see; this + * is how we detect gaps in the fsmap + * records and report them. + * _getfsmap_info.gfi_low/high -- per-bg low/high keys computed from + * dkeys; used to query the free space. + */ +int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, + ext4_fsmap_format_t formatter, void *arg) +{ + struct ext4_fsmap dkeys[2]; /* per-dev keys */ + struct ext4_getfsmap_dev handlers[EXT4_GETFSMAP_DEVS]; + struct ext4_getfsmap_info info = {0}; + int i; + int error = 0; + + if (head->fmh_iflags & ~FMH_IF_VALID) + return -EINVAL; + if (!ext4_getfsmap_is_valid_device(sb, &head->fmh_keys[0]) || + !ext4_getfsmap_is_valid_device(sb, &head->fmh_keys[1])) + return -EINVAL; + + head->fmh_entries = 0; + + /* Set up our device handlers. */ + memset(handlers, 0, sizeof(handlers)); + handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev); + handlers[0].gfd_fn = ext4_getfsmap_datadev; + if (EXT4_SB(sb)->journal_bdev) { + handlers[1].gfd_dev = new_encode_dev( + EXT4_SB(sb)->journal_bdev->bd_dev); + handlers[1].gfd_fn = ext4_getfsmap_logdev; + } + + sort(handlers, EXT4_GETFSMAP_DEVS, sizeof(struct ext4_getfsmap_dev), + ext4_getfsmap_dev_compare, NULL); + + /* + * To continue where we left off, we allow userspace to use the + * last mapping from a previous call as the low key of the next. + * This is identified by a non-zero length in the low key. We + * have to increment the low key in this scenario to ensure we + * don't return the same mapping again, and instead return the + * very next mapping. + * + * Bump the physical offset as there can be no other mapping for + * the same physical block range. + */ + dkeys[0] = head->fmh_keys[0]; + dkeys[0].fmr_physical += dkeys[0].fmr_length; + dkeys[0].fmr_owner = 0; + dkeys[0].fmr_length = 0; + memset(&dkeys[1], 0xFF, sizeof(struct ext4_fsmap)); + + if (!ext4_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) + return -EINVAL; + + info.gfi_next_fsblk = head->fmh_keys[0].fmr_physical + + head->fmh_keys[0].fmr_length; + info.gfi_formatter = formatter; + info.gfi_format_arg = arg; + info.gfi_head = head; + + /* For each device we support... */ + for (i = 0; i < EXT4_GETFSMAP_DEVS; i++) { + /* Is this device within the range the user asked for? */ + if (!handlers[i].gfd_fn) + continue; + if (head->fmh_keys[0].fmr_device > handlers[i].gfd_dev) + continue; + if (head->fmh_keys[1].fmr_device < handlers[i].gfd_dev) + break; + + /* + * If this device number matches the high key, we have + * to pass the high key to the handler to limit the + * query results. If the device number exceeds the + * low key, zero out the low key so that we get + * everything from the beginning. + */ + if (handlers[i].gfd_dev == head->fmh_keys[1].fmr_device) + dkeys[1] = head->fmh_keys[1]; + if (handlers[i].gfd_dev > head->fmh_keys[0].fmr_device) + memset(&dkeys[0], 0, sizeof(struct ext4_fsmap)); + + info.gfi_dev = handlers[i].gfd_dev; + info.gfi_last = false; + info.gfi_agno = -1; + error = handlers[i].gfd_fn(sb, dkeys, &info); + if (error) + break; + info.gfi_next_fsblk = 0; + } + + head->fmh_oflags = FMH_OF_DEV_T; + return error; +} diff --git a/fs/ext4/fsmap.h b/fs/ext4/fsmap.h new file mode 100644 index 0000000000000000000000000000000000000000..9a2cd367cc66bd0b6ee97cade916cbc801051a7a --- /dev/null +++ b/fs/ext4/fsmap.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __EXT4_FSMAP_H__ +#define __EXT4_FSMAP_H__ + +struct fsmap; + +/* internal fsmap representation */ +struct ext4_fsmap { + struct list_head fmr_list; + dev_t fmr_device; /* device id */ + uint32_t fmr_flags; /* mapping flags */ + uint64_t fmr_physical; /* device offset of segment */ + uint64_t fmr_owner; /* owner id */ + uint64_t fmr_length; /* length of segment, blocks */ +}; + +struct ext4_fsmap_head { + uint32_t fmh_iflags; /* control flags */ + uint32_t fmh_oflags; /* output flags */ + unsigned int fmh_count; /* # of entries in array incl. input */ + unsigned int fmh_entries; /* # of entries filled in (output). */ + + struct ext4_fsmap fmh_keys[2]; /* low and high keys */ +}; + +void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest, + struct ext4_fsmap *src); +void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest, + struct fsmap *src); + +/* fsmap to userspace formatter - copy to user & advance pointer */ +typedef int (*ext4_fsmap_format_t)(struct ext4_fsmap *, void *); + +int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, + ext4_fsmap_format_t formatter, void *arg); + +#define EXT4_QUERY_RANGE_ABORT 1 +#define EXT4_QUERY_RANGE_CONTINUE 0 + +/* fmr_owner special values for FS_IOC_GETFSMAP; some share w/ XFS */ +#define EXT4_FMR_OWN_FREE FMR_OWN_FREE /* free space */ +#define EXT4_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */ +#define EXT4_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */ +#define EXT4_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */ +#define EXT4_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */ +#define EXT4_FMR_OWN_GDT FMR_OWNER('f', 1) /* group descriptors */ +#define EXT4_FMR_OWN_RESV_GDT FMR_OWNER('f', 2) /* reserved gdt blocks */ +#define EXT4_FMR_OWN_BLKBM FMR_OWNER('f', 3) /* inode bitmap */ +#define EXT4_FMR_OWN_INOBM FMR_OWNER('f', 4) /* block bitmap */ + +#endif /* __EXT4_FSMAP_H__ */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 17bc043308f33bb28cdb4d63ea7393b0dfc310e1..98ac2f1f23b3d74ec29ac31eeb276da4f1de5605 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1098,6 +1098,17 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (err) goto fail_drop; + /* + * Since the encryption xattr will always be unique, create it first so + * that it's less likely to end up in an external xattr block and + * prevent its deduplication. + */ + if (encrypt) { + err = fscrypt_inherit_context(dir, inode, handle, true); + if (err) + goto fail_free_drop; + } + err = ext4_init_acl(handle, inode, dir); if (err) goto fail_free_drop; @@ -1119,12 +1130,6 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, ei->i_datasync_tid = handle->h_transaction->t_tid; } - if (encrypt) { - err = fscrypt_inherit_context(dir, inode, handle, true); - if (err) - goto fail_free_drop; - } - err = ext4_mark_inode_dirty(handle, inode); if (err) { ext4_std_error(sb, err); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 375fb1c05d49ce87a287213720ca0ebf5e0deef1..8d141c0c8ff9de32a5be08a48f573de7fa618dc5 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1034,7 +1034,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, err = ext4_journal_get_write_access(handle, iloc->bh); if (err) return err; - ext4_insert_dentry(dir, inode, de, inline_size, fname); + ext4_insert_dentry(inode, de, inline_size, fname); ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); @@ -1627,7 +1627,6 @@ int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *has_inline_data) { @@ -1649,7 +1648,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; ret = ext4_search_dir(iloc.bh, inline_start, inline_size, - dir, fname, d_name, 0, res_dir); + dir, fname, 0, res_dir); if (ret == 1) goto out_find; if (ret < 0) @@ -1662,7 +1661,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir, inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; ret = ext4_search_dir(iloc.bh, inline_start, inline_size, - dir, fname, d_name, 0, res_dir); + dir, fname, 0, res_dir); if (ret == 1) goto out_find; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b9ffa9f4191f4cb3a122c215894ef76689fd9604..5cf82d03968ca2c0eb240461fa696e784694c1ea 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1643,6 +1643,7 @@ struct mpage_da_data { */ struct ext4_map_blocks map; struct ext4_io_submit io_submit; /* IO submission data */ + unsigned int do_map:1; }; static void mpage_release_unused_pages(struct mpage_da_data *mpd, @@ -2123,15 +2124,29 @@ static int ext4_writepage(struct page *page, static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) { int len; - loff_t size = i_size_read(mpd->inode); + loff_t size; int err; BUG_ON(page->index != mpd->first_page); + clear_page_dirty_for_io(page); + /* + * We have to be very careful here! Nothing protects writeback path + * against i_size changes and the page can be writeably mapped into + * page tables. So an application can be growing i_size and writing + * data through mmap while writeback runs. clear_page_dirty_for_io() + * write-protects our page in page tables and the page cannot get + * written to again until we release page lock. So only after + * clear_page_dirty_for_io() we are safe to sample i_size for + * ext4_bio_write_page() to zero-out tail of the written page. We rely + * on the barrier provided by TestClearPageDirty in + * clear_page_dirty_for_io() to make sure i_size is really sampled only + * after page tables are updated. + */ + size = i_size_read(mpd->inode); if (page->index == size >> PAGE_SHIFT) len = size & ~PAGE_MASK; else len = PAGE_SIZE; - clear_page_dirty_for_io(page); err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; @@ -2179,6 +2194,9 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, /* First block in the extent? */ if (map->m_len == 0) { + /* We cannot map unless handle is started... */ + if (!mpd->do_map) + return false; map->m_lblk = lblk; map->m_len = 1; map->m_flags = bh->b_state & BH_FLAGS; @@ -2231,6 +2249,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, /* Found extent to map? */ if (mpd->map.m_len) return 0; + /* Buffer needs mapping and handle is not started? */ + if (!mpd->do_map) + return 0; /* Everything mapped so far and we hit EOF */ break; } @@ -2747,6 +2768,29 @@ static int ext4_writepages(struct address_space *mapping, tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); done = false; blk_start_plug(&plug); + + /* + * First writeback pages that don't need mapping - we can avoid + * starting a transaction unnecessarily and also avoid being blocked + * in the block layer on device congestion while having transaction + * started. + */ + mpd.do_map = 0; + mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); + if (!mpd.io_submit.io_end) { + ret = -ENOMEM; + goto unplug; + } + ret = mpage_prepare_extent_to_map(&mpd); + /* Submit prepared bio */ + ext4_io_submit(&mpd.io_submit); + ext4_put_io_end_defer(mpd.io_submit.io_end); + mpd.io_submit.io_end = NULL; + /* Unlock pages we didn't use */ + mpage_release_unused_pages(&mpd, false); + if (ret < 0) + goto unplug; + while (!done && mpd.first_page <= mpd.last_page) { /* For each extent of pages we use new io_end */ mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); @@ -2775,8 +2819,10 @@ static int ext4_writepages(struct address_space *mapping, wbc->nr_to_write, inode->i_ino, ret); /* Release allocated io_end */ ext4_put_io_end(mpd.io_submit.io_end); + mpd.io_submit.io_end = NULL; break; } + mpd.do_map = 1; trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); ret = mpage_prepare_extent_to_map(&mpd); @@ -2807,6 +2853,7 @@ static int ext4_writepages(struct address_space *mapping, if (!ext4_handle_valid(handle) || handle->h_sync == 0) { ext4_journal_stop(handle); handle = NULL; + mpd.do_map = 0; } /* Submit prepared bio */ ext4_io_submit(&mpd.io_submit); @@ -2824,6 +2871,7 @@ static int ext4_writepages(struct address_space *mapping, ext4_journal_stop(handle); } else ext4_put_io_end(mpd.io_submit.io_end); + mpd.io_submit.io_end = NULL; if (ret == -ENOSPC && sbi->s_journal) { /* @@ -2839,6 +2887,7 @@ static int ext4_writepages(struct address_space *mapping, if (ret) break; } +unplug: blk_finish_plug(&plug); if (!ret && !cycled && wbc->nr_to_write > 0) { cycled = 1; @@ -3305,6 +3354,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, unsigned flags, struct iomap *iomap) { + struct block_device *bdev; unsigned int blkbits = inode->i_blkbits; unsigned long first_block = offset >> blkbits; unsigned long last_block = (offset + length - 1) >> blkbits; @@ -3373,7 +3423,12 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, } iomap->flags = 0; - iomap->bdev = inode->i_sb->s_bdev; + bdev = inode->i_sb->s_bdev; + iomap->bdev = bdev; + if (blk_queue_dax(bdev->bd_queue)) + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); + else + iomap->dax_dev = NULL; iomap->offset = first_block << blkbits; if (ret == 0) { @@ -3406,6 +3461,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, int blkbits = inode->i_blkbits; bool truncate = false; + fs_put_dax(iomap->dax_dev); if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) return 0; @@ -3587,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) get_block_func = ext4_dio_get_block_unwritten_async; dio_flags = DIO_LOCKING; } -#ifdef CONFIG_EXT4_FS_ENCRYPTION - BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); -#endif ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block_func, ext4_end_io_dio, NULL, dio_flags); @@ -3671,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) */ inode_lock_shared(inode); ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, - iocb->ki_pos + count); + iocb->ki_pos + count - 1); if (ret) goto out_unlock; ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, @@ -4165,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); out_dio: @@ -4502,31 +4557,6 @@ void ext4_set_inode_flags(struct inode *inode) S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); } -/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ -void ext4_get_inode_flags(struct ext4_inode_info *ei) -{ - unsigned int vfs_fl; - unsigned long old_fl, new_fl; - - do { - vfs_fl = ei->vfs_inode.i_flags; - old_fl = ei->i_flags; - new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| - EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| - EXT4_DIRSYNC_FL); - if (vfs_fl & S_SYNC) - new_fl |= EXT4_SYNC_FL; - if (vfs_fl & S_APPEND) - new_fl |= EXT4_APPEND_FL; - if (vfs_fl & S_IMMUTABLE) - new_fl |= EXT4_IMMUTABLE_FL; - if (vfs_fl & S_NOATIME) - new_fl |= EXT4_NOATIME_FL; - if (vfs_fl & S_DIRSYNC) - new_fl |= EXT4_DIRSYNC_FL; - } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); -} - static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, struct ext4_inode_info *ei) { @@ -4963,7 +4993,6 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); i_gid = i_gid_read(inode); @@ -5621,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode, /* No extended attributes present */ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { - memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, - new_extra_isize); + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize, 0, + new_extra_isize - EXT4_I(inode)->i_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; return 0; } @@ -5874,6 +5904,11 @@ int ext4_page_mkwrite(struct vm_fault *vmf) file_update_time(vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_convert_inline_data(inode); + if (ret) + goto out_ret; + /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a4273ddb992232e1587e33fd7f3a23ea926aaa7e..0c21e22acd74f37ec21f61d6d990fad33ce767db 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -19,6 +19,9 @@ #include #include "ext4_jbd2.h" #include "ext4.h" +#include +#include "fsmap.h" +#include /** * Swap memory between @a and @b for @len bytes. @@ -443,7 +446,7 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) return iflags; } -int ext4_shutdown(struct super_block *sb, unsigned long arg) +static int ext4_shutdown(struct super_block *sb, unsigned long arg) { struct ext4_sb_info *sbi = EXT4_SB(sb); __u32 flags; @@ -489,6 +492,90 @@ int ext4_shutdown(struct super_block *sb, unsigned long arg) return 0; } +struct getfsmap_info { + struct super_block *gi_sb; + struct fsmap_head __user *gi_data; + unsigned int gi_idx; + __u32 gi_last_flags; +}; + +static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv) +{ + struct getfsmap_info *info = priv; + struct fsmap fm; + + trace_ext4_getfsmap_mapping(info->gi_sb, xfm); + + info->gi_last_flags = xfm->fmr_flags; + ext4_fsmap_from_internal(info->gi_sb, &fm, xfm); + if (copy_to_user(&info->gi_data->fmh_recs[info->gi_idx++], &fm, + sizeof(struct fsmap))) + return -EFAULT; + + return 0; +} + +static int ext4_ioc_getfsmap(struct super_block *sb, + struct fsmap_head __user *arg) +{ + struct getfsmap_info info = {0}; + struct ext4_fsmap_head xhead = {0}; + struct fsmap_head head; + bool aborted = false; + int error; + + if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) + return -EFAULT; + if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || + memchr_inv(head.fmh_keys[0].fmr_reserved, 0, + sizeof(head.fmh_keys[0].fmr_reserved)) || + memchr_inv(head.fmh_keys[1].fmr_reserved, 0, + sizeof(head.fmh_keys[1].fmr_reserved))) + return -EINVAL; + /* + * ext4 doesn't report file extents at all, so the only valid + * file offsets are the magic ones (all zeroes or all ones). + */ + if (head.fmh_keys[0].fmr_offset || + (head.fmh_keys[1].fmr_offset != 0 && + head.fmh_keys[1].fmr_offset != -1ULL)) + return -EINVAL; + + xhead.fmh_iflags = head.fmh_iflags; + xhead.fmh_count = head.fmh_count; + ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]); + ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]); + + trace_ext4_getfsmap_low_key(sb, &xhead.fmh_keys[0]); + trace_ext4_getfsmap_high_key(sb, &xhead.fmh_keys[1]); + + info.gi_sb = sb; + info.gi_data = arg; + error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info); + if (error == EXT4_QUERY_RANGE_ABORT) { + error = 0; + aborted = true; + } else if (error) + return error; + + /* If we didn't abort, set the "last" flag in the last fmx */ + if (!aborted && info.gi_idx) { + info.gi_last_flags |= FMR_OF_LAST; + if (copy_to_user(&info.gi_data->fmh_recs[info.gi_idx - 1].fmr_flags, + &info.gi_last_flags, + sizeof(info.gi_last_flags))) + return -EFAULT; + } + + /* copy back header */ + head.fmh_entries = xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) + return -EFAULT; + + return 0; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -499,8 +586,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { + case FS_IOC_GETFSMAP: + return ext4_ioc_getfsmap(sb, (void __user *)arg); case EXT4_IOC_GETFLAGS: - ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { @@ -888,7 +976,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - ext4_get_inode_flags(ei); fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); if (ext4_has_feature_project(inode->i_sb)) { @@ -1009,6 +1096,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC_GET_ENCRYPTION_PWSALT: case EXT4_IOC_GET_ENCRYPTION_POLICY: case EXT4_IOC_SHUTDOWN: + case FS_IOC_GETFSMAP: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 354dc1a894c29bb86dcafcd1b2a5d8a53ca7119d..b7928cddd539ee223584db33ab8cb3341faa2e86 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -357,7 +357,7 @@ static struct kmem_cache *ext4_free_data_cachep; #define NR_GRPINFO_CACHES 8 static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; -static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { +static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k", "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k", "ext4_groupinfo_64k", "ext4_groupinfo_128k" @@ -2393,7 +2393,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) return 0; size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size); - new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL); + new_groupinfo = kvzalloc(size, GFP_KERNEL); if (!new_groupinfo) { ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; @@ -3887,7 +3887,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, "Error loading buddy information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + err, group); put_bh(bitmap_bh); return 0; } @@ -4044,10 +4045,11 @@ void ext4_discard_preallocations(struct inode *inode) BUG_ON(pa->pa_type != MB_INODE_PA); group = ext4_get_group_number(sb, pa->pa_pstart); - err = ext4_mb_load_buddy(sb, group, &e4b); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_error(sb, "Error loading buddy information for %u", - group); + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } @@ -4303,11 +4305,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, spin_unlock(&lg->lg_prealloc_lock); list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + int err; group = ext4_get_group_number(sb, pa->pa_pstart); - if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, "Error loading buddy information for %u", - group); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); + if (err) { + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } ext4_lock_group(sb, group); @@ -5127,8 +5132,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { - ext4_error(sb, "Error in loading buddy " - "information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + ret, group); return ret; } bitmap = e4b.bd_bitmap; @@ -5277,3 +5282,52 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; return ret; } + +/* Iterate all the free extents in the group. */ +int +ext4_mballoc_query_range( + struct super_block *sb, + ext4_group_t group, + ext4_grpblk_t start, + ext4_grpblk_t end, + ext4_mballoc_query_range_fn formatter, + void *priv) +{ + void *bitmap; + ext4_grpblk_t next; + struct ext4_buddy e4b; + int error; + + error = ext4_mb_load_buddy(sb, group, &e4b); + if (error) + return error; + bitmap = e4b.bd_bitmap; + + ext4_lock_group(sb, group); + + start = (e4b.bd_info->bb_first_free > start) ? + e4b.bd_info->bb_first_free : start; + if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; + + while (start <= end) { + start = mb_find_next_zero_bit(bitmap, end + 1, start); + if (start > end) + break; + next = mb_find_next_bit(bitmap, end + 1, start); + + ext4_unlock_group(sb, group); + error = formatter(sb, group, start, next - start, priv); + if (error) + goto out_unload; + ext4_lock_group(sb, group); + + start = next + 1; + } + + ext4_unlock_group(sb, group); +out_unload: + ext4_mb_unload_buddy(&e4b); + + return error; +} diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 1aba469f82209fe40d602579a17964346749e024..2bed62084a8c1592e638f311a1a9574057a7e6a6 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -199,4 +199,21 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, return ext4_group_first_block_no(sb, fex->fe_group) + (fex->fe_start << EXT4_SB(sb)->s_cluster_bits); } + +typedef int (*ext4_mballoc_query_range_fn)( + struct super_block *sb, + ext4_group_t agno, + ext4_grpblk_t start, + ext4_grpblk_t len, + void *priv); + +int +ext4_mballoc_query_range( + struct super_block *sb, + ext4_group_t agno, + ext4_grpblk_t start, + ext4_grpblk_t end, + ext4_mballoc_query_range_fn formatter, + void *priv); + #endif diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 07e5e140577176e13db84089eaa048713ddb4797..404256caf9cff0df28517350113dff0d8b239cc9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1155,12 +1155,11 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, static inline int search_dirblock(struct buffer_head *bh, struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir, - fname, d_name, offset, res_dir); + fname, offset, res_dir); } /* @@ -1237,37 +1236,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) } /* - * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure. + * Test whether a directory entry matches the filename being searched for. * - * `len <= EXT4_NAME_LEN' is guaranteed by caller. - * `de != NULL' is guaranteed by caller. + * Return: %true if the directory entry matches, otherwise %false. */ -static inline int ext4_match(struct ext4_filename *fname, - struct ext4_dir_entry_2 *de) +static inline bool ext4_match(const struct ext4_filename *fname, + const struct ext4_dir_entry_2 *de) { - const void *name = fname_name(fname); - u32 len = fname_len(fname); + struct fscrypt_name f; if (!de->inode) - return 0; + return false; + f.usr_fname = fname->usr_fname; + f.disk_name = fname->disk_name; #ifdef CONFIG_EXT4_FS_ENCRYPTION - if (unlikely(!name)) { - if (fname->usr_fname->name[0] == '_') { - int ret; - if (de->name_len < 16) - return 0; - ret = memcmp(de->name + de->name_len - 16, - fname->crypto_buf.name + 8, 16); - return (ret == 0) ? 1 : 0; - } - name = fname->crypto_buf.name; - len = fname->crypto_buf.len; - } + f.crypto_buf = fname->crypto_buf; #endif - if (de->name_len != len) - return 0; - return (memcmp(de->name, name, len) == 0) ? 1 : 0; + return fscrypt_match_name(&f, de->name, de->name_len); } /* @@ -1275,54 +1261,36 @@ static inline int ext4_match(struct ext4_filename *fname, */ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, struct inode *dir, struct ext4_filename *fname, - const struct qstr *d_name, unsigned int offset, struct ext4_dir_entry_2 **res_dir) { struct ext4_dir_entry_2 * de; char * dlimit; int de_len; - int res; de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; while ((char *) de < dlimit) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ - if ((char *) de + de->name_len <= dlimit) { - res = ext4_match(fname, de); - if (res < 0) { - res = -1; - goto return_result; - } - if (res > 0) { - /* found a match - just to be sure, do - * a full check */ - if (ext4_check_dir_entry(dir, NULL, de, bh, - bh->b_data, - bh->b_size, offset)) { - res = -1; - goto return_result; - } - *res_dir = de; - res = 1; - goto return_result; - } - + if ((char *) de + de->name_len <= dlimit && + ext4_match(fname, de)) { + /* found a match - just to be sure, do + * a full check */ + if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, + bh->b_size, offset)) + return -1; + *res_dir = de; + return 1; } /* prevent looping on a bad block */ de_len = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); - if (de_len <= 0) { - res = -1; - goto return_result; - } + if (de_len <= 0) + return -1; offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } - - res = 0; -return_result: - return res; + return 0; } static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, @@ -1385,7 +1353,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir, + ret = ext4_find_inline_entry(dir, &fname, res_dir, &has_inline_data); if (has_inline_data) { if (inlined) @@ -1477,7 +1445,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto next; } set_buffer_verified(bh); - i = search_dirblock(bh, dir, &fname, d_name, + i = search_dirblock(bh, dir, &fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -1518,7 +1486,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, { struct super_block * sb = dir->i_sb; struct dx_frame frames[2], *frame; - const struct qstr *d_name = fname->usr_fname; struct buffer_head *bh; ext4_lblk_t block; int retval; @@ -1535,7 +1502,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, if (IS_ERR(bh)) goto errout; - retval = search_dirblock(bh, dir, fname, d_name, + retval = search_dirblock(bh, dir, fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (retval == 1) @@ -1560,7 +1527,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, bh = NULL; errout: - dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); + dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name)); success: dx_release(frames); return bh; @@ -1616,16 +1583,9 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - int nokey = ext4_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode); - if (nokey) { - iput(inode); - return ERR_PTR(-ENOKEY); - } ext4_warning(inode->i_sb, "Inconsistent encryption contexts: %lu/%lu", - (unsigned long) dir->i_ino, - (unsigned long) inode->i_ino); + dir->i_ino, inode->i_ino); iput(inode); return ERR_PTR(-EPERM); } @@ -1833,24 +1793,15 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, int nlen, rlen; unsigned int offset = 0; char *top; - int res; de = (struct ext4_dir_entry_2 *)buf; top = buf + buf_size - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, - buf, buf_size, offset)) { - res = -EFSCORRUPTED; - goto return_result; - } - /* Provide crypto context and crypto buffer to ext4 match */ - res = ext4_match(fname, de); - if (res < 0) - goto return_result; - if (res > 0) { - res = -EEXIST; - goto return_result; - } + buf, buf_size, offset)) + return -EFSCORRUPTED; + if (ext4_match(fname, de)) + return -EEXIST; nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); if ((de->inode ? rlen - nlen : rlen) >= reclen) @@ -1858,22 +1809,17 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } - if ((char *) de > top) - res = -ENOSPC; - else { - *dest_de = de; - res = 0; - } -return_result: - return res; + return -ENOSPC; + + *dest_de = de; + return 0; } -int ext4_insert_dentry(struct inode *dir, - struct inode *inode, - struct ext4_dir_entry_2 *de, - int buf_size, - struct ext4_filename *fname) +void ext4_insert_dentry(struct inode *inode, + struct ext4_dir_entry_2 *de, + int buf_size, + struct ext4_filename *fname) { int nlen, rlen; @@ -1892,7 +1838,6 @@ int ext4_insert_dentry(struct inode *dir, ext4_set_de_type(inode->i_sb, de, inode->i_mode); de->name_len = fname_len(fname); memcpy(de->name, fname_name(fname), fname_len(fname)); - return 0; } /* @@ -1928,11 +1873,8 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, return err; } - /* By now the buffer is marked for journaling. Due to crypto operations, - * the following function call may fail */ - err = ext4_insert_dentry(dir, inode, de, blocksize, fname); - if (err < 0) - return err; + /* By now the buffer is marked for journaling */ + ext4_insert_dentry(inode, de, blocksize, fname); /* * XXX shouldn't update any times until successful diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 208241b06662fbccfd741340ba167e68a711418e..1a82138ba7391ac8fd960c0294715d13051fb232 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -297,8 +297,17 @@ static void ext4_end_bio(struct bio *bio) { ext4_io_end_t *io_end = bio->bi_private; sector_t bi_sector = bio->bi_iter.bi_sector; + char b[BDEVNAME_SIZE]; - BUG_ON(!io_end); + if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n", + bdevname(bio->bi_bdev, b), + (long long) bio->bi_iter.bi_sector, + (unsigned) bio_sectors(bio), + bio->bi_error)) { + ext4_finish_bio(bio); + bio_put(bio); + return; + } bio->bi_end_io = NULL; if (bio->bi_error) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a9448db1cf7e87c2bd41daac6fbab7729bc87ecc..d37c81f327e790cc4a309fd0a12baef0dc2ff4ac 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,7 @@ #include "xattr.h" #include "acl.h" #include "mballoc.h" +#include "fsmap.h" #define CREATE_TRACE_POINTS #include @@ -839,6 +841,23 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) } } +#ifdef CONFIG_QUOTA +static int ext4_quota_off(struct super_block *sb, int type); + +static inline void ext4_quota_off_umount(struct super_block *sb) +{ + int type; + + /* Use our quota_off function to clear inode flags etc. */ + for (type = 0; type < EXT4_MAXQUOTAS; type++) + ext4_quota_off(sb, type); +} +#else +static inline void ext4_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -847,7 +866,7 @@ static void ext4_put_super(struct super_block *sb) int i, err; ext4_unregister_li_request(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext4_quota_off_umount(sb); flush_workqueue(sbi->rsv_conversion_wq); destroy_workqueue(sbi->rsv_conversion_wq); @@ -1155,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, return res; } + res = dquot_initialize(inode); + if (res) + return res; retry: handle = ext4_journal_start(inode, EXT4_HT_MISC, ext4_jbd2_credits_xattr(inode)); @@ -1208,7 +1230,7 @@ static const struct fscrypt_operations ext4_cryptops = { #endif #ifdef CONFIG_QUOTA -static char *quotatypes[] = INITQFNAMES; +static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) static int ext4_write_dquot(struct dquot *dquot); @@ -1218,7 +1240,6 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); -static int ext4_quota_off(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -1422,7 +1443,8 @@ static ext4_fsblk_t get_sb_block(void **data) } #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) -static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" +static const char deprecated_msg[] = + "Mount option \"%s\" will be removed by %s\n" "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; #ifdef CONFIG_QUOTA @@ -2132,7 +2154,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) return 0; size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = ext4_kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(size, GFP_KERNEL); if (!new_groups) { ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", size / (int) sizeof(struct flex_groups)); @@ -3866,7 +3888,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = ext4_kvmalloc(db_count * + sbi->s_group_desc = kvmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { @@ -3877,6 +3899,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) bgl_lock_init(sbi->s_blockgroup_lock); + /* Pre-read the descriptors into the buffer cache */ + for (i = 0; i < db_count; i++) { + block = descriptor_loc(sb, logical_sb_block, i); + sb_breadahead(sb, block); + } + for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); @@ -4629,7 +4657,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (sync) { unlock_buffer(sbh); error = __sync_dirty_buffer(sbh, - test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC); + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); if (error) return error; @@ -5344,11 +5372,33 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; } + lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) + if (err) { lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_NORMAL); + } else { + struct inode *inode = d_inode(path->dentry); + handle_t *handle; + + /* + * Set inode flags to prevent userspace from messing with quota + * files. If this fails, we return success anyway since quotas + * are already enabled and this is not a hard failure. + */ + inode_lock(inode); + handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); + if (IS_ERR(handle)) + goto unlock_inode; + EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + unlock_inode: + inode_unlock(inode); + } return err; } @@ -5422,24 +5472,40 @@ static int ext4_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; handle_t *handle; + int err; /* Force all delayed allocation blocks to be allocated. * Caller already holds s_umount sem */ if (test_opt(sb, DELALLOC)) sync_filesystem(sb); - if (!inode) + if (!inode || !igrab(inode)) goto out; - /* Update modification times of quota files when userspace can - * start looking at them */ + err = dquot_quota_off(sb, type); + if (err || ext4_has_feature_quota(sb)) + goto out_put; + + inode_lock(inode); + /* + * Update modification times of quota files when userspace can + * start looking at them. If we fail, we return success anyway since + * this is not a hard failure and quotas are already disabled. + */ handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); if (IS_ERR(handle)) - goto out; + goto out_unlock; + EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - +out_unlock: + inode_unlock(inode); +out_put: + lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL); + iput(inode); + return err; out: return dquot_quota_off(sb, type); } diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 42145be5c6b4dea4258b35586693ae336f047d25..d74dc5f81a0483b2bfeab1ab1216870ce8d25506 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -34,7 +34,7 @@ typedef enum { ptr_ext4_super_block_offset, } attr_ptr_t; -static const char *proc_dirname = "fs/ext4"; +static const char proc_dirname[] = "fs/ext4"; static struct proc_dir_entry *ext4_proc_root; struct ext4_attr { @@ -375,7 +375,7 @@ static const struct file_operations ext4_seq_##name##_fops = { \ PROC_FILE_SHOW_DEFN(es_shrinker_info); PROC_FILE_SHOW_DEFN(options); -static struct ext4_proc_files { +static const struct ext4_proc_files { const char *name; const struct file_operations *fops; } proc_files[] = { @@ -388,7 +388,7 @@ static struct ext4_proc_files { int ext4_register_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_proc_files *p; + const struct ext4_proc_files *p; int err; sbi->s_kobj.kset = &ext4_kset; @@ -412,7 +412,7 @@ int ext4_register_sysfs(struct super_block *sb) void ext4_unregister_sysfs(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_proc_files *p; + const struct ext4_proc_files *p; if (sbi->s_proc) { for (p = proc_files; p->name; p++) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 996e7900d4c8ea2d16f65f47e3f1082552b2fc66..5d3c2536641c7db6c1fa5b19b4ad10e630e09986 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -78,10 +78,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); -static int ext4_xattr_list(struct dentry *dentry, char *buffer, - size_t buffer_size); -static const struct xattr_handler *ext4_xattr_handler_map[] = { +static const struct xattr_handler * const ext4_xattr_handler_map[] = { [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, #ifdef CONFIG_EXT4_FS_POSIX_ACL [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, @@ -163,20 +161,9 @@ ext4_xattr_handler(int name_index) return handler; } -/* - * Inode operation listxattr() - * - * d_inode(dentry)->i_mutex: don't care - */ -ssize_t -ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - return ext4_xattr_list(dentry, buffer, size); -} - static int -ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, - void *value_start) +ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, + void *value_start) { struct ext4_xattr_entry *e = entry; @@ -230,8 +217,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) return -EFSCORRUPTED; if (!ext4_xattr_block_csum_verify(inode, bh)) return -EFSBADCRC; - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, - bh->b_data); + error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size, + bh->b_data); if (!error) set_buffer_verified(bh); return error; @@ -246,7 +233,7 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, if (end - (void *)header < sizeof(*header) + sizeof(u32) || (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC))) goto errout; - error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); + error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header)); errout: if (error) __ext4_error_inode(inode, function, line, 0, @@ -257,20 +244,9 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, #define xattr_check_inode(inode, header, end) \ __xattr_check_inode((inode), (header), (end), __func__, __LINE__) -static inline int -ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) -{ - size_t value_size = le32_to_cpu(entry->e_value_size); - - if (entry->e_value_block != 0 || value_size > size || - le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EFSCORRUPTED; - return 0; -} - static int ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, - const char *name, size_t size, int sorted) + const char *name, int sorted) { struct ext4_xattr_entry *entry; size_t name_len; @@ -290,8 +266,6 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, break; } *pentry = entry; - if (!cmp && ext4_xattr_check_entry(entry, size)) - return -EFSCORRUPTED; return cmp ? -ENODATA : 0; } @@ -319,7 +293,6 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(inode, bh)) { -bad_block: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EFSCORRUPTED; @@ -327,9 +300,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, } ext4_xattr_cache_insert(ext4_mb_cache, bh); entry = BFIRST(bh); - error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EFSCORRUPTED) - goto bad_block; + error = ext4_xattr_find_entry(&entry, name_index, name, 1); if (error) goto cleanup; size = le32_to_cpu(entry->e_value_size); @@ -366,13 +337,12 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, return error; raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); - entry = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; error = xattr_check_inode(inode, header, end); if (error) goto cleanup; - error = ext4_xattr_find_entry(&entry, name_index, name, - end - (void *)entry, 0); + entry = IFIRST(header); + error = ext4_xattr_find_entry(&entry, name_index, name, 0); if (error) goto cleanup; size = le32_to_cpu(entry->e_value_size); @@ -519,7 +489,9 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) } /* - * ext4_xattr_list() + * Inode operation listxattr() + * + * d_inode(dentry)->i_rwsem: don't care * * Copy a list of attribute names into the buffer * provided, or compute the buffer size required. @@ -528,8 +500,8 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) * Returns a negative error number on failure, or the number of bytes * used / required on success. */ -static int -ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) +ssize_t +ext4_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { int ret, ret2; @@ -804,7 +776,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, bs->s.end = bs->bh->b_data + bs->bh->b_size; bs->s.here = bs->s.first; error = ext4_xattr_find_entry(&bs->s.here, i->name_index, - i->name, bs->bh->b_size, 1); + i->name, 1); if (error && error != -ENODATA) goto cleanup; bs->s.not_found = error; @@ -916,6 +888,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, else { u32 ref; + WARN_ON_ONCE(dquot_initialize_needed(inode)); + /* The old block is released after updating the inode. */ error = dquot_alloc_block(inode, @@ -982,6 +956,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, /* We need to allocate a new block */ ext4_fsblk_t goal, block; + WARN_ON_ONCE(dquot_initialize_needed(inode)); + goal = ext4_group_first_block_no(sb, EXT4_I(inode)->i_block_group); @@ -1076,8 +1052,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, return error; /* Find the named attribute. */ error = ext4_xattr_find_entry(&is->s.here, i->name_index, - i->name, is->s.end - - (void *)is->s.base, 0); + i->name, 0); if (error && error != -ENODATA) return error; is->s.not_found = error; @@ -1195,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, return -EINVAL; if (strlen(name) > 255) return -ERANGE; + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); @@ -1296,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, int error, retries = 0; int credits = ext4_jbd2_credits_xattr(inode); + error = dquot_initialize(inode); + if (error) + return error; retry: handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) { diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0339daf4ca02fac4090fc59783bbfa745bccc9eb..ea9c317b5916ee51610069c2c58cdb504f95969d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -275,10 +275,11 @@ static int f2fs_write_meta_pages(struct address_space *mapping, get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, META); + /* if locked failed, cp will flush dirty pages instead */ + if (!mutex_trylock(&sbi->cp_mutex)) + goto skip_write; - /* if mounting is failed, skip writing node pages */ - mutex_lock(&sbi->cp_mutex); + trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = sync_meta_pages(sbi, META, wbc->nr_to_write); mutex_unlock(&sbi->cp_mutex); @@ -567,7 +568,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) if (ni.blk_addr != NULL_ADDR) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", + "%s: orphan failed (ino=%x) by kernel, retry mount.", __func__, ino); return -EIO; } @@ -677,7 +678,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset >= blk_size) { + if (crc_offset > (blk_size - sizeof(__le32))) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; @@ -816,7 +817,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type) return; set_inode_flag(inode, flag); - list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); + if (!f2fs_is_volatile_file(inode)) + list_add_tail(&F2FS_I(inode)->dirty_list, + &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } @@ -941,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) return 0; } +static void __prepare_cp_block(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + nid_t last_nid = nm_i->next_scan_nid; + + next_free_nid(sbi, &last_nid); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + ckpt->next_free_nid = cpu_to_le32(last_nid); +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -964,21 +980,26 @@ static int block_operations(struct f2fs_sb_info *sbi) err = sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; + cond_resched(); goto retry_flush_dents; } + /* + * POR: we should ensure that there are no dirty node pages + * until finishing nat/sit flush. inode->i_blocks can be updated. + */ + down_write(&sbi->node_change); + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) goto out; + cond_resched(); goto retry_flush_dents; } - /* - * POR: we should ensure that there are no dirty node pages - * until finishing nat/sit flush. - */ retry_flush_nodes: down_write(&sbi->node_write); @@ -986,11 +1007,20 @@ static int block_operations(struct f2fs_sb_info *sbi) up_write(&sbi->node_write); err = sync_node_pages(sbi, &wbc); if (err) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); goto out; } + cond_resched(); goto retry_flush_nodes; } + + /* + * sbi->node_change is used only for AIO write_begin path which produces + * dirty node blocks and some checkpoint values by block allocation. + */ + __prepare_cp_block(sbi); + up_write(&sbi->node_change); out: blk_finish_plug(&plug); return err; @@ -1024,16 +1054,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_lock(&sbi->cp_lock); - if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count > + if ((cpc->reason & CP_UMOUNT) && + le32_to_cpu(ckpt->cp_pack_total_block_count) > sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) disable_nat_bits(sbi, false); - if (cpc->reason == CP_UMOUNT) + if (cpc->reason & CP_TRIMMED) + __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + + if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - if (cpc->reason == CP_FASTBOOT) + if (cpc->reason & CP_FASTBOOT) __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); else __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); @@ -1057,7 +1091,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; - nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; @@ -1074,14 +1107,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return -EIO; } - next_free_nid(sbi, &last_nid); - /* * modify checkpoint * version number is already updated */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); - ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1100,10 +1130,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); } - ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); - ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); - ckpt->next_free_nid = cpu_to_le32(last_nid); - /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi, false); spin_lock(&sbi->cp_lock); @@ -1143,7 +1169,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write nat bits */ if (enabled_nat_bits(sbi, cpc)) { __u64 cp_ver = cur_cp_version(ckpt); - unsigned int i; block_t blk; cp_ver |= ((__u64)crc32 << 32); @@ -1250,8 +1275,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || - (cpc->reason == CP_DISCARD && !sbi->discard_blks))) + ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || + ((cpc->reason & CP_DISCARD) && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1273,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_merged_bios(sbi); /* this is the case of multiple fstrims without any changes */ - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { if (!exist_trim_candidates(sbi, cpc)) { unblock_operations(sbi); goto out; @@ -1311,7 +1336,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); - if (cpc->reason == CP_RECOVERY) + if (cpc->reason & CP_RECOVERY) f2fs_msg(sbi->sb, KERN_NOTICE, "checkpoint: version = %llx", ckpt_ver); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1602b4bccae61e8ac9fcff3d4810b953294773e2..7c0f6bdf817d4370b74b36de9096fad73c75fbd4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - io->fio.op_flags = REQ_META | REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; if (!test_opt(sbi, NOBARRIER)) io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } @@ -341,7 +341,7 @@ void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) /* * Fill the locked page with data located in the block address. - * Return unlocked page. + * A caller needs to unlock the page on failure. */ int f2fs_submit_page_bio(struct f2fs_io_info *fio) { @@ -362,6 +362,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); __submit_bio(fio->sbi, bio, fio->type); + + if (!is_read_io(fio->op)) + inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); return 0; } @@ -787,6 +790,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } +static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +{ + if (flag == F2FS_GET_BLOCK_PRE_AIO) { + if (lock) + down_read(&sbi->node_change); + else + up_read(&sbi->node_change); + } else { + if (lock) + f2fs_lock_op(sbi); + else + f2fs_unlock_op(sbi); + } +} + /* * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with * f2fs_map_blocks structure. @@ -829,7 +847,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, next_dnode: if (create) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -939,7 +957,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, f2fs_put_dnode(&dn); if (create) { - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; @@ -948,7 +966,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, f2fs_put_dnode(&dn); unlock_out: if (create) { - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } out: @@ -1151,9 +1169,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping, for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { - prefetchw(&page->flags); if (pages) { page = list_last_entry(pages, struct page, lru); + + prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, @@ -1283,17 +1302,83 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } +static int encrypt_one_page(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + gfp_t gfp_flags = GFP_NOFS; + + if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) + return 0; + + /* wait for GCed encrypted page writeback */ + f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr); + +retry_encrypt: + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, + PAGE_SIZE, 0, fio->page->index, gfp_flags); + if (!IS_ERR(fio->encrypted_page)) + return 0; + + /* flush pending IOs and wait for a while in the ENOMEM case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_bios(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); +} + +static inline bool need_inplace_update(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) + return false; + if (is_cold_data(fio->page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + return false; + + return need_inplace_update_policy(inode, fio); +} + +static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) +{ + if (fio->old_blkaddr == NEW_ADDR) + return false; + if (fio->old_blkaddr == NULL_ADDR) + return false; + return true; +} + int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; + struct extent_info ei = {0,0,0}; + bool ipu_force = false; int err = 0; set_new_dnode(&dn, inode, NULL, NULL, 0); + if (need_inplace_update(fio) && + f2fs_lookup_extent_cache(inode, page->index, &ei)) { + fio->old_blkaddr = ei.blk + page->index - ei.fofs; + + if (valid_ipu_blkaddr(fio)) { + ipu_force = true; + fio->need_lock = false; + goto got_it; + } + } + + if (fio->need_lock) + f2fs_lock_op(fio->sbi); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) - return err; + goto out; fio->old_blkaddr = dn.data_blkaddr; @@ -1302,31 +1387,10 @@ int do_write_data_page(struct f2fs_io_info *fio) ClearPageUptodate(page); goto out_writepage; } - - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - gfp_t gfp_flags = GFP_NOFS; - - /* wait for GCed encrypted page writeback */ - f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), - fio->old_blkaddr); -retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - PAGE_SIZE, 0, - fio->page->index, - gfp_flags); - if (IS_ERR(fio->encrypted_page)) { - err = PTR_ERR(fio->encrypted_page); - if (err == -ENOMEM) { - /* flush pending ios and wait for a while */ - f2fs_flush_merged_bios(F2FS_I_SB(inode)); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - err = 0; - goto retry_encrypt; - } - goto out_writepage; - } - } +got_it: + err = encrypt_one_page(fio); + if (err) + goto out_writepage; set_page_writeback(page); @@ -1334,22 +1398,27 @@ int do_write_data_page(struct f2fs_io_info *fio) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(fio->old_blkaddr != NEW_ADDR && - !is_cold_data(page) && - !IS_ATOMIC_WRITTEN_PAGE(page) && - need_inplace_update(inode))) { - rewrite_data_page(fio); + if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { + f2fs_put_dnode(&dn); + if (fio->need_lock) + f2fs_unlock_op(fio->sbi); + err = rewrite_data_page(fio); + trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); - trace_f2fs_do_write_data_page(page, IPU); - } else { - write_data_page(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + return err; } + + /* LFS mode write path */ + write_data_page(&dn, fio); + trace_f2fs_do_write_data_page(page, OPU); + set_inode_flag(inode, FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); +out: + if (fio->need_lock) + f2fs_unlock_op(fio->sbi); return err; } @@ -1370,9 +1439,11 @@ static int __write_data_page(struct page *page, bool *submitted, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, .submitted = false, + .need_lock = true, }; trace_f2fs_writepage(page, DATA); @@ -1408,6 +1479,7 @@ static int __write_data_page(struct page *page, bool *submitted, /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + fio.need_lock = false; err = do_write_data_page(&fio); goto done; } @@ -1416,6 +1488,8 @@ static int __write_data_page(struct page *page, bool *submitted, need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0, 0)) goto redirty_out; + else + set_inode_flag(inode, FI_HOT_DATA); err = -EAGAIN; if (f2fs_has_inline_data(inode)) { @@ -1423,12 +1497,12 @@ static int __write_data_page(struct page *page, bool *submitted, if (!err) goto out; } - f2fs_lock_op(sbi); + if (err == -EAGAIN) err = do_write_data_page(&fio); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - f2fs_unlock_op(sbi); + done: if (err && err != -ENOENT) goto redirty_out; @@ -1441,12 +1515,14 @@ static int __write_data_page(struct page *page, bool *submitted, if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, DATA, WRITE); + clear_inode_flag(inode, FI_HOT_DATA); remove_dirty_inode(inode); submitted = NULL; } unlock_page(page); - f2fs_balance_fs(sbi, need_balance_fs); + if (!S_ISDIR(inode->i_mode)) + f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); @@ -1495,6 +1571,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pagevec_init(&pvec, 0); + if (get_dirty_pages(mapping->host) <= + SM_I(F2FS_M_SB(mapping))->min_hot_blocks) + set_inode_flag(mapping->host, FI_HOT_DATA); + else + clear_inode_flag(mapping->host, FI_HOT_DATA); + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -1580,8 +1662,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, last_idx = page->index; } - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) { + /* give a priority to WB_SYNC threads */ + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) || + --wbc->nr_to_write <= 0) && + wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } @@ -1637,9 +1721,18 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_inc(&sbi->wb_sync_req); + else if (atomic_read(&sbi->wb_sync_req)) + goto skip_write; + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc); blk_finish_plug(&plug); + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_dec(&sbi->wb_sync_req); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. @@ -1687,7 +1780,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); locked = true; } restart: @@ -1723,7 +1816,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, + true); locked = true; goto restart; } @@ -1737,7 +1831,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, f2fs_put_dnode(&dn); unlock_out: if (locked) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); return err; } @@ -1951,7 +2045,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) - return; + return drop_inmem_page(inode, page); set_page_private(page, 0); ClearPagePrivate(page); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ee2d0a485fc3478fc5f93b5b85c6dad0431e8ea0..87f449845f5f9a8010ee13c40548fb6620802872 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -51,15 +51,26 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); + si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); - if (SM_I(sbi) && SM_I(sbi)->fcc_info) - si->nr_flush = - atomic_read(&SM_I(sbi)->fcc_info->submit_flush); - if (SM_I(sbi) && SM_I(sbi)->dcc_info) - si->nr_discard = - atomic_read(&SM_I(sbi)->dcc_info->submit_discard); + if (SM_I(sbi) && SM_I(sbi)->fcc_info) { + si->nr_flushed = + atomic_read(&SM_I(sbi)->fcc_info->issued_flush); + si->nr_flushing = + atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + } + if (SM_I(sbi) && SM_I(sbi)->dcc_info) { + si->nr_discarded = + atomic_read(&SM_I(sbi)->dcc_info->issued_discard); + si->nr_discarding = + atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + si->nr_discard_cmd = + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; + } si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -86,6 +97,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -99,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); si->curseg[i] = curseg->segno; - si->cursec[i] = curseg->segno / sbi->segs_per_sec; - si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); + si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } for (i = 0; i < 2; i++) { @@ -124,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; + blks_per_sec = BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -156,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi) if (si->base_mem) goto get_cache; - si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + /* build stat */ + si->base_mem = sizeof(struct f2fs_stat_info); + + /* build superblock */ + si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; @@ -208,8 +224,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build merge flush thread */ if (SM_I(sbi)->fcc_info) si->cache_mem += sizeof(struct flush_cmd_control); - if (SM_I(sbi)->dcc_info) + if (SM_I(sbi)->dcc_info) { si->cache_mem += sizeof(struct discard_cmd_control); + si->cache_mem += sizeof(struct discard_cmd) * + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + } /* free nids */ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + @@ -330,11 +349,16 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n", + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, - si->nr_flush, si->nr_discard); - seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n", - si->inmem_pages, si->aw_cnt, si->max_aw_cnt); + si->nr_flushing, si->nr_flushed, + si->nr_discarding, si->nr_discarded, + si->nr_discard_cmd, si->undiscard_blks); + seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " + "volatile IO: %4d (Max. %4d)\n", + si->inmem_pages, si->aw_cnt, si->max_aw_cnt, + si->vw_cnt, si->max_vw_cnt); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", @@ -347,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); - seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n", - si->free_nids, si->alloc_nids); + seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", + si->free_nids, si->avail_nids, si->alloc_nids); seq_puts(s, "\nDistribution of User Blocks:"); seq_puts(s, " [ valid | invalid | free ]\n"); seq_puts(s, " ["); @@ -434,7 +458,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inplace_count, 0); atomic_set(&sbi->aw_cnt, 0); + atomic_set(&sbi->vw_cnt, 0); atomic_set(&sbi->max_aw_cnt, 0); + atomic_set(&sbi->max_vw_cnt, 0); mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 8d5c62b07b283f53e90ded2366c8bb9375409fa2..94756f55a97e7d86052cdb49ce79fcc131738cfd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; @@ -111,8 +111,6 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, struct f2fs_dir_entry *de; unsigned long bit_pos = 0; int max_len = 0; - struct fscrypt_str de_name = FSTR_INIT(NULL, 0); - struct fscrypt_str *name = &fname->disk_name; if (max_slots) *max_slots = 0; @@ -130,17 +128,9 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, continue; } - /* encrypted case */ - de_name.name = d->filename[bit_pos]; - de_name.len = le16_to_cpu(de->name_len); - - /* show encrypted name */ - if (fname->hash) { - if (de->hash_code == cpu_to_le32(fname->hash)) - goto found; - } else if (de_name.len == name->len && - de->hash_code == namehash && - !memcmp(de_name.name, name->name, name->len)) + if (de->hash_code == namehash && + fscrypt_match_name(fname, d->filename[bit_pos], + le16_to_cpu(de->name_len))) goto found; if (max_slots && max_len > *max_slots) @@ -170,12 +160,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct f2fs_dir_entry *de = NULL; bool room = false; int max_slots; - f2fs_hash_t namehash; - - if(fname->hash) - namehash = cpu_to_le32(fname->hash); - else - namehash = f2fs_dentry_hash(&name); + f2fs_hash_t namehash = f2fs_dentry_hash(&name, fname); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); @@ -207,13 +192,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - /* This is to increase the speed of f2fs_create */ - if (!de && room) { - F2FS_I(dir)->task = current; - if (F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; - } + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; } return de; @@ -254,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, break; } out: + /* This is to increase the speed of f2fs_create */ + if (!de) + F2FS_I(dir)->task = current; return de; } @@ -337,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, struct inode *to, - const struct qstr *name) -{ - struct page *page; - - if (file_enc_name(to)) - return 0; - - page = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); - - init_dent_inode(name, page); - f2fs_put_page(page, 1); - - return 0; -} - void do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { @@ -384,7 +350,7 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); @@ -438,8 +404,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, set_cold_node(inode, page); } - if (new_name) + if (new_name) { init_dent_inode(new_name, page); + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); + } /* * This file should be checkpointed during fsync. @@ -542,7 +511,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(new_name); + dentry_hash = f2fs_dentry_hash(new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -599,11 +568,9 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -911,7 +878,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) dentry_blk = kmap(dentry_page); - make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index c6934f014e0f1d9a458a6489422612a6484fe198..2f98d70397013317c393257c3a957f4fa12e1c00 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -18,6 +18,179 @@ #include "node.h" #include +static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, + unsigned int ofs) +{ + if (cached_re) { + if (cached_re->ofs <= ofs && + cached_re->ofs + cached_re->len > ofs) { + return cached_re; + } + } + return NULL; +} + +static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, + unsigned int ofs) +{ + struct rb_node *node = root->rb_node; + struct rb_entry *re; + + while (node) { + re = rb_entry(node, struct rb_entry, rb_node); + + if (ofs < re->ofs) + node = node->rb_left; + else if (ofs >= re->ofs + re->len) + node = node->rb_right; + else + return re; + } + return NULL; +} + +struct rb_entry *__lookup_rb_tree(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs) +{ + struct rb_entry *re; + + re = __lookup_rb_tree_fast(cached_re, ofs); + if (!re) + return __lookup_rb_tree_slow(root, ofs); + + return re; +} + +struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root *root, struct rb_node **parent, + unsigned int ofs) +{ + struct rb_node **p = &root->rb_node; + struct rb_entry *re; + + while (*p) { + *parent = *p; + re = rb_entry(*parent, struct rb_entry, rb_node); + + if (ofs < re->ofs) + p = &(*p)->rb_left; + else if (ofs >= re->ofs + re->len) + p = &(*p)->rb_right; + else + f2fs_bug_on(sbi, 1); + } + + return p; +} + +/* + * lookup rb entry in position of @ofs in rb-tree, + * if hit, return the entry, otherwise, return NULL + * @prev_ex: extent before ofs + * @next_ex: extent after ofs + * @insert_p: insert point for new extent at ofs + * in order to simpfy the insertion after. + * tree must stay unchanged between lookup and insertion. + */ +struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, + struct rb_entry *cached_re, + unsigned int ofs, + struct rb_entry **prev_entry, + struct rb_entry **next_entry, + struct rb_node ***insert_p, + struct rb_node **insert_parent, + bool force) +{ + struct rb_node **pnode = &root->rb_node; + struct rb_node *parent = NULL, *tmp_node; + struct rb_entry *re = cached_re; + + *insert_p = NULL; + *insert_parent = NULL; + *prev_entry = NULL; + *next_entry = NULL; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + if (re) { + if (re->ofs <= ofs && re->ofs + re->len > ofs) + goto lookup_neighbors; + } + + while (*pnode) { + parent = *pnode; + re = rb_entry(*pnode, struct rb_entry, rb_node); + + if (ofs < re->ofs) + pnode = &(*pnode)->rb_left; + else if (ofs >= re->ofs + re->len) + pnode = &(*pnode)->rb_right; + else + goto lookup_neighbors; + } + + *insert_p = pnode; + *insert_parent = parent; + + re = rb_entry(parent, struct rb_entry, rb_node); + tmp_node = parent; + if (parent && ofs > re->ofs) + tmp_node = rb_next(parent); + *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + + tmp_node = parent; + if (parent && ofs < re->ofs) + tmp_node = rb_prev(parent); + *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + return NULL; + +lookup_neighbors: + if (ofs == re->ofs || force) { + /* lookup prev node for merging backward later */ + tmp_node = rb_prev(&re->rb_node); + *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + } + if (ofs == re->ofs + re->len - 1 || force) { + /* lookup next node for merging frontward later */ + tmp_node = rb_next(&re->rb_node); + *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + } + return re; +} + +bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root *root) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct rb_node *cur = rb_first(root), *next; + struct rb_entry *cur_re, *next_re; + + if (!cur) + return true; + + while (cur) { + next = rb_next(cur); + if (!next) + return true; + + cur_re = rb_entry(cur, struct rb_entry, rb_node); + next_re = rb_entry(next, struct rb_entry, rb_node); + + if (cur_re->ofs + cur_re->len > next_re->ofs) { + f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " + "cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); + return false; + } + + cur = next; + } +#endif + return true; +} + static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab; @@ -102,36 +275,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) return et; } -static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, unsigned int fofs) -{ - struct rb_node *node = et->root.rb_node; - struct extent_node *en = et->cached_en; - - if (en) { - struct extent_info *cei = &en->ei; - - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) { - stat_inc_cached_node_hit(sbi); - return en; - } - } - - while (node) { - en = rb_entry(node, struct extent_node, rb_node); - - if (fofs < en->ei.fofs) { - node = node->rb_left; - } else if (fofs >= en->ei.fofs + en->ei.len) { - node = node->rb_right; - } else { - stat_inc_rbtree_node_hit(sbi); - return en; - } - } - return NULL; -} - static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei) { @@ -237,17 +380,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } - en = __lookup_extent_tree(sbi, et, pgofs); - if (en) { - *ei = en->ei; - spin_lock(&sbi->extent_lock); - if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); - et->cached_en = en; - } - spin_unlock(&sbi->extent_lock); - ret = true; + en = (struct extent_node *)__lookup_rb_tree(&et->root, + (struct rb_entry *)et->cached_en, pgofs); + if (!en) + goto out; + + if (en == et->cached_en) + stat_inc_cached_node_hit(sbi); + else + stat_inc_rbtree_node_hit(sbi); + + *ei = en->ei; + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) { + list_move_tail(&en->list, &sbi->extent_list); + et->cached_en = en; } + spin_unlock(&sbi->extent_lock); + ret = true; out: stat_inc_total_hit(sbi); read_unlock(&et->lock); @@ -256,83 +406,6 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, return ret; } - -/* - * lookup extent at @fofs, if hit, return the extent - * if not, return NULL and - * @prev_ex: extent before fofs - * @next_ex: extent after fofs - * @insert_p: insert point for new extent at fofs - * in order to simpfy the insertion after. - * tree must stay unchanged between lookup and insertion. - */ -static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et, - unsigned int fofs, - struct extent_node **prev_ex, - struct extent_node **next_ex, - struct rb_node ***insert_p, - struct rb_node **insert_parent) -{ - struct rb_node **pnode = &et->root.rb_node; - struct rb_node *parent = NULL, *tmp_node; - struct extent_node *en = et->cached_en; - - *insert_p = NULL; - *insert_parent = NULL; - *prev_ex = NULL; - *next_ex = NULL; - - if (RB_EMPTY_ROOT(&et->root)) - return NULL; - - if (en) { - struct extent_info *cei = &en->ei; - - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) - goto lookup_neighbors; - } - - while (*pnode) { - parent = *pnode; - en = rb_entry(*pnode, struct extent_node, rb_node); - - if (fofs < en->ei.fofs) - pnode = &(*pnode)->rb_left; - else if (fofs >= en->ei.fofs + en->ei.len) - pnode = &(*pnode)->rb_right; - else - goto lookup_neighbors; - } - - *insert_p = pnode; - *insert_parent = parent; - - en = rb_entry(parent, struct extent_node, rb_node); - tmp_node = parent; - if (parent && fofs > en->ei.fofs) - tmp_node = rb_next(parent); - *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - - tmp_node = parent; - if (parent && fofs < en->ei.fofs) - tmp_node = rb_prev(parent); - *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - return NULL; - -lookup_neighbors: - if (fofs == en->ei.fofs) { - /* lookup prev node for merging backward later */ - tmp_node = rb_prev(&en->rb_node); - *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - } - if (fofs == en->ei.fofs + en->ei.len - 1) { - /* lookup next node for merging frontward later */ - tmp_node = rb_next(&en->rb_node); - *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - } - return en; -} - static struct extent_node *__try_merge_extent_node(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, @@ -387,17 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, goto do_insert; } - while (*p) { - parent = *p; - en = rb_entry(parent, struct extent_node, rb_node); - - if (ei->fofs < en->ei.fofs) - p = &(*p)->rb_left; - else if (ei->fofs >= en->ei.fofs + en->ei.len) - p = &(*p)->rb_right; - else - f2fs_bug_on(sbi, 1); - } + p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); do_insert: en = __attach_extent_node(sbi, et, ei, parent, p); if (!en) @@ -447,8 +510,11 @@ static void f2fs_update_extent_tree_range(struct inode *inode, __drop_largest_extent(inode, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ - en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en, - &insert_p, &insert_parent); + en = (struct extent_node *)__lookup_rb_tree_ret(&et->root, + (struct rb_entry *)et->cached_en, fofs, + (struct rb_entry **)&prev_en, + (struct rb_entry **)&next_en, + &insert_p, &insert_parent, false); if (!en) en = next_en; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a6e115562f62edca5b60ee4c833e889a904c202..fd2e651bad6d3c620b83f9dc32e25dfad22a4e7f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -50,6 +50,7 @@ enum { FAULT_BLOCK, FAULT_DIR_DEPTH, FAULT_EVICT_INODE, + FAULT_TRUNCATE, FAULT_IO, FAULT_CHECKPOINT, FAULT_MAX, @@ -62,7 +63,7 @@ struct f2fs_fault_info { }; extern char *fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) (fi->inject_type & (1 << (type))) +#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif /* @@ -88,9 +89,9 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_ADAPTIVE 0x00020000 #define F2FS_MOUNT_LFS 0x00040000 -#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) +#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -124,22 +125,20 @@ enum { SIT_BITMAP }; -enum { - CP_UMOUNT, - CP_FASTBOOT, - CP_SYNC, - CP_RECOVERY, - CP_DISCARD, -}; +#define CP_UMOUNT 0x00000001 +#define CP_FASTBOOT 0x00000002 +#define CP_SYNC 0x00000004 +#define CP_RECOVERY 0x00000008 +#define CP_DISCARD 0x00000010 +#define CP_TRIMMED 0x00000020 #define DEF_BATCHED_TRIM_SECTIONS 2048 #define BATCHED_TRIM_SEGMENTS(sbi) \ - (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) + (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections)) #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) -#define MAX_DISCARD_BLOCKS(sbi) \ - ((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec) -#define DISCARD_ISSUE_RATE 8 +#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) +#define DISCARD_ISSUE_RATE 8 #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ @@ -181,37 +180,63 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; -/* for the list of blockaddresses to be discarded */ +/* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ - block_t blkaddr; /* block address to be discarded */ - int len; /* # of consecutive blocks of the discard */ + block_t start_blkaddr; /* start blockaddr of current segment */ + unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* max discard pend list number */ +#define MAX_PLIST_NUM 512 +#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ + (MAX_PLIST_NUM - 1) : (blk_num - 1)) + enum { D_PREP, D_SUBMIT, D_DONE, }; +struct discard_info { + block_t lstart; /* logical start address */ + block_t len; /* length */ + block_t start; /* actual start address in dev */ +}; + struct discard_cmd { + struct rb_node rb_node; /* rb node located in rb-tree */ + union { + struct { + block_t lstart; /* logical start address */ + block_t len; /* length */ + block_t start; /* actual start address in dev */ + }; + struct discard_info di; /* discard info */ + + }; struct list_head list; /* command list */ struct completion wait; /* compleation */ - block_t lstart; /* logical start address */ - block_t len; /* length */ - struct bio *bio; /* bio */ - int state; /* state */ + struct block_device *bdev; /* bdev */ + unsigned short ref; /* reference count */ + unsigned char state; /* state */ + int error; /* bio error */ }; struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ - struct list_head discard_entry_list; /* 4KB discard entry list */ - int nr_discards; /* # of discards in the list */ - struct list_head discard_cmd_list; /* discard cmd list */ + struct list_head entry_list; /* 4KB discard entry list */ + struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ + struct list_head wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; - int max_discards; /* max. discards to be issued */ - atomic_t submit_discard; /* # of issued discard */ + unsigned int nr_discards; /* # of discards in the list */ + unsigned int max_discards; /* max. discards to be issued */ + unsigned int undiscard_blks; /* # of undiscard blocks */ + atomic_t issued_discard; /* # of issued discard */ + atomic_t issing_discard; /* # of issing discard */ + atomic_t discard_cmd_cnt; /* # of cached cmd count */ + struct rb_root root; /* root of discard rb-tree */ }; /* for the list of fsync inodes, used only during recovery */ @@ -222,13 +247,13 @@ struct fsync_inode_entry { block_t last_dentry; /* block address locating the last dentry */ }; -#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) -#define sits_in_cursum(jnl) (le16_to_cpu(jnl->n_sits)) +#define nats_in_cursum(jnl) (le16_to_cpu((jnl)->n_nats)) +#define sits_in_cursum(jnl) (le16_to_cpu((jnl)->n_sits)) -#define nat_in_journal(jnl, i) (jnl->nat_j.entries[i].ne) -#define nid_in_journal(jnl, i) (jnl->nat_j.entries[i].nid) -#define sit_in_journal(jnl, i) (jnl->sit_j.entries[i].se) -#define segno_in_journal(jnl, i) (jnl->sit_j.entries[i].segno) +#define nat_in_journal(jnl, i) ((jnl)->nat_j.entries[i].ne) +#define nid_in_journal(jnl, i) ((jnl)->nat_j.entries[i].nid) +#define sit_in_journal(jnl, i) ((jnl)->sit_j.entries[i].se) +#define segno_in_journal(jnl, i) ((jnl)->sit_j.entries[i].segno) #define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl)) #define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl)) @@ -270,11 +295,14 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) -#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) +#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) -#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) +#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ + struct f2fs_defragment) #define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ struct f2fs_move_range) +#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ + struct f2fs_flush_device) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -311,6 +339,11 @@ struct f2fs_move_range { u64 len; /* size to move */ }; +struct f2fs_flush_device { + u32 dev_num; /* device number to flush */ + u32 segments; /* # of segments to flush */ +}; + /* * For INODE and NODE manager */ @@ -323,26 +356,24 @@ struct f2fs_dentry_ptr { int max; }; -static inline void make_dentry_ptr(struct inode *inode, - struct f2fs_dentry_ptr *d, void *src, int type) +static inline void make_dentry_ptr_block(struct inode *inode, + struct f2fs_dentry_ptr *d, struct f2fs_dentry_block *t) { d->inode = inode; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; +} - if (type == 1) { - struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; - - d->max = NR_DENTRY_IN_BLOCK; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; - } else { - struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; - - d->max = NR_INLINE_DENTRY; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; - } +static inline void make_dentry_ptr_inline(struct inode *inode, + struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t) +{ + d->inode = inode; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; } /* @@ -374,16 +405,30 @@ enum { /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +struct rb_entry { + struct rb_node rb_node; /* rb node located in rb-tree */ + unsigned int ofs; /* start offset of the entry */ + unsigned int len; /* length of the entry */ +}; + struct extent_info { unsigned int fofs; /* start offset in a file */ - u32 blk; /* start block address of the extent */ unsigned int len; /* length of the extent */ + u32 blk; /* start block address of the extent */ }; struct extent_node { - struct rb_node rb_node; /* rb node located in rb-tree */ + struct rb_node rb_node; + union { + struct { + unsigned int fofs; + unsigned int len; + u32 blk; + }; + struct extent_info ei; /* extent info */ + + }; struct list_head list; /* node in global extent list of sbi */ - struct extent_info ei; /* extent info */ struct extent_tree *et; /* extent tree pointer */ }; @@ -500,6 +545,24 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->len = len; } +static inline bool __is_discard_mergeable(struct discard_info *back, + struct discard_info *front) +{ + return back->lstart + back->len == front->lstart; +} + +static inline bool __is_discard_back_mergeable(struct discard_info *cur, + struct discard_info *back) +{ + return __is_discard_mergeable(back, cur); +} + +static inline bool __is_discard_front_mergeable(struct discard_info *cur, + struct discard_info *front) +{ + return __is_discard_mergeable(cur, front); +} + static inline bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front) { @@ -562,7 +625,6 @@ struct f2fs_nm_info { unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ - spinlock_t free_nid_lock; /* protect updating of nid count */ /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ @@ -641,7 +703,8 @@ struct flush_cmd { struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - atomic_t submit_flush; /* # of issued flushes */ + atomic_t issued_flush; /* # of issued flushes */ + atomic_t issing_flush; /* # of issing flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -672,6 +735,7 @@ struct f2fs_sm_info { unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ + unsigned int min_hot_blocks; /* threshold for hot block allocation */ /* for flush command control */ struct flush_cmd_control *fcc_info; @@ -722,6 +786,7 @@ enum page_type { META_FLUSH, INMEM, /* the below types are used by tracepoints only. */ INMEM_DROP, + INMEM_INVALIDATE, INMEM_REVOKE, IPU, OPU, @@ -737,9 +802,10 @@ struct f2fs_io_info { struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ bool submitted; /* indicate IO submission */ + bool need_lock; /* indicate we need to lock cp_rwsem */ }; -#define is_read_io(rw) (rw == READ) +#define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ struct bio *bio; /* bios to merge */ @@ -827,6 +893,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct rw_semaphore node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -879,6 +946,9 @@ struct f2fs_sb_info { /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* writeback control */ + atomic_t wb_sync_req; /* count # of WB_SYNC threads */ + /* valid inode count */ struct percpu_counter total_valid_inode_count; @@ -912,11 +982,12 @@ struct f2fs_sb_info { atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ atomic_t aw_cnt; /* # of atomic writes */ + atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ + atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif - unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ /* For sysfs suppport */ @@ -971,8 +1042,8 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) * and the return value is in kbytes. s is of struct f2fs_sb_info. */ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) - \ - s->sectors_written_start) >> 1) +(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) - \ + (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { @@ -1007,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, { SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver); u32 *ctx = (u32 *)shash_desc_ctx(shash); + u32 retval; int err; shash->tfm = sbi->s_chksum_driver; @@ -1016,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, err = crypto_shash_update(shash, address, length); BUG_ON(err); - return *ctx; + retval = *ctx; + barrier_data(ctx); + return retval; } static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, @@ -1193,7 +1267,7 @@ static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, { bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set; + return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set; } static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) @@ -1229,7 +1303,7 @@ static inline int __get_cp_reason(struct f2fs_sb_info *sbi) static inline bool __remain_node_summaries(int reason) { - return (reason == CP_UMOUNT || reason == CP_FASTBOOT); + return (reason & (CP_UMOUNT | CP_FASTBOOT)); } static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) @@ -1707,6 +1781,7 @@ enum { FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -1869,12 +1944,6 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DATA); } -static inline void f2fs_clear_inline_inode(struct inode *inode) -{ - clear_inode_flag(inode, FI_INLINE_DATA); - clear_inode_flag(inode, FI_DATA_EXIST); -} - static inline int f2fs_exist_data(struct inode *inode) { return is_inode_flag_set(inode, FI_DATA_EXIST); @@ -2005,36 +2074,10 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kmalloc(size, flags); } -static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kmalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags, PAGE_KERNEL); - return ret; -} - -static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) -{ - void *ret; - - ret = kzalloc(size, flags | __GFP_NOWARN); - if (!ret) - ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); - return ret; -} - #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) -/* get offset of first page in next direct node */ -#define PGOFS_OF_NEXT_DNODE(pgofs, inode) \ - ((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) : \ - (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ - ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) - /* * file.c */ @@ -2096,8 +2139,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct page **page); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); -int update_dent_inode(struct inode *inode, struct inode *to, - const struct qstr *name); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); @@ -2133,7 +2174,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info); +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, + struct fscrypt_name *fname); /* * node.c @@ -2184,6 +2226,7 @@ void destroy_node_manager_caches(void); */ void register_inmem_page(struct inode *inode, struct page *page); void drop_inmem_pages(struct inode *inode); +void drop_inmem_page(struct inode *inode, struct page *page); int commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); @@ -2193,7 +2236,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); @@ -2205,7 +2248,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); void write_meta_page(struct f2fs_sb_info *sbi, struct page *page); void write_node_page(unsigned int nid, struct f2fs_io_info *fio); void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); -void rewrite_data_page(struct f2fs_io_info *fio); +int rewrite_data_page(struct f2fs_io_info *fio); void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr); @@ -2310,7 +2353,8 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, int start_gc_thread(struct f2fs_sb_info *sbi); void stop_gc_thread(struct f2fs_sb_info *sbi); block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background); +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, + unsigned int segno); void build_gc_manager(struct f2fs_sb_info *sbi); /* @@ -2334,11 +2378,15 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; int inmem_pages; unsigned int ndirty_dirs, ndirty_files, ndirty_all; - int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; + int nats, dirty_nats, sits, dirty_sits; + int free_nids, avail_nids, alloc_nids; int total_count, utilization; - int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard; + int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_discard_cmd; + unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; - int aw_cnt, max_aw_cnt; + int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -2421,11 +2469,22 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ } while (0) +#define stat_inc_volatile_write(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->vw_cnt)) +#define stat_dec_volatile_write(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->vw_cnt)) +#define stat_update_max_volatile_write(inode) \ + do { \ + int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt); \ + int max = atomic_read(&F2FS_I_SB(inode)->max_vw_cnt); \ + if (cur > max) \ + atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur); \ + } while (0) #define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) { \ + si->tot_segs++; \ + if ((type) == SUM_TYPE_DATA) { \ si->data_segs++; \ si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ } else { \ @@ -2435,14 +2494,14 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) } while (0) #define stat_inc_tot_blk_count(si, blks) \ - (si->tot_blks += (blks)) + ((si)->tot_blks += (blks)) #define stat_inc_data_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ - si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ + si->bg_data_blks += ((gc_type) == BG_GC) ? (blks) : 0; \ } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) \ @@ -2450,7 +2509,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ - si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ + si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0; \ } while (0) int f2fs_build_stats(struct f2fs_sb_info *sbi); @@ -2458,32 +2517,35 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi); int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else -#define stat_inc_cp_count(si) -#define stat_inc_bg_cp_count(si) -#define stat_inc_call_count(si) -#define stat_inc_bggc_count(si) -#define stat_inc_dirty_inode(sbi, type) -#define stat_dec_dirty_inode(sbi, type) -#define stat_inc_total_hit(sb) -#define stat_inc_rbtree_node_hit(sb) -#define stat_inc_largest_node_hit(sbi) -#define stat_inc_cached_node_hit(sbi) -#define stat_inc_inline_xattr(inode) -#define stat_dec_inline_xattr(inode) -#define stat_inc_inline_inode(inode) -#define stat_dec_inline_inode(inode) -#define stat_inc_inline_dir(inode) -#define stat_dec_inline_dir(inode) -#define stat_inc_atomic_write(inode) -#define stat_dec_atomic_write(inode) -#define stat_update_max_atomic_write(inode) -#define stat_inc_seg_type(sbi, curseg) -#define stat_inc_block_count(sbi, curseg) -#define stat_inc_inplace_blocks(sbi) -#define stat_inc_seg_count(sbi, type, gc_type) -#define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(sbi, blks, gc_type) -#define stat_inc_node_blk_count(sbi, blks, gc_type) +#define stat_inc_cp_count(si) do { } while (0) +#define stat_inc_bg_cp_count(si) do { } while (0) +#define stat_inc_call_count(si) do { } while (0) +#define stat_inc_bggc_count(si) do { } while (0) +#define stat_inc_dirty_inode(sbi, type) do { } while (0) +#define stat_dec_dirty_inode(sbi, type) do { } while (0) +#define stat_inc_total_hit(sb) do { } while (0) +#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_largest_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_inline_xattr(inode) do { } while (0) +#define stat_dec_inline_xattr(inode) do { } while (0) +#define stat_inc_inline_inode(inode) do { } while (0) +#define stat_dec_inline_inode(inode) do { } while (0) +#define stat_inc_inline_dir(inode) do { } while (0) +#define stat_dec_inline_dir(inode) do { } while (0) +#define stat_inc_atomic_write(inode) do { } while (0) +#define stat_dec_atomic_write(inode) do { } while (0) +#define stat_update_max_atomic_write(inode) do { } while (0) +#define stat_inc_volatile_write(inode) do { } while (0) +#define stat_dec_volatile_write(inode) do { } while (0) +#define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_seg_type(sbi, curseg) do { } while (0) +#define stat_inc_block_count(sbi, curseg) do { } while (0) +#define stat_inc_inplace_blocks(sbi) do { } while (0) +#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_tot_blk_count(si, blks) do { } while (0) +#define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) +#define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } @@ -2509,7 +2571,7 @@ extern struct kmem_cache *inode_entry_slab; bool f2fs_may_inline_data(struct inode *inode); bool f2fs_may_inline_dentry(struct inode *inode); void read_inline_data(struct page *page, struct page *ipage); -bool truncate_inline_inode(struct page *ipage, u64 from); +void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); @@ -2544,6 +2606,18 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ +struct rb_entry *__lookup_rb_tree(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs); +struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root *root, struct rb_node **parent, + unsigned int ofs); +struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs, + struct rb_entry **prev_entry, struct rb_entry **next_entry, + struct rb_node ***insert_p, struct rb_node **insert_parent, + bool force); +bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); void f2fs_drop_extent_tree(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5f7317875a6726b9ea968f81cbf401b2c8966f8f..61af721329fa28563cd7ed6cbe9e3a72a459c69c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -116,11 +116,6 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - if (update_dent_inode(inode, inode, &dentry->d_name)) { - dput(dentry); - return 0; - } - *pino = parent_ino(dentry); dput(dentry); return 1; @@ -528,7 +523,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, page = get_lock_data_page(inode, index, true); if (IS_ERR(page)) - return 0; + return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_SIZE - offset); @@ -566,9 +561,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { - truncate_inline_inode(ipage, from); - if (from == 0) - clear_inode_flag(inode, FI_DATA_EXIST); + truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; @@ -617,6 +610,12 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { + f2fs_show_injection_info(FAULT_TRUNCATE); + return -EIO; + } +#endif /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -1012,11 +1011,11 @@ static int __exchange_data_block(struct inode *src_inode, while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1188,8 +1187,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - if (offset + len > new_size) - new_size = offset + len; new_size = max_t(loff_t, new_size, offset + len); } else { if (off_start) { @@ -1257,8 +1254,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) int ret = 0; new_size = i_size_read(inode) + len; - if (new_size > inode->i_sb->s_maxbytes) - return -EFBIG; + ret = inode_newsize_ok(inode, new_size); + if (ret) + return ret; if (offset >= i_size_read(inode)) return -EINVAL; @@ -1428,6 +1426,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); @@ -1474,10 +1473,10 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) if (ret) return ret; - flags = f2fs_mask_flags(inode->i_mode, flags); - inode_lock(inode); + flags = f2fs_mask_flags(inode->i_mode, flags); + oldflags = fi->i_flags; if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { @@ -1491,10 +1490,11 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) flags = flags & FS_FL_USER_MODIFIABLE; flags |= oldflags & ~FS_FL_USER_MODIFIABLE; fi->i_flags = flags; - inode_unlock(inode); inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); + + inode_unlock(inode); out: mnt_drop_write_file(filp); return ret; @@ -1515,6 +1515,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -1529,20 +1532,25 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) goto out; set_inode_flag(inode, FI_ATOMIC_FILE); + set_inode_flag(inode, FI_HOT_DATA); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) - goto out; + goto inc_stat; f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) + if (ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); -out: + goto out; + } + +inc_stat: stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); +out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1592,6 +1600,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -1605,6 +1616,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (ret) goto out; + stat_inc_volatile_write(inode); + stat_update_max_volatile_write(inode); + set_inode_flag(inode, FI_VOLATILE_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: @@ -1660,6 +1674,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } @@ -1841,7 +1856,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) mutex_lock(&sbi->gc_mutex); } - ret = f2fs_gc(sbi, sync, true); + ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); out: mnt_drop_write_file(filp); return ret; @@ -1879,13 +1894,12 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; - unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update(inode)) + if (need_inplace_update_policy(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -1943,7 +1957,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, map.m_lblk = pg_start; map.m_len = pg_end - pg_start; - sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; + sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can @@ -2020,42 +2034,40 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) return -EINVAL; - err = mnt_want_write_file(filp); - if (err) - return err; - - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } + if (f2fs_readonly(sbi->sb)) + return -EROFS; if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, - sizeof(range))) { - err = -EFAULT; - goto out; - } + sizeof(range))) + return -EFAULT; /* verify alignment of offset & size */ - if (range.start & (F2FS_BLKSIZE - 1) || - range.len & (F2FS_BLKSIZE - 1)) { - err = -EINVAL; - goto out; - } + if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) + return -EINVAL; + + if (unlikely((range.start + range.len) >> PAGE_SHIFT > + sbi->max_file_blocks)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) + return err; err = f2fs_defragment_range(sbi, filp, &range); + mnt_drop_write_file(filp); + f2fs_update_time(sbi, REQ_TIME); if (err < 0) - goto out; + return err; if (copy_to_user((struct f2fs_defragment __user *)arg, &range, sizeof(range))) - err = -EFAULT; -out: - mnt_drop_write_file(filp); - return err; + return -EFAULT; + + return 0; } static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, @@ -2189,6 +2201,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) range.pos_out, range.len); mnt_drop_write_file(filp); + if (err) + goto err_out; if (copy_to_user((struct f2fs_move_range __user *)arg, &range, sizeof(range))) @@ -2198,6 +2212,69 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) return err; } +static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct sit_info *sm = SIT_I(sbi); + unsigned int start_segno = 0, end_segno = 0; + unsigned int dev_start_segno = 0, dev_end_segno = 0; + struct f2fs_flush_device range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, + sizeof(range))) + return -EFAULT; + + if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || + sbi->segs_per_sec != 1) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Can't flush %u in %d for segs_per_sec %u != 1\n", + range.dev_num, sbi->s_ndevs, + sbi->segs_per_sec); + return -EINVAL; + } + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (range.dev_num != 0) + dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); + dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); + + start_segno = sm->last_victim[FLUSH_DEVICE]; + if (start_segno < dev_start_segno || start_segno >= dev_end_segno) + start_segno = dev_start_segno; + end_segno = min(start_segno + range.segments, dev_end_segno); + + while (start_segno < end_segno) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + sm->last_victim[GC_CB] = end_segno + 1; + sm->last_victim[GC_GREEDY] = end_segno + 1; + sm->last_victim[ALLOC_NEXT] = end_segno + 1; + ret = f2fs_gc(sbi, true, true, start_segno); + if (ret == -EAGAIN) + ret = 0; + else if (ret < 0) + break; + start_segno++; + } +out: + mnt_drop_write_file(filp); + return ret; +} + + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -2235,6 +2312,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_defragment(filp, arg); case F2FS_IOC_MOVE_RANGE: return f2fs_ioc_move_range(filp, arg); + case F2FS_IOC_FLUSH_DEVICE: + return f2fs_ioc_flush_device(filp, arg); default: return -ENOTTY; } @@ -2302,8 +2381,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GARBAGE_COLLECT: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: - break; case F2FS_IOC_MOVE_RANGE: + case F2FS_IOC_FLUSH_DEVICE: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 418fd988164677623cf0ad305d34f7855fb608dd..026522107ca3f843dee496b0f1ab5359c10eeb6a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -84,7 +84,7 @@ static int gc_thread_func(void *data) stat_inc_bggc_count(sbi); /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true)) + if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, @@ -172,7 +172,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - p->offset = sbi->last_victim[p->gc_mode]; + /* let's select beginning hot/small space first */ + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + p->offset = 0; + else + p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; } static unsigned int get_max_cost(struct f2fs_sb_info *sbi, @@ -182,7 +186,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return sbi->blocks_per_seg; if (p->gc_mode == GC_GREEDY) - return sbi->blocks_per_seg * p->ofs_unit; + return 2 * sbi->blocks_per_seg * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else /* No other gc_mode */ @@ -207,7 +211,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) continue; clear_bit(secno, dirty_i->victim_secmap); - return secno * sbi->segs_per_sec; + return GET_SEG_FROM_SEC(sbi, secno); } return NULL_SEGNO; } @@ -215,8 +219,8 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SECNO(sbi, segno); - unsigned int start = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; @@ -225,7 +229,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) for (i = 0; i < sbi->segs_per_sec; i++) mtime += get_seg_entry(sbi, start + i)->mtime; - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); mtime = div_u64(mtime, sbi->segs_per_sec); vblocks = div_u64(vblocks, sbi->segs_per_sec); @@ -248,7 +252,7 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int valid_blocks = - get_valid_blocks(sbi, segno, sbi->segs_per_sec); + get_valid_blocks(sbi, segno, true); return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? valid_blocks * 2 : valid_blocks; @@ -291,6 +295,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; unsigned int last_segment = MAIN_SEGS(sbi); @@ -304,10 +309,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_segno = NULL_SEGNO; p.min_cost = get_max_cost(sbi, &p); + if (*result != NULL_SEGNO) { + if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && + get_valid_blocks(sbi, *result, false) && + !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + p.min_segno = *result; + goto out; + } + if (p.max_search == 0) goto out; - last_victim = sbi->last_victim[p.gc_mode]; + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); if (p.min_segno != NULL_SEGNO) @@ -320,9 +333,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); if (segno >= last_segment) { - if (sbi->last_victim[p.gc_mode]) { - last_segment = sbi->last_victim[p.gc_mode]; - sbi->last_victim[p.gc_mode] = 0; + if (sm->last_victim[p.gc_mode]) { + last_segment = + sm->last_victim[p.gc_mode]; + sm->last_victim[p.gc_mode] = 0; p.offset = 0; continue; } @@ -339,7 +353,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } - secno = GET_SECNO(sbi, segno); + secno = GET_SEC_FROM_SEG(sbi, segno); if (sec_usage_check(sbi, secno)) goto next; @@ -357,17 +371,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } next: if (nsearched >= p.max_search) { - if (!sbi->last_victim[p.gc_mode] && segno <= last_victim) - sbi->last_victim[p.gc_mode] = last_victim + 1; + if (!sm->last_victim[p.gc_mode] && segno <= last_victim) + sm->last_victim[p.gc_mode] = last_victim + 1; else - sbi->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); break; } } if (p.min_segno != NULL_SEGNO) { got_it: if (p.alloc_mode == LFS) { - secno = GET_SECNO(sbi, p.min_segno); + secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) sbi->cur_victim_sec = secno; else @@ -550,8 +565,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, get_node_info(sbi, nid, dni); if (sum->version != dni->version) { - f2fs_put_page(node_page, 1); - return false; + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: valid data with mismatched node version.", + __func__); + set_sbi_flag(sbi, SBI_NEED_FSCK); } *nofs = ofs_of_node(node_page); @@ -697,8 +714,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, + .need_lock = true, }; bool is_dirty = PageDirty(page); int err; @@ -890,7 +909,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, 1) == 0 || + if (get_valid_blocks(sbi, segno, false) == 0 || !PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) goto next; @@ -905,7 +924,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - mutex_lock(sentry_lock) - change_curseg() * - lock_page(sum_page) */ - if (type == SUM_TYPE_NODE) gc_node_segment(sbi, sum->entries, segno, gc_type); else @@ -924,7 +942,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, blk_finish_plug(&plug); if (gc_type == FG_GC && - get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0) + get_valid_blocks(sbi, start_segno, true) == 0) sec_freed = 1; stat_inc_call_count(sbi->stat_info); @@ -932,13 +950,14 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, return sec_freed; } -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, + bool background, unsigned int segno) { - unsigned int segno; int gc_type = sync ? FG_GC : BG_GC; int sec_freed = 0; int ret = -EINVAL; struct cp_control cpc; + unsigned int init_segno = segno; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), @@ -959,9 +978,11 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) * threshold, we can make them free by checkpoint. Then, we * secure free segments which doesn't need fggc any more. */ - ret = write_checkpoint(sbi, &cpc); - if (ret) - goto stop; + if (prefree_segments(sbi)) { + ret = write_checkpoint(sbi, &cpc); + if (ret) + goto stop; + } if (has_not_enough_free_secs(sbi, 0, 0)) gc_type = FG_GC; } @@ -981,13 +1002,17 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) sbi->cur_victim_sec = NULL_SEGNO; if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed, 0)) + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + segno = NULL_SEGNO; goto gc_more; + } if (gc_type == FG_GC) ret = write_checkpoint(sbi, &cpc); } stop: + SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; + SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno; mutex_unlock(&sbi->gc_mutex); put_gc_inode(&gc_list); @@ -999,7 +1024,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) void build_gc_manager(struct f2fs_sb_info *sbi) { - u64 main_count, resv_count, ovp_count, blocks_per_sec; + u64 main_count, resv_count, ovp_count; DIRTY_I(sbi)->v_ops = &default_v_ops; @@ -1007,8 +1032,12 @@ void build_gc_manager(struct f2fs_sb_info *sbi) main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; - blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; - sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec, - (main_count - resv_count)); + sbi->fggc_threshold = div64_u64((main_count - ovp_count) * + BLKS_PER_SEC(sbi), (main_count - resv_count)); + + /* give warm/cold data area from slower device */ + if (sbi->s_ndevs && sbi->segs_per_sec == 1) + SIT_I(sbi)->last_victim[ALLOC_NEXT] = + GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 71b7206c431ea00f937e5b1e59b5f90999091d2a..eb2e031ea887bed43229f63e9923986bc14f3955 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, + struct fscrypt_name *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) const unsigned char *name = name_info->name; size_t len = name_info->len; + /* encrypted bigname case */ + if (fname && !fname->disk_name.name) + return cpu_to_le32(fname->hash); + if (is_dot_dotdot(name_info)) return 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e32a9e5279686035829dba5d45396e22cf10438f..e4c527c4e7d0b382907b0c8fbf95d0f15f4bbc09 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -63,19 +63,21 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } -bool truncate_inline_inode(struct page *ipage, u64 from) +void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) { void *addr; if (from >= MAX_INLINE_DATA) - return false; + return; addr = inline_data_addr(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); memset(addr + from, 0, MAX_INLINE_DATA - from); set_page_dirty(ipage); - return true; + + if (from == 0) + clear_inode_flag(inode, FI_DATA_EXIST); } int f2fs_read_inline_data(struct inode *inode, struct page *page) @@ -135,6 +137,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); fio.old_blkaddr = dn->data_blkaddr; + set_inode_flag(dn->inode, FI_HOT_DATA); write_data_page(dn, &fio); f2fs_wait_on_page_writeback(page, DATA, true); if (dirty) { @@ -146,11 +149,11 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_inode(dn->inode_page, 0); + truncate_inline_inode(dn->inode, dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); - f2fs_clear_inline_inode(dn->inode); + clear_inode_flag(dn->inode, FI_INLINE_DATA); f2fs_put_dnode(dn); return 0; } @@ -267,9 +270,8 @@ bool recover_inline_data(struct inode *inode, struct page *npage) if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - if (!truncate_inline_inode(ipage, 0)) - return false; - f2fs_clear_inline_inode(inode); + truncate_inline_inode(inode, ipage, 0); + clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { if (truncate_blocks(inode, 0, false)) @@ -296,11 +298,11 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, return NULL; } - namehash = f2fs_dentry_hash(&name); + namehash = f2fs_dentry_hash(&name, fname); inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(NULL, &d, inline_dentry); de = find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) @@ -319,7 +321,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, dentry_blk = inline_data_addr(ipage); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + make_dentry_ptr_inline(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); @@ -380,7 +382,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_inode(ipage, 0); + truncate_inline_inode(dir, ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -400,7 +402,7 @@ static int f2fs_add_inline_entries(struct inode *dir, unsigned long bit_pos = 0; int err = 0; - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(NULL, &d, inline_dentry); while (bit_pos < d.max) { struct f2fs_dir_entry *de; @@ -455,7 +457,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, } memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); - truncate_inline_inode(ipage, 0); + truncate_inline_inode(dir, ipage, 0); unlock_page(ipage); @@ -527,14 +529,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } f2fs_wait_on_page_writeback(ipage, NODE, true); - name_hash = f2fs_dentry_hash(new_name); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + name_hash = f2fs_dentry_hash(new_name, NULL); + make_dentry_ptr_inline(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -623,7 +623,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(inode, &d, inline_dentry); err = f2fs_fill_dentries(ctx, &d, 0, fstr); if (!err) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 24bb8213d974b710b43f286b1089ccff7946a50e..518f49643092582ae256c425616e942ed2392c40 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -316,7 +316,6 @@ int update_inode_page(struct inode *inode) } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } - f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -339,7 +338,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - if (update_inode_page(inode) && wbc && wbc->nr_to_write) + update_inode_page(inode); + if (wbc && wbc->nr_to_write) f2fs_balance_fs(sbi, true); return 0; } @@ -372,13 +372,6 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); - goto no_delete; - } -#endif - remove_ino_entry(sbi, inode->i_ino, APPEND_INO); remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -389,6 +382,12 @@ void f2fs_evict_inode(struct inode *inode) if (F2FS_HAS_BLOCKS(inode)) err = f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_EVICT_INODE)) { + f2fs_show_injection_info(FAULT_EVICT_INODE); + err = -EIO; + } +#endif if (!err) { f2fs_lock_op(sbi); err = remove_inode_page(inode); @@ -411,7 +410,10 @@ void f2fs_evict_inode(struct inode *inode) stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); + /* ino == 0, if f2fs_new_inode() was failed t*/ + if (inode->i_ino) + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, + inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (inode->i_nlink) { @@ -448,6 +450,7 @@ void handle_failed_inode(struct inode *inode) * in a panic when flushing dirty inodes in gdirty_list. */ update_inode_page(inode); + f2fs_inode_synced(inode); /* don't make bad inode, since it becomes a regular file. */ unlock_new_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 98f00a3a7f5016a42741169736f1182efe83a3b0..c31b40e5f9cf6dce167f3c24381df1e31c63acf2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -148,8 +148,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -163,6 +161,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -324,9 +324,10 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (f2fs_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - bool nokey = f2fs_encrypted_inode(inode) && - !fscrypt_has_encryption_key(inode); - err = nokey ? -ENOKEY : -EPERM; + f2fs_msg(inode->i_sb, KERN_WARNING, + "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); + err = -EPERM; goto err_out; } return d_splice_alias(inode, dentry); @@ -423,8 +424,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode_nohighmem(inode); inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -487,6 +486,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, } kfree(sd); + + f2fs_balance_fs(sbi, true); return err; out: handle_failed_inode(inode); @@ -508,8 +509,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); - f2fs_balance_fs(sbi, true); - set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -524,6 +523,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out_fail: @@ -554,8 +555,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -569,6 +568,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -595,8 +596,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) @@ -622,6 +621,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, /* link_count was changed by d_tmpfile as well. */ f2fs_unlock_op(sbi); unlock_new_inode(inode); + + f2fs_balance_fs(sbi, true); return 0; release_out: @@ -720,13 +721,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto put_out_dir; - err = update_dent_inode(old_inode, new_inode, - &new_dentry->d_name); - if (err) { - release_orphan_inode(sbi); - goto put_out_dir; - } - f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_inode->i_ctime = current_time(new_inode); @@ -779,8 +773,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, down_write(&F2FS_I(old_inode)->i_sem); file_lost_pino(old_inode); - if (new_inode && file_enc_name(new_inode)) - file_set_enc_name(old_inode); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); @@ -908,8 +900,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, old_nlink = old_dir_entry ? -1 : 1; new_nlink = -old_nlink; err = -EMLINK; - if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || - (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) + if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) || + (new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX)) goto out_new_dir; } @@ -917,18 +909,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_lock_op(sbi); - err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); - if (err) - goto out_unlock; - if (file_enc_name(new_inode)) - file_set_enc_name(old_inode); - - err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name); - if (err) - goto out_undo; - if (file_enc_name(old_inode)) - file_set_enc_name(new_inode); - /* update ".." directory entry info of old dentry */ if (old_dir_entry) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); @@ -972,14 +952,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); return 0; -out_undo: - /* - * Still we may fail to recover name info of f2fs_inode here - * Drop it, once its name is set as encrypted - */ - update_dent_inode(old_inode, old_inode, &old_dentry->d_name); -out_unlock: - f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { f2fs_dentry_kunmap(new_inode, new_dir_page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 481aa8dc79f46f4c156cf67cca665e8160e36e6a..4547c5c5cd989373235531bb21193531bd05e720 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -22,7 +22,7 @@ #include "trace.h" #include -#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) +#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -63,8 +63,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) int i; for (i = 0; i <= UPDATE_INO; i++) - mem_size += (sbi->im[i].ino_num * - sizeof(struct ino_entry)) >> PAGE_SHIFT; + mem_size += sbi->im[i].ino_num * + sizeof(struct ino_entry); + mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == EXTENT_CACHE) { mem_size = (atomic_read(&sbi->total_ext_tree) * @@ -177,18 +178,12 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry_set *set, struct nat_entry *ne) { - nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); - struct nat_entry_set *head; - - head = radix_tree_lookup(&nm_i->nat_set_root, set); - if (head) { - list_move_tail(&ne->list, &nm_i->nat_entries); - set_nat_flag(ne, IS_DIRTY, false); - head->entry_cnt--; - nm_i->dirty_nat_cnt--; - } + list_move_tail(&ne->list, &nm_i->nat_entries); + set_nat_flag(ne, IS_DIRTY, false); + set->entry_cnt--; + nm_i->dirty_nat_cnt--; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -381,6 +376,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct page *page = NULL; struct f2fs_nat_entry ne; struct nat_entry *e; + pgoff_t index; int i; ni->nid = nid; @@ -406,17 +402,21 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); } up_read(&curseg->journal_rwsem); - if (i >= 0) + if (i >= 0) { + up_read(&nm_i->nat_tree_lock); goto cache; + } /* Fill node_info from nat page */ - page = get_current_nat_page(sbi, start_nid); + index = current_nat_addr(sbi, nid); + up_read(&nm_i->nat_tree_lock); + + page = get_meta_page(sbi, index); nat_blk = (struct f2fs_nat_block *)page_address(page); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - up_read(&nm_i->nat_tree_lock); /* cache nat entry */ down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); @@ -1463,6 +1463,9 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_wait_on_page_writeback(page, NODE, true); BUG_ON(PageWriteback(page)); + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + if (!atomic || page == last_page) { set_fsync_mark(page, 1); if (IS_INODE(page)) { @@ -1766,40 +1769,67 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi, static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i; + struct free_nid *i, *e; struct nat_entry *ne; - int err; + int err = -EINVAL; + bool ret = false; /* 0 nid should not be used */ if (unlikely(nid == 0)) return false; - if (build) { - /* do not add allocated nids */ - ne = __lookup_nat_cache(nm_i, nid); - if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || - nat_get_blkaddr(ne) != NULL_ADDR)) - return false; - } - i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; i->state = NID_NEW; - if (radix_tree_preload(GFP_NOFS)) { - kmem_cache_free(free_nid_slab, i); - return true; - } + if (radix_tree_preload(GFP_NOFS)) + goto err; spin_lock(&nm_i->nid_list_lock); + + if (build) { + /* + * Thread A Thread B + * - f2fs_create + * - f2fs_new_inode + * - alloc_nid + * - __insert_nid_to_list(ALLOC_NID_LIST) + * - f2fs_balance_fs_bg + * - build_free_nids + * - __build_free_nids + * - scan_nat_page + * - add_free_nid + * - __lookup_nat_cache + * - f2fs_add_link + * - init_inode_metadata + * - new_inode_page + * - new_node_page + * - set_node_addr + * - alloc_nid_done + * - __remove_nid_from_list(ALLOC_NID_LIST) + * - __insert_nid_to_list(FREE_NID_LIST) + */ + ne = __lookup_nat_cache(nm_i, nid); + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || + nat_get_blkaddr(ne) != NULL_ADDR)) + goto err_out; + + e = __lookup_free_nid_list(nm_i, nid); + if (e) { + if (e->state == NID_NEW) + ret = true; + goto err_out; + } + } + ret = true; err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); +err_out: spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); - if (err) { +err: + if (err) kmem_cache_free(free_nid_slab, i); - return true; - } - return true; + return ret; } static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) @@ -1821,7 +1851,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build, bool locked) + bool set, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1835,14 +1865,10 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - if (!locked) - spin_lock(&nm_i->free_nid_lock); if (set) nm_i->free_nid_count[nat_ofs]++; else if (!build) nm_i->free_nid_count[nat_ofs]--; - if (!locked) - spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1871,7 +1897,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed, true, false); + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, start_nid, freed, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -1927,6 +1955,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) int i = 0; nid_t nid = nm_i->next_scan_nid; + if (unlikely(nid >= nm_i->max_nid)) + nid = 0; + /* Enough entries */ if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) return; @@ -2026,7 +2057,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false, false, false); + update_free_nid_bitmap(sbi, *nid, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2082,7 +2113,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false, false); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&nm_i->nid_list_lock); @@ -2407,16 +2438,16 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); nat_reset_flag(ne); - __clear_nat_cache_dirty(NM_I(sbi), ne); + __clear_nat_cache_dirty(NM_I(sbi), set, ne); if (nat_get_blkaddr(ne) == NULL_ADDR) { add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false, false); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false, false, false); + update_free_nid_bitmap(sbi, nid, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2428,10 +2459,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } - f2fs_bug_on(sbi, set->entry_cnt); - - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); + /* Allow dirty nats by node block allocation in write_begin */ + if (!set->entry_cnt) { + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); + } } /* @@ -2476,8 +2508,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) __flush_nat_entry_set(sbi, set, cpc); up_write(&nm_i->nat_tree_lock); - - f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); + /* Allow dirty nats by node block allocation in write_begin */ } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) @@ -2541,10 +2572,10 @@ inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) nid = i * NAT_ENTRY_PER_BLOCK; last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; - spin_lock(&nm_i->free_nid_lock); + spin_lock(&NM_I(sbi)->nid_list_lock); for (; nid < last_nid; nid++) - update_free_nid_bitmap(sbi, nid, true, true, true); - spin_unlock(&nm_i->free_nid_lock); + update_free_nid_bitmap(sbi, nid, true, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); } for (i = 0; i < nm_i->nat_blocks; i++) { @@ -2621,23 +2652,20 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); - nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks * + nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks * NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; - nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8, + nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; - nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks * + nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks * sizeof(unsigned short), GFP_KERNEL); if (!nm_i->free_nid_count) return -ENOMEM; - - spin_lock_init(&nm_i->free_nid_lock); - return 0; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2f9603fa85a59842281b3443e4984351b2692af8..558048e33cf9a6c1920f03673a98409c34325f4c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -9,10 +9,10 @@ * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ -#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) +#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) /* node block offset on the NAT area dedicated to the given start node id */ -#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) +#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ #define FREE_NID_PAGES 8 @@ -62,16 +62,16 @@ struct nat_entry { struct node_info ni; /* in-memory node information */ }; -#define nat_get_nid(nat) (nat->ni.nid) -#define nat_set_nid(nat, n) (nat->ni.nid = n) -#define nat_get_blkaddr(nat) (nat->ni.blk_addr) -#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) -#define nat_get_ino(nat) (nat->ni.ino) -#define nat_set_ino(nat, i) (nat->ni.ino = i) -#define nat_get_version(nat) (nat->ni.version) -#define nat_set_version(nat, v) (nat->ni.version = v) +#define nat_get_nid(nat) ((nat)->ni.nid) +#define nat_set_nid(nat, n) ((nat)->ni.nid = (n)) +#define nat_get_blkaddr(nat) ((nat)->ni.blk_addr) +#define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b)) +#define nat_get_ino(nat) ((nat)->ni.ino) +#define nat_set_ino(nat, i) ((nat)->ni.ino = (i)) +#define nat_get_version(nat) ((nat)->ni.version) +#define nat_set_version(nat, v) ((nat)->ni.version = (v)) -#define inc_node_version(version) (++version) +#define inc_node_version(version) (++(version)) static inline void copy_node_info(struct node_info *dst, struct node_info *src) @@ -200,13 +200,16 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) struct f2fs_nm_info *nm_i = NM_I(sbi); pgoff_t block_off; pgoff_t block_addr; - int seg_off; + /* + * block_off = segment_off * 512 + off_in_segment + * OLD = (segment_off * 512) * 2 + off_in_segment + * NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment + */ block_off = NAT_BLOCK_OFFSET(start); - seg_off = block_off >> sbi->log_blocks_per_seg; block_addr = (pgoff_t)(nm_i->nat_blkaddr + - (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off << 1) - (block_off & (sbi->blocks_per_seg - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d025aa83fb5bb344fcb5799f080d033895e584aa..907d6b7dde6a7f636ea7027d461cfd029b8b1ae5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -198,7 +198,8 @@ static void recover_inode(struct inode *inode, struct page *page) ino_of_node(page), name); } -static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, + bool check_only) { struct curseg_info *curseg; struct page *page = NULL; @@ -225,7 +226,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) { - if (IS_INODE(page) && is_dent_dnode(page)) { + if (!check_only && + IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) break; @@ -569,7 +571,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) mutex_lock(&sbi->cp_mutex); /* step #1: find fsynced inode numbers */ - err = find_fsync_dnodes(sbi, &inode_list); + err = find_fsync_dnodes(sbi, &inode_list, check_only); if (err || list_empty(&inode_list)) goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29ef7088c5582a480b6a1f7965fbbcca4f07e24e..96845854e7ee808c2df5227ddf68a614fcc779ba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -250,6 +250,36 @@ void drop_inmem_pages(struct inode *inode) stat_dec_atomic_write(inode); } +void drop_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct list_head *head = &fi->inmem_pages; + struct inmem_pages *cur = NULL; + + f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); + + mutex_lock(&fi->inmem_lock); + list_for_each_entry(cur, head, list) { + if (cur->page == page) + break; + } + + f2fs_bug_on(sbi, !cur || cur->page != page); + list_del(&cur->list); + mutex_unlock(&fi->inmem_lock); + + dec_page_count(sbi, F2FS_INMEM_PAGES); + kmem_cache_free(inmem_entry_slab, cur); + + ClearPageUptodate(page); + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); + + trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); +} + static int __commit_inmem_pages(struct inode *inode, struct list_head *revoke_list) { @@ -261,7 +291,6 @@ static int __commit_inmem_pages(struct inode *inode, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .encrypted_page = NULL, }; pgoff_t last_idx = ULONG_MAX; int err = 0; @@ -281,6 +310,9 @@ static int __commit_inmem_pages(struct inode *inode, } fio.page = page; + fio.old_blkaddr = NULL_ADDR; + fio.encrypted_page = NULL; + fio.need_lock = false, err = do_write_data_page(&fio); if (err) { unlock_page(page); @@ -358,11 +390,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } #endif - if (!need) - return; - /* balance_fs_bg is able to be pending */ - if (excess_cached_nats(sbi)) + if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); /* @@ -371,7 +400,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) */ if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi, false, false); + f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -390,7 +419,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else build_free_nids(sbi, false, false); - if (!is_idle(sbi)) + if (!is_idle(sbi) && !excess_dirty_nats(sbi)) return; /* checkpoint is the only way to shrink partial cached entries */ @@ -411,32 +440,34 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) } } -static int __submit_flush_wait(struct block_device *bdev) +static int __submit_flush_wait(struct f2fs_sb_info *sbi, + struct block_device *bdev) { struct bio *bio = f2fs_bio_alloc(0); int ret; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; bio->bi_bdev = bdev; ret = submit_bio_wait(bio); bio_put(bio); + + trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE), ret); return ret; } static int submit_flush_wait(struct f2fs_sb_info *sbi) { - int ret = __submit_flush_wait(sbi->sb->s_bdev); + int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); int i; - if (sbi->s_ndevs && !ret) { - for (i = 1; i < sbi->s_ndevs; i++) { - trace_f2fs_issue_flush(FDEV(i).bdev, - test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - ret = __submit_flush_wait(FDEV(i).bdev); - if (ret) - break; - } + if (!sbi->s_ndevs || ret) + return ret; + + for (i = 1; i < sbi->s_ndevs; i++) { + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; } return ret; } @@ -458,6 +489,8 @@ static int issue_flush_thread(void *data) fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) { cmd->ret = ret; @@ -475,25 +508,29 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; + int ret; if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) - return submit_flush_wait(sbi); - - if (!atomic_read(&fcc->submit_flush)) { - int ret; + if (!test_opt(sbi, FLUSH_MERGE)) { + ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + return ret; + } - atomic_inc(&fcc->submit_flush); + if (!atomic_read(&fcc->issing_flush)) { + atomic_inc(&fcc->issing_flush); ret = submit_flush_wait(sbi); - atomic_dec(&fcc->submit_flush); + atomic_dec(&fcc->issing_flush); + + atomic_inc(&fcc->issued_flush); return ret; } init_completion(&cmd.wait); - atomic_inc(&fcc->submit_flush); + atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) @@ -501,10 +538,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->submit_flush); + atomic_dec(&fcc->issing_flush); } else { llist_del_all(&fcc->issue_list); - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issing_flush, 0); } return cmd.ret; @@ -524,7 +561,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issued_flush, 0); + atomic_set(&fcc->issing_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -597,8 +635,8 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) - clear_bit(GET_SECNO(sbi, segno), + if (get_valid_blocks(sbi, segno, true) == 0) + clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); } } @@ -618,7 +656,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == 0) { __locate_dirty_segment(sbi, segno, PRE); @@ -633,162 +671,407 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static void __add_discard_cmd(struct f2fs_sb_info *sbi, - struct bio *bio, block_t lstart, block_t len) +static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *cmd_list = &(dcc->discard_cmd_list); + struct list_head *pend_list; struct discard_cmd *dc; + f2fs_bug_on(sbi, !len); + + pend_list = &dcc->pend_list[plist_idx(len)]; + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); - dc->bio = bio; - bio->bi_private = dc; + dc->bdev = bdev; dc->lstart = lstart; + dc->start = start; dc->len = len; + dc->ref = 0; dc->state = D_PREP; + dc->error = 0; init_completion(&dc->wait); + list_add_tail(&dc->list, pend_list); + atomic_inc(&dcc->discard_cmd_cnt); + dcc->undiscard_blks += len; - mutex_lock(&dcc->cmd_lock); - list_add_tail(&dc->list, cmd_list); - mutex_unlock(&dcc->cmd_lock); + return dc; } -static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) +static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node *parent, struct rb_node **p) { - int err = dc->bio->bi_error; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; - if (dc->state == D_DONE) - atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); + dc = __create_discard_cmd(sbi, bdev, lstart, start, len); - if (err == -EOPNOTSUPP) - err = 0; + rb_link_node(&dc->rb_node, parent, p); + rb_insert_color(&dc->rb_node, &dcc->root); + + return dc; +} + +static void __detach_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) +{ + if (dc->state == D_DONE) + atomic_dec(&dcc->issing_discard); - if (err) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", err); - bio_put(dc->bio); list_del(&dc->list); + rb_erase(&dc->rb_node, &dcc->root); + dcc->undiscard_blks -= dc->len; + kmem_cache_free(discard_cmd_slab, dc); + + atomic_dec(&dcc->discard_cmd_cnt); } -/* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +static void __remove_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->discard_cmd_list); - struct discard_cmd *dc, *tmp; - struct blk_plug plug; - mutex_lock(&dcc->cmd_lock); + if (dc->error == -EOPNOTSUPP) + dc->error = 0; - blk_start_plug(&plug); + if (dc->error) + f2fs_msg(sbi->sb, KERN_INFO, + "Issue discard failed, ret: %d", dc->error); + __detach_discard_cmd(dcc, dc); +} - list_for_each_entry_safe(dc, tmp, wait_list, list) { +static void f2fs_submit_discard_endio(struct bio *bio) +{ + struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - if (blkaddr == NULL_ADDR) { - if (dc->state == D_PREP) { - dc->state = D_SUBMIT; - submit_bio(dc->bio); - atomic_inc(&dcc->submit_discard); - } - continue; + dc->error = bio->bi_error; + dc->state = D_DONE; + complete(&dc->wait); + bio_put(bio); +} + +/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ +static void __submit_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct bio *bio = NULL; + + if (dc->state != D_PREP) + return; + + trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); + + dc->error = __blkdev_issue_discard(dc->bdev, + SECTOR_FROM_BLOCK(dc->start), + SECTOR_FROM_BLOCK(dc->len), + GFP_NOFS, 0, &bio); + if (!dc->error) { + /* should keep before submission to avoid D_DONE right away */ + dc->state = D_SUBMIT; + atomic_inc(&dcc->issued_discard); + atomic_inc(&dcc->issing_discard); + if (bio) { + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + bio->bi_opf |= REQ_SYNC; + submit_bio(bio); + list_move_tail(&dc->list, &dcc->wait_list); } + } else { + __remove_discard_cmd(sbi, dc); + } +} - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { - if (dc->state == D_SUBMIT) - wait_for_completion_io(&dc->wait); - else - __remove_discard_cmd(sbi, dc); +static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node **insert_p, + struct rb_node *insert_parent) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct rb_node **p = &dcc->root.rb_node; + struct rb_node *parent = NULL; + struct discard_cmd *dc = NULL; + + if (insert_p && insert_parent) { + parent = insert_parent; + p = insert_p; + goto do_insert; + } + + p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); +do_insert: + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); + if (!dc) + return NULL; + + return dc; +} + +static void __relocate_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) +{ + list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]); +} + +static void __punch_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_info di = dc->di; + bool modified = false; + + if (dc->state == D_DONE || dc->len == 1) { + __remove_discard_cmd(sbi, dc); + return; + } + + dcc->undiscard_blks -= di.len; + + if (blkaddr > di.lstart) { + dc->len = blkaddr - dc->lstart; + dcc->undiscard_blks += dc->len; + __relocate_discard_cmd(dcc, dc); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + modified = true; + } + + if (blkaddr < di.lstart + di.len - 1) { + if (modified) { + __insert_discard_tree(sbi, dc->bdev, blkaddr + 1, + di.start + blkaddr + 1 - di.lstart, + di.lstart + di.len - 1 - blkaddr, + NULL, NULL); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); + } else { + dc->lstart++; + dc->len--; + dc->start++; + dcc->undiscard_blks += dc->len; + __relocate_discard_cmd(dcc, dc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); } } - blk_finish_plug(&plug); +} - /* this comes from f2fs_put_super */ - if (blkaddr == NULL_ADDR) { - list_for_each_entry_safe(dc, tmp, wait_list, list) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct discard_cmd *dc; + struct discard_info di = {0}; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + block_t end = lstart + len; + + mutex_lock(&dcc->cmd_lock); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, lstart, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (dc) + prev_dc = dc; + + if (!prev_dc) { + di.lstart = lstart; + di.len = next_dc ? next_dc->lstart - lstart : len; + di.len = min(di.len, len); + di.start = start; + } + + while (1) { + struct rb_node *node; + bool merged = false; + struct discard_cmd *tdc = NULL; + + if (prev_dc) { + di.lstart = prev_dc->lstart + prev_dc->len; + if (di.lstart < lstart) + di.lstart = lstart; + if (di.lstart >= end) + break; + + if (!next_dc || next_dc->lstart > end) + di.len = end - di.lstart; + else + di.len = next_dc->lstart - di.lstart; + di.start = start + di.lstart - lstart; + } + + if (!di.len) + goto next; + + if (prev_dc && prev_dc->state == D_PREP && + prev_dc->bdev == bdev && + __is_discard_back_mergeable(&di, &prev_dc->di)) { + prev_dc->di.len += di.len; + dcc->undiscard_blks += di.len; + __relocate_discard_cmd(dcc, prev_dc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); + di = prev_dc->di; + tdc = prev_dc; + merged = true; + } + + if (next_dc && next_dc->state == D_PREP && + next_dc->bdev == bdev && + __is_discard_front_mergeable(&di, &next_dc->di)) { + next_dc->di.lstart = di.lstart; + next_dc->di.len += di.len; + next_dc->di.start = di.start; + dcc->undiscard_blks += di.len; + __relocate_discard_cmd(dcc, next_dc); + if (tdc) + __remove_discard_cmd(sbi, tdc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); + merged = true; + } + + if (!merged) { + __insert_discard_tree(sbi, bdev, di.lstart, di.start, + di.len, NULL, NULL); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); } + next: + prev_dc = next_dc; + if (!prev_dc) + break; + + node = rb_next(&prev_dc->rb_node); + next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); } + mutex_unlock(&dcc->cmd_lock); } -static void f2fs_submit_discard_endio(struct bio *bio) +static int __queue_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) { - struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; + block_t lblkstart = blkstart; - complete(&dc->wait); - dc->state = D_DONE; + trace_f2fs_queue_discard(bdev, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + return 0; } -static int issue_discard_thread(void *data) +static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) { - struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *cmd_list = &dcc->discard_cmd_list; + struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int iter = 0; -repeat: - if (kthread_should_stop()) - return 0; + int i, iter = 0; + mutex_lock(&dcc->cmd_lock); blk_start_plug(&plug); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + + if (!issue_cond || is_idle(sbi)) + __submit_discard_cmd(sbi, dc); + if (issue_cond && iter++ > DISCARD_ISSUE_RATE) + goto out; + } + } +out: + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + +static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->wait_list); + struct discard_cmd *dc, *tmp; mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, cmd_list, list) { - if (dc->state == D_PREP) { - dc->state = D_SUBMIT; - submit_bio(dc->bio); - atomic_inc(&dcc->submit_discard); - if (iter++ > DISCARD_ISSUE_RATE) - break; - } else if (dc->state == D_DONE) { + list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (!wait_cond || dc->state == D_DONE) { + if (dc->ref) + continue; + wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); } } mutex_unlock(&dcc->cmd_lock); +} - blk_finish_plug(&plug); +/* This should be covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; + bool need_wait = false; - iter = 0; - congestion_wait(BLK_RW_SYNC, HZ/50); + mutex_lock(&dcc->cmd_lock); + dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + if (dc) { + if (dc->state == D_PREP) { + __punch_discard_cmd(sbi, dc, blkaddr); + } else { + dc->ref++; + need_wait = true; + } + } + mutex_unlock(&dcc->cmd_lock); - wait_event_interruptible(*q, - kthread_should_stop() || !list_empty(&dcc->discard_cmd_list)); - goto repeat; + if (need_wait) { + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); + } } - -/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkstart, block_t blklen) +/* This comes from f2fs_put_super */ +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { - struct bio *bio = NULL; - block_t lblkstart = blkstart; - int err; + __issue_discard_cmd(sbi, false); + __wait_discard_cmd(sbi, false); +} - trace_f2fs_issue_discard(bdev, blkstart, blklen); +static int issue_discard_thread(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + wait_queue_head_t *q = &dcc->discard_wait_queue; +repeat: + if (kthread_should_stop()) + return 0; - if (sbi->s_ndevs) { - int devi = f2fs_target_device_index(sbi, blkstart); + __issue_discard_cmd(sbi, true); + __wait_discard_cmd(sbi, true); - blkstart -= FDEV(devi).start_blk; - } - err = __blkdev_issue_discard(bdev, - SECTOR_FROM_BLOCK(blkstart), - SECTOR_FROM_BLOCK(blklen), - GFP_NOFS, 0, &bio); - if (!err && bio) { - bio->bi_end_io = f2fs_submit_discard_endio; - bio->bi_opf |= REQ_SYNC; + congestion_wait(BLK_RW_SYNC, HZ/50); - __add_discard_cmd(sbi, bio, lblkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); - } - return err; + wait_event_interruptible(*q, kthread_should_stop() || + atomic_read(&dcc->discard_cmd_cnt)); + goto repeat; } #ifdef CONFIG_BLK_DEV_ZONED @@ -796,6 +1079,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { sector_t sector, nr_sects; + block_t lblkstart = blkstart; int devi = 0; if (sbi->s_ndevs) { @@ -813,7 +1097,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) return 0; - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sector = SECTOR_FROM_BLOCK(blkstart); @@ -845,7 +1129,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); + return __queue_discard_cmd(sbi, bdev, blkstart, blklen); } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, @@ -888,32 +1172,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, return err; } -static void __add_discard_entry(struct f2fs_sb_info *sbi, - struct cp_control *cpc, struct seg_entry *se, - unsigned int start, unsigned int end) -{ - struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; - struct discard_entry *new, *last; - - if (!list_empty(head)) { - last = list_last_entry(head, struct discard_entry, list); - if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len && - last->len < MAX_DISCARD_BLOCKS(sbi)) { - last->len += end - start; - goto done; - } - } - - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - list_add_tail(&new->list, head); -done: - SM_I(sbi)->dcc_info->nr_discards += end - start; -} - static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { @@ -925,7 +1183,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; - bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); + struct discard_entry *de = NULL; + struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) @@ -957,14 +1217,24 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (check_only) return true; - __add_discard_entry(sbi, cpc, se, start, end); + if (!de) { + de = f2fs_kmem_cache_alloc(discard_entry_slab, + GFP_F2FS_ZERO); + de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); + list_add_tail(&de->list, head); + } + + for (i = start; i < end; i++) + __set_bit_le(i, (void *)de->discard_map); + + SM_I(sbi)->dcc_info->nr_discards += end - start; } return false; } void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -990,13 +1260,13 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; - bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -1026,10 +1296,10 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) continue; } next: - secno = GET_SECNO(sbi, start); - start_segno = secno * sbi->segs_per_sec; + secno = GET_SEC_FROM_SEG(sbi, start); + start_segno = GET_SEG_FROM_SEC(sbi, secno); if (!IS_CURSEC(sbi, secno) && - !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), sbi->segs_per_sec << sbi->log_blocks_per_seg); @@ -1043,22 +1313,46 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (force && entry->len < cpc->trim_minlen) - goto skip; - f2fs_issue_discard(sbi, entry->blkaddr, entry->len); - cpc->trimmed += entry->len; + unsigned int cur_pos = 0, next_pos, len, total_len = 0; + bool is_valid = test_bit_le(0, entry->discard_map); + +find_next: + if (is_valid) { + next_pos = find_next_zero_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + len = next_pos - cur_pos; + + if (force && len < cpc->trim_minlen) + goto skip; + + f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, + len); + cpc->trimmed += len; + total_len += len; + } else { + next_pos = find_next_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + } skip: + cur_pos = next_pos; + is_valid = !is_valid; + + if (cur_pos < sbi->blocks_per_seg) + goto find_next; + list_del(&entry->list); - SM_I(sbi)->dcc_info->nr_discards -= entry->len; + SM_I(sbi)->dcc_info->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } + + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; - int err = 0; + int err = 0, i; if (SM_I(sbi)->dcc_info) { dcc = SM_I(sbi)->dcc_info; @@ -1069,12 +1363,18 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; - INIT_LIST_HEAD(&dcc->discard_entry_list); - INIT_LIST_HEAD(&dcc->discard_cmd_list); + INIT_LIST_HEAD(&dcc->entry_list); + for (i = 0; i < MAX_PLIST_NUM; i++) + INIT_LIST_HEAD(&dcc->pend_list[i]); + INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); - atomic_set(&dcc->submit_discard, 0); + atomic_set(&dcc->issued_discard, 0); + atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; - dcc->max_discards = 0; + dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->undiscard_blks = 0; + dcc->root = RB_ROOT; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; @@ -1091,20 +1391,22 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return err; } -static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - if (dcc && dcc->f2fs_issue_discard) { + if (!dcc) + return; + + if (dcc->f2fs_issue_discard) { struct task_struct *discard_thread = dcc->f2fs_issue_discard; dcc->f2fs_issue_discard = NULL; kthread_stop(discard_thread); } - if (free) { - kfree(dcc); - SM_I(sbi)->dcc_info = NULL; - } + + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -1345,6 +1647,17 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno + 1; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); + return 0; +} + /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -1355,8 +1668,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; - unsigned int hint = *newseg / sbi->segs_per_sec; - unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); + unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); unsigned int left_start = hint; bool init = true; int go_left = 0; @@ -1366,8 +1679,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - (hint + 1) * sbi->segs_per_sec, *newseg + 1); - if (segno < (hint + 1) * sbi->segs_per_sec) + GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); + if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } find_other_zone: @@ -1398,8 +1711,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, secno = left_start; skip_left: hint = secno; - segno = secno * sbi->segs_per_sec; - zoneno = secno / sbi->secs_per_zone; + segno = GET_SEG_FROM_SEC(sbi, secno); + zoneno = GET_ZONE_FROM_SEC(sbi, secno); /* give up on finding another zone */ if (!init) @@ -1443,7 +1756,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; curseg->segno = curseg->next_segno; - curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); curseg->next_blkoff = 0; curseg->next_segno = NULL_SEGNO; @@ -1456,6 +1769,20 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) __set_sit_entry_type(sbi, type, curseg->segno, modified); } +static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) +{ + /* if segs_per_sec is large than 1, we need to keep original policy. */ + if (sbi->segs_per_sec != 1) + return CURSEG_I(sbi, type)->segno; + + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + return 0; + + if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) + return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + return CURSEG_I(sbi, type)->segno; +} + /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. @@ -1474,6 +1801,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) if (test_opt(sbi, NOHEAP)) dir = ALLOC_RIGHT; + segno = __get_next_segno(sbi, type); get_new_segment(sbi, &segno, new_sec, dir); curseg->next_segno = segno; reset_curseg(sbi, type, 1); @@ -1549,12 +1877,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + unsigned segno = NULL_SEGNO; int i, cnt; bool reversed = false; /* need_SSR() already forces to do this */ - if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) + if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { + curseg->next_segno = segno; return 1; + } /* For node segments, let's do SSR more intensively */ if (IS_NODESEG(type)) { @@ -1578,9 +1909,10 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; - if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, i, SSR)) + if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { + curseg->next_segno = segno; return 1; + } } return 0; } @@ -1592,17 +1924,21 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (force) new_curseg(sbi, type, true); else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else new_curseg(sbi, type, false); - stat_inc_seg_type(sbi, CURSEG_I(sbi, type)); + stat_inc_seg_type(sbi, curseg); } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -1734,18 +2070,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) if (p_type == DATA) { struct inode *inode = page->mapping->host; - if (S_ISDIR(inode->i_mode)) - return CURSEG_HOT_DATA; - else if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - else - return CURSEG_WARM_DATA; + if (is_inode_flag_set(inode, FI_HOT_DATA)) + return CURSEG_HOT_DATA; + return CURSEG_WARM_DATA; } else { if (IS_DNODE(page)) return is_cold_node(page) ? CURSEG_WARM_NODE : CURSEG_HOT_NODE; - else - return CURSEG_COLD_NODE; + return CURSEG_COLD_NODE; } } @@ -1788,15 +2122,14 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); /* - * SIT information should be updated before segment allocation, - * since SSR needs latest valid block information. + * SIT information should be updated after segment allocation, + * since we need to keep dirty segments precisely under SSR. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); - mutex_unlock(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) @@ -1868,11 +2201,11 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) f2fs_update_data_blkaddr(dn, fio->new_blkaddr); } -void rewrite_data_page(struct f2fs_io_info *fio) +int rewrite_data_page(struct f2fs_io_info *fio) { fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); - f2fs_submit_page_mbio(fio); + return f2fs_submit_page_bio(fio); } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -2437,7 +2770,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (cpc->reason != CP_DISCARD) { + if (!(cpc->reason & CP_DISCARD)) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc, false); } @@ -2473,7 +2806,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, !list_empty(head)); f2fs_bug_on(sbi, sit_i->dirty_sentries); out: - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { __u64 trim_start = cpc->trim_start; for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) @@ -2501,13 +2834,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * + sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; @@ -2540,7 +2873,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * + sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; @@ -2573,7 +2906,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); - sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; + sit_i->mounted_time = ktime_get_real_seconds(); mutex_init(&sit_i->sentry_lock); return 0; } @@ -2591,12 +2924,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -2672,10 +3005,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) /* build discard map only one time */ if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += sbi->blocks_per_seg - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) @@ -2699,10 +3039,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) @@ -2746,7 +3091,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) continue; if (valid_blocks > sbi->blocks_per_seg) { @@ -2764,7 +3109,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -2786,7 +3131,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } @@ -2852,6 +3197,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; @@ -2988,7 +3334,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; destroy_flush_cmd_control(sbi, true); - destroy_discard_cmd_control(sbi, true); + destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5e8ad4280a5016d293817115a31a94daaf8d2d49..010f336a75730e5920d67a38d04b64cebc59bf57 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -21,78 +21,84 @@ #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ /* L: Logical segment # in volume, R: Relative segment # in main area */ -#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) -#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) +#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) +#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) -#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) -#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) +#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) +#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE) #define IS_CURSEG(sbi, seg) \ - ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) #define IS_CURSEC(sbi, secno) \ - ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - sbi->segs_per_sec)) \ + (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + (sbi)->segs_per_sec)) \ #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) -#define MAIN_SECS(sbi) (sbi->total_sections) +#define MAIN_SECS(sbi) ((sbi)->total_sections) #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) -#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) +#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) -#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ - sbi->log_blocks_per_seg)) +#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ + (sbi)->log_blocks_per_seg)) #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) + (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) #define NEXT_FREE_BLKADDR(sbi, curseg) \ - (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define GET_SECNO(sbi, segno) \ - ((segno) / sbi->segs_per_sec) -#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ - ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) +#define BLKS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) +#define GET_SEC_FROM_SEG(sbi, segno) \ + ((segno) / (sbi)->segs_per_sec) +#define GET_SEG_FROM_SEC(sbi, secno) \ + ((secno) * (sbi)->segs_per_sec) +#define GET_ZONE_FROM_SEC(sbi, secno) \ + ((secno) / (sbi)->secs_per_zone) +#define GET_ZONE_FROM_SEG(sbi, segno) \ + GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) #define GET_SUM_BLOCK(sbi, segno) \ - ((sbi->sm_info->ssa_blkaddr) + segno) + ((sbi)->sm_info->ssa_blkaddr + (segno)) #define GET_SUM_TYPE(footer) ((footer)->entry_type) -#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type)) #define SIT_ENTRY_OFFSET(sit_i, segno) \ - (segno % sit_i->sents_per_block) + ((segno) % (sit_i)->sents_per_block) #define SIT_BLOCK_OFFSET(segno) \ - (segno / SIT_ENTRY_PER_BLOCK) + ((segno) / SIT_ENTRY_PER_BLOCK) #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ @@ -103,7 +109,7 @@ #define SECTOR_FROM_BLOCK(blk_addr) \ (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) #define SECTOR_TO_BLOCK(sectors) \ - (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) + ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) /* * indicate a block allocation direction: RIGHT and LEFT. @@ -132,7 +138,10 @@ enum { */ enum { GC_CB = 0, - GC_GREEDY + GC_GREEDY, + ALLOC_NEXT, + FLUSH_DEVICE, + MAX_GC_POLICY, }; /* @@ -227,6 +236,8 @@ struct sit_info { unsigned long long mounted_time; /* mount time */ unsigned long long min_mtime; /* min. modification time */ unsigned long long max_mtime; /* max. modification time */ + + unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */ }; struct free_segmap_info { @@ -303,17 +314,17 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; + return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)]; } static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, - unsigned int segno, int section) + unsigned int segno, bool use_section) { /* * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (section > 1) + if (use_section && sbi->segs_per_sec > 1) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; @@ -358,8 +369,8 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); @@ -379,7 +390,8 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + set_bit(segno, free_i->free_segmap); free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) @@ -390,8 +402,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); @@ -412,7 +424,8 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; @@ -477,12 +490,12 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi) static inline int overprovision_sections(struct f2fs_sb_info *sbi) { - return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; + return GET_SEC_FROM_SEG(sbi, (unsigned int)overprovision_segments(sbi)); } static inline int reserved_sections(struct f2fs_sb_info *sbi) { - return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; + return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } static inline bool need_SSR(struct f2fs_sb_info *sbi) @@ -495,7 +508,7 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) return false; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - reserved_sections(sbi) + 1); + 2 * reserved_sections(sbi)); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, @@ -540,6 +553,7 @@ static inline int utilization(struct f2fs_sb_info *sbi) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 +#define DEF_MIN_HOT_BLOCKS 16 enum { F2FS_IPU_FORCE, @@ -547,17 +561,15 @@ enum { F2FS_IPU_UTIL, F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, + F2FS_IPU_ASYNC, }; -static inline bool need_inplace_update(struct inode *inode) +static inline bool need_inplace_update_policy(struct inode *inode, + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; - /* IPU can be done only for the user data */ - if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) - return false; - if (test_opt(sbi, LFS)) return false; @@ -572,6 +584,15 @@ static inline bool need_inplace_update(struct inode *inode) utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; + /* + * IPU for rewrite async pages + */ + if (policy & (0x1 << F2FS_IPU_ASYNC) && + fio && fio->op == REQ_OP_WRITE && + !(fio->op_flags & REQ_SYNC) && + !f2fs_encrypted_inode(inode)) + return true; + /* this is only set during fdatasync */ if (policy & (0x1 << F2FS_IPU_FSYNC) && is_inode_flag_set(inode, FI_NEED_IPU)) @@ -691,8 +712,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); - return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - - sit_i->mounted_time; + time64_t now = ktime_get_real_seconds(); + + return sit_i->elapsed_time + now - sit_i->mounted_time; } static inline void set_summary(struct f2fs_summary *sum, nid_t nid, @@ -719,7 +741,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= + if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= sbi->fggc_threshold) return true; return false; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 96fe8ed7310001c666c5f370ea735b5aecdc2518..83355ec4a92cdeb86d4be8d4630e01e7b0c96a28 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_TRUNCATE] = "truncate fail", [FAULT_IO] = "IO error", [FAULT_CHECKPOINT] = "checkpoint error", }; @@ -82,6 +83,7 @@ enum { Opt_discard, Opt_nodiscard, Opt_noheap, + Opt_heap, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, @@ -116,6 +118,7 @@ static match_table_t f2fs_tokens = { {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_noheap, "no_heap"}, + {Opt_heap, "heap"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, @@ -293,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); @@ -318,6 +322,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_hot_blocks), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), @@ -436,6 +441,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noheap: set_opt(sbi, NOHEAP); break; + case Opt_heap: + clear_opt(sbi, NOHEAP); + break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: set_opt(sbi, XATTR_USER); @@ -787,7 +795,14 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bio(sbi, NULL_ADDR); + f2fs_wait_discard_bios(sbi); + + if (!sbi->discard_blks) { + struct cp_control cpc = { + .reason = CP_UMOUNT | CP_TRIMMED, + }; + write_checkpoint(sbi, &cpc); + } /* write_checkpoint can update stat informaion */ f2fs_destroy_stats(sbi); @@ -913,7 +928,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap_alloc"); + seq_puts(seq, ",no_heap"); + else + seq_puts(seq, ",heap"); #ifdef CONFIG_F2FS_FS_XATTR if (test_opt(sbi, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -986,7 +1003,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) if ((i % 10) == 0) seq_printf(seq, "%-10d", i); seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, 1)); + get_valid_blocks(sbi, i, false)); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else @@ -1012,7 +1029,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "%-10d", i); seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, 1)); + get_valid_blocks(sbi, i, false)); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); @@ -1046,6 +1063,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); if (f2fs_sb_mounted_blkzoned(sbi->sb)) { @@ -1307,7 +1325,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); + return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, @@ -1483,6 +1501,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } + if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment count (%u)", + le32_to_cpu(raw_super->segment_count)); + return 1; + } + /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) return 1; @@ -1555,6 +1580,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); + atomic_set(&sbi->wb_sync_req, 0); + INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); mutex_init(&sbi->wio_mutex[NODE]); @@ -1917,6 +1944,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->gc_mutex); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); + init_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); @@ -2022,6 +2050,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_join_shrinker(sbi); + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; + /* if there are nt orphan nodes free them */ err = recover_orphan_inodes(sbi); if (err) @@ -2046,10 +2078,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_root_inode; } - err = f2fs_build_stats(sbi); - if (err) - goto free_root_inode; - if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -2143,7 +2171,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } - f2fs_destroy_stats(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -2161,6 +2188,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) truncate_inode_pages_final(META_MAPPING(sbi)); iput(sbi->node_inode); mutex_unlock(&sbi->umount_mutex); + f2fs_destroy_stats(sbi); free_nm: destroy_node_manager(sbi); free_sm: diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 73b4e1d1912a7ff7370c1c6f2d649b770ed70acc..bccbbf2616d2b2cd3e5c3380b809f545dc1802a9 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -59,7 +59,7 @@ void f2fs_trace_pid(struct page *page) pid_t pid = task_pid_nr(current); void *p; - page->private = pid; + set_page_private(page, (unsigned long)pid); if (radix_tree_preload(GFP_NOFS)) return; @@ -138,7 +138,7 @@ static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, radix_tree_for_each_slot(slot, &pids, &iter, first_index) { results[ret] = iter.index; - if (++ret == PIDVEC_SIZE) + if (++ret == max_items) break; } return ret; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 7298a4488f7f59cfc68587c4fbc3b1ac6293f9aa..832c5110abab51208e77751056436e7ba7e7ff16 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -250,15 +250,13 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; - unsigned int inline_size = 0; + unsigned int inline_size = inline_xattr_size(inode); int err = 0; - inline_size = inline_xattr_size(inode); - if (!size && !inline_size) return -ENODATA; - txattr_addr = kzalloc(inline_size + size + sizeof(__u32), + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -328,13 +326,14 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; - size_t size = PAGE_SIZE, inline_size = 0; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int size = VALID_XATTR_BLOCK_SIZE; + unsigned int inline_size = inline_xattr_size(inode); void *txattr_addr; int err; - inline_size = inline_xattr_size(inode); - - txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO); + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, + GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -358,19 +357,19 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, } /* read from xattr node block */ - if (F2FS_I(inode)->i_xattr_nid) { + if (xnid) { struct page *xpage; void *xattr_addr; /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + xpage = get_node_page(sbi, xnid); if (IS_ERR(xpage)) { err = PTR_ERR(xpage); goto fail; } xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + memcpy(txattr_addr + inline_size, xattr_addr, size); f2fs_put_page(xpage, 1); } @@ -392,14 +391,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, void *txattr_addr, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - size_t inline_size = 0; + size_t inline_size = inline_xattr_size(inode); void *xattr_addr; struct page *xpage; nid_t new_nid = 0; int err; - inline_size = inline_xattr_size(inode); - if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) if (!alloc_nid(sbi, &new_nid)) return -ENOSPC; @@ -454,7 +451,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE); + memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); set_page_dirty(xpage); f2fs_put_page(xpage, 1); @@ -546,7 +543,9 @@ static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry, const void *value, size_t size) { void *pval = entry->e_name + entry->e_name_len; - return (entry->e_value_size == size) && !memcmp(pval, value, size); + + return (le16_to_cpu(entry->e_value_size) == size) && + !memcmp(pval, value, size); } static int __f2fs_setxattr(struct inode *inode, int index, diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index d5a94928c11695cde054f6143cb604d0bf850fc0..dbcd1d16e66982e07233e66fe400e95597a6c2df 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -58,10 +58,10 @@ struct f2fs_xattr_entry { #define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) #define XATTR_ROUND (3) -#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) +#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND) #define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ - entry->e_name_len + le16_to_cpu(entry->e_value_size))) + (entry)->e_name_len + le16_to_cpu((entry)->e_value_size))) #define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ ENTRY_SIZE(entry))) @@ -72,8 +72,8 @@ struct f2fs_xattr_entry { for (entry = XATTR_FIRST_ENTRY(addr);\ !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) -#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) -#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32)) +#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) +#define XATTR_PADDING_SIZE (sizeof(__u32)) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) diff --git a/fs/fcntl.c b/fs/fcntl.c index be8fbe289087e61222103ec5675bb27857c502e0..f4e7267d117fb83b69f6f5be2305fb8b375077c8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -420,6 +421,162 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, } #endif +#ifdef CONFIG_COMPAT +static int get_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) +{ + if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || + __get_user(kfl->l_type, &ufl->l_type) || + __get_user(kfl->l_whence, &ufl->l_whence) || + __get_user(kfl->l_start, &ufl->l_start) || + __get_user(kfl->l_len, &ufl->l_len) || + __get_user(kfl->l_pid, &ufl->l_pid)) + return -EFAULT; + return 0; +} + +static int put_compat_flock(struct flock *kfl, struct compat_flock __user *ufl) +{ + if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || + __put_user(kfl->l_type, &ufl->l_type) || + __put_user(kfl->l_whence, &ufl->l_whence) || + __put_user(kfl->l_start, &ufl->l_start) || + __put_user(kfl->l_len, &ufl->l_len) || + __put_user(kfl->l_pid, &ufl->l_pid)) + return -EFAULT; + return 0; +} + +#ifndef HAVE_ARCH_GET_COMPAT_FLOCK64 +static int get_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) +{ + if (!access_ok(VERIFY_READ, ufl, sizeof(*ufl)) || + __get_user(kfl->l_type, &ufl->l_type) || + __get_user(kfl->l_whence, &ufl->l_whence) || + __get_user(kfl->l_start, &ufl->l_start) || + __get_user(kfl->l_len, &ufl->l_len) || + __get_user(kfl->l_pid, &ufl->l_pid)) + return -EFAULT; + return 0; +} +#endif + +#ifndef HAVE_ARCH_PUT_COMPAT_FLOCK64 +static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *ufl) +{ + if (!access_ok(VERIFY_WRITE, ufl, sizeof(*ufl)) || + __put_user(kfl->l_type, &ufl->l_type) || + __put_user(kfl->l_whence, &ufl->l_whence) || + __put_user(kfl->l_start, &ufl->l_start) || + __put_user(kfl->l_len, &ufl->l_len) || + __put_user(kfl->l_pid, &ufl->l_pid)) + return -EFAULT; + return 0; +} +#endif + +static unsigned int +convert_fcntl_cmd(unsigned int cmd) +{ + switch (cmd) { + case F_GETLK64: + return F_GETLK; + case F_SETLK64: + return F_SETLK; + case F_SETLKW64: + return F_SETLKW; + } + + return cmd; +} + +COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) +{ + mm_segment_t old_fs; + struct flock f; + long ret; + unsigned int conv_cmd; + + switch (cmd) { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + ret = get_compat_flock(&f, compat_ptr(arg)); + if (ret != 0) + break; + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_fcntl(fd, cmd, (unsigned long)&f); + set_fs(old_fs); + if (cmd == F_GETLK && ret == 0) { + /* GETLK was successful and we need to return the data... + * but it needs to fit in the compat structure. + * l_start shouldn't be too big, unless the original + * start + end is greater than COMPAT_OFF_T_MAX, in which + * case the app was asking for trouble, so we return + * -EOVERFLOW in that case. + * l_len could be too big, in which case we just truncate it, + * and only allow the app to see that part of the conflicting + * lock that might make sense to it anyway + */ + + if (f.l_start > COMPAT_OFF_T_MAX) + ret = -EOVERFLOW; + if (f.l_len > COMPAT_OFF_T_MAX) + f.l_len = COMPAT_OFF_T_MAX; + if (ret == 0) + ret = put_compat_flock(&f, compat_ptr(arg)); + } + break; + + case F_GETLK64: + case F_SETLK64: + case F_SETLKW64: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: + ret = get_compat_flock64(&f, compat_ptr(arg)); + if (ret != 0) + break; + old_fs = get_fs(); + set_fs(KERNEL_DS); + conv_cmd = convert_fcntl_cmd(cmd); + ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); + set_fs(old_fs); + if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { + /* need to return lock information - see above for commentary */ + if (f.l_start > COMPAT_LOFF_T_MAX) + ret = -EOVERFLOW; + if (f.l_len > COMPAT_LOFF_T_MAX) + f.l_len = COMPAT_LOFF_T_MAX; + if (ret == 0) + ret = put_compat_flock64(&f, compat_ptr(arg)); + } + break; + + default: + ret = sys_fcntl(fd, cmd, arg); + break; + } + return ret; +} + +COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) +{ + switch (cmd) { + case F_GETLK64: + case F_SETLK64: + case F_SETLKW64: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: + return -EINVAL; + } + return compat_sys_fcntl64(fd, cmd, arg); +} +#endif + /* Table to convert sigio signal codes into poll band bitmaps */ static const long band_table[NSIGPOLL] = { @@ -742,16 +899,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( - O_RDONLY | O_WRONLY | O_RDWR | - O_CREAT | O_EXCL | O_NOCTTY | - O_TRUNC | O_APPEND | /* O_NONBLOCK | */ - __O_SYNC | O_DSYNC | FASYNC | - O_DIRECT | O_LARGEFILE | O_DIRECTORY | - O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC | O_PATH | __O_TMPFILE | - __FMODE_NONOTIFY - )); + BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + HWEIGHT32( + (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | + __FMODE_EXEC | __FMODE_NONOTIFY)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); diff --git a/fs/fhandle.c b/fs/fhandle.c index 5559168d5637310d2712eab97af1117a786cfba1..58a61f55e0d09f868bb1bac6a0b4d6e5042822ad 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" #include "mount.h" @@ -264,3 +265,15 @@ SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, ret = do_handle_open(mountdirfd, handle, flags); return ret; } + +#ifdef CONFIG_COMPAT +/* + * Exactly like fs/open.c:sys_open_by_handle_at(), except that it + * doesn't set the O_LARGEFILE flag. + */ +COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, + struct file_handle __user *, handle, int, flags) +{ + return do_handle_open(mountdirfd, handle, flags); +} +#endif diff --git a/fs/file.c b/fs/file.c index ad6f094f2eff2f90314d237fa3f3cb16c5b79b66..1c2972e3a405455e292520032342276cc621d369 100644 --- a/fs/file.c +++ b/fs/file.c @@ -42,7 +42,7 @@ static void *alloc_fdmem(size_t size) if (data != NULL) return data; } - return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); } static void __free_fdtable(struct fdtable *fdt) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 6e22748b0704c509edad6a0e93d0a6626601137b..b9ea99c5b5b31948882d37113d520ab9cdcf9564 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -292,7 +292,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) { - struct tree_descr empty_descr = {""}; + static const struct tree_descr empty_descr = {""}; struct fuse_conn *fc; int err; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b681b43c766e11daf45814fc694b4b988f218a43..c16d00e5326459c6608268f9ce3f68f30467eac6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -20,6 +20,7 @@ #include #include #include +#include MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); @@ -45,7 +46,7 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages, INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); - atomic_set(&req->count, 1); + refcount_set(&req->count, 1); req->pages = pages; req->page_descs = page_descs; req->max_pages = npages; @@ -102,21 +103,20 @@ void fuse_request_free(struct fuse_req *req) void __fuse_get_request(struct fuse_req *req) { - atomic_inc(&req->count); + refcount_inc(&req->count); } /* Must be called with > 1 refcount */ static void __fuse_put_request(struct fuse_req *req) { - BUG_ON(atomic_read(&req->count) < 2); - atomic_dec(&req->count); + refcount_dec(&req->count); } -static void fuse_req_init_context(struct fuse_req *req) +static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req) { req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid()); req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid()); - req->in.h.pid = current->pid; + req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); } void fuse_set_initialized(struct fuse_conn *fc) @@ -163,7 +163,7 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages, goto out; } - fuse_req_init_context(req); + fuse_req_init_context(fc, req); __set_bit(FR_WAITING, &req->flags); if (for_background) __set_bit(FR_BACKGROUND, &req->flags); @@ -256,7 +256,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, if (!req) req = get_reserved_req(fc, file); - fuse_req_init_context(req); + fuse_req_init_context(fc, req); __set_bit(FR_WAITING, &req->flags); __clear_bit(FR_BACKGROUND, &req->flags); return req; @@ -264,7 +264,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc, void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) { - if (atomic_dec_and_test(&req->count)) { + if (refcount_dec_and_test(&req->count)) { if (test_bit(FR_BACKGROUND, &req->flags)) { /* * We get here in the unlikely case that a background @@ -382,9 +382,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&fc->blocked_waitq); if (fc->num_background == fc->congestion_threshold && - fc->connected && fc->bdi_initialized) { - clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); - clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); + fc->connected && fc->sb) { + clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); + clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); } fc->num_background--; fc->active_background--; @@ -573,10 +573,9 @@ void fuse_request_send_background_locked(struct fuse_conn *fc, fc->num_background++; if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == fc->congestion_threshold && - fc->bdi_initialized) { - set_bdi_congested(&fc->bdi, BLK_RW_SYNC); - set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); + if (fc->num_background == fc->congestion_threshold && fc->sb) { + set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC); + set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC); } list_add_tail(&req->list, &fc->bg_queue); flush_bg_queue(fc); @@ -1223,6 +1222,9 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_in *in; unsigned reqsize; + if (task_active_pid_ns(current) != fc->pid_ns) + return -EIO; + restart: spin_lock(&fiq->waitq.lock); err = -EAGAIN; @@ -1821,6 +1823,9 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_req *req; struct fuse_out_header oh; + if (task_active_pid_ns(current) != fc->pid_ns) + return -EIO; + if (nbytes < sizeof(struct fuse_out_header)) return -EINVAL; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ec238fb5a584b1c3cc5bc545806575160f78cde4..3ee4fdc3da9ec359ad847afa36354240329a2da6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -58,7 +58,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) } INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 1); + refcount_set(&ff->count, 1); RB_CLEAR_NODE(&ff->polled_node); init_waitqueue_head(&ff->poll_wait); @@ -77,7 +77,7 @@ void fuse_file_free(struct fuse_file *ff) static struct fuse_file *fuse_file_get(struct fuse_file *ff) { - atomic_inc(&ff->count); + refcount_inc(&ff->count); return ff; } @@ -88,7 +88,7 @@ static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) static void fuse_file_put(struct fuse_file *ff, bool sync) { - if (atomic_dec_and_test(&ff->count)) { + if (refcount_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; if (ff->fc->no_open) { @@ -293,7 +293,7 @@ static int fuse_release(struct inode *inode, struct file *file) void fuse_sync_release(struct fuse_file *ff, int flags) { - WARN_ON(atomic_read(&ff->count) != 1); + WARN_ON(refcount_read(&ff->count) > 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); /* * iput(NULL) is a no-op and since the refcount is 1 and everything's @@ -2083,7 +2083,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) return generic_file_mmap(file, vma); } -static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, +static int convert_fuse_file_lock(struct fuse_conn *fc, + const struct fuse_file_lock *ffl, struct file_lock *fl) { switch (ffl->type) { @@ -2098,7 +2099,14 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, fl->fl_start = ffl->start; fl->fl_end = ffl->end; - fl->fl_pid = ffl->pid; + + /* + * Convert pid into the caller's pid namespace. If the pid + * does not map into the namespace fl_pid will get set to 0. + */ + rcu_read_lock(); + fl->fl_pid = pid_vnr(find_pid_ns(ffl->pid, fc->pid_ns)); + rcu_read_unlock(); break; default: @@ -2147,7 +2155,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl) args.out.args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) - err = convert_fuse_file_lock(&outarg.lk, fl); + err = convert_fuse_file_lock(fc, &outarg.lk, fl); return err; } @@ -2159,7 +2167,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) FUSE_ARGS(args); struct fuse_lk_in inarg; int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; - pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; + struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL; + pid_t pid_nr = pid_nr_ns(pid, fc->pid_ns); int err; if (fl->fl_lmops && fl->fl_lmops->lm_grant) { @@ -2168,10 +2177,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) } /* Unlock on close is handled by the flush method */ - if (fl->fl_flags & FL_CLOSE) + if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX) return 0; - fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg); + if (pid && pid_nr == 0) + return -EOVERFLOW; + + fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); err = fuse_simple_request(fc, &args); /* locking is restartable */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 32ac2c9b09c0302c99337c374263a6fa428b5cf4..1bd7ffdad593977013c1ddd233b2a91093471471 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -24,6 +24,8 @@ #include #include #include +#include +#include /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -137,7 +139,7 @@ struct fuse_file { u64 nodeid; /** Refcount */ - atomic_t count; + refcount_t count; /** FOPEN_* flags returned by open */ u32 open_flags; @@ -306,7 +308,7 @@ struct fuse_req { struct list_head intr_entry; /** refcount */ - atomic_t count; + refcount_t count; /** Unique ID for the interrupt request */ u64 intr_unique; @@ -448,7 +450,7 @@ struct fuse_conn { spinlock_t lock; /** Refcount */ - atomic_t count; + refcount_t count; /** Number of fuse_dev's */ atomic_t dev_count; @@ -461,6 +463,9 @@ struct fuse_conn { /** The group id for this mount */ kgid_t group_id; + /** The pid namespace for this mount */ + struct pid_namespace *pid_ns; + /** Maximum read size */ unsigned max_read; @@ -527,9 +532,6 @@ struct fuse_conn { /** Filesystem supports NFS exporting. Only set in INIT */ unsigned export_support:1; - /** Set if bdi is valid */ - unsigned bdi_initialized:1; - /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; @@ -631,9 +633,6 @@ struct fuse_conn { /** Negotiated minor version */ unsigned minor; - /** Backing dev info */ - struct backing_dev_info bdi; - /** Entry on the fuse_conn_list */ struct list_head entry; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6fe6a88ecb4afd9eaaad1b0c75fe961108efc64a..65c88379a3a14311cca68b8750d6bb0b9f107444 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -386,12 +387,6 @@ static void fuse_send_destroy(struct fuse_conn *fc) } } -static void fuse_bdi_destroy(struct fuse_conn *fc) -{ - if (fc->bdi_initialized) - bdi_destroy(&fc->bdi); -} - static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); @@ -403,7 +398,6 @@ static void fuse_put_super(struct super_block *sb) list_del(&fc->entry); fuse_ctl_remove_conn(fc); mutex_unlock(&fuse_mutex); - fuse_bdi_destroy(fc); fuse_conn_put(fc); } @@ -608,7 +602,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); init_rwsem(&fc->killsb); - atomic_set(&fc->count, 1); + refcount_set(&fc->count, 1); atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); init_waitqueue_head(&fc->reserved_req_waitq); @@ -626,14 +620,16 @@ void fuse_conn_init(struct fuse_conn *fc) fc->connected = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); + fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(fuse_conn_init); void fuse_conn_put(struct fuse_conn *fc) { - if (atomic_dec_and_test(&fc->count)) { + if (refcount_dec_and_test(&fc->count)) { if (fc->destroy_req) fuse_request_free(fc->destroy_req); + put_pid_ns(fc->pid_ns); fc->release(fc); } } @@ -641,7 +637,7 @@ EXPORT_SYMBOL_GPL(fuse_conn_put); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) { - atomic_inc(&fc->count); + refcount_inc(&fc->count); return fc; } EXPORT_SYMBOL_GPL(fuse_conn_get); @@ -928,7 +924,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_flock = 1; } - fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); + fc->sb->s_bdi->ra_pages = + min(fc->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; fc->max_write = max_t(unsigned, 4096, fc->max_write); @@ -944,7 +941,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; - arg->max_readahead = fc->bdi.ra_pages * PAGE_SIZE; + arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | @@ -976,28 +973,26 @@ static void fuse_free_conn(struct fuse_conn *fc) static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) { int err; - - fc->bdi.name = "fuse"; - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; - /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; - - err = bdi_init(&fc->bdi); - if (err) - return err; - - fc->bdi_initialized = 1; + char *suffix = ""; if (sb->s_bdev) { - err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", - MAJOR(fc->dev), MINOR(fc->dev)); - } else { - err = bdi_register_dev(&fc->bdi, fc->dev); + suffix = "-fuseblk"; + /* + * sb->s_bdi points to blkdev's bdi however we want to redirect + * it to our private bdi... + */ + bdi_put(sb->s_bdi); + sb->s_bdi = &noop_backing_dev_info; } - + err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), + MINOR(fc->dev), suffix); if (err) return err; + sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; + /* fuse does it's own writeback accounting */ + sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; + /* * For a single fuse filesystem use max 1% of dirty + * writeback threshold. @@ -1010,7 +1005,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) * * /sys/class/bdi//max_ratio */ - bdi_set_max_ratio(&fc->bdi, 1); + bdi_set_max_ratio(sb->s_bdi, 1); return 0; } @@ -1113,8 +1108,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_dev_free; - sb->s_bdi = &fc->bdi; - /* Handle umasking inside the fuse code */ if (sb->s_flags & MS_POSIXACL) fc->dont_mask = 1; @@ -1182,7 +1175,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_dev_free: fuse_dev_free(fud); err_put_conn: - fuse_bdi_destroy(fc); fuse_conn_put(fc); err_fput: fput(file); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 01b97c012c6e9635540eaf2126cd5358726c1082..4d810be532ddcb6d9d18b234383c5167b0780e32 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -38,11 +38,6 @@ struct metapath { __u16 mp_list[GFS2_MAX_META_HEIGHT]; }; -struct strip_mine { - int sm_first; - unsigned int sm_height; -}; - /** * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page * @ip: the inode @@ -252,6 +247,19 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp) return 1; } +/** + * metaptr1 - Return the first possible metadata pointer in a metaath buffer + * @height: The metadata height (0 = dinode) + * @mp: The metapath + */ +static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp) +{ + struct buffer_head *bh = mp->mp_bh[height]; + if (height == 0) + return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode))); + return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header))); +} + /** * metapointer - Return pointer to start of metadata in a buffer * @height: The metadata height (0 = dinode) @@ -264,10 +272,8 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp) static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) { - struct buffer_head *bh = mp->mp_bh[height]; - unsigned int head_size = (height > 0) ? - sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); - return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; + __be64 *p = metaptr1(height, mp); + return p + mp->mp_list[height]; } static void gfs2_metapath_ra(struct gfs2_glock *gl, @@ -295,6 +301,23 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, } } +/** + * lookup_mp_height - helper function for lookup_metapath + * @ip: the inode + * @mp: the metapath + * @h: the height which needs looking up + */ +static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h) +{ + __be64 *ptr = metapointer(h, mp); + u64 dblock = be64_to_cpu(*ptr); + + if (!dblock) + return h + 1; + + return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]); +} + /** * lookup_metapath - Walk the metadata tree to a specific point * @ip: The inode @@ -316,17 +339,10 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) { unsigned int end_of_metadata = ip->i_height - 1; unsigned int x; - __be64 *ptr; - u64 dblock; int ret; for (x = 0; x < end_of_metadata; x++) { - ptr = metapointer(x, mp); - dblock = be64_to_cpu(*ptr); - if (!dblock) - return x + 1; - - ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]); + ret = lookup_mp_height(ip, mp, x); if (ret) return ret; } @@ -334,6 +350,35 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) return ip->i_height; } +/** + * fillup_metapath - fill up buffers for the metadata path to a specific height + * @ip: The inode + * @mp: The metapath + * @h: The height to which it should be mapped + * + * Similar to lookup_metapath, but does lookups for a range of heights + * + * Returns: error or height of metadata tree + */ + +static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) +{ + unsigned int start_h = h - 1; + int ret; + + if (h) { + /* find the first buffer we need to look up. */ + while (start_h > 0 && mp->mp_bh[start_h] == NULL) + start_h--; + for (; start_h < h; start_h++) { + ret = lookup_mp_height(ip, mp, start_h); + if (ret) + return ret; + } + } + return ip->i_height; +} + static inline void release_metapath(struct metapath *mp) { int i; @@ -422,6 +467,13 @@ enum alloc_state { /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ }; +static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt) +{ + if (hgt) + return sdp->sd_inptrs; + return sdp->sd_diptrs; +} + /** * gfs2_bmap_alloc - Build a metadata tree of the requested height * @inode: The GFS2 inode @@ -620,7 +672,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, BUG_ON(maxlen == 0); - memset(mp.mp_bh, 0, sizeof(mp.mp_bh)); + memset(&mp, 0, sizeof(mp)); bmap_lock(ip, create); clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); @@ -701,252 +753,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi return ret; } -/** - * do_strip - Look for a layer a particular layer of the file and strip it off - * @ip: the inode - * @dibh: the dinode buffer - * @bh: A buffer of pointers - * @top: The first pointer in the buffer - * @bottom: One more than the last pointer - * @height: the height this buffer is at - * @sm: a pointer to a struct strip_mine - * - * Returns: errno - */ - -static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, - struct buffer_head *bh, __be64 *top, __be64 *bottom, - unsigned int height, struct strip_mine *sm) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrp_list rlist; - struct gfs2_trans *tr; - u64 bn, bstart; - u32 blen, btotal; - __be64 *p; - unsigned int rg_blocks = 0; - int metadata; - unsigned int revokes = 0; - int x; - int error; - int jblocks_rqsted; - - error = gfs2_rindex_update(sdp); - if (error) - return error; - - if (!*top) - sm->sm_first = 0; - - if (height != sm->sm_height) - return 0; - - if (sm->sm_first) { - top++; - sm->sm_first = 0; - } - - metadata = (height != ip->i_height - 1); - if (metadata) - revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; - else if (ip->i_depth) - revokes = sdp->sd_inptrs; - - memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); - bstart = 0; - blen = 0; - - for (p = top; p < bottom; p++) { - if (!*p) - continue; - - bn = be64_to_cpu(*p); - - if (bstart + blen == bn) - blen++; - else { - if (bstart) - gfs2_rlist_add(ip, &rlist, bstart); - - bstart = bn; - blen = 1; - } - } - - if (bstart) - gfs2_rlist_add(ip, &rlist, bstart); - else - goto out; /* Nothing to do */ - - gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); - - for (x = 0; x < rlist.rl_rgrps; x++) { - struct gfs2_rgrpd *rgd; - rgd = rlist.rl_ghs[x].gh_gl->gl_object; - rg_blocks += rgd->rd_length; - } - - error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); - if (error) - goto out_rlist; - - if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(&ip->i_res); - -restart: - jblocks_rqsted = rg_blocks + RES_DINODE + - RES_INDIRECT + RES_STATFS + RES_QUOTA + - gfs2_struct2blk(sdp, revokes, sizeof(u64)); - if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2)) - jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2); - error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); - if (error) - goto out_rg_gunlock; - - tr = current->journal_info; - down_write(&ip->i_rw_mutex); - - gfs2_trans_add_meta(ip->i_gl, dibh); - gfs2_trans_add_meta(ip->i_gl, bh); - - bstart = 0; - blen = 0; - btotal = 0; - - for (p = top; p < bottom; p++) { - if (!*p) - continue; - - /* check for max reasonable journal transaction blocks */ - if (tr->tr_num_buf_new + RES_STATFS + - RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { - if (rg_blocks >= tr->tr_num_buf_new) - rg_blocks -= tr->tr_num_buf_new; - else - rg_blocks = 0; - break; - } - - bn = be64_to_cpu(*p); - - if (bstart + blen == bn) - blen++; - else { - if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); - btotal += blen; - } - - bstart = bn; - blen = 1; - } - - *p = 0; - gfs2_add_inode_blocks(&ip->i_inode, -1); - } - if (p == bottom) - rg_blocks = 0; - - if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); - btotal += blen; - } - - gfs2_statfs_change(sdp, 0, +btotal, 0); - gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, - ip->i_inode.i_gid); - - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); - - gfs2_dinode_out(ip, dibh->b_data); - - up_write(&ip->i_rw_mutex); - - gfs2_trans_end(sdp); - - if (rg_blocks) - goto restart; - -out_rg_gunlock: - gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); -out_rlist: - gfs2_rlist_free(&rlist); -out: - return error; -} - -/** - * recursive_scan - recursively scan through the end of a file - * @ip: the inode - * @dibh: the dinode buffer - * @mp: the path through the metadata to the point to start - * @height: the height the recursion is at - * @block: the indirect block to look at - * @first: 1 if this is the first block - * @sm: data opaque to this function to pass to @bc - * - * When this is first called @height and @block should be zero and - * @first should be 1. - * - * Returns: errno - */ - -static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, - struct metapath *mp, unsigned int height, - u64 block, int first, struct strip_mine *sm) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head *bh = NULL; - __be64 *top, *bottom; - u64 bn; - int error; - int mh_size = sizeof(struct gfs2_meta_header); - - if (!height) { - error = gfs2_meta_inode_buffer(ip, &bh); - if (error) - return error; - dibh = bh; - - top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; - bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; - } else { - error = gfs2_meta_indirect_buffer(ip, height, block, &bh); - if (error) - return error; - - top = (__be64 *)(bh->b_data + mh_size) + - (first ? mp->mp_list[height] : 0); - - bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; - } - - error = do_strip(ip, dibh, bh, top, bottom, height, sm); - if (error) - goto out; - - if (height < ip->i_height - 1) { - - gfs2_metapath_ra(ip->i_gl, bh, top); - - for (; top < bottom; top++, first = 0) { - if (!*top) - continue; - - bn = be64_to_cpu(*top); - - error = recursive_scan(ip, dibh, mp, height + 1, bn, - first, sm); - if (error) - break; - } - } -out: - brelse(bh); - return error; -} - - /** * gfs2_block_truncate_page - Deal with zeroing out data for truncate * @@ -1106,41 +912,406 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) return error; } -static int trunc_dealloc(struct gfs2_inode *ip, u64 size) +/** + * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein + * @ip: inode + * @rg_gh: holder of resource group glock + * @mp: current metapath fully populated with buffers + * @btotal: place to keep count of total blocks freed + * @hgt: height we're processing + * @first: true if this is the first call to this function for this height + * + * We sweep a metadata buffer (provided by the metapath) for blocks we need to + * free, and free them all. However, we do it one rgrp at a time. If this + * block has references to multiple rgrps, we break it into individual + * transactions. This allows other processes to use the rgrps while we're + * focused on a single one, for better concurrency / performance. + * At every transaction boundary, we rewrite the inode into the journal. + * That way the bitmaps are kept consistent with the inode and we can recover + * if we're interrupted by power-outages. + * + * Returns: 0, or return code if an error occurred. + * *btotal has the total number of blocks freed + */ +static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, + const struct metapath *mp, u32 *btotal, int hgt, + bool preserve1) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int height = ip->i_height; - u64 lblock; - struct metapath mp; - int error; + struct gfs2_rgrpd *rgd; + struct gfs2_trans *tr; + struct buffer_head *bh = mp->mp_bh[hgt]; + __be64 *top, *bottom, *p; + int blks_outside_rgrp; + u64 bn, bstart, isize_blks; + s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */ + int meta = ((hgt != ip->i_height - 1) ? 1 : 0); + int ret = 0; + bool buf_in_tr = false; /* buffer was added to transaction */ + + if (gfs2_metatype_check(sdp, bh, + (hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI))) + return -EIO; + +more_rgrps: + blks_outside_rgrp = 0; + bstart = 0; + blen = 0; + top = metapointer(hgt, mp); /* first ptr from metapath */ + /* If we're keeping some data at the truncation point, we've got to + preserve the metadata tree by adding 1 to the starting metapath. */ + if (preserve1) + top++; + + bottom = (__be64 *)(bh->b_data + bh->b_size); + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + bn = be64_to_cpu(*p); + if (gfs2_holder_initialized(rd_gh)) { + rgd = (struct gfs2_rgrpd *)rd_gh->gh_gl->gl_object; + gfs2_assert_withdraw(sdp, + gfs2_glock_is_locked_by_me(rd_gh->gh_gl)); + } else { + rgd = gfs2_blk2rgrpd(sdp, bn, false); + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + 0, rd_gh); + if (ret) + goto out; + + /* Must be done with the rgrp glock held: */ + if (gfs2_rs_active(&ip->i_res) && + rgd == ip->i_res.rs_rbm.rgd) + gfs2_rs_deltree(&ip->i_res); + } + + if (!rgrp_contains_block(rgd, bn)) { + blks_outside_rgrp++; + continue; + } + + /* The size of our transactions will be unknown until we + actually process all the metadata blocks that relate to + the rgrp. So we estimate. We know it can't be more than + the dinode's i_blocks and we don't want to exceed the + journal flush threshold, sd_log_thresh2. */ + if (current->journal_info == NULL) { + unsigned int jblocks_rqsted, revokes; + + jblocks_rqsted = rgd->rd_length + RES_DINODE + + RES_INDIRECT; + isize_blks = gfs2_get_inode_blocks(&ip->i_inode); + if (isize_blks > atomic_read(&sdp->sd_log_thresh2)) + jblocks_rqsted += + atomic_read(&sdp->sd_log_thresh2); + else + jblocks_rqsted += isize_blks; + revokes = jblocks_rqsted; + if (meta) + revokes += hptrs(sdp, hgt); + else if (ip->i_depth) + revokes += sdp->sd_inptrs; + ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); + if (ret) + goto out_unlock; + down_write(&ip->i_rw_mutex); + } + /* check if we will exceed the transaction blocks requested */ + tr = current->journal_info; + if (tr->tr_num_buf_new + RES_STATFS + + RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { + /* We set blks_outside_rgrp to ensure the loop will + be repeated for the same rgrp, but with a new + transaction. */ + blks_outside_rgrp++; + /* This next part is tricky. If the buffer was added + to the transaction, we've already set some block + pointers to 0, so we better follow through and free + them, or we will introduce corruption (so break). + This may be impossible, or at least rare, but I + decided to cover the case regardless. + + If the buffer was not added to the transaction + (this call), doing so would exceed our transaction + size, so we need to end the transaction and start a + new one (so goto). */ + + if (buf_in_tr) + break; + goto out_unlock; + } + + gfs2_trans_add_meta(ip->i_gl, bh); + buf_in_tr = true; + *p = 0; + if (bstart + blen == bn) { + blen++; + continue; + } + if (bstart) { + __gfs2_free_blocks(ip, bstart, (u32)blen, meta); + (*btotal) += blen; + gfs2_add_inode_blocks(&ip->i_inode, -blen); + } + bstart = bn; + blen = 1; + } + if (bstart) { + __gfs2_free_blocks(ip, bstart, (u32)blen, meta); + (*btotal) += blen; + gfs2_add_inode_blocks(&ip->i_inode, -blen); + } +out_unlock: + if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks + outside the rgrp we just processed, + do it all over again. */ + if (current->journal_info) { + struct buffer_head *dibh = mp->mp_bh[0]; + + /* Every transaction boundary, we rewrite the dinode + to keep its di_blocks current in case of failure. */ + ip->i_inode.i_mtime = ip->i_inode.i_ctime = + current_time(&ip->i_inode); + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + } + gfs2_glock_dq_uninit(rd_gh); + cond_resched(); + goto more_rgrps; + } +out: + return ret; +} + +/** + * find_nonnull_ptr - find a non-null pointer given a metapath and height + * assumes the metapath is valid (with buffers) out to height h + * @mp: starting metapath + * @h: desired height to search + * + * Returns: true if a non-null pointer was found in the metapath buffer + * false if all remaining pointers are NULL in the buffer + */ +static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, + unsigned int h) +{ + __be64 *ptr; + unsigned int ptrs = hptrs(sdp, h) - 1; + + while (true) { + ptr = metapointer(h, mp); + if (*ptr) /* if we have a non-null pointer */ + return true; + + if (mp->mp_list[h] < ptrs) + mp->mp_list[h]++; + else + return false; /* no more pointers in this buffer */ + } +} + +enum dealloc_states { + DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */ + DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */ + DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. */ + DEALLOC_DONE = 3, /* process complete */ +}; - if (!size) +/** + * trunc_dealloc - truncate a file down to a desired size + * @ip: inode to truncate + * @newsize: The desired size of the file + * + * This function truncates a file to newsize. It works from the + * bottom up, and from the right to the left. In other words, it strips off + * the highest layer (data) before stripping any of the metadata. Doing it + * this way is best in case the operation is interrupted by power failure, etc. + * The dinode is rewritten in every transaction to guarantee integrity. + */ +static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct metapath mp; + struct buffer_head *dibh, *bh; + struct gfs2_holder rd_gh; + u64 lblock; + __u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */ + unsigned int strip_h = ip->i_height - 1; + u32 btotal = 0; + int ret, state; + int mp_h; /* metapath buffers are read in to this height */ + sector_t last_ra = 0; + u64 prev_bnr = 0; + bool preserve1; /* need to preserve the first meta pointer? */ + + if (!newsize) lblock = 0; else - lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; + lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift; + memset(&mp, 0, sizeof(mp)); find_metapath(sdp, lblock, &mp, ip->i_height); - error = gfs2_rindex_update(sdp); - if (error) - return error; - error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); - if (error) - return error; + memcpy(&nbof, &mp.mp_list, sizeof(nbof)); + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (ret) + return ret; - while (height--) { - struct strip_mine sm; - sm.sm_first = !!size; - sm.sm_height = height; + mp.mp_bh[0] = dibh; + ret = lookup_metapath(ip, &mp); + if (ret == ip->i_height) + state = DEALLOC_MP_FULL; /* We have a complete metapath */ + else + state = DEALLOC_FILL_MP; /* deal with partial metapath */ - error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm); - if (error) + ret = gfs2_rindex_update(sdp); + if (ret) + goto out_metapath; + + ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); + if (ret) + goto out_metapath; + gfs2_holder_mark_uninitialized(&rd_gh); + + mp_h = strip_h; + + while (state != DEALLOC_DONE) { + switch (state) { + /* Truncate a full metapath at the given strip height. + * Note that strip_h == mp_h in order to be in this state. */ + case DEALLOC_MP_FULL: + if (mp_h > 0) { /* issue read-ahead on metadata */ + __be64 *top; + + bh = mp.mp_bh[mp_h - 1]; + if (bh->b_blocknr != last_ra) { + last_ra = bh->b_blocknr; + top = metaptr1(mp_h - 1, &mp); + gfs2_metapath_ra(ip->i_gl, bh, top); + } + } + /* If we're truncating to a non-zero size and the mp is + at the beginning of file for the strip height, we + need to preserve the first metadata pointer. */ + preserve1 = (newsize && + (mp.mp_list[mp_h] == nbof[mp_h])); + bh = mp.mp_bh[mp_h]; + gfs2_assert_withdraw(sdp, bh); + if (gfs2_assert_withdraw(sdp, + prev_bnr != bh->b_blocknr)) { + printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, " + "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n", + sdp->sd_fsname, + (unsigned long long)ip->i_no_addr, + prev_bnr, ip->i_height, strip_h, mp_h); + } + prev_bnr = bh->b_blocknr; + ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal, + mp_h, preserve1); + /* If we hit an error or just swept dinode buffer, + just exit. */ + if (ret || !mp_h) { + state = DEALLOC_DONE; + break; + } + state = DEALLOC_MP_LOWER; + break; + + /* lower the metapath strip height */ + case DEALLOC_MP_LOWER: + /* We're done with the current buffer, so release it, + unless it's the dinode buffer. Then back up to the + previous pointer. */ + if (mp_h) { + brelse(mp.mp_bh[mp_h]); + mp.mp_bh[mp_h] = NULL; + } + /* If we can't get any lower in height, we've stripped + off all we can. Next step is to back up and start + stripping the previous level of metadata. */ + if (mp_h == 0) { + strip_h--; + memcpy(&mp.mp_list, &nbof, sizeof(nbof)); + mp_h = strip_h; + state = DEALLOC_FILL_MP; + break; + } + mp.mp_list[mp_h] = 0; + mp_h--; /* search one metadata height down */ + if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1) + break; /* loop around in the same state */ + mp.mp_list[mp_h]++; + /* Here we've found a part of the metapath that is not + * allocated. We need to search at that height for the + * next non-null pointer. */ + if (find_nonnull_ptr(sdp, &mp, mp_h)) { + state = DEALLOC_FILL_MP; + mp_h++; + } + /* No more non-null pointers at this height. Back up + to the previous height and try again. */ + break; /* loop around in the same state */ + + /* Fill the metapath with buffers to the given height. */ + case DEALLOC_FILL_MP: + /* Fill the buffers out to the current height. */ + ret = fillup_metapath(ip, &mp, mp_h); + if (ret < 0) + goto out; + + /* If buffers found for the entire strip height */ + if ((ret == ip->i_height) && (mp_h == strip_h)) { + state = DEALLOC_MP_FULL; + break; + } + if (ret < ip->i_height) /* We have a partial height */ + mp_h = ret - 1; + + /* If we find a non-null block pointer, crawl a bit + higher up in the metapath and try again, otherwise + we need to look lower for a new starting point. */ + if (find_nonnull_ptr(sdp, &mp, mp_h)) + mp_h++; + else + state = DEALLOC_MP_LOWER; break; + } } - gfs2_quota_unhold(ip); + if (btotal) { + if (current->journal_info == NULL) { + ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + + RES_QUOTA, 0); + if (ret) + goto out; + down_write(&ip->i_rw_mutex); + } + gfs2_statfs_change(sdp, 0, +btotal, 0); + gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, + ip->i_inode.i_gid); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + } - return error; +out: + if (gfs2_holder_initialized(&rd_gh)) + gfs2_glock_dq_uninit(&rd_gh); + if (current->journal_info) { + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + cond_resched(); + } + gfs2_quota_unhold(ip); +out_metapath: + release_metapath(&mp); + return ret; } static int trunc_end(struct gfs2_inode *ip) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6fe2a59c6a9a5e8ba14e7ff4f4fefd729686bf58..c2062a108d19602544276f596de75b1f593a1e61 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -911,11 +911,15 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; - if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip)) + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + /* fallocate is needed by gfs2_grow to reserve space in the rindex */ + if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex) return -EOPNOTSUPP; inode_lock(inode); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ec0848fcca02d8960ba671b9b0be5a56ed3fd7ed..959a19ced4d5f83fccf7e82c88a5855f3d1d26cc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -73,7 +73,7 @@ static DEFINE_SPINLOCK(lru_lock); static struct rhashtable_params ht_parms = { .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, - .key_len = sizeof(struct lm_lockname), + .key_len = offsetofend(struct lm_lockname, ln_type), .key_offset = offsetof(struct gfs2_glock, gl_name), .head_offset = offsetof(struct gfs2_glock, gl_node), }; @@ -449,6 +449,9 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && + target != LM_ST_UNLOCKED) + return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -484,7 +487,8 @@ __acquires(&gl->gl_lockref.lock) } else if (ret) { pr_err("lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, 1); + GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN, + &sdp->sd_flags)); } } else { /* lock_nolock */ finish_xmote(gl, target); @@ -653,10 +657,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type, .ln_sbd = sdp }; - struct gfs2_glock *gl, *tmp = NULL; + struct gfs2_glock *gl, *tmp; struct address_space *mapping; struct kmem_cache *cachep; - int ret, tries = 0; + int ret = 0; rcu_read_lock(); gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); @@ -721,35 +725,32 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } again: - ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node, - ht_parms); - if (ret == 0) { + rcu_read_lock(); + tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node, + ht_parms); + if (!tmp) { *glp = gl; - return 0; + goto out; } - - if (ret == -EEXIST) { - ret = 0; - rcu_read_lock(); - tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); - if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { - if (++tries < 100) { - rcu_read_unlock(); - cond_resched(); - goto again; - } - tmp = NULL; - ret = -ENOMEM; - } - rcu_read_unlock(); - } else { - WARN_ON_ONCE(ret); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto out_free; } + if (lockref_get_not_dead(&tmp->gl_lockref)) { + *glp = tmp; + goto out_free; + } + rcu_read_unlock(); + cond_resched(); + goto again; + +out_free: kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); atomic_dec(&sdp->sd_glock_disposal); - *glp = tmp; +out: + rcu_read_unlock(); return ret; } @@ -1918,10 +1919,10 @@ static const struct seq_operations gfs2_sbstats_seq_ops = { #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) -static int gfs2_glocks_open(struct inode *inode, struct file *file) +static int __gfs2_glocks_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) { - int ret = seq_open_private(file, &gfs2_glock_seq_ops, - sizeof(struct gfs2_glock_iter)); + int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter)); if (ret == 0) { struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; @@ -1932,11 +1933,16 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); + rhashtable_walk_enter(&gl_hash_table, &gi->hti); } return ret; } +static int gfs2_glocks_open(struct inode *inode, struct file *file) +{ + return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops); +} + static int gfs2_glocks_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; @@ -1949,20 +1955,7 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file) static int gfs2_glstats_open(struct inode *inode, struct file *file) { - int ret = seq_open_private(file, &gfs2_glstats_seq_ops, - sizeof(struct gfs2_glock_iter)); - if (ret == 0) { - struct seq_file *seq = file->private_data; - struct gfs2_glock_iter *gi = seq->private; - gi->sdp = inode->i_private; - gi->last_pos = 0; - seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); - if (seq->buf) - seq->size = GFS2_SEQ_GOODSIZE; - gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); - } - return ret; + return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops); } static int gfs2_sbstats_open(struct inode *inode, struct file *file) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 511e1ed7e2ded7b0a9dc9882cffe0b66c37c96a2..b7cf65d13561fedf98a72dfc66b36e70d84100cd 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -203,11 +203,15 @@ enum { DFL_DLM_RECOVERY = 6, }; +/* + * We are using struct lm_lockname as an rhashtable key. Avoid holes within + * the struct; padding at the end is fine. + */ struct lm_lockname { - struct gfs2_sbd *ln_sbd; u64 ln_number; + struct gfs2_sbd *ln_sbd; unsigned int ln_type; -} __packed __aligned(sizeof(int)); +}; #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e279c3ce27be3cd9f3048557bd90e06f21df9e85..9f605ea4810c68b4ec8e7092a1b1668e0659aa27 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -202,8 +202,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, fail_refresh: ip->i_iopen_gh.gh_flags |= GL_NOCACHE; ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_put: if (io_gl) gfs2_glock_put(io_gl); @@ -667,6 +666,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, ip->i_height = 0; ip->i_depth = 0; ip->i_entries = 0; + ip->i_no_addr = 0; /* Temporarily zero until real addr is assigned */ switch(mode & S_IFMT) { case S_IFREG: diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f865b96374df2b5c40ecfb13663154499ec09b31..d2955daf17a4fcefa2ded3a412f67732315de12a 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META; + int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b108e7ba81af73568b71ec0b163bf68fbbde8948..ed67548b286ccc58d84732d00af5ca281772bd39 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1222,12 +1223,7 @@ static int set_gfs2_super(struct super_block *s, void *data) { s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; - - /* - * We set the bdi here to the queue backing, file systems can - * overwrite this in ->fill_super() - */ - s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdi_get(s->s_bdev->bd_bdi); return 0; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 86ccc01593937d93b7e5901ec26ed30bd5a4ab32..83c9909ff14a6bf7b741dd29cb316a346ba23c94 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -483,13 +483,6 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } } -static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) -{ - u64 first = rgd->rd_data0; - u64 last = first + rgd->rd_data; - return first <= block && block < last; -} - /** * gfs2_blk2rgrpd - Find resource group for a given data/meta block number * @sdp: The GFS2 superblock diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 66b51cf66dfa3ca7f29a5ad7879d3485e956c462..e90478e2f5453be647d6386bb17e5dce52cccbba 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -83,5 +83,12 @@ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) return rs && !RB_EMPTY_NODE(&rs->rs_node); } +static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) +{ + u64 first = rgd->rd_data0; + u64 last = first + rgd->rd_data; + return first <= block && block < last; +} + extern void check_and_update_goal(struct gfs2_inode *ip); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 361796a84fce4f444f563592aa05f4e25642aa61..29b0473f6e74132c465cc2e398552d977be98e8d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -793,7 +793,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC))) return; - + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) { @@ -1538,8 +1539,7 @@ static void gfs2_evict_inode(struct inode *inode) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; } @@ -1617,7 +1617,7 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_holder_initialized(&ip->i_iopen_gh)) { if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_glock_dq(&ip->i_iopen_gh); } gfs2_holder_uninit(&ip->i_iopen_gh); } @@ -1639,8 +1639,7 @@ static void gfs2_evict_inode(struct inode *inode) if (gfs2_holder_initialized(&ip->i_iopen_gh)) { ip->i_iopen_gh.gh_gl->gl_object = NULL; ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); } } diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index e33a0d36a93ebea7183909ff45f523302943b694..5d01826545809de7de454e2615553fc6a637ce70 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -485,8 +485,8 @@ void hfs_file_truncate(struct inode *inode) /* XXX: Can use generic_cont_expand? */ size = inode->i_size - 1; - res = pagecache_write_begin(NULL, mapping, size+1, 0, - AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + res = pagecache_write_begin(NULL, mapping, size+1, 0, 0, + &page, &fsdata); if (!res) { res = pagecache_write_end(NULL, mapping, size+1, 0, 0, page, fsdata); diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index feca524ce2a5c7d6034bf0a528679fe923381507..a3eb640b4f8f8474130a893100303a0211d9037c 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -545,9 +545,8 @@ void hfsplus_file_truncate(struct inode *inode) void *fsdata; loff_t size = inode->i_size; - res = pagecache_write_begin(NULL, mapping, size, 0, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); + res = pagecache_write_begin(NULL, mapping, size, 0, 0, + &page, &fsdata); if (res) return; res = pagecache_write_end(NULL, mapping, size, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dde861387a407810b35f73ff37c3ce6389048326..d44f5456eb9baf943186a2d145736c6435f891fb 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/fs/inode.c b/fs/inode.c index 88110fd0b282e49246dc9cd93a1d6e173d951d7b..db5914783a7130d77725502cb4182c05ff7775c2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -119,7 +119,7 @@ static int no_open(struct inode *inode, struct file *file) } /** - * inode_init_always - perform inode structure intialisation + * inode_init_always - perform inode structure initialisation * @sb: superblock inode belongs to * @inode: inode to initialise * @@ -371,9 +371,6 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_lru); address_space_init_once(&inode->i_data); i_size_ordered_init(inode); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&inode->i_fsnotify_marks); -#endif } EXPORT_SYMBOL(inode_init_once); @@ -405,6 +402,8 @@ static void inode_lru_list_add(struct inode *inode) { if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); + else + inode->i_state |= I_REFERENCED; } /* @@ -1492,7 +1491,6 @@ static void iput_final(struct inode *inode) drop = generic_drop_inode(inode); if (!drop && (sb->s_flags & MS_ACTIVE)) { - inode->i_state |= I_REFERENCED; inode_add_lru(inode); spin_unlock(&inode->i_lock); return; diff --git a/fs/internal.h b/fs/internal.h index 11c6d89dce9c9ae2e1bc1d04614a732ccc0a952e..9676fe11c093538d284ec5c2c59e9a2a9e37c1f6 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -108,8 +108,6 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname, extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *); -extern long do_handle_open(int mountdirfd, - struct file_handle __user *ufh, int open_flag); extern int open_check_o_direct(struct file *f); extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file *filp_clone_open(struct file *); @@ -128,8 +126,6 @@ static inline bool atime_needs_update_rcu(const struct path *path, return __atime_needs_update(path, inode, true); } -extern bool atime_needs_update_rcu(const struct path *, struct inode *); - /* * fs-writeback.c */ diff --git a/fs/iomap.c b/fs/iomap.c index 141c3cd55a8b2d974f431d7710fbe4de58f78355..4b10892967a5ae9a4ece5d8571e47c01c72f74da 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -158,12 +158,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ssize_t written = 0; unsigned int flags = AOP_FLAG_NOFS; - /* - * Copies from kernel address space cannot fail (NFSD is a big user). - */ - if (!iter_is_iovec(i)) - flags |= AOP_FLAG_UNINTERRUPTIBLE; - do { struct page *page; unsigned long offset; /* Offset into pagecache page */ @@ -291,8 +285,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, return PTR_ERR(rpage); status = iomap_write_begin(inode, pos, bytes, - AOP_FLAG_NOFS | AOP_FLAG_UNINTERRUPTIBLE, - &page, iomap); + AOP_FLAG_NOFS, &page, iomap); put_page(rpage); if (unlikely(status)) return status; @@ -343,8 +336,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, struct page *page; int status; - status = iomap_write_begin(inode, pos, bytes, - AOP_FLAG_UNINTERRUPTIBLE | AOP_FLAG_NOFS, &page, iomap); + status = iomap_write_begin(inode, pos, bytes, AOP_FLAG_NOFS, &page, + iomap); if (status) return status; @@ -360,7 +353,8 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes, sector_t sector = iomap->blkno + (((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9); - return __dax_zero_page_range(iomap->bdev, sector, offset, bytes); + return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector, + offset, bytes); } static loff_t @@ -887,16 +881,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, flags |= IOMAP_WRITE; } - if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, start, end); - if (ret) - goto out_free_dio; + ret = filemap_write_and_wait_range(mapping, start, end); + if (ret) + goto out_free_dio; - ret = invalidate_inode_pages2_range(mapping, - start >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - ret = 0; - } + ret = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; inode_dio_begin(inode); @@ -911,6 +903,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, break; } pos += ret; + + if (iov_iter_rw(iter) == READ && pos >= dio->i_size) + break; } while ((count = iov_iter_count(iter)) > 0); blk_finish_plug(&plug); @@ -951,7 +946,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... */ - if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { + if (iov_iter_rw(iter) == WRITE) { int err = invalidate_inode_pages2_range(mapping, start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(err); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5adc2fb62b0fab89899e5d0acba1e8019a73c766..ebad34266bcfbbf4fb5d491b2f2699583f7116e1 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -43,6 +43,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -205,6 +206,14 @@ static int kjournald2(void *arg) journal->j_task = current; wake_up(&journal->j_wait_done_commit); + /* + * Make sure that no allocations from this kernel thread will ever + * recurse to the fs layer because we are responsible for the + * transaction commit and any fs involvement might get stuck waiting for + * the trasn. commit. + */ + memalloc_nofs_save(); + /* * And now, wait forever for commit wakeup events. */ @@ -691,8 +700,21 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; - jbd2_might_wait_for_commit(journal); read_lock(&journal->j_state_lock); +#ifdef CONFIG_PROVE_LOCKING + /* + * Some callers make sure transaction is already committing and in that + * case we cannot block on open handles anymore. So don't warn in that + * case. + */ + if (tid_gt(tid, journal->j_commit_sequence) && + (!journal->j_committing_transaction || + journal->j_committing_transaction->t_tid != tid)) { + read_unlock(&journal->j_state_lock); + jbd2_might_wait_for_commit(journal); + read_lock(&journal->j_state_lock); + } +#endif #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_ERR @@ -913,7 +935,8 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, + REQ_SYNC | REQ_FUA); if (ret) goto out; @@ -1314,7 +1337,7 @@ static int journal_reset(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - REQ_FUA); + REQ_SYNC | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } return jbd2_journal_start_thread(journal); @@ -1454,7 +1477,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) sb->s_errno = cpu_to_be32(journal->j_errno); read_unlock(&journal->j_state_lock); - jbd2_write_superblock(journal, REQ_FUA); + jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -1721,7 +1744,7 @@ int jbd2_journal_destroy(journal_t *journal) write_unlock(&journal->j_state_lock); jbd2_mark_journal_empty(journal, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -1980,7 +2003,7 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal, REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); J_ASSERT(!journal->j_running_transaction); @@ -2026,7 +2049,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... */ mutex_lock(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal, REQ_FUA); + jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } @@ -2340,7 +2363,7 @@ static int jbd2_journal_init_journal_head_cache(void) jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ - SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU, + SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, NULL); /* ctor */ retval = 0; if (!jbd2_journal_head_cache) { diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5e659ee08d6ae84046b9b8d59f41641e9ef28b22..2d30a6da7013112adafd08545b32cb00d179311c 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -388,6 +389,11 @@ static int start_this_handle(journal_t *journal, handle_t *handle, rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_); jbd2_journal_free_transaction(new_transaction); + /* + * Ensure that no allocations done while the transaction is open are + * going to recurse back to the fs layer. + */ + handle->saved_alloc_context = memalloc_nofs_save(); return 0; } @@ -466,6 +472,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, trace_jbd2_handle_start(journal->j_fs_dev->bd_dev, handle->h_transaction->t_tid, type, line_no, nblocks); + return handle; } EXPORT_SYMBOL(jbd2__journal_start); @@ -673,6 +680,12 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_); handle->h_buffer_credits = nblocks; + /* + * Restore the original nofs context because the journal restart + * is basically the same thing as journal stop and start. + * start_this_handle will start a new nofs context. + */ + memalloc_nofs_restore(handle->saved_alloc_context); ret = start_this_handle(journal, handle, gfp_mask); return ret; } @@ -1760,6 +1773,11 @@ int jbd2_journal_stop(handle_t *handle) if (handle->h_rsv_handle) jbd2_journal_free_reserved(handle->h_rsv_handle); free_and_exit: + /* + * Scope of the GFP_NOFS context is over here and so we can restore the + * original alloc context. + */ + memalloc_nofs_restore(handle->saved_alloc_context); jbd2_free_handle(handle); return err; } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 06a71dbd4833e3bdf4ea5277bc50e15ee40c45ad..389ea53ea487538061ff3b6da78e27df2f894ac1 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -1366,7 +1366,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, jffs2_add_ino_cache(c, f->inocache); } if (!f->inocache) { - JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino); + JFFS2_ERROR("requested to read a nonexistent ino %u\n", ino); return -ENOENT; } diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index fc89f94367842583d7dd9f9d8d93707e5bd1a50c..5c5ac5b3aec33baf1da1af89f762d62275a59916 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -64,7 +64,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case JFS_IOC_GETFLAGS: - jfs_get_inode_flags(jfs_inode); flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; flags = jfs_map_ext2(flags, 0); return put_user(flags, (int __user *) arg); @@ -98,7 +97,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Lock against other parallel changes of flags */ inode_lock(inode); - jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; /* diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 6aca224a5d6847ac29104f41053f4b1de9f9de8f..f36ef68905a74d830e75e7566c1145c8f55fc0cf 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3148,7 +3148,6 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) else dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, jfs_ip->saved_gid)); - jfs_get_inode_flags(jfs_ip); /* * mode2 is only needed for storing the higher order bits. * Trust i_mode for the lower order ones diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 375dd257a34fe343b1b25a0176b4dd863aeae0c4..5e9b7bb3aabf9d5827d367d35ee1bd4740af90e3 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -45,24 +45,6 @@ void jfs_set_inode_flags(struct inode *inode) S_DIRSYNC | S_SYNC); } -void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) -{ - unsigned int flags = jfs_ip->vfs_inode.i_flags; - - jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | - JFS_DIRSYNC_FL | JFS_SYNC_FL); - if (flags & S_IMMUTABLE) - jfs_ip->mode2 |= JFS_IMMUTABLE_FL; - if (flags & S_APPEND) - jfs_ip->mode2 |= JFS_APPEND_FL; - if (flags & S_NOATIME) - jfs_ip->mode2 |= JFS_NOATIME_FL; - if (flags & S_DIRSYNC) - jfs_ip->mode2 |= JFS_DIRSYNC_FL; - if (flags & S_SYNC) - jfs_ip->mode2 |= JFS_SYNC_FL; -} - /* * NAME: ialloc() * diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 9271cfe4a1490ca88785a7973ecd22fd88000e73..7b0b3a40788ffd0a834df3651a24b66551b71f99 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -33,7 +33,6 @@ extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); -extern void jfs_get_inode_flags(struct jfs_inode_info *); extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c64c2574a0aad7eecf4922a6a005341c26b3fa24..e8aad7d87b8c938aad4a51db500684950df73944 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -45,6 +45,7 @@ #include "jfs_acl.h" #include "jfs_debug.h" #include "jfs_xattr.h" +#include "jfs_dinode.h" MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); @@ -181,6 +182,35 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +#ifdef CONFIG_QUOTA +static int jfs_quota_off(struct super_block *sb, int type); +static int jfs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); + +static void jfs_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + jfs_quota_off(sb, type); +} + +static const struct quotactl_ops jfs_quotactl_ops = { + .quota_on = jfs_quota_on, + .quota_off = jfs_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; +#else +static inline void jfs_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void jfs_put_super(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); @@ -188,7 +218,7 @@ static void jfs_put_super(struct super_block *sb) jfs_info("In jfs_put_super"); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + jfs_quota_off_umount(sb); rc = jfs_umount(sb); if (rc) @@ -536,7 +566,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = jfs_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &jfs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif @@ -840,6 +870,51 @@ static struct dquot **jfs_get_dquots(struct inode *inode) { return JFS_IP(inode)->i_dquot; } + +static int jfs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + JFS_IP(inode)->mode2 |= JFS_NOATIME_FL | JFS_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int jfs_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + JFS_IP(inode)->mode2 &= ~(JFS_NOATIME_FL | JFS_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} #endif static const struct super_operations jfs_super_operations = { diff --git a/fs/libfs.c b/fs/libfs.c index a8b62e5d43a972d3b02867e6d9c2136b23987ecd..a04395334bb156ae04853639650dc9751fb55753 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -507,7 +507,7 @@ EXPORT_SYMBOL(simple_write_end); * to pass it an appropriate max_reserved value to avoid collisions. */ int simple_fill_super(struct super_block *s, unsigned long magic, - struct tree_descr *files) + const struct tree_descr *files) { struct inode *inode; struct dentry *root; diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 41e491b8e5d7e40164e0e51d90534c9d8660a94e..27d577dbe51a48f21950a63758e76783fed67eca 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -69,6 +69,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) goto out_nobind; + host->h_nlmclnt_ops = nlm_init->nlmclnt_ops; return host; out_nobind: nlmclnt_release_host(host); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 112952037933b79fa36dbea67579c576ee19be58..066ac313ae5c0fff15188d3123079acd9a7180f3 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -150,17 +150,22 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) * @host: address of a valid nlm_host context representing the NLM server * @cmd: fcntl-style file lock operation to perform * @fl: address of arguments for the lock operation + * @data: address of data to be sent to callback operations * */ -int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) +int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data) { struct nlm_rqst *call; int status; + const struct nlmclnt_operations *nlmclnt_ops = host->h_nlmclnt_ops; call = nlm_alloc_call(host); if (call == NULL) return -ENOMEM; + if (nlmclnt_ops && nlmclnt_ops->nlmclnt_alloc_call) + nlmclnt_ops->nlmclnt_alloc_call(data); + nlmclnt_locks_init_private(fl, host); if (!fl->fl_u.nfs_fl.owner) { /* lockowner allocation has failed */ @@ -169,6 +174,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) } /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); + call->a_callback_data = data; if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { if (fl->fl_type != F_UNLCK) { @@ -214,8 +220,12 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) void nlmclnt_release_call(struct nlm_rqst *call) { + const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops; + if (!atomic_dec_and_test(&call->a_count)) return; + if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call) + nlmclnt_ops->nlmclnt_release_call(call->a_callback_data); nlmclnt_release_host(call->a_host); nlmclnt_release_lockargs(call); kfree(call); @@ -687,6 +697,19 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) return status; } +static void nlmclnt_unlock_prepare(struct rpc_task *task, void *data) +{ + struct nlm_rqst *req = data; + const struct nlmclnt_operations *nlmclnt_ops = req->a_host->h_nlmclnt_ops; + bool defer_call = false; + + if (nlmclnt_ops && nlmclnt_ops->nlmclnt_unlock_prepare) + defer_call = nlmclnt_ops->nlmclnt_unlock_prepare(task, req->a_callback_data); + + if (!defer_call) + rpc_call_start(task); +} + static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) { struct nlm_rqst *req = data; @@ -720,6 +743,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) } static const struct rpc_call_ops nlmclnt_unlock_ops = { + .rpc_call_prepare = nlmclnt_unlock_prepare, .rpc_call_done = nlmclnt_unlock_callback, .rpc_release = nlmclnt_rpc_release, }; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index e7c8b9c76e48573a381d0196d0dc2166045a7b3e..5d481e8a1b5d0c732207c4a583027ec084d7a0db 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -132,6 +132,8 @@ lockd(void *vrqstp) { int err = 0; struct svc_rqst *rqstp = vrqstp; + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -176,6 +178,8 @@ lockd(void *vrqstp) if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); return 0; } @@ -270,8 +274,6 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); - cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 5581e020644bda687f0b4bd09645a058a18d4537..3507c80d1d4b962b579d501201f4cdd060e6248b 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -870,15 +870,15 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status) if (!(block = nlmsvc_find_block(cookie))) return; - if (block) { - if (status == nlm_lck_denied_grace_period) { - /* Try again in a couple of seconds */ - nlmsvc_insert_block(block, 10 * HZ); - } else { - /* Lock is now held by client, or has been rejected. - * In both cases, the block should be removed. */ - nlmsvc_unlink_block(block); - } + if (status == nlm_lck_denied_grace_period) { + /* Try again in a couple of seconds */ + nlmsvc_insert_block(block, 10 * HZ); + } else { + /* + * Lock is now held by client, or has been rejected. + * In both cases, the block should be removed. + */ + nlmsvc_unlink_block(block); } nlmsvc_release_block(block); } diff --git a/fs/locks.c b/fs/locks.c index 26811321d39b8d404046100ac3dcc4b5e04f5cc7..af2031a1fcff14fcfb05e108b9d49bfc4a74685f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2504,7 +2504,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx) .fl_owner = filp, .fl_pid = current->tgid, .fl_file = filp, - .fl_flags = FL_FLOCK, + .fl_flags = FL_FLOCK | FL_CLOSE, .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; diff --git a/fs/mount.h b/fs/mount.h index 2826543a131d1217f306404e264a27a2ee163ec6..bf1fda6eed8f36beae7e678a0be8e78cd9317201 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -59,7 +59,7 @@ struct mount { struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ #ifdef CONFIG_FSNOTIFY - struct hlist_head mnt_fsnotify_marks; + struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; #endif int mnt_id; /* mount identifier */ diff --git a/fs/namei.c b/fs/namei.c index 19dcf62133cc95162d364f7ef43c17d280ee6448..6571a5f5112ed82894fdbec1c9b6dfaf557d93b1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -340,22 +340,14 @@ int generic_permission(struct inode *inode, int mask) if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ - if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) - return 0; if (!(mask & MAY_WRITE)) if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH)) return 0; - return -EACCES; - } - /* - * Read/write DACs are always overridable. - * Executable DACs are overridable when there is - * at least one exec bit set. - */ - if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) return 0; + return -EACCES; + } /* * Searching includes executable on directories, else just read. @@ -364,6 +356,14 @@ int generic_permission(struct inode *inode, int mask) if (mask == MAY_READ) if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH)) return 0; + /* + * Read/write DACs are always overridable. + * Executable DACs are overridable when there is + * at least one exec bit set. + */ + if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) + return 0; return -EACCES; } @@ -2142,7 +2142,6 @@ static int link_path_walk(const char *name, struct nameidata *nd) static const char *path_init(struct nameidata *nd, unsigned flags) { - int retval = 0; const char *s = nd->name->name; if (!*s) @@ -2154,13 +2153,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags) if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; - if (*s) { - if (!d_can_lookup(root)) - return ERR_PTR(-ENOTDIR); - retval = inode_permission(inode, MAY_EXEC); - if (retval) - return ERR_PTR(retval); - } + if (*s && unlikely(!d_can_lookup(root))) + return ERR_PTR(-ENOTDIR); nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { @@ -2258,6 +2252,35 @@ static inline int lookup_last(struct nameidata *nd) return walk_component(nd, 0); } +static int handle_lookup_down(struct nameidata *nd) +{ + struct path path = nd->path; + struct inode *inode = nd->inode; + unsigned seq = nd->seq; + int err; + + if (nd->flags & LOOKUP_RCU) { + /* + * don't bother with unlazy_walk on failure - we are + * at the very beginning of walk, so we lose nothing + * if we simply redo everything in non-RCU mode + */ + if (unlikely(!__follow_mount_rcu(nd, &path, &inode, &seq))) + return -ECHILD; + } else { + dget(path.dentry); + err = follow_managed(&path, nd); + if (unlikely(err < 0)) + return err; + inode = d_backing_inode(path.dentry); + seq = 0; + } + path_to_nameidata(&path, nd); + nd->inode = inode; + nd->seq = seq; + return 0; +} + /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path) { @@ -2266,6 +2289,15 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path if (IS_ERR(s)) return PTR_ERR(s); + + if (unlikely(flags & LOOKUP_DOWN)) { + err = handle_lookup_down(nd); + if (unlikely(err < 0)) { + terminate_walk(nd); + return err; + } + } + while (!(err = link_path_walk(s, nd)) && ((err = lookup_last(nd)) > 0)) { s = trailing_symlink(nd); @@ -4766,7 +4798,7 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) struct page *page; void *fsdata; int err; - unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; + unsigned int flags = 0; if (nofs) flags |= AOP_FLAG_NOFS; diff --git a/fs/namespace.c b/fs/namespace.c index cc1375eff88c75abdf14aeb22a55c3d8b802895f..5a4438445bf788e65ade26b741911861d7ea2084 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -236,9 +236,6 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; @@ -3465,8 +3462,9 @@ static void mntns_put(struct ns_common *ns) static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) { struct fs_struct *fs = current->fs; - struct mnt_namespace *mnt_ns = to_mnt_ns(ns); + struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; struct path root; + int err; if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || !ns_capable(current_user_ns(), CAP_SYS_CHROOT) || @@ -3477,15 +3475,20 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns) return -EINVAL; get_mnt_ns(mnt_ns); - put_mnt_ns(nsproxy->mnt_ns); + old_mnt_ns = nsproxy->mnt_ns; nsproxy->mnt_ns = mnt_ns; /* Find the root */ - root.mnt = &mnt_ns->root->mnt; - root.dentry = mnt_ns->root->mnt.mnt_root; - path_get(&root); - while(d_mountpoint(root.dentry) && follow_down_one(&root)) - ; + err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt, + "/", LOOKUP_DOWN, &root); + if (err) { + /* revert to old namespace */ + nsproxy->mnt_ns = old_mnt_ns; + put_mnt_ns(mnt_ns); + return err; + } + + put_mnt_ns(old_mnt_ns); /* Update the pwd and root */ set_fs_pwd(fs, &root); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d5606099712a4cb2059c4dac9036aba35e9c3751..6d0f14c8609971378ee75104380873a6da67a4be 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -554,12 +554,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) sb->s_magic = NCP_SUPER_MAGIC; sb->s_op = &ncp_sops; sb->s_d_op = &ncp_dentry_operations; - sb->s_bdi = &server->bdi; server = NCP_SBP(sb); memset(server, 0, sizeof(*server)); - error = bdi_setup_and_register(&server->bdi, "ncpfs"); + error = super_setup_bdi(sb); if (error) goto out_fput; @@ -568,7 +567,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) if (data.info_fd != -1) { struct socket *info_sock = sockfd_lookup(data.info_fd, &error); if (!info_sock) - goto out_bdi; + goto out_fput; server->info_sock = info_sock; error = -EBADFD; if (info_sock->type != SOCK_STREAM) @@ -746,8 +745,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) out_fput2: if (server->info_sock) sockfd_put(server->info_sock); -out_bdi: - bdi_destroy(&server->bdi); out_fput: sockfd_put(sock); out: @@ -788,7 +785,6 @@ static void ncp_put_super(struct super_block *sb) kill_pid(server->m.wdog_pid, SIGTERM, 1); put_pid(server->m.wdog_pid); - bdi_destroy(&server->bdi); kfree(server->priv.data); kfree(server->auth.object_name); vfree(server->rxbuf); diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index 55e26fd8088694308aeee3ceff1d11b91d6b1fb5..366fd63cc506fc3e61be93877349ef6032cf5734 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -143,7 +143,6 @@ struct ncp_server { size_t len; __u8 data[128]; } unexpected_packet; - struct backing_dev_info bdi; }; extern void ncp_tcp_rcv_proc(struct work_struct *work); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f31fd0dd92c61d22fc477b6da0ccc62a7eb0481d..69d02cf8cf370678609175d6e5626d0dedf02c3a 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -123,11 +123,6 @@ config PNFS_BLOCK depends on NFS_V4_1 && BLK_DEV_DM default NFS_V4 -config PNFS_OBJLAYOUT - tristate - depends on NFS_V4_1 && SCSI_OSD_ULD - default NFS_V4 - config PNFS_FLEXFILE_LAYOUT tristate depends on NFS_V4_1 && NFS_V3 diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 6abdda209642e70d917308282d82ec2cfa0f353b..98f4e5728a67c87c13cc8ca52d00e72e7bb3bc2c 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -31,6 +31,5 @@ nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ -obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/ diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 773774531aff5fc081610706ea39756b0e5a5c25..73a1f928226c05706bab82a37468973bff7b185c 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -76,7 +76,10 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); - while (!kthread_should_stop()) { + while (!kthread_freezable_should_stop(NULL)) { + + if (signal_pending(current)) + flush_signals(current); /* * Listen for a request on the socket */ @@ -85,6 +88,8 @@ nfs4_callback_svc(void *vrqstp) continue; svc_process(rqstp); } + svc_exit_thread(rqstp); + module_put_and_exit(0); return 0; } @@ -103,9 +108,10 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); - while (!kthread_should_stop()) { - if (try_to_freeze()) - continue; + while (!kthread_freezable_should_stop(NULL)) { + + if (signal_pending(current)) + flush_signals(current); prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); @@ -121,11 +127,13 @@ nfs41_callback_svc(void *vrqstp) error); } else { spin_unlock_bh(&serv->sv_cb_lock); - schedule(); + if (!kthread_should_stop()) + schedule(); finish_wait(&serv->sv_cb_waitq, &wq); } - flush_signals(current); } + svc_exit_thread(rqstp); + module_put_and_exit(0); return 0; } @@ -221,14 +229,14 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, static struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, + .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, + .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; @@ -280,7 +288,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); + serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f073a6d2c6a51a4ec91cf38783519ca957bdece4..52479f180ea1497af4f8a250db4d8c222ce8bfaa 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -131,10 +131,11 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, if (!inode) continue; if (!nfs_sb_active(inode->i_sb)) { - rcu_read_lock(); + rcu_read_unlock(); spin_unlock(&clp->cl_lock); iput(inode); spin_lock(&clp->cl_lock); + rcu_read_lock(); goto restart; } return inode; @@ -170,10 +171,11 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp, if (!inode) continue; if (!nfs_sb_active(inode->i_sb)) { - rcu_read_lock(); + rcu_read_unlock(); spin_unlock(&clp->cl_lock); iput(inode); spin_lock(&clp->cl_lock); + rcu_read_lock(); goto restart; } return inode; @@ -317,31 +319,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp, struct cb_layoutrecallargs *args) { - u32 res; - - dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); if (args->cbl_recall_type == RETURN_FILE) - res = initiate_file_draining(clp, args); - else - res = initiate_bulk_draining(clp, args); - dprintk("%s returning %i\n", __func__, res); - return res; - + return initiate_file_draining(clp, args); + return initiate_bulk_draining(clp, args); } __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args, void *dummy, struct cb_process_state *cps) { - u32 res; - - dprintk("%s: -->\n", __func__); + u32 res = NFS4ERR_OP_NOT_IN_SESSION; if (cps->clp) res = do_callback_layoutrecall(cps->clp, args); - else - res = NFS4ERR_OP_NOT_IN_SESSION; - - dprintk("%s: exit with status = %d\n", __func__, res); return cpu_to_be32(res); } @@ -364,8 +353,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, struct nfs_client *clp = cps->clp; struct nfs_server *server = NULL; - dprintk("%s: -->\n", __func__); - if (!clp) { res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); goto out; @@ -384,8 +371,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, goto found; } rcu_read_unlock(); - dprintk("%s: layout type %u not found\n", - __func__, dev->cbd_layout_type); continue; } @@ -395,8 +380,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, out: kfree(args->devs); - dprintk("%s: exit with status = %u\n", - __func__, be32_to_cpu(res)); return res; } @@ -417,16 +400,11 @@ static __be32 validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot, const struct cb_sequenceargs * args) { - dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n", - __func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr); - if (args->csa_slotid > tbl->server_highest_slotid) return htonl(NFS4ERR_BADSLOT); /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { - dprintk("%s seqid %u is a replay\n", - __func__, args->csa_sequenceid); if (nfs4_test_locked_slot(tbl, slot->slot_nr)) return htonl(NFS4ERR_DELAY); /* Signal process_op to set this error on next op */ @@ -480,15 +458,6 @@ static bool referring_call_exists(struct nfs_client *clp, for (j = 0; j < rclist->rcl_nrefcalls; j++) { ref = &rclist->rcl_refcalls[j]; - - dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u " - "slotid %u\n", __func__, - ((u32 *)&rclist->rcl_sessionid.data)[0], - ((u32 *)&rclist->rcl_sessionid.data)[1], - ((u32 *)&rclist->rcl_sessionid.data)[2], - ((u32 *)&rclist->rcl_sessionid.data)[3], - ref->rc_sequenceid, ref->rc_slotid); - status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid, ref->rc_sequenceid, HZ >> 1) < 0; if (status) @@ -593,8 +562,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_status = status; trace_nfs4_cb_sequence(args, res, status); - dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, - ntohl(status), ntohl(res->csr_status)); return status; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d051fc3583a9097d46d8159860ea6c22a7ec116a..c14758e08d738eec44bf08c79acee71366ce0e70 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -171,8 +171,6 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } hdr->nops = ntohl(*p); - dprintk("%s: minorversion %d nops %d\n", __func__, - hdr->minorversion, hdr->nops); return 0; } @@ -192,11 +190,8 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr status = decode_fh(xdr, &args->fh); if (unlikely(status != 0)) - goto out; - status = decode_bitmap(xdr, args->bitmap); -out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - return status; + return status; + return decode_bitmap(xdr, args->bitmap); } static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args) @@ -206,17 +201,12 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, status = decode_delegation_stateid(xdr, &args->stateid); if (unlikely(status != 0)) - goto out; + return status; p = read_buf(xdr, 4); - if (unlikely(p == NULL)) { - status = htonl(NFS4ERR_RESOURCE); - goto out; - } + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); args->truncate = ntohl(*p); - status = decode_fh(xdr, &args->fh); -out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - return status; + return decode_fh(xdr, &args->fh); } #if defined(CONFIG_NFS_V4_1) @@ -235,10 +225,8 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, uint32_t iomode; p = read_buf(xdr, 4 * sizeof(uint32_t)); - if (unlikely(p == NULL)) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); args->cbl_layout_type = ntohl(*p++); /* Depite the spec's xdr, iomode really belongs in the FILE switch, @@ -252,37 +240,23 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, args->cbl_range.iomode = iomode; status = decode_fh(xdr, &args->cbl_fh); if (unlikely(status != 0)) - goto out; + return status; p = read_buf(xdr, 2 * sizeof(uint64_t)); - if (unlikely(p == NULL)) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); p = xdr_decode_hyper(p, &args->cbl_range.offset); p = xdr_decode_hyper(p, &args->cbl_range.length); - status = decode_layout_stateid(xdr, &args->cbl_stateid); - if (unlikely(status != 0)) - goto out; + return decode_layout_stateid(xdr, &args->cbl_stateid); } else if (args->cbl_recall_type == RETURN_FSID) { p = read_buf(xdr, 2 * sizeof(uint64_t)); - if (unlikely(p == NULL)) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); p = xdr_decode_hyper(p, &args->cbl_fsid.major); p = xdr_decode_hyper(p, &args->cbl_fsid.minor); - } else if (args->cbl_recall_type != RETURN_ALL) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } - dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n", - __func__, - args->cbl_layout_type, iomode, - args->cbl_layoutchanged, args->cbl_recall_type); -out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - return status; + } else if (args->cbl_recall_type != RETURN_ALL) + return htonl(NFS4ERR_BADXDR); + return 0; } static @@ -437,12 +411,11 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, status = decode_sessionid(xdr, &args->csa_sessionid); if (status) - goto out; + return status; - status = htonl(NFS4ERR_RESOURCE); p = read_buf(xdr, 5 * sizeof(uint32_t)); if (unlikely(p == NULL)) - goto out; + return htonl(NFS4ERR_RESOURCE); args->csa_addr = svc_addr(rqstp); args->csa_sequenceid = ntohl(*p++); @@ -456,7 +429,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, sizeof(*args->csa_rclists), GFP_KERNEL); if (unlikely(args->csa_rclists == NULL)) - goto out; + return htonl(NFS4ERR_RESOURCE); for (i = 0; i < args->csa_nrclists; i++) { status = decode_rc_list(xdr, &args->csa_rclists[i]); @@ -466,27 +439,13 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp, } } } - status = 0; - - dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u " - "highestslotid %u cachethis %d nrclists %u\n", - __func__, - ((u32 *)&args->csa_sessionid)[0], - ((u32 *)&args->csa_sessionid)[1], - ((u32 *)&args->csa_sessionid)[2], - ((u32 *)&args->csa_sessionid)[3], - args->csa_sequenceid, args->csa_slotid, - args->csa_highestslotid, args->csa_cachethis, - args->csa_nrclists); -out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - return status; + return 0; out_free: for (i = 0; i < args->csa_nrclists; i++) kfree(args->csa_rclists[i].rcl_refcalls); kfree(args->csa_rclists); - goto out; + return status; } static __be32 decode_recallany_args(struct svc_rqst *rqstp, @@ -557,11 +516,8 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream status = decode_fh(xdr, &args->cbnl_fh); if (unlikely(status != 0)) - goto out; - status = decode_lockowner(xdr, args); -out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - return status; + return status; + return decode_lockowner(xdr, args); } #endif /* CONFIG_NFS_V4_1 */ @@ -707,7 +663,6 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } @@ -734,11 +689,11 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, __be32 status = res->csr_status; if (unlikely(status != 0)) - goto out; + return status; status = encode_sessionid(xdr, &res->csr_sessionid); if (status) - goto out; + return status; p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) @@ -748,9 +703,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, *p++ = htonl(res->csr_slotid); *p++ = htonl(res->csr_highestslotid); *p++ = htonl(res->csr_target_highestslotid); -out: - dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); - return status; + return 0; } static __be32 @@ -871,14 +824,10 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp, long maxlen; __be32 res; - dprintk("%s: start\n", __func__); status = decode_op_hdr(xdr_in, &op_nr); if (unlikely(status)) return status; - dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", - __func__, cps->minorversion, nop, op_nr); - switch (cps->minorversion) { case 0: status = preprocess_nfs4_op(op_nr, &op); @@ -917,7 +866,6 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp, return res; if (op->encode_res != NULL && status == 0) status = op->encode_res(rqstp, xdr_out, resp); - dprintk("%s: done, status = %d\n", __func__, ntohl(status)); return status; } @@ -937,8 +885,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r }; unsigned int nops = 0; - dprintk("%s: start\n", __func__); - xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); @@ -977,7 +923,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.nops = htonl(nops); nfs4_cb_free_slot(&cps); nfs_put_client(cps.clp); - dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success; out_invalidcred: diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 390ada8741bcbfd2e4aaecb3f759ec0707003674..ee5ddbd36088e66d21b2900fddb9c7ded0d17f9a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -218,6 +218,7 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) static void pnfs_init_server(struct nfs_server *server) { rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); + rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); } #else @@ -240,8 +241,6 @@ static void pnfs_init_server(struct nfs_server *server) */ void nfs_free_client(struct nfs_client *clp) { - dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); - nfs_fscache_release_client_cookie(clp); /* -EIO all pending I/O */ @@ -256,8 +255,6 @@ void nfs_free_client(struct nfs_client *clp) kfree(clp->cl_hostname); kfree(clp->cl_acceptor); kfree(clp); - - dprintk("<-- nfs_free_client()\n"); } EXPORT_SYMBOL_GPL(nfs_free_client); @@ -271,7 +268,6 @@ void nfs_put_client(struct nfs_client *clp) if (!clp) return; - dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); nn = net_generic(clp->cl_net, nfs_net_id); if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { @@ -382,9 +378,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, } smp_rmb(); - - dprintk("<-- %s found nfs_client %p for %s\n", - __func__, clp, cl_init->hostname ?: ""); return clp; } @@ -403,9 +396,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) return NULL; } - dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname, rpc_ops->version); - /* see if the client already exists */ do { spin_lock(&nn->nfs_client_lock); @@ -430,8 +420,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) new = rpc_ops->alloc_client(cl_init); } while (!IS_ERR(new)); - dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", - cl_init->hostname, PTR_ERR(new)); return new; } EXPORT_SYMBOL_GPL(nfs_get_client); @@ -558,6 +546,7 @@ static int nfs_start_lockd(struct nfs_server *server) .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 1 : 0, .net = clp->cl_net, + .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops, }; if (nlm_init.nfs_version > 3) @@ -624,27 +613,21 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp, { int error; - if (clp->cl_cons_state == NFS_CS_READY) { - /* the client is already initialised */ - dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp); + /* the client is already initialised */ + if (clp->cl_cons_state == NFS_CS_READY) return clp; - } /* * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); - if (error < 0) - goto error; - nfs_mark_client_ready(clp, NFS_CS_READY); + nfs_mark_client_ready(clp, error == 0 ? NFS_CS_READY : error); + if (error < 0) { + nfs_put_client(clp); + clp = ERR_PTR(error); + } return clp; - -error: - nfs_mark_client_ready(clp, error); - nfs_put_client(clp); - dprintk("<-- nfs_init_client() = xerror %d\n", error); - return ERR_PTR(error); } EXPORT_SYMBOL_GPL(nfs_init_client); @@ -668,8 +651,6 @@ static int nfs_init_server(struct nfs_server *server, struct nfs_client *clp; int error; - dprintk("--> nfs_init_server()\n"); - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) @@ -677,10 +658,8 @@ static int nfs_init_server(struct nfs_server *server, /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); - if (IS_ERR(clp)) { - dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); + if (IS_ERR(clp)) return PTR_ERR(clp); - } server->nfs_client = clp; @@ -725,13 +704,11 @@ static int nfs_init_server(struct nfs_server *server, server->mountd_protocol = data->mount_server.protocol; server->namelen = data->namlen; - dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); return 0; error: server->nfs_client = NULL; nfs_put_client(clp); - dprintk("<-- nfs_init_server() = xerror %d\n", error); return error; } @@ -761,9 +738,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT; - server->backing_dev_info.name = "nfs"; - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - if (server->wsize > max_rpc_payload) server->wsize = max_rpc_payload; if (server->wsize > NFS_MAX_FILE_IO_SIZE) @@ -801,12 +775,10 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs struct nfs_client *clp = server->nfs_client; int error; - dprintk("--> nfs_probe_fsinfo()\n"); - if (clp->rpc_ops->set_capabilities != NULL) { error = clp->rpc_ops->set_capabilities(server, mntfh); if (error < 0) - goto out_error; + return error; } fsinfo.fattr = fattr; @@ -814,7 +786,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype)); error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); if (error < 0) - goto out_error; + return error; nfs_server_set_fsinfo(server, &fsinfo); @@ -829,12 +801,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs server->namelen = pathinfo.max_namelen; } - dprintk("<-- nfs_probe_fsinfo() = 0\n"); return 0; - -out_error: - dprintk("nfs_probe_fsinfo: error = %d\n", -error); - return error; } EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); @@ -917,12 +884,6 @@ struct nfs_server *nfs_alloc_server(void) return NULL; } - if (bdi_init(&server->backing_dev_info)) { - nfs_free_iostats(server->io_stats); - kfree(server); - return NULL; - } - ida_init(&server->openowner_id); ida_init(&server->lockowner_id); pnfs_init_server(server); @@ -936,8 +897,6 @@ EXPORT_SYMBOL_GPL(nfs_alloc_server); */ void nfs_free_server(struct nfs_server *server) { - dprintk("--> nfs_free_server()\n"); - nfs_server_remove_lists(server); if (server->destroy != NULL) @@ -953,10 +912,8 @@ void nfs_free_server(struct nfs_server *server) ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); - bdi_destroy(&server->backing_dev_info); kfree(server); nfs_release_automount_timer(); - dprintk("<-- nfs_free_server()\n"); } EXPORT_SYMBOL_GPL(nfs_free_server); @@ -1036,10 +993,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, struct nfs_fattr *fattr_fsinfo; int error; - dprintk("--> nfs_clone_server(,%llx:%llx,)\n", - (unsigned long long) fattr->fsid.major, - (unsigned long long) fattr->fsid.minor); - server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); @@ -1071,10 +1024,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; - dprintk("Cloned FSID: %llx:%llx\n", - (unsigned long long) server->fsid.major, - (unsigned long long) server->fsid.minor); - error = nfs_start_lockd(server); if (error < 0) goto out_free_server; @@ -1083,13 +1032,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, server->mount_time = jiffies; nfs_free_fattr(fattr_fsinfo); - dprintk("<-- nfs_clone_server() = %p\n", server); return server; out_free_server: nfs_free_fattr(fattr_fsinfo); nfs_free_server(server); - dprintk("<-- nfs_clone_server() = error %d\n", error); return ERR_PTR(error); } EXPORT_SYMBOL_GPL(nfs_clone_server); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f92ba8d6c5569099f6c469eda92446ad0d7e148d..32ccd7754f8a2875933d1f9c532b54c656971bfd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, - .iterate_shared = nfs_readdir, + .iterate = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, @@ -145,7 +145,6 @@ struct nfs_cache_array_entry { }; struct nfs_cache_array { - atomic_t refcount; int size; int eof_index; u64 last_cookie; @@ -170,27 +169,6 @@ typedef struct { unsigned int eof:1; } nfs_readdir_descriptor_t; -/* - * The caller is responsible for calling nfs_readdir_release_array(page) - */ -static -struct nfs_cache_array *nfs_readdir_get_array(struct page *page) -{ - void *ptr; - if (page == NULL) - return ERR_PTR(-EIO); - ptr = kmap(page); - if (ptr == NULL) - return ERR_PTR(-ENOMEM); - return ptr; -} - -static -void nfs_readdir_release_array(struct page *page) -{ - kunmap(page); -} - /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -201,18 +179,9 @@ void nfs_readdir_clear_array(struct page *page) int i; array = kmap_atomic(page); - if (atomic_dec_and_test(&array->refcount)) - for (i = 0; i < array->size; i++) - kfree(array->array[i].string.name); - kunmap_atomic(array); -} - -static bool grab_page(struct page *page) -{ - struct nfs_cache_array *array = kmap_atomic(page); - bool res = atomic_inc_not_zero(&array->refcount); + for (i = 0; i < array->size; i++) + kfree(array->array[i].string.name); kunmap_atomic(array); - return res; } /* @@ -239,13 +208,10 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le static int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) { - struct nfs_cache_array *array = nfs_readdir_get_array(page); + struct nfs_cache_array *array = kmap(page); struct nfs_cache_array_entry *cache_entry; int ret; - if (IS_ERR(array)) - return PTR_ERR(array); - cache_entry = &array->array[array->size]; /* Check that this entry lies within the page bounds */ @@ -264,7 +230,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) if (entry->eof != 0) array->eof_index = array->size; out: - nfs_readdir_release_array(page); + kunmap(page); return ret; } @@ -353,11 +319,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) struct nfs_cache_array *array; int status; - array = nfs_readdir_get_array(desc->page); - if (IS_ERR(array)) { - status = PTR_ERR(array); - goto out; - } + array = kmap(desc->page); if (*desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); @@ -369,8 +331,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) desc->current_index += array->size; desc->page_index++; } - nfs_readdir_release_array(desc->page); -out: + kunmap(desc->page); return status; } @@ -606,13 +567,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en out_nopages: if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - array = nfs_readdir_get_array(page); - if (!IS_ERR(array)) { - array->eof_index = array->size; - status = 0; - nfs_readdir_release_array(page); - } else - status = PTR_ERR(array); + array = kmap(page); + array->eof_index = array->size; + status = 0; + kunmap(page); } put_page(scratch); @@ -674,13 +632,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, goto out; } - array = nfs_readdir_get_array(page); - if (IS_ERR(array)) { - status = PTR_ERR(array); - goto out_label_free; - } + array = kmap(page); memset(array, 0, sizeof(struct nfs_cache_array)); - atomic_set(&array->refcount, 1); array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); @@ -703,8 +656,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, nfs_readdir_free_pages(pages, array_size); out_release_array: - nfs_readdir_release_array(page); -out_label_free: + kunmap(page); nfs4_label_free(entry.label); out: nfs_free_fattr(entry.fattr); @@ -743,7 +695,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - nfs_readdir_clear_array(desc->page); + if (!desc->page->mapping) + nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -751,16 +704,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { - struct page *page; - - for (;;) { - page = read_cache_page(desc->file->f_mapping, + return read_cache_page(desc->file->f_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); - if (IS_ERR(page) || grab_page(page)) - break; - put_page(page); - } - return page; } /* @@ -809,12 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) struct nfs_cache_array *array = NULL; struct nfs_open_dir_context *ctx = file->private_data; - array = nfs_readdir_get_array(desc->page); - if (IS_ERR(array)) { - res = PTR_ERR(array); - goto out; - } - + array = kmap(desc->page); for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; @@ -835,8 +775,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) if (array->eof_index >= 0) desc->eof = 1; - nfs_readdir_release_array(desc->page); -out: + kunmap(desc->page); cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); @@ -966,11 +905,13 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { + struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", filp, offset, whence); + inode_lock(inode); switch (whence) { case 1: offset += filp->f_pos; @@ -978,13 +919,16 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) if (offset >= 0) break; default: - return -EINVAL; + offset = -EINVAL; + goto out; } if (offset != filp->f_pos) { filp->f_pos = offset; dir_ctx->dir_cookie = 0; dir_ctx->duped = 0; } +out: + inode_unlock(inode); return offset; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aab32fc3d6a84965ea879854c942b12888548411..6fb9fad2d1e6cf6909cfe6dfb2e482bff969e7df 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -392,16 +392,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_direct_req_release(dreq); } -static void nfs_direct_readpage_release(struct nfs_page *req) -{ - dprintk("NFS: direct read done (%s/%llu %d@%lld)\n", - req->wb_context->dentry->d_sb->s_id, - (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)), - req->wb_bytes, - (long long)req_offset(req)); - nfs_release_request(req); -} - static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -426,7 +416,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) set_page_dirty(page); bytes += req->wb_bytes; nfs_list_remove_request(req); - nfs_direct_readpage_release(req); + nfs_release_request(req); } out_put: if (put_dreq(dreq)) @@ -537,7 +527,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, if (put_dreq(dreq)) nfs_direct_complete(dreq); - return 0; + return requested_bytes; } /** @@ -566,7 +556,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - ssize_t result = -EINVAL; + ssize_t result = -EINVAL, requested; size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); @@ -600,14 +590,19 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; - result = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); + requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); nfs_end_io_direct(inode); - if (!result) { + if (requested > 0) { result = nfs_direct_wait(dreq); - if (result > 0) + if (result > 0) { + requested -= result; iocb->ki_pos += result; + } + iov_iter_revert(iter, requested); + } else { + result = requested; } out_release: @@ -695,16 +690,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) int status = data->task.tk_status; nfs_init_cinfo_from_dreq(&cinfo, dreq); - if (status < 0) { - dprintk("NFS: %5u commit failed with error %d.\n", - data->task.tk_pid, status); + if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data)) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) { - dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } - dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); @@ -954,7 +942,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, if (put_dreq(dreq)) nfs_direct_write_complete(dreq); - return 0; + return requested_bytes; } /** @@ -979,7 +967,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, */ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { - ssize_t result = -EINVAL; + ssize_t result = -EINVAL, requested; size_t count; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -1022,7 +1010,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); - result = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, @@ -1031,13 +1019,17 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_end_io_direct(inode); - if (!result) { + if (requested > 0) { result = nfs_direct_wait(dreq); if (result > 0) { + requested -= result; iocb->ki_pos = pos + result; /* XXX: should check the generic_write_sync retval */ generic_write_sync(iocb, result); } + iov_iter_revert(iter, requested); + } else { + result = requested; } out_release: nfs_direct_req_release(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 668213984d68708c3d54cccce36ad3a0c048657e..5713eb32a45ea20c1de50f2ee28f4ddcae468f67 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -482,7 +482,7 @@ static int nfs_launder_page(struct page *page) inode->i_ino, (long long)page_offset(page)); nfs_fscache_wait_on_page_write(nfsi, page); - return nfs_wb_launder_page(inode, page); + return nfs_wb_page(inode, page); } static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, @@ -697,14 +697,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) if (!IS_ERR(l_ctx)) { status = nfs_iocounter_wait(l_ctx); nfs_put_lock_context(l_ctx); - if (status < 0) + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + if (status < 0 && !(fl->fl_flags & FL_CLOSE)) return status; } - /* NOTE: special case - * If we're signalled while cleaning up locks on process exit, we - * still need to complete the unlock. - */ /* * Use local locking if mounted with "-onolock" or with appropriate * "-olocal_lock=" @@ -820,9 +820,23 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; - /* We're simulating flock() locks using posix locks on the server */ - if (fl->fl_type == F_UNLCK) + /* + * VFS doesn't require the open mode to match a flock() lock's type. + * NFS, however, may simulate flock() locking with posix locking which + * requires the open mode to match the lock type. + */ + switch (fl->fl_type) { + case F_UNLCK: return do_unlk(filp, cmd, fl, is_local); + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + } + return do_setlk(filp, cmd, fl, is_local); } EXPORT_SYMBOL_GPL(nfs_flock); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index acd30baca46166c902aa5dfae1663184cc30e235..1cf85d65b7489bd56c69447d618360263febc52c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -921,11 +921,11 @@ fl_pnfs_update_layout(struct inode *ino, fl = FILELAYOUT_LSEG(lseg); status = filelayout_check_deviceid(lo, fl, gfp_flags); - if (status) + if (status) { + pnfs_put_lseg(lseg); lseg = ERR_PTR(status); + } out: - if (IS_ERR(lseg)) - pnfs_put_lseg(lseg); return lseg; } @@ -933,6 +933,7 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -959,6 +960,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; + pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, req->wb_context, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 42dedf2d625fca62a418c5e4f82935dc41a53987..23542dc44a25c9f398b8a2a69905bf9eafbe5270 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; /* fh */ + rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; @@ -846,6 +847,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, int ds_idx; retry: + pnfs_generic_pg_check_layout(pgio); /* Use full layout for now */ if (!pgio->pg_lseg) ff_layout_pg_get_read(pgio, req, false); @@ -894,6 +896,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, int status; retry: + pnfs_generic_pg_check_layout(pgio); if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -1800,16 +1803,16 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); if (!ds) - return PNFS_NOT_ATTEMPTED; + goto out_failed; ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, hdr->inode); if (IS_ERR(ds_clnt)) - return PNFS_NOT_ATTEMPTED; + goto out_failed; ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); if (!ds_cred) - return PNFS_NOT_ATTEMPTED; + goto out_failed; vers = nfs4_ff_layout_ds_version(lseg, idx); @@ -1839,6 +1842,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) sync, RPC_TASK_SOFTCONN); put_rpccred(ds_cred); return PNFS_ATTEMPTED; + +out_failed: + if (ff_layout_avoid_mds_available_ds(lseg)) + return PNFS_TRY_AGAIN; + return PNFS_NOT_ATTEMPTED; } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) @@ -2354,10 +2362,21 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) return 0; } +static int +ff_layout_set_layoutdriver(struct nfs_server *server, + const struct nfs_fh *dummy) +{ +#if IS_ENABLED(CONFIG_NFS_V4_2) + server->caps |= NFS_CAP_LAYOUTSTATS; +#endif + return 0; +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", .owner = THIS_MODULE, + .set_layoutdriver = ff_layout_set_layoutdriver, .alloc_layout_hdr = ff_layout_alloc_layout_hdr, .free_layout_hdr = ff_layout_free_layout_hdr, .alloc_lseg = ff_layout_alloc_lseg, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 457cfeb1d5c162e4177450eb941460a2fe39f3b1..6df7a0cf566015378aa3f76c480115675454297d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -119,7 +119,13 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; - if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { + /* + * check for valid major/minor combination. + * currently we support dataserver which talk: + * v3, v4.0, v4.1, v4.2 + */ + if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) || + (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) { dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, i, ds_versions[i].version, ds_versions[i].minor_version); @@ -415,7 +421,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].minor_version); /* connect success, check rsize/wsize limit */ - if (ds->ds_clp) { + if (!status) { max_payload = nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), NULL); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f489a5a71bd5cd89f0667b455e04930753e24e84..1de93ba78dc95a34292cc6160198d9be7c843de1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -734,7 +734,10 @@ int nfs_getattr(const struct path *path, struct kstat *stat, if (need_atime || nfs_need_revalidate_inode(inode)) { struct nfs_server *server = NFS_SERVER(inode); - nfs_readdirplus_parent_cache_miss(path->dentry); + if (!(server->flags & NFS_MOUNT_NOAC)) + nfs_readdirplus_parent_cache_miss(path->dentry); + else + nfs_readdirplus_parent_cache_hit(path->dentry); err = __nfs_revalidate_inode(server, inode); } else nfs_readdirplus_parent_cache_hit(path->dentry); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7b38fedb7e032824ec509edca5cf465a22147851..3e24392f2caa1296c4475ed84701fdfabfff467f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -398,7 +398,6 @@ extern struct file_system_type nfs4_referral_fs_type; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); -void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, @@ -458,7 +457,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ -void nfs_clone_super(struct super_block *, struct nfs_mount_info *); void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct dentry *); @@ -495,7 +493,6 @@ void nfs_mark_request_commit(struct nfs_page *req, u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); -int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, @@ -756,9 +753,13 @@ static inline bool nfs_error_is_fatal(int err) { switch (err) { case -ERESTARTSYS: + case -EACCES: + case -EDQUOT: + case -EFBIG: case -EIO: case -ENOSPC: case -EROFS: + case -ESTALE: case -E2BIG: return true; default: diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 786f175805827df3a76ef4e37307927f81372628..e5686be67be8d361a32344e3aaaae235d739ffd7 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -143,11 +143,8 @@ struct vfsmount *nfs_d_automount(struct path *path) struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; - dprintk("--> nfs_d_automount()\n"); - - mnt = ERR_PTR(-ESTALE); if (IS_ROOT(path->dentry)) - goto out_nofree; + return ERR_PTR(-ESTALE); mnt = ERR_PTR(-ENOMEM); fh = nfs_alloc_fhandle(); @@ -155,13 +152,10 @@ struct vfsmount *nfs_d_automount(struct path *path) if (fh == NULL || fattr == NULL) goto out; - dprintk("%s: enter\n", __func__); - mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); if (IS_ERR(mnt)) goto out; - dprintk("%s: done, success\n", __func__); mntget(mnt); /* prevent immediate expiration */ mnt_set_expiry(mnt, &nfs_automount_list); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); @@ -169,11 +163,6 @@ struct vfsmount *nfs_d_automount(struct path *path) out: nfs_free_fattr(fattr); nfs_free_fhandle(fh); -out_nofree: - if (IS_ERR(mnt)) - dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt)); - else - dprintk("<-- %s() = %p\n", __func__, mnt); return mnt; } @@ -248,27 +237,20 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, .fattr = fattr, .authflavor = authflavor, }; - struct vfsmount *mnt = ERR_PTR(-ENOMEM); + struct vfsmount *mnt; char *page = (char *) __get_free_page(GFP_USER); char *devname; - dprintk("--> nfs_do_submount()\n"); - - dprintk("%s: submounting on %pd2\n", __func__, - dentry); if (page == NULL) - goto out; + return ERR_PTR(-ENOMEM); + devname = nfs_devname(dentry, page, PAGE_SIZE); - mnt = (struct vfsmount *)devname; if (IS_ERR(devname)) - goto free_page; - mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); -free_page: - free_page((unsigned long)page); -out: - dprintk("%s: done\n", __func__); + mnt = ERR_CAST(devname); + else + mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); - dprintk("<-- nfs_do_submount() = %p\n", mnt); + free_page((unsigned long)page); return mnt; } EXPORT_SYMBOL_GPL(nfs_do_submount); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index dc925b531f32632bb708c3f8490d0d00c7315495..0c07b567118dcd8a99d924a738c0fe7a0149bf43 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -865,12 +865,63 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; } +static void nfs3_nlm_alloc_call(void *data) +{ + struct nfs_lock_context *l_ctx = data; + if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) { + get_nfs_open_context(l_ctx->open_context); + nfs_get_lock_context(l_ctx->open_context); + } +} + +static bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data) +{ + struct nfs_lock_context *l_ctx = data; + if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) + return nfs_async_iocounter_wait(task, l_ctx); + return false; + +} + +static void nfs3_nlm_release_call(void *data) +{ + struct nfs_lock_context *l_ctx = data; + struct nfs_open_context *ctx; + if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) { + ctx = l_ctx->open_context; + nfs_put_lock_context(l_ctx); + put_nfs_open_context(ctx); + } +} + +const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = { + .nlmclnt_alloc_call = nfs3_nlm_alloc_call, + .nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare, + .nlmclnt_release_call = nfs3_nlm_release_call, +}; + static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(filp); + struct nfs_lock_context *l_ctx = NULL; + struct nfs_open_context *ctx = nfs_file_open_context(filp); + int status; - return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); + if (fl->fl_flags & FL_CLOSE) { + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) + l_ctx = NULL; + else + set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags); + } + + status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, l_ctx); + + if (l_ctx) + nfs_put_lock_context(l_ctx); + + return status; } static int nfs3_have_delegation(struct inode *inode, fmode_t flags) @@ -921,6 +972,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .dir_inode_ops = &nfs3_dir_inode_operations, .file_inode_ops = &nfs3_file_inode_operations, .file_ops = &nfs_file_operations, + .nlmclnt_ops = &nlmclnt_fl_close_lock_ops, .getroot = nfs3_proc_get_root, .submount = nfs_submount, .try_mount = nfs_try_mount, diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1e486c73ec9438d99e2e4a5b6637ec64594b626c..319a47db218d133d36b749641b63e5fa4489014c 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -167,23 +167,29 @@ static ssize_t _nfs42_proc_copy(struct file *src, if (status) return status; + res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS); + if (!res->commit_res.verf) + return -ENOMEM; status = nfs4_call_sync(server->client, server, &msg, &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_COPY; if (status) - return status; + goto out; - if (res->write_res.verifier.committed != NFS_FILE_SYNC) { - status = nfs_commit_file(dst, &res->write_res.verifier.verifier); - if (status) - return status; + if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier, + &res->commit_res.verf->verifier)) { + status = -EAGAIN; + goto out; } truncate_pagecache_range(dst_inode, pos_dst, pos_dst + res->write_res.count); - return res->write_res.count; + status = res->write_res.count; +out: + kfree(res->commit_res.verf); + return status; } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, @@ -240,6 +246,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, if (err == -ENOTSUPP) { err = -EOPNOTSUPP; break; + } if (err == -EAGAIN) { + dst_exception.retry = 1; + continue; } err2 = nfs4_handle_exception(server, err, &src_exception); @@ -379,6 +388,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) pnfs_mark_layout_stateid_invalid(lo, &head); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); + nfs_commit_inode(inode, 0); } else spin_unlock(&inode->i_lock); break; @@ -400,8 +410,6 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) case -EOPNOTSUPP: NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; } - - dprintk("%s server returns %d\n", __func__, task->tk_status); } static void diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6c7296454bbc05c3ab1c18dd178314603916e40a..528362f69cc1ba6156102b113fdd5ba18c566c71 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -66,12 +66,14 @@ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ - encode_copy_maxsz) + encode_copy_maxsz + \ + encode_commit_maxsz) #define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ - decode_copy_maxsz) + decode_copy_maxsz + \ + decode_commit_maxsz) #define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_deallocate_maxsz + \ @@ -222,6 +224,18 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, encode_nops(&hdr); } +static void encode_copy_commit(struct xdr_stream *xdr, + struct nfs42_copy_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); + p = reserve_space(xdr, 12); + p = xdr_encode_hyper(p, args->dst_pos); + *p = cpu_to_be32(args->count); +} + /* * Encode COPY request */ @@ -239,6 +253,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req, encode_savefh(xdr, &hdr); encode_putfh(xdr, args->dst_fh, &hdr); encode_copy(xdr, args, &hdr); + encode_copy_commit(xdr, args, &hdr); encode_nops(&hdr); } @@ -481,6 +496,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp, if (status) goto out; status = decode_copy(xdr, res); + if (status) + goto out; + status = decode_commit(xdr, &res->commit_res); out: return status; } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8346ccbf2d52e518b6fa61d0c8cbb3d033ec1f02..66776f02211131bd003e31748dec6774a02a077f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -359,11 +359,9 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, struct nfs_client *old; int error; - if (clp->cl_cons_state == NFS_CS_READY) { + if (clp->cl_cons_state == NFS_CS_READY) /* the client is initialised already */ - dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp); return clp; - } /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; @@ -421,7 +419,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, error: nfs_mark_client_ready(clp, error); nfs_put_client(clp); - dprintk("<-- nfs4_init_client() = xerror %d\n", error); return ERR_PTR(error); } @@ -469,6 +466,50 @@ static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2) return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0; } +static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new, + struct nfs_client **prev, struct nfs_net *nn) +{ + int status; + + if (pos->rpc_ops != new->rpc_ops) + return 1; + + if (pos->cl_minorversion != new->cl_minorversion) + return 1; + + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos", especially the client + * ID and serverowner fields. Wait for CREATE_SESSION + * to finish. */ + if (pos->cl_cons_state > NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + nfs_put_client(*prev); + *prev = pos; + + status = nfs_wait_client_init_complete(pos); + spin_lock(&nn->nfs_client_lock); + + if (status < 0) + return status; + } + + if (pos->cl_cons_state != NFS_CS_READY) + return 1; + + if (pos->cl_clientid != new->cl_clientid) + return 1; + + /* NFSv4.1 always uses the uniform string, however someone + * might switch the uniquifier string on us. + */ + if (!nfs4_match_client_owner_id(pos, new)) + return 1; + + return 0; +} + /** * nfs40_walk_client_list - Find server that recognizes a client ID * @@ -497,34 +538,10 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { - if (pos->rpc_ops != new->rpc_ops) - continue; - - if (pos->cl_minorversion != new->cl_minorversion) - continue; - - /* If "pos" isn't marked ready, we can't trust the - * remaining fields in "pos" */ - if (pos->cl_cons_state > NFS_CS_READY) { - atomic_inc(&pos->cl_count); - spin_unlock(&nn->nfs_client_lock); - - nfs_put_client(prev); - prev = pos; - - status = nfs_wait_client_init_complete(pos); - if (status < 0) - goto out; - status = -NFS4ERR_STALE_CLIENTID; - spin_lock(&nn->nfs_client_lock); - } - if (pos->cl_cons_state != NFS_CS_READY) - continue; - - if (pos->cl_clientid != new->cl_clientid) - continue; - - if (!nfs4_match_client_owner_id(pos, new)) + status = nfs4_match_client(pos, new, &prev, nn); + if (status < 0) + goto out_unlock; + if (status != 0) continue; /* * We just sent a new SETCLIENTID, which should have @@ -557,8 +574,6 @@ int nfs40_walk_client_list(struct nfs_client *new, prev = NULL; *result = pos; - dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", - __func__, pos, atomic_read(&pos->cl_count)); goto out; case -ERESTARTSYS: case -ETIMEDOUT: @@ -572,31 +587,16 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); } +out_unlock: spin_unlock(&nn->nfs_client_lock); /* No match found. The server lost our clientid */ out: nfs_put_client(prev); - dprintk("NFS: <-- %s status = %d\n", __func__, status); return status; } #ifdef CONFIG_NFS_V4_1 -/* - * Returns true if the client IDs match - */ -static bool nfs4_match_clientids(u64 a, u64 b) -{ - if (a != b) { - dprintk("NFS: --> %s client ID %llx does not match %llx\n", - __func__, a, b); - return false; - } - dprintk("NFS: --> %s client ID %llx matches %llx\n", - __func__, a, b); - return true; -} - /* * Returns true if the server major ids match */ @@ -605,36 +605,8 @@ nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1, struct nfs41_server_owner *o2) { if (o1->major_id_sz != o2->major_id_sz) - goto out_major_mismatch; - if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) - goto out_major_mismatch; - - dprintk("NFS: --> %s server owner major IDs match\n", __func__); - return true; - -out_major_mismatch: - dprintk("NFS: --> %s server owner major IDs do not match\n", - __func__); - return false; -} - -/* - * Returns true if server minor ids match - */ -static bool -nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1, - struct nfs41_server_owner *o2) -{ - /* Check eir_server_owner so_minor_id */ - if (o1->minor_id != o2->minor_id) - goto out_minor_mismatch; - - dprintk("NFS: --> %s server owner minor IDs match\n", __func__); - return true; - -out_minor_mismatch: - dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__); - return false; + return false; + return memcmp(o1->major_id, o2->major_id, o1->major_id_sz) == 0; } /* @@ -645,18 +617,9 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1, struct nfs41_server_scope *s2) { if (s1->server_scope_sz != s2->server_scope_sz) - goto out_scope_mismatch; - if (memcmp(s1->server_scope, s2->server_scope, - s1->server_scope_sz) != 0) - goto out_scope_mismatch; - - dprintk("NFS: --> %s server scopes match\n", __func__); - return true; - -out_scope_mismatch: - dprintk("NFS: --> %s server scopes do not match\n", - __func__); - return false; + return false; + return memcmp(s1->server_scope, s2->server_scope, + s1->server_scope_sz) == 0; } /** @@ -680,7 +643,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp, struct rpc_xprt *xprt) { /* Check eir_clientid */ - if (!nfs4_match_clientids(clp->cl_clientid, res->clientid)) + if (clp->cl_clientid != res->clientid) goto out_err; /* Check eir_server_owner so_major_id */ @@ -689,8 +652,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp, goto out_err; /* Check eir_server_owner so_minor_id */ - if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner, - res->server_owner)) + if (clp->cl_serverowner->minor_id != res->server_owner->minor_id) goto out_err; /* Check eir_server_scope */ @@ -739,33 +701,10 @@ int nfs41_walk_client_list(struct nfs_client *new, if (pos == new) goto found; - if (pos->rpc_ops != new->rpc_ops) - continue; - - if (pos->cl_minorversion != new->cl_minorversion) - continue; - - /* If "pos" isn't marked ready, we can't trust the - * remaining fields in "pos", especially the client - * ID and serverowner fields. Wait for CREATE_SESSION - * to finish. */ - if (pos->cl_cons_state > NFS_CS_READY) { - atomic_inc(&pos->cl_count); - spin_unlock(&nn->nfs_client_lock); - - nfs_put_client(prev); - prev = pos; - - status = nfs_wait_client_init_complete(pos); - spin_lock(&nn->nfs_client_lock); - if (status < 0) - break; - status = -NFS4ERR_STALE_CLIENTID; - } - if (pos->cl_cons_state != NFS_CS_READY) - continue; - - if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid)) + status = nfs4_match_client(pos, new, &prev, nn); + if (status < 0) + goto out; + if (status != 0) continue; /* @@ -777,23 +716,15 @@ int nfs41_walk_client_list(struct nfs_client *new, new->cl_serverowner)) continue; - /* Unlike NFSv4.0, we know that NFSv4.1 always uses the - * uniform string, however someone might switch the - * uniquifier string on us. - */ - if (!nfs4_match_client_owner_id(pos, new)) - continue; found: atomic_inc(&pos->cl_count); *result = pos; status = 0; - dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", - __func__, pos, atomic_read(&pos->cl_count)); break; } +out: spin_unlock(&nn->nfs_client_lock); - dprintk("NFS: <-- %s status = %d\n", __func__, status); nfs_put_client(prev); return status; } @@ -916,9 +847,6 @@ static int nfs4_set_client(struct nfs_server *server, .timeparms = timeparms, }; struct nfs_client *clp; - int error; - - dprintk("--> nfs4_set_client()\n"); if (server->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -927,15 +855,11 @@ static int nfs4_set_client(struct nfs_server *server, /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init); - if (IS_ERR(clp)) { - error = PTR_ERR(clp); - goto error; - } + if (IS_ERR(clp)) + return PTR_ERR(clp); - if (server->nfs_client == clp) { - error = -ELOOP; - goto error; - } + if (server->nfs_client == clp) + return -ELOOP; /* * Query for the lease time on clientid setup or renewal @@ -947,11 +871,7 @@ static int nfs4_set_client(struct nfs_server *server, set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); server->nfs_client = clp; - dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); return 0; -error: - dprintk("<-- nfs4_set_client() = xerror %d\n", error); - return error; } /* @@ -982,7 +902,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, }; - struct nfs_client *clp; char buf[INET6_ADDRSTRLEN + 1]; if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) @@ -998,10 +917,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, * (section 13.1 RFC 5661). */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init); - - dprintk("<-- %s %p\n", __func__, clp); - return clp; + return nfs_get_client(&cl_init); } EXPORT_SYMBOL_GPL(nfs4_set_ds_client); @@ -1098,8 +1014,6 @@ static int nfs4_init_server(struct nfs_server *server, struct rpc_timeout timeparms; int error; - dprintk("--> nfs4_init_server()\n"); - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); @@ -1127,7 +1041,7 @@ static int nfs4_init_server(struct nfs_server *server, data->minorversion, data->net); if (error < 0) - goto error; + return error; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1138,16 +1052,10 @@ static int nfs4_init_server(struct nfs_server *server, server->acregmax = data->acregmax * HZ; server->acdirmin = data->acdirmin * HZ; server->acdirmax = data->acdirmax * HZ; + server->port = data->nfs_server.port; - server->port = data->nfs_server.port; - - error = nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); - -error: - /* Done */ - dprintk("<-- nfs4_init_server() = %d\n", error); - return error; + return nfs_init_server_rpcclient(server, &timeparms, + data->selected_flavor); } /* @@ -1163,8 +1071,6 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, bool auth_probe; int error; - dprintk("--> nfs4_create_server()\n"); - server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); @@ -1180,12 +1086,10 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, if (error < 0) goto error; - dprintk("<-- nfs4_create_server() = %p\n", server); return server; error: nfs_free_server(server); - dprintk("<-- nfs4_create_server() = error %d\n", error); return ERR_PTR(error); } @@ -1200,8 +1104,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, bool auth_probe; int error; - dprintk("--> nfs4_create_referral_server()\n"); - server = nfs_alloc_server(); if (!server) return ERR_PTR(-ENOMEM); @@ -1235,12 +1137,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - dprintk("<-- nfs_create_referral_server() = %p\n", server); return server; error: nfs_free_server(server); - dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } @@ -1300,31 +1200,16 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, struct sockaddr *localaddr = (struct sockaddr *)&address; int error; - dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - hostname); - error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); - if (error != 0) { - dprintk("<-- %s(): rpc_switch_client_transport returned %d\n", - __func__, error); - goto out; - } + if (error != 0) + return error; error = rpc_localaddr(clnt, localaddr, sizeof(address)); - if (error != 0) { - dprintk("<-- %s(): rpc_localaddr returned %d\n", - __func__, error); - goto out; - } + if (error != 0) + return error; - error = -EAFNOSUPPORT; - if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) { - dprintk("<-- %s(): rpc_ntop returned %d\n", - __func__, error); - goto out; - } + if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) + return -EAFNOSUPPORT; nfs_server_remove_lists(server); error = nfs4_set_client(server, hostname, sap, salen, buf, @@ -1333,21 +1218,12 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, nfs_put_client(clp); if (error != 0) { nfs_server_insert_lists(server); - dprintk("<-- %s(): nfs4_set_client returned %d\n", - __func__, error); - goto out; + return error; } if (server->nfs_client->cl_hostname == NULL) server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); nfs_server_insert_lists(server); - error = nfs_probe_destination(server); - if (error < 0) - goto out; - - dprintk("<-- %s() succeeded\n", __func__); - -out: - return error; + return nfs_probe_destination(server); } diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 039b3eb6d83404f33224961465406d52203ff570..ac84060189626ccb07f24b366594d36372717cbb 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -14,8 +14,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p struct nfs_fsinfo fsinfo; int ret = -ENOMEM; - dprintk("--> nfs4_get_rootfh()\n"); - fsinfo.fattr = nfs_alloc_fattr(); if (fsinfo.fattr == NULL) goto out; @@ -38,6 +36,5 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); out: nfs_free_fattr(fsinfo.fattr); - dprintk("<-- nfs4_get_rootfh() = %d\n", ret); return ret; } diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index d8b040bd9814d3199e9189519f7bdcb09c04801c..7d531da1bae37e1cd957c3181423930e1033ab8b 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -340,7 +340,6 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, out: free_page((unsigned long) page); free_page((unsigned long) page2); - dprintk("%s: done\n", __func__); return mnt; } @@ -358,11 +357,9 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry * int err; /* BUG_ON(IS_ROOT(dentry)); */ - dprintk("%s: enter\n", __func__); - page = alloc_page(GFP_KERNEL); if (page == NULL) - goto out; + return mnt; fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); if (fs_locations == NULL) @@ -386,8 +383,6 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry * out_free: __free_page(page); kfree(fs_locations); -out: - dprintk("%s: done\n", __func__); return mnt; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 201ca3f2c4bac14986220fcdf6a6c37b734ffa96..c08c46a3b8cde00ef5aa40fae87ed2fce06faea1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -698,7 +698,8 @@ static int nfs41_sequence_process(struct rpc_task *task, session = slot->table->session; if (slot->interrupted) { - slot->interrupted = 0; + if (res->sr_status != -NFS4ERR_DELAY) + slot->interrupted = 0; interrupted = true; } @@ -2300,8 +2301,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) if (status != 0) return status; } - if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { + nfs4_sequence_free_slot(&o_res->seq_res); nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); + } return 0; } @@ -3265,6 +3268,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f .rpc_resp = &res, }; int status; + int i; bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_FH_EXPIRE_TYPE | @@ -3330,8 +3334,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->cache_consistency_bitmask[2] = 0; + + /* Avoid a regression due to buggy server */ + for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++) + res.exclcreat_bitmask[i] &= res.attr_bitmask[i]; memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask, sizeof(server->exclcreat_bitmask)); + server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -4610,7 +4619,7 @@ static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, return 0; if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, - hdr->rw_ops->rw_mode) == -EIO) + hdr->rw_mode) == -EIO) return -EIO; if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) return -EIO; @@ -4804,8 +4813,10 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); - if (data == NULL) + if (data == NULL) { + nfs_put_client(clp); return -ENOMEM; + } data->client = clp; data->timestamp = jiffies; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT, @@ -5782,6 +5793,7 @@ struct nfs4_unlockdata { struct nfs_locku_res res; struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; + struct nfs_lock_context *l_ctx; struct file_lock fl; struct nfs_server *server; unsigned long timestamp; @@ -5806,6 +5818,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); + p->l_ctx = nfs_get_lock_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5816,6 +5829,7 @@ static void nfs4_locku_release_calldata(void *data) struct nfs4_unlockdata *calldata = data; nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); + nfs_put_lock_context(calldata->l_ctx); put_nfs_open_context(calldata->ctx); kfree(calldata); } @@ -5857,6 +5871,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) { struct nfs4_unlockdata *calldata = data; + if (test_bit(NFS_CONTEXT_UNLOCK, &calldata->l_ctx->open_context->flags) && + nfs_async_iocounter_wait(task, calldata->l_ctx)) + return; + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) goto out_wait; nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid); @@ -5908,6 +5926,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, * canceled lock is passed in, and it won't be an unlock. */ fl->fl_type = F_UNLCK; + if (fl->fl_flags & FL_CLOSE) + set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags); data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { @@ -6445,9 +6465,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) ctx = nfs_file_open_context(filp); state = ctx->state; - if (request->fl_start < 0 || request->fl_end < 0) - return -EINVAL; - if (IS_GETLK(cmd)) { if (state != NULL) return nfs4_proc_getlk(state, F_GETLK, request); @@ -6470,20 +6487,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) return -ENOLCK; - /* - * Don't rely on the VFS having checked the file open mode, - * since it won't do this for flock() locks. - */ - switch (request->fl_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - return -EBADF; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - } - status = nfs4_set_lock_state(state, request); if (status != 0) return status; @@ -7155,8 +7158,6 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, }; struct rpc_task *task; - dprintk("--> %s\n", __func__); - nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id); if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) args.dir = NFS4_CDFC4_FORE; @@ -7176,24 +7177,20 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, if (memcmp(res.sessionid.data, clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("NFS: %s: Session ID mismatch\n", __func__); - status = -EIO; - goto out; + return -EIO; } if ((res.dir & args.dir) != res.dir || res.dir == 0) { dprintk("NFS: %s: Unexpected direction from server\n", __func__); - status = -EIO; - goto out; + return -EIO; } if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) { dprintk("NFS: %s: Server returned RDMA mode = true\n", __func__); - status = -EIO; - goto out; + return -EIO; } } -out: - dprintk("<-- %s status= %d\n", __func__, status); + return status; } @@ -7459,15 +7456,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, }; struct nfs41_exchange_id_data *calldata; struct rpc_task *task; - int status = -EIO; + int status; if (!atomic_inc_not_zero(&clp->cl_count)) - goto out; + return -EIO; - status = -ENOMEM; calldata = kzalloc(sizeof(*calldata), GFP_NOFS); - if (!calldata) - goto out; + if (!calldata) { + nfs_put_client(clp); + return -ENOMEM; + } if (!xprt) nfs4_init_boot_verifier(clp, &verifier); @@ -7476,10 +7474,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status) goto out_calldata; - dprintk("NFS call exchange_id auth=%s, '%s'\n", - clp->cl_rpcclient->cl_auth->au_ops->au_name, - clp->cl_owner_id); - calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); status = -ENOMEM; @@ -7545,13 +7539,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, rpc_put_task(task); out: - if (clp->cl_implid != NULL) - dprintk("NFS reply exchange_id: Server Implementation ID: " - "domain: %s, name: %s, date: %llu,%u\n", - clp->cl_implid->domain, clp->cl_implid->name, - clp->cl_implid->date.seconds, - clp->cl_implid->date.nseconds); - dprintk("NFS reply exchange_id: %d\n", status); return status; out_impl_id: @@ -7769,17 +7756,13 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); nfs4_set_sequence_privileged(&args.la_seq_args); - dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); if (IS_ERR(task)) - status = PTR_ERR(task); - else { - status = task->tk_status; - rpc_put_task(task); - } - dprintk("<-- %s return %d\n", __func__, status); + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); return status; } @@ -8180,6 +8163,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf /* fall through */ case -NFS4ERR_RETRY_UNCACHED_REP: return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(clp->cl_session, + task->tk_status); + break; default: nfs4_schedule_lease_recovery(clp); } @@ -8258,7 +8247,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, if (status == 0) status = task->tk_status; rpc_put_task(task); - return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; @@ -8357,6 +8345,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, */ pnfs_mark_layout_stateid_invalid(lo, &head); spin_unlock(&inode->i_lock); + nfs_commit_inode(inode, 0); pnfs_free_lseg_list(&head); status = -EAGAIN; goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8156bad6b441095199878a900e35594863cad571..b34de036501bc90e48be043aec38485f7f755e55 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1649,13 +1649,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); } -static void nfs4_reclaim_complete(struct nfs_client *clp, +static int nfs4_reclaim_complete(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops, struct rpc_cred *cred) { /* Notify the server we're done reclaiming our state */ if (ops->reclaim_complete) - (void)ops->reclaim_complete(clp, cred); + return ops->reclaim_complete(clp, cred); + return 0; } static void nfs4_clear_reclaim_server(struct nfs_server *server) @@ -1702,13 +1703,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { const struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; + int err; if (!nfs4_state_clear_reclaim_reboot(clp)) return; ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); - nfs4_reclaim_complete(clp, ops, cred); + err = nfs4_reclaim_complete(clp, ops, cred); put_rpccred(cred); + if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); } static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 80ce289eea05326336a7edecbe8a132ee4900d23..3aebfdc82b30320625b0011b4f968207e436fb48 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1000,8 +1000,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs4_label *label, + const umode_t *umask, const struct nfs_server *server, - bool excl_check, const umode_t *umask) + const uint32_t attrmask[]) { char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; @@ -1016,22 +1017,20 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, /* * We reserve enough space to write the entire attribute buffer at once. */ - if (iap->ia_valid & ATTR_SIZE) { + if ((iap->ia_valid & ATTR_SIZE) && (attrmask[0] & FATTR4_WORD0_SIZE)) { bmval[0] |= FATTR4_WORD0_SIZE; len += 8; } - if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) - umask = NULL; if (iap->ia_valid & ATTR_MODE) { - if (umask) { + if (umask && (attrmask[2] & FATTR4_WORD2_MODE_UMASK)) { bmval[2] |= FATTR4_WORD2_MODE_UMASK; len += 8; - } else { + } else if (attrmask[1] & FATTR4_WORD1_MODE) { bmval[1] |= FATTR4_WORD1_MODE; len += 4; } } - if (iap->ia_valid & ATTR_UID) { + if ((iap->ia_valid & ATTR_UID) && (attrmask[1] & FATTR4_WORD1_OWNER)) { owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); if (owner_namelen < 0) { dprintk("nfs: couldn't resolve uid %d to string\n", @@ -1044,7 +1043,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, bmval[1] |= FATTR4_WORD1_OWNER; len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } - if (iap->ia_valid & ATTR_GID) { + if ((iap->ia_valid & ATTR_GID) && + (attrmask[1] & FATTR4_WORD1_OWNER_GROUP)) { owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); if (owner_grouplen < 0) { dprintk("nfs: couldn't resolve gid %d to string\n", @@ -1056,32 +1056,26 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, bmval[1] |= FATTR4_WORD1_OWNER_GROUP; len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); } - if (iap->ia_valid & ATTR_ATIME_SET) { - bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; - len += 16; - } else if (iap->ia_valid & ATTR_ATIME) { - bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; - len += 4; - } - if (iap->ia_valid & ATTR_MTIME_SET) { - bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; - len += 16; - } else if (iap->ia_valid & ATTR_MTIME) { - bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; - len += 4; + if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) { + if (iap->ia_valid & ATTR_ATIME_SET) { + bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; + len += 16; + } else if (iap->ia_valid & ATTR_ATIME) { + bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; + len += 4; + } } - - if (excl_check) { - const u32 *excl_bmval = server->exclcreat_bitmask; - bmval[0] &= excl_bmval[0]; - bmval[1] &= excl_bmval[1]; - bmval[2] &= excl_bmval[2]; - - if (!(excl_bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) - label = NULL; + if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) { + if (iap->ia_valid & ATTR_MTIME_SET) { + bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; + len += 16; + } else if (iap->ia_valid & ATTR_MTIME) { + bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; + len += 4; + } } - if (label) { + if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) { len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; } @@ -1188,8 +1182,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); - encode_attrs(xdr, create->attrs, create->label, create->server, false, - &create->umask); + encode_attrs(xdr, create->attrs, create->label, &create->umask, + create->server, create->server->attr_bitmask); } static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) @@ -1409,13 +1403,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op switch(arg->createmode) { case NFS4_CREATE_UNCHECKED: *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, - &arg->umask); + encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask, + arg->server, arg->server->attr_bitmask); break; case NFS4_CREATE_GUARDED: *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, - &arg->umask); + encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask, + arg->server, arg->server->attr_bitmask); break; case NFS4_CREATE_EXCLUSIVE: *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); @@ -1424,8 +1418,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op case NFS4_CREATE_EXCLUSIVE4_1: *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); encode_nfs4_verifier(xdr, &arg->u.verifier); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true, - &arg->umask); + encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask, + arg->server, arg->server->exclcreat_bitmask); } } @@ -1681,7 +1675,8 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs { encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); encode_nfs4_stateid(xdr, &arg->stateid); - encode_attrs(xdr, arg->iap, arg->label, server, false, NULL); + encode_attrs(xdr, arg->iap, arg->label, NULL, server, + server->attr_bitmask); } static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) @@ -2005,16 +2000,10 @@ encode_layoutcommit(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ - if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { - NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( - NFS_I(inode)->layout, xdr, args); - } else { - encode_uint32(xdr, args->layoutupdate_len); - if (args->layoutupdate_pages) { - xdr_write_pages(xdr, args->layoutupdate_pages, 0, - args->layoutupdate_len); - } - } + encode_uint32(xdr, args->layoutupdate_len); + if (args->layoutupdate_pages) + xdr_write_pages(xdr, args->layoutupdate_pages, 0, + args->layoutupdate_len); return 0; } @@ -2024,7 +2013,6 @@ encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args, struct compound_hdr *hdr) { - const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld; __be32 *p; encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); @@ -2041,8 +2029,6 @@ encode_layoutreturn(struct xdr_stream *xdr, spin_unlock(&args->inode->i_lock); if (args->ld_private->ops && args->ld_private->ops->encode) args->ld_private->ops->encode(xdr, args, args->ld_private); - else if (lr_ops->encode_layoutreturn) - lr_ops->encode_layoutreturn(xdr, args); else encode_uint32(xdr, 0); } @@ -5579,6 +5565,8 @@ static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) unsigned int i; p = xdr_inline_decode(xdr, 4); + if (!p) + return -EIO; bitmap_words = be32_to_cpup(p++); if (bitmap_words > NFS4_OP_MAP_NUM_WORDS) return -EIO; diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild deleted file mode 100644 index ed30ea072bb85561045a864846b2e5d647adb489..0000000000000000000000000000000000000000 --- a/fs/nfs/objlayout/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the pNFS Objects Layout Driver kernel module -# -objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o -obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c deleted file mode 100644 index 049c1b1f2932be74266a47d97733426d89ec8154..0000000000000000000000000000000000000000 --- a/fs/nfs/objlayout/objio_osd.c +++ /dev/null @@ -1,675 +0,0 @@ -/* - * pNFS Objects layout implementation over open-osd initiator library - * - * Copyright (C) 2009 Panasas Inc. [year of first publication] - * All rights reserved. - * - * Benny Halevy - * Boaz Harrosh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * See the file COPYING included with this distribution for more details. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Panasas company nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "objlayout.h" -#include "../internal.h" - -#define NFSDBG_FACILITY NFSDBG_PNFS_LD - -struct objio_dev_ent { - struct nfs4_deviceid_node id_node; - struct ore_dev od; -}; - -static void -objio_free_deviceid_node(struct nfs4_deviceid_node *d) -{ - struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); - - dprintk("%s: free od=%p\n", __func__, de->od.od); - osduld_put_device(de->od.od); - kfree_rcu(d, rcu); -} - -struct objio_segment { - struct pnfs_layout_segment lseg; - - struct ore_layout layout; - struct ore_components oc; -}; - -static inline struct objio_segment * -OBJIO_LSEG(struct pnfs_layout_segment *lseg) -{ - return container_of(lseg, struct objio_segment, lseg); -} - -struct objio_state { - /* Generic layer */ - struct objlayout_io_res oir; - - bool sync; - /*FIXME: Support for extra_bytes at ore_get_rw_state() */ - struct ore_io_state *ios; -}; - -/* Send and wait for a get_device_info of devices in the layout, - then look them up with the osd_initiator library */ -struct nfs4_deviceid_node * -objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, - gfp_t gfp_flags) -{ - struct pnfs_osd_deviceaddr *deviceaddr; - struct objio_dev_ent *ode = NULL; - struct osd_dev *od; - struct osd_dev_info odi; - bool retry_flag = true; - __be32 *p; - int err; - - deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags); - if (!deviceaddr) - return NULL; - - p = page_address(pdev->pages[0]); - pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p); - - odi.systemid_len = deviceaddr->oda_systemid.len; - if (odi.systemid_len > sizeof(odi.systemid)) { - dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", - __func__, sizeof(odi.systemid)); - err = -EINVAL; - goto out; - } else if (odi.systemid_len) - memcpy(odi.systemid, deviceaddr->oda_systemid.data, - odi.systemid_len); - odi.osdname_len = deviceaddr->oda_osdname.len; - odi.osdname = (u8 *)deviceaddr->oda_osdname.data; - - if (!odi.osdname_len && !odi.systemid_len) { - dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", - __func__); - err = -ENODEV; - goto out; - } - -retry_lookup: - od = osduld_info_lookup(&odi); - if (IS_ERR(od)) { - err = PTR_ERR(od); - dprintk("%s: osduld_info_lookup => %d\n", __func__, err); - if (err == -ENODEV && retry_flag) { - err = objlayout_autologin(deviceaddr); - if (likely(!err)) { - retry_flag = false; - goto retry_lookup; - } - } - goto out; - } - - dprintk("Adding new dev_id(%llx:%llx)\n", - _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id)); - - ode = kzalloc(sizeof(*ode), gfp_flags); - if (!ode) { - dprintk("%s: -ENOMEM od=%p\n", __func__, od); - goto out; - } - - nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id); - kfree(deviceaddr); - - ode->od.od = od; - return &ode->id_node; - -out: - kfree(deviceaddr); - return NULL; -} - -static void copy_single_comp(struct ore_components *oc, unsigned c, - struct pnfs_osd_object_cred *src_comp) -{ - struct ore_comp *ocomp = &oc->comps[c]; - - WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ - WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); - - ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; - ocomp->obj.id = src_comp->oc_object_id.oid_object_id; - - memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); -} - -static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, - struct objio_segment **pseg) -{ -/* This is the in memory structure of the objio_segment - * - * struct __alloc_objio_segment { - * struct objio_segment olseg; - * struct ore_dev *ods[numdevs]; - * struct ore_comp comps[numdevs]; - * } *aolseg; - * NOTE: The code as above compiles and runs perfectly. It is elegant, - * type safe and compact. At some Past time Linus has decided he does not - * like variable length arrays, For the sake of this principal we uglify - * the code as below. - */ - struct objio_segment *lseg; - size_t lseg_size = sizeof(*lseg) + - numdevs * sizeof(lseg->oc.ods[0]) + - numdevs * sizeof(*lseg->oc.comps); - - lseg = kzalloc(lseg_size, gfp_flags); - if (unlikely(!lseg)) { - dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__, - numdevs, lseg_size); - return -ENOMEM; - } - - lseg->oc.numdevs = numdevs; - lseg->oc.single_comp = EC_MULTPLE_COMPS; - lseg->oc.ods = (void *)(lseg + 1); - lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); - - *pseg = lseg; - return 0; -} - -int objio_alloc_lseg(struct pnfs_layout_segment **outp, - struct pnfs_layout_hdr *pnfslay, - struct pnfs_layout_range *range, - struct xdr_stream *xdr, - gfp_t gfp_flags) -{ - struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode); - struct objio_segment *objio_seg; - struct pnfs_osd_xdr_decode_layout_iter iter; - struct pnfs_osd_layout layout; - struct pnfs_osd_object_cred src_comp; - unsigned cur_comp; - int err; - - err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); - if (unlikely(err)) - return err; - - err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); - if (unlikely(err)) - return err; - - objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; - objio_seg->layout.group_width = layout.olo_map.odm_group_width; - objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; - objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; - objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; - - err = ore_verify_layout(layout.olo_map.odm_num_comps, - &objio_seg->layout); - if (unlikely(err)) - goto err; - - objio_seg->oc.first_dev = layout.olo_comps_index; - cur_comp = 0; - while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { - struct nfs4_deviceid_node *d; - struct objio_dev_ent *ode; - - copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); - - d = nfs4_find_get_deviceid(server, - &src_comp.oc_object_id.oid_device_id, - pnfslay->plh_lc_cred, gfp_flags); - if (!d) { - err = -ENXIO; - goto err; - } - - ode = container_of(d, struct objio_dev_ent, id_node); - objio_seg->oc.ods[cur_comp++] = &ode->od; - } - /* pnfs_osd_xdr_decode_layout_comp returns false on error */ - if (unlikely(err)) - goto err; - - *outp = &objio_seg->lseg; - return 0; - -err: - kfree(objio_seg); - dprintk("%s: Error: return %d\n", __func__, err); - *outp = NULL; - return err; -} - -void objio_free_lseg(struct pnfs_layout_segment *lseg) -{ - int i; - struct objio_segment *objio_seg = OBJIO_LSEG(lseg); - - for (i = 0; i < objio_seg->oc.numdevs; i++) { - struct ore_dev *od = objio_seg->oc.ods[i]; - struct objio_dev_ent *ode; - - if (!od) - break; - ode = container_of(od, typeof(*ode), od); - nfs4_put_deviceid_node(&ode->id_node); - } - kfree(objio_seg); -} - -static int -objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, - struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, - loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, - struct objio_state **outp) -{ - struct objio_segment *objio_seg = OBJIO_LSEG(lseg); - struct ore_io_state *ios; - int ret; - struct __alloc_objio_state { - struct objio_state objios; - struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; - } *aos; - - aos = kzalloc(sizeof(*aos), gfp_flags); - if (unlikely(!aos)) - return -ENOMEM; - - objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, - aos->ioerrs, rpcdata, pnfs_layout_type); - - ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, - offset, count, &ios); - if (unlikely(ret)) { - kfree(aos); - return ret; - } - - ios->pages = pages; - ios->pgbase = pgbase; - ios->private = aos; - BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); - - aos->objios.sync = 0; - aos->objios.ios = ios; - *outp = &aos->objios; - return 0; -} - -void objio_free_result(struct objlayout_io_res *oir) -{ - struct objio_state *objios = container_of(oir, struct objio_state, oir); - - ore_put_io_state(objios->ios); - kfree(objios); -} - -static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) -{ - switch (oep) { - case OSD_ERR_PRI_NO_ERROR: - return (enum pnfs_osd_errno)0; - - case OSD_ERR_PRI_CLEAR_PAGES: - BUG_ON(1); - return 0; - - case OSD_ERR_PRI_RESOURCE: - return PNFS_OSD_ERR_RESOURCE; - case OSD_ERR_PRI_BAD_CRED: - return PNFS_OSD_ERR_BAD_CRED; - case OSD_ERR_PRI_NO_ACCESS: - return PNFS_OSD_ERR_NO_ACCESS; - case OSD_ERR_PRI_UNREACHABLE: - return PNFS_OSD_ERR_UNREACHABLE; - case OSD_ERR_PRI_NOT_FOUND: - return PNFS_OSD_ERR_NOT_FOUND; - case OSD_ERR_PRI_NO_SPACE: - return PNFS_OSD_ERR_NO_SPACE; - default: - WARN_ON(1); - /* fallthrough */ - case OSD_ERR_PRI_EIO: - return PNFS_OSD_ERR_EIO; - } -} - -static void __on_dev_error(struct ore_io_state *ios, - struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, - u64 dev_offset, u64 dev_len) -{ - struct objio_state *objios = ios->private; - struct pnfs_osd_objid pooid; - struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); - /* FIXME: what to do with more-then-one-group layouts. We need to - * translate from ore_io_state index to oc->comps index - */ - unsigned comp = dev_index; - - pooid.oid_device_id = ode->id_node.deviceid; - pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; - pooid.oid_object_id = ios->oc->comps[comp].obj.id; - - objlayout_io_set_result(&objios->oir, comp, - &pooid, osd_pri_2_pnfs_err(oep), - dev_offset, dev_len, !ios->reading); -} - -/* - * read - */ -static void _read_done(struct ore_io_state *ios, void *private) -{ - struct objio_state *objios = private; - ssize_t status; - int ret = ore_check_io(ios, &__on_dev_error); - - /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ - - if (likely(!ret)) - status = ios->length; - else - status = ret; - - objlayout_read_done(&objios->oir, status, objios->sync); -} - -int objio_read_pagelist(struct nfs_pgio_header *hdr) -{ - struct objio_state *objios; - int ret; - - ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, - hdr->lseg, hdr->args.pages, hdr->args.pgbase, - hdr->args.offset, hdr->args.count, hdr, - GFP_KERNEL, &objios); - if (unlikely(ret)) - return ret; - - objios->ios->done = _read_done; - dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - hdr->args.offset, hdr->args.count); - ret = ore_read(objios->ios); - if (unlikely(ret)) - objio_free_result(&objios->oir); - return ret; -} - -/* - * write - */ -static void _write_done(struct ore_io_state *ios, void *private) -{ - struct objio_state *objios = private; - ssize_t status; - int ret = ore_check_io(ios, &__on_dev_error); - - /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ - - if (likely(!ret)) { - /* FIXME: should be based on the OSD's persistence model - * See OSD2r05 Section 4.13 Data persistence model */ - objios->oir.committed = NFS_FILE_SYNC; - status = ios->length; - } else { - status = ret; - } - - objlayout_write_done(&objios->oir, status, objios->sync); -} - -static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) -{ - struct objio_state *objios = priv; - struct nfs_pgio_header *hdr = objios->oir.rpcdata; - struct address_space *mapping = hdr->inode->i_mapping; - pgoff_t index = offset / PAGE_SIZE; - struct page *page; - loff_t i_size = i_size_read(hdr->inode); - - if (offset >= i_size) { - *uptodate = true; - dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); - return ZERO_PAGE(0); - } - - page = find_get_page(mapping, index); - if (!page) { - page = find_or_create_page(mapping, index, GFP_NOFS); - if (unlikely(!page)) { - dprintk("%s: grab_cache_page Failed index=0x%lx\n", - __func__, index); - return NULL; - } - unlock_page(page); - } - *uptodate = PageUptodate(page); - dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); - return page; -} - -static void __r4w_put_page(void *priv, struct page *page) -{ - dprintk("%s: index=0x%lx\n", __func__, - (page == ZERO_PAGE(0)) ? -1UL : page->index); - if (ZERO_PAGE(0) != page) - put_page(page); - return; -} - -static const struct _ore_r4w_op _r4w_op = { - .get_page = &__r4w_get_page, - .put_page = &__r4w_put_page, -}; - -int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) -{ - struct objio_state *objios; - int ret; - - ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, - hdr->lseg, hdr->args.pages, hdr->args.pgbase, - hdr->args.offset, hdr->args.count, hdr, GFP_NOFS, - &objios); - if (unlikely(ret)) - return ret; - - objios->sync = 0 != (how & FLUSH_SYNC); - objios->ios->r4w = &_r4w_op; - - if (!objios->sync) - objios->ios->done = _write_done; - - dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - hdr->args.offset, hdr->args.count); - ret = ore_write(objios->ios); - if (unlikely(ret)) { - objio_free_result(&objios->oir); - return ret; - } - - if (objios->sync) - _write_done(objios->ios, objios); - - return 0; -} - -/* - * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number - * of bytes (maximum @req->wb_bytes) that can be coalesced. - */ -static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, struct nfs_page *req) -{ - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio); - unsigned int size; - - size = pnfs_generic_pg_test(pgio, prev, req); - - if (!size || mirror->pg_count + req->wb_bytes > - (unsigned long)pgio->pg_layout_private) - return 0; - - return min(size, req->wb_bytes); -} - -static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - pnfs_generic_pg_init_read(pgio, req); - if (unlikely(pgio->pg_lseg == NULL)) - return; /* Not pNFS */ - - pgio->pg_layout_private = (void *) - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; -} - -static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, - unsigned long *stripe_end) -{ - u32 stripe_off; - unsigned stripe_size; - - if (layout->raid_algorithm == PNFS_OSD_RAID_0) - return true; - - stripe_size = layout->stripe_unit * - (layout->group_width - layout->parity); - - div_u64_rem(offset, stripe_size, &stripe_off); - if (!stripe_off) - return true; - - *stripe_end = stripe_size - stripe_off; - return false; -} - -static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) -{ - unsigned long stripe_end = 0; - u64 wb_size; - - if (pgio->pg_dreq == NULL) - wb_size = i_size_read(pgio->pg_inode) - req_offset(req); - else - wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); - - pnfs_generic_pg_init_write(pgio, req, wb_size); - if (unlikely(pgio->pg_lseg == NULL)) - return; /* Not pNFS */ - - if (req->wb_offset || - !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, - &OBJIO_LSEG(pgio->pg_lseg)->layout, - &stripe_end)) { - pgio->pg_layout_private = (void *)stripe_end; - } else { - pgio->pg_layout_private = (void *) - OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; - } -} - -static const struct nfs_pageio_ops objio_pg_read_ops = { - .pg_init = objio_init_read, - .pg_test = objio_pg_test, - .pg_doio = pnfs_generic_pg_readpages, - .pg_cleanup = pnfs_generic_pg_cleanup, -}; - -static const struct nfs_pageio_ops objio_pg_write_ops = { - .pg_init = objio_init_write, - .pg_test = objio_pg_test, - .pg_doio = pnfs_generic_pg_writepages, - .pg_cleanup = pnfs_generic_pg_cleanup, -}; - -static struct pnfs_layoutdriver_type objlayout_type = { - .id = LAYOUT_OSD2_OBJECTS, - .name = "LAYOUT_OSD2_OBJECTS", - .flags = PNFS_LAYOUTRET_ON_SETATTR | - PNFS_LAYOUTRET_ON_ERROR, - - .max_deviceinfo_size = PAGE_SIZE, - .owner = THIS_MODULE, - .alloc_layout_hdr = objlayout_alloc_layout_hdr, - .free_layout_hdr = objlayout_free_layout_hdr, - - .alloc_lseg = objlayout_alloc_lseg, - .free_lseg = objlayout_free_lseg, - - .read_pagelist = objlayout_read_pagelist, - .write_pagelist = objlayout_write_pagelist, - .pg_read_ops = &objio_pg_read_ops, - .pg_write_ops = &objio_pg_write_ops, - - .sync = pnfs_generic_sync, - - .free_deviceid_node = objio_free_deviceid_node, - - .encode_layoutcommit = objlayout_encode_layoutcommit, - .encode_layoutreturn = objlayout_encode_layoutreturn, -}; - -MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); -MODULE_AUTHOR("Benny Halevy "); -MODULE_LICENSE("GPL"); - -static int __init -objlayout_init(void) -{ - int ret = pnfs_register_layoutdriver(&objlayout_type); - - if (ret) - printk(KERN_INFO - "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", - __func__, ret); - else - printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", - __func__); - return ret; -} - -static void __exit -objlayout_exit(void) -{ - pnfs_unregister_layoutdriver(&objlayout_type); - printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", - __func__); -} - -MODULE_ALIAS("nfs-layouttype4-2"); - -module_init(objlayout_init); -module_exit(objlayout_exit); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c deleted file mode 100644 index 8f3d2acb81c3d816f7bb12f78fa6c50fb8a20528..0000000000000000000000000000000000000000 --- a/fs/nfs/objlayout/objlayout.c +++ /dev/null @@ -1,706 +0,0 @@ -/* - * pNFS Objects layout driver high level definitions - * - * Copyright (C) 2007 Panasas Inc. [year of first publication] - * All rights reserved. - * - * Benny Halevy - * Boaz Harrosh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * See the file COPYING included with this distribution for more details. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Panasas company nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include "objlayout.h" - -#define NFSDBG_FACILITY NFSDBG_PNFS_LD -/* - * Create a objlayout layout structure for the given inode and return it. - */ -struct pnfs_layout_hdr * -objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) -{ - struct objlayout *objlay; - - objlay = kzalloc(sizeof(struct objlayout), gfp_flags); - if (!objlay) - return NULL; - spin_lock_init(&objlay->lock); - INIT_LIST_HEAD(&objlay->err_list); - dprintk("%s: Return %p\n", __func__, objlay); - return &objlay->pnfs_layout; -} - -/* - * Free an objlayout layout structure - */ -void -objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) -{ - struct objlayout *objlay = OBJLAYOUT(lo); - - dprintk("%s: objlay %p\n", __func__, objlay); - - WARN_ON(!list_empty(&objlay->err_list)); - kfree(objlay); -} - -/* - * Unmarshall layout and store it in pnfslay. - */ -struct pnfs_layout_segment * -objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, - struct nfs4_layoutget_res *lgr, - gfp_t gfp_flags) -{ - int status = -ENOMEM; - struct xdr_stream stream; - struct xdr_buf buf = { - .pages = lgr->layoutp->pages, - .page_len = lgr->layoutp->len, - .buflen = lgr->layoutp->len, - .len = lgr->layoutp->len, - }; - struct page *scratch; - struct pnfs_layout_segment *lseg; - - dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); - - scratch = alloc_page(gfp_flags); - if (!scratch) - goto err_nofree; - - xdr_init_decode(&stream, &buf, NULL); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); - - status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); - if (unlikely(status)) { - dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, - status); - goto err; - } - - __free_page(scratch); - - dprintk("%s: Return %p\n", __func__, lseg); - return lseg; - -err: - __free_page(scratch); -err_nofree: - dprintk("%s: Err Return=>%d\n", __func__, status); - return ERR_PTR(status); -} - -/* - * Free a layout segement - */ -void -objlayout_free_lseg(struct pnfs_layout_segment *lseg) -{ - dprintk("%s: freeing layout segment %p\n", __func__, lseg); - - if (unlikely(!lseg)) - return; - - objio_free_lseg(lseg); -} - -/* - * I/O Operations - */ -static inline u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end : NFS4_MAX_UINT64; -} - -static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, - struct page ***p_pages, unsigned *p_pgbase, - u64 offset, unsigned long count) -{ - u64 lseg_end_offset; - - BUG_ON(offset < lseg->pls_range.offset); - lseg_end_offset = end_offset(lseg->pls_range.offset, - lseg->pls_range.length); - BUG_ON(offset >= lseg_end_offset); - WARN_ON(offset + count > lseg_end_offset); - - if (*p_pgbase > PAGE_SIZE) { - dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase); - *p_pages += *p_pgbase >> PAGE_SHIFT; - *p_pgbase &= ~PAGE_MASK; - } -} - -/* - * I/O done common code - */ -static void -objlayout_iodone(struct objlayout_io_res *oir) -{ - if (likely(oir->status >= 0)) { - objio_free_result(oir); - } else { - struct objlayout *objlay = oir->objlay; - - spin_lock(&objlay->lock); - objlay->delta_space_valid = OBJ_DSU_INVALID; - list_add(&objlay->err_list, &oir->err_list); - spin_unlock(&objlay->lock); - } -} - -/* - * objlayout_io_set_result - Set an osd_error code on a specific osd comp. - * - * The @index component IO failed (error returned from target). Register - * the error for later reporting at layout-return. - */ -void -objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, - struct pnfs_osd_objid *pooid, int osd_error, - u64 offset, u64 length, bool is_write) -{ - struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index]; - - BUG_ON(index >= oir->num_comps); - if (osd_error) { - ioerr->oer_component = *pooid; - ioerr->oer_comp_offset = offset; - ioerr->oer_comp_length = length; - ioerr->oer_iswrite = is_write; - ioerr->oer_errno = osd_error; - - dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " - "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", - __func__, index, ioerr->oer_errno, - ioerr->oer_iswrite, - _DEVID_LO(&ioerr->oer_component.oid_device_id), - _DEVID_HI(&ioerr->oer_component.oid_device_id), - ioerr->oer_component.oid_partition_id, - ioerr->oer_component.oid_object_id, - ioerr->oer_comp_offset, - ioerr->oer_comp_length); - } else { - /* User need not call if no error is reported */ - ioerr->oer_errno = 0; - } -} - -/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). - * This is because the osd completion is called with ints-off from - * the block layer - */ -static void _rpc_read_complete(struct work_struct *work) -{ - struct rpc_task *task; - struct nfs_pgio_header *hdr; - - dprintk("%s enter\n", __func__); - task = container_of(work, struct rpc_task, u.tk_work); - hdr = container_of(task, struct nfs_pgio_header, task); - - pnfs_ld_read_done(hdr); -} - -void -objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) -{ - struct nfs_pgio_header *hdr = oir->rpcdata; - - oir->status = hdr->task.tk_status = status; - if (status >= 0) - hdr->res.count = status; - else - hdr->pnfs_error = status; - objlayout_iodone(oir); - /* must not use oir after this point */ - - dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, - status, hdr->res.eof, sync); - - if (sync) - pnfs_ld_read_done(hdr); - else { - INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete); - schedule_work(&hdr->task.u.tk_work); - } -} - -/* - * Perform sync or async reads. - */ -enum pnfs_try_status -objlayout_read_pagelist(struct nfs_pgio_header *hdr) -{ - struct inode *inode = hdr->inode; - loff_t offset = hdr->args.offset; - size_t count = hdr->args.count; - int err; - loff_t eof; - - eof = i_size_read(inode); - if (unlikely(offset + count > eof)) { - if (offset >= eof) { - err = 0; - hdr->res.count = 0; - hdr->res.eof = 1; - /*FIXME: do we need to call pnfs_ld_read_done() */ - goto out; - } - count = eof - offset; - } - - hdr->res.eof = (offset + count) >= eof; - _fix_verify_io_params(hdr->lseg, &hdr->args.pages, - &hdr->args.pgbase, - hdr->args.offset, hdr->args.count); - - dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n", - __func__, inode->i_ino, offset, count, hdr->res.eof); - - err = objio_read_pagelist(hdr); - out: - if (unlikely(err)) { - hdr->pnfs_error = err; - dprintk("%s: Returned Error %d\n", __func__, err); - return PNFS_NOT_ATTEMPTED; - } - return PNFS_ATTEMPTED; -} - -/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). - * This is because the osd completion is called with ints-off from - * the block layer - */ -static void _rpc_write_complete(struct work_struct *work) -{ - struct rpc_task *task; - struct nfs_pgio_header *hdr; - - dprintk("%s enter\n", __func__); - task = container_of(work, struct rpc_task, u.tk_work); - hdr = container_of(task, struct nfs_pgio_header, task); - - pnfs_ld_write_done(hdr); -} - -void -objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) -{ - struct nfs_pgio_header *hdr = oir->rpcdata; - - oir->status = hdr->task.tk_status = status; - if (status >= 0) { - hdr->res.count = status; - hdr->verf.committed = oir->committed; - } else { - hdr->pnfs_error = status; - } - objlayout_iodone(oir); - /* must not use oir after this point */ - - dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, hdr->verf.committed, sync); - - if (sync) - pnfs_ld_write_done(hdr); - else { - INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete); - schedule_work(&hdr->task.u.tk_work); - } -} - -/* - * Perform sync or async writes. - */ -enum pnfs_try_status -objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how) -{ - int err; - - _fix_verify_io_params(hdr->lseg, &hdr->args.pages, - &hdr->args.pgbase, - hdr->args.offset, hdr->args.count); - - err = objio_write_pagelist(hdr, how); - if (unlikely(err)) { - hdr->pnfs_error = err; - dprintk("%s: Returned Error %d\n", __func__, err); - return PNFS_NOT_ATTEMPTED; - } - return PNFS_ATTEMPTED; -} - -void -objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, - struct xdr_stream *xdr, - const struct nfs4_layoutcommit_args *args) -{ - struct objlayout *objlay = OBJLAYOUT(pnfslay); - struct pnfs_osd_layoutupdate lou; - __be32 *start; - - dprintk("%s: Begin\n", __func__); - - spin_lock(&objlay->lock); - lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); - lou.dsu_delta = objlay->delta_space_used; - objlay->delta_space_used = 0; - objlay->delta_space_valid = OBJ_DSU_INIT; - lou.olu_ioerr_flag = !list_empty(&objlay->err_list); - spin_unlock(&objlay->lock); - - start = xdr_reserve_space(xdr, 4); - - BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); - - *start = cpu_to_be32((xdr->p - start - 1) * 4); - - dprintk("%s: Return delta_space_used %lld err %d\n", __func__, - lou.dsu_delta, lou.olu_ioerr_flag); -} - -static int -err_prio(u32 oer_errno) -{ - switch (oer_errno) { - case 0: - return 0; - - case PNFS_OSD_ERR_RESOURCE: - return OSD_ERR_PRI_RESOURCE; - case PNFS_OSD_ERR_BAD_CRED: - return OSD_ERR_PRI_BAD_CRED; - case PNFS_OSD_ERR_NO_ACCESS: - return OSD_ERR_PRI_NO_ACCESS; - case PNFS_OSD_ERR_UNREACHABLE: - return OSD_ERR_PRI_UNREACHABLE; - case PNFS_OSD_ERR_NOT_FOUND: - return OSD_ERR_PRI_NOT_FOUND; - case PNFS_OSD_ERR_NO_SPACE: - return OSD_ERR_PRI_NO_SPACE; - default: - WARN_ON(1); - /* fallthrough */ - case PNFS_OSD_ERR_EIO: - return OSD_ERR_PRI_EIO; - } -} - -static void -merge_ioerr(struct pnfs_osd_ioerr *dest_err, - const struct pnfs_osd_ioerr *src_err) -{ - u64 dest_end, src_end; - - if (!dest_err->oer_errno) { - *dest_err = *src_err; - /* accumulated device must be blank */ - memset(&dest_err->oer_component.oid_device_id, 0, - sizeof(dest_err->oer_component.oid_device_id)); - - return; - } - - if (dest_err->oer_component.oid_partition_id != - src_err->oer_component.oid_partition_id) - dest_err->oer_component.oid_partition_id = 0; - - if (dest_err->oer_component.oid_object_id != - src_err->oer_component.oid_object_id) - dest_err->oer_component.oid_object_id = 0; - - if (dest_err->oer_comp_offset > src_err->oer_comp_offset) - dest_err->oer_comp_offset = src_err->oer_comp_offset; - - dest_end = end_offset(dest_err->oer_comp_offset, - dest_err->oer_comp_length); - src_end = end_offset(src_err->oer_comp_offset, - src_err->oer_comp_length); - if (dest_end < src_end) - dest_end = src_end; - - dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; - - if ((src_err->oer_iswrite == dest_err->oer_iswrite) && - (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { - dest_err->oer_errno = src_err->oer_errno; - } else if (src_err->oer_iswrite) { - dest_err->oer_iswrite = true; - dest_err->oer_errno = src_err->oer_errno; - } -} - -static void -encode_accumulated_error(struct objlayout *objlay, __be32 *p) -{ - struct objlayout_io_res *oir, *tmp; - struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; - - list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { - unsigned i; - - for (i = 0; i < oir->num_comps; i++) { - struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; - - if (!ioerr->oer_errno) - continue; - - printk(KERN_ERR "NFS: %s: err[%d]: errno=%d " - "is_write=%d dev(%llx:%llx) par=0x%llx " - "obj=0x%llx offset=0x%llx length=0x%llx\n", - __func__, i, ioerr->oer_errno, - ioerr->oer_iswrite, - _DEVID_LO(&ioerr->oer_component.oid_device_id), - _DEVID_HI(&ioerr->oer_component.oid_device_id), - ioerr->oer_component.oid_partition_id, - ioerr->oer_component.oid_object_id, - ioerr->oer_comp_offset, - ioerr->oer_comp_length); - - merge_ioerr(&accumulated_err, ioerr); - } - list_del(&oir->err_list); - objio_free_result(oir); - } - - pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); -} - -void -objlayout_encode_layoutreturn(struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) -{ - struct pnfs_layout_hdr *pnfslay = args->layout; - struct objlayout *objlay = OBJLAYOUT(pnfslay); - struct objlayout_io_res *oir, *tmp; - __be32 *start; - - dprintk("%s: Begin\n", __func__); - start = xdr_reserve_space(xdr, 4); - BUG_ON(!start); - - spin_lock(&objlay->lock); - - list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { - __be32 *last_xdr = NULL, *p; - unsigned i; - int res = 0; - - for (i = 0; i < oir->num_comps; i++) { - struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; - - if (!ioerr->oer_errno) - continue; - - dprintk("%s: err[%d]: errno=%d is_write=%d " - "dev(%llx:%llx) par=0x%llx obj=0x%llx " - "offset=0x%llx length=0x%llx\n", - __func__, i, ioerr->oer_errno, - ioerr->oer_iswrite, - _DEVID_LO(&ioerr->oer_component.oid_device_id), - _DEVID_HI(&ioerr->oer_component.oid_device_id), - ioerr->oer_component.oid_partition_id, - ioerr->oer_component.oid_object_id, - ioerr->oer_comp_offset, - ioerr->oer_comp_length); - - p = pnfs_osd_xdr_ioerr_reserve_space(xdr); - if (unlikely(!p)) { - res = -E2BIG; - break; /* accumulated_error */ - } - - last_xdr = p; - pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]); - } - - /* TODO: use xdr_write_pages */ - if (unlikely(res)) { - /* no space for even one error descriptor */ - BUG_ON(!last_xdr); - - /* we've encountered a situation with lots and lots of - * errors and no space to encode them all. Use the last - * available slot to report the union of all the - * remaining errors. - */ - encode_accumulated_error(objlay, last_xdr); - goto loop_done; - } - list_del(&oir->err_list); - objio_free_result(oir); - } -loop_done: - spin_unlock(&objlay->lock); - - *start = cpu_to_be32((xdr->p - start - 1) * 4); - dprintk("%s: Return\n", __func__); -} - -enum { - OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, - OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, - OSD_LOGIN_UPCALL_PATHLEN = 256 -}; - -static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login"; - -module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog), - 0600); -MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program"); - -struct __auto_login { - char uri[OBJLAYOUT_MAX_URI_LEN]; - char osdname[OBJLAYOUT_MAX_OSDNAME_LEN]; - char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN]; -}; - -static int __objlayout_upcall(struct __auto_login *login) -{ - static char *envp[] = { "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL - }; - char *argv[8]; - int ret; - - if (unlikely(!osd_login_prog[0])) { - dprintk("%s: osd_login_prog is disabled\n", __func__); - return -EACCES; - } - - dprintk("%s uri: %s\n", __func__, login->uri); - dprintk("%s osdname %s\n", __func__, login->osdname); - dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex); - - argv[0] = (char *)osd_login_prog; - argv[1] = "-u"; - argv[2] = login->uri; - argv[3] = "-o"; - argv[4] = login->osdname; - argv[5] = "-s"; - argv[6] = login->systemid_hex; - argv[7] = NULL; - - ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - /* - * Disable the upcall mechanism if we're getting an ENOENT or - * EACCES error. The admin can re-enable it on the fly by using - * sysfs to set the objlayoutdriver.osd_login_prog module parameter once - * the problem has been fixed. - */ - if (ret == -ENOENT || ret == -EACCES) { - printk(KERN_ERR "PNFS-OBJ: %s was not found please set " - "objlayoutdriver.osd_login_prog kernel parameter!\n", - osd_login_prog); - osd_login_prog[0] = '\0'; - } - dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret); - - return ret; -} - -/* Assume dest is all zeros */ -static void __copy_nfsS_and_zero_terminate(struct nfs4_string s, - char *dest, int max_len, - const char *var_name) -{ - if (!s.len) - return; - - if (s.len >= max_len) { - pr_warn_ratelimited( - "objlayout_autologin: %s: s.len(%d) >= max_len(%d)", - var_name, s.len, max_len); - s.len = max_len - 1; /* space for null terminator */ - } - - memcpy(dest, s.data, s.len); -} - -/* Assume sysid is all zeros */ -static void _sysid_2_hex(struct nfs4_string s, - char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN]) -{ - int i; - char *cur; - - if (!s.len) - return; - - if (s.len != OSD_SYSTEMID_LEN) { - pr_warn_ratelimited( - "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN", - s.len); - if (s.len > OSD_SYSTEMID_LEN) - s.len = OSD_SYSTEMID_LEN; - } - - cur = sysid; - for (i = 0; i < s.len; i++) - cur = hex_byte_pack(cur, s.data[i]); -} - -int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr) -{ - int rc; - struct __auto_login login; - - if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len) - return -ENODEV; - - memset(&login, 0, sizeof(login)); - __copy_nfsS_and_zero_terminate( - deviceaddr->oda_targetaddr.ota_netaddr.r_addr, - login.uri, sizeof(login.uri), "URI"); - - __copy_nfsS_and_zero_terminate( - deviceaddr->oda_osdname, - login.osdname, sizeof(login.osdname), "OSDNAME"); - - _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex); - - rc = __objlayout_upcall(&login); - if (rc > 0) /* script returns positive values */ - rc = -ENODEV; - - return rc; -} diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h deleted file mode 100644 index fc94a5872ed417b94b89213a34ead3e2105f78ec..0000000000000000000000000000000000000000 --- a/fs/nfs/objlayout/objlayout.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Data types and function declerations for interfacing with the - * pNFS standard object layout driver. - * - * Copyright (C) 2007 Panasas Inc. [year of first publication] - * All rights reserved. - * - * Benny Halevy - * Boaz Harrosh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * See the file COPYING included with this distribution for more details. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Panasas company nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _OBJLAYOUT_H -#define _OBJLAYOUT_H - -#include -#include -#include "../pnfs.h" - -/* - * per-inode layout - */ -struct objlayout { - struct pnfs_layout_hdr pnfs_layout; - - /* for layout_commit */ - enum osd_delta_space_valid_enum { - OBJ_DSU_INIT = 0, - OBJ_DSU_VALID, - OBJ_DSU_INVALID, - } delta_space_valid; - s64 delta_space_used; /* consumed by write ops */ - - /* for layout_return */ - spinlock_t lock; - struct list_head err_list; -}; - -static inline struct objlayout * -OBJLAYOUT(struct pnfs_layout_hdr *lo) -{ - return container_of(lo, struct objlayout, pnfs_layout); -} - -/* - * per-I/O operation state - * embedded in objects provider io_state data structure - */ -struct objlayout_io_res { - struct objlayout *objlay; - - void *rpcdata; - int status; /* res */ - int committed; /* res */ - - /* Error reporting (layout_return) */ - struct list_head err_list; - unsigned num_comps; - /* Pointer to array of error descriptors of size num_comps. - * It should contain as many entries as devices in the osd_layout - * that participate in the I/O. It is up to the io_engine to allocate - * needed space and set num_comps. - */ - struct pnfs_osd_ioerr *ioerrs; -}; - -static inline -void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps, - struct pnfs_osd_ioerr *ioerrs, void *rpcdata, - struct pnfs_layout_hdr *pnfs_layout_type) -{ - oir->objlay = OBJLAYOUT(pnfs_layout_type); - oir->rpcdata = rpcdata; - INIT_LIST_HEAD(&oir->err_list); - oir->num_comps = num_comps; - oir->ioerrs = ioerrs; -} - -/* - * Raid engine I/O API - */ -extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, - struct pnfs_layout_hdr *pnfslay, - struct pnfs_layout_range *range, - struct xdr_stream *xdr, - gfp_t gfp_flags); -extern void objio_free_lseg(struct pnfs_layout_segment *lseg); - -/* objio_free_result will free these @oir structs received from - * objlayout_{read,write}_done - */ -extern void objio_free_result(struct objlayout_io_res *oir); - -extern int objio_read_pagelist(struct nfs_pgio_header *rdata); -extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how); - -/* - * callback API - */ -extern void objlayout_io_set_result(struct objlayout_io_res *oir, - unsigned index, struct pnfs_osd_objid *pooid, - int osd_error, u64 offset, u64 length, bool is_write); - -static inline void -objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used) -{ - /* If one of the I/Os errored out and the delta_space_used was - * invalid we render the complete report as invalid. Protocol mandate - * the DSU be accurate or not reported. - */ - spin_lock(&objlay->lock); - if (objlay->delta_space_valid != OBJ_DSU_INVALID) { - objlay->delta_space_valid = OBJ_DSU_VALID; - objlay->delta_space_used += space_used; - } - spin_unlock(&objlay->lock); -} - -extern void objlayout_read_done(struct objlayout_io_res *oir, - ssize_t status, bool sync); -extern void objlayout_write_done(struct objlayout_io_res *oir, - ssize_t status, bool sync); - -/* - * exported generic objects function vectors - */ - -extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); -extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); - -extern struct pnfs_layout_segment *objlayout_alloc_lseg( - struct pnfs_layout_hdr *, - struct nfs4_layoutget_res *, - gfp_t gfp_flags); -extern void objlayout_free_lseg(struct pnfs_layout_segment *); - -extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_pgio_header *); - -extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_pgio_header *, - int how); - -extern void objlayout_encode_layoutcommit( - struct pnfs_layout_hdr *, - struct xdr_stream *, - const struct nfs4_layoutcommit_args *); - -extern void objlayout_encode_layoutreturn( - struct xdr_stream *, - const struct nfs4_layoutreturn_args *); - -extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr); - -#endif /* _OBJLAYOUT_H */ diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c deleted file mode 100644 index f093c7ec983bbc2bb0b2cb21d0aa8c1eaba411d8..0000000000000000000000000000000000000000 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Object-Based pNFS Layout XDR layer - * - * Copyright (C) 2007 Panasas Inc. [year of first publication] - * All rights reserved. - * - * Benny Halevy - * Boaz Harrosh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * See the file COPYING included with this distribution for more details. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Panasas company nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#define NFSDBG_FACILITY NFSDBG_PNFS_LD - -/* - * The following implementation is based on RFC5664 - */ - -/* - * struct pnfs_osd_objid { - * struct nfs4_deviceid oid_device_id; - * u64 oid_partition_id; - * u64 oid_object_id; - * }; // xdr size 32 bytes - */ -static __be32 * -_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid) -{ - p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, - sizeof(objid->oid_device_id.data)); - - p = xdr_decode_hyper(p, &objid->oid_partition_id); - p = xdr_decode_hyper(p, &objid->oid_object_id); - return p; -} -/* - * struct pnfs_osd_opaque_cred { - * u32 cred_len; - * void *cred; - * }; // xdr size [variable] - * The return pointers are from the xdr buffer - */ -static int -_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred, - struct xdr_stream *xdr) -{ - __be32 *p = xdr_inline_decode(xdr, 1); - - if (!p) - return -EINVAL; - - opaque_cred->cred_len = be32_to_cpu(*p++); - - p = xdr_inline_decode(xdr, opaque_cred->cred_len); - if (!p) - return -EINVAL; - - opaque_cred->cred = p; - return 0; -} - -/* - * struct pnfs_osd_object_cred { - * struct pnfs_osd_objid oc_object_id; - * u32 oc_osd_version; - * u32 oc_cap_key_sec; - * struct pnfs_osd_opaque_cred oc_cap_key - * struct pnfs_osd_opaque_cred oc_cap; - * }; // xdr size 32 + 4 + 4 + [variable] + [variable] - */ -static int -_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp, - struct xdr_stream *xdr) -{ - __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4); - int ret; - - if (!p) - return -EIO; - - p = _osd_xdr_decode_objid(p, &comp->oc_object_id); - comp->oc_osd_version = be32_to_cpup(p++); - comp->oc_cap_key_sec = be32_to_cpup(p); - - ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr); - if (unlikely(ret)) - return ret; - - ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr); - return ret; -} - -/* - * struct pnfs_osd_data_map { - * u32 odm_num_comps; - * u64 odm_stripe_unit; - * u32 odm_group_width; - * u32 odm_group_depth; - * u32 odm_mirror_cnt; - * u32 odm_raid_algorithm; - * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4 - */ -static inline int -_osd_data_map_xdr_sz(void) -{ - return 4 + 8 + 4 + 4 + 4 + 4; -} - -static __be32 * -_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map) -{ - data_map->odm_num_comps = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &data_map->odm_stripe_unit); - data_map->odm_group_width = be32_to_cpup(p++); - data_map->odm_group_depth = be32_to_cpup(p++); - data_map->odm_mirror_cnt = be32_to_cpup(p++); - data_map->odm_raid_algorithm = be32_to_cpup(p++); - dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u " - "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n", - __func__, - data_map->odm_num_comps, - (unsigned long long)data_map->odm_stripe_unit, - data_map->odm_group_width, - data_map->odm_group_depth, - data_map->odm_mirror_cnt, - data_map->odm_raid_algorithm); - return p; -} - -int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, - struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr) -{ - __be32 *p; - - memset(iter, 0, sizeof(*iter)); - - p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4); - if (unlikely(!p)) - return -EINVAL; - - p = _osd_xdr_decode_data_map(p, &layout->olo_map); - layout->olo_comps_index = be32_to_cpup(p++); - layout->olo_num_comps = be32_to_cpup(p++); - dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, - layout->olo_comps_index, layout->olo_num_comps); - - iter->total_comps = layout->olo_num_comps; - return 0; -} - -bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, - struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, - int *err) -{ - BUG_ON(iter->decoded_comps > iter->total_comps); - if (iter->decoded_comps == iter->total_comps) - return false; - - *err = _osd_xdr_decode_object_cred(comp, xdr); - if (unlikely(*err)) { - dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d " - "total_comps=%d\n", __func__, *err, - iter->decoded_comps, iter->total_comps); - return false; /* stop the loop */ - } - dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx " - "key_len=%u cap_len=%u\n", - __func__, - _DEVID_LO(&comp->oc_object_id.oid_device_id), - _DEVID_HI(&comp->oc_object_id.oid_device_id), - comp->oc_object_id.oid_partition_id, - comp->oc_object_id.oid_object_id, - comp->oc_cap_key.cred_len, comp->oc_cap.cred_len); - - iter->decoded_comps++; - return true; -} - -/* - * Get Device Information Decoding - * - * Note: since Device Information is currently done synchronously, all - * variable strings fields are left inside the rpc buffer and are only - * pointed to by the pnfs_osd_deviceaddr members. So the read buffer - * should not be freed while the returned information is in use. - */ -/* - *struct nfs4_string { - * unsigned int len; - * char *data; - *}; // size [variable] - * NOTE: Returned string points to inside the XDR buffer - */ -static __be32 * -__read_u8_opaque(__be32 *p, struct nfs4_string *str) -{ - str->len = be32_to_cpup(p++); - str->data = (char *)p; - - p += XDR_QUADLEN(str->len); - return p; -} - -/* - * struct pnfs_osd_targetid { - * u32 oti_type; - * struct nfs4_string oti_scsi_device_id; - * };// size 4 + [variable] - */ -static __be32 * -__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid) -{ - u32 oti_type; - - oti_type = be32_to_cpup(p++); - targetid->oti_type = oti_type; - - switch (oti_type) { - case OBJ_TARGET_SCSI_NAME: - case OBJ_TARGET_SCSI_DEVICE_ID: - p = __read_u8_opaque(p, &targetid->oti_scsi_device_id); - } - - return p; -} - -/* - * struct pnfs_osd_net_addr { - * struct nfs4_string r_netid; - * struct nfs4_string r_addr; - * }; - */ -static __be32 * -__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr) -{ - p = __read_u8_opaque(p, &netaddr->r_netid); - p = __read_u8_opaque(p, &netaddr->r_addr); - - return p; -} - -/* - * struct pnfs_osd_targetaddr { - * u32 ota_available; - * struct pnfs_osd_net_addr ota_netaddr; - * }; - */ -static __be32 * -__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr) -{ - u32 ota_available; - - ota_available = be32_to_cpup(p++); - targetaddr->ota_available = ota_available; - - if (ota_available) - p = __read_net_addr(p, &targetaddr->ota_netaddr); - - - return p; -} - -/* - * struct pnfs_osd_deviceaddr { - * struct pnfs_osd_targetid oda_targetid; - * struct pnfs_osd_targetaddr oda_targetaddr; - * u8 oda_lun[8]; - * struct nfs4_string oda_systemid; - * struct pnfs_osd_object_cred oda_root_obj_cred; - * struct nfs4_string oda_osdname; - * }; - */ - -/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does - * not have an xdr_stream - */ -static __be32 * -__read_opaque_cred(__be32 *p, - struct pnfs_osd_opaque_cred *opaque_cred) -{ - opaque_cred->cred_len = be32_to_cpu(*p++); - opaque_cred->cred = p; - return p + XDR_QUADLEN(opaque_cred->cred_len); -} - -static __be32 * -__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp) -{ - p = _osd_xdr_decode_objid(p, &comp->oc_object_id); - comp->oc_osd_version = be32_to_cpup(p++); - comp->oc_cap_key_sec = be32_to_cpup(p++); - - p = __read_opaque_cred(p, &comp->oc_cap_key); - p = __read_opaque_cred(p, &comp->oc_cap); - return p; -} - -void pnfs_osd_xdr_decode_deviceaddr( - struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p) -{ - p = __read_targetid(p, &deviceaddr->oda_targetid); - - p = __read_targetaddr(p, &deviceaddr->oda_targetaddr); - - p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun, - sizeof(deviceaddr->oda_lun)); - - p = __read_u8_opaque(p, &deviceaddr->oda_systemid); - - p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred); - - p = __read_u8_opaque(p, &deviceaddr->oda_osdname); - - /* libosd likes this terminated in dbg. It's last, so no problems */ - deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0; -} - -/* - * struct pnfs_osd_layoutupdate { - * u32 dsu_valid; - * s64 dsu_delta; - * u32 olu_ioerr_flag; - * }; xdr size 4 + 8 + 4 - */ -int -pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, - struct pnfs_osd_layoutupdate *lou) -{ - __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4); - - if (!p) - return -E2BIG; - - *p++ = cpu_to_be32(lou->dsu_valid); - if (lou->dsu_valid) - p = xdr_encode_hyper(p, lou->dsu_delta); - *p++ = cpu_to_be32(lou->olu_ioerr_flag); - return 0; -} - -/* - * struct pnfs_osd_objid { - * struct nfs4_deviceid oid_device_id; - * u64 oid_partition_id; - * u64 oid_object_id; - * }; // xdr size 32 bytes - */ -static inline __be32 * -pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id) -{ - p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data, - sizeof(object_id->oid_device_id.data)); - p = xdr_encode_hyper(p, object_id->oid_partition_id); - p = xdr_encode_hyper(p, object_id->oid_object_id); - - return p; -} - -/* - * struct pnfs_osd_ioerr { - * struct pnfs_osd_objid oer_component; - * u64 oer_comp_offset; - * u64 oer_comp_length; - * u32 oer_iswrite; - * u32 oer_errno; - * }; // xdr size 32 + 24 bytes - */ -void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr) -{ - p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component); - p = xdr_encode_hyper(p, ioerr->oer_comp_offset); - p = xdr_encode_hyper(p, ioerr->oer_comp_length); - *p++ = cpu_to_be32(ioerr->oer_iswrite); - *p = cpu_to_be32(ioerr->oer_errno); -} - -__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, 32 + 24); - if (unlikely(!p)) - dprintk("%s: out of xdr space\n", __func__); - - return p; -} diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6e629b856a00f2ebe52b4ba6badb356d49f380e6..ad92b401326c69a154b514b11764a2bc4a04a1cb 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,19 +29,6 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; -static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) -{ - p->npages = pagecount; - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; - else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); - if (!p->pagevec) - p->npages = 0; - } - return p->pagevec != NULL; -} - struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) { @@ -115,6 +102,35 @@ nfs_iocounter_wait(struct nfs_lock_context *l_ctx) TASK_KILLABLE); } +/** + * nfs_async_iocounter_wait - wait on a rpc_waitqueue for I/O + * to complete + * @task: the rpc_task that should wait + * @l_ctx: nfs_lock_context with io_counter to check + * + * Returns true if there is outstanding I/O to wait on and the + * task has been put to sleep. + */ +bool +nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) +{ + struct inode *inode = d_inode(l_ctx->open_context->dentry); + bool ret = false; + + if (atomic_read(&l_ctx->io_count) > 0) { + rpc_sleep_on(&NFS_SERVER(inode)->uoc_rpcwaitq, task, NULL); + ret = true; + } + + if (atomic_read(&l_ctx->io_count) == 0) { + rpc_wake_up_queued_task(&NFS_SERVER(inode)->uoc_rpcwaitq, task); + ret = false; + } + + return ret; +} +EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); + /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked @@ -398,8 +414,11 @@ static void nfs_clear_request(struct nfs_page *req) req->wb_page = NULL; } if (l_ctx != NULL) { - if (atomic_dec_and_test(&l_ctx->io_count)) + if (atomic_dec_and_test(&l_ctx->io_count)) { wake_up_atomic_t(&l_ctx->io_count); + if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags)) + rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq); + } nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } @@ -677,7 +696,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int io_flags) + int io_flags, + gfp_t gfp_flags) { struct nfs_pgio_mirror *new; int i; @@ -701,7 +721,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, /* until we have a request, we don't have an lseg and no * idea how many mirrors there will be */ new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, - sizeof(struct nfs_pgio_mirror), GFP_KERNEL); + sizeof(struct nfs_pgio_mirror), gfp_flags); desc->pg_mirrors_dynamic = new; desc->pg_mirrors = new; @@ -754,13 +774,24 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, *last_page; struct list_head *head = &mirror->pg_list; struct nfs_commit_info cinfo; + struct nfs_page_array *pg_array = &hdr->page_array; unsigned int pagecount, pageused; + gfp_t gfp_flags = GFP_KERNEL; pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); - if (!nfs_pgarray_set(&hdr->page_array, pagecount)) { - nfs_pgio_error(hdr); - desc->pg_error = -ENOMEM; - return desc->pg_error; + + if (pagecount <= ARRAY_SIZE(pg_array->page_array)) + pg_array->pagevec = pg_array->page_array; + else { + if (hdr->rw_mode == FMODE_WRITE) + gfp_flags = GFP_NOIO; + pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags); + if (!pg_array->pagevec) { + pg_array->npages = 0; + nfs_pgio_error(hdr); + desc->pg_error = -ENOMEM; + return desc->pg_error; + } } nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); @@ -1256,8 +1287,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) mirror = &desc->pg_mirrors[midx]; if (!list_empty(&mirror->pg_list)) { prev = nfs_list_entry(mirror->pg_list.prev); - if (index != prev->wb_index + 1) - nfs_pageio_complete_mirror(desc, midx); + if (index != prev->wb_index + 1) { + nfs_pageio_complete(desc); + break; + } } } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index dd042498ce7c67df24b4919c2e98dc8f48db13ca..c383d0913b54c90fb96020a62faaf989a5d946b3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -322,9 +322,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, static void pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) { + struct pnfs_layout_segment *lseg; lo->plh_return_iomode = 0; lo->plh_return_seq = 0; clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); + list_for_each_entry(lseg, &lo->plh_segs, pls_list) { + if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + continue; + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); + } } static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) @@ -367,9 +373,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, *next; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - pnfs_clear_layoutreturn_info(lo); list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) pnfs_clear_lseg_state(lseg, lseg_list); + pnfs_clear_layoutreturn_info(lo); pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) @@ -563,7 +569,6 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) } } } -EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); /* * is l2 fully contained in l1? @@ -728,6 +733,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); + nfs_commit_inode(&nfsi->vfs_inode, 0); pnfs_put_layout_hdr(lo); } else spin_unlock(&nfsi->vfs_inode.i_lock); @@ -1209,7 +1215,6 @@ _pnfs_return_layout(struct inode *ino) dprintk("<-- %s status: %d\n", __func__, status); return status; } -EXPORT_SYMBOL_GPL(_pnfs_return_layout); int pnfs_commit_and_return_layout(struct inode *inode) @@ -1991,6 +1996,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + if (!pnfs_layout_is_valid(lo)) + nfs_commit_inode(ino, 0); return ERR_PTR(-EAGAIN); } @@ -2051,9 +2058,11 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, bool return_now = false; spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo)) { + spin_unlock(&inode->i_lock); + return; + } pnfs_set_plh_return_info(lo, range.iomode, 0); - /* Block LAYOUTGET */ - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() @@ -2074,11 +2083,37 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); +void +pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) +{ + if (pgio->pg_lseg == NULL || + test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags)) + return; + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); + +/* + * Check for any intersection between the request and the pgio->pg_lseg, + * and if none, put this pgio->pg_lseg away. + */ +static void +pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + } +} + void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { u64 rd_size = req->wb_bytes; + pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); @@ -2109,6 +2144,8 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { + pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -2169,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); - WARN_ON_ONCE(req_start >= seg_end); + /* start of request is past the last byte of this segment */ - if (req_start >= seg_end) { - /* reference the new lseg */ - if (pgio->pg_ops->pg_cleanup) - pgio->pg_ops->pg_cleanup(pgio); - if (pgio->pg_ops->pg_init) - pgio->pg_ops->pg_init(pgio, req); + if (req_start >= seg_end) return 0; - } /* adjust 'size' iff there are fewer bytes left in the * segment than what nfs_generic_pg_test returned */ @@ -2277,8 +2308,20 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, enum pnfs_try_status trypnfs; trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) + switch (trypnfs) { + case PNFS_NOT_ATTEMPTED: pnfs_write_through_mds(desc, hdr); + case PNFS_ATTEMPTED: + break; + case PNFS_TRY_AGAIN: + /* cleanup hdr and prepare to redo pnfs */ + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); + list_splice_init(&hdr->pages, &mirror->pg_list); + mirror->pg_recoalesce = 1; + } + hdr->mds_ops->rpc_release(hdr); + } } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) @@ -2408,10 +2451,20 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) enum pnfs_try_status trypnfs; trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); - if (trypnfs == PNFS_TRY_AGAIN) - pnfs_read_resend_pnfs(hdr); - if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status) + switch (trypnfs) { + case PNFS_NOT_ATTEMPTED: pnfs_read_through_mds(desc, hdr); + case PNFS_ATTEMPTED: + break; + case PNFS_TRY_AGAIN: + /* cleanup hdr and prepare to redo pnfs */ + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); + list_splice_init(&hdr->pages, &mirror->pg_list); + mirror->pg_recoalesce = 1; + } + hdr->mds_ops->rpc_release(hdr); + } } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 590e1e35781f0b737b5b277d76ab56092f8e3f3b..99731e3e332f3ec32eec26cca47556193dcf68fe 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -173,14 +173,9 @@ struct pnfs_layoutdriver_type { gfp_t gfp_flags); int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *); - void (*encode_layoutreturn) (struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); - void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, - struct xdr_stream *xdr, - const struct nfs4_layoutcommit_args *args); int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); }; @@ -239,6 +234,7 @@ void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); void unset_pnfs_layoutdriver(struct nfs_server *); +void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, @@ -597,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2); } +static inline bool +pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req) +{ + u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length); + u64 req_last = req_offset(req) + req->wb_bytes; + + return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last, + req_offset(req), req_last); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7250b95549ecc73bd1dbdae9ec909aac64f93a49..d40755a0984bbb0942e96aee388ed50fe7629a6e 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -217,7 +217,14 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) continue; - data = nfs_commitdata_alloc(); + /* + * If the layout segment is invalid, then let + * pnfs_generic_retry_commit() clean up the bucket. + */ + if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) && + !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags)) + break; + data = nfs_commitdata_alloc(false); if (!data) break; data->ds_commit_index = i; @@ -283,16 +290,10 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, unsigned int nreq = 0; if (!list_empty(mds_pages)) { - data = nfs_commitdata_alloc(); - if (data != NULL) { - data->ds_commit_index = -1; - list_add(&data->pages, &list); - nreq++; - } else { - nfs_retry_commit(mds_pages, NULL, cinfo, 0); - pnfs_generic_retry_commit(cinfo, 0); - return -ENOMEM; - } + data = nfs_commitdata_alloc(true); + data->ds_commit_index = -1; + list_add(&data->pages, &list); + nreq++; } nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); @@ -619,7 +620,6 @@ void nfs4_pnfs_v3_ds_connect_unload(void) get_v3_ds_connect = NULL; } } -EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b7bca83039895b5692163d2a119f29c126ea5d83..9872cf676a50a7cfe945bfc5294602b4f67b5673 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -638,7 +638,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = file_inode(filp); - return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); + return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL); } /* Helper functions for NFS lock bounds checking */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index defc9233e9858c43a9438b2cf918a899b7f56afa..a8421d9dab6a125c4eb6b7ad9f074d8ce91a5a99 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -35,7 +35,11 @@ static struct kmem_cache *nfs_rdata_cachep; static struct nfs_pgio_header *nfs_readhdr_alloc(void) { - return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); + struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); + + if (p) + p->rw_mode = FMODE_READ; + return p; } static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) @@ -64,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, pg_ops = server->pnfs_curr_ld->pg_read_ops; #endif nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, - server->rsize, 0); + server->rsize, 0, GFP_KERNEL); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); @@ -451,7 +455,6 @@ void nfs_destroy_readpagecache(void) } static const struct nfs_rw_ops nfs_rw_read_ops = { - .rw_mode = FMODE_READ, .rw_alloc_header = nfs_readhdr_alloc, .rw_free_header = nfs_readhdr_free, .rw_done = nfs_readpage_done, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 54e0f9f2dd94903d2d6d96de46287ebc31fe1b6e..eceb4eabb064953f830d1bf97a5d0daeecdd27f0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock */ -inline void nfs_initialise_sb(struct super_block *sb) +static void nfs_initialise_sb(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2315,8 +2315,6 @@ inline void nfs_initialise_sb(struct super_block *sb) sb->s_blocksize = nfs_block_bits(server->wsize, &sb->s_blocksize_bits); - sb->s_bdi = &server->backing_dev_info; - nfs_super_set_maxbytes(sb, server->maxfilesize); } @@ -2350,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock */ -void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_clone_super(struct super_block *sb, + struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); @@ -2522,11 +2521,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, } #endif -static int nfs_bdi_register(struct nfs_server *server) -{ - return bdi_register_dev(&server->backing_dev_info, server->s_dev); -} - int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) { @@ -2594,11 +2588,13 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, nfs_free_server(server); server = NULL; } else { - error = nfs_bdi_register(server); + error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev), + MINOR(server->s_dev)); if (error) { mntroot = ERR_PTR(error); goto error_splat_super; } + s->s_bdi->ra_pages = server->rpages * NFS_MAX_READAHEAD; server->super = s; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index abb2c8a3be42e4755f747c62a1cec5466f13ee77..db7ba542559e7def882448708bb5dc0d530a3416 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -60,14 +60,28 @@ static mempool_t *nfs_wdata_mempool; static struct kmem_cache *nfs_cdata_cachep; static mempool_t *nfs_commit_mempool; -struct nfs_commit_data *nfs_commitdata_alloc(void) +struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) { - struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); + struct nfs_commit_data *p; - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); + if (never_fail) + p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); + else { + /* It is OK to do some reclaim, not no safe to wait + * for anything to be returned to the pool. + * mempool_alloc() cannot handle that particular combination, + * so we need two separate attempts. + */ + p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); + if (!p) + p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | + __GFP_NOWARN | __GFP_NORETRY); + if (!p) + return NULL; } + + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); return p; } EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); @@ -82,8 +96,10 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void) { struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); - if (p) + if (p) { memset(p, 0, sizeof(*p)); + p->rw_mode = FMODE_WRITE; + } return p; } @@ -263,16 +279,15 @@ int nfs_congestion_kb; static void nfs_set_page_writeback(struct page *page) { - struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host); + struct inode *inode = page_file_mapping(page)->host; + struct nfs_server *nfss = NFS_SERVER(inode); int ret = test_set_page_writeback(page); WARN_ON_ONCE(ret != 0); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) { - set_bdi_congested(&nfss->backing_dev_info, - BLK_RW_ASYNC); - } + NFS_CONGESTION_ON_THRESH) + set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } static void nfs_end_page_writeback(struct nfs_page *req) @@ -285,7 +300,7 @@ static void nfs_end_page_writeback(struct nfs_page *req) end_page_writeback(req->wb_page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); + clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } @@ -548,9 +563,21 @@ static void nfs_write_error_remove_page(struct nfs_page *req) { nfs_unlock_request(req); nfs_end_page_writeback(req); - nfs_release_request(req); generic_error_remove_page(page_file_mapping(req->wb_page), req->wb_page); + nfs_release_request(req); +} + +static bool +nfs_error_is_fatal_on_server(int err) +{ + switch (err) { + case 0: + case -ERESTARTSYS: + case -EINTR: + return false; + } + return nfs_error_is_fatal(err); } /* @@ -558,8 +585,7 @@ static void nfs_write_error_remove_page(struct nfs_page *req) * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page, bool nonblock, - bool launder) + struct page *page, bool nonblock) { struct nfs_page *req; int ret = 0; @@ -575,19 +601,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); ret = 0; + /* If there is a fatal error that covers this write, just exit */ + if (nfs_error_is_fatal_on_server(req->wb_context->error)) + goto out_launder; + if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* - * Remove the problematic req upon fatal errors - * in launder case, while other dirty pages can - * still be around until they get flushed. + * Remove the problematic req upon fatal errors on the server */ if (nfs_error_is_fatal(ret)) { nfs_context_set_write_error(req->wb_context, ret); - if (launder) { - nfs_write_error_remove_page(req); - goto out; - } + if (nfs_error_is_fatal_on_server(ret)) + goto out_launder; } nfs_redirty_request(req); ret = -EAGAIN; @@ -596,16 +622,18 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, NFSIOS_WRITEPAGES, 1); out: return ret; +out_launder: + nfs_write_error_remove_page(req); + return ret; } static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, - struct nfs_pageio_descriptor *pgio, bool launder) + struct nfs_pageio_descriptor *pgio) { int ret; nfs_pageio_cond_complete(pgio, page_index(page)); - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE, - launder); + ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); ret = 0; @@ -617,8 +645,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, * Write an mmapped page to the server. */ static int nfs_writepage_locked(struct page *page, - struct writeback_control *wbc, - bool launder) + struct writeback_control *wbc) { struct nfs_pageio_descriptor pgio; struct inode *inode = page_file_mapping(page)->host; @@ -627,7 +654,7 @@ static int nfs_writepage_locked(struct page *page, nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, 0, false, &nfs_async_write_completion_ops); - err = nfs_do_writepage(page, wbc, &pgio, launder); + err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) return err; @@ -640,7 +667,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) { int ret; - ret = nfs_writepage_locked(page, wbc, false); + ret = nfs_writepage_locked(page, wbc); unlock_page(page); return ret; } @@ -649,7 +676,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * { int ret; - ret = nfs_do_writepage(page, wbc, data, false); + ret = nfs_do_writepage(page, wbc, data); unlock_page(page); return ret; } @@ -1368,7 +1395,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, pg_ops = server->pnfs_curr_ld->pg_write_ops; #endif nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, - server->wsize, ioflags); + server->wsize, ioflags, GFP_NOIO); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); @@ -1705,50 +1732,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, if (list_empty(head)) return 0; - data = nfs_commitdata_alloc(); - - if (!data) - goto out_bad; + data = nfs_commitdata_alloc(true); /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, 0); - out_bad: - nfs_retry_commit(head, NULL, cinfo, 0); - return -ENOMEM; -} - -int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf) -{ - struct inode *inode = file_inode(file); - struct nfs_open_context *open; - struct nfs_commit_info cinfo; - struct nfs_page *req; - int ret; - - open = get_nfs_open_context(nfs_file_open_context(file)); - req = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode)); - if (IS_ERR(req)) { - ret = PTR_ERR(req); - goto out_put; - } - - nfs_init_cinfo_from_inode(&cinfo, inode); - - memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier)); - nfs_request_add_commit_list(req, &cinfo); - ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (ret > 0) - ret = 0; - - nfs_free_request(req); -out_put: - put_nfs_open_context(open); - return ret; } -EXPORT_SYMBOL_GPL(nfs_commit_file); /* * COMMIT call returned @@ -1808,7 +1799,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) } nfss = NFS_SERVER(data->inode); if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); + clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC); nfs_init_cinfo(&cinfo, data->inode, data->dreq); nfs_commit_end(cinfo.mds); @@ -1986,7 +1977,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) /* * Write back all requests on one page - we do this before reading it. */ -int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1); @@ -2003,7 +1994,7 @@ int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) for (;;) { wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc, launder); + ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; continue; @@ -2108,7 +2099,6 @@ void nfs_destroy_writepagecache(void) } static const struct nfs_rw_ops nfs_rw_write_ops = { - .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, .rw_done = nfs_writeback_done, diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 92b4b41d19d2a2e23b469ce993e3a9f159f5f578..fb5213afc854e2c28edafc238f374a5a93c01d9f 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -242,10 +242,11 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, req->cmd[4] = bufflen & 0xff; req->cmd_len = COMMAND_SIZE(INQUIRY); - error = blk_execute_rq(rq->q, NULL, rq, 1); - if (error) { + blk_execute_rq(rq->q, NULL, rq, 1); + if (req->result) { pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", - rq->errors); + req->result); + error = -EIO; goto out_put_request; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index dba2ff8eaa68e3365deac3d61df3da5641099d9e..452334694a5d1f37cc480e5d1cf2873c4246019d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -358,6 +358,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); + struct kvec *head = rqstp->rq_arg.head; + struct kvec *tail = rqstp->rq_arg.tail; p = decode_fh(p, &args->fh); if (!p) @@ -367,6 +369,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->count = ntohl(*p++); args->stable = ntohl(*p++); len = args->len = ntohl(*p++); + if ((void *)p > head->iov_base + head->iov_len) + return 0; /* * The count must equal the amount of data passed. */ @@ -377,9 +381,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - + rqstp->rq_arg.tail[0].iov_len - hdr; + hdr = (void*)p - head->iov_base; + dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr; /* * Round the length of the data which was specified up to * the next multiple of XDR units and then compare that @@ -396,7 +399,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, len = args->len = max_blocksize; } rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; @@ -471,6 +474,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, /* first copy and check from the first page */ old = (char*)p; vec = &rqstp->rq_arg.head[0]; + if ((void *)old > vec->iov_base + vec->iov_len) + return 0; avail = vec->iov_len - (old - (char*)vec->iov_base); while (len && avail && *old) { *new++ = *old++; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index cbeeda1e94a2fbbba61e2adeeb4f9ba89287eaf9..dadb3bf305b22f352a3f91a2df06b30284b4891c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1259,7 +1259,8 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) return NULL; } - if (!(exp->ex_layout_types & (1 << layout_type))) { + if (layout_type >= LAYOUT_TYPE_MAX || + !(exp->ex_layout_types & (1 << layout_type))) { dprintk("%s: layout type %d not supported\n", __func__, layout_type); return NULL; @@ -1768,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, opdesc->op_get_currentstateid(cstate, &op->u); op->status = opdesc->op_func(rqstp, cstate, &op->u); + /* Only from SEQUENCE */ + if (cstate->status == nfserr_replay_cache) { + dprintk("%s NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } if (!op->status) { if (opdesc->op_set_currentstateid) opdesc->op_set_currentstateid(cstate, &op->u); @@ -1778,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } - encode_op: - /* Only from SEQUENCE */ - if (cstate->status == nfserr_replay_cache) { - dprintk("%s NFS4.1 replay from cache\n", __func__); - status = op->status; - goto out; - } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; nfsd4_encode_replay(&resp->xdr, op); @@ -2489,7 +2489,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) { - if (op->opnum == OP_ILLEGAL) + if (op->opnum == OP_ILLEGAL || op->status == nfserr_notsupp) return op_encode_hdr_size * sizeof(__be32); BUG_ON(OPDESC(op)->op_rsize_bop == NULL); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e9ef50addddb4489534bc07f138cd8c321d9193d..22002fb75a1827f2ca08bd1dcf23a775669fdd14 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1912,28 +1912,15 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) target->cl_clientid.cl_id = source->cl_clientid.cl_id; } -int strdup_if_nonnull(char **target, char *source) -{ - if (source) { - *target = kstrdup(source, GFP_KERNEL); - if (!*target) - return -ENOMEM; - } else - *target = NULL; - return 0; -} - static int copy_cred(struct svc_cred *target, struct svc_cred *source) { - int ret; + target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL); + target->cr_raw_principal = kstrdup(source->cr_raw_principal, + GFP_KERNEL); + if ((source->cr_principal && ! target->cr_principal) || + (source->cr_raw_principal && ! target->cr_raw_principal)) + return -ENOMEM; - ret = strdup_if_nonnull(&target->cr_principal, source->cr_principal); - if (ret) - return ret; - ret = strdup_if_nonnull(&target->cr_raw_principal, - source->cr_raw_principal); - if (ret) - return ret; target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 33017d652b1da23165ff75ef0c8abb529ac55234..26780d53a6f9412ba50cf3b3711b32d53d061723 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2831,9 +2831,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, } #endif /* CONFIG_NFSD_PNFS */ if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0, - NFSD_SUPPATTR_EXCLCREAT_WORD1, - NFSD_SUPPATTR_EXCLCREAT_WORD2); + u32 supp[3]; + + memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); + supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0; + supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1; + supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2; + + status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]); if (status) goto out; } @@ -4119,8 +4124,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getdeviceinfo *gdev) { struct xdr_stream *xdr = &resp->xdr; - const struct nfsd4_layout_ops *ops = - nfsd4_layout_ops[gdev->gd_layout_type]; + const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4137,6 +4141,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, /* If maxcount is 0 then just update notifications */ if (gdev->gd_maxcount != 0) { + ops = nfsd4_layout_ops[gdev->gd_layout_type]; nfserr = ops->encode_getdeviceinfo(xdr, gdev); if (nfserr) { /* @@ -4189,8 +4194,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutget *lgp) { struct xdr_stream *xdr = &resp->xdr; - const struct nfsd4_layout_ops *ops = - nfsd4_layout_ops[lgp->lg_layout_type]; + const struct nfsd4_layout_ops *ops; __be32 *p; dprintk("%s: err %d\n", __func__, nfserr); @@ -4213,6 +4217,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(lgp->lg_seg.iomode); *p++ = cpu_to_be32(lgp->lg_layout_type); + ops = nfsd4_layout_ops[lgp->lg_layout_type]; nfserr = ops->encode_layoutget(xdr, lgp); out: kfree(lgp->lg_content); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8bf8f667a8cf2fe8359f74b41497bc9436ca0efb..6493df6b1bd5f192646d1f63b2230af840833651 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1146,7 +1146,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { - static struct tree_descr nfsd_files[] = { + static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 31e1f959345715a59f8f6b9d9ec00a0ec00fece5..59979f0bbd4bf255f5ea6f8fbd33cb9b2a5aa073 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -747,6 +747,37 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr) return nfserr; } +/* + * A write procedure can have a large argument, and a read procedure can + * have a large reply, but no NFSv2 or NFSv3 procedure has argument and + * reply that can both be larger than a page. The xdr code has taken + * advantage of this assumption to be a sloppy about bounds checking in + * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that + * problem, we enforce these assumptions here: + */ +static bool nfs_request_too_big(struct svc_rqst *rqstp, + struct svc_procedure *proc) +{ + /* + * The ACL code has more careful bounds-checking and is not + * susceptible to this problem: + */ + if (rqstp->rq_prog != NFS_PROGRAM) + return false; + /* + * Ditto NFSv4 (which can in theory have argument and reply both + * more than a page): + */ + if (rqstp->rq_vers >= 4) + return false; + /* The reply will be small, we're OK: */ + if (proc->pc_xdrressize > 0 && + proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE)) + return false; + + return rqstp->rq_arg.len > PAGE_SIZE; +} + int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { @@ -759,6 +790,11 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_vers, rqstp->rq_proc); proc = rqstp->rq_procinfo; + if (nfs_request_too_big(rqstp, proc)) { + dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers); + *statp = rpc_garbage_args; + return 1; + } /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 41b468a6a90f807fe3f3d2e4ceeaa9f8c7ae0f8c..de07ff625777820fefc98bfa56adea81962e8135 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { unsigned int len, hdr, dlen; + struct kvec *head = rqstp->rq_arg.head; int v; p = decode_fh(p, &args->fh); @@ -300,9 +301,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - - hdr; + hdr = (void*)p - head->iov_base; + if (hdr > head->iov_len) + return 0; + dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; /* * Round the length of the data which was specified up to @@ -316,7 +318,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, return 0; rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 19d50f600e8d48c6f493130076606a6213de258d..2be32955d7f27d285d0cba1dad39e88c885dc7e0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -94,6 +94,12 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, err = follow_down(&path); if (err < 0) goto out; + if (path.mnt == exp->ex_path.mnt && path.dentry == dentry && + nfsd_mountpoint(dentry, exp) == 2) { + /* This is only a mountpoint in some other namespace */ + path_put(&path); + goto out; + } exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { @@ -167,16 +173,26 @@ static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, st /* * For nfsd purposes, we treat V4ROOT exports as though there was an * export at *every* directory. + * We return: + * '1' if this dentry *must* be an export point, + * '2' if it might be, if there is really a mount here, and + * '0' if there is no chance of an export point here. */ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) { - if (d_mountpoint(dentry)) + if (!d_inode(dentry)) + return 0; + if (exp->ex_flags & NFSEXP_V4ROOT) return 1; if (nfsd4_is_junction(dentry)) return 1; - if (!(exp->ex_flags & NFSEXP_V4ROOT)) - return 0; - return d_inode(dentry) != NULL; + if (d_mountpoint(dentry)) + /* + * Might only be a mountpoint in a different namespace, + * but we need to check. + */ + return 2; + return 0; } __be32 @@ -1004,7 +1020,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, else err = nfserrno(host_err); if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) - tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); + current_restore_flags(pflags, PF_LESS_THROTTLE); return err; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e1872f36147f590765cf1d7548cce2e0d5243752..926682981d61f0bd1671e9f49c1a0a8b80e72f03 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1068,7 +1068,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdi_get(sb->s_bdev->bd_bdi); err = load_nilfs(nilfs, sb); if (err) diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 96d3420d0242ba6ab1457c3312a395a57ef3ad86..3e969ae91b60dd1bbc55cb51698387f2c01ba412 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \ - mark.o vfsmount_mark.o fdinfo.o +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o mark.o \ + fdinfo.o obj-y += dnotify/ obj-y += inotify/ diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5a4ec309e28375eb60981f9c32314e1ae28c1efd..2430a0415995d9e8d056b2c24eda8b749a79998f 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -52,7 +52,7 @@ struct dnotify_mark { */ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { - __u32 new_mask, old_mask; + __u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, @@ -60,17 +60,13 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) assert_spin_locked(&fsn_mark->lock); - old_mask = fsn_mark->mask; - new_mask = 0; for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); - fsnotify_set_mark_mask_locked(fsn_mark, new_mask); - - if (old_mask == new_mask) + if (fsn_mark->mask == new_mask) return; + fsn_mark->mask = new_mask; - if (fsn_mark->inode) - fsnotify_recalc_inode_mask(fsn_mark->inode); + fsnotify_recalc_mask(fsn_mark->connector); } /* @@ -86,7 +82,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; @@ -138,6 +135,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) static struct fsnotify_ops dnotify_fsnotify_ops = { .handle_event = dnotify_handle_event, + .free_mark = dnotify_free_mark, }; /* @@ -160,7 +158,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -308,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; - fsnotify_init_mark(new_fsn_mark, dnotify_free_mark); + fsnotify_init_mark(new_fsn_mark, dnotify_group); new_fsn_mark->mask = mask; new_dn_mark->dn = NULL; @@ -316,13 +314,12 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) mutex_lock(&dnotify_group->mark_mutex); /* add the new_fsn_mark or find an old one. */ - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, - NULL, 0); + fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e5f7e47de68e4bf3cc75dc1cf03e29fed3fda8be..2fa99aeaa0959e43e80db466a95e7a09ea0dbfa7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -57,14 +57,26 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event) + struct fanotify_perm_event_info *event, + struct fsnotify_iter_info *iter_info) { int ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); + /* + * fsnotify_prepare_user_wait() fails if we race with mark deletion. + * Just let the operation pass in that case. + */ + if (!fsnotify_prepare_user_wait(iter_info)) { + event->response = FAN_ALLOW; + goto out; + } + wait_event(group->fanotify_data.access_waitq, event->response); + fsnotify_finish_user_wait(iter_info); +out: /* userspace responded, convert to something usable */ switch (event->response) { case FAN_ALLOW: @@ -174,7 +186,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { int ret = 0; struct fanotify_event_info *event; @@ -215,7 +228,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & FAN_ALL_PERM_EVENTS) { - ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); + ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), + iter_info); fsnotify_destroy_event(group, fsn_event); } #endif @@ -248,8 +262,14 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) kmem_cache_free(fanotify_event_cachep, event); } +static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + kmem_cache_free(fanotify_mark_cache, fsn_mark); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .free_group_priv = fanotify_free_group_priv, .free_event = fanotify_free_event, + .free_mark = fanotify_free_mark, }; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4500a74f8d3873173ddc13a11ef7e5ab4c03cfa5..4eb6f5efa282527f4d2d4824fcf80267f0b68be7 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,6 +2,7 @@ #include #include +extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2b37f27858345ccf7bca6747768df11680067a1e..907a481ac78158612ce918e4a1f4c46ea860fb30 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -41,7 +41,7 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; -static struct kmem_cache *fanotify_mark_cache __read_mostly; +struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; @@ -295,27 +295,37 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, kevent, buf); + if (unlikely(ret == -EOPENSTALE)) { + /* + * We cannot report events with stale fd so drop it. + * Setting ret to 0 will continue the event loop and + * do the right thing if there are no more events to + * read (i.e. return bytes read, -EAGAIN or wait). + */ + ret = 0; + } + /* * Permission events get queued to wait for response. Other * events can be destroyed now. */ if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { fsnotify_destroy_event(group, kevent); - if (ret < 0) - break; } else { #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (ret < 0) { + if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); - break; + } else { + spin_lock(&group->notification_lock); + list_add_tail(&kevent->list, + &group->fanotify_data.access_list); + spin_unlock(&group->notification_lock); } - spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, - &group->fanotify_data.access_list); - spin_unlock(&group->notification_lock); #endif } + if (ret < 0) + break; buf += ret; count -= ret; } @@ -445,11 +455,6 @@ static const struct file_operations fanotify_fops = { .llseek = noop_llseek, }; -static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - kmem_cache_free(fanotify_mark_cache, fsn_mark); -} - static int fanotify_find_path(int dfd, const char __user *filename, struct path *path, unsigned int flags) { @@ -511,13 +516,12 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, tmask &= ~FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask & ~mask; if (flags & FAN_MARK_ONDIR) tmask &= ~FAN_ONDIR; - - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -534,7 +538,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -542,6 +547,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & real_mount(mnt)->mnt_fsnotify_mask) + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); @@ -549,9 +556,6 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); - if (removed & real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); - return 0; } @@ -564,7 +568,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -572,16 +576,16 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & inode->i_fsnotify_mask) + fsnotify_recalc_mask(inode->i_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); if (destroy_mark) fsnotify_free_mark(fsn_mark); - /* matches the fsnotify_find_inode_mark() */ + /* matches the fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); - if (removed & inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); return 0; } @@ -600,13 +604,13 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, tmask |= FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask | mask; if (flags & FAN_MARK_ONDIR) tmask |= FAN_ONDIR; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -629,8 +633,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, if (!mark) return ERR_PTR(-ENOMEM); - fsnotify_init_mark(mark, fanotify_free_mark); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); + fsnotify_init_mark(mark, group); + ret = fsnotify_add_mark_locked(mark, inode, mnt, 0); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -648,7 +652,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, __u32 added; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, NULL, mnt); if (IS_ERR(fsn_mark)) { @@ -657,10 +662,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; @@ -686,7 +690,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, inode, NULL); if (IS_ERR(fsn_mark)) { @@ -695,10 +699,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index fd98e5100cabedaa2a71ecef4bd8997334ae528e..dd63aa9a6f9a3fe2696496fee7afa17a4ea45210 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -76,12 +76,11 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) || - !(mark->flags & FSNOTIFY_MARK_FLAG_INODE)) + if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE)) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(mark->inode); + inode = igrab(mark->connector->inode); if (inode) { /* * IN_ALL_EVENTS represents all of the mask bits @@ -113,14 +112,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) unsigned int mflags = 0; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) - return; - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = igrab(mark->inode); + if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = igrab(mark->connector->inode); if (!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", @@ -129,8 +125,8 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) { - struct mount *mnt = real_mount(mark->mnt); + } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + struct mount *mnt = real_mount(mark->connector->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b41515d3f0815246185b264532d71cc43824ef9a..01a9f0f007d4518563fd9ad1f06d3b48e2aa872a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -41,6 +41,63 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +/** + * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @sb: superblock being unmounted. + * + * Called during unmount with no locks held, so needs to be safe against + * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. + */ +void fsnotify_unmount_inodes(struct super_block *sb) +{ + struct inode *inode, *iput_inode = NULL; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + /* + * We cannot __iget() an inode in state I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + continue; + } + + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput_inode = inode; + + spin_lock(&sb->s_inode_list_lock); + } + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); +} + /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event @@ -127,7 +184,8 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_mark *vfsmount_mark, __u32 mask, const void *data, int data_is, u32 cookie, - const unsigned char *file_name) + const unsigned char *file_name, + struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 inode_test_mask = 0; @@ -178,7 +236,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name, cookie); + file_name, cookie, iter_info); } /* @@ -193,8 +251,10 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; struct fsnotify_group *inode_group, *vfsmount_group; + struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; + struct fsnotify_iter_info iter_info; struct mount *mnt; - int idx, ret = 0; + int ret = 0; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); @@ -210,8 +270,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (hlist_empty(&to_tell->i_fsnotify_marks) && - (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -223,19 +283,30 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, !(mnt && test_mask & mnt->mnt_fsnotify_mask)) return 0; - idx = srcu_read_lock(&fsnotify_mark_srcu); + iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + (test_mask & to_tell->i_fsnotify_mask)) { + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, - &fsnotify_mark_srcu); - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks, + &fsnotify_mark_srcu); + if (vfsmount_conn) + vfsmount_node = srcu_dereference( + vfsmount_conn->list.first, + &fsnotify_mark_srcu); } /* @@ -272,8 +343,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, vfsmount_mark = NULL; } } + + iter_info.inode_mark = inode_mark; + iter_info.vfsmount_mark = vfsmount_mark; + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, - data, data_is, cookie, file_name); + data, data_is, cookie, file_name, + &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; @@ -287,12 +363,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, } ret = 0; out: - srcu_read_unlock(&fsnotify_mark_srcu, idx); + srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); +extern struct kmem_cache *fsnotify_mark_connector_cachep; + static __init int fsnotify_init(void) { int ret; @@ -303,6 +381,9 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + return 0; } core_initcall(fsnotify_init); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 0a3bc2cf192cfdf94f51e3a814c175f26a586e74..bf012e8ecd14be9fb3d9a526eba95baa9fe7c9be 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -8,60 +8,36 @@ #include "../mount.h" +struct fsnotify_iter_info { + struct fsnotify_mark *inode_mark; + struct fsnotify_mark *vfsmount_mark; + int srcu_idx; +}; + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; -/* Calculate mask of events for a list of marks */ -extern u32 fsnotify_recalc_mask(struct hlist_head *head); - /* compare two groups for sorting of marks lists */ extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); -extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, - __u32 mask); -/* Add mark to a proper place in mark list */ -extern int fsnotify_add_mark_list(struct hlist_head *head, - struct fsnotify_mark *mark, - int allow_dups); -/* add a mark to an inode */ -extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups); -/* add a mark to a vfsmount */ -extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups); - -/* vfsmount specific destruction of a mark */ -extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); -/* inode specific destruction of a mark */ -extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); -/* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group); -/* Destroy all marks in the given list protected by 'lock' */ -extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock); +/* Destroy all marks connected via given connector */ +extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp); /* run the list of all marks associated with inode and destroy them */ static inline void fsnotify_clear_marks_by_inode(struct inode *inode) { - fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock); + fsnotify_destroy_marks(&inode->i_fsnotify_marks); } /* run the list of all marks associated with vfsmount and destroy them */ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { - fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks, - &mnt->mnt_root->d_lock); + fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } -/* prepare for freeing all marks associated with given group */ -extern void fsnotify_detach_group_marks(struct fsnotify_group *group); -/* - * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list - */ -extern void fsnotify_mark_destroy_list(void); +/* Wait until all marks queued for destruction are destroyed */ +extern void fsnotify_wait_marks_destroyed(void); /* * update the dentry->d_flags of all of inode's children to indicate if inode cares diff --git a/fs/notify/group.c b/fs/notify/group.c index fbe3cbebec1671247231ba80b79694ef5b03157e..32357534de18db462a2f154e2e361528471acd0c 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -66,14 +66,23 @@ void fsnotify_destroy_group(struct fsnotify_group *group) */ fsnotify_group_stop_queueing(group); - /* clear all inode marks for this group, attach them to destroy_list */ - fsnotify_detach_group_marks(group); + /* Clear all marks for this group and queue them for destruction */ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES); /* - * Wait for fsnotify_mark_srcu period to end and free all marks in - * destroy_list + * Some marks can still be pinned when waiting for response from + * userspace. Wait for those now. fsnotify_prepare_user_wait() will + * not succeed now so this wait is race-free. */ - fsnotify_mark_destroy_list(); + wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); + + /* + * Wait until all marks get really destroyed. We could actually destroy + * them ourselves instead of waiting for worker to do it, however that + * would be racy as worker can already be processing some marks before + * we even entered fsnotify_destroy_group(). + */ + fsnotify_wait_marks_destroyed(); /* * Since we have waited for fsnotify_mark_srcu in @@ -124,6 +133,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) /* set to 0 when there a no external references to this group */ atomic_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); + atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); INIT_LIST_HEAD(&group->notification_list); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c deleted file mode 100644 index a3645249f7ecfa4cbdae8434bb87b47ffd02dbb1..0000000000000000000000000000000000000000 --- a/fs/notify/inode_mark.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include "fsnotify.h" - -#include "../internal.h" - -/* - * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this inode. - */ -void fsnotify_recalc_inode_mask(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - __fsnotify_update_child_dentry_flags(inode); -} - -void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) -{ - struct inode *inode = mark->inode; - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->inode = NULL; - - /* - * this mark is now off the inode->i_fsnotify_marks list and we - * hold the inode->i_lock, so this is the perfect time to update the - * inode->i_fsnotify_mask - */ - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); -} - -/* - * Given a group clear all of the inode marks associated with that group. - */ -void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); -} - -/* - * given a group and inode, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, - struct inode *inode) -{ - struct fsnotify_mark *mark; - - spin_lock(&inode->i_lock); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); - spin_unlock(&inode->i_lock); - - return mark; -} - -/* - * If we are setting a mark mask on an inode mark we should pin the inode - * in memory. - */ -void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, - __u32 mask) -{ - struct inode *inode; - - assert_spin_locked(&mark->lock); - - if (mask && - mark->inode && - !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) { - mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED; - inode = igrab(mark->inode); - /* - * we shouldn't be able to get here if the inode wasn't - * already safely held in memory. But bug in case it - * ever is wrong. - */ - BUG_ON(!inode); - } -} - -/* - * Attach an initialized mark to a given inode. - * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which group and for which inodes. These - * marks are ordered according to priority, highest number first, and then by - * the group's location in memory. - */ -int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups) -{ - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_INODE; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - mark->inode = inode; - ret = fsnotify_add_mark_list(&inode->i_fsnotify_marks, mark, - allow_dups); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - return ret; -} - -/** - * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @sb: superblock being unmounted. - * - * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. - */ -void fsnotify_unmount_inodes(struct super_block *sb) -{ - struct inode *inode, *iput_inode = NULL; - - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - /* - * We cannot __iget() an inode in state I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - - fsnotify_inode_delete(inode); - - iput_inode = inode; - - spin_lock(&sb->s_inode_list_lock); - } - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); -} diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7c461fd49c4ccf1f81d87d1eddcacd16c3c3ab86..9ff67b61da8a6abb3e573ae7b1f31c71235b4139 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,9 +27,11 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); extern const struct fsnotify_ops inotify_fsnotify_ops; +extern struct kmem_cache *inotify_inode_mark_cachep; #ifdef CONFIG_INOTIFY_USER static inline void dec_inotify_instances(struct ucounts *ucounts) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1aeb837ae41405c94c72041bbba87fa7e9d0e1a5..8b73332735ba5071f67c5f39b6079c506cdea6f7 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -68,7 +68,8 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -156,8 +157,8 @@ static int idr_callback(int id, void *p, void *data) * BUG() that was here. */ if (fsn_mark) - printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n", - fsn_mark->group, fsn_mark->inode, i_mark->wd); + printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", + fsn_mark->group, i_mark->wd); return 0; } @@ -175,9 +176,20 @@ static void inotify_free_event(struct fsnotify_event *fsn_event) kfree(INOTIFY_E(fsn_event)); } +/* ding dong the mark is dead */ +static void inotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + struct inotify_inode_mark *i_mark; + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + + kmem_cache_free(inotify_inode_mark_cachep, i_mark); +} + const struct fsnotify_ops inotify_fsnotify_ops = { .handle_event = inotify_handle_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, + .free_mark = inotify_free_mark, }; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 498d609b26c7dbf4d81537477b338aedc1a64156..7cc7d3fb1862fcb557933e63ec66361cf8bcfb7a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -47,7 +47,7 @@ /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #ifdef CONFIG_SYSCTL @@ -395,21 +395,6 @@ static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, return i_mark; } -static void do_inotify_remove_from_idr(struct fsnotify_group *group, - struct inotify_inode_mark *i_mark) -{ - struct idr *idr = &group->inotify_data.idr; - spinlock_t *idr_lock = &group->inotify_data.idr_lock; - int wd = i_mark->wd; - - assert_spin_locked(idr_lock); - - idr_remove(idr, wd); - - /* removed from the idr, drop that ref */ - fsnotify_put_mark(&i_mark->fsn_mark); -} - /* * Remove the mark from the idr (if present) and drop the reference * on the mark because it was in the idr. @@ -417,6 +402,7 @@ static void do_inotify_remove_from_idr(struct fsnotify_group *group, static void inotify_remove_from_idr(struct fsnotify_group *group, struct inotify_inode_mark *i_mark) { + struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; struct inotify_inode_mark *found_i_mark = NULL; int wd; @@ -429,18 +415,16 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, * if it wasn't.... */ if (wd == -1) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } /* Lets look in the idr to see if we find it */ found_i_mark = inotify_idr_find_locked(group, wd); if (unlikely(!found_i_mark)) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } @@ -451,35 +435,33 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, */ if (unlikely(found_i_mark != i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " - "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " - "found_i_mark->group=%p found_i_mark->inode=%p\n", - __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, - i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd, - found_i_mark->fsn_mark.group, - found_i_mark->fsn_mark.inode); + "found_i_mark=%p found_i_mark->wd=%d " + "found_i_mark->group=%p\n", __func__, i_mark, + i_mark->wd, i_mark->fsn_mark.group, found_i_mark, + found_i_mark->wd, found_i_mark->fsn_mark.group); goto out; } /* * One ref for being in the idr - * one ref held by the caller trying to kill us * one ref grabbed by inotify_idr_find */ - if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { - printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { + printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really recover with bad ref cnting.. */ BUG(); } - do_inotify_remove_from_idr(group, i_mark); + idr_remove(idr, wd); + /* Removed from the idr, drop that ref. */ + fsnotify_put_mark(&i_mark->fsn_mark); out: + i_mark->wd = -1; + spin_unlock(idr_lock); /* match the ref taken by inotify_idr_find_locked() */ if (found_i_mark) fsnotify_put_mark(&found_i_mark->fsn_mark); - i_mark->wd = -1; - spin_unlock(idr_lock); } /* @@ -492,7 +474,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL, 0); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -501,16 +483,6 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, dec_inotify_watches(group->inotify_data.ucounts); } -/* ding dong the mark is dead */ -static void inotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - struct inotify_inode_mark *i_mark; - - i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); - - kmem_cache_free(inotify_inode_mark_cachep, i_mark); -} - static int inotify_update_existing_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) @@ -524,21 +496,19 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, mask = inotify_arg_to_mask(arg); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); spin_lock(&fsn_mark->lock); - old_mask = fsn_mark->mask; if (add) - fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask)); + fsn_mark->mask |= mask; else - fsnotify_set_mark_mask_locked(fsn_mark, mask); + fsn_mark->mask = mask; new_mask = fsn_mark->mask; - spin_unlock(&fsn_mark->lock); if (old_mask != new_mask) { @@ -549,7 +519,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* update the inode with this new fsn_mark */ if (dropped || do_inode) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); } @@ -578,7 +548,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (unlikely(!tmp_i_mark)) return -ENOMEM; - fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark); + fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; @@ -594,8 +564,7 @@ static int inotify_new_watch(struct fsnotify_group *group, } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, - NULL, 0); + ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 6043306e8e21518daeb945ebf2569d19fe9edbbf..9991f88267342f9e699655cc29129225334e8dd6 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -33,7 +33,7 @@ * * group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock * * group->mark_mutex protects the marks_list anchored inside a given group and * each mark is hooked via the g_list. It also protects the groups private @@ -44,14 +44,22 @@ * is assigned to as well as the access to a reference of the inode/vfsmount * that is being watched by the mark. * - * inode->i_lock protects the i_fsnotify_marks list anchored inside a - * given inode and each mark is hooked via the i_list. (and sorta the - * free_i_list) + * mark->connector->lock protects the list of marks anchored inside an + * inode / vfsmount and each mark is hooked via the i_list. * + * A list of notification marks relating to inode / mnt is contained in + * fsnotify_mark_connector. That structure is alive as long as there are any + * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets + * detached from fsnotify_mark_connector when last reference to the mark is + * dropped. Thus having mark reference is enough to protect mark->connector + * pointer and to make sure fsnotify_mark_connector cannot disappear. Also + * because we remove mark from g_list before dropping mark reference associated + * with that, any mark found through g_list is guaranteed to have + * mark->connector set until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their - * refcnt==0. + * refcnt==0. Marks are also protected by fsnotify_mark_srcu. * * The inode mark can be cleared for a number of different reasons including: * - The inode is unlinked for the last time. (fsnotify_inode_remove) @@ -61,17 +69,6 @@ * - The fsnotify_group associated with the mark is going away and all such marks * need to be cleaned up. (fsnotify_clear_marks_by_group) * - * Worst case we are given an inode and need to clean up all the marks on that - * inode. We take i_lock and walk the i_fsnotify_marks safely. For each - * mark on the list we take a reference (so the mark can't disappear under us). - * We remove that mark form the inode's list of marks and we add this mark to a - * private list anchored on the stack using i_free_list; we walk i_free_list - * and before we destroy the mark we make sure that we dont race with a - * concurrent destroy_group by getting a ref to the marks group and taking the - * groups mutex. - - * Very similarly for freeing by group, except we use free_g_list. - * * This has the very interesting property of being able to run concurrently with * any (or all) other directions. */ @@ -94,94 +91,281 @@ #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; +struct kmem_cache *fsnotify_mark_connector_cachep; + static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); +static struct fsnotify_mark_connector *connector_destroy_list; static void fsnotify_mark_destroy_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); +static void fsnotify_connector_destroy_workfn(struct work_struct *work); +static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); + void fsnotify_get_mark(struct fsnotify_mark *mark) { + WARN_ON_ONCE(!atomic_read(&mark->refcnt)); atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. + */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (atomic_dec_and_test(&mark->refcnt)) { - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); - } + return atomic_inc_not_zero(&mark->refcnt); } -/* Calculate mask of events for a list of marks */ -u32 fsnotify_recalc_mask(struct hlist_head *head) +static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) - new_mask |= mark->mask; - return new_mask; + assert_spin_locked(&conn->lock); + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) + new_mask |= mark->mask; + } + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + conn->inode->i_fsnotify_mask = new_mask; + else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) + real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. - * - * Must be called with group->mark_mutex held. + * Calculate mask of events for a list of marks. The caller must make sure + * connector and connector->inode cannot disappear under us. Callers achieve + * this by holding a mark->lock or mark->group->mark_mutex for a mark on this + * list. */ -void fsnotify_detach_mark(struct fsnotify_mark *mark) +void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +{ + if (!conn) + return; + + spin_lock(&conn->lock); + __fsnotify_recalc_mask(conn); + spin_unlock(&conn->lock); + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + __fsnotify_update_child_dentry_flags(conn->inode); +} + +/* Free all connectors queued for freeing once SRCU period ends */ +static void fsnotify_connector_destroy_workfn(struct work_struct *work) +{ + struct fsnotify_mark_connector *conn, *free; + + spin_lock(&destroy_lock); + conn = connector_destroy_list; + connector_destroy_list = NULL; + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + while (conn) { + free = conn; + conn = conn->destroy_next; + kmem_cache_free(fsnotify_mark_connector_cachep, free); + } +} + +static struct inode *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn) { struct inode *inode = NULL; + + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = conn->inode; + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); + inode->i_fsnotify_mask = 0; + conn->inode = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, + NULL); + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; + conn->mnt = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + } + + return inode; +} + +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ struct fsnotify_group *group = mark->group; - BUG_ON(!mutex_is_locked(&group->mark_mutex)); + if (WARN_ON_ONCE(!group)) + return; + group->ops->free_mark(mark); + fsnotify_put_group(group); +} - spin_lock(&mark->lock); +void fsnotify_put_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_mark_connector *conn; + struct inode *inode = NULL; + bool free_conn = false; - /* something else already called this function on this mark */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { - spin_unlock(&mark->lock); + /* Catch marks that were actually never attached to object */ + if (!mark->connector) { + if (atomic_dec_and_test(&mark->refcnt)) + fsnotify_final_mark_destroy(mark); return; } - mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + return; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = mark->inode; - fsnotify_destroy_inode_mark(mark); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) - fsnotify_destroy_vfsmount_mark(mark); - else - BUG(); + conn = mark->connector; + hlist_del_init_rcu(&mark->obj_list); + if (hlist_empty(&conn->list)) { + inode = fsnotify_detach_connector_from_object(conn); + free_conn = true; + } else { + __fsnotify_recalc_mask(conn); + } + mark->connector = NULL; + spin_unlock(&conn->lock); + + iput(inode); + + if (free_conn) { + spin_lock(&destroy_lock); + conn->destroy_next = connector_destroy_list; + connector_destroy_list = conn; + spin_unlock(&destroy_lock); + queue_work(system_unbound_wq, &connector_reaper_work); + } /* * Note that we didn't update flags telling whether inode cares about * what's happening with children. We update these flags from * __fsnotify_parent() lazily when next event happens on one of our * children. */ + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); +} - list_del_init(&mark->g_list); +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group; - spin_unlock(&mark->lock); + if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) + return false; + + if (iter_info->inode_mark) + group = iter_info->inode_mark->group; + else + group = iter_info->vfsmount_mark->group; + + /* + * Since acquisition of mark reference is an atomic op as well, we can + * be sure this inc is seen before any effect of refcount increment. + */ + atomic_inc(&group->user_waits); + + if (iter_info->inode_mark) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) + goto out_wait; + } + if (iter_info->vfsmount_mark) { + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) + goto out_inode; + } - if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) - iput(inode); + /* + * Now that both marks are pinned by refcount in the inode / vfsmount + * lists, we can drop SRCU lock, and safely resume the list iteration + * once userspace returns. + */ + srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); + + return true; +out_inode: + if (iter_info->inode_mark) + fsnotify_put_mark(iter_info->inode_mark); +out_wait: + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); + return false; +} + +void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group = NULL; + + iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); + if (iter_info->inode_mark) { + group = iter_info->inode_mark->group; + fsnotify_put_mark(iter_info->inode_mark); + } + if (iter_info->vfsmount_mark) { + group = iter_info->vfsmount_mark->group; + fsnotify_put_mark(iter_info->vfsmount_mark); + } + /* + * We abuse notification_waitq on group shutdown for waiting for all + * marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); +} + +/* + * Mark mark as detached, remove it from group list. Mark still stays in object + * list until its last reference is dropped. Note that we rely on mark being + * removed from group list before corresponding reference to it is dropped. In + * particular we rely on mark->connector being valid while we hold + * group->mark_mutex if we found the mark through g_list. + * + * Must be called with group->mark_mutex held. The caller must either hold + * reference to the mark or be protected by fsnotify_mark_srcu. + */ +void fsnotify_detach_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_group *group = mark->group; + + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && + atomic_read(&mark->refcnt) < 1 + + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); + + spin_lock(&mark->lock); + /* something else already called this function on this mark */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { + spin_unlock(&mark->lock); + return; + } + mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; + list_del_init(&mark->g_list); + spin_unlock(&mark->lock); atomic_dec(&group->num_marks); + + /* Drop mark reference acquired in fsnotify_add_mark_locked() */ + fsnotify_put_mark(mark); } /* - * Prepare mark for freeing and add it to the list of marks prepared for - * freeing. The actual freeing must happen after SRCU period ends and the - * caller is responsible for this. + * Free fsnotify mark. The mark is actually only marked as being freed. The + * freeing is actually happening only once last reference to the mark is + * dropped from a workqueue which first waits for srcu period end. * - * The function returns true if the mark was added to the list of marks for - * freeing. The function returns false if someone else has already called - * __fsnotify_free_mark() for the mark. + * Caller must have a reference to the mark or be protected by + * fsnotify_mark_srcu. */ -static bool __fsnotify_free_mark(struct fsnotify_mark *mark) +void fsnotify_free_mark(struct fsnotify_mark *mark) { struct fsnotify_group *group = mark->group; @@ -189,7 +373,7 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); - return false; + return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); @@ -201,25 +385,6 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) */ if (group->ops->freeing_mark) group->ops->freeing_mark(mark, group); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - - return true; -} - -/* - * Free fsnotify mark. The freeing is actually happening from a workqueue which - * first waits for srcu period end. Caller must have a reference to the mark - * or be protected by fsnotify_mark_srcu. - */ -void fsnotify_free_mark(struct fsnotify_mark *mark) -{ - if (__fsnotify_free_mark(mark)) { - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - } } void fsnotify_destroy_mark(struct fsnotify_mark *mark, @@ -231,54 +396,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, fsnotify_free_mark(mark); } -void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock) -{ - struct fsnotify_mark *mark; - - while (1) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop 'lock', - * mark can get removed from the obj_list and destroyed. But - * we are holding mark reference so mark cannot be freed and - * calling fsnotify_destroy_mark() more than once is fine. - */ - spin_lock(lock); - if (hlist_empty(head)) { - spin_unlock(lock); - break; - } - mark = hlist_entry(head->first, struct fsnotify_mark, obj_list); - /* - * We don't update i_fsnotify_mask / mnt_fsnotify_mask here - * since inode / mount is going away anyway. So just remove - * mark from the list. - */ - hlist_del_init_rcu(&mark->obj_list); - fsnotify_get_mark(mark); - spin_unlock(lock); - fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); - } -} - -void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->mask = mask; - - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) - fsnotify_set_inode_mark_mask_locked(mark, mask); -} - -void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->ignored_mask = mask; -} - /* * Sorting function for lists of fsnotify marks. * @@ -315,37 +432,133 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } -/* Add mark into proper place in given list of marks */ -int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, - int allow_dups) +static int fsnotify_attach_connector_to_object( + struct fsnotify_mark_connector __rcu **connp, + struct inode *inode, + struct vfsmount *mnt) +{ + struct fsnotify_mark_connector *conn; + + conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); + if (!conn) + return -ENOMEM; + spin_lock_init(&conn->lock); + INIT_HLIST_HEAD(&conn->list); + if (inode) { + conn->flags = FSNOTIFY_OBJ_TYPE_INODE; + conn->inode = igrab(inode); + } else { + conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->mnt = mnt; + } + /* + * cmpxchg() provides the barrier so that readers of *connp can see + * only initialized structure + */ + if (cmpxchg(connp, NULL, conn)) { + /* Someone else created list structure for us */ + if (inode) + iput(inode); + kmem_cache_free(fsnotify_mark_connector_cachep, conn); + } + + return 0; +} + +/* + * Get mark connector, make sure it is alive and return with its lock held. + * This is for users that get connector pointer from inode or mount. Users that + * hold reference to a mark on the list may directly lock connector->lock as + * they are sure list cannot go away under them. + */ +static struct fsnotify_mark_connector *fsnotify_grab_connector( + struct fsnotify_mark_connector __rcu **connp) +{ + struct fsnotify_mark_connector *conn; + int idx; + + idx = srcu_read_lock(&fsnotify_mark_srcu); + conn = srcu_dereference(*connp, &fsnotify_mark_srcu); + if (!conn) + goto out; + spin_lock(&conn->lock); + if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE | + FSNOTIFY_OBJ_TYPE_VFSMOUNT))) { + spin_unlock(&conn->lock); + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return NULL; + } +out: + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return conn; +} + +/* + * Add mark into proper place in given list of marks. These marks may be used + * for the fsnotify backend to determine which event types should be delivered + * to which group and for which inodes. These marks are ordered according to + * priority, highest number first, and then by the group's location in memory. + */ +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, + struct inode *inode, struct vfsmount *mnt, + int allow_dups) { struct fsnotify_mark *lmark, *last = NULL; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector __rcu **connp; int cmp; + int err = 0; + + if (WARN_ON(!inode && !mnt)) + return -EINVAL; + if (inode) + connp = &inode->i_fsnotify_marks; + else + connp = &real_mount(mnt)->mnt_fsnotify_marks; +restart: + spin_lock(&mark->lock); + conn = fsnotify_grab_connector(connp); + if (!conn) { + spin_unlock(&mark->lock); + err = fsnotify_attach_connector_to_object(connp, inode, mnt); + if (err) + return err; + goto restart; + } /* is mark the first mark? */ - if (hlist_empty(head)) { - hlist_add_head_rcu(&mark->obj_list, head); - return 0; + if (hlist_empty(&conn->list)) { + hlist_add_head_rcu(&mark->obj_list, &conn->list); + goto added; } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, head, obj_list) { + hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) - return -EEXIST; + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { + err = -EEXIST; + goto out_err; + } cmp = fsnotify_compare_groups(lmark->group, mark->group); if (cmp >= 0) { hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); - return 0; + goto added; } } BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); - return 0; +added: + mark->connector = conn; +out_err: + spin_unlock(&conn->lock); + spin_unlock(&mark->lock); + return err; } /* @@ -353,10 +566,10 @@ int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups) { + struct fsnotify_group *group = mark->group; int ret = 0; BUG_ON(inode && mnt); @@ -367,61 +580,42 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, * LOCKING ORDER!!!! * group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock */ spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; - fsnotify_get_group(group); - mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ - - if (inode) { - ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups); - if (ret) - goto err; - } else if (mnt) { - ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups); - if (ret) - goto err; - } else { - BUG(); - } - - /* this will pin the object if appropriate */ - fsnotify_set_mark_mask_locked(mark, mark->mask); + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - if (inode) - __fsnotify_update_child_dentry_flags(inode); + ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); + if (ret) + goto err; + + if (mark->mask) + fsnotify_recalc_mask(mark->connector); return ret; err: - mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | + FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); - fsnotify_put_group(group); - mark->group = NULL; atomic_dec(&group->num_marks); - spin_unlock(&mark->lock); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - + fsnotify_put_mark(mark); return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, + struct vfsmount *mnt, int allow_dups) { int ret; + struct fsnotify_group *group = mark->group; + mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); + ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups); mutex_unlock(&group->mark_mutex); return ret; } @@ -430,29 +624,42 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group) +struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group) { + struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) { - if (mark->group == group) { + conn = fsnotify_grab_connector(connp); + if (!conn) + return NULL; + + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); + spin_unlock(&conn->lock); return mark; } } + spin_unlock(&conn->lock); return NULL; } -/* - * clear any marks in a group in which mark->flags & flags is true - */ -void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, - unsigned int flags) +/* Clear any marks in a group with given type */ +void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned int type) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); + struct list_head *head = &to_free; + /* Skip selection step if we want to clear all marks. */ + if (type == FSNOTIFY_OBJ_ALL_TYPES) { + head = &group->marks_list; + goto clear; + } /* * We have to be really careful here. Anytime we drop mark_mutex, e.g. * fsnotify_clear_marks_by_inode() can come and free marks. Even in our @@ -464,18 +671,19 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) + if (mark->connector->flags & type) list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); +clear: while (1) { mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&to_free)) { + if (list_empty(head)) { mutex_unlock(&group->mark_mutex); break; } - mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); @@ -484,49 +692,62 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, } } -/* - * Given a group, prepare for freeing all the marks associated with that group. - * The marks are attached to the list of marks prepared for destruction, the - * caller is responsible for freeing marks in that list after SRCU period has - * ended. - */ -void fsnotify_detach_group_marks(struct fsnotify_group *group) +/* Destroy all marks attached to inode / vfsmount */ +void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) { - struct fsnotify_mark *mark; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&group->marks_list)) { - mutex_unlock(&group->mark_mutex); - break; - } - mark = list_first_entry(&group->marks_list, - struct fsnotify_mark, g_list); + conn = fsnotify_grab_connector(connp); + if (!conn) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the + * list can get modified. However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining conn->lock. + */ + hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); - fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); - __fsnotify_free_mark(mark); - fsnotify_put_mark(mark); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; + fsnotify_destroy_mark(mark, mark->group); + spin_lock(&conn->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_connector_from_object(conn); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + iput(inode); } /* * Nothing fancy, just initialize lists and locks and counters. */ void fsnotify_init_mark(struct fsnotify_mark *mark, - void (*free_mark)(struct fsnotify_mark *mark)) + struct fsnotify_group *group) { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); atomic_set(&mark->refcnt, 1); - mark->free_mark = free_mark; + fsnotify_get_group(group); + mark->group = group; } /* * Destroy all marks in destroy_list, waits for SRCU period to finish before * actually freeing marks. */ -void fsnotify_mark_destroy_list(void) +static void fsnotify_mark_destroy_workfn(struct work_struct *work) { struct fsnotify_mark *mark, *next; struct list_head private_destroy_list; @@ -540,11 +761,12 @@ void fsnotify_mark_destroy_list(void) list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); - fsnotify_put_mark(mark); + fsnotify_final_mark_destroy(mark); } } -static void fsnotify_mark_destroy_workfn(struct work_struct *work) +/* Wait for all marks queued for destruction to be actually destroyed */ +void fsnotify_wait_marks_destroyed(void) { - fsnotify_mark_destroy_list(); + flush_delayed_work(&reaper_work); } diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c deleted file mode 100644 index a8fcab68faef1cdc826103d095b5b387895dfb20..0000000000000000000000000000000000000000 --- a/fs/notify/vfsmount_mark.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include "fsnotify.h" - -void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); -} - -/* - * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this mount point - */ -void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - - spin_lock(&mnt->mnt_root->d_lock); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) -{ - struct vfsmount *mnt = mark->mnt; - struct mount *m = real_mount(mnt); - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->mnt = NULL; - - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -/* - * given a group and vfsmount, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - struct fsnotify_mark *mark; - - spin_lock(&mnt->mnt_root->d_lock); - mark = fsnotify_find_mark(&m->mnt_fsnotify_marks, group); - spin_unlock(&mnt->mnt_root->d_lock); - - return mark; -} - -/* - * Attach an initialized mark to a given group and vfsmount. - * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which groups. - */ -int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups) -{ - struct mount *m = real_mount(mnt); - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - mark->mnt = mnt; - ret = fsnotify_add_mark_list(&m->mnt_fsnotify_marks, mark, allow_dups); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); - - return ret; -} diff --git a/fs/nsfs.c b/fs/nsfs.c index 1656843e87d2bef47b69d65b23c23946e8936f33..f3db56e83dd204279895751e8cd885f15c28b2f5 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -91,6 +91,7 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns) return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { @@ -195,9 +196,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task, { struct ns_common *ns; int res = -ENOENT; + const char *name; ns = ns_ops->get(task); if (ns) { - res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); + name = ns_ops->real_ns_name ? : ns_ops->name; + res = snprintf(buf, size, "%s:[%u]", name, ns->inum); ns_ops->put(ns); } return res; diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 358258364616cd3c2fee997daca2a192719cb045..4690cd75d8d7948a056fe899bc4600ade10b8566 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, PTR_ERR(dent_inode)); kfree(name); /* Return the error code. */ - return (struct dentry *)dent_inode; + return ERR_CAST(dent_inode); } /* It is guaranteed that @name is no longer allocated at this point. */ if (MREF_ERR(mref) == -ENOENT) { diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f6e871760f8d97265f4974c159dca7a7b6439b7f..0da0332725aafcf253707f7ca59c9385cd6b4fb4 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2242,13 +2242,13 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, spin_unlock(&o2hb_live_lock); } -static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item, +static ssize_t o2hb_heartbeat_group_dead_threshold_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", o2hb_dead_threshold); } -static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item, +static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *item, const char *page, size_t count) { unsigned long tmp; @@ -2297,11 +2297,11 @@ static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item, } -CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold); +CONFIGFS_ATTR(o2hb_heartbeat_group_, dead_threshold); CONFIGFS_ATTR(o2hb_heartbeat_group_, mode); static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { - &o2hb_heartbeat_group_attr_threshold, + &o2hb_heartbeat_group_attr_dead_threshold, &o2hb_heartbeat_group_attr_mode, NULL, }; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d0ab7e56d0b41a7a97f3640eb0ab4801f747dffc..8d779227370ab1d121fdc2fc6f546f7844e95a5b 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -450,9 +450,8 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); INIT_DELAYED_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req); - init_timer(&sc->sc_idle_timeout); - sc->sc_idle_timeout.function = o2net_idle_timer; - sc->sc_idle_timeout.data = (unsigned long)sc; + setup_timer(&sc->sc_idle_timeout, o2net_idle_timer, + (unsigned long)sc); sclog(sc, "alloced\n"); @@ -956,7 +955,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc, mutex_lock(&sc->sc_send_lock); ret = sc->sc_sock->ops->sendpage(sc->sc_sock, virt_to_page(kmalloced_virt), - (long)kmalloced_virt & ~PAGE_MASK, + offset_in_page(kmalloced_virt), size, MSG_DONTWAIT); mutex_unlock(&sc->sc_send_lock); if (ret == size) @@ -1460,27 +1459,10 @@ static void o2net_rx_until_empty(struct work_struct *work) static int o2net_set_nodelay(struct socket *sock) { - int ret, val = 1; - mm_segment_t oldfs; + int val = 1; - oldfs = get_fs(); - set_fs(KERNEL_DS); - - /* - * Dear unsuspecting programmer, - * - * Don't use sock_setsockopt() for SOL_TCP. It doesn't check its level - * argument and assumes SOL_SOCKET so, say, your TCP_NODELAY will - * silently turn into SO_DEBUG. - * - * Yours, - * Keeper of hilariously fragile interfaces. - */ - ret = sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char __user *)&val, sizeof(val)); - - set_fs(oldfs); - return ret; + return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (void *)&val, sizeof(val)); } static int o2net_set_usertimeout(struct socket *sock) @@ -1488,7 +1470,7 @@ static int o2net_set_usertimeout(struct socket *sock) int user_timeout = O2NET_TCP_USER_TIMEOUT; return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&user_timeout, sizeof(user_timeout)); + (void *)&user_timeout, sizeof(user_timeout)); } static void o2net_initialize_handshake(void) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 827fc9809bc271f09b2c3b7abf4019c31d0e1636..9f88188060db9c7fa59e6882ecf33b55cf921788 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -119,7 +119,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, if (IS_ERR(inode)) { mlog_errno(PTR_ERR(inode)); - result = (void *)inode; + result = ERR_CAST(inode); goto bail; } diff --git a/fs/open.c b/fs/open.c index 949cef29c3bba9395efd2e461d2078d0a9a9b47c..cd0c5be8d01247e8bab3f4e00e6f171803291881 100644 --- a/fs/open.c +++ b/fs/open.c @@ -193,7 +193,8 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) goto out_putf; error = -EPERM; - if (IS_APPEND(inode)) + /* Check IS_APPEND on real upper inode */ + if (IS_APPEND(file_inode(f.file))) goto out_putf; sb_start_write(inode->i_sb); @@ -459,20 +460,17 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct fd f = fdget_raw(fd); - struct inode *inode; - int error = -EBADF; + int error; error = -EBADF; if (!f.file) goto out; - inode = file_inode(f.file); - error = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) + if (!d_can_lookup(f.file->f_path.dentry)) goto out_putf; - error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); + error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: @@ -900,6 +898,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode = ACC_MODE(flags); + /* + * Clear out all open flags we don't know about so that we don't report + * them in fcntl(F_GETFD) or similar interfaces. + */ + flags &= VALID_OPEN_FLAGS; + if (flags & (O_CREAT | __O_TMPFILE)) op->mode = (mode & S_IALLUGO) | S_IFREG; else @@ -1078,6 +1082,26 @@ SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, return do_sys_open(dfd, filename, flags, mode); } +#ifdef CONFIG_COMPAT +/* + * Exactly like sys_open(), except that it doesn't set the + * O_LARGEFILE flag. + */ +COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) +{ + return do_sys_open(AT_FDCWD, filename, flags, mode); +} + +/* + * Exactly like sys_openat(), except that it doesn't set the + * O_LARGEFILE flag. + */ +COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode) +{ + return do_sys_open(dfd, filename, flags, mode); +} +#endif + #ifndef __alpha__ /* diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index e1534c9bab16ce69e30eefd6614ca2872595fb04..c19f0787c9c65bddfc8b82ca7fad2f88549f2f91 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -180,6 +180,10 @@ static ssize_t orangefs_devreq_read(struct file *file, return -EINVAL; } + /* Check for an empty list before locking. */ + if (list_empty(&orangefs_request_list)) + return -EAGAIN; + restart: /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index 284373a57a082fa0e38422afbc49fa2e8519b0df..d327cbd17756dcbc18ca787602e436c24e70d198 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -1,396 +1,404 @@ /* - * (C) 2001 Clemson University and The University of Chicago - * - * See COPYING in top-level directory. + * Copyright 2017 Omnibond Systems, L.L.C. */ #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-bufmap.h" +struct orangefs_dir_part { + struct orangefs_dir_part *next; + size_t len; +}; + +struct orangefs_dir { + __u64 token; + struct orangefs_dir_part *part; + loff_t end; + int error; +}; + +#define PART_SHIFT (24) +#define PART_SIZE (1<<24) +#define PART_MASK (~(PART_SIZE - 1)) + /* - * decode routine used by kmod to deal with the blob sent from - * userspace for readdirs. The blob contains zero or more of these - * sub-blobs: - * __u32 - represents length of the character string that follows. - * string - between 1 and ORANGEFS_NAME_MAX bytes long. - * padding - (if needed) to cause the __u32 plus the string to be - * eight byte aligned. - * khandle - sizeof(khandle) bytes. + * There can be up to 512 directory entries. Each entry is encoded as + * follows: + * 4 bytes: string size (n) + * n bytes: string + * 1 byte: trailing zero + * padding to 8 bytes + * 16 bytes: khandle + * padding to 8 bytes + * + * The trailer_buf starts with a struct orangefs_readdir_response_s + * which must be skipped to get to the directory data. + * + * The data which is received from the userspace daemon is termed a + * part and is stored in a linked list in case more than one part is + * needed for a large directory. + * + * The position pointer (ctx->pos) encodes the part and offset on which + * to begin reading at. Bits above PART_SHIFT encode the part and bits + * below PART_SHIFT encode the offset. Parts are stored in a linked + * list which grows as data is received from the server. The overhead + * associated with managing the list is presumed to be small compared to + * the overhead of communicating with the server. + * + * As data is received from the server, it is placed at the end of the + * part list. Data is parsed from the current position as it is needed. + * When data is determined to be corrupt, it is either because the + * userspace component has sent back corrupt data or because the file + * pointer has been moved to an invalid location. Since the two cannot + * be differentiated, return EIO. + * + * Part zero is synthesized to contains `.' and `..'. Part one is the + * first part of the part list. */ -static long decode_dirents(char *ptr, size_t size, - struct orangefs_readdir_response_s *readdir) + +static int do_readdir(struct orangefs_inode_s *oi, + struct orangefs_dir *od, struct dentry *dentry, + struct orangefs_kernel_op_s *op) { - int i; - struct orangefs_readdir_response_s *rd = - (struct orangefs_readdir_response_s *) ptr; - char *buf = ptr; - int khandle_size = sizeof(struct orangefs_khandle); - size_t offset = offsetof(struct orangefs_readdir_response_s, - dirent_array); - /* 8 reflects eight byte alignment */ - int smallest_blob = khandle_size + 8; - __u32 len; - int aligned_len; - int sizeof_u32 = sizeof(__u32); - long ret; - - gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size); - - /* size is = offset on empty dirs, > offset on non-empty dirs... */ - if (size < offset) { - gossip_err("%s: size:%zu: offset:%zu:\n", - __func__, - size, - offset); - ret = -EINVAL; - goto out; - } + struct orangefs_readdir_response_s *resp; + int bufi, r; - if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) { - gossip_err("%s: size:%zu: dirent_outcount:%d:\n", - __func__, - size, - readdir->orangefs_dirent_outcount); - ret = -EINVAL; - goto out; - } + /* + * Despite the badly named field, readdir does not use shared + * memory. However, there are a limited number of readdir + * slots, which must be allocated here. This flag simply tells + * the op scheduler to return the op here for retry. + */ + op->uses_shared_memory = 1; + op->upcall.req.readdir.refn = oi->refn; + op->upcall.req.readdir.token = od->token; + op->upcall.req.readdir.max_dirent_count = + ORANGEFS_MAX_DIRENT_COUNT_READDIR; - readdir->token = rd->token; - readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount; - readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount, - sizeof(*readdir->dirent_array), - GFP_KERNEL); - if (readdir->dirent_array == NULL) { - gossip_err("%s: kcalloc failed.\n", __func__); - ret = -ENOMEM; - goto out; +again: + bufi = orangefs_readdir_index_get(); + if (bufi < 0) { + od->error = bufi; + return bufi; } - buf += offset; - size -= offset; - - for (i = 0; i < readdir->orangefs_dirent_outcount; i++) { - if (size < smallest_blob) { - gossip_err("%s: size:%zu: smallest_blob:%d:\n", - __func__, - size, - smallest_blob); - ret = -EINVAL; - goto free; - } + op->upcall.req.readdir.buf_index = bufi; - len = *(__u32 *)buf; - if ((len < 1) || (len > ORANGEFS_NAME_MAX)) { - gossip_err("%s: len:%d:\n", __func__, len); - ret = -EINVAL; - goto free; - } + r = service_operation(op, "orangefs_readdir", + get_interruptible_flag(dentry->d_inode)); - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: size:%zu: len:%d:\n", - __func__, - size, - len); + orangefs_readdir_index_put(bufi); - readdir->dirent_array[i].d_name = buf + sizeof_u32; - readdir->dirent_array[i].d_length = len; + if (op_state_purged(op)) { + if (r == -EAGAIN) { + vfree(op->downcall.trailer_buf); + goto again; + } else if (r == -EIO) { + vfree(op->downcall.trailer_buf); + od->error = r; + return r; + } + } - /* - * Calculate "aligned" length of this string and its - * associated __u32 descriptor. - */ - aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7; - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: aligned_len:%d:\n", - __func__, - aligned_len); + if (r < 0) { + vfree(op->downcall.trailer_buf); + od->error = r; + return r; + } else if (op->downcall.status) { + vfree(op->downcall.trailer_buf); + od->error = op->downcall.status; + return op->downcall.status; + } - /* - * The end of the blob should coincide with the end - * of the last sub-blob. - */ - if (size < aligned_len + khandle_size) { - gossip_err("%s: ran off the end of the blob.\n", - __func__); - ret = -EINVAL; - goto free; - } - size -= aligned_len + khandle_size; + /* + * The maximum size is size per entry times the 512 entries plus + * the header. This is well under the limit. + */ + if (op->downcall.trailer_size > PART_SIZE) { + vfree(op->downcall.trailer_buf); + od->error = -EIO; + return -EIO; + } - buf += aligned_len; + resp = (struct orangefs_readdir_response_s *) + op->downcall.trailer_buf; + od->token = resp->token; + return 0; +} - readdir->dirent_array[i].khandle = - *(struct orangefs_khandle *) buf; - buf += khandle_size; +static int parse_readdir(struct orangefs_dir *od, + struct orangefs_kernel_op_s *op) +{ + struct orangefs_dir_part *part, *new; + size_t count; + + count = 1; + part = od->part; + while (part) { + count++; + if (part->next) + part = part->next; + else + break; } - ret = buf - ptr; - gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret); - goto out; -free: - kfree(readdir->dirent_array); - readdir->dirent_array = NULL; + new = (void *)op->downcall.trailer_buf; + new->next = NULL; + new->len = op->downcall.trailer_size - + sizeof(struct orangefs_readdir_response_s); + if (!od->part) + od->part = new; + else + part->next = new; + count++; + od->end = count << PART_SHIFT; -out: - return ret; + return 0; } -/* - * Read directory entries from an instance of an open directory. - */ -static int orangefs_readdir(struct file *file, struct dir_context *ctx) +static int orangefs_dir_more(struct orangefs_inode_s *oi, + struct orangefs_dir *od, struct dentry *dentry) { - int ret = 0; - int buffer_index; - /* - * ptoken supports Orangefs' distributed directory logic, added - * in 2.9.2. - */ - __u64 *ptoken = file->private_data; - __u64 pos = 0; - ino_t ino = 0; - struct dentry *dentry = file->f_path.dentry; - struct orangefs_kernel_op_s *new_op = NULL; - struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); - struct orangefs_readdir_response_s readdir_response; - void *dents_buf; - int i = 0; - int len = 0; - ino_t current_ino = 0; - char *current_entry = NULL; - long bytes_decoded; - - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: ctx->pos:%lld, ptoken = %llu\n", - __func__, - lld(ctx->pos), - llu(*ptoken)); - - pos = (__u64) ctx->pos; - - /* are we done? */ - if (pos == ORANGEFS_READDIR_END) { - gossip_debug(GOSSIP_DIR_DEBUG, - "Skipping to termination path\n"); - return 0; + struct orangefs_kernel_op_s *op; + int r; + + op = op_alloc(ORANGEFS_VFS_OP_READDIR); + if (!op) { + od->error = -ENOMEM; + return -ENOMEM; + } + r = do_readdir(oi, od, dentry, op); + if (r) { + od->error = r; + goto out; + } + r = parse_readdir(od, op); + if (r) { + od->error = r; + goto out; } - gossip_debug(GOSSIP_DIR_DEBUG, - "orangefs_readdir called on %pd (pos=%llu)\n", - dentry, llu(pos)); + od->error = 0; +out: + op_release(op); + return od->error; +} - memset(&readdir_response, 0, sizeof(readdir_response)); +static int fill_from_part(struct orangefs_dir_part *part, + struct dir_context *ctx) +{ + const int offset = sizeof(struct orangefs_readdir_response_s); + struct orangefs_khandle *khandle; + __u32 *len, padlen; + loff_t i; + char *s; + i = ctx->pos & ~PART_MASK; - new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); - if (!new_op) - return -ENOMEM; + /* The file offset from userspace is too large. */ + if (i > part->len) + return 1; /* - * Only the indices are shared. No memory is actually shared, but the - * mechanism is used. + * If the seek pointer is positioned just before an entry it + * should find the next entry. */ - new_op->uses_shared_memory = 1; - new_op->upcall.req.readdir.refn = orangefs_inode->refn; - new_op->upcall.req.readdir.max_dirent_count = - ORANGEFS_MAX_DIRENT_COUNT_READDIR; - - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: upcall.req.readdir.refn.khandle: %pU\n", - __func__, - &new_op->upcall.req.readdir.refn.khandle); + if (i % 8) + i = i + (8 - i%8)%8; - new_op->upcall.req.readdir.token = *ptoken; - -get_new_buffer_index: - buffer_index = orangefs_readdir_index_get(); - if (buffer_index < 0) { - ret = buffer_index; - gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", - ret); - goto out_free_op; + while (i < part->len) { + if (part->len < i + sizeof *len) + break; + len = (void *)part + offset + i; + /* + * len is the size of the string itself. padlen is the + * total size of the encoded string. + */ + padlen = (sizeof *len + *len + 1) + + (8 - (sizeof *len + *len + 1)%8)%8; + if (part->len < i + padlen + sizeof *khandle) + goto next; + s = (void *)part + offset + i + sizeof *len; + if (s[*len] != 0) + goto next; + khandle = (void *)part + offset + i + padlen; + if (!dir_emit(ctx, s, *len, + orangefs_khandle_to_ino(khandle), + DT_UNKNOWN)) + return 0; + i += padlen + sizeof *khandle; + i = i + (8 - i%8)%8; + BUG_ON(i > part->len); + ctx->pos = (ctx->pos & PART_MASK) | i; + continue; +next: + i += 8; } - new_op->upcall.req.readdir.buf_index = buffer_index; - - ret = service_operation(new_op, - "orangefs_readdir", - get_interruptible_flag(dentry->d_inode)); + return 1; +} - gossip_debug(GOSSIP_DIR_DEBUG, - "Readdir downcall status is %d. ret:%d\n", - new_op->downcall.status, - ret); +static int orangefs_dir_fill(struct orangefs_inode_s *oi, + struct orangefs_dir *od, struct dentry *dentry, + struct dir_context *ctx) +{ + struct orangefs_dir_part *part; + size_t count; - orangefs_readdir_index_put(buffer_index); + count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; - if (ret == -EAGAIN && op_state_purged(new_op)) { - /* Client-core indices are invalid after it restarted. */ - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: Getting new buffer_index for retry of readdir..\n", - __func__); - goto get_new_buffer_index; + part = od->part; + while (part->next && count) { + count--; + part = part->next; } - - if (ret == -EIO && op_state_purged(new_op)) { - gossip_err("%s: Client is down. Aborting readdir call.\n", - __func__); - goto out_free_op; + /* This means the userspace file offset is invalid. */ + if (count) { + od->error = -EIO; + return -EIO; } - if (ret < 0 || new_op->downcall.status != 0) { - gossip_debug(GOSSIP_DIR_DEBUG, - "Readdir request failed. Status:%d\n", - new_op->downcall.status); - if (ret >= 0) - ret = new_op->downcall.status; - goto out_free_op; + while (part && part->len) { + int r; + r = fill_from_part(part, ctx); + if (r < 0) { + od->error = r; + return r; + } else if (r == 0) { + /* Userspace buffer is full. */ + break; + } else { + /* + * The part ran out of data. Move to the next + * part. */ + ctx->pos = (ctx->pos & PART_MASK) + + (1 << PART_SHIFT); + part = part->next; + } } + return 0; +} - dents_buf = new_op->downcall.trailer_buf; - if (dents_buf == NULL) { - gossip_err("Invalid NULL buffer in readdir response\n"); - ret = -ENOMEM; - goto out_free_op; +static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, + int whence) +{ + struct orangefs_dir *od = file->private_data; + /* + * Delete the stored data so userspace sees new directory + * entries. + */ + if (!whence && offset < od->end) { + struct orangefs_dir_part *part = od->part; + while (part) { + struct orangefs_dir_part *next = part->next; + vfree(part); + part = next; + } + od->token = ORANGEFS_ITERATE_START; + od->part = NULL; + od->end = 1 << PART_SHIFT; } + return default_llseek(file, offset, whence); +} - bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size, - &readdir_response); - if (bytes_decoded < 0) { - ret = bytes_decoded; - gossip_err("Could not decode readdir from buffer %d\n", ret); - goto out_vfree; - } +static int orangefs_dir_iterate(struct file *file, + struct dir_context *ctx) +{ + struct orangefs_inode_s *oi; + struct orangefs_dir *od; + struct dentry *dentry; + int r; - if (bytes_decoded != new_op->downcall.trailer_size) { - gossip_err("orangefs_readdir: # bytes decoded (%ld) " - "!= trailer size (%ld)\n", - bytes_decoded, - (long)new_op->downcall.trailer_size); - ret = -EINVAL; - goto out_destroy_handle; - } + dentry = file->f_path.dentry; + oi = ORANGEFS_I(dentry->d_inode); + od = file->private_data; - /* - * orangefs doesn't actually store dot and dot-dot, but - * we need to have them represented. - */ - if (pos == 0) { - ino = get_ino_from_khandle(dentry->d_inode); - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: calling dir_emit of \".\" with pos = %llu\n", - __func__, - llu(pos)); - ret = dir_emit(ctx, ".", 1, ino, DT_DIR); - pos += 1; - } + if (od->error) + return od->error; - if (pos == 1) { - ino = get_parent_ino_from_dentry(dentry); - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: calling dir_emit of \"..\" with pos = %llu\n", - __func__, - llu(pos)); - ret = dir_emit(ctx, "..", 2, ino, DT_DIR); - pos += 1; + if (ctx->pos == 0) { + if (!dir_emit_dot(file, ctx)) + return 0; + ctx->pos++; + } + if (ctx->pos == 1) { + if (!dir_emit_dotdot(file, ctx)) + return 0; + ctx->pos = 1 << PART_SHIFT; } /* - * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around - * to prevent "finding" dot and dot-dot on any iteration - * other than the first. + * The seek position is in the first synthesized part but is not + * valid. */ - if (ctx->pos == ORANGEFS_ITERATE_NEXT) - ctx->pos = 0; - - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: dirent_outcount:%d:\n", - __func__, - readdir_response.orangefs_dirent_outcount); - for (i = ctx->pos; - i < readdir_response.orangefs_dirent_outcount; - i++) { - len = readdir_response.dirent_array[i].d_length; - current_entry = readdir_response.dirent_array[i].d_name; - current_ino = orangefs_khandle_to_ino( - &readdir_response.dirent_array[i].khandle); - - gossip_debug(GOSSIP_DIR_DEBUG, - "calling dir_emit for %s with len %d" - ", ctx->pos %ld\n", - current_entry, - len, - (unsigned long)ctx->pos); - /* - * type is unknown. We don't return object type - * in the dirent_array. This leaves getdents - * clueless about type. - */ - ret = - dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); - if (!ret) - break; - ctx->pos++; - gossip_debug(GOSSIP_DIR_DEBUG, - "%s: ctx->pos:%lld\n", - __func__, - lld(ctx->pos)); + if ((ctx->pos & PART_MASK) == 0) + return -EIO; - } + r = 0; /* - * we ran all the way through the last batch, set up for - * getting another batch... + * Must read more if the user has sought past what has been read + * so far. Stop a user who has sought past the end. */ - if (ret) { - *ptoken = readdir_response.token; - ctx->pos = ORANGEFS_ITERATE_NEXT; + while (od->token != ORANGEFS_ITERATE_END && + ctx->pos > od->end) { + r = orangefs_dir_more(oi, od, dentry); + if (r) + return r; + } + if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) + return -EIO; + + /* Then try to fill if there's any left in the buffer. */ + if (ctx->pos < od->end) { + r = orangefs_dir_fill(oi, od, dentry, ctx); + if (r) + return r; } - /* - * Did we hit the end of the directory? - */ - if (readdir_response.token == ORANGEFS_READDIR_END) { - gossip_debug(GOSSIP_DIR_DEBUG, - "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); - ctx->pos = ORANGEFS_READDIR_END; + /* Finally get some more and try to fill. */ + if (od->token != ORANGEFS_ITERATE_END) { + r = orangefs_dir_more(oi, od, dentry); + if (r) + return r; + r = orangefs_dir_fill(oi, od, dentry, ctx); } -out_destroy_handle: - /* kfree(NULL) is safe */ - kfree(readdir_response.dirent_array); -out_vfree: - gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf); - vfree(dents_buf); -out_free_op: - op_release(new_op); - gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); - return ret; + return r; } static int orangefs_dir_open(struct inode *inode, struct file *file) { - __u64 *ptoken; - - file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); + struct orangefs_dir *od; + file->private_data = kmalloc(sizeof(struct orangefs_dir), + GFP_KERNEL); if (!file->private_data) return -ENOMEM; - - ptoken = file->private_data; - *ptoken = ORANGEFS_READDIR_START; + od = file->private_data; + od->token = ORANGEFS_ITERATE_START; + od->part = NULL; + od->end = 1 << PART_SHIFT; + od->error = 0; return 0; } static int orangefs_dir_release(struct inode *inode, struct file *file) { + struct orangefs_dir *od = file->private_data; + struct orangefs_dir_part *part = od->part; orangefs_flush_inode(inode); - kfree(file->private_data); + while (part) { + struct orangefs_dir_part *next = part->next; + vfree(part); + part = next; + } + kfree(od); return 0; } -/** ORANGEFS implementation of VFS directory operations */ const struct file_operations orangefs_dir_operations = { + .llseek = orangefs_dir_llseek, .read = generic_read_dir, - .iterate = orangefs_readdir, + .iterate = orangefs_dir_iterate, .open = orangefs_dir_open, - .release = orangefs_dir_release, + .release = orangefs_dir_release }; diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h index 3b8923f8bf21d6902f0d8f440e2102cb57be94bf..163001c9550184feff1255364a3b59a61de1ecf9 100644 --- a/fs/orangefs/downcall.h +++ b/fs/orangefs/downcall.h @@ -40,16 +40,6 @@ struct orangefs_mkdir_response { struct orangefs_object_kref refn; }; -/* - * duplication of some system interface structures so that I don't have - * to allocate extra memory - */ -struct orangefs_dirent { - char *d_name; - int d_length; - struct orangefs_khandle khandle; -}; - struct orangefs_statfs_response { __s64 block_size; __s64 blocks_total; @@ -131,12 +121,16 @@ struct orangefs_downcall_s { } resp; }; +/* + * The readdir response comes in the trailer. It is followed by the + * directory entries as described in dir.c. + */ + struct orangefs_readdir_response_s { __u64 token; __u64 directory_version; __u32 __pad2; __u32 orangefs_dirent_outcount; - struct orangefs_dirent *dirent_array; }; #endif /* __DOWNCALL_H */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index e6bbc8083d77235919ee56e093605283bb2d8dbb..28f38d813ad23f69f24900a4e8bd6ee75e57659f 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -114,7 +114,6 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; struct orangefs_kernel_op_s *new_op = NULL; - struct iov_iter saved = *iter; int buffer_index = -1; ssize_t ret; @@ -193,7 +192,7 @@ static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inod orangefs_bufmap_put(buffer_index); buffer_index = -1; if (type == ORANGEFS_IO_WRITE) - *iter = saved; + iov_iter_revert(iter, total_size); gossip_debug(GOSSIP_FILE_DEBUG, "%s:going to repopulate_shared_memory.\n", __func__); @@ -475,7 +474,8 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite /* Make sure generic_write_checks sees an up to date inode size. */ if (file->f_flags & O_APPEND) { - rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1); + rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, + STATX_SIZE); if (rc == -ESTALE) rc = -EIO; if (rc) { @@ -693,7 +693,8 @@ static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) * NOTE: We are only interested in file size here, * so we set mask accordingly. */ - ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1); + ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1, + STATX_SIZE); if (ret == -ESTALE) ret = -EIO; if (ret) { diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index a304bf34b2127d47d4aff2d8c8b4b0a99a55ec46..9428ea0aac16d32e14fa74501ad470b5eab1ffda 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -161,7 +161,7 @@ static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) iattr->ia_size); /* Ensure that we have a up to date size, so we know if it changed. */ - ret = orangefs_inode_getattr(inode, 0, 1); + ret = orangefs_inode_getattr(inode, 0, 1, STATX_SIZE); if (ret == -ESTALE) ret = -EIO; if (ret) { @@ -218,8 +218,7 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) if (ret) goto out; - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(inode)) { + if (iattr->ia_valid & ATTR_SIZE) { ret = orangefs_setattr_size(inode, iattr); if (ret) goto out; @@ -256,13 +255,19 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, "orangefs_getattr: called on %pd\n", path->dentry); - ret = orangefs_inode_getattr(inode, 0, 0); + ret = orangefs_inode_getattr(inode, 0, 0, request_mask); if (ret == 0) { generic_fillattr(inode, stat); /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); stat->blksize = orangefs_inode->blksize; + + if (request_mask & STATX_SIZE) + stat->result_mask = STATX_BASIC_STATS; + else + stat->result_mask = STATX_BASIC_STATS & + ~STATX_SIZE; } return ret; } @@ -277,7 +282,7 @@ int orangefs_permission(struct inode *inode, int mask) gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__); /* Make sure the permission (and other common attrs) are up to date. */ - ret = orangefs_inode_getattr(inode, 0, 0); + ret = orangefs_inode_getattr(inode, 0, 0, STATX_MODE); if (ret < 0) return ret; @@ -375,7 +380,7 @@ struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref if (!inode || !(inode->i_state & I_NEW)) return inode; - error = orangefs_inode_getattr(inode, 1, 1); + error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL); if (error) { iget_failed(inode); return ERR_PTR(error); @@ -420,7 +425,7 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir, orangefs_set_inode(inode, ref); inode->i_ino = hash; /* needed for stat etc */ - error = orangefs_inode_getattr(inode, 1, 1); + error = orangefs_inode_getattr(inode, 1, 1, STATX_ALL); if (error) goto out_iput; diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index a290ff6ec7569dc3b5eea9ded2df0e8483c49d65..478e88bd7f9d3d3d79e4f8edf76d4df5a5acc395 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -74,6 +74,7 @@ static int orangefs_create(struct inode *dir, unlock_new_inode(inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; + ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; gossip_debug(GOSSIP_NAME_DEBUG, "%s: dentry instantiated for %pd\n", @@ -193,8 +194,6 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ORANGEFS_I(inode)->getattr_time = jiffies - 1; - gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d " "Found good inode [%lu] with count [%d]\n", @@ -324,6 +323,7 @@ static int orangefs_symlink(struct inode *dir, unlock_new_inode(inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; + ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; gossip_debug(GOSSIP_NAME_DEBUG, "Inode (Symlink) %pU -> %pd\n", @@ -388,6 +388,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode unlock_new_inode(inode); orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; + ORANGEFS_I(inode)->getattr_mask = STATX_BASIC_STATS; gossip_debug(GOSSIP_NAME_DEBUG, "Inode (Directory) %pU -> %pd\n", diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 6333cbbdfef7ae652c1a4e6c4d2818ae1cd188d7..83b506020718980a69e5285a8c86496c14a2d050 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -521,13 +521,11 @@ int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter, size_t n = size; if (n > PAGE_SIZE) n = PAGE_SIZE; - n = copy_page_from_iter(page, 0, n, iter); - if (!n) + if (copy_page_from_iter(page, 0, n, iter) != n) return -EFAULT; size -= n; } return 0; - } /* diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 791912da97d7487d99c618ce8ac6f5fbe3cd11fb..716ed337f1663ddfaf0729c5d8275b0366308f19 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -440,6 +440,9 @@ static ssize_t orangefs_debug_write(struct file *file, "orangefs_debug_write: %pD\n", file); + if (count == 0) + return 0; + /* * Thwart users who try to jamb a ridiculous number * of bytes into the debug file... diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h index f380f9ed1b286a7eedfb41fa5f7f8f9259abfdf3..efe08c763e566bffc500439645757966c65cc93c 100644 --- a/fs/orangefs/orangefs-dev-proto.h +++ b/fs/orangefs/orangefs-dev-proto.h @@ -52,12 +52,7 @@ */ #define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000800 -/* - * The maximum number of directory entries in a single request is 96. - * XXX: Why can this not be higher. The client-side code can handle up to 512. - * XXX: What happens if we expect more than the client can return? - */ -#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 96 +#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 512 #include "upcall.h" #include "downcall.h" diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 8afac46fcc87a1e1d3ea8c658e0ae60c336c3d5d..ea0ce507a6ab641d17c1983b898fb2f12a2d0ed2 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -215,6 +215,7 @@ struct orangefs_inode_s { unsigned long pinode_flags; unsigned long getattr_time; + u32 getattr_mask; }; #define P_ATIME_FLAG 0 @@ -340,11 +341,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode) return &(ORANGEFS_I(inode)->refn.khandle); } -static inline __s32 get_fsid_from_ino(struct inode *inode) -{ - return ORANGEFS_I(inode)->refn.fs_id; -} - static inline ino_t get_ino_from_khandle(struct inode *inode) { struct orangefs_khandle *khandle; @@ -500,7 +496,8 @@ int orangefs_inode_setxattr(struct inode *inode, size_t size, int flags); -int orangefs_inode_getattr(struct inode *inode, int new, int bypass); +int orangefs_inode_getattr(struct inode *inode, int new, int bypass, + u32 request_mask); int orangefs_inode_check_changed(struct inode *inode); diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index 9b96b99539d62501718bd752f26d059900095864..aab6f1842963d3d0e5891900f46fc9eb570e9a4f 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -251,7 +251,8 @@ static int orangefs_inode_is_stale(struct inode *inode, int new, return 0; } -int orangefs_inode_getattr(struct inode *inode, int new, int bypass) +int orangefs_inode_getattr(struct inode *inode, int new, int bypass, + u32 request_mask) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; @@ -262,7 +263,13 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass) get_khandle_from_ino(inode)); if (!new && !bypass) { - if (time_before(jiffies, orangefs_inode->getattr_time)) + /* + * Must have all the attributes in the mask and be within cache + * time. + */ + if ((request_mask & orangefs_inode->getattr_mask) == + request_mask && + time_before(jiffies, orangefs_inode->getattr_time)) return 0; } @@ -270,7 +277,15 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass) if (!new_op) return -ENOMEM; new_op->upcall.req.getattr.refn = orangefs_inode->refn; - new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; + /* + * Size is the hardest attribute to get. The incremental cost of any + * other attribute is essentially zero. + */ + if (request_mask & STATX_SIZE || new) + new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; + else + new_op->upcall.req.getattr.mask = + ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE; ret = service_operation(new_op, __func__, get_interruptible_flag(inode)); @@ -291,25 +306,29 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass) case S_IFREG: inode->i_flags = orangefs_inode_flags(&new_op-> downcall.resp.getattr.attributes); - inode_size = (loff_t)new_op-> - downcall.resp.getattr.attributes.size; - rounded_up_size = - (inode_size + (4096 - (inode_size % 4096))); - inode->i_size = inode_size; - orangefs_inode->blksize = - new_op->downcall.resp.getattr.attributes.blksize; - spin_lock(&inode->i_lock); - inode->i_bytes = inode_size; - inode->i_blocks = - (unsigned long)(rounded_up_size / 512); - spin_unlock(&inode->i_lock); + if (request_mask & STATX_SIZE || new) { + inode_size = (loff_t)new_op-> + downcall.resp.getattr.attributes.size; + rounded_up_size = + (inode_size + (4096 - (inode_size % 4096))); + inode->i_size = inode_size; + orangefs_inode->blksize = + new_op->downcall.resp.getattr.attributes.blksize; + spin_lock(&inode->i_lock); + inode->i_bytes = inode_size; + inode->i_blocks = + (unsigned long)(rounded_up_size / 512); + spin_unlock(&inode->i_lock); + } break; case S_IFDIR: - inode->i_size = PAGE_SIZE; - orangefs_inode->blksize = i_blocksize(inode); - spin_lock(&inode->i_lock); - inode_set_bytes(inode, inode->i_size); - spin_unlock(&inode->i_lock); + if (request_mask & STATX_SIZE || new) { + inode->i_size = PAGE_SIZE; + orangefs_inode->blksize = i_blocksize(inode); + spin_lock(&inode->i_lock); + inode_set_bytes(inode, inode->i_size); + spin_unlock(&inode->i_lock); + } set_nlink(inode, 1); break; case S_IFLNK: @@ -349,6 +368,10 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass) orangefs_inode->getattr_time = jiffies + orangefs_getattr_timeout_msecs*HZ/1000; + if (request_mask & STATX_SIZE || new) + orangefs_inode->getattr_mask = STATX_BASIC_STATS; + else + orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE; ret = 0; out: op_release(new_op); @@ -500,41 +523,6 @@ int orangefs_flush_inode(struct inode *inode) return ret; } -int orangefs_unmount_sb(struct super_block *sb) -{ - int ret = -EINVAL; - struct orangefs_kernel_op_s *new_op = NULL; - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_unmount_sb called on sb %p\n", - sb); - - new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT); - if (!new_op) - return -ENOMEM; - new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id; - new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id; - strncpy(new_op->upcall.req.fs_umount.orangefs_config_server, - ORANGEFS_SB(sb)->devname, - ORANGEFS_MAX_SERVER_ADDR_LEN); - - gossip_debug(GOSSIP_UTILS_DEBUG, - "Attempting ORANGEFS Unmount via host %s\n", - new_op->upcall.req.fs_umount.orangefs_config_server); - - ret = service_operation(new_op, "orangefs_fs_umount", 0); - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_unmount: got return value of %d\n", ret); - if (ret) - sb = ERR_PTR(ret); - else - ORANGEFS_SB(sb)->mount_pending = 1; - - op_release(new_op); - return ret; -} - void orangefs_make_bad_inode(struct inode *inode) { if (is_root_handle(inode)) { diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h index 971307ad69be4b08cd7be8df8b5a213a314a25b7..48bcc1bbe415f1a4f2394fb1b72ab704b2d0a1d3 100644 --- a/fs/orangefs/protocol.h +++ b/fs/orangefs/protocol.h @@ -138,13 +138,8 @@ typedef __s64 ORANGEFS_offset; #define ORANGEFS_G_SGID (1 << 10) #define ORANGEFS_U_SUID (1 << 11) -/* definition taken from stdint.h */ -#define INT32_MAX (2147483647) -#define ORANGEFS_ITERATE_START (INT32_MAX - 1) -#define ORANGEFS_ITERATE_END (INT32_MAX - 2) -#define ORANGEFS_ITERATE_NEXT (INT32_MAX - 3) -#define ORANGEFS_READDIR_START ORANGEFS_ITERATE_START -#define ORANGEFS_READDIR_END ORANGEFS_ITERATE_END +#define ORANGEFS_ITERATE_START 2147483646 +#define ORANGEFS_ITERATE_END 2147483645 #define ORANGEFS_IMMUTABLE_FL FS_IMMUTABLE_FL #define ORANGEFS_APPEND_FL FS_APPEND_FL #define ORANGEFS_NOATIME_FL FS_NOATIME_FL diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 629d8c917fa679886715360fcfe8f6cee1b5a37f..5c7c273e17ec3b32c3c87d313e2c68eb82bdac84 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -376,6 +376,25 @@ static const struct export_operations orangefs_export_ops = { .fh_to_dentry = orangefs_fh_to_dentry, }; +static int orangefs_unmount(int id, __s32 fs_id, const char *devname) +{ + struct orangefs_kernel_op_s *op; + int r; + op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT); + if (!op) + return -ENOMEM; + op->upcall.req.fs_umount.id = id; + op->upcall.req.fs_umount.fs_id = fs_id; + strncpy(op->upcall.req.fs_umount.orangefs_config_server, + devname, ORANGEFS_MAX_SERVER_ADDR_LEN); + r = service_operation(op, "orangefs_fs_umount", 0); + /* Not much to do about an error here. */ + if (r) + gossip_err("orangefs_unmount: service_operation %d\n", r); + op_release(op); + return r; +} + static int orangefs_fill_sb(struct super_block *sb, struct orangefs_fs_mount_response *fs_mount, void *data, int silent) @@ -484,6 +503,8 @@ struct dentry *orangefs_mount(struct file_system_type *fst, if (IS_ERR(sb)) { d = ERR_CAST(sb); + orangefs_unmount(new_op->downcall.resp.fs_mount.id, + new_op->downcall.resp.fs_mount.fs_id, devname); goto free_op; } @@ -539,6 +560,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, free_sb_and_op: /* Will call orangefs_kill_sb with sb not in list. */ ORANGEFS_SB(sb)->no_list = 1; + /* ORANGEFS_VFS_OP_FS_UMOUNT is done by orangefs_kill_sb. */ deactivate_locked_super(sb); free_op: gossip_err("orangefs_mount: mount request failed with %d\n", ret); @@ -554,6 +576,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, void orangefs_kill_sb(struct super_block *sb) { + int r; gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n"); /* provided sb cleanup */ @@ -563,7 +586,10 @@ void orangefs_kill_sb(struct super_block *sb) * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock */ - orangefs_unmount_sb(sb); + r = orangefs_unmount(ORANGEFS_SB(sb)->id, ORANGEFS_SB(sb)->fs_id, + ORANGEFS_SB(sb)->devname); + if (!r) + ORANGEFS_SB(sb)->mount_pending = 1; if (!ORANGEFS_SB(sb)->no_list) { /* remove the sb from our list of orangefs specific sb's */ diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index abcfa3fa9992957006461c202c54380018a17787..61e2ca7fec553d3a694cf80d5e7083da98853015 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -124,7 +124,14 @@ int service_operation(struct orangefs_kernel_op_s *op, gossip_debug(GOSSIP_WAIT_DEBUG, "%s:client core is NOT in service.\n", __func__); - timeout = op_timeout_secs * HZ; + /* + * Don't wait for the userspace component to return if + * the filesystem is being umounted anyway. + */ + if (op->upcall.type == ORANGEFS_VFS_OP_FS_UMOUNT) + timeout = 0; + else + timeout = op_timeout_secs * HZ; } spin_unlock(&orangefs_request_list_lock); diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index 74a81b1daaac8850b68f37bc46110657082dfcc1..237c9c04dc3b49ffcaae3de63a752dbb4891fd0c 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -76,11 +76,8 @@ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) { - gossip_err("Invalid key length (%d)\n", - (int)strlen(name)); + if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; - } fsuid = from_kuid(&init_user_ns, current_fsuid()); fsgid = from_kgid(&init_user_ns, current_fsgid()); @@ -172,6 +169,9 @@ static int orangefs_inode_removexattr(struct inode *inode, const char *name, struct orangefs_kernel_op_s *new_op = NULL; int ret = -ENOMEM; + if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) + return -EINVAL; + down_write(&orangefs_inode->xattr_sem); new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR); if (!new_op) @@ -231,23 +231,13 @@ int orangefs_inode_setxattr(struct inode *inode, const char *name, "%s: name %s, buffer_size %zd\n", __func__, name, size); - if (size >= ORANGEFS_MAX_XATTR_VALUELEN || - flags < 0) { - gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n", - (int)size, - flags); + if (size > ORANGEFS_MAX_XATTR_VALUELEN) + return -EINVAL; + if (strlen(name) > ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; - } internal_flag = convert_to_internal_xattr_flags(flags); - if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) { - gossip_err - ("orangefs_inode_setxattr: bogus key size (%d)\n", - (int)(strlen(name))); - return -EINVAL; - } - /* This is equivalent to a removexattr */ if (size == 0 && value == NULL) { gossip_debug(GOSSIP_XATTR_DEBUG, @@ -358,7 +348,7 @@ ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size) returned_count = new_op->downcall.resp.listxattr.returned_count; if (returned_count < 0 || - returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) { + returned_count > ORANGEFS_MAX_XATTR_LISTLEN) { gossip_err("%s: impossible value for returned_count:%d:\n", __func__, returned_count); diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 0daac5112f7a32384b5febd39bd8499f31da8c31..c0c9683934b7a7883ab59eb8bbcd412e07385d0b 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,5 +1,6 @@ config OVERLAY_FS tristate "Overlay filesystem support" + select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem and a 'lower' filesystem. When a name exists in both filesystems, the diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 906ea6c93260179c9c4947abe97c35750162c3e3..7a44533f4bbf24134a95bdc030bde5779f28457a 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "overlayfs.h" #include "ovl_entry.h" @@ -232,6 +233,83 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } +static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid) +{ + struct ovl_fh *fh; + int fh_type, fh_len, dwords; + void *buf; + int buflen = MAX_HANDLE_SZ; + + buf = kmalloc(buflen, GFP_TEMPORARY); + if (!buf) + return ERR_PTR(-ENOMEM); + + /* + * We encode a non-connectable file handle for non-dir, because we + * only need to find the lower inode number and we don't want to pay + * the price or reconnecting the dentry. + */ + dwords = buflen >> 2; + fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); + buflen = (dwords << 2); + + fh = ERR_PTR(-EIO); + if (WARN_ON(fh_type < 0) || + WARN_ON(buflen > MAX_HANDLE_SZ) || + WARN_ON(fh_type == FILEID_INVALID)) + goto out; + + BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255); + fh_len = offsetof(struct ovl_fh, fid) + buflen; + fh = kmalloc(fh_len, GFP_KERNEL); + if (!fh) { + fh = ERR_PTR(-ENOMEM); + goto out; + } + + fh->version = OVL_FH_VERSION; + fh->magic = OVL_FH_MAGIC; + fh->type = fh_type; + fh->flags = OVL_FH_FLAG_CPU_ENDIAN; + fh->len = fh_len; + fh->uuid = *uuid; + memcpy(fh->fid, buf, buflen); + +out: + kfree(buf); + return fh; +} + +static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) +{ + struct super_block *sb = lower->d_sb; + uuid_be *uuid = (uuid_be *) &sb->s_uuid; + const struct ovl_fh *fh = NULL; + int err; + + /* + * When lower layer doesn't support export operations store a 'null' fh, + * so we can use the overlay.origin xattr to distignuish between a copy + * up and a pure upper inode. + */ + if (sb->s_export_op && sb->s_export_op->fh_to_dentry && + uuid_be_cmp(*uuid, NULL_UUID_BE)) { + fh = ovl_encode_fh(lower, uuid); + if (IS_ERR(fh)) + return PTR_ERR(fh); + } + + /* + * Do not fail when upper doesn't support xattrs. + */ + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh, + fh ? fh->len : 0, 0); + kfree(fh); + + return err; +} + static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, struct kstat *stat, const char *link, @@ -268,13 +346,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (tmpfile) temp = ovl_do_tmpfile(upperdir, stat->mode); else - temp = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(temp); - if (IS_ERR(temp)) - goto out1; - + temp = ovl_lookup_temp(workdir); err = 0; - if (!tmpfile) + if (IS_ERR(temp)) { + err = PTR_ERR(temp); + temp = NULL; + } + + if (!err && !tmpfile) err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { @@ -316,6 +395,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (err) goto out_cleanup; + /* + * Store identifier of lower inode in upper inode xattr to + * allow lookup of the copy up origin inode. + */ + err = ovl_set_origin(dentry, lowerpath->dentry, temp); + if (err) + goto out_cleanup; + if (tmpfile) err = ovl_do_link(temp, udir, upper, true); else @@ -372,6 +459,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; + /* Mark parent "impure" because it may now contain non-pure upper */ + err = ovl_set_impure(parent, upperdir); + if (err) + return err; + err = vfs_getattr(&parentpath, &pstat, STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 6515796460dfe170fb33b109feb09c9ac0b17519..a63a71656e9bdaef6ed5cadf8acdb6d8002fe1b6 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) } } -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, struct dentry *whiteout; struct inode *wdir = workdir->d_inode; - whiteout = ovl_lookup_temp(workdir, dentry); + whiteout = ovl_lookup_temp(workdir); if (IS_ERR(whiteout)) return whiteout; @@ -127,45 +127,26 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, return err; } -static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, + int xerr) { int err; - err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); return err; } -static int ovl_dir_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) { - struct dentry *dentry = path->dentry; - int err; - enum ovl_path_type type; - struct path realpath; - const struct cred *old_cred; - - type = ovl_path_real(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); - revert_creds(old_cred); - if (err) - return err; - - stat->dev = dentry->d_sb->s_dev; - stat->ino = dentry->d_inode->i_ino; - /* - * It's probably not worth it to count subdirs to get the - * correct link count. nlink=1 seems to pacify 'find' and - * other utilities. + * Fail with -EIO when trying to create opaque dir and upper doesn't + * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to + * return a specific error for noxattr case. */ - if (OVL_TYPE_MERGE(type)) - stat->nlink = 1; - - return 0; + return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } /* Common operations required to be done after creation of file on upper */ @@ -182,6 +163,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, inc_nlink(inode); } d_instantiate(dentry, inode); + /* Force lookup of new upper hardlink to find its lower */ + if (hardlink) + d_drop(dentry); } static bool ovl_type_merge(struct dentry *dentry) @@ -189,6 +173,11 @@ static bool ovl_type_merge(struct dentry *dentry) return OVL_TYPE_MERGE(ovl_path_type(dentry)); } +static bool ovl_type_origin(struct dentry *dentry) +{ + return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -210,7 +199,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (err) goto out_dput; - if (ovl_type_merge(dentry->d_parent)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } @@ -277,7 +266,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (upper->d_parent->d_inode != udir) goto out_unlock; - opaquedir = ovl_lookup_temp(workdir, dentry); + opaquedir = ovl_lookup_temp(workdir); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out_unlock; @@ -409,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out; - newdentry = ovl_lookup_temp(workdir, dentry); + newdentry = ovl_lookup_temp(workdir); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; @@ -873,18 +862,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, - redirect, strlen(redirect), 0); + err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + OVL_XATTR_REDIRECT, + redirect, strlen(redirect), -EXDEV); if (!err) { spin_lock(&dentry->d_lock); ovl_dentry_set_redirect(dentry, redirect); spin_unlock(&dentry->d_lock); } else { kfree(redirect); - if (err == -EOPNOTSUPP) - ovl_clear_redirect_dir(dentry->d_sb); - else - pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; } @@ -970,6 +957,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); + if (!samedir) { + /* + * When moving a merge dir or non-dir with copy up origin into + * a new parent, we are marking the new parent dir "impure". + * When ovl_iterate() iterates an "impure" upper dir, it will + * lookup the origin inodes of the entries to fill d_ino. + */ + if (ovl_type_origin(old)) { + err = ovl_set_impure(new->d_parent, new_upperdir); + if (err) + goto out_revert_creds; + } + if (!overwrite && ovl_type_origin(new)) { + err = ovl_set_impure(old->d_parent, old_upperdir); + if (err) + goto out_revert_creds; + } + } + trap = lock_rename(new_upperdir, old_upperdir); olddentry = lookup_one_len(old->d_name.name, old_upperdir, @@ -1019,7 +1025,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (!old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque(old, olddentry); + err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); if (err) goto out_dput; } @@ -1027,7 +1033,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(new)) err = ovl_set_redirect(new, samedir); else if (!new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque(new, newdentry); + err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); if (err) goto out_dput; } @@ -1070,7 +1076,7 @@ const struct inode_operations ovl_dir_inode_operations = { .create = ovl_create, .mknod = ovl_mknod, .permission = ovl_permission, - .getattr = ovl_dir_getattr, + .getattr = ovl_getattr, .listxattr = ovl_listxattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f8fe6bf2036df3bd531132877101cb979c1feed0..d613e2c41242a52a6c018f43f9987bdbf461e0bb 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -57,18 +57,78 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) return err; } -static int ovl_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; + enum ovl_path_type type; struct path realpath; const struct cred *old_cred; + bool is_dir = S_ISDIR(dentry->d_inode->i_mode); int err; - ovl_path_real(dentry, &realpath); + type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getattr(&realpath, stat, request_mask, flags); + if (err) + goto out; + + /* + * When all layers are on the same fs, all real inode number are + * unique, so we use the overlay st_dev, which is friendly to du -x. + * + * We also use st_ino of the copy up origin, if we know it. + * This guaranties constant st_dev/st_ino across copy up. + * + * If filesystem supports NFS export ops, this also guaranties + * persistent st_ino across mount cycle. + */ + if (ovl_same_sb(dentry->d_sb)) { + if (OVL_TYPE_ORIGIN(type)) { + struct kstat lowerstat; + u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); + + ovl_path_lower(dentry, &realpath); + err = vfs_getattr(&realpath, &lowerstat, + lowermask, flags); + if (err) + goto out; + + WARN_ON_ONCE(stat->dev != lowerstat.dev); + /* + * Lower hardlinks are broken on copy up to different + * upper files, so we cannot use the lower origin st_ino + * for those different files, even for the same fs case. + */ + if (is_dir || lowerstat.nlink == 1) + stat->ino = lowerstat.ino; + } + stat->dev = dentry->d_sb->s_dev; + } else if (is_dir) { + /* + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino. + * + * Always use the overlay st_dev for directories, so 'find + * -xdev' will scan the entire overlay mount and won't cross the + * overlay mount boundaries. + */ + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; + } + + /* + * It's probably not worth it to count subdirs to get the + * correct link count. nlink=1 seems to pacify 'find' and + * other utilities. + */ + if (is_dir && OVL_TYPE_MERGE(type)) + stat->nlink = 1; + +out: revert_creds(old_cred); + return err; } @@ -180,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name, return res; } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); @@ -203,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { @@ -303,6 +373,41 @@ static const struct inode_operations ovl_symlink_inode_operations = { .update_time = ovl_update_time, }; +/* + * It is possible to stack overlayfs instance on top of another + * overlayfs instance as lower layer. We need to annonate the + * stackable i_mutex locks according to stack level of the super + * block instance. An overlayfs instance can never be in stack + * depth 0 (there is always a real fs below it). An overlayfs + * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. + * + * For example, here is a snip from /proc/lockdep_chains after + * dir_iterate of nested overlayfs: + * + * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) + * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) + * [...] &type->i_mutex_dir_key (stack_depth=0) + */ +#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH + +static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) +{ +#ifdef CONFIG_LOCKDEP + static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; + static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; + + int depth = inode->i_sb->s_stack_depth - 1; + + if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) + depth = 0; + + if (S_ISDIR(inode->i_mode)) + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); + else + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); +#endif +} + static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_ino = get_next_ino(); @@ -312,6 +417,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; #endif + ovl_lockdep_annotate_inode_mutex_key(inode); + switch (mode & S_IFMT) { case S_IFREG: inode->i_op = &ovl_file_inode_operations; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index b8b077821fb03bea9d63b3bf3508039836409e1b..f3136c31e72af24cbb9949449a12d292fc3bf11b 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "overlayfs.h" #include "ovl_entry.h" @@ -81,19 +83,93 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, goto err_free; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static int ovl_acceptable(void *ctx, struct dentry *dentry) +{ + return 1; +} + +static struct dentry *ovl_get_origin(struct dentry *dentry, + struct vfsmount *mnt) { int res; - char val; + struct ovl_fh *fh = NULL; + struct dentry *origin = NULL; + int bytes; - if (!d_is_dir(dentry)) - return false; + res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + if (res < 0) { + if (res == -ENODATA || res == -EOPNOTSUPP) + return NULL; + goto fail; + } + /* Zero size value means "copied up but origin unknown" */ + if (res == 0) + return NULL; - res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); - if (res == 1 && val == 'y') - return true; + fh = kzalloc(res, GFP_TEMPORARY); + if (!fh) + return ERR_PTR(-ENOMEM); + + res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + if (res < 0) + goto fail; + + if (res < sizeof(struct ovl_fh) || res < fh->len) + goto invalid; + + if (fh->magic != OVL_FH_MAGIC) + goto invalid; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) + goto out; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + goto out; + + bytes = (fh->len - offsetof(struct ovl_fh, fid)); + + /* + * Make sure that the stored uuid matches the uuid of the lower + * layer where file handle will be decoded. + */ + if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid)) + goto out; - return false; + origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, + bytes >> 2, (int)fh->type, + ovl_acceptable, NULL); + if (IS_ERR(origin)) { + /* Treat stale file handle as "origin unknown" */ + if (origin == ERR_PTR(-ESTALE)) + origin = NULL; + goto out; + } + + if (ovl_dentry_weird(origin) || + ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) { + dput(origin); + origin = NULL; + goto invalid; + } + +out: + kfree(fh); + return origin; + +fail: + pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + goto out; +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + goto out; +} + +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); } static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, @@ -192,6 +268,45 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, return 0; } + +static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry, + struct path **stackp, unsigned int *ctrp) +{ + struct super_block *same_sb = ovl_same_sb(dentry->d_sb); + struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; + struct vfsmount *mnt; + struct dentry *origin; + + if (!same_sb || !roe->numlower) + return 0; + + /* + * Since all layers are on the same fs, we use the first layer for + * decoding the file handle. We may get a disconnected dentry, + * which is fine, because we only need to hold the origin inode in + * cache and use its inode number. We may even get a connected dentry, + * that is not under the first layer's root. That is also fine for + * using it's inode number - it's the same as if we held a reference + * to a dentry in first layer that was moved under us. + */ + mnt = roe->lowerstack[0].mnt; + + origin = ovl_get_origin(upperdentry, mnt); + if (IS_ERR_OR_NULL(origin)) + return PTR_ERR(origin); + + BUG_ON(*stackp || *ctrp); + *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); + if (!*stackp) { + dput(origin); + return -ENOMEM; + } + **stackp = (struct path) { .dentry = origin, .mnt = mnt }; + *ctrp = 1; + + return 0; +} + /* * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. @@ -220,11 +335,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, const struct cred *old_cred; struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct ovl_entry *poe = dentry->d_parent->d_fsdata; + struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; unsigned int ctr = 0; struct inode *inode = NULL; bool upperopaque = false; + bool upperimpure = false; char *upperredirect = NULL; struct dentry *this; unsigned int i; @@ -253,15 +370,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = -EREMOTE; goto out; } + if (upperdentry && !d.is_dir) { + BUG_ON(!d.stop || d.redirect); + err = ovl_check_origin(dentry, upperdentry, + &stack, &ctr); + if (err) + goto out; + } if (d.redirect) { upperredirect = kstrdup(d.redirect, GFP_KERNEL); if (!upperredirect) goto out_put_upper; if (d.redirect[0] == '/') - poe = dentry->d_sb->s_root->d_fsdata; + poe = roe; } upperopaque = d.opaque; + if (upperdentry && d.is_dir) + upperimpure = ovl_is_impuredir(upperdentry); } if (!d.stop && poe->numlower) { @@ -290,10 +416,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (d.stop) break; - if (d.redirect && - d.redirect[0] == '/' && - poe != dentry->d_sb->s_root->d_fsdata) { - poe = dentry->d_sb->s_root->d_fsdata; + if (d.redirect && d.redirect[0] == '/' && poe != roe) { + poe = roe; /* Find the current layer on the root dentry */ for (i = 0; i < poe->numlower; i++) @@ -332,6 +456,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, revert_creds(old_cred); oe->opaque = upperopaque; + oe->impure = upperimpure; oe->redirect = upperredirect; oe->__upperdentry = upperdentry; memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 741dc0b6931fe90fc62874989c93e659f7e7a4ef..0623cebeefff8661d49d65a228ceec6290cee877 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -8,18 +8,57 @@ */ #include +#include enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), + __OVL_PATH_ORIGIN = (1 << 2), }; #define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) +#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN) #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" +#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" +#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" + +/* + * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, + * where: + * origin.fh - exported file handle of the lower file + * origin.uuid - uuid of the lower filesystem + */ +#define OVL_FH_VERSION 0 +#define OVL_FH_MAGIC 0xfb + +/* CPU byte order required for fid decoding: */ +#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0) +#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1) + +#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN) + +#if defined(__LITTLE_ENDIAN) +#define OVL_FH_FLAG_CPU_ENDIAN 0 +#elif defined(__BIG_ENDIAN) +#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN +#else +#error Endianness not defined +#endif + +/* On-disk and in-memeory format for redirect by file handle */ +struct ovl_fh { + u8 version; /* 0 */ + u8 magic; /* 0xfb */ + u8 len; /* size of this header + size of fid */ + u8 flags; /* OVL_FH_FLAG_* */ + u8 type; /* fid_type of fid */ + uuid_be uuid; /* uuid of filesystem */ + u8 fid[0]; /* file identifier */ +} __packed; #define OVL_ISUPPER_MASK 1UL @@ -151,6 +190,7 @@ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); +struct super_block *ovl_same_sb(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); @@ -164,10 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); bool ovl_dentry_is_opaque(struct dentry *dentry); +bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); -void ovl_clear_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); @@ -180,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry); void ovl_copy_up_end(struct dentry *dentry); +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); + +static inline bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); @@ -197,6 +248,8 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, /* inode.c */ int ovl_setattr(struct dentry *dentry, struct iattr *attr); +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); @@ -222,7 +275,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct cattr { dev_t rdev; umode_t mode; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 59614faa14c315fb8f6e5998f91bc7061f818bab..34bc4a9f5c61d95f049b3f34ccd0de243aa27ddd 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,7 +28,10 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + bool noxattr; wait_queue_head_t copyup_wq; + /* sb common to all layers */ + struct super_block *same_sb; }; /* private information held for every overlayfs dentry */ @@ -40,6 +43,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool impure; bool copying; }; struct rcu_head rcu; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index c9e70d39c1ea1cafd730be5caaea6924a29e7c58..4882ffb37baead1c4da41684158d22cbe58e5353 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -49,11 +49,28 @@ static void ovl_dentry_release(struct dentry *dentry) } } +static int ovl_check_append_only(struct inode *inode, int flag) +{ + /* + * This test was moot in vfs may_open() because overlay inode does + * not have the S_APPEND flag, so re-check on real upper inode + */ + if (IS_APPEND(inode)) { + if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) + return -EPERM; + if (flag & O_TRUNC) + return -EPERM; + } + + return 0; +} + static struct dentry *ovl_d_real(struct dentry *dentry, const struct inode *inode, unsigned int open_flags) { struct dentry *real; + int err; if (!d_is_reg(dentry)) { if (!inode || inode == d_inode(dentry)) @@ -65,15 +82,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry, return dentry; if (open_flags) { - int err = ovl_open_maybe_copy_up(dentry, open_flags); - + err = ovl_open_maybe_copy_up(dentry, open_flags); if (err) return ERR_PTR(err); } real = ovl_dentry_upper(dentry); - if (real && (!inode || inode == d_inode(real))) + if (real && (!inode || inode == d_inode(real))) { + if (!inode) { + err = ovl_check_append_only(d_inode(real), open_flags); + if (err) + return ERR_PTR(err); + } return real; + } real = ovl_dentry_lower(dentry); if (!real) @@ -709,8 +731,8 @@ static const struct xattr_handler *ovl_xattr_handlers[] = { static int ovl_fill_super(struct super_block *sb, void *data, int silent) { - struct path upperpath = { NULL, NULL }; - struct path workpath = { NULL, NULL }; + struct path upperpath = { }; + struct path workpath = { }; struct dentry *root_dentry; struct inode *realinode; struct ovl_entry *oe; @@ -869,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) dput(temp); else pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* + * Check if upper/work fs supports trusted.overlay.* + * xattr + */ + err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, + "0", 1, 0); + if (err) { + ufs->noxattr = true; + pr_warn("overlayfs: upper fs does not support xattr.\n"); + } else { + vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); + } } } @@ -892,11 +927,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->lower_mnt[ufs->numlower] = mnt; ufs->numlower++; + + /* Check if all lower layers are on same sb */ + if (i == 0) + ufs->same_sb = mnt->mnt_sb; + else if (ufs->same_sb != mnt->mnt_sb) + ufs->same_sb = NULL; } /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; + else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) + ufs->same_sb = NULL; if (remote) sb->s_d_op = &ovl_reval_dentry_operations; @@ -931,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) path_put(&workpath); kfree(lowertmp); - oe->__upperdentry = upperpath.dentry; + if (upperpath.dentry) { + oe->__upperdentry = upperpath.dentry; + oe->impure = ovl_is_impuredir(upperpath.dentry); + } for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 6e610a205e1556477ba80e512f1243629c193141..809048913889189d083339d1d015ef4cad2af035 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -40,6 +40,13 @@ const struct cred *ovl_override_creds(struct super_block *sb) return override_creds(ofs->creator_cred); } +struct super_block *ovl_same_sb(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->same_sb; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -75,11 +82,13 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) type = __OVL_PATH_UPPER; /* - * Non-dir dentry can hold lower dentry from previous - * location. + * Non-dir dentry can hold lower dentry of its copy up origin. */ - if (oe->numlower && d_is_dir(dentry)) - type |= __OVL_PATH_MERGE; + if (oe->numlower) { + type |= __OVL_PATH_ORIGIN; + if (d_is_dir(dentry)) + type |= __OVL_PATH_MERGE; + } } else { if (oe->numlower > 1) type |= __OVL_PATH_MERGE; @@ -100,7 +109,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path) { struct ovl_entry *oe = dentry->d_fsdata; - *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; + *path = oe->numlower ? oe->lowerstack[0] : (struct path) { }; } enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) @@ -166,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry) return oe->opaque; } +bool ovl_dentry_is_impure(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->impure; +} + bool ovl_dentry_is_whiteout(struct dentry *dentry) { return !dentry->d_inode && ovl_dentry_is_opaque(dentry); @@ -182,14 +198,7 @@ bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; - return ofs->config.redirect_dir; -} - -void ovl_clear_redirect_dir(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - ofs->config.redirect_dir = false; + return ofs->config.redirect_dir && !ofs->noxattr; } const char *ovl_dentry_get_redirect(struct dentry *dentry) @@ -294,3 +303,59 @@ void ovl_copy_up_end(struct dentry *dentry) wake_up_locked(&ofs->copyup_wq); spin_unlock(&ofs->copyup_wq.lock); } + +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) +{ + int res; + char val; + + if (!d_is_dir(dentry)) + return false; + + res = vfs_getxattr(dentry, name, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr) +{ + int err; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (ofs->noxattr) + return xerr; + + err = ovl_do_setxattr(upperdentry, name, value, size, 0); + + if (err == -EOPNOTSUPP) { + pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + ofs->noxattr = true; + return xerr; + } + + return err; +} + +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + struct ovl_entry *oe = dentry->d_fsdata; + + if (oe->impure) + return 0; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. + */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + oe->impure = true; + + return err; +} diff --git a/fs/proc/base.c b/fs/proc/base.c index c87b6b9a8a76b0a11078a391b0f6e4772cd1941e..f1e1927ccd484e7372fe2a38db7455468bbf06e8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -821,10 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!mmget_not_zero(mm)) goto free; - /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ - flags = FOLL_FORCE; - if (write) - flags |= FOLL_WRITE; + flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -2834,6 +2831,15 @@ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, return err; } +#ifdef CONFIG_LIVEPATCH +static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + seq_printf(m, "%d\n", task->patch_state); + return 0; +} +#endif /* CONFIG_LIVEPATCH */ + /* * Thread groups */ @@ -2933,6 +2939,9 @@ static const struct pid_entry tgid_base_stuff[] = { REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), +#ifdef CONFIG_LIVEPATCH + ONE("patch_state", S_IRUSR, proc_pid_patch_state), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3315,6 +3324,9 @@ static const struct pid_entry tid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif +#ifdef CONFIG_LIVEPATCH + ONE("patch_state", S_IRUSR, proc_pid_patch_state), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index ee27feb34cf4d50fc867dc138198c2da9cd7d9ef..9425c0d97262b548d8c8fbf7ce222cbad6e1ef58 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -472,6 +472,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->data = NULL; ent->proc_fops = NULL; ent->proc_iops = NULL; + parent->nlink++; if (proc_register(parent, ent) < 0) { kfree(ent); parent->nlink--; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 2cc7a8030275c1bc98a0da9adb870b33d99f5433..e250910cffc8fd54af942e8009da413ce76093ac 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -58,7 +58,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) struct proc_inode *ei; struct inode *inode; - ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 766f0c637ad1b429c83749ce01a48f56b5863899..3803b24ca220a4dc6cd02b4637e7576a071a9f98 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -23,6 +23,7 @@ static const struct proc_ns_operations *ns_entries[] = { #endif #ifdef CONFIG_PID_NS &pidns_operations, + &pidns_for_children_operations, #endif #ifdef CONFIG_USER_NS &userns_operations, diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d04ea43499096e4fc02eec0f363683e3713f65ba..67985a7233c24023e9810e25057e2a28ae7874e0 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -408,10 +408,6 @@ static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentr *pentry = entry; } -void register_sysctl_root(struct ctl_table_root *root) -{ -} - /* * sysctl_perm does NOT grant the superuser all rights automatically, because * some sysctl variables are readonly even to root. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 312578089544dbd652aac303d0cbabee8fbcb968..520802da059c25655eda0c1a421e963007b65ed7 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (stack_guard_page_start(vma, start)) - start += PAGE_SIZE; end = vma->vm_end; - if (stack_guard_page_end(vma, end)) - end -= PAGE_SIZE; seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", @@ -441,6 +437,7 @@ struct mem_size_stats { unsigned long private_dirty; unsigned long referenced; unsigned long anonymous; + unsigned long lazyfree; unsigned long anonymous_thp; unsigned long shmem_thp; unsigned long swap; @@ -457,8 +454,11 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, int i, nr = compound ? 1 << compound_order(page) : 1; unsigned long size = nr * PAGE_SIZE; - if (PageAnon(page)) + if (PageAnon(page)) { mss->anonymous += size; + if (!PageSwapBacked(page) && !dirty && !PageDirty(page)) + mss->lazyfree += size; + } mss->resident += size; /* Accumulate the size in pages that have been accessed. */ @@ -771,6 +771,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) "Private_Dirty: %8lu kB\n" "Referenced: %8lu kB\n" "Anonymous: %8lu kB\n" + "LazyFree: %8lu kB\n" "AnonHugePages: %8lu kB\n" "ShmemPmdMapped: %8lu kB\n" "Shared_Hugetlb: %8lu kB\n" @@ -789,6 +790,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) mss.private_dirty >> 10, mss.referenced >> 10, mss.anonymous >> 10, + mss.lazyfree >> 10, mss.anonymous_thp >> 10, mss.shmem_thp >> 10, mss.shared_hugetlb >> 10, diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 899d0ba0bd6cca44593f25be751efbe52a507d31..06aab07b6bb71bdabd8fa794c39a7b11cb8cb0a0 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -37,6 +37,12 @@ static void notrace pstore_ftrace_call(unsigned long ip, { unsigned long flags; struct pstore_ftrace_record rec = {}; + struct pstore_record record = { + .type = PSTORE_TYPE_FTRACE, + .buf = (char *)&rec, + .size = sizeof(rec), + .psi = psinfo, + }; if (unlikely(oops_in_progress)) return; @@ -47,8 +53,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, rec.parent_ip = parent_ip; pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++); pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); - psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, - 0, sizeof(rec), psinfo); + psinfo->write(&record); local_irq_restore(flags); } @@ -117,7 +122,7 @@ void pstore_register_ftrace(void) { struct dentry *file; - if (!psinfo->write_buf) + if (!psinfo->write) return; pstore_ftrace_dir = debugfs_create_dir("pstore", NULL); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 57c0646479f51cc0baa9bd82ba3cbb5689f41696..792a4e5f9226d16fefa0b6fd47061a76c79416ba 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -47,12 +47,8 @@ static LIST_HEAD(allpstore); struct pstore_private { struct list_head list; - struct pstore_info *psi; - enum pstore_type_id type; - u64 id; - int count; - ssize_t size; - char data[]; + struct pstore_record *record; + size_t total_size; }; struct pstore_ftrace_seq_data { @@ -63,6 +59,17 @@ struct pstore_ftrace_seq_data { #define REC_SIZE sizeof(struct pstore_ftrace_record) +static void free_pstore_private(struct pstore_private *private) +{ + if (!private) + return; + if (private->record) { + kfree(private->record->buf); + kfree(private->record); + } + kfree(private); +} + static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) { struct pstore_private *ps = s->private; @@ -72,9 +79,9 @@ static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos) if (!data) return NULL; - data->off = ps->size % REC_SIZE; + data->off = ps->total_size % REC_SIZE; data->off += *pos * REC_SIZE; - if (data->off + REC_SIZE > ps->size) { + if (data->off + REC_SIZE > ps->total_size) { kfree(data); return NULL; } @@ -94,7 +101,7 @@ static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos) struct pstore_ftrace_seq_data *data = v; data->off += REC_SIZE; - if (data->off + REC_SIZE > ps->size) + if (data->off + REC_SIZE > ps->total_size) return NULL; (*pos)++; @@ -105,7 +112,9 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v) { struct pstore_private *ps = s->private; struct pstore_ftrace_seq_data *data = v; - struct pstore_ftrace_record *rec = (void *)(ps->data + data->off); + struct pstore_ftrace_record *rec; + + rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off); seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n", pstore_ftrace_decode_cpu(rec), @@ -125,7 +134,7 @@ static const struct seq_operations pstore_ftrace_seq_ops = { static int pstore_check_syslog_permissions(struct pstore_private *ps) { - switch (ps->type) { + switch (ps->record->type) { case PSTORE_TYPE_DMESG: case PSTORE_TYPE_CONSOLE: return check_syslog_permissions(SYSLOG_ACTION_READ_ALL, @@ -141,9 +150,10 @@ static ssize_t pstore_file_read(struct file *file, char __user *userbuf, struct seq_file *sf = file->private_data; struct pstore_private *ps = sf->private; - if (ps->type == PSTORE_TYPE_FTRACE) + if (ps->record->type == PSTORE_TYPE_FTRACE) return seq_read(file, userbuf, count, ppos); - return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size); + return simple_read_from_buffer(userbuf, count, ppos, + ps->record->buf, ps->total_size); } static int pstore_file_open(struct inode *inode, struct file *file) @@ -157,7 +167,7 @@ static int pstore_file_open(struct inode *inode, struct file *file) if (err) return err; - if (ps->type == PSTORE_TYPE_FTRACE) + if (ps->record->type == PSTORE_TYPE_FTRACE) sops = &pstore_ftrace_seq_ops; err = seq_open(file, sops); @@ -193,20 +203,19 @@ static const struct file_operations pstore_file_operations = { static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; + struct pstore_record *record = p->record; int err; err = pstore_check_syslog_permissions(p); if (err) return err; - if (p->psi->erase) { - mutex_lock(&p->psi->read_mutex); - p->psi->erase(p->type, p->id, p->count, - d_inode(dentry)->i_ctime, p->psi); - mutex_unlock(&p->psi->read_mutex); - } else { + if (!record->psi->erase) return -EPERM; - } + + mutex_lock(&record->psi->read_mutex); + record->psi->erase(record); + mutex_unlock(&record->psi->read_mutex); return simple_unlink(dir, dentry); } @@ -221,7 +230,7 @@ static void pstore_evict_inode(struct inode *inode) spin_lock_irqsave(&allpstore_lock, flags); list_del(&p->list); spin_unlock_irqrestore(&allpstore_lock, flags); - kfree(p); + free_pstore_private(p); } } @@ -302,23 +311,23 @@ bool pstore_is_mounted(void) * Load it up with "size" bytes of data from "buf". * Set the mtime & ctime to the date that this record was originally stored. */ -int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, - char *data, bool compressed, size_t size, - struct timespec time, struct pstore_info *psi) +int pstore_mkfile(struct dentry *root, struct pstore_record *record) { - struct dentry *root = pstore_sb->s_root; struct dentry *dentry; struct inode *inode; int rc = 0; char name[PSTORE_NAMELEN]; struct pstore_private *private, *pos; unsigned long flags; + size_t size = record->size + record->ecc_notice_size; + + WARN_ON(!inode_is_locked(d_inode(root))); spin_lock_irqsave(&allpstore_lock, flags); list_for_each_entry(pos, &allpstore, list) { - if (pos->type == type && - pos->id == id && - pos->psi == psi) { + if (pos->record->type == record->type && + pos->record->id == record->id && + pos->record->psi == record->psi) { rc = -EEXIST; break; } @@ -328,72 +337,74 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, return rc; rc = -ENOMEM; - inode = pstore_get_inode(pstore_sb); + inode = pstore_get_inode(root->d_sb); if (!inode) goto fail; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; - private = kmalloc(sizeof *private + size, GFP_KERNEL); + private = kzalloc(sizeof(*private), GFP_KERNEL); if (!private) goto fail_alloc; - private->type = type; - private->id = id; - private->count = count; - private->psi = psi; + private->record = record; - switch (type) { + switch (record->type) { case PSTORE_TYPE_DMESG: scnprintf(name, sizeof(name), "dmesg-%s-%lld%s", - psname, id, compressed ? ".enc.z" : ""); + record->psi->name, record->id, + record->compressed ? ".enc.z" : ""); break; case PSTORE_TYPE_CONSOLE: - scnprintf(name, sizeof(name), "console-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "console-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_FTRACE: - scnprintf(name, sizeof(name), "ftrace-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "ftrace-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_MCE: - scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "mce-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_RTAS: - scnprintf(name, sizeof(name), "rtas-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "rtas-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_OF: scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld", - psname, id); + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_COMMON: scnprintf(name, sizeof(name), "powerpc-common-%s-%lld", - psname, id); + record->psi->name, record->id); break; case PSTORE_TYPE_PMSG: - scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "pmsg-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_PPC_OPAL: - sprintf(name, "powerpc-opal-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "powerpc-opal-%s-%lld", + record->psi->name, record->id); break; case PSTORE_TYPE_UNKNOWN: - scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "unknown-%s-%lld", + record->psi->name, record->id); break; default: scnprintf(name, sizeof(name), "type%d-%s-%lld", - type, psname, id); + record->type, record->psi->name, record->id); break; } - inode_lock(d_inode(root)); - dentry = d_alloc_name(root, name); if (!dentry) - goto fail_lockedalloc; + goto fail_private; - memcpy(private->data, data, size); - inode->i_size = private->size = size; + inode->i_size = private->total_size = size; inode->i_private = private; - if (time.tv_sec) - inode->i_mtime = inode->i_ctime = time; + if (record->time.tv_sec) + inode->i_mtime = inode->i_ctime = record->time; d_add(dentry, inode); @@ -401,13 +412,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, list_add(&private->list, &allpstore); spin_unlock_irqrestore(&allpstore_lock, flags); - inode_unlock(d_inode(root)); - return 0; -fail_lockedalloc: - inode_unlock(d_inode(root)); - kfree(private); +fail_private: + free_pstore_private(private); fail_alloc: iput(inode); @@ -415,6 +423,27 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, return rc; } +/* + * Read all the records from the persistent store. Create + * files in our filesystem. Don't warn about -EEXIST errors + * when we are re-scanning the backing store looking to add new + * error records. + */ +void pstore_get_records(int quiet) +{ + struct pstore_info *psi = psinfo; + struct dentry *root; + + if (!psi || !pstore_sb) + return; + + root = pstore_sb->s_root; + + inode_lock(d_inode(root)); + pstore_get_backend_records(psi, root, quiet); + inode_unlock(d_inode(root)); +} + static int pstore_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index da416e6591c9d91cd937b36438e31a51bde6bfa4..c416e653dc4f59d34a261ee2a617d82850067587 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -25,10 +25,10 @@ extern struct pstore_info *psinfo; extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(int); -extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, - int count, char *data, bool compressed, - size_t size, struct timespec time, - struct pstore_info *psi); +extern void pstore_get_backend_records(struct pstore_info *psi, + struct dentry *root, int quiet); +extern int pstore_mkfile(struct dentry *root, + struct pstore_record *record); extern bool pstore_is_mounted(void); #endif diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index efab7b64925ba76e2aa0a2a39d9e659c0a94472e..d468eec9b8a6ba9aa574055c0e76c615e2ede9d0 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -267,7 +267,7 @@ static void free_zlib(void) big_oops_buf_sz = 0; } -static struct pstore_zbackend backend_zlib = { +static const struct pstore_zbackend backend_zlib = { .compress = compress_zlib, .decompress = decompress_zlib, .allocate = allocate_zlib, @@ -328,7 +328,7 @@ static void free_lzo(void) big_oops_buf_sz = 0; } -static struct pstore_zbackend backend_lzo = { +static const struct pstore_zbackend backend_lzo = { .compress = compress_lzo, .decompress = decompress_lzo, .allocate = allocate_lzo, @@ -393,7 +393,7 @@ static void free_lz4(void) big_oops_buf_sz = 0; } -static struct pstore_zbackend backend_lz4 = { +static const struct pstore_zbackend backend_lz4 = { .compress = compress_lz4, .decompress = decompress_lz4, .allocate = allocate_lz4, @@ -402,7 +402,7 @@ static struct pstore_zbackend backend_lz4 = { }; #endif -static struct pstore_zbackend *zbackend = +static const struct pstore_zbackend *zbackend = #if defined(CONFIG_PSTORE_ZLIB_COMPRESS) &backend_zlib; #elif defined(CONFIG_PSTORE_LZO_COMPRESS) @@ -484,7 +484,6 @@ static void pstore_dump(struct kmsg_dumper *dumper, { unsigned long total = 0; const char *why; - u64 id; unsigned int part = 1; unsigned long flags = 0; int is_locked; @@ -506,48 +505,59 @@ static void pstore_dump(struct kmsg_dumper *dumper, oopscount++; while (total < kmsg_bytes) { char *dst; - unsigned long size; - int hsize; + size_t dst_size; + int header_size; int zipped_len = -1; - size_t len; - bool compressed = false; - size_t total_len; + size_t dump_size; + struct pstore_record record = { + .type = PSTORE_TYPE_DMESG, + .count = oopscount, + .reason = reason, + .part = part, + .compressed = false, + .buf = psinfo->buf, + .psi = psinfo, + }; if (big_oops_buf && is_locked) { dst = big_oops_buf; - size = big_oops_buf_sz; + dst_size = big_oops_buf_sz; } else { dst = psinfo->buf; - size = psinfo->bufsize; + dst_size = psinfo->bufsize; } - hsize = sprintf(dst, "%s#%d Part%u\n", why, oopscount, part); - size -= hsize; + /* Write dump header. */ + header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why, + oopscount, part); + dst_size -= header_size; - if (!kmsg_dump_get_buffer(dumper, true, dst + hsize, - size, &len)) + /* Write dump contents. */ + if (!kmsg_dump_get_buffer(dumper, true, dst + header_size, + dst_size, &dump_size)) break; if (big_oops_buf && is_locked) { zipped_len = pstore_compress(dst, psinfo->buf, - hsize + len, psinfo->bufsize); + header_size + dump_size, + psinfo->bufsize); if (zipped_len > 0) { - compressed = true; - total_len = zipped_len; + record.compressed = true; + record.size = zipped_len; } else { - total_len = copy_kmsg_to_buffer(hsize, len); + record.size = copy_kmsg_to_buffer(header_size, + dump_size); } } else { - total_len = hsize + len; + record.size = header_size + dump_size; } - ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, - oopscount, compressed, total_len, psinfo); + ret = psinfo->write(&record); if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_new_entry = 1; - total += total_len; + total += record.size; part++; } if (is_locked) @@ -577,8 +587,11 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) const char *e = s + c; while (s < e) { + struct pstore_record record = { + .type = PSTORE_TYPE_CONSOLE, + .psi = psinfo, + }; unsigned long flags; - u64 id; if (c > psinfo->bufsize) c = psinfo->bufsize; @@ -589,8 +602,9 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } else { spin_lock_irqsave(&psinfo->buf_lock, flags); } - psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0, - s, 0, c, psinfo); + record.buf = (char *)s; + record.size = c; + psinfo->write(&record); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; @@ -618,48 +632,30 @@ static void pstore_register_console(void) {} static void pstore_unregister_console(void) {} #endif -static int pstore_write_compat(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, - bool compressed, size_t size, - struct pstore_info *psi) -{ - return psi->write_buf(type, reason, id, part, psinfo->buf, compressed, - size, psi); -} - -static int pstore_write_buf_user_compat(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char __user *buf, - bool compressed, size_t size, - struct pstore_info *psi) -{ - unsigned long flags = 0; - size_t i, bufsize = size; - long ret = 0; - - if (unlikely(!access_ok(VERIFY_READ, buf, size))) - return -EFAULT; - if (bufsize > psinfo->bufsize) - bufsize = psinfo->bufsize; - spin_lock_irqsave(&psinfo->buf_lock, flags); - for (i = 0; i < size; ) { - size_t c = min(size - i, bufsize); - - ret = __copy_from_user(psinfo->buf, buf + i, c); - if (unlikely(ret != 0)) { - ret = -EFAULT; - break; - } - ret = psi->write_buf(type, reason, id, part, psinfo->buf, - compressed, c, psi); - if (unlikely(ret < 0)) - break; - i += c; +static int pstore_write_user_compat(struct pstore_record *record, + const char __user *buf) +{ + int ret = 0; + + if (record->buf) + return -EINVAL; + + record->buf = kmalloc(record->size, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + + if (unlikely(copy_from_user(record->buf, buf, record->size))) { + ret = -EFAULT; + goto out; } - spin_unlock_irqrestore(&psinfo->buf_lock, flags); - return unlikely(ret < 0) ? ret : size; + + ret = record->psi->write(record); + +out: + kfree(record->buf); + record->buf = NULL; + + return unlikely(ret < 0) ? ret : record->size; } /* @@ -673,19 +669,35 @@ int pstore_register(struct pstore_info *psi) { struct module *owner = psi->owner; - if (backend && strcmp(backend, psi->name)) + if (backend && strcmp(backend, psi->name)) { + pr_warn("ignoring unexpected backend '%s'\n", psi->name); return -EPERM; + } + + /* Sanity check flags. */ + if (!psi->flags) { + pr_warn("backend '%s' must support at least one frontend\n", + psi->name); + return -EINVAL; + } + + /* Check for required functions. */ + if (!psi->read || !psi->write) { + pr_warn("backend '%s' must implement read() and write()\n", + psi->name); + return -EINVAL; + } spin_lock(&pstore_lock); if (psinfo) { + pr_warn("backend '%s' already loaded: ignoring '%s'\n", + psinfo->name, psi->name); spin_unlock(&pstore_lock); return -EBUSY; } - if (!psi->write) - psi->write = pstore_write_compat; - if (!psi->write_buf_user) - psi->write_buf_user = pstore_write_buf_user_compat; + if (!psi->write_user) + psi->write_user = pstore_write_user_compat; psinfo = psi; mutex_init(&psinfo->read_mutex); spin_unlock(&pstore_lock); @@ -709,6 +721,7 @@ int pstore_register(struct pstore_info *psi) if (psi->flags & PSTORE_FLAGS_PMSG) pstore_register_pmsg(); + /* Start watching for new records, if desired. */ if (pstore_update_ms >= 0) { pstore_timer.expires = jiffies + msecs_to_jiffies(pstore_update_ms); @@ -721,16 +734,21 @@ int pstore_register(struct pstore_info *psi) */ backend = psi->name; - module_put(owner); - pr_info("Registered %s as persistent store backend\n", psi->name); + module_put(owner); + return 0; } EXPORT_SYMBOL_GPL(pstore_register); void pstore_unregister(struct pstore_info *psi) { + /* Stop timer and make sure all work has finished. */ + pstore_update_ms = -1; + del_timer_sync(&pstore_timer); + flush_work(&pstore_work); + if (psi->flags & PSTORE_FLAGS_PMSG) pstore_unregister_pmsg(); if (psi->flags & PSTORE_FLAGS_FTRACE) @@ -747,66 +765,99 @@ void pstore_unregister(struct pstore_info *psi) } EXPORT_SYMBOL_GPL(pstore_unregister); +static void decompress_record(struct pstore_record *record) +{ + int unzipped_len; + char *decompressed; + + /* Only PSTORE_TYPE_DMESG support compression. */ + if (!record->compressed || record->type != PSTORE_TYPE_DMESG) { + pr_warn("ignored compressed record type %d\n", record->type); + return; + } + + /* No compression method has created the common buffer. */ + if (!big_oops_buf) { + pr_warn("no decompression buffer allocated\n"); + return; + } + + unzipped_len = pstore_decompress(record->buf, big_oops_buf, + record->size, big_oops_buf_sz); + if (unzipped_len <= 0) { + pr_err("decompression failed: %d\n", unzipped_len); + return; + } + + /* Build new buffer for decompressed contents. */ + decompressed = kmalloc(unzipped_len + record->ecc_notice_size, + GFP_KERNEL); + if (!decompressed) { + pr_err("decompression ran out of memory\n"); + return; + } + memcpy(decompressed, big_oops_buf, unzipped_len); + + /* Append ECC notice to decompressed buffer. */ + memcpy(decompressed + unzipped_len, record->buf + record->size, + record->ecc_notice_size); + + /* Swap out compresed contents with decompressed contents. */ + kfree(record->buf); + record->buf = decompressed; + record->size = unzipped_len; + record->compressed = false; +} + /* - * Read all the records from the persistent store. Create + * Read all the records from one persistent store backend. Create * files in our filesystem. Don't warn about -EEXIST errors * when we are re-scanning the backing store looking to add new * error records. */ -void pstore_get_records(int quiet) -{ - struct pstore_info *psi = psinfo; - char *buf = NULL; - ssize_t size; - u64 id; - int count; - enum pstore_type_id type; - struct timespec time; - int failed = 0, rc; - bool compressed; - int unzipped_len = -1; - ssize_t ecc_notice_size = 0; - - if (!psi) +void pstore_get_backend_records(struct pstore_info *psi, + struct dentry *root, int quiet) +{ + int failed = 0; + + if (!psi || !root) return; mutex_lock(&psi->read_mutex); if (psi->open && psi->open(psi)) goto out; - while ((size = psi->read(&id, &type, &count, &time, &buf, &compressed, - &ecc_notice_size, psi)) > 0) { - if (compressed && (type == PSTORE_TYPE_DMESG)) { - if (big_oops_buf) - unzipped_len = pstore_decompress(buf, - big_oops_buf, size, - big_oops_buf_sz); - - if (unzipped_len > 0) { - if (ecc_notice_size) - memcpy(big_oops_buf + unzipped_len, - buf + size, ecc_notice_size); - kfree(buf); - buf = big_oops_buf; - size = unzipped_len; - compressed = false; - } else { - pr_err("decompression failed;returned %d\n", - unzipped_len); - compressed = true; - } + /* + * Backend callback read() allocates record.buf. decompress_record() + * may reallocate record.buf. On success, pstore_mkfile() will keep + * the record.buf, so free it only on failure. + */ + for (;;) { + struct pstore_record *record; + int rc; + + record = kzalloc(sizeof(*record), GFP_KERNEL); + if (!record) { + pr_err("out of memory creating record\n"); + break; + } + record->psi = psi; + + record->size = psi->read(record); + + /* No more records left in backend? */ + if (record->size <= 0) + break; + + decompress_record(record); + rc = pstore_mkfile(root, record); + if (rc) { + /* pstore_mkfile() did not take record, so free it. */ + kfree(record->buf); + kfree(record); + if (rc != -EEXIST || !quiet) + failed++; } - rc = pstore_mkfile(type, psi->name, id, count, buf, - compressed, size + ecc_notice_size, - time, psi); - if (unzipped_len < 0) { - /* Free buffer other than big oops */ - kfree(buf); - buf = NULL; - } else - unzipped_len = -1; - if (rc && (rc != -EEXIST || !quiet)) - failed++; } if (psi->close) psi->close(psi); @@ -830,7 +881,9 @@ static void pstore_timefunc(unsigned long dummy) schedule_work(&pstore_work); } - mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); + if (pstore_update_ms >= 0) + mod_timer(&pstore_timer, + jiffies + msecs_to_jiffies(pstore_update_ms)); } module_param(backend, charp, 0444); diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index 78f6176c020f8256dad089221af85d9560e5ac2f..209755e0d7c8ee92e3f19ff8fd4b45b6a9a9d1fa 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -15,7 +15,6 @@ #include #include #include -#include #include "internal.h" static DEFINE_MUTEX(pmsg_lock); @@ -23,19 +22,22 @@ static DEFINE_MUTEX(pmsg_lock); static ssize_t write_pmsg(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - u64 id; + struct pstore_record record = { + .type = PSTORE_TYPE_PMSG, + .size = count, + .psi = psinfo, + }; int ret; if (!count) return 0; - /* check outside lock, page in any data. write_buf_user also checks */ + /* check outside lock, page in any data. write_user also checks */ if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; mutex_lock(&pmsg_lock); - ret = psinfo->write_buf_user(PSTORE_TYPE_PMSG, 0, &id, 0, buf, 0, count, - psinfo); + ret = psinfo->write_user(&record, buf); mutex_unlock(&pmsg_lock); return ret ? ret : count; } diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 11f918d34b1e50556fda04f6f1f50e62f5dc5e3c..5cb022c8cd33f23b566721627dc25dd0263da3a8 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -58,7 +58,7 @@ module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400); MODULE_PARM_DESC(pmsg_size, "size of user space message log"); static unsigned long long mem_address; -module_param(mem_address, ullong, 0400); +module_param_hw(mem_address, ullong, other, 0400); MODULE_PARM_DESC(mem_address, "start of reserved RAM used to store oops/panic logs"); @@ -235,35 +235,34 @@ static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, return 0; } -static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, - char **buf, bool *compressed, - ssize_t *ecc_notice_size, - struct pstore_info *psi) +static ssize_t ramoops_pstore_read(struct pstore_record *record) { ssize_t size = 0; - struct ramoops_context *cxt = psi->data; + struct ramoops_context *cxt = record->psi->data; struct persistent_ram_zone *prz = NULL; int header_length = 0; bool free_prz = false; - /* Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but + /* + * Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but * PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have * valid time stamps, so it is initialized to zero. */ - time->tv_sec = 0; - time->tv_nsec = 0; - *compressed = false; + record->time.tv_sec = 0; + record->time.tv_nsec = 0; + record->compressed = false; /* Find the next valid persistent_ram_zone for DMESG */ while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) { prz = ramoops_get_next_prz(cxt->dprzs, &cxt->dump_read_cnt, - cxt->max_dump_cnt, id, type, + cxt->max_dump_cnt, &record->id, + &record->type, PSTORE_TYPE_DMESG, 1); if (!prz_ok(prz)) continue; header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), - time, compressed); + &record->time, + &record->compressed); /* Clear and skip this DMESG record if it has no valid header */ if (!header_length) { persistent_ram_free_old(prz); @@ -274,18 +273,20 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, - 1, id, type, PSTORE_TYPE_CONSOLE, 0); + 1, &record->id, &record->type, + PSTORE_TYPE_CONSOLE, 0); if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt, - 1, id, type, PSTORE_TYPE_PMSG, 0); + 1, &record->id, &record->type, + PSTORE_TYPE_PMSG, 0); /* ftrace is last since it may want to dynamically allocate memory. */ if (!prz_ok(prz)) { if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) { prz = ramoops_get_next_prz(cxt->fprzs, - &cxt->ftrace_read_cnt, 1, id, type, - PSTORE_TYPE_FTRACE, 0); + &cxt->ftrace_read_cnt, 1, &record->id, + &record->type, PSTORE_TYPE_FTRACE, 0); } else { /* * Build a new dummy record which combines all the @@ -302,8 +303,10 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { prz_next = ramoops_get_next_prz(cxt->fprzs, &cxt->ftrace_read_cnt, - cxt->max_ftrace_cnt, id, - type, PSTORE_TYPE_FTRACE, 0); + cxt->max_ftrace_cnt, + &record->id, + &record->type, + PSTORE_TYPE_FTRACE, 0); if (!prz_ok(prz_next)) continue; @@ -316,7 +319,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (size) goto out; } - *id = 0; + record->id = 0; prz = tmp_prz; } } @@ -329,17 +332,19 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, size = persistent_ram_old_size(prz) - header_length; /* ECC correction notice */ - *ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); + record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); - *buf = kmalloc(size + *ecc_notice_size + 1, GFP_KERNEL); - if (*buf == NULL) { + record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL); + if (record->buf == NULL) { size = -ENOMEM; goto out; } - memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size); + memcpy(record->buf, (char *)persistent_ram_old(prz) + header_length, + size); - persistent_ram_ecc_string(prz, *buf + size, *ecc_notice_size + 1); + persistent_ram_ecc_string(prz, record->buf + size, + record->ecc_notice_size + 1); out: if (free_prz) { @@ -373,23 +378,18 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz, return len; } -static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char *buf, - bool compressed, size_t size, - struct pstore_info *psi) +static int notrace ramoops_pstore_write(struct pstore_record *record) { - struct ramoops_context *cxt = psi->data; + struct ramoops_context *cxt = record->psi->data; struct persistent_ram_zone *prz; - size_t hlen; + size_t size, hlen; - if (type == PSTORE_TYPE_CONSOLE) { + if (record->type == PSTORE_TYPE_CONSOLE) { if (!cxt->cprz) return -ENOMEM; - persistent_ram_write(cxt->cprz, buf, size); + persistent_ram_write(cxt->cprz, record->buf, record->size); return 0; - } else if (type == PSTORE_TYPE_FTRACE) { + } else if (record->type == PSTORE_TYPE_FTRACE) { int zonenum; if (!cxt->fprzs) @@ -402,33 +402,36 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, else zonenum = 0; - persistent_ram_write(cxt->fprzs[zonenum], buf, size); + persistent_ram_write(cxt->fprzs[zonenum], record->buf, + record->size); return 0; - } else if (type == PSTORE_TYPE_PMSG) { + } else if (record->type == PSTORE_TYPE_PMSG) { pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__); return -EINVAL; } - if (type != PSTORE_TYPE_DMESG) + if (record->type != PSTORE_TYPE_DMESG) return -EINVAL; - /* Out of the various dmesg dump types, ramoops is currently designed + /* + * Out of the various dmesg dump types, ramoops is currently designed * to only store crash logs, rather than storing general kernel logs. */ - if (reason != KMSG_DUMP_OOPS && - reason != KMSG_DUMP_PANIC) + if (record->reason != KMSG_DUMP_OOPS && + record->reason != KMSG_DUMP_PANIC) return -EINVAL; /* Skip Oopes when configured to do so. */ - if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops) + if (record->reason == KMSG_DUMP_OOPS && !cxt->dump_oops) return -EINVAL; - /* Explicitly only take the first part of any new crash. + /* + * Explicitly only take the first part of any new crash. * If our buffer is larger than kmsg_bytes, this can never happen, * and if our buffer is smaller than kmsg_bytes, we don't want the * report split across multiple records. */ - if (part != 1) + if (record->part != 1) return -ENOSPC; if (!cxt->dprzs) @@ -436,53 +439,50 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, prz = cxt->dprzs[cxt->dump_write_cnt]; - hlen = ramoops_write_kmsg_hdr(prz, compressed); + /* Build header and append record contents. */ + hlen = ramoops_write_kmsg_hdr(prz, record->compressed); + size = record->size; if (size + hlen > prz->buffer_size) size = prz->buffer_size - hlen; - persistent_ram_write(prz, buf, size); + persistent_ram_write(prz, record->buf, size); cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt; return 0; } -static int notrace ramoops_pstore_write_buf_user(enum pstore_type_id type, - enum kmsg_dump_reason reason, - u64 *id, unsigned int part, - const char __user *buf, - bool compressed, size_t size, - struct pstore_info *psi) +static int notrace ramoops_pstore_write_user(struct pstore_record *record, + const char __user *buf) { - if (type == PSTORE_TYPE_PMSG) { - struct ramoops_context *cxt = psi->data; + if (record->type == PSTORE_TYPE_PMSG) { + struct ramoops_context *cxt = record->psi->data; if (!cxt->mprz) return -ENOMEM; - return persistent_ram_write_user(cxt->mprz, buf, size); + return persistent_ram_write_user(cxt->mprz, buf, record->size); } return -EINVAL; } -static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, - struct timespec time, struct pstore_info *psi) +static int ramoops_pstore_erase(struct pstore_record *record) { - struct ramoops_context *cxt = psi->data; + struct ramoops_context *cxt = record->psi->data; struct persistent_ram_zone *prz; - switch (type) { + switch (record->type) { case PSTORE_TYPE_DMESG: - if (id >= cxt->max_dump_cnt) + if (record->id >= cxt->max_dump_cnt) return -EINVAL; - prz = cxt->dprzs[id]; + prz = cxt->dprzs[record->id]; break; case PSTORE_TYPE_CONSOLE: prz = cxt->cprz; break; case PSTORE_TYPE_FTRACE: - if (id >= cxt->max_ftrace_cnt) + if (record->id >= cxt->max_ftrace_cnt) return -EINVAL; - prz = cxt->fprzs[id]; + prz = cxt->fprzs[record->id]; break; case PSTORE_TYPE_PMSG: prz = cxt->mprz; @@ -503,8 +503,8 @@ static struct ramoops_context oops_cxt = { .name = "ramoops", .open = ramoops_pstore_open, .read = ramoops_pstore_read, - .write_buf = ramoops_pstore_write_buf, - .write_buf_user = ramoops_pstore_write_buf_user, + .write = ramoops_pstore_write, + .write_user = ramoops_pstore_write_user, .erase = ramoops_pstore_erase, }, }; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index bc927e30bdccd4fe89fd39d673126ae68d4e0e8e..e11672aa457514b8d84695688edc342f80a57b20 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -532,7 +532,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, } /* Initialize general buffer state. */ - prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock); + raw_spin_lock_init(&prz->buffer_lock); prz->flags = flags; ret = persistent_ram_buffer_map(start, size, prz, memtype); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 74b489e3714d51cb66de83d4a9f3510c250456fe..48813aeaab8067e921d67b110e28e2a8dd94a3c0 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode) } EXPORT_SYMBOL(dquot_initialize); +bool dquot_initialize_needed(struct inode *inode) +{ + struct dquot **dquots; + int i; + + if (!dquot_active(inode)) + return false; + + dquots = i_dquot(inode); + for (i = 0; i < MAXQUOTAS; i++) + if (!dquots[i] && sb_has_quota_active(inode->i_sb, i)) + return true; + return false; +} +EXPORT_SYMBOL(dquot_initialize_needed); + /* * Release all quotas referenced by inode. * @@ -2188,8 +2204,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) /* This can happen when suspending quotas on remount-ro... */ if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) { inode_lock(toputinode[cnt]); - toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | - S_NOATIME | S_NOQUOTA); + toputinode[cnt]->i_flags &= ~S_NOQUOTA; truncate_inode_pages(&toputinode[cnt]->i_data, 0); inode_unlock(toputinode[cnt]); mark_inode_dirty_sync(toputinode[cnt]); @@ -2237,7 +2252,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); int error; - int oldflags = -1; if (!fmt) return -ESRCH; @@ -2285,9 +2299,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); - oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | - S_NOQUOTA); - inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; + inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more @@ -2329,14 +2341,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dqopt->files[type] = NULL; iput(inode); out_file_flags: - if (oldflags != -1) { - inode_lock(inode); - /* Set the flags back (in the case of accidental quotaon() - * on a wrong file we don't want to mess up the flags) */ - inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); - inode->i_flags |= oldflags; - inode_unlock(inode); - } + inode_lock(inode); + inode->i_flags &= ~S_NOQUOTA; + inode_unlock(inode); out_fmt: put_quota_format(fmt); @@ -2780,18 +2787,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) } EXPORT_SYMBOL(dquot_set_dqinfo); -const struct quotactl_ops dquot_quotactl_ops = { - .quota_on = dquot_quota_on, - .quota_off = dquot_quota_off, - .quota_sync = dquot_quota_sync, - .get_state = dquot_get_state, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .get_nextdqblk = dquot_get_next_dqblk, - .set_dqblk = dquot_set_dqblk -}; -EXPORT_SYMBOL(dquot_quotactl_ops); - const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, diff --git a/fs/read_write.c b/fs/read_write.c index c4f88afbc67f49ae9e451e06a81c45c37f8171f5..19d4d88fa285b39493d4b7793a69a23876a0988e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -841,6 +841,81 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, return ret; } +#ifdef CONFIG_COMPAT +ssize_t compat_rw_copy_check_uvector(int type, + const struct compat_iovec __user *uvector, unsigned long nr_segs, + unsigned long fast_segs, struct iovec *fast_pointer, + struct iovec **ret_pointer) +{ + compat_ssize_t tot_len; + struct iovec *iov = *ret_pointer = fast_pointer; + ssize_t ret = 0; + int seg; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + if (nr_segs == 0) + goto out; + + ret = -EINVAL; + if (nr_segs > UIO_MAXIOV) + goto out; + if (nr_segs > fast_segs) { + ret = -ENOMEM; + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + if (iov == NULL) + goto out; + } + *ret_pointer = iov; + + ret = -EFAULT; + if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + + /* + * Single unix specification: + * We should -EINVAL if an element length is not >= 0 and fitting an + * ssize_t. + * + * In Linux, the total length is limited to MAX_RW_COUNT, there is + * no overflow possibility. + */ + tot_len = 0; + ret = -EINVAL; + for (seg = 0; seg < nr_segs; seg++) { + compat_uptr_t buf; + compat_ssize_t len; + + if (__get_user(len, &uvector->iov_len) || + __get_user(buf, &uvector->iov_base)) { + ret = -EFAULT; + goto out; + } + if (len < 0) /* size_t not fitting in compat_ssize_t .. */ + goto out; + if (type >= 0 && + !access_ok(vrfy_dir(type), compat_ptr(buf), len)) { + ret = -EFAULT; + goto out; + } + if (len > MAX_RW_COUNT - tot_len) + len = MAX_RW_COUNT - tot_len; + tot_len += len; + iov->iov_base = compat_ptr(buf); + iov->iov_len = (compat_size_t) len; + uvector++; + iov++; + } + ret = tot_len; + +out: + return ret; +} +#endif + static ssize_t __do_readv_writev(int type, struct file *file, struct iov_iter *iter, loff_t *pos, int flags) { @@ -1210,7 +1285,7 @@ static size_t compat_writev(struct file *file, if (!(file->f_mode & FMODE_CAN_WRITE)) goto out; - ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0); + ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags); out: if (ret > 0) diff --git a/fs/readdir.c b/fs/readdir.c index 0e8a7f355f7a1916218a9bd4d874e3ed771c6fc1..89659549c09d81b42d26aef337f62c93ca4c1379 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -324,3 +325,167 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, fdput_pos(f); return error; } + +#ifdef CONFIG_COMPAT +struct compat_old_linux_dirent { + compat_ulong_t d_ino; + compat_ulong_t d_offset; + unsigned short d_namlen; + char d_name[1]; +}; + +struct compat_readdir_callback { + struct dir_context ctx; + struct compat_old_linux_dirent __user *dirent; + int result; +}; + +static int compat_fillonedir(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct compat_readdir_callback *buf = + container_of(ctx, struct compat_readdir_callback, ctx); + struct compat_old_linux_dirent __user *dirent; + compat_ulong_t d_ino; + + if (buf->result) + return -EINVAL; + d_ino = ino; + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->result = -EOVERFLOW; + return -EOVERFLOW; + } + buf->result++; + dirent = buf->dirent; + if (!access_ok(VERIFY_WRITE, dirent, + (unsigned long)(dirent->d_name + namlen + 1) - + (unsigned long)dirent)) + goto efault; + if ( __put_user(d_ino, &dirent->d_ino) || + __put_user(offset, &dirent->d_offset) || + __put_user(namlen, &dirent->d_namlen) || + __copy_to_user(dirent->d_name, name, namlen) || + __put_user(0, dirent->d_name + namlen)) + goto efault; + return 0; +efault: + buf->result = -EFAULT; + return -EFAULT; +} + +COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, + struct compat_old_linux_dirent __user *, dirent, unsigned int, count) +{ + int error; + struct fd f = fdget_pos(fd); + struct compat_readdir_callback buf = { + .ctx.actor = compat_fillonedir, + .dirent = dirent + }; + + if (!f.file) + return -EBADF; + + error = iterate_dir(f.file, &buf.ctx); + if (buf.result) + error = buf.result; + + fdput_pos(f); + return error; +} + +struct compat_linux_dirent { + compat_ulong_t d_ino; + compat_ulong_t d_off; + unsigned short d_reclen; + char d_name[1]; +}; + +struct compat_getdents_callback { + struct dir_context ctx; + struct compat_linux_dirent __user *current_dir; + struct compat_linux_dirent __user *previous; + int count; + int error; +}; + +static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct compat_linux_dirent __user * dirent; + struct compat_getdents_callback *buf = + container_of(ctx, struct compat_getdents_callback, ctx); + compat_ulong_t d_ino; + int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + + namlen + 2, sizeof(compat_long_t)); + + buf->error = -EINVAL; /* only used if we fail.. */ + if (reclen > buf->count) + return -EINVAL; + d_ino = ino; + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; + return -EOVERFLOW; + } + dirent = buf->previous; + if (dirent) { + if (signal_pending(current)) + return -EINTR; + if (__put_user(offset, &dirent->d_off)) + goto efault; + } + dirent = buf->current_dir; + if (__put_user(d_ino, &dirent->d_ino)) + goto efault; + if (__put_user(reclen, &dirent->d_reclen)) + goto efault; + if (copy_to_user(dirent->d_name, name, namlen)) + goto efault; + if (__put_user(0, dirent->d_name + namlen)) + goto efault; + if (__put_user(d_type, (char __user *) dirent + reclen - 1)) + goto efault; + buf->previous = dirent; + dirent = (void __user *)dirent + reclen; + buf->current_dir = dirent; + buf->count -= reclen; + return 0; +efault: + buf->error = -EFAULT; + return -EFAULT; +} + +COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, + struct compat_linux_dirent __user *, dirent, unsigned int, count) +{ + struct fd f; + struct compat_linux_dirent __user * lastdirent; + struct compat_getdents_callback buf = { + .ctx.actor = compat_filldir, + .current_dir = dirent, + .count = count + }; + int error; + + if (!access_ok(VERIFY_WRITE, dirent, count)) + return -EFAULT; + + f = fdget_pos(fd); + if (!f.file) + return -EBADF; + + error = iterate_dir(f.file, &buf.ctx); + if (error >= 0) + error = buf.error; + lastdirent = buf.previous; + if (lastdirent) { + if (put_user(buf.ctx.pos, &lastdirent->d_off)) + error = -EFAULT; + else + error = count - buf.count; + } + fdput_pos(f); + return error; +} +#endif diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6ab9d64ea1b32777fa79a42f6396f9f52bcab08..873fc04e9403aa7d6ca9c2d13a7d045dd963fe75 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1375,7 +1375,6 @@ static void init_inode(struct inode *inode, struct treepath *path) static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; - __u16 flags; set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, inode->i_nlink); @@ -1390,9 +1389,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); else set_sd_v2_generation(sd_v2, inode->i_generation); - flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, &flags); - set_sd_v2_attrs(sd_v2, flags); + set_sd_v2_attrs(sd_v2, REISERFS_I(inode)->i_attrs); } /* used to copy inode's fields to old stat data */ @@ -2002,10 +1999,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, /* uid and gid must already be set by the caller for quota init */ - /* symlink cannot be immutable or append only, right? */ - if (S_ISLNK(inode->i_mode)) - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_size = i_size; inode->i_blocks = 0; @@ -3095,28 +3088,6 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) } } -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) -{ - if (reiserfs_attrs(inode->i_sb)) { - if (inode->i_flags & S_IMMUTABLE) - *sd_attrs |= REISERFS_IMMUTABLE_FL; - else - *sd_attrs &= ~REISERFS_IMMUTABLE_FL; - if (inode->i_flags & S_SYNC) - *sd_attrs |= REISERFS_SYNC_FL; - else - *sd_attrs &= ~REISERFS_SYNC_FL; - if (inode->i_flags & S_NOATIME) - *sd_attrs |= REISERFS_NOATIME_FL; - else - *sd_attrs &= ~REISERFS_NOATIME_FL; - if (REISERFS_I(inode)->i_flags & i_nopack_mask) - *sd_attrs |= REISERFS_NOTAIL_FL; - else - *sd_attrs &= ~REISERFS_NOTAIL_FL; - } -} - /* * decide if this buffer needs to stay around for data logging or ordered * write purposes diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 1f4692a505a0c08af54f388d09a7fb9a2f0d75fd..acbbaf7a0bb26be8e25039ca4fee2a64a0db02be 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -47,7 +47,6 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, (__u16 *) & flags); err = put_user(flags, (int __user *)arg); break; case REISERFS_IOC_SETFLAGS:{ diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index aca73dd739066477a8f4eb6293122cfe84f3f055..e3c558d1b78c02c711ae9868fe409d349127103f 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -724,18 +724,18 @@ static void errcatch_print_vi(struct virtual_item *vi) } static struct item_operations errcatch_ops = { - errcatch_bytes_number, - errcatch_decrement_key, - errcatch_is_left_mergeable, - errcatch_print_item, - errcatch_check_item, - - errcatch_create_vi, - errcatch_check_left, - errcatch_check_right, - errcatch_part_size, - errcatch_unit_num, - errcatch_print_vi + .bytes_number = errcatch_bytes_number, + .decrement_key = errcatch_decrement_key, + .is_left_mergeable = errcatch_is_left_mergeable, + .print_item = errcatch_print_item, + .check_item = errcatch_check_item, + + .create_vi = errcatch_create_vi, + .check_left = errcatch_check_left, + .check_right = errcatch_check_right, + .part_size = errcatch_part_size, + .unit_num = errcatch_unit_num, + .print_vi = errcatch_print_vi }; #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index aa40c242f1db3d9901c02eee0fd3e13a351a3c1b..39bb1e838d8da683fa64dadf7ff150b9369d699e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s, depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) __sync_dirty_buffer(jl->j_commit_bh, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock_nested(s, depth); @@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb, if (reiserfs_barrier_flush(sb)) __sync_dirty_buffer(journal->j_header_bh, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(journal->j_header_bh); @@ -1961,7 +1961,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * will be requeued because superblock is being shutdown and doesn't * have MS_ACTIVE set. */ - cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); + reiserfs_cancel_old_flush(sb); /* wait for all commits to finish */ cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 249594a821e0a5e8c60d96c7cad0cd841fccd70b..f5cebd70d9038571a69ab24ef25af0d4779c600f 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -475,7 +475,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, * 'cpy_bytes'; create new item header; * n_ih = new item_header; */ - memcpy(&n_ih, ih, SHORT_KEY_SIZE); + memcpy(&n_ih.ih_key, &ih->ih_key, KEY_SIZE); /* Endian safe, both le */ n_ih.ih_version = ih->ih_version; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 2adcde137c3fb8d569cddcdf83dfd443a11f693f..1d34377fef97624e2ca695b4a552f01e94e84712 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1326,7 +1326,6 @@ struct cpu_key { #define KEY_NOT_FOUND 0 #define KEY_SIZE (sizeof(struct reiserfs_key)) -#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) /* return values for search_by_key and clones */ #define ITEM_FOUND 1 @@ -2949,6 +2948,7 @@ int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); void reiserfs_schedule_old_flush(struct super_block *s); +void reiserfs_cancel_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); @@ -3099,7 +3099,6 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, } void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index feabcde0290d27103a11813682091abd1a191661..685f1e05699868cf59237cec9d3821b1ae2e1a61 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -89,11 +89,27 @@ static void flush_old_commits(struct work_struct *work) sbi = container_of(work, struct reiserfs_sb_info, old_work.work); s = sbi->s_journal->j_work_sb; + /* + * We need s_umount for protecting quota writeback. We have to use + * trylock as reiserfs_cancel_old_flush() may be waiting for this work + * to complete with s_umount held. + */ + if (!down_read_trylock(&s->s_umount)) { + /* Requeue work if we are not cancelling it */ + spin_lock(&sbi->old_work_lock); + if (sbi->work_queued == 1) + queue_delayed_work(system_long_wq, &sbi->old_work, HZ); + spin_unlock(&sbi->old_work_lock); + return; + } spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Avoid clobbering the cancel state... */ + if (sbi->work_queued == 1) + sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); reiserfs_sync_fs(s, 1); + up_read(&s->s_umount); } void reiserfs_schedule_old_flush(struct super_block *s) @@ -117,21 +133,22 @@ void reiserfs_schedule_old_flush(struct super_block *s) spin_unlock(&sbi->old_work_lock); } -static void cancel_old_flush(struct super_block *s) +void reiserfs_cancel_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Make sure no new flushes will be queued */ + sbi->work_queued = 2; spin_unlock(&sbi->old_work_lock); + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); } static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; - cancel_old_flush(s); + reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { @@ -152,7 +169,13 @@ static int reiserfs_freeze(struct super_block *s) static int reiserfs_unfreeze(struct super_block *s) { + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + reiserfs_allow_writes(s); + spin_lock(&sbi->old_work_lock); + /* Allow old_work to run again */ + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); return 0; } @@ -547,12 +570,28 @@ static void reiserfs_kill_sb(struct super_block *s) kill_block_super(s); } +#ifdef CONFIG_QUOTA +static int reiserfs_quota_off(struct super_block *sb, int type); + +static void reiserfs_quota_off_umount(struct super_block *s) +{ + int type; + + for (type = 0; type < REISERFS_MAXQUOTAS; type++) + reiserfs_quota_off(s, type); +} +#else +static inline void reiserfs_quota_off_umount(struct super_block *s) +{ +} +#endif + static void reiserfs_put_super(struct super_block *s) { struct reiserfs_transaction_handle th; th.t_trans_id = 0; - dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + reiserfs_quota_off_umount(s); reiserfs_write_lock(s); @@ -817,7 +856,7 @@ static const struct dquot_operations reiserfs_quota_operations = { static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, - .quota_off = dquot_quota_off, + .quota_off = reiserfs_quota_off, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, @@ -2194,7 +2233,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (sbi->commit_wq) destroy_workqueue(sbi->commit_wq); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + reiserfs_cancel_old_flush(s); reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) @@ -2405,12 +2444,47 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, goto out; } reiserfs_write_unlock(sb); - return dquot_quota_on(sb, type, format_id, path); + err = dquot_quota_on(sb, type, format_id, path); + if (!err) { + inode_lock(inode); + REISERFS_I(inode)->i_attrs |= REISERFS_IMMUTABLE_FL | + REISERFS_NOATIME_FL; + inode_set_flags(inode, S_IMMUTABLE | S_NOATIME, + S_IMMUTABLE | S_NOATIME); + inode_unlock(inode); + mark_inode_dirty(inode); + } + return err; out: reiserfs_write_unlock(sb); return err; } +static int reiserfs_quota_off(struct super_block *sb, int type) +{ + int err; + struct inode *inode = sb_dqopt(sb)->files[type]; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + REISERFS_I(inode)->i_attrs &= ~(REISERFS_IMMUTABLE_FL | + REISERFS_NOATIME_FL); + inode_set_flags(inode, 0, S_IMMUTABLE | S_NOATIME); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + /* * Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code diff --git a/fs/select.c b/fs/select.c index e2112270d75a5f878e291bb5bb681474e3c4eeaf..d6c652a31e99d18d2d924af983c11504315af56f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -338,6 +338,53 @@ static int poll_select_copy_remaining(struct timespec64 *end_time, return ret; } +/* + * Scalable version of the fd_set. + */ + +typedef struct { + unsigned long *in, *out, *ex; + unsigned long *res_in, *res_out, *res_ex; +} fd_set_bits; + +/* + * How many longwords for "nr" bits? + */ +#define FDS_BITPERLONG (8*sizeof(long)) +#define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) +#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) + +/* + * We do a VERIFY_WRITE here even though we are only reading this time: + * we'll write to it eventually.. + * + * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. + */ +static inline +int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) +{ + nr = FDS_BYTES(nr); + if (ufdset) + return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0; + + memset(fdset, 0, nr); + return 0; +} + +static inline unsigned long __must_check +set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) +{ + if (ufdset) + return __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); + return 0; +} + +static inline +void zero_fd_set(unsigned long nr, unsigned long *fdset) +{ + memset(fdset, 0, FDS_BYTES(nr)); +} + #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) @@ -401,7 +448,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in, wait->_key |= POLLOUT_SET; } -int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) +static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) { ktime_t expire, *to = NULL; struct poll_wqueues table; @@ -409,7 +456,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) int retval, i, timed_out = 0; u64 slack = 0; unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; - unsigned long busy_end = 0; + unsigned long busy_start = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -512,11 +559,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) /* only if found POLL_BUSY_LOOP sockets && not out of time */ if (can_busy_loop && !need_resched()) { - if (!busy_end) { - busy_end = busy_loop_end_time(); + if (!busy_start) { + busy_start = busy_loop_current_time(); continue; } - if (!busy_loop_timeout(busy_end)) + if (!busy_loop_timeout(busy_start)) continue; } busy_flag = 0; @@ -586,10 +633,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, goto out_nofds; alloc_size = 6 * size; - bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN); - if (!bits && alloc_size > PAGE_SIZE) - bits = vmalloc(alloc_size); - + bits = kvmalloc(alloc_size, GFP_KERNEL); if (!bits) goto out_nofds; } @@ -800,7 +844,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait, int timed_out = 0, count = 0; u64 slack = 0; unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; - unsigned long busy_end = 0; + unsigned long busy_start = 0; /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { @@ -853,11 +897,11 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait, /* only if found POLL_BUSY_LOOP sockets && not out of time */ if (can_busy_loop && !need_resched()) { - if (!busy_end) { - busy_end = busy_loop_end_time(); + if (!busy_start) { + busy_start = busy_loop_current_time(); continue; } - if (!busy_loop_timeout(busy_end)) + if (!busy_loop_timeout(busy_start)) continue; } busy_flag = 0; @@ -881,7 +925,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait, #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ sizeof(struct pollfd)) -int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, +static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct timespec64 *end_time) { struct poll_wqueues table; @@ -1053,3 +1097,373 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, return ret; } + +#ifdef CONFIG_COMPAT +#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) + +static +int compat_poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec ts; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&ts); + ts = timespec_sub(*end_time, ts); + if (ts.tv_sec < 0) + ts.tv_sec = ts.tv_nsec = 0; + + if (timeval) { + struct compat_timeval rtv; + + rtv.tv_sec = ts.tv_sec; + rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + } else { + struct compat_timespec rts; + + rts.tv_sec = ts.tv_sec; + rts.tv_nsec = ts.tv_nsec; + + if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + } + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + +/* + * Ooo, nasty. We need here to frob 32-bit unsigned longs to + * 64-bit unsigned longs. + */ +static +int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, + unsigned long *fdset) +{ + nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); + if (ufdset) { + unsigned long odd; + + if (!access_ok(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) + return -EFAULT; + + odd = nr & 1UL; + nr &= ~1UL; + while (nr) { + unsigned long h, l; + if (__get_user(l, ufdset) || __get_user(h, ufdset+1)) + return -EFAULT; + ufdset += 2; + *fdset++ = h << 32 | l; + nr -= 2; + } + if (odd && __get_user(*fdset, ufdset)) + return -EFAULT; + } else { + /* Tricky, must clear full unsigned long in the + * kernel fdset at the end, this makes sure that + * actually happens. + */ + memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); + } + return 0; +} + +static +int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, + unsigned long *fdset) +{ + unsigned long odd; + nr = DIV_ROUND_UP(nr, __COMPAT_NFDBITS); + + if (!ufdset) + return 0; + + odd = nr & 1UL; + nr &= ~1UL; + while (nr) { + unsigned long h, l; + l = *fdset++; + h = l >> 32; + if (__put_user(l, ufdset) || __put_user(h, ufdset+1)) + return -EFAULT; + ufdset += 2; + nr -= 2; + } + if (odd && __put_user(*fdset, ufdset)) + return -EFAULT; + return 0; +} + + +/* + * This is a virtual copy of sys_select from fs/select.c and probably + * should be compared to it from time to time + */ + +/* + * We can actually return ERESTARTSYS instead of EINTR, but I'd + * like to be certain this leads to no problems. So I return + * EINTR just for safety. + * + * Update: ERESTARTSYS breaks at least the xview clock binary, so + * I'm trying ERESTARTNOHAND which restart only when you want to. + */ +static int compat_core_sys_select(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct timespec *end_time) +{ + fd_set_bits fds; + void *bits; + int size, max_fds, ret = -EINVAL; + struct fdtable *fdt; + long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; + + if (n < 0) + goto out_nofds; + + /* max_fds can increase, so grab it once to avoid race */ + rcu_read_lock(); + fdt = files_fdtable(current->files); + max_fds = fdt->max_fds; + rcu_read_unlock(); + if (n > max_fds) + n = max_fds; + + /* + * We need 6 bitmaps (in/out/ex for both incoming and outgoing), + * since we used fdset we need to allocate memory in units of + * long-words. + */ + size = FDS_BYTES(n); + bits = stack_fds; + if (size > sizeof(stack_fds) / 6) { + bits = kmalloc(6 * size, GFP_KERNEL); + ret = -ENOMEM; + if (!bits) + goto out_nofds; + } + fds.in = (unsigned long *) bits; + fds.out = (unsigned long *) (bits + size); + fds.ex = (unsigned long *) (bits + 2*size); + fds.res_in = (unsigned long *) (bits + 3*size); + fds.res_out = (unsigned long *) (bits + 4*size); + fds.res_ex = (unsigned long *) (bits + 5*size); + + if ((ret = compat_get_fd_set(n, inp, fds.in)) || + (ret = compat_get_fd_set(n, outp, fds.out)) || + (ret = compat_get_fd_set(n, exp, fds.ex))) + goto out; + zero_fd_set(n, fds.res_in); + zero_fd_set(n, fds.res_out); + zero_fd_set(n, fds.res_ex); + + ret = do_select(n, &fds, end_time); + + if (ret < 0) + goto out; + if (!ret) { + ret = -ERESTARTNOHAND; + if (signal_pending(current)) + goto out; + ret = 0; + } + + if (compat_set_fd_set(n, inp, fds.res_in) || + compat_set_fd_set(n, outp, fds.res_out) || + compat_set_fd_set(n, exp, fds.res_ex)) + ret = -EFAULT; +out: + if (bits != stack_fds) + kfree(bits); +out_nofds: + return ret; +} + +COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timeval __user *, tvp) +{ + struct timespec end_time, *to = NULL; + struct compat_timeval tv; + int ret; + + if (tvp) { + if (copy_from_user(&tv, tvp, sizeof(tv))) + return -EFAULT; + + to = &end_time; + if (poll_select_set_timeout(to, + tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), + (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) + return -EINVAL; + } + + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = compat_poll_select_copy_remaining(&end_time, tvp, 1, ret); + + return ret; +} + +struct compat_sel_arg_struct { + compat_ulong_t n; + compat_uptr_t inp; + compat_uptr_t outp; + compat_uptr_t exp; + compat_uptr_t tvp; +}; + +COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) +{ + struct compat_sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); +} + +static long do_compat_pselect(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) +{ + compat_sigset_t ss32; + sigset_t ksigmask, sigsaved; + struct compat_timespec ts; + struct timespec end_time, *to = NULL; + int ret; + + if (tsp) { + if (copy_from_user(&ts, tsp, sizeof(ts))) + return -EFAULT; + + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + if (sigmask) { + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; + sigset_from_compat(&ksigmask, &ss32); + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); + + if (ret == -ERESTARTNOHAND) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return ret; +} + +COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timespec __user *, tsp, void __user *, sig) +{ + compat_size_t sigsetsize = 0; + compat_uptr_t up = 0; + + if (sig) { + if (!access_ok(VERIFY_READ, sig, + sizeof(compat_uptr_t)+sizeof(compat_size_t)) || + __get_user(up, (compat_uptr_t __user *)sig) || + __get_user(sigsetsize, + (compat_size_t __user *)(sig+sizeof(up)))) + return -EFAULT; + } + return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), + sigsetsize); +} + +COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, + unsigned int, nfds, struct compat_timespec __user *, tsp, + const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) +{ + compat_sigset_t ss32; + sigset_t ksigmask, sigsaved; + struct compat_timespec ts; + struct timespec end_time, *to = NULL; + int ret; + + if (tsp) { + if (copy_from_user(&ts, tsp, sizeof(ts))) + return -EFAULT; + + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + if (sigmask) { + if (sigsetsize != sizeof(compat_sigset_t)) + return -EINVAL; + if (copy_from_user(&ss32, sigmask, sizeof(ss32))) + return -EFAULT; + sigset_from_compat(&ksigmask, &ss32); + + sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + ret = do_sys_poll(ufds, nfds, to); + + /* We can restart this syscall, usually */ + if (ret == -EINTR) { + /* + * Don't restore the signal mask yet. Let do_signal() deliver + * the signal on the way back to userspace, before the signal + * mask is restored. + */ + if (sigmask) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_restore_sigmask(); + } + ret = -ERESTARTNOHAND; + } else if (sigmask) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + ret = compat_poll_select_copy_remaining(&end_time, tsp, 0, ret); + + return ret; +} +#endif diff --git a/fs/seq_file.c b/fs/seq_file.c index ca69fb99e41a8872d6d204d8fd8cffe7f08b3336..dc7c2be963ed4e2c2751313a44270b7655a45b4d 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -25,21 +25,7 @@ static void seq_set_overflow(struct seq_file *m) static void *seq_buf_alloc(unsigned long size) { - void *buf; - gfp_t gfp = GFP_KERNEL; - - /* - * For high order allocations, use __GFP_NORETRY to avoid oom-killing - - * it's better to fall back to vmalloc() than to kill things. For small - * allocations, just use GFP_KERNEL which will oom kill, thus no need - * for vmalloc fallback. - */ - if (size > PAGE_SIZE) - gfp |= __GFP_NORETRY | __GFP_NOWARN; - buf = kmalloc(size, gfp); - if (!buf && size > PAGE_SIZE) - buf = vmalloc(size); - return buf; + return kvmalloc(size, GFP_KERNEL); } /** diff --git a/fs/signalfd.c b/fs/signalfd.c index 270221fcef42cc42fcfdbc098b587b571be65a12..7e3d71109f51334d2bb8c71a9d2419e6ca4eeb3f 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -38,7 +38,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) /* * The lockless check can race with remove_wait_queue() in progress, * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. + * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. */ if (likely(!waitqueue_active(wqh))) return; diff --git a/fs/splice.c b/fs/splice.c index 006ba50f4ece671f48367b644641ab58b04b65d8..540c4a44756c20cdd0e7e2f3eb3db98712858210 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -247,11 +247,6 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) } EXPORT_SYMBOL(add_to_pipe); -void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) -{ - put_page(spd->pages[i]); -} - /* * Check if we need to grow the arrays holding pages and partial page * descriptions. @@ -393,7 +388,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct iov_iter to; struct page **pages; unsigned int nr_pages; - size_t offset, dummy, copied = 0; + size_t offset, base, copied = 0; ssize_t res; int i; @@ -408,12 +403,11 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset); - res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy); + res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base); if (res <= 0) return -ENOMEM; - BUG_ON(dummy); - nr_pages = DIV_ROUND_UP(res, PAGE_SIZE); + nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE); vec = __vec; if (nr_pages > PIPE_DEF_BUFFERS) { @@ -1359,6 +1353,8 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, struct fd f; long error; + if (unlikely(flags & ~SPLICE_F_ALL)) + return -EINVAL; if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; else if (unlikely(!nr_segs)) @@ -1409,6 +1405,9 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, if (unlikely(!len)) return 0; + if (unlikely(flags & ~SPLICE_F_ALL)) + return -EINVAL; + error = -EBADF; in = fdget(fd_in); if (in.file) { @@ -1737,6 +1736,9 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) struct fd in; int error; + if (unlikely(flags & ~SPLICE_F_ALL)) + return -EINVAL; + if (unlikely(!len)) return 0; diff --git a/fs/stat.c b/fs/stat.c index c6c963b2546b4f2d0301f40706064498c31fd3b6..c35610845ab19f8c17e4ef96e851cddc2205bba9 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -547,13 +548,13 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. - * @filename: File to stat *or* NULL. + * @filename: File to stat or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * - * Note that if filename is NULL, then it does the equivalent of fstat() using - * dfd to indicate the file of interest. + * Note that fstat() can be emulated by setting dfd to the fd of interest, + * supplying "" as the filename and setting AT_EMPTY_PATH in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, @@ -568,16 +569,98 @@ SYSCALL_DEFINE5(statx, if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; - if (filename) - error = vfs_statx(dfd, filename, flags, &stat, mask); - else - error = vfs_statx_fd(dfd, &stat, mask, flags); + error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } +#ifdef CONFIG_COMPAT +static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) +{ + struct compat_stat tmp; + + if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + return -EOVERFLOW; + + memset(&tmp, 0, sizeof(tmp)); + tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) + return -EOVERFLOW; + tmp.st_mode = stat->mode; + tmp.st_nlink = stat->nlink; + if (tmp.st_nlink != stat->nlink) + return -EOVERFLOW; + SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); + tmp.st_rdev = old_encode_dev(stat->rdev); + if ((u64) stat->size > MAX_NON_LFS) + return -EOVERFLOW; + tmp.st_size = stat->size; + tmp.st_atime = stat->atime.tv_sec; + tmp.st_atime_nsec = stat->atime.tv_nsec; + tmp.st_mtime = stat->mtime.tv_sec; + tmp.st_mtime_nsec = stat->mtime.tv_nsec; + tmp.st_ctime = stat->ctime.tv_sec; + tmp.st_ctime_nsec = stat->ctime.tv_nsec; + tmp.st_blocks = stat->blocks; + tmp.st_blksize = stat->blksize; + return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; +} + +COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, + struct compat_stat __user *, statbuf) +{ + struct kstat stat; + int error; + + error = vfs_stat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); +} + +COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, + struct compat_stat __user *, statbuf) +{ + struct kstat stat; + int error; + + error = vfs_lstat(filename, &stat); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); +} + +#ifndef __ARCH_WANT_STAT64 +COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, + const char __user *, filename, + struct compat_stat __user *, statbuf, int, flag) +{ + struct kstat stat; + int error; + + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_compat_stat(&stat, statbuf); +} +#endif + +COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, + struct compat_stat __user *, statbuf) +{ + struct kstat stat; + int error = vfs_fstat(fd, &stat); + + if (!error) + error = cp_compat_stat(&stat, statbuf); + return error; +} +#endif + /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { @@ -589,6 +672,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes) inode->i_bytes -= 512; } } +EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { diff --git a/fs/statfs.c b/fs/statfs.c index 13ae259d48795a782e566b7deaceadc73f86d8c7..4e4623c7a1260eafaf564e87b4812091a9b3eb0e 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "internal.h" static int flags_by_mnt(int mnt_flags) @@ -239,3 +240,142 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) return copy_to_user(ubuf, &tmp, sizeof(struct ustat)) ? -EFAULT : 0; } + +#ifdef CONFIG_COMPAT +static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *kbuf) +{ + if (sizeof ubuf->f_blocks == 4) { + if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail | + kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL) + return -EOVERFLOW; + /* f_files and f_ffree may be -1; it's okay + * to stuff that into 32 bits */ + if (kbuf->f_files != 0xffffffffffffffffULL + && (kbuf->f_files & 0xffffffff00000000ULL)) + return -EOVERFLOW; + if (kbuf->f_ffree != 0xffffffffffffffffULL + && (kbuf->f_ffree & 0xffffffff00000000ULL)) + return -EOVERFLOW; + } + if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || + __put_user(kbuf->f_type, &ubuf->f_type) || + __put_user(kbuf->f_bsize, &ubuf->f_bsize) || + __put_user(kbuf->f_blocks, &ubuf->f_blocks) || + __put_user(kbuf->f_bfree, &ubuf->f_bfree) || + __put_user(kbuf->f_bavail, &ubuf->f_bavail) || + __put_user(kbuf->f_files, &ubuf->f_files) || + __put_user(kbuf->f_ffree, &ubuf->f_ffree) || + __put_user(kbuf->f_namelen, &ubuf->f_namelen) || + __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || + __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || + __put_user(kbuf->f_frsize, &ubuf->f_frsize) || + __put_user(kbuf->f_flags, &ubuf->f_flags) || + __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) + return -EFAULT; + return 0; +} + +/* + * The following statfs calls are copies of code from fs/statfs.c and + * should be checked against those from time to time + */ +COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) +{ + struct kstatfs tmp; + int error = user_statfs(pathname, &tmp); + if (!error) + error = put_compat_statfs(buf, &tmp); + return error; +} + +COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) +{ + struct kstatfs tmp; + int error = fd_statfs(fd, &tmp); + if (!error) + error = put_compat_statfs(buf, &tmp); + return error; +} + +static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) +{ + if (sizeof(ubuf->f_bsize) == 4) { + if ((kbuf->f_type | kbuf->f_bsize | kbuf->f_namelen | + kbuf->f_frsize | kbuf->f_flags) & 0xffffffff00000000ULL) + return -EOVERFLOW; + /* f_files and f_ffree may be -1; it's okay + * to stuff that into 32 bits */ + if (kbuf->f_files != 0xffffffffffffffffULL + && (kbuf->f_files & 0xffffffff00000000ULL)) + return -EOVERFLOW; + if (kbuf->f_ffree != 0xffffffffffffffffULL + && (kbuf->f_ffree & 0xffffffff00000000ULL)) + return -EOVERFLOW; + } + if (!access_ok(VERIFY_WRITE, ubuf, sizeof(*ubuf)) || + __put_user(kbuf->f_type, &ubuf->f_type) || + __put_user(kbuf->f_bsize, &ubuf->f_bsize) || + __put_user(kbuf->f_blocks, &ubuf->f_blocks) || + __put_user(kbuf->f_bfree, &ubuf->f_bfree) || + __put_user(kbuf->f_bavail, &ubuf->f_bavail) || + __put_user(kbuf->f_files, &ubuf->f_files) || + __put_user(kbuf->f_ffree, &ubuf->f_ffree) || + __put_user(kbuf->f_namelen, &ubuf->f_namelen) || + __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || + __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || + __put_user(kbuf->f_frsize, &ubuf->f_frsize) || + __put_user(kbuf->f_flags, &ubuf->f_flags) || + __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) + return -EFAULT; + return 0; +} + +COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + struct kstatfs tmp; + int error; + + if (sz != sizeof(*buf)) + return -EINVAL; + + error = user_statfs(pathname, &tmp); + if (!error) + error = put_compat_statfs64(buf, &tmp); + return error; +} + +COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + struct kstatfs tmp; + int error; + + if (sz != sizeof(*buf)) + return -EINVAL; + + error = fd_statfs(fd, &tmp); + if (!error) + error = put_compat_statfs64(buf, &tmp); + return error; +} + +/* + * This is a copy of sys_ustat, just dealing with a structure layout. + * Given how simple this syscall is that apporach is more maintainable + * than the various conversion hacks. + */ +COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) +{ + struct compat_ustat tmp; + struct kstatfs sbuf; + int err = vfs_ustat(new_decode_dev(dev), &sbuf); + if (err) + return err; + + memset(&tmp, 0, sizeof(struct compat_ustat)); + tmp.f_tfree = sbuf.f_bfree; + tmp.f_tinode = sbuf.f_ffree; + if (copy_to_user(u, &tmp, sizeof(struct compat_ustat))) + return -EFAULT; + return 0; +} +#endif diff --git a/fs/super.c b/fs/super.c index b8b6a086c03b9a5478dc6a67e0b5054127032f47..adb0c0de428c2c88c0322ae739cbe108f01ae085 100644 --- a/fs/super.c +++ b/fs/super.c @@ -446,6 +446,10 @@ void generic_shutdown_super(struct super_block *sb) hlist_del_init(&sb->s_instances); spin_unlock(&sb_lock); up_write(&sb->s_umount); + if (sb->s_bdi != &noop_backing_dev_info) { + bdi_put(sb->s_bdi); + sb->s_bdi = &noop_backing_dev_info; + } } EXPORT_SYMBOL(generic_shutdown_super); @@ -1049,12 +1053,8 @@ static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; + s->s_bdi = bdi_get(s->s_bdev->bd_bdi); - /* - * We set the bdi here to the queue backing, file systems can - * overwrite this in ->fill_super() - */ - s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } @@ -1255,6 +1255,49 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data) return ERR_PTR(error); } +/* + * Setup private BDI for given superblock. It gets automatically cleaned up + * in generic_shutdown_super(). + */ +int super_setup_bdi_name(struct super_block *sb, char *fmt, ...) +{ + struct backing_dev_info *bdi; + int err; + va_list args; + + bdi = bdi_alloc(GFP_KERNEL); + if (!bdi) + return -ENOMEM; + + bdi->name = sb->s_type->name; + + va_start(args, fmt); + err = bdi_register_va(bdi, fmt, args); + va_end(args); + if (err) { + bdi_put(bdi); + return err; + } + WARN_ON(sb->s_bdi != &noop_backing_dev_info); + sb->s_bdi = bdi; + + return 0; +} +EXPORT_SYMBOL(super_setup_bdi_name); + +/* + * Setup private BDI for given superblock. I gets automatically cleaned up + * in generic_shutdown_super(). + */ +int super_setup_bdi(struct super_block *sb) +{ + static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); + + return super_setup_bdi_name(sb, "%.28s-%ld", sb->s_type->name, + atomic_long_inc_return(&bdi_seq)); +} +EXPORT_SYMBOL(super_setup_bdi); + /* * This is an internal function, please use sb_end_{write,pagefault,intwrite} * instead. diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 21d36d28473562d4a40f75134ae3916b11c8041c..328e89c2cf835d4ff3cec1572ae30a30621fa183 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -266,7 +266,7 @@ static const struct super_operations tracefs_super_operations = { static int trace_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr trace_files[] = {{""}}; + static const struct tree_descr trace_files[] = {{""}}; struct tracefs_fs_info *fsi; int err; diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index b0d0623c83ed88eae3afa31f079dfd2486767d07..83a961bf7280112f84a2567e2dd775f5679af3f3 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -61,3 +61,16 @@ config UBIFS_FS_ENCRYPTION feature is similar to ecryptfs, but it is more memory efficient since it avoids caching the encrypted and decrypted pages in the page cache. + +config UBIFS_FS_SECURITY + bool "UBIFS Security Labels" + depends on UBIFS_FS + default y + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the ubifs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 1e712a36468064a75910e8ba9c0e1cfb2bd3ab28..7cd8a7b95299c2b096c93d0d0fbfd7838f8dff2c 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "ubifs.h" static DEFINE_SPINLOCK(dbg_lock); @@ -286,8 +287,10 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) break; } - pr_err("\t%d: %s (%s)\n", - count++, dent->name, get_dent_type(dent->type)); + pr_err("\t%d: inode %llu, type %s, len %d\n", + count++, (unsigned long long) le64_to_cpu(dent->inum), + get_dent_type(dent->type), + le16_to_cpu(dent->nlen)); fname_name(&nm) = dent->name; fname_len(&nm) = le16_to_cpu(dent->nlen); @@ -464,7 +467,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) - pr_cont("%c", dent->name[i]); + pr_cont("%c", isprint(dent->name[i]) ? + dent->name[i] : '?'); } pr_cont("\n"); @@ -2387,8 +2391,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) ubifs_dump_node(c, sa->node); return -EINVAL; } - if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && - sa->type != UBIFS_XENT_NODE) { + if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE && + sb->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sb->type); ubifs_dump_node(c, sb->node); return -EINVAL; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 30825d882aa94a4c2486d47581f33d1dfca1a409..566079d9b402cdde6082260fca6e07f42f0064e8 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -121,7 +121,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = - ubifs_current_time(inode); + current_time(inode); inode->i_mapping->nrpages = 0; switch (mode & S_IFMT) { @@ -285,6 +285,15 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out_dent; } + if (ubifs_crypt_is_encrypted(dir) && + (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && + !fscrypt_has_permitted_context(dir, inode)) { + ubifs_warn(c, "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); + err = -EPERM; + goto out_inode; + } + done: kfree(dent); fscrypt_free_filename(&nm); @@ -295,6 +304,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, d_add(dentry, inode); return NULL; +out_inode: + iput(inode); out_dent: kfree(dent); out_fname: @@ -606,8 +617,8 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } while (1) { - dbg_gen("feed '%s', ino %llu, new f_pos %#x", - dent->name, (unsigned long long)le64_to_cpu(dent->inum), + dbg_gen("ino %llu, new f_pos %#x", + (unsigned long long)le64_to_cpu(dent->inum), key_hash_flash(c, &dent->key)); ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); @@ -748,9 +759,14 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, goto out_fname; lock_2_inodes(dir, inode); + + /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ + if (inode->i_nlink == 0) + ubifs_delete_orphan(c, inode->i_ino); + inc_nlink(inode); ihold(inode); - inode->i_ctime = ubifs_current_time(inode); + inode->i_ctime = current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; dir->i_mtime = dir->i_ctime = inode->i_ctime; @@ -768,6 +784,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); + if (inode->i_nlink == 0) + ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); @@ -823,7 +841,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = ubifs_current_time(dir); + inode->i_ctime = current_time(dir); drop_nlink(inode); dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; @@ -927,7 +945,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) } lock_2_inodes(dir, inode); - inode->i_ctime = ubifs_current_time(dir); + inode->i_ctime = current_time(dir); clear_nlink(inode); drop_nlink(dir); dir->i_size -= sz_change; @@ -1068,8 +1086,10 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, } err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); - if (err) + if (err) { + kfree(dev); goto out_budg; + } sz_change = CALC_DENT_SIZE(fname_len(&nm)); @@ -1316,9 +1336,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned int uninitialized_var(saved_nlink); struct fscrypt_name old_nm, new_nm; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. @@ -1405,7 +1422,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the @i_ctime for inodes on a * rename. */ - time = ubifs_current_time(old_dir); + time = current_time(old_dir); old_inode->i_ctime = time; /* We must adjust parent link count when renaming directories */ @@ -1578,7 +1595,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, lock_4_inodes(old_dir, new_dir, NULL, NULL); - time = ubifs_current_time(old_dir); + time = current_time(old_dir); fst_inode->i_ctime = time; snd_inode->i_ctime = time; old_dir->i_mtime = old_dir->i_ctime = time; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index d9ae86f96df7b52255c259489b5343a7cd94faf8..2cda3d67e2d01c0e047a58c327a62e2778c9a117 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1196,7 +1196,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); err = ubifs_jnl_truncate(c, inode, old_size, new_size); @@ -1243,7 +1243,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1420,7 +1420,7 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, */ static int update_mctime(struct inode *inode) { - struct timespec now = ubifs_current_time(inode); + struct timespec now = current_time(inode); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1434,7 +1434,7 @@ static int update_mctime(struct inode *inode) return err; mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); @@ -1511,7 +1511,7 @@ static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct ubifs_info *c = inode->i_sb->s_fs_info; - struct timespec now = ubifs_current_time(inode); + struct timespec now = current_time(inode); struct ubifs_budget_req req = { .new_page = 1 }; int err, update_time; @@ -1579,7 +1579,7 @@ static int ubifs_vm_page_mkwrite(struct vm_fault *vmf) struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index da519ba205f614fb7a5ea9d3eb72b4c58b4504e9..fdc311246807a4c3a018ff53e06f44614f5c6800 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -53,7 +53,7 @@ void ubifs_set_inode_flags(struct inode *inode) * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags. * @ioctl_flags: flags to convert * - * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags + * This function converts ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags * (@UBIFS_COMPR_FL, etc). */ static int ioctl2ubifs(int ioctl_flags) @@ -78,8 +78,8 @@ static int ioctl2ubifs(int ioctl_flags) * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags. * @ubifs_flags: flags to convert * - * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags - * (@FS_COMPR_FL, etc). + * This function converts UBIFS inode flags (@UBIFS_COMPR_FL, etc) to ioctl + * flags (@FS_COMPR_FL, etc). */ static int ubifs2ioctl(int ubifs_flags) { @@ -126,7 +126,7 @@ static int setflags(struct inode *inode, int flags) ui->flags = ioctl2ubifs(flags); ubifs_set_inode_flags(inode); - inode->i_ctime = ubifs_current_time(inode); + inode->i_ctime = current_time(inode); release = ui->dirty; mark_inode_dirty_sync(inode); mutex_unlock(&ui->ui_mutex); diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 8ece6ca58c0ba128f658ea0c42ddc7976d942314..caf83d68fb38c2918bf9d421b4a67cd8a96571b7 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -224,16 +224,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, return (void *)((struct ubifs_branch *)idx->branches)->key; } -/** - * ubifs_current_time - round current time to time granularity. - * @inode: inode - */ -static inline struct timespec ubifs_current_time(struct inode *inode) -{ - return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? - current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -} - /** * ubifs_tnc_lookup - look up a file-system node. * @c: UBIFS file-system description object diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 586d59347fff00c4572af57bf6e487f6fa2932d8..3af4472061cc0139638efa80245516f27fe175b0 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -442,7 +442,6 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, { int empty_offs, pad_len; - lnum = lnum; dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); ubifs_assert(!(*offs & 7)); diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 7f1ead29e7270a39374bef080035beaf42374820..8c25081a510969055405b95ed57e8d94ec556f44 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -84,6 +84,8 @@ static int create_default_filesystem(struct ubifs_info *c) int min_leb_cnt = UBIFS_MIN_LEB_CNT; long long tmp64, main_bytes; __le64 tmp_le64; + __le32 tmp_le32; + struct timespec ts; /* Some functions called from here depend on the @c->key_len filed */ c->key_len = UBIFS_SK_LEN; @@ -298,13 +300,17 @@ static int create_default_filesystem(struct ubifs_info *c) ino->ch.node_type = UBIFS_INO_NODE; ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); ino->nlink = cpu_to_le32(2); - tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); + + ktime_get_real_ts(&ts); + ts = timespec_trunc(ts, DEFAULT_TIME_GRAN); + tmp_le64 = cpu_to_le64(ts.tv_sec); ino->atime_sec = tmp_le64; ino->ctime_sec = tmp_le64; ino->mtime_sec = tmp_le64; - ino->atime_nsec = 0; - ino->ctime_nsec = 0; - ino->mtime_nsec = 0; + tmp_le32 = cpu_to_le32(ts.tv_nsec); + ino->atime_nsec = tmp_le32; + ino->ctime_nsec = tmp_le32; + ino->mtime_nsec = tmp_le32; ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index b73811bd7676d6221243f21d52b06d9efed7cf4a..cf4cc99b75b55531789a03b065d86f521489f67e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1827,7 +1827,6 @@ static void ubifs_put_super(struct super_block *sb) } ubifs_umount(c); - bdi_destroy(&c->bdi); ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); } @@ -2019,29 +2018,25 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out; } + err = ubifs_parse_options(c, data, 0); + if (err) + goto out_close; + /* * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For * UBIFS, I/O is not deferred, it is done immediately in readpage, * which means the user would have to wait not just for their own I/O * but the read-ahead I/O as well i.e. completely pointless. * - * Read-ahead will be disabled because @c->bdi.ra_pages is 0. + * Read-ahead will be disabled because @sb->s_bdi->ra_pages is 0. Also + * @sb->s_bdi->capabilities are initialized to 0 so there won't be any + * writeback happening. */ - c->bdi.name = "ubifs", - c->bdi.capabilities = 0; - err = bdi_init(&c->bdi); + err = super_setup_bdi_name(sb, "ubifs_%d_%d", c->vi.ubi_num, + c->vi.vol_id); if (err) goto out_close; - err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", - c->vi.ubi_num, c->vi.vol_id); - if (err) - goto out_bdi; - - err = ubifs_parse_options(c, data, 0); - if (err) - goto out_bdi; - sb->s_bdi = &c->bdi; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; @@ -2080,8 +2075,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) ubifs_umount(c); out_unlock: mutex_unlock(&c->umount_mutex); -out_bdi: - bdi_destroy(&c->bdi); out_close: ubi_close_volume(c->ubi); out: diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4d57e488038e342f3b5ed84f5554862a86e440f3..298b4d89eee98b74440d84097d1789ba5de8a906 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -972,7 +972,6 @@ struct ubifs_debug_info; * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). * @vfs_sb: VFS @struct super_block object - * @bdi: backing device info object to make VFS happy and disable read-ahead * * @highest_inum: highest used inode number * @max_sqnum: current global sequence number @@ -1220,7 +1219,6 @@ struct ubifs_debug_info; */ struct ubifs_info { struct super_block *vfs_sb; - struct backing_dev_info bdi; ino_t highest_inum; unsigned long long max_sqnum; @@ -1461,7 +1459,6 @@ extern const struct inode_operations ubifs_file_inode_operations; extern const struct file_operations ubifs_dir_operations; extern const struct inode_operations ubifs_dir_inode_operations; extern const struct inode_operations ubifs_symlink_inode_operations; -extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ @@ -1756,13 +1753,23 @@ int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ extern const struct xattr_handler *ubifs_xattr_handlers[]; ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); -int ubifs_init_security(struct inode *dentry, struct inode *inode, - const struct qstr *qstr); int ubifs_xattr_set(struct inode *host, const char *name, const void *value, size_t size, int flags); ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, size_t size); +#ifdef CONFIG_UBIFS_FS_SECURITY +extern int ubifs_init_security(struct inode *dentry, struct inode *inode, + const struct qstr *qstr); +#else +static inline int ubifs_init_security(struct inode *dentry, + struct inode *inode, const struct qstr *qstr) +{ + return 0; +} +#endif + + /* super.c */ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index efe00fcb8b750961918a7e2c98ee3d670968ca72..6c9e62c2ef559b2d6b3d3383436775795e927375 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -152,7 +152,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data_len = size; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -234,7 +234,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, mutex_unlock(&ui->ui_mutex); mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_size -= CALC_XATTR_BYTES(old_size); host_ui->xattr_size += CALC_XATTR_BYTES(size); @@ -488,7 +488,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, return err; mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); + host->i_ctime = current_time(host); host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm)); host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); @@ -559,6 +559,7 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) return err; } +#ifdef CONFIG_UBIFS_FS_SECURITY static int init_xattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { @@ -599,6 +600,7 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, } return err; } +#endif static int xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, @@ -639,15 +641,19 @@ static const struct xattr_handler ubifs_trusted_xattr_handler = { .set = xattr_set, }; +#ifdef CONFIG_UBIFS_FS_SECURITY static const struct xattr_handler ubifs_security_xattr_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = xattr_get, .set = xattr_set, }; +#endif const struct xattr_handler *ubifs_xattr_handlers[] = { &ubifs_user_xattr_handler, &ubifs_trusted_xattr_handler, +#ifdef CONFIG_UBIFS_FS_SECURITY &ubifs_security_xattr_handler, +#endif NULL }; diff --git a/fs/udf/file.c b/fs/udf/file.c index e04cc0cdca9d736683c4fc5ac647e7fbdcc872c4..f5eb2d5b3bac86d91eaaddddbec6237dbbadfa38 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -44,12 +44,12 @@ static void __udf_adinicb_readpage(struct page *page) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - kaddr = kmap(page); + kaddr = kmap_atomic(page); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); } static int udf_adinicb_readpage(struct file *file, struct page *page) @@ -70,11 +70,11 @@ static int udf_adinicb_writepage(struct page *page, BUG_ON(!PageLocked(page)); - kaddr = kmap(page); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); - mark_inode_dirty(inode); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); + mark_inode_dirty(inode); unlock_page(page); return 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a8d8f71ef8bdb577675284e9cb5f03d38f1d34f7..98c510e172032df27a92b296507a3c1cb3017cdb 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -276,14 +276,14 @@ int udf_expand_file_adinicb(struct inode *inode) return -ENOMEM; if (!PageUptodate(page)) { - kaddr = kmap(page); + kaddr = kmap_atomic(page); memset(kaddr + iinfo->i_lenAlloc, 0x00, PAGE_SIZE - iinfo->i_lenAlloc); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, iinfo->i_lenAlloc); flush_dcache_page(page); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); } down_write(&iinfo->i_data_sem); memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, @@ -300,11 +300,11 @@ int udf_expand_file_adinicb(struct inode *inode) if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); - kaddr = kmap(page); down_write(&iinfo->i_data_sem); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); - kunmap(page); + kunmap_atomic(kaddr); unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; @@ -1535,7 +1535,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode) inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &udf_symlink_inode_operations; inode_nohighmem(inode); - inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_mode = S_IFLNK | 0777; break; case ICBTAG_FILE_TYPE_MAIN: udf_debug("METADATA FILE-----\n"); @@ -1591,9 +1591,9 @@ static umode_t udf_convert_permissions(struct fileEntry *fe) permissions = le32_to_cpu(fe->permissions); flags = le16_to_cpu(fe->icbTag.flags); - mode = ((permissions) & S_IRWXO) | - ((permissions >> 2) & S_IRWXG) | - ((permissions >> 4) & S_IRWXU) | + mode = ((permissions) & 0007) | + ((permissions >> 2) & 0070) | + ((permissions >> 4) & 0700) | ((flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) | ((flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | ((flags & ICBTAG_FLAG_STICKY) ? S_ISVTX : 0); @@ -1669,9 +1669,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) else fe->gid = cpu_to_le32(i_gid_read(inode)); - udfperms = ((inode->i_mode & S_IRWXO)) | - ((inode->i_mode & S_IRWXG) << 2) | - ((inode->i_mode & S_IRWXU) << 4); + udfperms = ((inode->i_mode & 0007)) | + ((inode->i_mode & 0070) << 2) | + ((inode->i_mode & 0700) << 4); udfperms |= (le32_to_cpu(fe->permissions) & (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | diff --git a/fs/udf/namei.c b/fs/udf/namei.c index babf48d0e55330ec798460c2a7500878b90eeb06..385ee89d58242d9ffc920b9d2cb6db55999d073b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -906,7 +906,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) static int udf_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO); + struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777); struct pathComponent *pc; const char *compstart; struct extent_position epos = {}; diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index a0376a2c1c29c7adc4a7a60976efd0bf49c8c9dd..0315fea1d589e104ac4f10bf4db3f9f705c3f7ad 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -82,7 +82,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ufs_error (sb, "ufs_free_fragments", "bit already cleared for fragment %u", i); } - + + inode_sub_bytes(inode, count << uspi->s_fshift); fs32_add(sb, &ucg->cg_cs.cs_nffree, count); uspi->cs_total.cs_nffree += count; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); @@ -184,6 +185,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) ufs_error(sb, "ufs_free_blocks", "freeing free fragment"); } ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); + inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); @@ -398,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, /* * There is not enough space for user on the device */ - if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) { - mutex_unlock(&UFS_SB(sb)->s_lock); - UFSD("EXIT (FAILED)\n"); - return 0; + if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) { + if (!capable(CAP_SYS_RESOURCE)) { + mutex_unlock(&UFS_SB(sb)->s_lock); + UFSD("EXIT (FAILED)\n"); + return 0; + } } if (goal >= uspi->s_size) @@ -419,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (result) { ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); + *err = 0; write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); - write_sequnlock(&UFS_I(inode)->meta_lock); - *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); + write_sequnlock(&UFS_I(inode)->meta_lock); } mutex_unlock(&UFS_SB(sb)->s_lock); UFSD("EXIT, result %llu\n", (unsigned long long)result); @@ -437,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, result = ufs_add_fragments(inode, tmp, oldcount, newcount); if (result) { *err = 0; + read_seqlock_excl(&UFS_I(inode)->meta_lock); UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); + read_sequnlock_excl(&UFS_I(inode)->meta_lock); ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); mutex_unlock(&UFS_SB(sb)->s_lock); @@ -472,16 +478,16 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (result) { ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); + mutex_unlock(&UFS_SB(sb)->s_lock); ufs_change_blocknr(inode, fragment - oldcount, oldcount, uspi->s_sbbase + tmp, uspi->s_sbbase + result, locked_page); + *err = 0; write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); - write_sequnlock(&UFS_I(inode)->meta_lock); - *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); - mutex_unlock(&UFS_SB(sb)->s_lock); + write_sequnlock(&UFS_I(inode)->meta_lock); if (newcount < request) ufs_free_fragments (inode, result + newcount, request - newcount); ufs_free_fragments (inode, tmp, oldcount); @@ -494,6 +500,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return 0; } +static bool try_add_frags(struct inode *inode, unsigned frags) +{ + unsigned size = frags * i_blocksize(inode); + spin_lock(&inode->i_lock); + __inode_add_bytes(inode, size); + if (unlikely((u32)inode->i_blocks != inode->i_blocks)) { + __inode_sub_bytes(inode, size); + spin_unlock(&inode->i_lock); + return false; + } + spin_unlock(&inode->i_lock); + return true; +} + static u64 ufs_add_fragments(struct inode *inode, u64 fragment, unsigned oldcount, unsigned newcount) { @@ -530,6 +550,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, for (i = oldcount; i < newcount; i++) if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) return 0; + + if (!try_add_frags(inode, count)) + return 0; /* * Block can be extended */ @@ -647,6 +670,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; + inode_sub_bytes(inode, i << uspi->s_fshift); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); uspi->cs_total.cs_nffree += i; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i); @@ -657,6 +681,8 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, result = ufs_bitmap_search (sb, ucpi, goal, allocsize); if (result == INVBLOCK) return 0; + if (!try_add_frags(inode, count)) + return 0; for (i = 0; i < count; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); @@ -716,6 +742,8 @@ static u64 ufs_alloccg_block(struct inode *inode, return INVBLOCK; ucpi->c_rotor = result; gotit: + if (!try_add_frags(inode, uspi->s_fpb)) + return 0; blkno = ufs_fragstoblks(result); ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 9774555b37217eba4c7fa5375e6df0c7cfa64ae8..d1dd8cc33179137988aedf9bb15098547906a561 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -176,6 +176,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) struct ufs_cg_private_info * ucpi; struct ufs_cylinder_group * ucg; struct inode * inode; + struct timespec64 ts; unsigned cg, bit, i, j, start; struct ufs_inode_info *ufsi; int err = -ENOSPC; @@ -323,8 +324,9 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) lock_buffer(bh); ufs2_inode = (struct ufs2_inode *)bh->b_data; ufs2_inode += ufs_inotofsbo(inode->i_ino); - ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec); - ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec); + ktime_get_real_ts64(&ts); + ufs2_inode->ui_birthtime = cpu_to_fs64(sb, ts.tv_sec); + ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, ts.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); if (sb->s_flags & MS_SYNCHRONOUS) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7e41aee7b69a660914e7cb26765714746929a306..9f4590261134085cbabcea1ee9a72382239ba479 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to, p = ufs_get_direct_data_ptr(uspi, ufsi, block); tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p), - new_size, err, locked_page); + new_size - (lastfrag & uspi->s_fpbmask), err, + locked_page); return tmp != 0; } @@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index, goal += uspi->s_fpb; } tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), - goal, uspi->s_fpb, err, locked_page); + goal, nfrags, err, locked_page); if (!tmp) { *err = -ENOSPC; @@ -400,11 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff u64 phys64 = 0; unsigned frag = fragment & uspi->s_fpbmask; - if (!create) { - phys64 = ufs_frag_map(inode, offsets, depth); - goto out; - } + phys64 = ufs_frag_map(inode, offsets, depth); + if (!create) + goto done; + if (phys64) { + if (fragment >= UFS_NDIR_FRAGMENT) + goto done; + read_seqlock_excl(&UFS_I(inode)->meta_lock); + if (fragment < UFS_I(inode)->i_lastfrag) { + read_sequnlock_excl(&UFS_I(inode)->meta_lock); + goto done; + } + read_sequnlock_excl(&UFS_I(inode)->meta_lock); + } /* This code entered only while writing ....? */ mutex_lock(&UFS_I(inode)->truncate_mutex); @@ -448,6 +458,11 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff } mutex_unlock(&UFS_I(inode)->truncate_mutex); return err; + +done: + if (phys64) + map_bh(bh_result, sb, phys64 + frag); + return 0; } static int ufs_writepage(struct page *page, struct writeback_control *wbc) @@ -841,8 +856,11 @@ void ufs_evict_inode(struct inode * inode) truncate_inode_pages_final(&inode->i_data); if (want_delete) { inode->i_size = 0; - if (inode->i_blocks) + if (inode->i_blocks && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) ufs_truncate_blocks(inode); + ufs_update_inode(inode, inode_needs_sync(inode)); } invalidate_inode_buffers(inode); @@ -868,7 +886,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count) ctx->to = from + count; } -#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) static void ufs_trunc_direct(struct inode *inode) @@ -1100,25 +1117,30 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size) return err; } -static void __ufs_truncate_blocks(struct inode *inode) +static void ufs_truncate_blocks(struct inode *inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; unsigned offsets[4]; - int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets); + int depth; int depth2; unsigned i; struct ufs_buffer_head *ubh[3]; void *p; u64 block; - if (!depth) - return; + if (inode->i_size) { + sector_t last = (inode->i_size - 1) >> uspi->s_bshift; + depth = ufs_block_to_path(inode, last, offsets); + if (!depth) + return; + } else { + depth = 1; + } - /* find the last non-zero in offsets[] */ for (depth2 = depth - 1; depth2; depth2--) - if (offsets[depth2]) + if (offsets[depth2] != uspi->s_apb - 1) break; mutex_lock(&ufsi->truncate_mutex); @@ -1127,9 +1149,8 @@ static void __ufs_truncate_blocks(struct inode *inode) offsets[0] = UFS_IND_BLOCK; } else { /* get the blocks that should be partially emptied */ - p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]); + p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++); for (i = 0; i < depth2; i++) { - offsets[i]++; /* next branch is fully freed */ block = ufs_data_ptr_to_cpu(sb, p); if (!block) break; @@ -1140,7 +1161,7 @@ static void __ufs_truncate_blocks(struct inode *inode) write_sequnlock(&ufsi->meta_lock); break; } - p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]); + p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++); } while (i--) free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1); @@ -1155,7 +1176,9 @@ static void __ufs_truncate_blocks(struct inode *inode) free_full_branch(inode, block, i - UFS_IND_BLOCK + 1); } } + read_seqlock_excl(&ufsi->meta_lock); ufsi->i_lastfrag = DIRECT_FRAGMENT; + read_sequnlock_excl(&ufsi->meta_lock); mark_inode_dirty(inode); mutex_unlock(&ufsi->truncate_mutex); } @@ -1183,7 +1206,7 @@ static int ufs_truncate(struct inode *inode, loff_t size) truncate_setsize(inode, size); - __ufs_truncate_blocks(inode); + ufs_truncate_blocks(inode); inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); out: @@ -1191,16 +1214,6 @@ static int ufs_truncate(struct inode *inode, loff_t size) return err; } -static void ufs_truncate_blocks(struct inode *inode) -{ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - __ufs_truncate_blocks(inode); -} - int ufs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 131b2b77c8185403dc3cdf0393e8e0c915f3a850..d5300adbfd79bafd05217cd7088d1221a4501187 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb) usb3 = ubh_get_usb_third(uspi); if ((mtype == UFS_MOUNT_UFSTYPE_44BSD && - (usb1->fs_flags & UFS_FLAGS_UPDATED)) || + (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) || mtype == UFS_MOUNT_UFSTYPE_UFS2) { /*we have statistic in different place, then usual*/ uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir); @@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb) usb2 = ubh_get_usb_second(uspi); usb3 = ubh_get_usb_third(uspi); - if ((mtype == UFS_MOUNT_UFSTYPE_44BSD && - (usb1->fs_flags & UFS_FLAGS_UPDATED)) || - mtype == UFS_MOUNT_UFSTYPE_UFS2) { + if (mtype == UFS_MOUNT_UFSTYPE_UFS2) { /*we have statistic in different place, then usual*/ usb2->fs_un.fs_u2.cs_ndir = cpu_to_fs64(sb, uspi->cs_total.cs_ndir); @@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb) cpu_to_fs64(sb, uspi->cs_total.cs_nifree); usb3->fs_un1.fs_u2.cs_nffree = cpu_to_fs64(sb, uspi->cs_total.cs_nffree); - } else { - usb1->fs_cstotal.cs_ndir = - cpu_to_fs32(sb, uspi->cs_total.cs_ndir); - usb1->fs_cstotal.cs_nbfree = - cpu_to_fs32(sb, uspi->cs_total.cs_nbfree); - usb1->fs_cstotal.cs_nifree = - cpu_to_fs32(sb, uspi->cs_total.cs_nifree); - usb1->fs_cstotal.cs_nffree = - cpu_to_fs32(sb, uspi->cs_total.cs_nffree); + goto out; } + + if (mtype == UFS_MOUNT_UFSTYPE_44BSD && + (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) { + /* store stats in both old and new places */ + usb2->fs_un.fs_u2.cs_ndir = + cpu_to_fs64(sb, uspi->cs_total.cs_ndir); + usb2->fs_un.fs_u2.cs_nbfree = + cpu_to_fs64(sb, uspi->cs_total.cs_nbfree); + usb3->fs_un1.fs_u2.cs_nifree = + cpu_to_fs64(sb, uspi->cs_total.cs_nifree); + usb3->fs_un1.fs_u2.cs_nffree = + cpu_to_fs64(sb, uspi->cs_total.cs_nffree); + } + usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir); + usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree); + usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree); + usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree); +out: ubh_mark_buffer_dirty(USPI_UBH(uspi)); ufs_print_super_stuff(sb, usb1, usb2, usb3); UFSD("EXIT\n"); @@ -746,6 +754,23 @@ static void ufs_put_super(struct super_block *sb) return; } +static u64 ufs_max_bytes(struct super_block *sb) +{ + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + int bits = uspi->s_apbshift; + u64 res; + + if (bits > 21) + res = ~0ULL; + else + res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) + + (1LL << (3*bits)); + + if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift)) + return MAX_LFS_FILESIZE; + return res << uspi->s_bshift; +} + static int ufs_fill_super(struct super_block *sb, void *data, int silent) { struct ufs_sb_info * sbi; @@ -812,9 +837,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_dirblksize = UFS_SECTOR_SIZE; super_block_offset=UFS_SBLOCK; - /* Keep 2Gig file limit. Some UFS variants need to override - this but as I don't know which I'll let those in the know loosen - the rules */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { case UFS_MOUNT_UFSTYPE_44BSD: UFSD("ufstype=44bsd\n"); @@ -980,6 +1004,13 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) flags |= UFS_ST_SUN; } + if ((flags & UFS_ST_MASK) == UFS_ST_44BSD && + uspi->s_postblformat == UFS_42POSTBLFMT) { + if (!silent) + pr_err("this is not a 44bsd filesystem"); + goto failed; + } + /* * Check ufs magic number */ @@ -1127,8 +1158,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { - uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size); - uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); + uspi->s_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size); + uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); } else { uspi->s_size = fs32_to_cpu(sb, usb1->fs_size); uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize); @@ -1177,6 +1208,9 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff); uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff); + uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize, + uspi->s_minfree, 100); + /* * Compute another frequently used values */ @@ -1212,6 +1246,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) "fast symlink size (%u)\n", uspi->s_maxsymlinklen); uspi->s_maxsymlinklen = maxsymlen; } + sb->s_maxbytes = ufs_max_bytes(sb); sb->s_max_links = UFS_LINK_MAX; inode = ufs_iget(sb, UFS_ROOTINO); @@ -1365,19 +1400,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) mutex_lock(&UFS_SB(sb)->s_lock); usb3 = ubh_get_usb_third(uspi); - if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { + if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) buf->f_type = UFS2_MAGIC; - buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize); - } else { + else buf->f_type = UFS_MAGIC; - buf->f_blocks = uspi->s_dsize; - } - buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) + - uspi->cs_total.cs_nffree; + + buf->f_blocks = uspi->s_dsize; + buf->f_bfree = ufs_freefrags(uspi); buf->f_ffree = uspi->cs_total.cs_nifree; buf->f_bsize = sb->s_blocksize; - buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree)) - ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0; + buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks) + ? (buf->f_bfree - uspi->s_root_blocks) : 0; buf->f_files = uspi->s_ncg * uspi->s_ipg; buf->f_namelen = UFS_MAXNAMLEN; buf->f_fsid.val[0] = (u32)id; diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 0cbd5d340b6705b9c6e22ff0a60a49891c302860..823d55a37586037f7ed02f9be5e1dbee890123d4 100644 --- a/fs/ufs/ufs_fs.h +++ b/fs/ufs/ufs_fs.h @@ -733,10 +733,8 @@ struct ufs_sb_private_info { __u32 s_dblkno; /* offset of first data after cg */ __u32 s_cgoffset; /* cylinder group offset in cylinder */ __u32 s_cgmask; /* used to calc mod fs_ntrak */ - __u32 s_size; /* number of blocks (fragments) in fs */ - __u32 s_dsize; /* number of data blocks in fs */ - __u64 s_u2_size; /* ufs2: number of blocks (fragments) in fs */ - __u64 s_u2_dsize; /*ufs2: number of data blocks in fs */ + __u64 s_size; /* number of blocks (fragments) in fs */ + __u64 s_dsize; /* number of data blocks in fs */ __u32 s_ncg; /* number of cylinder groups */ __u32 s_bsize; /* size of basic blocks */ __u32 s_fsize; /* size of fragments */ @@ -793,6 +791,7 @@ struct ufs_sb_private_info { __u32 s_maxsymlinklen;/* upper limit on fast symlinks' size */ __s32 fs_magic; /* filesystem magic */ unsigned int s_dirblksize; + __u64 s_root_blocks; }; /* diff --git a/fs/ufs/util.c b/fs/ufs/util.c index f41ad0a6106f28a2165c94068f6c7c3d71a67b3e..02497a492eb25085fc7fcefc446ad0e9cd0f967a 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev struct page *ufs_get_locked_page(struct address_space *mapping, pgoff_t index) { - struct page *page; - - page = find_lock_page(mapping, index); + struct inode *inode = mapping->host; + struct page *page = find_lock_page(mapping, index); if (!page) { page = read_mapping_page(mapping, index, NULL); @@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping, printk(KERN_ERR "ufs_change_blocknr: " "read_mapping_page error: ino %lu, index: %lu\n", mapping->host->i_ino, index); - goto out; + return page; } lock_page(page); @@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping, /* Truncate got there first */ unlock_page(page); put_page(page); - page = NULL; - goto out; + return NULL; } if (!PageUptodate(page) || PageError(page)) { @@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping, printk(KERN_ERR "ufs_change_blocknr: " "can not read page: ino %lu, index: %lu\n", - mapping->host->i_ino, index); + inode->i_ino, index); - page = ERR_PTR(-EIO); + return ERR_PTR(-EIO); } } -out: + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, 0); return page; } diff --git a/fs/ufs/util.h b/fs/ufs/util.h index b7fbf53dbc81a044e2bd10428bf2601b094cb750..9fc7119a1551f8ff82a02e9d89d05d6e60024c55 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi, #define ubh_blkmap(ubh,begin,bit) \ ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb))) -/* - * Determine the number of available frags given a - * percentage to hold in reserve. - */ static inline u64 -ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved) +ufs_freefrags(struct ufs_sb_private_info *uspi) { return ufs_blkstofrags(uspi->cs_total.cs_nbfree) + - uspi->cs_total.cs_nffree - - (uspi->s_dsize * (percentreserved) / 100); + uspi->cs_total.cs_nffree; } /* @@ -473,15 +468,19 @@ static inline unsigned _ubh_find_last_zero_bit_( static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi, struct ufs_buffer_head * ubh, unsigned begin, unsigned block) { + u8 mask; switch (uspi->s_fpb) { case 8: return (*ubh_get_addr (ubh, begin + block) == 0xff); case 4: - return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2))); + mask = 0x0f << ((block & 0x01) << 2); + return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask; case 2: - return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1))); + mask = 0x03 << ((block & 0x03) << 1); + return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask; case 1: - return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07))); + mask = 0x01 << (block & 0x07); + return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask; } return 0; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f7555fc25877435e13b65cbe597ae9bdb11c6528..1d622f276e3a2c0fba23d979d7ae18cf6a899f30 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -340,9 +340,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) bool must_wait, return_to_userland; long blocking_state; - BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - ret = VM_FAULT_SIGBUS; + + /* + * We don't do userfault handling for the final child pid update. + * + * We also don't do userfault handling during + * coredumping. hugetlbfs has the special + * follow_hugetlb_page() to skip missing pages in the + * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with + * the no_page_table() helper in follow_page_mask(), but the + * shmem_vm_ops->fault method is invoked even during + * coredumping without mmap_sem and it ends up here. + */ + if (current->flags & (PF_EXITING|PF_DUMPCORE)) + goto out; + + /* + * Coredumping runs without mmap_sem so we can only check that + * the mmap_sem is held, if PF_DUMPCORE was not set. + */ + WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem)); + ctx = vmf->vma->vm_userfaultfd_ctx.ctx; if (!ctx) goto out; @@ -360,12 +379,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) if (unlikely(ACCESS_ONCE(ctx->released))) goto out; - /* - * We don't do userfault handling for the final child pid update. - */ - if (current->flags & PF_EXITING) - goto out; - /* * Check that we can return VM_FAULT_RETRY. * diff --git a/fs/utimes.c b/fs/utimes.c index 32b15b3f6629023e55c34474bae0fa942a8df6ea..6571d8c848a04054e7517f7ad29b55d5b9022232 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -1,14 +1,10 @@ -#include #include -#include -#include #include #include -#include -#include #include #include #include +#include #include #ifdef __ARCH_WANT_SYS_UTIME @@ -219,3 +215,63 @@ SYSCALL_DEFINE2(utimes, char __user *, filename, { return sys_futimesat(AT_FDCWD, filename, utimes); } + +#ifdef CONFIG_COMPAT +/* + * Not all architectures have sys_utime, so implement this in terms + * of sys_utimes. + */ +COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, + struct compat_utimbuf __user *, t) +{ + struct timespec tv[2]; + + if (t) { + if (get_user(tv[0].tv_sec, &t->actime) || + get_user(tv[1].tv_sec, &t->modtime)) + return -EFAULT; + tv[0].tv_nsec = 0; + tv[1].tv_nsec = 0; + } + return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); +} + +COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) +{ + struct timespec tv[2]; + + if (t) { + if (compat_get_timespec(&tv[0], &t[0]) || + compat_get_timespec(&tv[1], &t[1])) + return -EFAULT; + + if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) + return 0; + } + return do_utimes(dfd, filename, t ? tv : NULL, flags); +} + +COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) +{ + struct timespec tv[2]; + + if (t) { + if (get_user(tv[0].tv_sec, &t[0].tv_sec) || + get_user(tv[0].tv_nsec, &t[0].tv_usec) || + get_user(tv[1].tv_sec, &t[1].tv_sec) || + get_user(tv[1].tv_nsec, &t[1].tv_usec)) + return -EFAULT; + if (tv[0].tv_nsec >= 1000000 || tv[0].tv_nsec < 0 || + tv[1].tv_nsec >= 1000000 || tv[1].tv_nsec < 0) + return -EINVAL; + tv[0].tv_nsec *= 1000; + tv[1].tv_nsec *= 1000; + } + return do_utimes(dfd, filename, t ? tv : NULL, 0); +} + +COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) +{ + return compat_sys_futimesat(AT_FDCWD, filename, t); +} +#endif diff --git a/fs/xattr.c b/fs/xattr.c index 7e3317cf4045523a7d0b17f102d51775808a8950..464c94bf65f9e55339ec9d4bc515ec990691deb7 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -431,12 +431,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, if (size) { if (size > XATTR_SIZE_MAX) return -E2BIG; - kvalue = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!kvalue) { - kvalue = vmalloc(size); - if (!kvalue) - return -ENOMEM; - } + kvalue = kvmalloc(size, GFP_KERNEL); + if (!kvalue) + return -ENOMEM; if (copy_from_user(kvalue, value, size)) { error = -EFAULT; goto out; @@ -528,12 +525,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, if (size) { if (size > XATTR_SIZE_MAX) size = XATTR_SIZE_MAX; - kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!kvalue) { - kvalue = vmalloc(size); - if (!kvalue) - return -ENOMEM; - } + kvalue = kvzalloc(size, GFP_KERNEL); + if (!kvalue) + return -ENOMEM; } error = vfs_getxattr(d, kname, kvalue, size); @@ -611,12 +605,9 @@ listxattr(struct dentry *d, char __user *list, size_t size) if (size) { if (size > XATTR_LIST_MAX) size = XATTR_LIST_MAX; - klist = kmalloc(size, __GFP_NOWARN | GFP_KERNEL); - if (!klist) { - klist = vmalloc(size); - if (!klist) - return -ENOMEM; - } + klist = kvmalloc(size, GFP_KERNEL); + if (!klist) + return -ENOMEM; } error = vfs_listxattr(d, klist, size); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 26ef1958b65b5289583090b6143b2ddec03c8c90..5c90f82b8f6b88d4f93a3f7f33b3c0e1b8ec4b20 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -79,6 +79,7 @@ xfs-y += xfs_aops.o \ xfs_extent_busy.o \ xfs_file.o \ xfs_filestream.o \ + xfs_fsmap.o \ xfs_fsops.o \ xfs_globals.o \ xfs_icache.o \ diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 70a5b55e0870a0523c0dd8ce629debf2fccebe25..393b6849aeb31991ed43808ff213fb3cadba00d0 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -48,7 +48,7 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) void * kmem_zalloc_large(size_t size, xfs_km_flags_t flags) { - unsigned noio_flag = 0; + unsigned nofs_flag = 0; void *ptr; gfp_t lflags; @@ -60,17 +60,17 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags) * __vmalloc() will allocate data pages and auxillary structures (e.g. * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context * here. Hence we need to tell memory reclaim that we are in such a - * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering + * context via PF_MEMALLOC_NOFS to prevent memory reclaim re-entering * the filesystem here and potentially deadlocking. */ - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - noio_flag = memalloc_noio_save(); + if (flags & KM_NOFS) + nofs_flag = memalloc_nofs_save(); lflags = kmem_flags_convert(flags); - ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL); - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - memalloc_noio_restore(noio_flag); + if (flags & KM_NOFS) + memalloc_nofs_restore(nofs_flag); return ptr; } diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index f0fc84fcaac2553283f90bc3f157b924bd03d932..d6ea520162b26530bbb560b667d925b6c6690b63 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -50,7 +50,7 @@ kmem_flags_convert(xfs_km_flags_t flags) lflags = GFP_ATOMIC | __GFP_NOWARN; } else { lflags = GFP_KERNEL | __GFP_NOWARN; - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) + if (flags & KM_NOFS) lflags &= ~__GFP_FS; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 369adcc18c02900f096a5ccd543a8c424dff7894..7486401ccbd36e0b076c020f15f433e9592322cc 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2868,3 +2868,60 @@ xfs_free_extent( xfs_trans_brelse(tp, agbp); return error; } + +struct xfs_alloc_query_range_info { + xfs_alloc_query_range_fn fn; + void *priv; +}; + +/* Format btree record and pass to our callback. */ +STATIC int +xfs_alloc_query_range_helper( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_alloc_query_range_info *query = priv; + struct xfs_alloc_rec_incore irec; + + irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); + irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); + return query->fn(cur, &irec, query->priv); +} + +/* Find all free space within a given range of blocks. */ +int +xfs_alloc_query_range( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *low_rec, + struct xfs_alloc_rec_incore *high_rec, + xfs_alloc_query_range_fn fn, + void *priv) +{ + union xfs_btree_irec low_brec; + union xfs_btree_irec high_brec; + struct xfs_alloc_query_range_info query; + + ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); + low_brec.a = *low_rec; + high_brec.a = *high_rec; + query.priv = priv; + query.fn = fn; + return xfs_btree_query_range(cur, &low_brec, &high_brec, + xfs_alloc_query_range_helper, &query); +} + +/* Find all free space records. */ +int +xfs_alloc_query_all( + struct xfs_btree_cur *cur, + xfs_alloc_query_range_fn fn, + void *priv) +{ + struct xfs_alloc_query_range_info query; + + ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); + query.priv = priv; + query.fn = fn; + return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 2a8d0fa6fbbea3973f96183934d2ff2e0e581f53..77d9c27330abc2fbe7dbf192a8d25e8c2352fa17 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -219,4 +219,16 @@ int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp); +typedef int (*xfs_alloc_query_range_fn)( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv); + +int xfs_alloc_query_range(struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *low_rec, + struct xfs_alloc_rec_incore *high_rec, + xfs_alloc_query_range_fn fn, void *priv); +int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn, + void *priv); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index efb467b10a71d35927426a5e916b5d96d7dd9978..e1fcfe7f0a9a4fb5665c296c6b171d69cdf80542 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -205,19 +205,37 @@ xfs_allocbt_init_key_from_rec( union xfs_btree_key *key, union xfs_btree_rec *rec) { - ASSERT(rec->alloc.ar_startblock != 0); - key->alloc.ar_startblock = rec->alloc.ar_startblock; key->alloc.ar_blockcount = rec->alloc.ar_blockcount; } +STATIC void +xfs_bnobt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + __u32 x; + + x = be32_to_cpu(rec->alloc.ar_startblock); + x += be32_to_cpu(rec->alloc.ar_blockcount) - 1; + key->alloc.ar_startblock = cpu_to_be32(x); + key->alloc.ar_blockcount = 0; +} + +STATIC void +xfs_cntbt_init_high_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->alloc.ar_blockcount = rec->alloc.ar_blockcount; + key->alloc.ar_startblock = 0; +} + STATIC void xfs_allocbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { - ASSERT(cur->bc_rec.a.ar_startblock != 0); - rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); } @@ -236,18 +254,24 @@ xfs_allocbt_init_ptr_from_cur( } STATIC __int64_t -xfs_allocbt_key_diff( +xfs_bnobt_key_diff( struct xfs_btree_cur *cur, union xfs_btree_key *key) { xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; xfs_alloc_key_t *kp = &key->alloc; - __int64_t diff; - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return (__int64_t)be32_to_cpu(kp->ar_startblock) - - rec->ar_startblock; - } + return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; +} + +STATIC __int64_t +xfs_cntbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; + xfs_alloc_key_t *kp = &key->alloc; + __int64_t diff; diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; if (diff) @@ -256,6 +280,33 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } +STATIC __int64_t +xfs_bnobt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) - + be32_to_cpu(k2->alloc.ar_startblock); +} + +STATIC __int64_t +xfs_cntbt_diff_two_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + __int64_t diff; + + diff = be32_to_cpu(k1->alloc.ar_blockcount) - + be32_to_cpu(k2->alloc.ar_blockcount); + if (diff) + return diff; + + return be32_to_cpu(k1->alloc.ar_startblock) - + be32_to_cpu(k2->alloc.ar_startblock); +} + static bool xfs_allocbt_verify( struct xfs_buf *bp) @@ -346,44 +397,54 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = { #if defined(DEBUG) || defined(XFS_WARN) STATIC int -xfs_allocbt_keys_inorder( +xfs_bnobt_keys_inorder( struct xfs_btree_cur *cur, union xfs_btree_key *k1, union xfs_btree_key *k2) { - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return be32_to_cpu(k1->alloc.ar_startblock) < - be32_to_cpu(k2->alloc.ar_startblock); - } else { - return be32_to_cpu(k1->alloc.ar_blockcount) < - be32_to_cpu(k2->alloc.ar_blockcount) || - (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && - be32_to_cpu(k1->alloc.ar_startblock) < - be32_to_cpu(k2->alloc.ar_startblock)); - } + return be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock); } STATIC int -xfs_allocbt_recs_inorder( +xfs_bnobt_recs_inorder( struct xfs_btree_cur *cur, union xfs_btree_rec *r1, union xfs_btree_rec *r2) { - if (cur->bc_btnum == XFS_BTNUM_BNO) { - return be32_to_cpu(r1->alloc.ar_startblock) + - be32_to_cpu(r1->alloc.ar_blockcount) <= - be32_to_cpu(r2->alloc.ar_startblock); - } else { - return be32_to_cpu(r1->alloc.ar_blockcount) < - be32_to_cpu(r2->alloc.ar_blockcount) || - (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && - be32_to_cpu(r1->alloc.ar_startblock) < - be32_to_cpu(r2->alloc.ar_startblock)); - } + return be32_to_cpu(r1->alloc.ar_startblock) + + be32_to_cpu(r1->alloc.ar_blockcount) <= + be32_to_cpu(r2->alloc.ar_startblock); +} + +STATIC int +xfs_cntbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be32_to_cpu(k1->alloc.ar_blockcount) < + be32_to_cpu(k2->alloc.ar_blockcount) || + (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && + be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock)); } -#endif /* DEBUG */ -static const struct xfs_btree_ops xfs_allocbt_ops = { +STATIC int +xfs_cntbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return be32_to_cpu(r1->alloc.ar_blockcount) < + be32_to_cpu(r2->alloc.ar_blockcount) || + (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && + be32_to_cpu(r1->alloc.ar_startblock) < + be32_to_cpu(r2->alloc.ar_startblock)); +} +#endif /* DEBUG */ + +static const struct xfs_btree_ops xfs_bnobt_ops = { .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -395,13 +456,39 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec, .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, - .key_diff = xfs_allocbt_key_diff, + .key_diff = xfs_bnobt_key_diff, .buf_ops = &xfs_allocbt_buf_ops, + .diff_two_keys = xfs_bnobt_diff_two_keys, #if defined(DEBUG) || defined(XFS_WARN) - .keys_inorder = xfs_allocbt_keys_inorder, - .recs_inorder = xfs_allocbt_recs_inorder, + .keys_inorder = xfs_bnobt_keys_inorder, + .recs_inorder = xfs_bnobt_recs_inorder, +#endif +}; + +static const struct xfs_btree_ops xfs_cntbt_ops = { + .rec_len = sizeof(xfs_alloc_rec_t), + .key_len = sizeof(xfs_alloc_key_t), + + .dup_cursor = xfs_allocbt_dup_cursor, + .set_root = xfs_allocbt_set_root, + .alloc_block = xfs_allocbt_alloc_block, + .free_block = xfs_allocbt_free_block, + .update_lastrec = xfs_allocbt_update_lastrec, + .get_minrecs = xfs_allocbt_get_minrecs, + .get_maxrecs = xfs_allocbt_get_maxrecs, + .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec, + .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, + .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, + .key_diff = xfs_cntbt_key_diff, + .buf_ops = &xfs_allocbt_buf_ops, + .diff_two_keys = xfs_cntbt_diff_two_keys, +#if defined(DEBUG) || defined(XFS_WARN) + .keys_inorder = xfs_cntbt_keys_inorder, + .recs_inorder = xfs_cntbt_recs_inorder, #endif }; @@ -427,16 +514,15 @@ xfs_allocbt_init_cursor( cur->bc_mp = mp; cur->bc_btnum = btnum; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_allocbt_ops; - if (btnum == XFS_BTNUM_BNO) - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); - else - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); if (btnum == XFS_BTNUM_CNT) { + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); + cur->bc_ops = &xfs_cntbt_ops; cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; } else { + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); + cur->bc_ops = &xfs_bnobt_ops; cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 9bd104f32908962046af6d2dd4437a045fecdb36..a7048eafa8e6d8c92a0c6888017389d8562c75ba 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -764,7 +764,6 @@ xfs_bmap_extents_to_btree( args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { args.type = XFS_ALLOCTYPE_START_BNO; -try_another_ag: args.fsbno = *firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -779,20 +778,6 @@ xfs_bmap_extents_to_btree( return error; } - /* - * During a CoW operation, the allocation and bmbt updates occur in - * different transactions. The mapping code tries to put new bmbt - * blocks near extents being mapped, but the only way to guarantee this - * is if the alloc and the mapping happen in a single transaction that - * has a block reservation. That isn't the case here, so if we run out - * of space we'll try again with another AG. - */ - if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && - args.fsbno == NULLFSBLOCK && - args.type == XFS_ALLOCTYPE_NEAR_BNO) { - args.type = XFS_ALLOCTYPE_FIRST_AG; - goto try_another_ag; - } if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { xfs_iroot_realloc(ip, -1, whichfork); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); @@ -925,7 +910,6 @@ xfs_bmap_local_to_extents( * file currently fits in an inode. */ if (*firstblock == NULLFSBLOCK) { -try_another_ag: args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); args.type = XFS_ALLOCTYPE_START_BNO; } else { @@ -938,19 +922,6 @@ xfs_bmap_local_to_extents( if (error) goto done; - /* - * During a CoW operation, the allocation and bmbt updates occur in - * different transactions. The mapping code tries to put new bmbt - * blocks near extents being mapped, but the only way to guarantee this - * is if the alloc and the mapping happen in a single transaction that - * has a block reservation. That isn't the case here, so if we run out - * of space we'll try again with another AG. - */ - if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) && - args.fsbno == NULLFSBLOCK && - args.type == XFS_ALLOCTYPE_NEAR_BNO) { - goto try_another_ag; - } /* Can't fail, the space was reserved. */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); @@ -1260,7 +1231,6 @@ xfs_bmap_read_extents( xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ - xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */ xfs_extnum_t i, j; /* index into the extents list */ xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ @@ -1271,8 +1241,6 @@ xfs_bmap_read_extents( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE : - XFS_EXTFMT_INODE(ip); block = ifp->if_broot; /* * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. @@ -1312,7 +1280,6 @@ xfs_bmap_read_extents( xfs_bmbt_rec_t *frp; xfs_fsblock_t nextbno; xfs_extnum_t num_recs; - xfs_extnum_t start; num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { @@ -1335,23 +1302,13 @@ xfs_bmap_read_extents( * Copy records into the extent records. */ frp = XFS_BMBT_REC_ADDR(mp, block, 1); - start = i; for (j = 0; j < num_recs; j++, i++, frp++) { xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); trp->l0 = be64_to_cpu(frp->l0); trp->l1 = be64_to_cpu(frp->l1); - } - if (exntf == XFS_EXTFMT_NOSTATE) { - /* - * Check all attribute bmap btree records and - * any "older" data bmap btree records for a - * set bit in the "extent flag" position. - */ - if (unlikely(xfs_check_nostate_extents(ifp, - start, num_recs))) { + if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) { XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); + XFS_ERRLEVEL_LOW, mp); goto error0; } } @@ -2106,8 +2063,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2164,7 +2123,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); @@ -2879,27 +2837,30 @@ xfs_bmap_add_extent_hole_delay( */ STATIC int /* error */ xfs_bmap_add_extent_hole_real( - struct xfs_bmalloca *bma, - int whichfork) + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork, + xfs_extnum_t *idx, + struct xfs_btree_cur **curp, + struct xfs_bmbt_irec *new, + xfs_fsblock_t *first, + struct xfs_defer_ops *dfops, + int *logflagsp) { - struct xfs_bmbt_irec *new = &bma->got; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_mount *mp = ip->i_mount; + struct xfs_btree_cur *cur = *curp; int error; /* error return value */ int i; /* temp state */ - xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_bmbt_irec_t right; /* right neighbor extent entry */ int rval=0; /* return value (logging flags) */ int state; /* state bits, accessed thru macros */ - struct xfs_mount *mp; - - mp = bma->ip->i_mount; - ifp = XFS_IFORK_PTR(bma->ip, whichfork); - ASSERT(bma->idx >= 0); - ASSERT(bma->idx <= xfs_iext_count(ifp)); + ASSERT(*idx >= 0); + ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2912,9 +2873,9 @@ xfs_bmap_add_extent_hole_real( /* * Check and set flags if this segment has a left neighbor. */ - if (bma->idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -2923,9 +2884,9 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (bma->idx < xfs_iext_count(ifp)) { + if (*idx < xfs_iext_count(ifp)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -2962,36 +2923,36 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount + right.br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); - if (bma->cur == NULL) { + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, + error = xfs_bmbt_lookup_eq(cur, right.br_startoff, right.br_startblock, right.br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_btree_delete(bma->cur, &i); + error = xfs_btree_delete(cur, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_btree_decrement(bma->cur, 0, &i); + error = xfs_btree_decrement(cur, 0, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, left.br_blockcount + new->br_blockcount + @@ -3008,23 +2969,23 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - --bma->idx; - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (bma->cur == NULL) { + if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, + error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, left.br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, left.br_startoff, + error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, left.br_blockcount + new->br_blockcount, @@ -3040,25 +3001,25 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - if (bma->cur == NULL) { + if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { rval = 0; - error = xfs_bmbt_lookup_eq(bma->cur, + error = xfs_bmbt_lookup_eq(cur, right.br_startoff, right.br_startblock, right.br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); - error = xfs_bmbt_update(bma->cur, new->br_startoff, + error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, @@ -3074,22 +3035,22 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. */ - xfs_iext_insert(bma->ip, bma->idx, 1, new, state); - XFS_IFORK_NEXT_SET(bma->ip, whichfork, - XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); - if (bma->cur == NULL) { + xfs_iext_insert(ip, *idx, 1, new, state); + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { rval = XFS_ILOG_CORE; - error = xfs_bmbt_lookup_eq(bma->cur, + error = xfs_bmbt_lookup_eq(cur, new->br_startoff, new->br_startblock, new->br_blockcount, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); - bma->cur->bc_rec.b.br_state = new->br_state; - error = xfs_btree_insert(bma->cur, &i); + cur->bc_rec.b.br_state = new->br_state; + error = xfs_btree_insert(cur, &i); if (error) goto done; XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); @@ -3098,30 +3059,30 @@ xfs_bmap_add_extent_hole_real( } /* add reverse mapping */ - error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new); + error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new); if (error) goto done; /* convert to a btree if necessary */ - if (xfs_bmap_needs_btree(bma->ip, whichfork)) { + if (xfs_bmap_needs_btree(ip, whichfork)) { int tmp_logflags; /* partial log flag return val */ - ASSERT(bma->cur == NULL); - error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, - bma->firstblock, bma->dfops, &bma->cur, + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp, 0, &tmp_logflags, whichfork); - bma->logflags |= tmp_logflags; + *logflagsp |= tmp_logflags; + cur = *curp; if (error) goto done; } /* clear out the allocated field, done with it now in any case. */ - if (bma->cur) - bma->cur->bc_private.b.allocated = 0; + if (cur) + cur->bc_private.b.allocated = 0; - xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); + xfs_bmap_check_leaf_extents(cur, ip, whichfork); done: - bma->logflags |= rval; + *logflagsp |= rval; return error; } @@ -3852,60 +3813,6 @@ xfs_bmap_btalloc( return 0; } -/* - * For a remap operation, just "allocate" an extent at the address that the - * caller passed in, and ensure that the AGFL is the right size. The caller - * will then map the "allocated" extent into the file somewhere. - */ -STATIC int -xfs_bmap_remap_alloc( - struct xfs_bmalloca *ap) -{ - struct xfs_trans *tp = ap->tp; - struct xfs_mount *mp = tp->t_mountp; - xfs_agblock_t bno; - struct xfs_alloc_arg args; - int error; - - /* - * validate that the block number is legal - the enables us to detect - * and handle a silent filesystem corruption rather than crashing. - */ - memset(&args, 0, sizeof(struct xfs_alloc_arg)); - args.tp = ap->tp; - args.mp = ap->tp->t_mountp; - bno = *ap->firstblock; - args.agno = XFS_FSB_TO_AGNO(mp, bno); - args.agbno = XFS_FSB_TO_AGBNO(mp, bno); - if (args.agno >= mp->m_sb.sb_agcount || - args.agbno >= mp->m_sb.sb_agblocks) - return -EFSCORRUPTED; - - /* "Allocate" the extent from the range we passed in. */ - trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length); - ap->blkno = bno; - ap->ip->i_d.di_nblocks += ap->length; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - - /* Fix the freelist, like a real allocator does. */ - args.datatype = ap->datatype; - args.pag = xfs_perag_get(args.mp, args.agno); - ASSERT(args.pag); - - /* - * The freelist fixing code will decline the allocation if - * the size and shape of the free space doesn't allow for - * allocating the extent and updating all the metadata that - * happens during an allocation. We're remapping, not - * allocating, so skip that check by pretending to be freeing. - */ - error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); - xfs_perag_put(args.pag); - if (error) - trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); - return error; -} - /* * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. * It figures out where to ask the underlying allocator to put the new extent. @@ -3914,8 +3821,6 @@ STATIC int xfs_bmap_alloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - if (ap->flags & XFS_BMAPI_REMAP) - return xfs_bmap_remap_alloc(ap); if (XFS_IS_REALTIME_INODE(ap->ip) && xfs_alloc_is_userdata(ap->datatype)) return xfs_bmap_rtalloc(ap); @@ -4386,7 +4291,9 @@ xfs_bmapi_allocate( if (bma->wasdel) error = xfs_bmap_add_extent_delay_real(bma, whichfork); else - error = xfs_bmap_add_extent_hole_real(bma, whichfork); + error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, + whichfork, &bma->idx, &bma->cur, &bma->got, + bma->firstblock, bma->dfops, &bma->logflags); bma->logflags |= tmp_logflags; if (error) @@ -4549,9 +4456,7 @@ xfs_bmapi_write( ASSERT(len > 0); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); - ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); - ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); + ASSERT(!(flags & XFS_BMAPI_REMAP)); /* zeroing is for currently only for data extents, not metadata */ ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != @@ -4635,13 +4540,8 @@ xfs_bmapi_write( } else { need_alloc = true; } - } else { - /* - * Make sure we only reflink into a hole. - */ - ASSERT(!(flags & XFS_BMAPI_REMAP)); - if (isnullstartblock(bma.got.br_startblock)) - wasdelay = true; + } else if (isnullstartblock(bma.got.br_startblock)) { + wasdelay = true; } /* @@ -4770,6 +4670,93 @@ xfs_bmapi_write( return error; } +static int +xfs_bmapi_remap( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fileoff_t bno, + xfs_filblks_t len, + xfs_fsblock_t startblock, + struct xfs_defer_ops *dfops) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + struct xfs_btree_cur *cur = NULL; + xfs_fsblock_t firstblock = NULLFSBLOCK; + struct xfs_bmbt_irec got; + xfs_extnum_t idx; + int logflags = 0, error; + + ASSERT(len > 0); + ASSERT(len <= (xfs_filblks_t)MAXEXTLEN); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (unlikely(XFS_TEST_ERROR( + (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), + mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { + XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + if (error) + return error; + } + + if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) { + /* make sure we only reflink into a hole. */ + ASSERT(got.br_startoff > bno); + ASSERT(got.br_startoff - bno >= len); + } + + ip->i_d.di_nblocks += len; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + if (ifp->if_flags & XFS_IFBROOT) { + cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); + cur->bc_private.b.firstblock = firstblock; + cur->bc_private.b.dfops = dfops; + cur->bc_private.b.flags = 0; + } + + got.br_startoff = bno; + got.br_startblock = startblock; + got.br_blockcount = len; + got.br_state = XFS_EXT_NORM; + + error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur, + &got, &firstblock, dfops, &logflags); + if (error) + goto error0; + + if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) { + int tmp_logflags = 0; + + error = xfs_bmap_btree_to_extents(tp, ip, cur, + &tmp_logflags, XFS_DATA_FORK); + logflags |= tmp_logflags; + } + +error0: + if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) + logflags &= ~XFS_ILOG_DEXT; + else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) + logflags &= ~XFS_ILOG_DBROOT; + + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); + if (cur) { + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + } + return error; +} + /* * When a delalloc extent is split (e.g., due to a hole punch), the original * indlen reservation must be shared across the two new extents that are left @@ -4887,7 +4874,7 @@ xfs_bmap_del_extent_delay( ASSERT(got_endoff >= del_endoff); if (isrt) { - int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); + uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_frextents(mp, rtexts); @@ -6488,27 +6475,15 @@ xfs_bmap_finish_one( xfs_filblks_t blockcount, xfs_exntst_t state) { - struct xfs_bmbt_irec bmap; - int nimaps = 1; - xfs_fsblock_t firstfsb; - int flags = XFS_BMAPI_REMAP; - int done; - int error = 0; - - bmap.br_startblock = startblock; - bmap.br_startoff = startoff; - bmap.br_blockcount = blockcount; - bmap.br_state = state; + int error = 0, done; trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), ip->i_ino, whichfork, startoff, blockcount, state); - if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) + if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK)) return -EFSCORRUPTED; - if (whichfork == XFS_ATTR_FORK) - flags |= XFS_BMAPI_ATTRFORK; if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE, @@ -6517,16 +6492,12 @@ xfs_bmap_finish_one( switch (type) { case XFS_BMAP_MAP: - firstfsb = bmap.br_startblock; - error = xfs_bmapi_write(tp, ip, bmap.br_startoff, - bmap.br_blockcount, flags, &firstfsb, - bmap.br_blockcount, &bmap, &nimaps, - dfops); + error = xfs_bmapi_remap(tp, ip, startoff, blockcount, + startblock, dfops); break; case XFS_BMAP_UNMAP: - error = xfs_bunmapi(tp, ip, bmap.br_startoff, - bmap.br_blockcount, flags, 1, &firstfsb, - dfops, &done); + error = xfs_bunmapi(tp, ip, startoff, blockcount, + XFS_BMAPI_REMAP, 1, &startblock, dfops, &done); ASSERT(done); break; default: diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cdef87db5262bdc2f43b17f5a1e76fb45a107233..c35a14fa15272ab5ef38f8b47953ae120d72fb4e 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -171,6 +171,18 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags) { BMAP_COWFORK, "COW" } +/* + * Return true if the extent is a real, allocated extent, or false if it is a + * delayed allocation, and unwritten extent or a hole. + */ +static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) +{ + return irec->br_state != XFS_EXT_UNWRITTEN && + irec->br_startblock != HOLESTARTBLOCK && + irec->br_startblock != DELAYSTARTBLOCK && + !isnullstartblock(irec->br_startblock); +} + /* * This macro is used to determine how many extents will be shifted * in one write transaction. We could require two splits, @@ -232,8 +244,6 @@ int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *del); void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); -int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, - xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index fd55db47938562868d25d4998407ce7650c4da4f..6cba69aff077470752daa8457116ff57802575f2 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -366,32 +366,6 @@ xfs_bmbt_to_bmdr( memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } -/* - * Check extent records, which have just been read, for - * any bit in the extent flag field. ASSERT on debug - * kernels, as this condition should not occur. - * Return an error condition (1) if any flags found, - * otherwise return 0. - */ - -int -xfs_check_nostate_extents( - xfs_ifork_t *ifp, - xfs_extnum_t idx, - xfs_extnum_t num) -{ - for (; num > 0; num--, idx++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx); - if ((ep->l0 >> - (64 - BMBT_EXNTFLAG_BITLEN)) != 0) { - ASSERT(0); - return 1; - } - } - return 0; -} - - STATIC struct xfs_btree_cur * xfs_bmbt_dup_cursor( struct xfs_btree_cur *cur) @@ -448,7 +422,6 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); args.type = XFS_ALLOCTYPE_START_BNO; -try_another_ag: /* * Make sure there is sufficient room left in the AG to * complete a full tree split for an extent insert. If @@ -477,22 +450,6 @@ xfs_bmbt_alloc_block( if (error) goto error0; - /* - * During a CoW operation, the allocation and bmbt updates occur in - * different transactions. The mapping code tries to put new bmbt - * blocks near extents being mapped, but the only way to guarantee this - * is if the alloc and the mapping happen in a single transaction that - * has a block reservation. That isn't the case here, so if we run out - * of space we'll try again with another AG. - */ - if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && - args.fsbno == NULLFSBLOCK && - args.type == XFS_ALLOCTYPE_NEAR_BNO) { - args.fsbno = cur->bc_private.b.firstblock; - args.type = XFS_ALLOCTYPE_FIRST_AG; - goto try_another_ag; - } - if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 819a8a4dee952d4648ee6333a7c61ea7dff6e78f..9da5a8d4f184b6384638f4090b6441626a6a1ab0 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -24,14 +24,6 @@ struct xfs_mount; struct xfs_inode; struct xfs_trans; -/* - * Extent state and extent format macros. - */ -#define XFS_EXTFMT_INODE(x) \ - (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \ - XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE) -#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) - /* * Btree block header size depends on a superblock flag. */ @@ -140,4 +132,18 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); +/* + * Check that the extent does not contain an invalid unwritten extent flag. + */ +static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork, + struct xfs_bmbt_rec_host *ep) +{ + if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0) + return true; + if (whichfork == XFS_DATA_FORK && + xfs_sb_version_hasextflgbit(&mp->m_sb)) + return true; + return false; +} + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index c3decedc94557e14c3cb7fa7eee81d09e54f4c9e..3a673ba201aae9f39c8190bfa2b8904120284ce5 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2886,7 +2886,7 @@ xfs_btree_split_worker( struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args, work); unsigned long pflags; - unsigned long new_pflags = PF_FSTRANS; + unsigned long new_pflags = PF_MEMALLOC_NOFS; /* * we are in a transaction context here, but may also be doing work @@ -4395,7 +4395,7 @@ xfs_btree_visit_blocks( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ @@ -4842,6 +4842,21 @@ xfs_btree_query_range( fn, priv); } +/* Query a btree for all records. */ +int +xfs_btree_query_all( + struct xfs_btree_cur *cur, + xfs_btree_query_range_fn fn, + void *priv) +{ + union xfs_btree_irec low_rec; + union xfs_btree_irec high_rec; + + memset(&low_rec, 0, sizeof(low_rec)); + memset(&high_rec, 0xFF, sizeof(high_rec)); + return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv); +} + /* * Calculate the number of blocks needed to store a given number of records * in a short-format (per-AG metadata) btree. diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 4bb62580a7fd0029702b7d5092638af34b78c70b..27bed08261c530fac9508f9138f00b0f13a8cdd9 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -496,6 +496,8 @@ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, int xfs_btree_query_range(struct xfs_btree_cur *cur, union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec, xfs_btree_query_range_fn fn, void *priv); +int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn, + void *priv); typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, void *data); diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index ac9a003dd29acf80d7c766788cb40d1f98d9132c..747085b4ef4406d387464b40fdea302b83c25c4c 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -35,13 +35,8 @@ int xfs_calc_dquots_per_chunk( unsigned int nbblks) /* basic block units */ { - unsigned int ndquots; - ASSERT(nbblks > 0); - ndquots = BBTOB(nbblks); - do_div(ndquots, sizeof(xfs_dqblk_t)); - - return ndquots; + return BBTOB(nbblks) / sizeof(xfs_dqblk_t); } /* diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 6b7579e7b60a228ee6e633221bc64952f4a93a41..a1dccd8d96bcecaae119d7404f4c461022cab07d 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -930,10 +930,8 @@ static inline uint xfs_dinode_size(int version) /* * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. * Since the pathconf interface is signed, we use 2^31 - 1 instead. - * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. */ #define XFS_MAXLINK ((1U << 31) - 1U) -#define XFS_MAXLINK_1 65535U /* * Values for di_format @@ -1577,20 +1575,11 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x) return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); } -/* - * Possible extent formats. - */ -typedef enum { - XFS_EXTFMT_NOSTATE = 0, - XFS_EXTFMT_HASSTATE -} xfs_exntfmt_t; - /* * Possible extent states. */ typedef enum { XFS_EXT_NORM, XFS_EXT_UNWRITTEN, - XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID } xfs_exntst_t; /* diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b72dc821d78b97ba5f93a21122eab9946e2e947a..095bdf049a3fe2242aed690fceb2167d4b97eaad 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -92,6 +92,18 @@ struct getbmapx { #define BMV_OF_LAST 0x4 /* segment is the last in the file */ #define BMV_OF_SHARED 0x8 /* segment shared with another file */ +/* fmr_owner special values for FS_IOC_GETFSMAP */ +#define XFS_FMR_OWN_FREE FMR_OWN_FREE /* free space */ +#define XFS_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */ +#define XFS_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */ +#define XFS_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */ +#define XFS_FMR_OWN_AG FMR_OWNER('X', 3) /* per-AG metadata */ +#define XFS_FMR_OWN_INOBT FMR_OWNER('X', 4) /* inode btree blocks */ +#define XFS_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */ +#define XFS_FMR_OWN_REFC FMR_OWNER('X', 6) /* refcount tree */ +#define XFS_FMR_OWN_COW FMR_OWNER('X', 7) /* cow staging */ +#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */ + /* * Structure for XFS_IOC_FSSETDM. * For use by backup and restore programs to set the XFS on-disk inode @@ -502,6 +514,7 @@ typedef struct xfs_swapext #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) #define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) +/* XFS_IOC_GETFSMAP ------ hoisted 59 */ /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index d93f9d918cfc11ada2ded50d21e0eb9226381f93..09c3d1aecef267cbb207e274f892542be22d1275 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -508,7 +508,7 @@ xfs_iread( /* even unallocated inodes are verified */ if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld failed", + xfs_alert(mp, "%s: validation failed for inode %lld", __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 8a37efe04de3235d72357914647e5f480cbbd512..0e80f34fe97c509cef3eef61ff7237c20a19bdd4 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -42,35 +42,6 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); -#ifdef DEBUG -/* - * Make sure that the extents in the given memory buffer - * are valid. - */ -void -xfs_validate_extents( - xfs_ifork_t *ifp, - int nrecs, - xfs_exntfmt_t fmt) -{ - xfs_bmbt_irec_t irec; - xfs_bmbt_rec_host_t rec; - int i; - - for (i = 0; i < nrecs; i++) { - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - rec.l0 = get_unaligned(&ep->l0); - rec.l1 = get_unaligned(&ep->l1); - xfs_bmbt_get_all(&rec, &irec); - if (fmt == XFS_EXTFMT_NOSTATE) - ASSERT(irec.br_state == XFS_EXT_NORM); - } -} -#else /* DEBUG */ -#define xfs_validate_extents(ifp, nrecs, fmt) -#endif /* DEBUG */ - - /* * Move inode type and inode format specific information from the * on-disk inode to the in-core inode. For fifos, devs, and sockets @@ -352,40 +323,33 @@ xfs_iformat_local( } /* - * The file consists of a set of extents all - * of which fit into the on-disk inode. - * If there are few enough extents to fit into - * the if_inline_ext, then copy them there. - * Otherwise allocate a buffer for them and copy - * them into it. Either way, set if_extents - * to point at the extents. + * The file consists of a set of extents all of which fit into the on-disk + * inode. If there are few enough extents to fit into the if_inline_ext, then + * copy them there. Otherwise allocate a buffer for them and copy them into it. + * Either way, set if_extents to point at the extents. */ STATIC int xfs_iformat_extents( - xfs_inode_t *ip, - xfs_dinode_t *dip, - int whichfork) + struct xfs_inode *ip, + struct xfs_dinode *dip, + int whichfork) { - xfs_bmbt_rec_t *dp; - xfs_ifork_t *ifp; - int nex; - int size; - int i; - - ifp = XFS_IFORK_PTR(ip, whichfork); - nex = XFS_DFORK_NEXTENTS(dip, whichfork); - size = nex * (uint)sizeof(xfs_bmbt_rec_t); + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int nex = XFS_DFORK_NEXTENTS(dip, whichfork); + int size = nex * sizeof(xfs_bmbt_rec_t); + struct xfs_bmbt_rec *dp; + int i; /* - * If the number of extents is unreasonable, then something - * is wrong and we just bail out rather than crash in - * kmem_alloc() or memcpy() below. + * If the number of extents is unreasonable, then something is wrong and + * we just bail out rather than crash in kmem_alloc() or memcpy() below. */ - if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { + if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", (unsigned long long) ip->i_ino, nex); XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); + mp, dip); return -EFSCORRUPTED; } @@ -400,22 +364,17 @@ xfs_iformat_extents( ifp->if_bytes = size; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); - xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); for (i = 0; i < nex; i++, dp++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); ep->l0 = get_unaligned_be64(&dp->l0); ep->l1 = get_unaligned_be64(&dp->l1); + if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) { + XFS_ERROR_REPORT("xfs_iformat_extents(2)", + XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } } XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); - if (whichfork != XFS_DATA_FORK || - XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) - if (unlikely(xfs_check_nostate_extents( - ifp, 0, nex))) { - XFS_ERROR_REPORT("xfs_iformat_extents(2)", - XFS_ERRLEVEL_LOW, - ip->i_mount); - return -EFSCORRUPTED; - } } ifp->if_flags |= XFS_IFEXTENTS; return 0; @@ -518,7 +477,6 @@ xfs_iread_extents( xfs_iext_destroy(ifp); return error; } - xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip)); ifp->if_flags |= XFS_IFEXTENTS; return 0; } @@ -837,6 +795,9 @@ xfs_iextents_copy( copied = 0; for (i = 0; i < nrecs; i++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); + + ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep)); + start_block = xfs_bmbt_get_startblock(ep); if (isnullstartblock(start_block)) { /* @@ -852,7 +813,6 @@ xfs_iextents_copy( copied++; } ASSERT(copied != 0); - xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip)); return (copied * (uint)sizeof(xfs_bmbt_rec_t)); } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index b177ef33cd4c3215de5f3bc228a467bcf73031e1..82a38d86ebad83346c441e802340a172402ef532 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1629,13 +1629,28 @@ xfs_refcount_recover_cow_leftovers( if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) return -EOPNOTSUPP; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + INIT_LIST_HEAD(&debris); + + /* + * In this first part, we use an empty transaction to gather up + * all the leftover CoW extents so that we can subsequently + * delete them. The empty transaction is used to avoid + * a buffer lock deadlock if there happens to be a loop in the + * refcountbt because we're allowed to re-grab a buffer that is + * already attached to our transaction. When we're done + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. + */ + error = xfs_trans_alloc_empty(mp, &tp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (error) + goto out_trans; + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ - INIT_LIST_HEAD(&debris); memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); low.rc.rc_startblock = XFS_REFC_COW_START; @@ -1645,10 +1660,11 @@ xfs_refcount_recover_cow_leftovers( if (error) goto out_cursor; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); + xfs_trans_brelse(tp, agbp); + xfs_trans_cancel(tp); /* Now iterate the list to free the leftovers */ - list_for_each_entry(rr, &debris, rr_list) { + list_for_each_entry_safe(rr, n, &debris, rr_list) { /* Set up transaction. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); if (error) @@ -1676,8 +1692,16 @@ xfs_refcount_recover_cow_leftovers( error = xfs_trans_commit(tp); if (error) goto out_free; + + list_del(&rr->rr_list); + kmem_free(rr); } + return error; +out_defer: + xfs_defer_cancel(&dfops); +out_trans: + xfs_trans_cancel(tp); out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { @@ -1688,11 +1712,6 @@ xfs_refcount_recover_cow_leftovers( out_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_buf_relse(agbp); - goto out_free; - -out_defer: - xfs_defer_cancel(&dfops); - xfs_trans_cancel(tp); - goto out_free; + xfs_trans_brelse(tp, agbp); + goto out_trans; } diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 3a8cc7139912bf83d0830090c3c88a91e79b6677..06cfb93c2ef95b2c9454272db24d9ae7b7f6816a 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2001,14 +2001,14 @@ xfs_rmap_query_range_helper( /* Find all rmaps between two keys. */ int xfs_rmap_query_range( - struct xfs_btree_cur *cur, - struct xfs_rmap_irec *low_rec, - struct xfs_rmap_irec *high_rec, - xfs_rmap_query_range_fn fn, - void *priv) + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *low_rec, + struct xfs_rmap_irec *high_rec, + xfs_rmap_query_range_fn fn, + void *priv) { - union xfs_btree_irec low_brec; - union xfs_btree_irec high_brec; + union xfs_btree_irec low_brec; + union xfs_btree_irec high_brec; struct xfs_rmap_query_range_info query; low_brec.r = *low_rec; @@ -2019,6 +2019,20 @@ xfs_rmap_query_range( xfs_rmap_query_range_helper, &query); } +/* Find all rmaps. */ +int +xfs_rmap_query_all( + struct xfs_btree_cur *cur, + xfs_rmap_query_range_fn fn, + void *priv) +{ + struct xfs_rmap_query_range_info query; + + query.priv = priv; + query.fn = fn; + return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query); +} + /* Clean up after calling xfs_rmap_finish_one. */ void xfs_rmap_finish_one_cleanup( @@ -2291,3 +2305,31 @@ xfs_rmap_free_extent( return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap); } + +/* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */ +int +xfs_rmap_compare( + const struct xfs_rmap_irec *a, + const struct xfs_rmap_irec *b) +{ + __u64 oa; + __u64 ob; + + oa = xfs_rmap_irec_offset_pack(a); + ob = xfs_rmap_irec_offset_pack(b); + + if (a->rm_startblock < b->rm_startblock) + return -1; + else if (a->rm_startblock > b->rm_startblock) + return 1; + else if (a->rm_owner < b->rm_owner) + return -1; + else if (a->rm_owner > b->rm_owner) + return 1; + else if (oa < ob) + return -1; + else if (oa > ob) + return 1; + else + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 789930599339058ed8db437a6ff5be061095877c..98f908fea103508a55955c97fc42f9052cf3316b 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -162,6 +162,8 @@ typedef int (*xfs_rmap_query_range_fn)( int xfs_rmap_query_range(struct xfs_btree_cur *cur, struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec, xfs_rmap_query_range_fn fn, void *priv); +int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn, + void *priv); enum xfs_rmap_intent_type { XFS_RMAP_MAP, @@ -212,5 +214,7 @@ int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno, int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, struct xfs_rmap_irec *irec, int *stat); +int xfs_rmap_compare(const struct xfs_rmap_irec *a, + const struct xfs_rmap_irec *b); #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index ea45584a9913bbd51908c9a477bb25c90d000fa9..e47b99e59f6035c0bb76a98aa76df998c659348c 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1016,3 +1016,73 @@ xfs_rtfree_extent( } return 0; } + +/* Find all the free records within a given range. */ +int +xfs_rtalloc_query_range( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *low_rec, + struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, + void *priv) +{ + struct xfs_rtalloc_rec rec; + struct xfs_mount *mp = tp->t_mountp; + xfs_rtblock_t rtstart; + xfs_rtblock_t rtend; + xfs_rtblock_t rem; + int is_free; + int error = 0; + + if (low_rec->ar_startblock > high_rec->ar_startblock) + return -EINVAL; + else if (low_rec->ar_startblock == high_rec->ar_startblock) + return 0; + + /* Iterate the bitmap, looking for discrepancies. */ + rtstart = low_rec->ar_startblock; + rem = high_rec->ar_startblock - rtstart; + while (rem) { + /* Is the first block free? */ + error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend, + &is_free); + if (error) + break; + + /* How long does the extent go for? */ + error = xfs_rtfind_forw(mp, tp, rtstart, + high_rec->ar_startblock - 1, &rtend); + if (error) + break; + + if (is_free) { + rec.ar_startblock = rtstart; + rec.ar_blockcount = rtend - rtstart + 1; + + error = fn(tp, &rec, priv); + if (error) + break; + } + + rem -= rtend - rtstart + 1; + rtstart = rtend + 1; + } + + return error; +} + +/* Find all the free records. */ +int +xfs_rtalloc_query_all( + struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv) +{ + struct xfs_rtalloc_rec keys[2]; + + keys[0].ar_startblock = 0; + keys[1].ar_startblock = tp->t_mountp->m_sb.sb_rblocks; + keys[0].ar_blockcount = keys[1].ar_blockcount = 0; + + return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv); +} diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 7917f6e44286a4591e9109138a7be07dc1a18eb1..d787c677d2a3201cc62d042fbee55415923934f6 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -21,8 +21,20 @@ /* * Components of space reservations. */ + +/* Worst case number of rmaps that can be held in a block. */ #define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \ (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0])) + +/* Adding one rmap could split every level up to the top of the tree. */ +#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels) + +/* Blocks we might need to add "b" rmaps to a tree. */ +#define XFS_NRMAPADD_SPACE_RES(mp, b)\ + (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \ + XFS_RMAPADD_SPACE_RES(mp)) + #define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \ (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0])) #define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1) @@ -30,13 +42,12 @@ (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ XFS_EXTENTADD_SPACE_RES(mp,w)) + +/* Blocks we might need to add "b" mappings & rmappings to a file. */ #define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\ - (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ - XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ - XFS_EXTENTADD_SPACE_RES(mp,w) + \ - ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \ - XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \ - (mp)->m_rmap_maxlevels) + (XFS_NEXTENTADD_SPACE_RES((mp), (b), (w)) + \ + XFS_NRMAPADD_SPACE_RES((mp), (b))) + #define XFS_DAENTER_1B(mp,w) \ ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1) #define XFS_DAENTER_DBS(mp,w) \ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 61494295d92fe1acb7d343bc3a4e1594f09027ab..09af0f7cd55e278312881999755d3d8d0793d5c8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -111,11 +111,11 @@ xfs_finish_page_writeback( bsize = bh->b_size; do { + if (off > end) + break; next = bh->b_this_page; if (off < bvec->bv_offset) goto next_bh; - if (off > end) - break; bh->b_end_io(bh, !error); next_bh: off += bsize; @@ -189,7 +189,7 @@ xfs_setfilesize_trans_alloc( * We hand off the transaction to the completion thread now, so * clear the flag here. */ - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return 0; } @@ -252,7 +252,7 @@ xfs_setfilesize_ioend( * thus we need to mark ourselves as being in a transaction manually. * Similarly for freeze protection. */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); /* we abort the update if there was an IO error */ @@ -1016,7 +1016,7 @@ xfs_do_writepage( * Given that we do not allow direct reclaim to call us, we should * never be called while in a filesystem transaction. */ - if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) + if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) goto redirty; /* @@ -1261,8 +1261,8 @@ xfs_get_blocks( if (nimaps) { trace_xfs_get_blocks_found(ip, offset, size, - ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN - : XFS_IO_OVERWRITE, &imap); + imap.br_state == XFS_EXT_UNWRITTEN ? + XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap); xfs_iunlock(ip, lockmode); } else { trace_xfs_get_blocks_notfound(ip, offset, size); @@ -1276,9 +1276,7 @@ xfs_get_blocks( * For unwritten extents do not report a disk address in the buffered * read case (treat as if we're reading into a hole). */ - if (imap.br_startblock != HOLESTARTBLOCK && - imap.br_startblock != DELAYSTARTBLOCK && - !ISUNWRITTEN(&imap)) + if (xfs_bmap_is_real_extent(&imap)) xfs_map_buffer(inode, bh_result, &imap, offset); /* diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9bf57c76623b404ebf5e1e57dd1582ad52de7c92..d419d23fa2149ae3b022cdec3aba6879c987eec1 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -34,6 +34,8 @@ #include "xfs_bmap.h" #include "xfs_icache.h" #include "xfs_trace.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" kmem_zone_t *xfs_bui_zone; @@ -215,6 +217,7 @@ void xfs_bui_release( struct xfs_bui_log_item *buip) { + ASSERT(atomic_read(&buip->bui_refcount) > 0); if (atomic_dec_and_test(&buip->bui_refcount)) { xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); xfs_bui_item_free(buip); @@ -446,7 +449,8 @@ xfs_bui_recover( return -EIO; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); if (error) return error; budp = xfs_trans_get_bud(tp, buip); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 828532ce0adca80ddfa174540aae6aa06663302b..9e3cc2146d5b03cdc84a4a0d92ce3c186314525d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -81,7 +81,7 @@ xfs_zero_extent( return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), block << (mp->m_super->s_blocksize_bits - 9), count_fsb << (mp->m_super->s_blocksize_bits - 9), - GFP_NOFS, true); + GFP_NOFS, 0); } int @@ -448,10 +448,9 @@ xfs_getbmap_adjust_shared( next_map->br_blockcount = 0; /* Only written data blocks can be shared. */ - if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK || - map->br_startblock == DELAYSTARTBLOCK || - map->br_startblock == HOLESTARTBLOCK || - ISUNWRITTEN(map)) + if (!xfs_is_reflink_inode(ip) || + whichfork != XFS_DATA_FORK || + !xfs_bmap_is_real_extent(map)) return 0; agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); @@ -583,9 +582,13 @@ xfs_getbmap( } break; default: + /* Local format data forks report no extents. */ + if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + bmv->bmv_entries = 0; + return 0; + } if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + ip->i_d.di_format != XFS_DINODE_FMT_BTREE) return -EINVAL; if (xfs_get_extsz_hint(ip) || @@ -713,7 +716,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && @@ -904,9 +907,9 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) } /* - * This is called by xfs_inactive to free any blocks beyond eof - * when the link count isn't zero and by xfs_dm_punch_hole() when - * punching a hole to EOF. + * This is called to free any blocks beyond eof. The caller must hold + * IOLOCK_EXCL unless we are in the inode reclaim path and have the only + * reference to the inode. */ int xfs_free_eofblocks( @@ -921,8 +924,6 @@ xfs_free_eofblocks( struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - /* * Figure out if there are any blocks beyond the end * of the file. If not, then there is nothing to do. @@ -1209,11 +1210,8 @@ xfs_adjust_extent_unmap_boundaries( return error; if (nimap && imap.br_startblock != HOLESTARTBLOCK) { - xfs_daddr_t block; - ASSERT(imap.br_startblock != DELAYSTARTBLOCK); - block = imap.br_startblock; - mod = do_div(block, mp->m_sb.sb_rextsize); + mod = do_mod(imap.br_startblock, mp->m_sb.sb_rextsize); if (mod) *startoffset_fsb += mp->m_sb.sb_rextsize - mod; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b6208728ba39767bbb2898880fbb515abcc5ab4f..16d6a578fc160528651d522359eb6e9ccecac78d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -97,12 +97,16 @@ static inline void xfs_buf_ioacct_inc( struct xfs_buf *bp) { - if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) + if (bp->b_flags & XBF_NO_IOACCT) return; ASSERT(bp->b_flags & XBF_ASYNC); - bp->b_flags |= _XBF_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { + bp->b_state |= XFS_BSTATE_IN_FLIGHT; + percpu_counter_inc(&bp->b_target->bt_io_count); + } + spin_unlock(&bp->b_lock); } /* @@ -110,14 +114,24 @@ xfs_buf_ioacct_inc( * freed and unaccount from the buftarg. */ static inline void -xfs_buf_ioacct_dec( +__xfs_buf_ioacct_dec( struct xfs_buf *bp) { - if (!(bp->b_flags & _XBF_IN_FLIGHT)) - return; + lockdep_assert_held(&bp->b_lock); - bp->b_flags &= ~_XBF_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); + if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { + bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; + percpu_counter_dec(&bp->b_target->bt_io_count); + } +} + +static inline void +xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + spin_unlock(&bp->b_lock); } /* @@ -149,9 +163,9 @@ xfs_buf_stale( * unaccounted (released to LRU) before that occurs. Drop in-flight * status now to preserve accounting consistency. */ - xfs_buf_ioacct_dec(bp); - spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) @@ -443,17 +457,17 @@ _xfs_buf_map_pages( bp->b_addr = NULL; } else { int retried = 0; - unsigned noio_flag; + unsigned nofs_flag; /* * vm_map_ram() will allocate auxillary structures (e.g. * pagetables) with GFP_KERNEL, yet we are likely to be under * GFP_NOFS context here. Hence we need to tell memory reclaim - * that we are in such a context via PF_MEMALLOC_NOIO to prevent + * that we are in such a context via PF_MEMALLOC_NOFS to prevent * memory reclaim re-entering the filesystem here and * potentially deadlocking. */ - noio_flag = memalloc_noio_save(); + nofs_flag = memalloc_nofs_save(); do { bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, -1, PAGE_KERNEL); @@ -461,7 +475,7 @@ _xfs_buf_map_pages( break; vm_unmap_aliases(); } while (retried++ <= 1); - memalloc_noio_restore(noio_flag); + memalloc_nofs_restore(nofs_flag); if (!bp->b_addr) return -ENOMEM; @@ -979,12 +993,12 @@ xfs_buf_rele( * ensures the decrement occurs only once per-buf. */ if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); goto out_unlock; } /* the last reference has been dropped ... */ - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU take a new reference to the @@ -1079,6 +1093,8 @@ void xfs_buf_unlock( struct xfs_buf *bp) { + ASSERT(xfs_buf_islocked(bp)); + XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1814,6 +1830,28 @@ xfs_alloc_buftarg( return NULL; } +/* + * Cancel a delayed write list. + * + * Remove each buffer from the list, clear the delwri queue flag and drop the + * associated buffer reference. + */ +void +xfs_buf_delwri_cancel( + struct list_head *list) +{ + struct xfs_buf *bp; + + while (!list_empty(list)) { + bp = list_first_entry(list, struct xfs_buf, b_list); + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + } +} + /* * Add a buffer to the delayed write list. * diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 3c867e5a63e1d591bd51cc9ba34ef185bbd9e0e7..1508121f29f29191da1a4efc7c8f12cf42eb0107 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -63,7 +63,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */ typedef unsigned int xfs_buf_flags_t; @@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_IN_FLIGHT, "IN_FLIGHT" } + { _XBF_COMPOUND, "COMPOUND" } /* * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ /* * The xfs_buftarg contains 2 notions of "sector size" - @@ -291,7 +290,6 @@ xfs_buf_readahead( return xfs_buf_readahead_map(target, &map, 1, ops); } -struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); @@ -330,6 +328,7 @@ extern void *xfs_buf_offset(struct xfs_buf *, size_t); extern void xfs_buf_stale(struct xfs_buf *bp); /* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index ad9396e516f6e389b88bca5dc2dc41d3372ed714..20b7a5c6eb2f68e93e1ad11be1d086a0d1d6aa33 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -394,6 +394,7 @@ xfs_dir2_leaf_readbuf( /* * Do we need more readahead? + * Each loop tries to process 1 full dir blk; last may be partial. */ blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; @@ -404,7 +405,8 @@ xfs_dir2_leaf_readbuf( * Read-ahead a contiguous directory block. */ if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= geo->fsbcount) { + (map[mip->ra_index].br_blockcount - mip->ra_offset) >= + geo->fsbcount) { xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, XFS_FSB_TO_DADDR(dp->i_mount, @@ -425,14 +427,19 @@ xfs_dir2_leaf_readbuf( } /* - * Advance offset through the mapping table. + * Advance offset through the mapping table, processing a full + * dir block even if it is fragmented into several extents. + * But stop if we have consumed all valid mappings, even if + * it's not yet a full directory block. */ - for (j = 0; j < geo->fsbcount; j += length ) { + for (j = 0; + j < geo->fsbcount && mip->ra_index < mip->map_valid; + j += length ) { /* * The rest of this extent but not more than a dir * block. */ - length = min_t(int, geo->fsbcount, + length = min_t(int, geo->fsbcount - j, map[mip->ra_index].br_blockcount - mip->ra_offset); mip->ra_offset += length; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index d796ffac729690ab5273a6007380e76170602cef..6a05d278da64f666e525ca96dfae306face43565 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -132,6 +132,11 @@ xfs_trim_extents( error = xfs_btree_decrement(cur, 0, &i); if (error) goto out_del_cursor; + + if (fatal_signal_pending(current)) { + error = -ERESTARTSYS; + goto out_del_cursor; + } } out_del_cursor: @@ -196,8 +201,11 @@ xfs_ioc_trim( for (agno = start_agno; agno <= end_agno; agno++) { error = xfs_trim_extents(mp, agno, start, end, minlen, &blocks_trimmed); - if (error) + if (error) { last_error = error; + if (error == -ERESTARTSYS) + break; + } } if (last_error) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index d7bc14906af87f14ef570a968ead42100445c8bd..44f8c54512102577dbe768e5f137298ba1ee79c7 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -290,6 +290,7 @@ void xfs_efi_release( struct xfs_efi_log_item *efip) { + ASSERT(atomic_read(&efip->efi_refcount) > 0); if (atomic_dec_and_test(&efip->efi_refcount)) { xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); xfs_efi_item_free(efip); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 35703a80137208b81cb38ab91ce50e28d80a19f2..5fb5a0958a1485db46a2f753446c11828ff1913e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1043,49 +1043,17 @@ xfs_find_get_desired_pgoff( index = startoff >> PAGE_SHIFT; endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_SHIFT; + end = (endoff - 1) >> PAGE_SHIFT; do { int want; unsigned nr_pages; unsigned int i; - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); - /* - * No page mapped into given range. If we are searching holes - * and if this is the first time we got into the loop, it means - * that the given offset is landed in a hole, return it. - * - * If we have already stepped through some block buffers to find - * holes but they all contains data. In this case, the last - * offset is already updated and pointed to the end of the last - * mapped page, if it does not reach the endpoint to search, - * that means there should be a hole between them. - */ - if (nr_pages == 0) { - /* Data search found nothing */ - if (type == DATA_OFF) - break; - - ASSERT(type == HOLE_OFF); - if (lastoff == startoff || lastoff < endoff) { - found = true; - *offset = lastoff; - } - break; - } - - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; + if (nr_pages == 0) break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -1098,18 +1066,18 @@ xfs_find_get_desired_pgoff( * file mapping. However, page->index will not change * because we have a reference on the page. * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. + * If current page offset is beyond where we've ended, + * we've found a hole. */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } + if (type == HOLE_OFF && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { + found = true; + *offset = lastoff; goto out; } + /* Searching done if the page index is out of range. */ + if (page->index > end) + goto out; lock_page(page); /* @@ -1151,21 +1119,20 @@ xfs_find_get_desired_pgoff( /* * The number of returned pages less than our desired, search - * done. In this case, nothing was found for searching data, - * but we found a hole behind the last offset. + * done. */ - if (nr_pages < want) { - if (type == HOLE_OFF) { - *offset = lastoff; - found = true; - } + if (nr_pages < want) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + /* No page at lastoff and we are not done - we found a hole. */ + if (type == HOLE_OFF && lastoff < endoff) { + *offset = lastoff; + found = true; + } out: pagevec_release(&pvec); return found; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c new file mode 100644 index 0000000000000000000000000000000000000000..814ed729881d9a4305c3dd5646d75ef0f112b87b --- /dev/null +++ b/fs/xfs/xfs_fsmap.c @@ -0,0 +1,943 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_error.h" +#include "xfs_btree.h" +#include "xfs_rmap_btree.h" +#include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_rmap.h" +#include "xfs_alloc.h" +#include "xfs_bit.h" +#include +#include "xfs_fsmap.h" +#include "xfs_refcount.h" +#include "xfs_refcount_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_rtalloc.h" + +/* Convert an xfs_fsmap to an fsmap. */ +void +xfs_fsmap_from_internal( + struct fsmap *dest, + struct xfs_fsmap *src) +{ + dest->fmr_device = src->fmr_device; + dest->fmr_flags = src->fmr_flags; + dest->fmr_physical = BBTOB(src->fmr_physical); + dest->fmr_owner = src->fmr_owner; + dest->fmr_offset = BBTOB(src->fmr_offset); + dest->fmr_length = BBTOB(src->fmr_length); + dest->fmr_reserved[0] = 0; + dest->fmr_reserved[1] = 0; + dest->fmr_reserved[2] = 0; +} + +/* Convert an fsmap to an xfs_fsmap. */ +void +xfs_fsmap_to_internal( + struct xfs_fsmap *dest, + struct fsmap *src) +{ + dest->fmr_device = src->fmr_device; + dest->fmr_flags = src->fmr_flags; + dest->fmr_physical = BTOBBT(src->fmr_physical); + dest->fmr_owner = src->fmr_owner; + dest->fmr_offset = BTOBBT(src->fmr_offset); + dest->fmr_length = BTOBBT(src->fmr_length); +} + +/* Convert an fsmap owner into an rmapbt owner. */ +static int +xfs_fsmap_owner_to_rmap( + struct xfs_rmap_irec *dest, + struct xfs_fsmap *src) +{ + if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) { + dest->rm_owner = src->fmr_owner; + return 0; + } + + switch (src->fmr_owner) { + case 0: /* "lowest owner id possible" */ + case -1ULL: /* "highest owner id possible" */ + dest->rm_owner = 0; + break; + case XFS_FMR_OWN_FREE: + dest->rm_owner = XFS_RMAP_OWN_NULL; + break; + case XFS_FMR_OWN_UNKNOWN: + dest->rm_owner = XFS_RMAP_OWN_UNKNOWN; + break; + case XFS_FMR_OWN_FS: + dest->rm_owner = XFS_RMAP_OWN_FS; + break; + case XFS_FMR_OWN_LOG: + dest->rm_owner = XFS_RMAP_OWN_LOG; + break; + case XFS_FMR_OWN_AG: + dest->rm_owner = XFS_RMAP_OWN_AG; + break; + case XFS_FMR_OWN_INOBT: + dest->rm_owner = XFS_RMAP_OWN_INOBT; + break; + case XFS_FMR_OWN_INODES: + dest->rm_owner = XFS_RMAP_OWN_INODES; + break; + case XFS_FMR_OWN_REFC: + dest->rm_owner = XFS_RMAP_OWN_REFC; + break; + case XFS_FMR_OWN_COW: + dest->rm_owner = XFS_RMAP_OWN_COW; + break; + case XFS_FMR_OWN_DEFECTIVE: /* not implemented */ + /* fall through */ + default: + return -EINVAL; + } + return 0; +} + +/* Convert an rmapbt owner into an fsmap owner. */ +static int +xfs_fsmap_owner_from_rmap( + struct xfs_fsmap *dest, + struct xfs_rmap_irec *src) +{ + dest->fmr_flags = 0; + if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) { + dest->fmr_owner = src->rm_owner; + return 0; + } + dest->fmr_flags |= FMR_OF_SPECIAL_OWNER; + + switch (src->rm_owner) { + case XFS_RMAP_OWN_FS: + dest->fmr_owner = XFS_FMR_OWN_FS; + break; + case XFS_RMAP_OWN_LOG: + dest->fmr_owner = XFS_FMR_OWN_LOG; + break; + case XFS_RMAP_OWN_AG: + dest->fmr_owner = XFS_FMR_OWN_AG; + break; + case XFS_RMAP_OWN_INOBT: + dest->fmr_owner = XFS_FMR_OWN_INOBT; + break; + case XFS_RMAP_OWN_INODES: + dest->fmr_owner = XFS_FMR_OWN_INODES; + break; + case XFS_RMAP_OWN_REFC: + dest->fmr_owner = XFS_FMR_OWN_REFC; + break; + case XFS_RMAP_OWN_COW: + dest->fmr_owner = XFS_FMR_OWN_COW; + break; + case XFS_RMAP_OWN_NULL: /* "free" */ + dest->fmr_owner = XFS_FMR_OWN_FREE; + break; + default: + return -EFSCORRUPTED; + } + return 0; +} + +/* getfsmap query state */ +struct xfs_getfsmap_info { + struct xfs_fsmap_head *head; + xfs_fsmap_format_t formatter; /* formatting fn */ + void *format_arg; /* format buffer */ + struct xfs_buf *agf_bp; /* AGF, for refcount queries */ + xfs_daddr_t next_daddr; /* next daddr we expect */ + u64 missing_owner; /* owner of holes */ + u32 dev; /* device id */ + xfs_agnumber_t agno; /* AG number, if applicable */ + struct xfs_rmap_irec low; /* low rmap key */ + struct xfs_rmap_irec high; /* high rmap key */ + bool last; /* last extent? */ +}; + +/* Associate a device with a getfsmap handler. */ +struct xfs_getfsmap_dev { + u32 dev; + int (*fn)(struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info); +}; + +/* Compare two getfsmap device handlers. */ +static int +xfs_getfsmap_dev_compare( + const void *p1, + const void *p2) +{ + const struct xfs_getfsmap_dev *d1 = p1; + const struct xfs_getfsmap_dev *d2 = p2; + + return d1->dev - d2->dev; +} + +/* Decide if this mapping is shared. */ +STATIC int +xfs_getfsmap_is_shared( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_rmap_irec *rec, + bool *stat) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_btree_cur *cur; + xfs_agblock_t fbno; + xfs_extlen_t flen; + int error; + + *stat = false; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return 0; + /* rt files will have agno set to NULLAGNUMBER */ + if (info->agno == NULLAGNUMBER) + return 0; + + /* Are there any shared blocks here? */ + flen = 0; + cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, + info->agno, NULL); + + error = xfs_refcount_find_shared(cur, rec->rm_startblock, + rec->rm_blockcount, &fbno, &flen, false); + + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + if (error) + return error; + + *stat = flen > 0; + return 0; +} + +/* + * Format a reverse mapping for getfsmap, having translated rm_startblock + * into the appropriate daddr units. + */ +STATIC int +xfs_getfsmap_helper( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_rmap_irec *rec, + xfs_daddr_t rec_daddr) +{ + struct xfs_fsmap fmr; + struct xfs_mount *mp = tp->t_mountp; + bool shared; + int error; + + if (fatal_signal_pending(current)) + return -EINTR; + + /* + * Filter out records that start before our startpoint, if the + * caller requested that. + */ + if (xfs_rmap_compare(rec, &info->low) < 0) { + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return XFS_BTREE_QUERY_RANGE_CONTINUE; + } + + /* Are we just counting mappings? */ + if (info->head->fmh_count == 0) { + if (rec_daddr > info->next_daddr) + info->head->fmh_entries++; + + if (info->last) + return XFS_BTREE_QUERY_RANGE_CONTINUE; + + info->head->fmh_entries++; + + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return XFS_BTREE_QUERY_RANGE_CONTINUE; + } + + /* + * If the record starts past the last physical block we saw, + * then we've found a gap. Report the gap as being owned by + * whatever the caller specified is the missing owner. + */ + if (rec_daddr > info->next_daddr) { + if (info->head->fmh_entries >= info->head->fmh_count) + return XFS_BTREE_QUERY_RANGE_ABORT; + + fmr.fmr_device = info->dev; + fmr.fmr_physical = info->next_daddr; + fmr.fmr_owner = info->missing_owner; + fmr.fmr_offset = 0; + fmr.fmr_length = rec_daddr - info->next_daddr; + fmr.fmr_flags = FMR_OF_SPECIAL_OWNER; + error = info->formatter(&fmr, info->format_arg); + if (error) + return error; + info->head->fmh_entries++; + } + + if (info->last) + goto out; + + /* Fill out the extent we found */ + if (info->head->fmh_entries >= info->head->fmh_count) + return XFS_BTREE_QUERY_RANGE_ABORT; + + trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec); + + fmr.fmr_device = info->dev; + fmr.fmr_physical = rec_daddr; + error = xfs_fsmap_owner_from_rmap(&fmr, rec); + if (error) + return error; + fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset); + fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (rec->rm_flags & XFS_RMAP_UNWRITTEN) + fmr.fmr_flags |= FMR_OF_PREALLOC; + if (rec->rm_flags & XFS_RMAP_ATTR_FORK) + fmr.fmr_flags |= FMR_OF_ATTR_FORK; + if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) + fmr.fmr_flags |= FMR_OF_EXTENT_MAP; + if (fmr.fmr_flags == 0) { + error = xfs_getfsmap_is_shared(tp, info, rec, &shared); + if (error) + return error; + if (shared) + fmr.fmr_flags |= FMR_OF_SHARED; + } + error = info->formatter(&fmr, info->format_arg); + if (error) + return error; + info->head->fmh_entries++; + +out: + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return XFS_BTREE_QUERY_RANGE_CONTINUE; +} + +/* Transform a rmapbt irec into a fsmap */ +STATIC int +xfs_getfsmap_datadev_helper( + struct xfs_btree_cur *cur, + struct xfs_rmap_irec *rec, + void *priv) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_getfsmap_info *info = priv; + xfs_fsblock_t fsb; + xfs_daddr_t rec_daddr; + + fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock); + rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); + + return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); +} + +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +} + +/* Transform a bnobt irec into a fsmap */ +STATIC int +xfs_getfsmap_datadev_bnobt_helper( + struct xfs_btree_cur *cur, + struct xfs_alloc_rec_incore *rec, + void *priv) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno, + rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr); +} + +/* Set rmap flags based on the getfsmap flags */ +static void +xfs_getfsmap_set_irec_flags( + struct xfs_rmap_irec *irec, + struct xfs_fsmap *fmr) +{ + irec->rm_flags = 0; + if (fmr->fmr_flags & FMR_OF_ATTR_FORK) + irec->rm_flags |= XFS_RMAP_ATTR_FORK; + if (fmr->fmr_flags & FMR_OF_EXTENT_MAP) + irec->rm_flags |= XFS_RMAP_BMBT_BLOCK; + if (fmr->fmr_flags & FMR_OF_PREALLOC) + irec->rm_flags |= XFS_RMAP_UNWRITTEN; +} + +/* Execute a getfsmap query against the log device. */ +STATIC int +xfs_getfsmap_logdev( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rmap_irec rmap; + int error; + + /* Set up search keys */ + info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, keys); + if (error) + return error; + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + + error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1); + if (error) + return error; + info->high.rm_startblock = -1U; + info->high.rm_owner = ULLONG_MAX; + info->high.rm_offset = ULLONG_MAX; + info->high.rm_blockcount = 0; + info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; + info->missing_owner = XFS_FMR_OWN_FREE; + + trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high); + + if (keys[0].fmr_physical > 0) + return 0; + + /* Fabricate an rmap entry for the external log device. */ + rmap.rm_startblock = 0; + rmap.rm_blockcount = mp->m_sb.sb_logblocks; + rmap.rm_owner = XFS_RMAP_OWN_LOG; + rmap.rm_offset = 0; + rmap.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &rmap, 0); +} + +/* Execute a getfsmap query against the realtime device. */ +STATIC int +__xfs_getfsmap_rtdev( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + int (*query_fn)(struct xfs_trans *, + struct xfs_getfsmap_info *), + struct xfs_getfsmap_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + xfs_fsblock_t start_fsb; + xfs_fsblock_t end_fsb; + xfs_daddr_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); + if (keys[0].fmr_physical >= eofs) + return 0; + if (keys[1].fmr_physical >= eofs) + keys[1].fmr_physical = eofs - 1; + start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); + end_fsb = XFS_BB_TO_FSB(mp, keys[1].fmr_physical); + + /* Set up search keys */ + info->low.rm_startblock = start_fsb; + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) + return error; + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + + info->high.rm_startblock = end_fsb; + error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); + if (error) + return error; + info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset); + info->high.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); + + trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high); + + return query_fn(tp, info); +} + +/* Actually query the realtime bitmap. */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_query( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info) +{ + struct xfs_rtalloc_rec alow; + struct xfs_rtalloc_rec ahigh; + int error; + + xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); + + alow.ar_startblock = info->low.rm_startblock; + ahigh.ar_startblock = info->high.rm_startblock; + error = xfs_rtalloc_query_range(tp, &alow, &ahigh, + xfs_getfsmap_rtdev_rtbitmap_helper, info); + if (error) + goto err; + + /* Report any gaps at the end of the rtbitmap */ + info->last = true; + error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info); + if (error) + goto err; +err: + xfs_iunlock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); + return error; +} + +/* Execute a getfsmap query against the realtime device rtbitmap. */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + info->missing_owner = XFS_FMR_OWN_UNKNOWN; + return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, + info); +} + +/* Execute a getfsmap query against the regular data device. */ +STATIC int +__xfs_getfsmap_datadev( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info, + int (*query_fn)(struct xfs_trans *, + struct xfs_getfsmap_info *, + struct xfs_btree_cur **, + void *), + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_btree_cur *bt_cur = NULL; + xfs_fsblock_t start_fsb; + xfs_fsblock_t end_fsb; + xfs_agnumber_t start_ag; + xfs_agnumber_t end_ag; + xfs_daddr_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); + if (keys[0].fmr_physical >= eofs) + return 0; + if (keys[1].fmr_physical >= eofs) + keys[1].fmr_physical = eofs - 1; + start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical); + end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical); + + /* + * Convert the fsmap low/high keys to AG based keys. Initialize + * low to the fsmap low key and max out the high key to the end + * of the AG. + */ + info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) + return error; + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + + info->high.rm_startblock = -1U; + info->high.rm_owner = ULLONG_MAX; + info->high.rm_offset = ULLONG_MAX; + info->high.rm_blockcount = 0; + info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; + + start_ag = XFS_FSB_TO_AGNO(mp, start_fsb); + end_ag = XFS_FSB_TO_AGNO(mp, end_fsb); + + /* Query each AG */ + for (info->agno = start_ag; info->agno <= end_ag; info->agno++) { + /* + * Set the AG high key from the fsmap high key if this + * is the last AG that we're querying. + */ + if (info->agno == end_ag) { + info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp, + end_fsb); + info->high.rm_offset = XFS_BB_TO_FSBT(mp, + keys[1].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); + if (error) + goto err; + xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); + } + + if (bt_cur) { + xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR); + bt_cur = NULL; + xfs_trans_brelse(tp, info->agf_bp); + info->agf_bp = NULL; + } + + error = xfs_alloc_read_agf(mp, tp, info->agno, 0, + &info->agf_bp); + if (error) + goto err; + + trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, info->agno, + &info->high); + + error = query_fn(tp, info, &bt_cur, priv); + if (error) + goto err; + + /* + * Set the AG low key to the start of the AG prior to + * moving on to the next AG. + */ + if (info->agno == start_ag) { + info->low.rm_startblock = 0; + info->low.rm_owner = 0; + info->low.rm_offset = 0; + info->low.rm_flags = 0; + } + } + + /* Report any gap at the end of the AG */ + info->last = true; + error = query_fn(tp, info, &bt_cur, priv); + if (error) + goto err; + +err: + if (bt_cur) + xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR : + XFS_BTREE_NOERROR); + if (info->agf_bp) { + xfs_trans_brelse(tp, info->agf_bp); + info->agf_bp = NULL; + } + + return error; +} + +/* Actually query the rmap btree. */ +STATIC int +xfs_getfsmap_datadev_rmapbt_query( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_btree_cur **curpp, + void *priv) +{ + /* Report any gap at the end of the last AG. */ + if (info->last) + return xfs_getfsmap_datadev_helper(*curpp, &info->high, info); + + /* Allocate cursor for this AG and query_range it. */ + *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp, + info->agno); + return xfs_rmap_query_range(*curpp, &info->low, &info->high, + xfs_getfsmap_datadev_helper, info); +} + +/* Execute a getfsmap query against the regular data device rmapbt. */ +STATIC int +xfs_getfsmap_datadev_rmapbt( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + info->missing_owner = XFS_FMR_OWN_FREE; + return __xfs_getfsmap_datadev(tp, keys, info, + xfs_getfsmap_datadev_rmapbt_query, NULL); +} + +/* Actually query the bno btree. */ +STATIC int +xfs_getfsmap_datadev_bnobt_query( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + struct xfs_btree_cur **curpp, + void *priv) +{ + struct xfs_alloc_rec_incore *key = priv; + + /* Report any gap at the end of the last AG. */ + if (info->last) + return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info); + + /* Allocate cursor for this AG and query_range it. */ + *curpp = xfs_allocbt_init_cursor(tp->t_mountp, tp, info->agf_bp, + info->agno, XFS_BTNUM_BNO); + key->ar_startblock = info->low.rm_startblock; + key[1].ar_startblock = info->high.rm_startblock; + return xfs_alloc_query_range(*curpp, key, &key[1], + xfs_getfsmap_datadev_bnobt_helper, info); +} + +/* Execute a getfsmap query against the regular data device's bnobt. */ +STATIC int +xfs_getfsmap_datadev_bnobt( + struct xfs_trans *tp, + struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + struct xfs_alloc_rec_incore akeys[2]; + + info->missing_owner = XFS_FMR_OWN_UNKNOWN; + return __xfs_getfsmap_datadev(tp, keys, info, + xfs_getfsmap_datadev_bnobt_query, &akeys[0]); +} + +/* Do we recognize the device? */ +STATIC bool +xfs_getfsmap_is_valid_device( + struct xfs_mount *mp, + struct xfs_fsmap *fm) +{ + if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || + fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev)) + return true; + if (mp->m_logdev_targp && + fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev)) + return true; + if (mp->m_rtdev_targp && + fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev)) + return true; + return false; +} + +/* Ensure that the low key is less than the high key. */ +STATIC bool +xfs_getfsmap_check_keys( + struct xfs_fsmap *low_key, + struct xfs_fsmap *high_key) +{ + if (low_key->fmr_device > high_key->fmr_device) + return false; + if (low_key->fmr_device < high_key->fmr_device) + return true; + + if (low_key->fmr_physical > high_key->fmr_physical) + return false; + if (low_key->fmr_physical < high_key->fmr_physical) + return true; + + if (low_key->fmr_owner > high_key->fmr_owner) + return false; + if (low_key->fmr_owner < high_key->fmr_owner) + return true; + + if (low_key->fmr_offset > high_key->fmr_offset) + return false; + if (low_key->fmr_offset < high_key->fmr_offset) + return true; + + return false; +} + +#define XFS_GETFSMAP_DEVS 3 +/* + * Get filesystem's extents as described in head, and format for + * output. Calls formatter to fill the user's buffer until all + * extents are mapped, until the passed-in head->fmh_count slots have + * been filled, or until the formatter short-circuits the loop, if it + * is tracking filled-in extents on its own. + * + * Key to Confusion + * ---------------- + * There are multiple levels of keys and counters at work here: + * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in; + * these reflect fs-wide sector addrs. + * dkeys -- fmh_keys used to query each device; + * these are fmh_keys but w/ the low key + * bumped up by fmr_length. + * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this + * is how we detect gaps in the fsmap + records and report them. + * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from + * dkeys; used to query the metadata. + */ +int +xfs_getfsmap( + struct xfs_mount *mp, + struct xfs_fsmap_head *head, + xfs_fsmap_format_t formatter, + void *arg) +{ + struct xfs_trans *tp = NULL; + struct xfs_fsmap dkeys[2]; /* per-dev keys */ + struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; + struct xfs_getfsmap_info info = { NULL }; + bool use_rmap; + int i; + int error = 0; + + if (head->fmh_iflags & ~FMH_IF_VALID) + return -EINVAL; + if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || + !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) + return -EINVAL; + + use_rmap = capable(CAP_SYS_ADMIN) && + xfs_sb_version_hasrmapbt(&mp->m_sb); + head->fmh_entries = 0; + + /* Set up our device handlers. */ + memset(handlers, 0, sizeof(handlers)); + handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); + if (use_rmap) + handlers[0].fn = xfs_getfsmap_datadev_rmapbt; + else + handlers[0].fn = xfs_getfsmap_datadev_bnobt; + if (mp->m_logdev_targp != mp->m_ddev_targp) { + handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); + handlers[1].fn = xfs_getfsmap_logdev; + } + if (mp->m_rtdev_targp) { + handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); + handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; + } + + xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), + xfs_getfsmap_dev_compare); + + /* + * To continue where we left off, we allow userspace to use the + * last mapping from a previous call as the low key of the next. + * This is identified by a non-zero length in the low key. We + * have to increment the low key in this scenario to ensure we + * don't return the same mapping again, and instead return the + * very next mapping. + * + * If the low key mapping refers to file data, the same physical + * blocks could be mapped to several other files/offsets. + * According to rmapbt record ordering, the minimal next + * possible record for the block range is the next starting + * offset in the same inode. Therefore, bump the file offset to + * continue the search appropriately. For all other low key + * mapping types (attr blocks, metadata), bump the physical + * offset as there can be no other mapping for the same physical + * block range. + */ + dkeys[0] = head->fmh_keys[0]; + if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { + dkeys[0].fmr_physical += dkeys[0].fmr_length; + dkeys[0].fmr_owner = 0; + if (dkeys[0].fmr_offset) + return -EINVAL; + } else + dkeys[0].fmr_offset += dkeys[0].fmr_length; + dkeys[0].fmr_length = 0; + memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); + + if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) + return -EINVAL; + + info.next_daddr = head->fmh_keys[0].fmr_physical + + head->fmh_keys[0].fmr_length; + info.formatter = formatter; + info.format_arg = arg; + info.head = head; + + /* For each device we support... */ + for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { + /* Is this device within the range the user asked for? */ + if (!handlers[i].fn) + continue; + if (head->fmh_keys[0].fmr_device > handlers[i].dev) + continue; + if (head->fmh_keys[1].fmr_device < handlers[i].dev) + break; + + /* + * If this device number matches the high key, we have + * to pass the high key to the handler to limit the + * query results. If the device number exceeds the + * low key, zero out the low key so that we get + * everything from the beginning. + */ + if (handlers[i].dev == head->fmh_keys[1].fmr_device) + dkeys[1] = head->fmh_keys[1]; + if (handlers[i].dev > head->fmh_keys[0].fmr_device) + memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); + + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + break; + + info.dev = handlers[i].dev; + info.last = false; + info.agno = NULLAGNUMBER; + error = handlers[i].fn(tp, dkeys, &info); + if (error) + break; + xfs_trans_cancel(tp); + tp = NULL; + info.next_daddr = 0; + } + + if (tp) + xfs_trans_cancel(tp); + head->fmh_oflags = FMH_OF_DEV_T; + return error; +} diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h new file mode 100644 index 0000000000000000000000000000000000000000..0b9bf822595c0e7d43c6f43ae9104c8fcdaa995c --- /dev/null +++ b/fs/xfs/xfs_fsmap.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_FSMAP_H__ +#define __XFS_FSMAP_H__ + +struct fsmap; + +/* internal fsmap representation */ +struct xfs_fsmap { + dev_t fmr_device; /* device id */ + uint32_t fmr_flags; /* mapping flags */ + uint64_t fmr_physical; /* device offset of segment */ + uint64_t fmr_owner; /* owner id */ + xfs_fileoff_t fmr_offset; /* file offset of segment */ + xfs_filblks_t fmr_length; /* length of segment, blocks */ +}; + +struct xfs_fsmap_head { + uint32_t fmh_iflags; /* control flags */ + uint32_t fmh_oflags; /* output flags */ + unsigned int fmh_count; /* # of entries in array incl. input */ + unsigned int fmh_entries; /* # of entries filled in (output). */ + + struct xfs_fsmap fmh_keys[2]; /* low and high keys */ +}; + +void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src); +void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src); + +/* fsmap to userspace formatter - copy to user & advance pointer */ +typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *); + +int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head, + xfs_fsmap_format_t formatter, void *arg); + +#endif /* __XFS_FSMAP_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3531f8f72fa5e10b83f0fa8bd37afc560b2dbf0a..990210fcb9c326f923c0c25a86557a0aba3bb40f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -66,7 +66,6 @@ xfs_inode_alloc( XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); @@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (pag->pag_ici_reclaimable++) return; @@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag( { struct xfs_mount *mp = pag->pag_mount; - ASSERT(spin_is_locked(&pag->pag_ici_lock)); + lockdep_assert_held(&pag->pag_ici_lock); if (--pag->pag_ici_reclaimable) return; @@ -262,6 +261,22 @@ xfs_inode_clear_reclaim_tag( xfs_perag_clear_reclaim_tag(pag); } +static void +xfs_inew_wait( + struct xfs_inode *ip) +{ + wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT); + DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); + + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + if (!xfs_iflags_test(ip, XFS_INEW)) + break; + schedule(); + } while (true); + finish_wait(wq, &wait.wait); +} + /* * When we recycle a reclaimable inode, we need to re-initialise the VFS inode * part of the structure. This is made more complex by the fact we store @@ -366,14 +381,17 @@ xfs_iget_cache_hit( error = xfs_reinit_inode(mp, inode); if (error) { + bool wake; /* * Re-initializing the inode failed, and we are in deep * trouble. Try to re-add it to the reclaim list. */ rcu_read_lock(); spin_lock(&ip->i_flags_lock); - + wake = !!__xfs_iflags_test(ip, XFS_INEW); ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + if (wake) + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; @@ -623,9 +641,11 @@ xfs_iget( STATIC int xfs_inode_ag_walk_grab( - struct xfs_inode *ip) + struct xfs_inode *ip, + int flags) { struct inode *inode = VFS_I(ip); + bool newinos = !!(flags & XFS_AGITER_INEW_WAIT); ASSERT(rcu_read_lock_held()); @@ -643,7 +663,8 @@ xfs_inode_ag_walk_grab( goto out_unlock_noent; /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) || + __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)) goto out_unlock_noent; spin_unlock(&ip->i_flags_lock); @@ -671,7 +692,8 @@ xfs_inode_ag_walk( void *args), int flags, void *args, - int tag) + int tag, + int iter_flags) { uint32_t first_index; int last_error = 0; @@ -713,7 +735,7 @@ xfs_inode_ag_walk( for (i = 0; i < nr_found; i++) { struct xfs_inode *ip = batch[i]; - if (done || xfs_inode_ag_walk_grab(ip)) + if (done || xfs_inode_ag_walk_grab(ip, iter_flags)) batch[i] = NULL; /* @@ -741,6 +763,9 @@ xfs_inode_ag_walk( for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; + if ((iter_flags & XFS_AGITER_INEW_WAIT) && + xfs_iflags_test(batch[i], XFS_INEW)) + xfs_inew_wait(batch[i]); error = execute(batch[i], flags, args); IRELE(batch[i]); if (error == -EAGAIN) { @@ -820,12 +845,13 @@ xfs_cowblocks_worker( } int -xfs_inode_ag_iterator( +xfs_inode_ag_iterator_flags( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, - void *args) + void *args, + int iter_flags) { struct xfs_perag *pag; int error = 0; @@ -835,7 +861,8 @@ xfs_inode_ag_iterator( ag = 0; while ((pag = xfs_perag_get(mp, ag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, + iter_flags); xfs_perag_put(pag); if (error) { last_error = error; @@ -846,6 +873,17 @@ xfs_inode_ag_iterator( return last_error; } +int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int flags, + void *args) +{ + return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0); +} + int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, @@ -863,7 +901,8 @@ xfs_inode_ag_iterator_tag( ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, tag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, + 0); xfs_perag_put(pag); if (error) { last_error = error; diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 8a7c849b4deade24831871a28d76c9af5c7f3629..9183f77958ef7946b50d6b3fde672ad6108702d4 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -48,6 +48,11 @@ struct xfs_eofblocks { #define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_DONTCACHE 0x4 +/* + * flags for AG inode iterator + */ +#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */ + int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp); @@ -79,6 +84,9 @@ void xfs_cowblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); +int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, void *args), + int flags, void *args, int iter_flags); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7605d83965963566e1d6acef679591cf9787d161..ec9826c565009e5244682beae93c4efc5261b487 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1906,12 +1906,13 @@ xfs_inactive( * force is true because we are evicting an inode from the * cache. Post-eof blocks must be freed, lest we end up with * broken free space accounting. + * + * Note: don't bother with iolock here since lockdep complains + * about acquiring it in reclaim context. We have the only + * reference to the inode at this point anyways. */ - if (xfs_can_free_eofblocks(ip, true)) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (xfs_can_free_eofblocks(ip, true)) xfs_free_eofblocks(ip); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } return; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 10dcf27b4c85ae9d437e77a554dd15f139a40bb3..10e89fcb49d7441d0a4e5e931809562de4c4b754 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -216,7 +216,8 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ -#define XFS_INEW (1 << 3) /* inode has just been allocated */ +#define __XFS_INEW_BIT 3 /* inode has just been allocated */ +#define XFS_INEW (1 << __XFS_INEW_BIT) #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -464,6 +465,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip) xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(VFS_I(ip)); + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); } static inline void xfs_setup_existing_inode(struct xfs_inode *ip) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index d90e7811ccdd9ccec14f9ebb29219bc065d36270..08cb7d1a4a3a40ebc5f456c3767d998f433c8ed4 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -731,22 +731,27 @@ xfs_iflush_done( * holding the lock before removing the inode from the AIL. */ if (need_ail) { - struct xfs_log_item *log_items[need_ail]; - int i = 0; + bool mlip_changed = false; + + /* this is an opencoded batch version of xfs_trans_ail_delete */ spin_lock(&ailp->xa_lock); for (blip = lip; blip; blip = blip->li_bio_list) { - iip = INODE_ITEM(blip); - if (iip->ili_logged && - blip->li_lsn == iip->ili_flush_lsn) { - log_items[i++] = blip; - } - ASSERT(i <= need_ail); + if (INODE_ITEM(blip)->ili_logged && + blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) + mlip_changed |= xfs_ail_delete_one(ailp, blip); } - /* xfs_trans_ail_delete_bulk() drops the AIL lock. */ - xfs_trans_ail_delete_bulk(ailp, log_items, i, - SHUTDOWN_CORRUPT_INCORE); - } + if (mlip_changed) { + if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) + xlog_assign_tail_lsn_locked(ailp->xa_mount); + if (list_empty(&ailp->xa_ail)) + wake_up_all(&ailp->xa_empty); + } + spin_unlock(&ailp->xa_lock); + + if (mlip_changed) + xfs_log_space_wake(ailp->xa_mount); + } /* * clean up and unlock the flush lock now we are done. We can clear the diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 2fd7fdf5438f0be85220b9981bff09efce773079..6190697603c947d3706e102e4c00b9c391bfbe2f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -41,6 +41,9 @@ #include "xfs_trans.h" #include "xfs_pnfs.h" #include "xfs_acl.h" +#include "xfs_btree.h" +#include +#include "xfs_fsmap.h" #include #include @@ -1543,10 +1546,11 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmapx bmx; + struct getbmapx bmx = { 0 }; int error; - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + /* struct getbmap is a strict subset of struct getbmapx. */ + if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags))) return -EFAULT; if (bmx.bmv_count < 2) @@ -1608,6 +1612,84 @@ xfs_ioc_getbmapx( return 0; } +struct getfsmap_info { + struct xfs_mount *mp; + struct fsmap_head __user *data; + unsigned int idx; + __u32 last_flags; +}; + +STATIC int +xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv) +{ + struct getfsmap_info *info = priv; + struct fsmap fm; + + trace_xfs_getfsmap_mapping(info->mp, xfm); + + info->last_flags = xfm->fmr_flags; + xfs_fsmap_from_internal(&fm, xfm); + if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm, + sizeof(struct fsmap))) + return -EFAULT; + + return 0; +} + +STATIC int +xfs_ioc_getfsmap( + struct xfs_inode *ip, + struct fsmap_head __user *arg) +{ + struct getfsmap_info info = { NULL }; + struct xfs_fsmap_head xhead = {0}; + struct fsmap_head head; + bool aborted = false; + int error; + + if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) + return -EFAULT; + if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || + memchr_inv(head.fmh_keys[0].fmr_reserved, 0, + sizeof(head.fmh_keys[0].fmr_reserved)) || + memchr_inv(head.fmh_keys[1].fmr_reserved, 0, + sizeof(head.fmh_keys[1].fmr_reserved))) + return -EINVAL; + + xhead.fmh_iflags = head.fmh_iflags; + xhead.fmh_count = head.fmh_count; + xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); + xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); + + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); + + info.mp = ip->i_mount; + info.data = arg; + error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + error = 0; + aborted = true; + } else if (error) + return error; + + /* If we didn't abort, set the "last" flag in the last fmx */ + if (!aborted && info.idx) { + info.last_flags |= FMR_OF_LAST; + if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags, + &info.last_flags, sizeof(info.last_flags))) + return -EFAULT; + } + + /* copy back header */ + head.fmh_entries = xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) + return -EFAULT; + + return 0; +} + int xfs_ioc_swapext( xfs_swapext_t *sxp) @@ -1788,6 +1870,9 @@ xfs_file_ioctl( case XFS_IOC_GETBMAPX: return xfs_ioc_getbmapx(ip, arg); + case FS_IOC_GETFSMAP: + return xfs_ioc_getfsmap(ip, arg); + case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: { diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 7c49938c5aed417cf0a83b08526d32fea1190fad..fa0bc4d46065ab01024bf65fb690282bef020b62 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" @@ -554,6 +555,7 @@ xfs_file_compat_ioctl( case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: + case FS_IOC_GETFSMAP: return xfs_file_ioctl(filp, cmd, p); #ifndef BROKEN_X86_ALIGNMENT /* These are handled fine if no alignment issues */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 288ee5b840d738116b8981e9618fac36fb24614f..94e5bdf7304cff79c9ad7784013520776e3086f8 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -240,7 +240,7 @@ xfs_iomap_write_direct( */ if (IS_DAX(VFS_I(ip))) { bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; - if (ISUNWRITTEN(imap)) { + if (imap->br_state == XFS_EXT_UNWRITTEN) { tflags |= XFS_TRANS_RESERVE; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; } @@ -945,7 +945,7 @@ static inline bool imap_needs_alloc(struct inode *inode, return !nimaps || imap->br_startblock == HOLESTARTBLOCK || imap->br_startblock == DELAYSTARTBLOCK || - (IS_DAX(inode) && ISUNWRITTEN(imap)); + (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN); } static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) @@ -976,6 +976,7 @@ xfs_file_iomap_begin( int nimaps = 1, error = 0; bool shared = false, trimmed = false; unsigned lockmode; + struct block_device *bdev; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -1063,6 +1064,14 @@ xfs_file_iomap_begin( } xfs_bmbt_to_iomap(ip, iomap, &imap); + + /* optionally associate a dax device with the iomap bdev */ + bdev = iomap->bdev; + if (blk_queue_dax(bdev->bd_queue)) + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); + else + iomap->dax_dev = NULL; + if (shared) iomap->flags |= IOMAP_F_SHARED; return 0; @@ -1140,6 +1149,7 @@ xfs_file_iomap_end( unsigned flags, struct iomap *iomap) { + fs_put_dax(iomap->dax_dev); if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, length, written, iomap); @@ -1170,10 +1180,10 @@ xfs_xattr_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - lockmode = xfs_ilock_data_map_shared(ip); + lockmode = xfs_ilock_attr_map_shared(ip); /* if there are no attribute fork or extents, return ENOENT */ - if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { + if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { error = -ENOENT; goto out_unlock; } diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 592fdf7111cbfb2e69d536cebe117eb00b2626be..044fb0e15390c7c5538514ec2f49b72fa5942ade 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -212,88 +212,6 @@ static inline kgid_t xfs_gid_to_kgid(__uint32_t gid) #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() - -/* Move the kernel do_div definition off to one side */ - -#if defined __i386__ -/* For ia32 we need to pull some tricks to get past various versions - * of the compiler which do not like us using do_div in the middle - * of large functions. - */ -static inline __u32 xfs_do_div(void *a, __u32 b, int n) -{ - __u32 mod; - - switch (n) { - case 4: - mod = *(__u32 *)a % b; - *(__u32 *)a = *(__u32 *)a / b; - return mod; - case 8: - { - unsigned long __upper, __low, __high, __mod; - __u64 c = *(__u64 *)a; - __upper = __high = c >> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - *(__u64 *)a = c; - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} - -/* Side effect free 64 bit mod operation */ -static inline __u32 xfs_do_mod(void *a, __u32 b, int n) -{ - switch (n) { - case 4: - return *(__u32 *)a % b; - case 8: - { - unsigned long __upper, __low, __high, __mod; - __u64 c = *(__u64 *)a; - __upper = __high = c >> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} -#else -static inline __u32 xfs_do_div(void *a, __u32 b, int n) -{ - __u32 mod; - - switch (n) { - case 4: - mod = *(__u32 *)a % b; - *(__u32 *)a = *(__u32 *)a / b; - return mod; - case 8: - mod = do_div(*(__u64 *)a, b); - return mod; - } - - /* NOTREACHED */ - return 0; -} - /* Side effect free 64 bit mod operation */ static inline __u32 xfs_do_mod(void *a, __u32 b, int n) { @@ -310,10 +228,7 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n) /* NOTREACHED */ return 0; } -#endif -#undef do_div -#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) #define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b1469f0a91a6c04329cb2fff2dc6e9953485a071..3731f13f63e982e50c0342ec7550c3e6e0a1294b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1293,7 +1293,7 @@ void xfs_log_work_queue( struct xfs_mount *mp) { - queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, + queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work, msecs_to_jiffies(xfs_syncd_centisecs * 10)); } @@ -1852,7 +1852,7 @@ xlog_sync( */ if (log->l_badcrc_factor && (prandom_u32() % log->l_badcrc_factor == 0)) { - iclog->ic_header.h_crc &= 0xAAAAAAAA; + iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); iclog->ic_state |= XLOG_STATE_IOABORT; xfs_warn(log->l_mp, "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4a98762ec8b45f3c0dd021c04cb5bcf9a74e63bc..cd0b077deb354b6de5dda287cffaeb351e0e8cce 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3796,7 +3796,7 @@ xlog_recover_bud_pass2( * This routine is called when an inode create format structure is found in a * committed transaction in the log. It's purpose is to initialise the inodes * being allocated on disk. This requires us to get inode cluster buffers that - * match the range to be intialised, stamped with inode templates and written + * match the range to be initialised, stamped with inode templates and written * by delayed write so that subsequent modifications will hit the cached buffer * and only need writing out at the end of recovery. */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 688ebff1f66384a309cca74539cbe4d27172b177..2eaf8185916610ee115ebd98f8869f2df59b5fe9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -73,6 +73,10 @@ xfs_uuid_mount( uuid_t *uuid = &mp->m_sb.sb_uuid; int hole, i; + /* Publish UUID in struct super_block */ + BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t)); + memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t)); + if (mp->m_flags & XFS_MOUNT_NOUUID) return 0; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 6db6fd6b82b0ae7eaee47fd88735325734aa51e0..9fa312a41c930cc188ec14f43097b0400a07bb27 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -183,6 +183,7 @@ typedef struct xfs_mount { struct workqueue_struct *m_reclaim_workqueue; struct workqueue_struct *m_log_workqueue; struct workqueue_struct *m_eofblocks_workqueue; + struct workqueue_struct *m_sync_workqueue; /* * Generation of the filesysyem layout. This is incremented by each @@ -312,7 +313,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, static inline xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) { - xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); + xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d); do_div(ld, mp->m_sb.sb_agblocks); return (xfs_agnumber_t) ld; } @@ -320,7 +321,7 @@ xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) static inline xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) { - xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); + xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d); return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b669b123287bb115e561a6ea1c0be8ae4db359db..5fe6e70b88efe39847ea238945ba7963a741e34a 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -851,8 +851,8 @@ xfs_qm_reset_dqcounts( * started afresh by xfs_qm_quotacheck. */ #ifdef DEBUG - j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - do_div(j, sizeof(xfs_dqblk_t)); + j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) / + sizeof(xfs_dqblk_t); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif dqb = bp->b_addr; @@ -1384,12 +1384,7 @@ xfs_qm_quotacheck( mp->m_qflags |= flags; error_return: - while (!list_empty(&buffer_list)) { - struct xfs_buf *bp = - list_first_entry(&buffer_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_buf_relse(bp); - } + xfs_buf_delwri_cancel(&buffer_list); if (error) { xfs_warn(mp, diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 475a3882a81fef1cedaabf33383d04c1ee8d3e4b..9cb5c381b01cf3b53cbaadbc738906ec8952402c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -759,5 +759,6 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); + xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL, + XFS_AGITER_INEW_WAIT); } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 6e4c7446c3d4561f85d86686d5b4a40bc4cd0ce6..96fe209b5eb61fb5b45fcc46f6733c6b46f29308 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -221,6 +221,7 @@ void xfs_cui_release( struct xfs_cui_log_item *cuip) { + ASSERT(atomic_read(&cuip->cui_refcount) > 0); if (atomic_dec_and_test(&cuip->cui_refcount)) { xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); xfs_cui_item_free(cuip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 4a84c5ea266d8f8fcec61aa55776fec339d27aaf..ffe6fe7a7eb546721827bcdb8e810a8777a1455b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -206,11 +206,7 @@ xfs_reflink_trim_around_shared( int error = 0; /* Holes, unwritten, and delalloc extents cannot be shared */ - if (!xfs_is_reflink_inode(ip) || - ISUNWRITTEN(irec) || - irec->br_startblock == HOLESTARTBLOCK || - irec->br_startblock == DELAYSTARTBLOCK || - isnullstartblock(irec->br_startblock)) { + if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) { *shared = false; return 0; } @@ -709,8 +705,22 @@ xfs_reflink_end_cow( offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); - /* Start a rolling transaction to switch the mappings */ - resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); + /* + * Start a rolling transaction to switch the mappings. We're + * unlikely ever to have to remap 16T worth of single-block + * extents, so just cap the worst case extent count to 2^32-1. + * Stick a warning in just in case, and avoid 64-bit division. + */ + BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX); + if (end_fsb - offset_fsb > UINT_MAX) { + error = -EFSCORRUPTED; + xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE); + ASSERT(0); + goto out; + } + resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount, + (unsigned int)(end_fsb - offset_fsb), + XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, resblks, 0, 0, &tp); if (error) @@ -1045,12 +1055,12 @@ xfs_reflink_remap_extent( xfs_off_t new_isize) { struct xfs_mount *mp = ip->i_mount; + bool real_extent = xfs_bmap_is_real_extent(irec); struct xfs_trans *tp; xfs_fsblock_t firstfsb; unsigned int resblks; struct xfs_defer_ops dfops; struct xfs_bmbt_irec uirec; - bool real_extent; xfs_filblks_t rlen; xfs_filblks_t unmap_len; xfs_off_t newlen; @@ -1059,11 +1069,6 @@ xfs_reflink_remap_extent( unmap_len = irec->br_startoff + irec->br_blockcount - destoff; trace_xfs_reflink_punch_range(ip, destoff, unmap_len); - /* Only remap normal extents. */ - real_extent = (irec->br_startblock != HOLESTARTBLOCK && - irec->br_startblock != DELAYSTARTBLOCK && - !ISUNWRITTEN(irec)); - /* No reflinking if we're low on space */ if (real_extent) { error = xfs_reflink_ag_has_free_space(mp, @@ -1359,9 +1364,7 @@ xfs_reflink_dirty_extents( goto out; if (nmaps == 0) break; - if (map[0].br_startblock == HOLESTARTBLOCK || - map[0].br_startblock == DELAYSTARTBLOCK || - ISUNWRITTEN(&map[0])) + if (!xfs_bmap_is_real_extent(&map[0])) goto next; map[1] = map[0]; @@ -1435,9 +1438,7 @@ xfs_reflink_clear_inode_flag( return error; if (nmaps == 0) break; - if (map.br_startblock == HOLESTARTBLOCK || - map.br_startblock == DELAYSTARTBLOCK || - ISUNWRITTEN(&map)) + if (!xfs_bmap_is_real_extent(&map)) goto next; agno = XFS_FSB_TO_AGNO(mp, map.br_startblock); diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 73c827831551942550c3e0a3a66c4a7abdf52ef6..f3b139c9aa1674a3f652ec622286e9aba616d5c3 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -243,6 +243,7 @@ void xfs_rui_release( struct xfs_rui_log_item *ruip) { + ASSERT(atomic_read(&ruip->rui_refcount) > 0); if (atomic_dec_and_test(&ruip->rui_refcount)) { xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); xfs_rui_item_free(ruip); diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 51dd3c7266086d5039e5b77f8ec9ea72c5d95f31..f13133e6f19fa4a25e4359d19d7cadebf5e2b970 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -23,6 +23,16 @@ struct xfs_mount; struct xfs_trans; +struct xfs_rtalloc_rec { + xfs_rtblock_t ar_startblock; + xfs_rtblock_t ar_blockcount; +}; + +typedef int (*xfs_rtalloc_query_range_fn)( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv); + #ifdef CONFIG_XFS_RT /* * Function prototypes for exported functions. @@ -118,13 +128,21 @@ int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); - - +int xfs_rtalloc_query_range(struct xfs_trans *tp, + struct xfs_rtalloc_rec *low_rec, + struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, + void *priv); +int xfs_rtalloc_query_all(struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv); #else # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) # define xfs_growfs_rt(mp,in) (ENOSYS) +# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) +# define xfs_rtalloc_query_all(t,f,p) (ENOSYS) static inline int /* error */ xfs_rtmount_init( xfs_mount_t *mp) /* file system mount structure */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 685c042a120f16a8a9a8dad69d8ee7ce6f9274a0..455a575f101db1f22b8c8c0aca247d627f82ea4c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -52,6 +52,7 @@ #include "xfs_reflink.h" #include +#include #include #include #include @@ -877,8 +878,15 @@ xfs_init_mount_workqueues( if (!mp->m_eofblocks_workqueue) goto out_destroy_log; + mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, + mp->m_fsname); + if (!mp->m_sync_workqueue) + goto out_destroy_eofb; + return 0; +out_destroy_eofb: + destroy_workqueue(mp->m_eofblocks_workqueue); out_destroy_log: destroy_workqueue(mp->m_log_workqueue); out_destroy_reclaim: @@ -899,6 +907,7 @@ STATIC void xfs_destroy_mount_workqueues( struct xfs_mount *mp) { + destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); destroy_workqueue(mp->m_log_workqueue); destroy_workqueue(mp->m_reclaim_workqueue); diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 7f17ae6d709a1013f277ab608eb98f64298573f2..5d95fe34829438a0a2486bc50278d84b0d0714bf 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -47,6 +47,7 @@ #include "xfs_inode_item.h" #include "xfs_bmap_btree.h" #include "xfs_filestream.h" +#include "xfs_fsmap.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 383ac227ce2c324cee29b5e8a209fa6b98583192..7c5a16528d8bb0506e999d2673404604c2b76920 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -40,6 +40,8 @@ struct xfs_inode_log_format; struct xfs_bmbt_irec; struct xfs_btree_cur; struct xfs_refcount_irec; +struct xfs_fsmap; +struct xfs_rmap_irec; DECLARE_EVENT_CLASS(xfs_attr_list_class, TP_PROTO(struct xfs_attr_list_context *ctx), @@ -2190,7 +2192,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class, __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d agno %u agbno %u len %u\n", + TP_printk("dev %d:%d agno %u agbno %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2253,8 +2255,8 @@ DECLARE_EVENT_CLASS(xfs_defer_class, TP_STRUCT__entry( __field(dev_t, dev) __field(void *, dop) - __field(bool, committed) - __field(bool, low) + __field(char, committed) + __field(char, low) ), TP_fast_assign( __entry->dev = mp ? mp->m_super->s_dev : 0; @@ -2262,7 +2264,7 @@ DECLARE_EVENT_CLASS(xfs_defer_class, __entry->committed = dop->dop_committed; __entry->low = dop->dop_low; ), - TP_printk("dev %d:%d ops %p committed %d low %d\n", + TP_printk("dev %d:%d ops %p committed %d low %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dop, __entry->committed, @@ -2279,8 +2281,8 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class, TP_STRUCT__entry( __field(dev_t, dev) __field(void *, dop) - __field(bool, committed) - __field(bool, low) + __field(char, committed) + __field(char, low) __field(int, error) ), TP_fast_assign( @@ -2290,7 +2292,7 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class, __entry->low = dop->dop_low; __entry->error = error; ), - TP_printk("dev %d:%d ops %p committed %d low %d err %d\n", + TP_printk("dev %d:%d ops %p committed %d low %d err %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->dop, __entry->committed, @@ -2309,7 +2311,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, __field(dev_t, dev) __field(int, type) __field(void *, intent) - __field(bool, committed) + __field(char, committed) __field(int, nr) ), TP_fast_assign( @@ -2319,7 +2321,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class, __entry->committed = dfp->dfp_done != NULL; __entry->nr = dfp->dfp_count; ), - TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", + TP_printk("dev %d:%d optype %d intent %p committed %d nr %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->intent, @@ -2614,7 +2616,8 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class, __entry->asked = r ? r->ar_asked : 0; __entry->len = len; ), - TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n", + TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u " + "resv %u ask %u len %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->resv, @@ -2667,7 +2670,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class, __entry->agbno = agbno; __entry->dir = dir; ), - TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)\n", + TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -2700,7 +2703,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u\n", + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -2735,7 +2738,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u\n", + TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->startblock, @@ -2776,7 +2779,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, __entry->i2_refcount = i2->rc_refcount; ), TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u\n", + "agbno %u len %u refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -2822,7 +2825,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, __entry->agbno = agbno; ), TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u @ agbno %u\n", + "agbno %u len %u refcount %u @ agbno %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -2875,7 +2878,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, ), TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " "agbno %u len %u refcount %u -- " - "agbno %u len %u refcount %u\n", + "agbno %u len %u refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->i1_startblock, @@ -3001,31 +3004,6 @@ DEFINE_EVENT(xfs_inode_error_class, name, \ unsigned long caller_ip), \ TP_ARGS(ip, error, caller_ip)) -/* reflink allocator */ -TRACE_EVENT(xfs_bmap_remap_alloc, - TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno, - xfs_extlen_t len), - TP_ARGS(ip, fsbno, len), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsblock_t, fsbno) - __field(xfs_extlen_t, len) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->fsbno = fsbno; - __entry->len = len; - ), - TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->fsbno, - __entry->len) -); -DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error); - /* reflink tracepoint classes */ /* two-file io tracepoint class */ @@ -3227,7 +3205,7 @@ TRACE_EVENT(xfs_ioctl_clone, ), TP_printk("dev %d:%d " "ino 0x%lx isize 0x%llx -> " - "ino 0x%lx isize 0x%llx\n", + "ino 0x%lx isize 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->src_ino, __entry->src_isize, @@ -3267,6 +3245,88 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap); DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece); DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); +/* fsmap traces */ +DECLARE_EVENT_CLASS(xfs_fsmap_class, + TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, + struct xfs_rmap_irec *rmap), + TP_ARGS(mp, keydev, agno, rmap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, keydev) + __field(xfs_agnumber_t, agno) + __field(xfs_fsblock_t, bno) + __field(xfs_filblks_t, len) + __field(__uint64_t, owner) + __field(__uint64_t, offset) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->keydev = new_decode_dev(keydev); + __entry->agno = agno; + __entry->bno = rmap->rm_startblock; + __entry->len = rmap->rm_blockcount; + __entry->owner = rmap->rm_owner; + __entry->offset = rmap->rm_offset; + __entry->flags = rmap->rm_flags; + ), + TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->keydev), MINOR(__entry->keydev), + __entry->agno, + __entry->bno, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +) +#define DEFINE_FSMAP_EVENT(name) \ +DEFINE_EVENT(xfs_fsmap_class, name, \ + TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \ + struct xfs_rmap_irec *rmap), \ + TP_ARGS(mp, keydev, agno, rmap)) +DEFINE_FSMAP_EVENT(xfs_fsmap_low_key); +DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); +DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); + +DECLARE_EVENT_CLASS(xfs_getfsmap_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), + TP_ARGS(mp, fsmap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, keydev) + __field(xfs_daddr_t, block) + __field(xfs_daddr_t, len) + __field(__uint64_t, owner) + __field(__uint64_t, offset) + __field(__uint64_t, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->keydev = new_decode_dev(fsmap->fmr_device); + __entry->block = fsmap->fmr_physical; + __entry->len = fsmap->fmr_length; + __entry->owner = fsmap->fmr_owner; + __entry->offset = fsmap->fmr_offset; + __entry->flags = fsmap->fmr_flags; + ), + TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->keydev), MINOR(__entry->keydev), + __entry->block, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +) +#define DEFINE_GETFSMAP_EVENT(name) \ +DEFINE_EVENT(xfs_getfsmap_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \ + TP_ARGS(mp, fsmap)) +DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key); +DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key); +DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 70f42ea86dfbd4a7de39d0709ee4a90e59f0a758..2011620008de8a4d422643bc20dbc2313873c16e 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -134,7 +134,7 @@ xfs_trans_reserve( bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; /* Mark this thread as being in a transaction */ - current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); /* * Attempt to reserve the needed disk blocks by decrementing @@ -144,7 +144,7 @@ xfs_trans_reserve( if (blocks > 0) { error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd); if (error != 0) { - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return -ENOSPC; } tp->t_blk_res += blocks; @@ -221,7 +221,7 @@ xfs_trans_reserve( tp->t_blk_res = 0; } - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); return error; } @@ -262,6 +262,28 @@ xfs_trans_alloc( return 0; } +/* + * Create an empty transaction with no reservation. This is a defensive + * mechanism for routines that query metadata without actually modifying + * them -- if the metadata being queried is somehow cross-linked (think a + * btree block pointer that points higher in the tree), we risk deadlock. + * However, blocks grabbed as part of a transaction can be re-grabbed. + * The verifiers will notice the corrupt block and the operation will fail + * back to userspace without deadlocking. + * + * Note the zero-length reservation; this transaction MUST be cancelled + * without any dirty data. + */ +int +xfs_trans_alloc_empty( + struct xfs_mount *mp, + struct xfs_trans **tpp) +{ + struct xfs_trans_res resv = {0}; + + return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); +} + /* * Record the indicated change to the given field for application * to the file system's superblock when the transaction commits. @@ -914,7 +936,7 @@ __xfs_trans_commit( xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free(tp); /* @@ -944,7 +966,7 @@ __xfs_trans_commit( if (commit_lsn == -1 && !error) error = -EIO; } - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); xfs_trans_free(tp); @@ -998,7 +1020,7 @@ xfs_trans_cancel( xfs_log_done(mp, tp->t_ticket, NULL, false); /* mark this thread as no longer being in a transaction */ - current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); + current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); xfs_trans_free(tp); @@ -1012,17 +1034,14 @@ xfs_trans_cancel( * chunk we've been working on and get a new transaction to continue. */ int -__xfs_trans_roll( +xfs_trans_roll( struct xfs_trans **tpp, - struct xfs_inode *dp, - int *committed) + struct xfs_inode *dp) { struct xfs_trans *trans; struct xfs_trans_res tres; int error; - *committed = 0; - /* * Ensure that the inode is always logged. */ @@ -1048,7 +1067,6 @@ __xfs_trans_roll( if (error) return error; - *committed = 1; trans = *tpp; /* @@ -1071,12 +1089,3 @@ __xfs_trans_roll( xfs_trans_ijoin(trans, dp, 0); return 0; } - -int -xfs_trans_roll( - struct xfs_trans **tpp, - struct xfs_inode *dp) -{ - int committed; - return __xfs_trans_roll(tpp, dp, &committed); -} diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 1646f659b60fbeaac09985ebbe984eb2d345cd80..a07acbf0bd8a4f491fc4976b31dfcc696e7a5b18 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -158,6 +158,8 @@ typedef struct xfs_trans { int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, uint blocks, uint rtextents, uint flags, struct xfs_trans **tpp); +int xfs_trans_alloc_empty(struct xfs_mount *mp, + struct xfs_trans **tpp); void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, @@ -226,7 +228,6 @@ int xfs_trans_free_extent(struct xfs_trans *, struct xfs_efd_log_item *, xfs_fsblock_t, xfs_extlen_t, struct xfs_owner_info *); int xfs_trans_commit(struct xfs_trans *); -int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_cancel(xfs_trans_t *); int xfs_trans_ail_init(struct xfs_mount *); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d6c9c3e9e02b2c45f2cd57074cbbcdebde1e804a..9056c0f34a3c42d40a6acaba8a189a55bbf0cc97 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -684,8 +684,23 @@ xfs_trans_ail_update_bulk( } } -/* - * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL +bool +xfs_ail_delete_one( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_log_item *mlip = xfs_ail_min(ailp); + + trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); + xfs_ail_delete(ailp, lip); + lip->li_flags &= ~XFS_LI_IN_AIL; + lip->li_lsn = 0; + + return mlip == lip; +} + +/** + * Remove a log items from the AIL * * @xfs_trans_ail_delete_bulk takes an array of log items that all need to * removed from the AIL. The caller is already holding the AIL lock, and done @@ -706,52 +721,36 @@ xfs_trans_ail_update_bulk( * before returning. */ void -xfs_trans_ail_delete_bulk( +xfs_trans_ail_delete( struct xfs_ail *ailp, - struct xfs_log_item **log_items, - int nr_items, + struct xfs_log_item *lip, int shutdown_type) __releases(ailp->xa_lock) { - xfs_log_item_t *mlip; - int mlip_changed = 0; - int i; + struct xfs_mount *mp = ailp->xa_mount; + bool mlip_changed; - mlip = xfs_ail_min(ailp); - - for (i = 0; i < nr_items; i++) { - struct xfs_log_item *lip = log_items[i]; - if (!(lip->li_flags & XFS_LI_IN_AIL)) { - struct xfs_mount *mp = ailp->xa_mount; - - spin_unlock(&ailp->xa_lock); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_alert_tag(mp, XFS_PTAG_AILDELETE, - "%s: attempting to delete a log item that is not in the AIL", - __func__); - xfs_force_shutdown(mp, shutdown_type); - } - return; + if (!(lip->li_flags & XFS_LI_IN_AIL)) { + spin_unlock(&ailp->xa_lock); + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_alert_tag(mp, XFS_PTAG_AILDELETE, + "%s: attempting to delete a log item that is not in the AIL", + __func__); + xfs_force_shutdown(mp, shutdown_type); } - - trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); - xfs_ail_delete(ailp, lip); - lip->li_flags &= ~XFS_LI_IN_AIL; - lip->li_lsn = 0; - if (mlip == lip) - mlip_changed = 1; + return; } + mlip_changed = xfs_ail_delete_one(ailp, lip); if (mlip_changed) { - if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - xlog_assign_tail_lsn_locked(ailp->xa_mount); + if (!XFS_FORCED_SHUTDOWN(mp)) + xlog_assign_tail_lsn_locked(mp); if (list_empty(&ailp->xa_ail)) wake_up_all(&ailp->xa_empty); - spin_unlock(&ailp->xa_lock); + } + spin_unlock(&ailp->xa_lock); + if (mlip_changed) xfs_log_space_wake(ailp->xa_mount); - } else { - spin_unlock(&ailp->xa_lock); - } } int diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 49931b72da8a8400f767a1cb7d368b31ce62c04b..d91706c56c633a1eb4ed243cf182d2891200e24d 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -106,18 +106,9 @@ xfs_trans_ail_update( xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } -void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, - struct xfs_log_item **log_items, int nr_items, - int shutdown_type) - __releases(ailp->xa_lock); -static inline void -xfs_trans_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip, - int shutdown_type) __releases(ailp->xa_lock) -{ - xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type); -} +bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); +void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, + int shutdown_type) __releases(ailp->xa_lock); static inline void xfs_trans_ail_remove( diff --git a/include/Kbuild b/include/Kbuild deleted file mode 100644 index bab1145bc7a7d620c579e60abef2b6652603df78..0000000000000000000000000000000000000000 --- a/include/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# Top-level Makefile calls into asm-$(ARCH) -# List only non-arch directories below diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 07740072da55d9bc64794c5f7232be4d5e256bc0..6db3b4668b1a2b1e6c618d49565f96b1c4521021 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -78,6 +78,7 @@ #define ACPI_MAX_EXTPARSE_CACHE_DEPTH 96 /* Parse tree objects */ #define ACPI_MAX_OBJECT_CACHE_DEPTH 96 /* Interpreter operand objects */ #define ACPI_MAX_NAMESPACE_CACHE_DEPTH 96 /* Namespace objects */ +#define ACPI_MAX_COMMENT_CACHE_DEPTH 96 /* Comments for the -ca option */ /* * Should the subsystem abort the loading of an ACPI table if the diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index ef0ae8aaa56771a3a1c4e00a4712f55cbdda9114..197f3fffc9a7151ed61d0b960f5e452f6beccb5c 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -88,6 +88,7 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func, } bool acpi_dev_found(const char *hid); +bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); #ifdef CONFIG_ACPI @@ -386,6 +387,7 @@ struct acpi_data_node { const char *name; acpi_handle handle; struct fwnode_handle fwnode; + struct fwnode_handle *parent; struct acpi_device_data data; struct list_head sibling; struct kobject kobj; @@ -575,7 +577,7 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); -void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); +int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); void acpi_dma_deconfigure(struct device *dev); struct acpi_device *acpi_find_child_device(struct acpi_device *parent, @@ -586,6 +588,15 @@ struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); +#ifdef CONFIG_X86 +bool acpi_device_always_present(struct acpi_device *adev); +#else +static inline bool acpi_device_always_present(struct acpi_device *adev) +{ + return false; +} +#endif + #ifdef CONFIG_PM acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev, void (*work_func)(struct work_struct *work)); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 3795386ea7068a1e0a77801ee98e9a569c59f3e2..15c86ce4df537bec81aaf1e83e5961b0b9e5913f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20170119 +#define ACPI_CA_VERSION 0x20170303 #include #include diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index d92543f3bbfdcaaeac47252729af30ba428eb371..bdc55c0da19cd06c65e589d9071f65c2eb5c2332 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -374,6 +374,20 @@ struct acpi_table_desc { u16 validation_count; }; +/* + * Maximum value of the validation_count field in struct acpi_table_desc. + * When reached, validation_count cannot be changed any more and the table will + * be permanently regarded as validated. + * + * This is to prevent situations in which unbalanced table get/put operations + * may cause premature table unmapping in the OS to happen. + * + * The maximum validation count can be defined to any value, but should be + * greater than the maximum number of OS early stage mapping slots to avoid + * leaking early stage table mappings to the late stage. + */ +#define ACPI_MAX_TABLE_VALIDATIONS ACPI_UINT16_MAX + /* Masks for Flags field above */ #define ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL (0) /* Virtual address, external maintained */ diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 7aee9fb3bd1f06f88c3a83b708ae0b5e452170cb..faa9f2c0d5de8cd545bd1ea911b352fa74a0128d 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -87,6 +87,7 @@ #define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ #define ACPI_SIG_WDDT "WDDT" /* Watchdog Timer Description Table */ #define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ +#define ACPI_SIG_XXXX "XXXX" /* Intermediate AML header for ASL/ASL+ converter */ #ifdef ACPI_UNDEFINED_TABLES /* @@ -783,6 +784,15 @@ struct acpi_iort_smmu { #define ACPI_IORT_SMMU_DVM_SUPPORTED (1) #define ACPI_IORT_SMMU_COHERENT_WALK (1<<1) +/* Global interrupt format */ + +struct acpi_iort_smmu_gsi { + u32 nsg_irpt; + u32 nsg_irpt_flags; + u32 nsg_cfg_irpt; + u32 nsg_cfg_irpt_flags; +}; + struct acpi_iort_smmu_v3 { u64 base_address; /* SMMUv3 base address */ u32 flags; @@ -1294,6 +1304,7 @@ struct acpi_table_tpm2 { #define ACPI_TPM2_MEMORY_MAPPED 6 #define ACPI_TPM2_COMMAND_BUFFER 7 #define ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD 8 +#define ACPI_TPM2_COMMAND_BUFFER_WITH_SMC 11 /******************************************************************************* * diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 427a7c3e6c75c5d3789ed12a4836d0d19c202938..2010c0516f273f05185d8131b363996c37d68903 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -103,6 +103,7 @@ struct cppc_perf_caps { u32 highest_perf; u32 nominal_perf; u32 lowest_perf; + u32 lowest_nonlinear_perf; }; struct cppc_perf_ctrls { @@ -115,7 +116,7 @@ struct cppc_perf_fb_ctrs { u64 reference; u64 delivered; u64 reference_perf; - u64 ctr_wrap_time; + u64 wraparound_time; }; /* Per CPU container for runtime CPPC management. */ diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm deleted file mode 100644 index d2ee86b4c091c7bcde862f8b34cb822570468d98..0000000000000000000000000000000000000000 --- a/include/asm-generic/Kbuild.asm +++ /dev/null @@ -1 +0,0 @@ -include include/uapi/asm-generic/Kbuild.asm diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 6f96247226a4d2b9c1e9aa56be423ffa0c4d49df..d6f4aed479a12b0da4ed81c3c905183a08e55e39 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -5,7 +5,9 @@ #ifdef CONFIG_GENERIC_BUG #define BUGFLAG_WARNING (1 << 0) -#define BUGFLAG_TAINT(taint) (BUGFLAG_WARNING | ((taint) << 8)) +#define BUGFLAG_ONCE (1 << 1) +#define BUGFLAG_DONE (1 << 2) +#define BUGFLAG_TAINT(taint) ((taint) << 8) #define BUG_GET_TAINT(bug) ((bug)->flags >> 8) #endif @@ -55,6 +57,18 @@ struct bug_entry { #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) #endif +#ifdef __WARN_FLAGS +#define __WARN_TAINT(taint) __WARN_FLAGS(BUGFLAG_TAINT(taint)) +#define __WARN_ONCE_TAINT(taint) __WARN_FLAGS(BUGFLAG_ONCE|BUGFLAG_TAINT(taint)) + +#define WARN_ON_ONCE(condition) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_ONCE_TAINT(TAINT_WARN); \ + unlikely(__ret_warn_on); \ +}) +#endif + /* * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant issues that need prompt attention if they should ever @@ -97,7 +111,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #endif #ifndef WARN -#define WARN(condition, format...) ({ \ +#define WARN(condition, format...) ({ \ int __ret_warn_on = !!(condition); \ if (unlikely(__ret_warn_on)) \ __WARN_printf(format); \ @@ -112,6 +126,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, unlikely(__ret_warn_on); \ }) +#ifndef WARN_ON_ONCE #define WARN_ON_ONCE(condition) ({ \ static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ @@ -122,6 +137,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, } \ unlikely(__ret_warn_once); \ }) +#endif #define WARN_ONCE(condition, format...) ({ \ static bool __section(.data.unlikely) __warned; \ diff --git a/include/asm-generic/extable.h b/include/asm-generic/extable.h new file mode 100644 index 0000000000000000000000000000000000000000..ca14c6664027d40f88473ca1a97bbaeef6dcc1ed --- /dev/null +++ b/include/asm-generic/extable.h @@ -0,0 +1,26 @@ +#ifndef __ASM_GENERIC_EXTABLE_H +#define __ASM_GENERIC_EXTABLE_H + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + + +struct pt_regs; +extern int fixup_exception(struct pt_regs *regs); + +#endif diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index cc5d9a1405df925c43ec62a03be03c30ae921f56..41e5b6784b975d1974604c6265486ab858c3865f 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -32,10 +32,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, /* by default, allow everything */ return true; } - -static inline bool arch_pte_access_permitted(pte_t pte, bool write) -{ - /* by default, allow everything */ - return true; -} #endif /* _ASM_GENERIC_MM_HOOKS_H */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 1fad160f35de8e89953af075173a2ad219c9693b..7dfa767dc68012ac52ac81d48e92b3ec79c97311 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -341,6 +341,31 @@ static inline int pte_unused(pte_t pte) } #endif +#ifndef pte_access_permitted +#define pte_access_permitted(pte, write) \ + (pte_present(pte) && (!(write) || pte_write(pte))) +#endif + +#ifndef pmd_access_permitted +#define pmd_access_permitted(pmd, write) \ + (pmd_present(pmd) && (!(write) || pmd_write(pmd))) +#endif + +#ifndef pud_access_permitted +#define pud_access_permitted(pud, write) \ + (pud_present(pud) && (!(write) || pud_write(pud))) +#endif + +#ifndef p4d_access_permitted +#define p4d_access_permitted(p4d, write) \ + (p4d_present(p4d) && (!(write) || p4d_write(p4d))) +#endif + +#ifndef pgd_access_permitted +#define pgd_access_permitted(pgd, write) \ + (pgd_present(pgd) && (!(write) || pgd_write(pgd))) +#endif + #ifndef __HAVE_ARCH_PMD_SAME #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) diff --git a/include/asm-generic/set_memory.h b/include/asm-generic/set_memory.h new file mode 100644 index 0000000000000000000000000000000000000000..83e81f8996b22c0d3185e7c61ad0e566ac11aa3a --- /dev/null +++ b/include/asm-generic/set_memory.h @@ -0,0 +1,12 @@ +#ifndef __ASM_SET_MEMORY_H +#define __ASM_SET_MEMORY_H + +/* + * Functions to change memory attributes. + */ +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); + +#endif diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index cc6bb319e464f46e8e2b8b9010261ba585718e94..bbe4bb438e3934e99748bf9c1a815ace2b0fda7f 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -6,7 +6,6 @@ * on any machine that has kernel and user data in the same * address space, e.g. all NOMMU machines. */ -#include #include #include @@ -35,9 +34,6 @@ static inline void set_fs(mm_segment_t fs) #define segment_eq(a, b) ((a).seg == (b).seg) #endif -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - #define access_ok(type, addr, size) __access_ok((unsigned long)(addr),(size)) /* @@ -51,87 +47,6 @@ static inline int __access_ok(unsigned long addr, unsigned long size) } #endif -/* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. - * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. - */ - -struct exception_table_entry -{ - unsigned long insn, fixup; -}; - -/* - * architectures with an MMU should override these two - */ -#ifndef __copy_from_user -static inline __must_check long __copy_from_user(void *to, - const void __user * from, unsigned long n) -{ - if (__builtin_constant_p(n)) { - switch(n) { - case 1: - *(u8 *)to = *(u8 __force *)from; - return 0; - case 2: - *(u16 *)to = *(u16 __force *)from; - return 0; - case 4: - *(u32 *)to = *(u32 __force *)from; - return 0; -#ifdef CONFIG_64BIT - case 8: - *(u64 *)to = *(u64 __force *)from; - return 0; -#endif - default: - break; - } - } - - memcpy(to, (const void __force *)from, n); - return 0; -} -#endif - -#ifndef __copy_to_user -static inline __must_check long __copy_to_user(void __user *to, - const void *from, unsigned long n) -{ - if (__builtin_constant_p(n)) { - switch(n) { - case 1: - *(u8 __force *)to = *(u8 *)from; - return 0; - case 2: - *(u16 __force *)to = *(u16 *)from; - return 0; - case 4: - *(u32 __force *)to = *(u32 *)from; - return 0; -#ifdef CONFIG_64BIT - case 8: - *(u64 __force *)to = *(u64 *)from; - return 0; -#endif - default: - break; - } - } - - memcpy((void __force *)to, from, n); - return 0; -} -#endif - /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. @@ -171,8 +86,7 @@ static inline __must_check long __copy_to_user(void __user *to, static inline int __put_user_fn(size_t size, void __user *ptr, void *x) { - size = __copy_to_user(ptr, x, size); - return size ? -EFAULT : size; + return unlikely(raw_copy_to_user(ptr, x, size)) ? -EFAULT : 0; } #define __put_user_fn(sz, u, k) __put_user_fn(sz, u, k) @@ -187,28 +101,28 @@ extern int __put_user_bad(void) __attribute__((noreturn)); __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ - unsigned char __x; \ + unsigned char __x = 0; \ __gu_err = __get_user_fn(sizeof (*(ptr)), \ ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ - unsigned short __x; \ + unsigned short __x = 0; \ __gu_err = __get_user_fn(sizeof (*(ptr)), \ ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ - unsigned int __x; \ + unsigned int __x = 0; \ __gu_err = __get_user_fn(sizeof (*(ptr)), \ ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ - unsigned long long __x; \ + unsigned long long __x = 0; \ __gu_err = __get_user_fn(sizeof (*(ptr)), \ ptr, &__x); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ @@ -233,12 +147,7 @@ extern int __put_user_bad(void) __attribute__((noreturn)); #ifndef __get_user_fn static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) { - size_t n = __copy_from_user(x, ptr, size); - if (unlikely(n)) { - memset(x + (size - n), 0, n); - return -EFAULT; - } - return 0; + return unlikely(raw_copy_from_user(x, ptr, size)) ? -EFAULT : 0; } #define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k) @@ -247,36 +156,6 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) extern int __get_user_bad(void) __attribute__((noreturn)); -#ifndef __copy_from_user_inatomic -#define __copy_from_user_inatomic __copy_from_user -#endif - -#ifndef __copy_to_user_inatomic -#define __copy_to_user_inatomic __copy_to_user -#endif - -static inline long copy_from_user(void *to, - const void __user * from, unsigned long n) -{ - unsigned long res = n; - might_fault(); - if (likely(access_ok(VERIFY_READ, from, n))) - res = __copy_from_user(to, from, n); - if (unlikely(res)) - memset(to + (n - res), 0, res); - return res; -} - -static inline long copy_to_user(void __user *to, - const void *from, unsigned long n) -{ - might_fault(); - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_to_user(to, from, n); - else - return n; -} - /* * Copy a null terminated string from userspace. */ @@ -348,4 +227,6 @@ clear_user(void __user *to, unsigned long n) return __clear_user(to, n); } +#include + #endif /* __ASM_GENERIC_UACCESS_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 143db9c523e25f38488bd43302b82f316e3124a9..314a0b9219c64704151ad50680bd89d133f1af57 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -287,8 +287,6 @@ *(.rodata1) \ } \ \ - BUG_TABLE \ - \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -568,7 +566,6 @@ IRQCHIP_OF_MATCH_TABLE() \ ACPI_PROBE_TABLE(irqchip) \ ACPI_PROBE_TABLE(clksrc) \ - ACPI_PROBE_TABLE(iort) \ EARLYCON_TABLE() #define INIT_TEXT \ @@ -857,7 +854,8 @@ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ CONSTRUCTORS \ - } + } \ + BUG_TABLE #define INIT_TEXT_SECTION(inittext_align) \ . = ALIGN(inittext_align); \ diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index caedb74c92108188e57652b56a1c198f3263c092..cc805b72994acd1442a50a4ec6e7d5593a833d76 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -16,9 +16,13 @@ #ifndef __CLKSOURCE_ARM_ARCH_TIMER_H #define __CLKSOURCE_ARM_ARCH_TIMER_H +#include #include #include +#define ARCH_TIMER_TYPE_CP15 BIT(0) +#define ARCH_TIMER_TYPE_MEM BIT(1) + #define ARCH_TIMER_CTRL_ENABLE (1 << 0) #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) @@ -34,11 +38,27 @@ enum arch_timer_reg { ARCH_TIMER_REG_TVAL, }; +enum arch_timer_ppi_nr { + ARCH_TIMER_PHYS_SECURE_PPI, + ARCH_TIMER_PHYS_NONSECURE_PPI, + ARCH_TIMER_VIRT_PPI, + ARCH_TIMER_HYP_PPI, + ARCH_TIMER_MAX_TIMER_PPI +}; + +enum arch_timer_spi_nr { + ARCH_TIMER_PHYS_SPI, + ARCH_TIMER_VIRT_SPI, + ARCH_TIMER_MAX_TIMER_SPI +}; + #define ARCH_TIMER_PHYS_ACCESS 0 #define ARCH_TIMER_VIRT_ACCESS 1 #define ARCH_TIMER_MEM_PHYS_ACCESS 2 #define ARCH_TIMER_MEM_VIRT_ACCESS 3 +#define ARCH_TIMER_MEM_MAX_FRAMES 8 + #define ARCH_TIMER_USR_PCT_ACCESS_EN (1 << 0) /* physical counter */ #define ARCH_TIMER_USR_VCT_ACCESS_EN (1 << 1) /* virtual counter */ #define ARCH_TIMER_VIRT_EVT_EN (1 << 2) @@ -54,6 +74,20 @@ struct arch_timer_kvm_info { int virtual_irq; }; +struct arch_timer_mem_frame { + bool valid; + phys_addr_t cntbase; + size_t size; + int phys_irq; + int virt_irq; +}; + +struct arch_timer_mem { + phys_addr_t cntctlbase; + size_t size; + struct arch_timer_mem_frame frame[ARCH_TIMER_MEM_MAX_FRAMES]; +}; + #ifdef CONFIG_ARM_ARCH_TIMER extern u32 arch_timer_get_rate(void); diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index 592d47e565a8b305406ca8e4b8745d0c9715f109..0977fb18ff68c63ad66eccbb34cc4bff8c514af0 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -43,12 +43,13 @@ --------------------------------------------------------------------------- Issue Date: 31/01/2006 - An implementation of field multiplication in Galois Field GF(128) + An implementation of field multiplication in Galois Field GF(2^128) */ #ifndef _CRYPTO_GF128MUL_H #define _CRYPTO_GF128MUL_H +#include #include #include @@ -65,7 +66,7 @@ * are left and the lsb's are right. char b[16] is an array and b[0] is * the first octet. * - * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 + * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 * b[0] b[1] b[2] b[3] b[13] b[14] b[15] * * Every bit is a coefficient of some power of X. We can store the bits @@ -85,15 +86,17 @@ * Both of the above formats are easy to implement on big-endian * machines. * - * EME (which is patent encumbered) uses the ble format (bits are stored - * in big endian order and the bytes in little endian). The above buffer - * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. + * XTS and EME (the latter of which is patent encumbered) use the ble + * format (bits are stored in big endian order and the bytes in little + * endian). The above buffer represents X^7 in this case and the + * primitive polynomial is b[0] = 0x87. * * The common machine word-size is smaller than 128 bits, so to make * an efficient implementation we must split into machine word sizes. - * This file uses one 32bit for the moment. Machine endianness comes into - * play. The lle format in relation to machine endianness is discussed - * below by the original author of gf128mul Dr Brian Gladman. + * This implementation uses 64-bit words for the moment. Machine + * endianness comes into play. The lle format in relation to machine + * endianness is discussed below by the original author of gf128mul Dr + * Brian Gladman. * * Let's look at the bbe and ble format on a little endian machine. * @@ -127,10 +130,10 @@ * machines this will automatically aligned to wordsize and on a 64-bit * machine also. */ -/* Multiply a GF128 field element by x. Field elements are held in arrays - of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower - indexed bits placed in the more numerically significant bit positions - within bytes. +/* Multiply a GF(2^128) field element by x. Field elements are + held in arrays of bytes in which field bits 8n..8n + 7 are held in + byte[n], with lower indexed bits placed in the more numerically + significant bit positions within bytes. On little endian machines the bit indexes translate into the bit positions within four 32-bit words in the following way @@ -161,8 +164,58 @@ void gf128mul_lle(be128 *a, const be128 *b); void gf128mul_bbe(be128 *a, const be128 *b); -/* multiply by x in ble format, needed by XTS */ -void gf128mul_x_ble(be128 *a, const be128 *b); +/* + * The following functions multiply a field element by x in + * the polynomial field representation. They use 64-bit word operations + * to gain speed but compensate for machine endianness and hence work + * correctly on both styles of machine. + * + * They are defined here for performance. + */ + +static inline u64 gf128mul_mask_from_bit(u64 x, int which) +{ + /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */ + return ((s64)(x << (63 - which)) >> 63); +} + +static inline void gf128mul_x_lle(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + + /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48 + * (see crypto/gf128mul.c): */ + u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56); + + r->b = cpu_to_be64((b >> 1) | (a << 63)); + r->a = cpu_to_be64((a >> 1) ^ _tt); +} + +static inline void gf128mul_x_bbe(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + + /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */ + u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; + + r->a = cpu_to_be64((a << 1) | (b >> 63)); + r->b = cpu_to_be64((b << 1) ^ _tt); +} + +/* needed by XTS */ +static inline void gf128mul_x_ble(le128 *r, const le128 *x) +{ + u64 a = le64_to_cpu(x->a); + u64 b = le64_to_cpu(x->b); + + /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */ + u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; + + r->a = cpu_to_le64((a << 1) | (b >> 63)); + r->b = cpu_to_le64((b << 1) ^ _tt); +} /* 4k table optimization */ @@ -172,8 +225,8 @@ struct gf128mul_4k { struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); -void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); -void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); +void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); +void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); static inline void gf128mul_free_4k(struct gf128mul_4k *t) { @@ -194,6 +247,6 @@ struct gf128mul_64k { */ struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); void gf128mul_free_64k(struct gf128mul_64k *t); -void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); +void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); #endif /* _CRYPTO_GF128MUL_H */ diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 1de2b5af12d7347bce4c593dfbf7386c48ddfa66..51052f65cefc6a81ef465437038492d1fe6c00f4 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -78,4 +78,7 @@ int crypto_register_acomp(struct acomp_alg *alg); */ int crypto_unregister_acomp(struct acomp_alg *alg); +int crypto_register_acomps(struct acomp_alg *algs, int count); +void crypto_unregister_acomps(struct acomp_alg *algs, int count); + #endif diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 1d4f365d8f03a439ab0e20caf27ae34ad6e13e74..f6d9af3efa45a6cc8eb448a71f5d43d72d8fcbd1 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -166,6 +166,16 @@ static inline struct ahash_instance *ahash_alloc_instance( return crypto_alloc_instance2(name, alg, ahash_instance_headroom()); } +static inline void ahash_request_complete(struct ahash_request *req, int err) +{ + req->base.complete(&req->base, err); +} + +static inline u32 ahash_request_flags(struct ahash_request *req) +{ + return req->base.flags; +} + static inline struct crypto_ahash *crypto_spawn_ahash( struct crypto_ahash_spawn *spawn) { diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h index 3fda3c5655a0e071870dd2f6ea1859bb4b269822..ccad9b2c9bd67679790cf3eb9d884cb5f47b0202 100644 --- a/include/crypto/internal/scompress.h +++ b/include/crypto/internal/scompress.h @@ -133,4 +133,7 @@ int crypto_register_scomp(struct scomp_alg *alg); */ int crypto_unregister_scomp(struct scomp_alg *alg); +int crypto_register_scomps(struct scomp_alg *algs, int count); +void crypto_unregister_scomps(struct scomp_alg *algs, int count); + #endif diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 4307a2f2365f49433ecf46a03007a7316ab4df58..ce8e1f79374b64c30eb045d2cff8ad021bf340b1 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -74,7 +74,7 @@ struct crypto_kpp { * @base: Common crypto API algorithm data structure */ struct kpp_alg { - int (*set_secret)(struct crypto_kpp *tfm, void *buffer, + int (*set_secret)(struct crypto_kpp *tfm, const void *buffer, unsigned int len); int (*generate_public_key)(struct kpp_request *req); int (*compute_shared_secret)(struct kpp_request *req); @@ -273,8 +273,8 @@ struct kpp_secret { * * Return: zero on success; error code in case of error */ -static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, void *buffer, - unsigned int len) +static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm, + const void *buffer, unsigned int len) { struct kpp_alg *alg = crypto_kpp_alg(tfm); diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h index 882ca0e1e7a5967e1dde952c8e5ecdf616b0f2fd..e0b681a717bac93ce3323b1a353fceba8310f811 100644 --- a/include/crypto/public_key.h +++ b/include/crypto/public_key.h @@ -50,9 +50,20 @@ struct key; struct key_type; union key_payload; -extern int restrict_link_by_signature(struct key *trust_keyring, +extern int restrict_link_by_signature(struct key *dest_keyring, const struct key_type *type, - const union key_payload *payload); + const union key_payload *payload, + struct key *trust_keyring); + +extern int restrict_link_by_key_or_keyring(struct key *dest_keyring, + const struct key_type *type, + const union key_payload *payload, + struct key *trusted); + +extern int restrict_link_by_key_or_keyring_chain(struct key *trust_keyring, + const struct key_type *type, + const union key_payload *payload, + struct key *trusted); extern int verify_signature(const struct key *key, const struct public_key_signature *sig); diff --git a/include/crypto/xts.h b/include/crypto/xts.h index 77b630672b2c1a7754605a2094fb714b5ff002b8..c0bde308b28ae6566b6f96046b3a53b9951bec7f 100644 --- a/include/crypto/xts.h +++ b/include/crypto/xts.h @@ -11,7 +11,7 @@ struct blkcipher_desc; #define XTS_BLOCK_SIZE 16 struct xts_crypt_req { - be128 *tbuf; + le128 *tbuf; unsigned int tbuflen; void *tweak_ctx; diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c0bd0d7651a947bf06407d7c680dde5c377b9b26..bb837310c07e98c529472abceda0a8b426d8c9a5 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -913,4 +913,55 @@ void drm_dp_aux_unregister(struct drm_dp_aux *aux); int drm_dp_start_crc(struct drm_dp_aux *aux, struct drm_crtc *crtc); int drm_dp_stop_crc(struct drm_dp_aux *aux); +struct drm_dp_dpcd_ident { + u8 oui[3]; + u8 device_id[6]; + u8 hw_rev; + u8 sw_major_rev; + u8 sw_minor_rev; +} __packed; + +/** + * struct drm_dp_desc - DP branch/sink device descriptor + * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch). + * @quirks: Quirks; use drm_dp_has_quirk() to query for the quirks. + */ +struct drm_dp_desc { + struct drm_dp_dpcd_ident ident; + u32 quirks; +}; + +int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, + bool is_branch); + +/** + * enum drm_dp_quirk - Display Port sink/branch device specific quirks + * + * Display Port sink and branch devices in the wild have a variety of bugs, try + * to collect them here. The quirks are shared, but it's up to the drivers to + * implement workarounds for them. + */ +enum drm_dp_quirk { + /** + * @DP_DPCD_QUIRK_LIMITED_M_N: + * + * The device requires main link attributes Mvid and Nvid to be limited + * to 16 bits. + */ + DP_DPCD_QUIRK_LIMITED_M_N, +}; + +/** + * drm_dp_has_quirk() - does the DP device have a specific quirk + * @desc: Device decriptor filled by drm_dp_read_desc() + * @quirk: Quirk to query for + * + * Return true if DP device identified by @desc has @quirk. + */ +static inline bool +drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) +{ + return desc->quirks & BIT(quirk); +} + #endif /* _DRM_DP_HELPER_H_ */ diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h index 70d4e221a3adb485ff7927c33737db23a127b577..d0f6cf2e5324411ac20d3f53515df0899a9601ed 100644 --- a/include/drm/drm_mem_util.h +++ b/include/drm/drm_mem_util.h @@ -37,8 +37,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) if (size * nmemb <= PAGE_SIZE) return kcalloc(nmemb, size, GFP_KERNEL); - return __vmalloc(size * nmemb, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + return vzalloc(size * nmemb); } /* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. */ @@ -50,8 +49,7 @@ static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size) if (size * nmemb <= PAGE_SIZE) return kmalloc(nmemb * size, GFP_KERNEL); - return __vmalloc(size * nmemb, - GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); + return vmalloc(size * nmemb); } static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) @@ -69,8 +67,7 @@ static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp) return ptr; } - return __vmalloc(size * nmemb, - gfp | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size * nmemb, gfp, PAGE_KERNEL); } static __inline void drm_free_large(void *ptr) diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index 692846c7941b53ac8449ed69a21b2a946a89c3bf..3190e30b9398ca60037193daf59efd7006bf66ab 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -10,12 +10,18 @@ #define CLKID_FCLK_DIV2 4 #define CLKID_FCLK_DIV3 5 #define CLKID_FCLK_DIV4 6 +#define CLKID_GP0_PLL 9 #define CLKID_CLK81 12 #define CLKID_MPLL2 15 -#define CLKID_SPI 34 #define CLKID_I2C 22 #define CLKID_SAR_ADC 23 +#define CLKID_RNG0 25 +#define CLKID_SPI 34 #define CLKID_ETH 36 +#define CLKID_AIU_GLUE 38 +#define CLKID_I2S_OUT 40 +#define CLKID_MIXER_IFACE 44 +#define CLKID_AIU 47 #define CLKID_USB0 50 #define CLKID_USB1 51 #define CLKID_USB 55 @@ -24,11 +30,17 @@ #define CLKID_USB0_DDR_BRIDGE 65 #define CLKID_SANA 69 #define CLKID_GCLK_VENCI_INT0 77 +#define CLKID_AOCLK_GATE 80 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 #define CLKID_SD_EMMC_C 96 #define CLKID_SAR_ADC_CLK 97 #define CLKID_SAR_ADC_SEL 98 +#define CLKID_MALI_0_SEL 100 +#define CLKID_MALI_0 102 +#define CLKID_MALI_1_SEL 103 +#define CLKID_MALI_1 105 +#define CLKID_MALI 106 #endif /* __GXBB_CLKC_H */ diff --git a/include/dt-bindings/clock/hi6220-clock.h b/include/dt-bindings/clock/hi6220-clock.h index 6b03c84f42783635573307c053d63e9f0d3ee083..b8ba665aab7b3486ad39777acda4a4aa0a249a5d 100644 --- a/include/dt-bindings/clock/hi6220-clock.h +++ b/include/dt-bindings/clock/hi6220-clock.h @@ -124,7 +124,10 @@ #define HI6220_CS_DAPB 57 #define HI6220_CS_ATB_DIV 58 -#define HI6220_SYS_NR_CLKS 59 +/* gate clock */ +#define HI6220_DAPB_CLK 59 + +#define HI6220_SYS_NR_CLKS 60 /* clk in Hi6220 media controller */ /* gate clocks */ diff --git a/include/dt-bindings/clock/mt6797-clk.h b/include/dt-bindings/clock/mt6797-clk.h new file mode 100644 index 0000000000000000000000000000000000000000..2f25a5aca01992fa32aa421bb3c5c8c6b3120b38 --- /dev/null +++ b/include/dt-bindings/clock/mt6797-clk.h @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2017 MediaTek Inc. + * Author: Kevin Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _DT_BINDINGS_CLK_MT6797_H +#define _DT_BINDINGS_CLK_MT6797_H + +/* TOPCKGEN */ +#define CLK_TOP_MUX_ULPOSC_AXI_CK_MUX_PRE 1 +#define CLK_TOP_MUX_ULPOSC_AXI_CK_MUX 2 +#define CLK_TOP_MUX_AXI 3 +#define CLK_TOP_MUX_MEM 4 +#define CLK_TOP_MUX_DDRPHYCFG 5 +#define CLK_TOP_MUX_MM 6 +#define CLK_TOP_MUX_PWM 7 +#define CLK_TOP_MUX_VDEC 8 +#define CLK_TOP_MUX_VENC 9 +#define CLK_TOP_MUX_MFG 10 +#define CLK_TOP_MUX_CAMTG 11 +#define CLK_TOP_MUX_UART 12 +#define CLK_TOP_MUX_SPI 13 +#define CLK_TOP_MUX_ULPOSC_SPI_CK_MUX 14 +#define CLK_TOP_MUX_USB20 15 +#define CLK_TOP_MUX_MSDC50_0_HCLK 16 +#define CLK_TOP_MUX_MSDC50_0 17 +#define CLK_TOP_MUX_MSDC30_1 18 +#define CLK_TOP_MUX_MSDC30_2 19 +#define CLK_TOP_MUX_AUDIO 20 +#define CLK_TOP_MUX_AUD_INTBUS 21 +#define CLK_TOP_MUX_PMICSPI 22 +#define CLK_TOP_MUX_SCP 23 +#define CLK_TOP_MUX_ATB 24 +#define CLK_TOP_MUX_MJC 25 +#define CLK_TOP_MUX_DPI0 26 +#define CLK_TOP_MUX_AUD_1 27 +#define CLK_TOP_MUX_AUD_2 28 +#define CLK_TOP_MUX_SSUSB_TOP_SYS 29 +#define CLK_TOP_MUX_SPM 30 +#define CLK_TOP_MUX_BSI_SPI 31 +#define CLK_TOP_MUX_AUDIO_H 32 +#define CLK_TOP_MUX_ANC_MD32 33 +#define CLK_TOP_MUX_MFG_52M 34 +#define CLK_TOP_SYSPLL_CK 35 +#define CLK_TOP_SYSPLL_D2 36 +#define CLK_TOP_SYSPLL1_D2 37 +#define CLK_TOP_SYSPLL1_D4 38 +#define CLK_TOP_SYSPLL1_D8 39 +#define CLK_TOP_SYSPLL1_D16 40 +#define CLK_TOP_SYSPLL_D3 41 +#define CLK_TOP_SYSPLL_D3_D3 42 +#define CLK_TOP_SYSPLL2_D2 43 +#define CLK_TOP_SYSPLL2_D4 44 +#define CLK_TOP_SYSPLL2_D8 45 +#define CLK_TOP_SYSPLL_D5 46 +#define CLK_TOP_SYSPLL3_D2 47 +#define CLK_TOP_SYSPLL3_D4 48 +#define CLK_TOP_SYSPLL_D7 49 +#define CLK_TOP_SYSPLL4_D2 50 +#define CLK_TOP_SYSPLL4_D4 51 +#define CLK_TOP_UNIVPLL_CK 52 +#define CLK_TOP_UNIVPLL_D7 53 +#define CLK_TOP_UNIVPLL_D26 54 +#define CLK_TOP_SSUSB_PHY_48M_CK 55 +#define CLK_TOP_USB_PHY48M_CK 56 +#define CLK_TOP_UNIVPLL_D2 57 +#define CLK_TOP_UNIVPLL1_D2 58 +#define CLK_TOP_UNIVPLL1_D4 59 +#define CLK_TOP_UNIVPLL1_D8 60 +#define CLK_TOP_UNIVPLL_D3 61 +#define CLK_TOP_UNIVPLL2_D2 62 +#define CLK_TOP_UNIVPLL2_D4 63 +#define CLK_TOP_UNIVPLL2_D8 64 +#define CLK_TOP_UNIVPLL_D5 65 +#define CLK_TOP_UNIVPLL3_D2 66 +#define CLK_TOP_UNIVPLL3_D4 67 +#define CLK_TOP_UNIVPLL3_D8 68 +#define CLK_TOP_ULPOSC_CK_ORG 69 +#define CLK_TOP_ULPOSC_CK 70 +#define CLK_TOP_ULPOSC_D2 71 +#define CLK_TOP_ULPOSC_D3 72 +#define CLK_TOP_ULPOSC_D4 73 +#define CLK_TOP_ULPOSC_D8 74 +#define CLK_TOP_ULPOSC_D10 75 +#define CLK_TOP_APLL1_CK 76 +#define CLK_TOP_APLL2_CK 77 +#define CLK_TOP_MFGPLL_CK 78 +#define CLK_TOP_MFGPLL_D2 79 +#define CLK_TOP_IMGPLL_CK 80 +#define CLK_TOP_IMGPLL_D2 81 +#define CLK_TOP_IMGPLL_D4 82 +#define CLK_TOP_CODECPLL_CK 83 +#define CLK_TOP_CODECPLL_D2 84 +#define CLK_TOP_VDECPLL_CK 85 +#define CLK_TOP_TVDPLL_CK 86 +#define CLK_TOP_TVDPLL_D2 87 +#define CLK_TOP_TVDPLL_D4 88 +#define CLK_TOP_TVDPLL_D8 89 +#define CLK_TOP_TVDPLL_D16 90 +#define CLK_TOP_MSDCPLL_CK 91 +#define CLK_TOP_MSDCPLL_D2 92 +#define CLK_TOP_MSDCPLL_D4 93 +#define CLK_TOP_MSDCPLL_D8 94 +#define CLK_TOP_NR 95 + +/* APMIXED_SYS */ +#define CLK_APMIXED_MAINPLL 1 +#define CLK_APMIXED_UNIVPLL 2 +#define CLK_APMIXED_MFGPLL 3 +#define CLK_APMIXED_MSDCPLL 4 +#define CLK_APMIXED_IMGPLL 5 +#define CLK_APMIXED_TVDPLL 6 +#define CLK_APMIXED_CODECPLL 7 +#define CLK_APMIXED_VDECPLL 8 +#define CLK_APMIXED_APLL1 9 +#define CLK_APMIXED_APLL2 10 +#define CLK_APMIXED_NR 11 + +/* INFRA_SYS */ +#define CLK_INFRA_PMIC_TMR 1 +#define CLK_INFRA_PMIC_AP 2 +#define CLK_INFRA_PMIC_MD 3 +#define CLK_INFRA_PMIC_CONN 4 +#define CLK_INFRA_SCP 5 +#define CLK_INFRA_SEJ 6 +#define CLK_INFRA_APXGPT 7 +#define CLK_INFRA_SEJ_13M 8 +#define CLK_INFRA_ICUSB 9 +#define CLK_INFRA_GCE 10 +#define CLK_INFRA_THERM 11 +#define CLK_INFRA_I2C0 12 +#define CLK_INFRA_I2C1 13 +#define CLK_INFRA_I2C2 14 +#define CLK_INFRA_I2C3 15 +#define CLK_INFRA_PWM_HCLK 16 +#define CLK_INFRA_PWM1 17 +#define CLK_INFRA_PWM2 18 +#define CLK_INFRA_PWM3 19 +#define CLK_INFRA_PWM4 20 +#define CLK_INFRA_PWM 21 +#define CLK_INFRA_UART0 22 +#define CLK_INFRA_UART1 23 +#define CLK_INFRA_UART2 24 +#define CLK_INFRA_UART3 25 +#define CLK_INFRA_MD2MD_CCIF_0 26 +#define CLK_INFRA_MD2MD_CCIF_1 27 +#define CLK_INFRA_MD2MD_CCIF_2 28 +#define CLK_INFRA_FHCTL 29 +#define CLK_INFRA_BTIF 30 +#define CLK_INFRA_MD2MD_CCIF_3 31 +#define CLK_INFRA_SPI 32 +#define CLK_INFRA_MSDC0 33 +#define CLK_INFRA_MD2MD_CCIF_4 34 +#define CLK_INFRA_MSDC1 35 +#define CLK_INFRA_MSDC2 36 +#define CLK_INFRA_MD2MD_CCIF_5 37 +#define CLK_INFRA_GCPU 38 +#define CLK_INFRA_TRNG 39 +#define CLK_INFRA_AUXADC 40 +#define CLK_INFRA_CPUM 41 +#define CLK_INFRA_AP_C2K_CCIF_0 42 +#define CLK_INFRA_AP_C2K_CCIF_1 43 +#define CLK_INFRA_CLDMA 44 +#define CLK_INFRA_DISP_PWM 45 +#define CLK_INFRA_AP_DMA 46 +#define CLK_INFRA_DEVICE_APC 47 +#define CLK_INFRA_L2C_SRAM 48 +#define CLK_INFRA_CCIF_AP 49 +#define CLK_INFRA_AUDIO 50 +#define CLK_INFRA_CCIF_MD 51 +#define CLK_INFRA_DRAMC_F26M 52 +#define CLK_INFRA_I2C4 53 +#define CLK_INFRA_I2C_APPM 54 +#define CLK_INFRA_I2C_GPUPM 55 +#define CLK_INFRA_I2C2_IMM 56 +#define CLK_INFRA_I2C2_ARB 57 +#define CLK_INFRA_I2C3_IMM 58 +#define CLK_INFRA_I2C3_ARB 59 +#define CLK_INFRA_I2C5 60 +#define CLK_INFRA_SYS_CIRQ 61 +#define CLK_INFRA_SPI1 62 +#define CLK_INFRA_DRAMC_B_F26M 63 +#define CLK_INFRA_ANC_MD32 64 +#define CLK_INFRA_ANC_MD32_32K 65 +#define CLK_INFRA_DVFS_SPM1 66 +#define CLK_INFRA_AES_TOP0 67 +#define CLK_INFRA_AES_TOP1 68 +#define CLK_INFRA_SSUSB_BUS 69 +#define CLK_INFRA_SPI2 70 +#define CLK_INFRA_SPI3 71 +#define CLK_INFRA_SPI4 72 +#define CLK_INFRA_SPI5 73 +#define CLK_INFRA_IRTX 74 +#define CLK_INFRA_SSUSB_SYS 75 +#define CLK_INFRA_SSUSB_REF 76 +#define CLK_INFRA_AUDIO_26M 77 +#define CLK_INFRA_AUDIO_26M_PAD_TOP 78 +#define CLK_INFRA_MODEM_TEMP_SHARE 79 +#define CLK_INFRA_VAD_WRAP_SOC 80 +#define CLK_INFRA_DRAMC_CONF 81 +#define CLK_INFRA_DRAMC_B_CONF 82 +#define CLK_INFRA_MFG_VCG 83 +#define CLK_INFRA_13M 84 +#define CLK_INFRA_NR 85 + +/* IMG_SYS */ +#define CLK_IMG_FDVT 1 +#define CLK_IMG_DPE 2 +#define CLK_IMG_DIP 3 +#define CLK_IMG_LARB6 4 +#define CLK_IMG_NR 5 + +/* MM_SYS */ +#define CLK_MM_SMI_COMMON 1 +#define CLK_MM_SMI_LARB0 2 +#define CLK_MM_SMI_LARB5 3 +#define CLK_MM_CAM_MDP 4 +#define CLK_MM_MDP_RDMA0 5 +#define CLK_MM_MDP_RDMA1 6 +#define CLK_MM_MDP_RSZ0 7 +#define CLK_MM_MDP_RSZ1 8 +#define CLK_MM_MDP_RSZ2 9 +#define CLK_MM_MDP_TDSHP 10 +#define CLK_MM_MDP_COLOR 11 +#define CLK_MM_MDP_WDMA 12 +#define CLK_MM_MDP_WROT0 13 +#define CLK_MM_MDP_WROT1 14 +#define CLK_MM_FAKE_ENG 15 +#define CLK_MM_DISP_OVL0 16 +#define CLK_MM_DISP_OVL1 17 +#define CLK_MM_DISP_OVL0_2L 18 +#define CLK_MM_DISP_OVL1_2L 19 +#define CLK_MM_DISP_RDMA0 20 +#define CLK_MM_DISP_RDMA1 21 +#define CLK_MM_DISP_WDMA0 22 +#define CLK_MM_DISP_WDMA1 23 +#define CLK_MM_DISP_COLOR 24 +#define CLK_MM_DISP_CCORR 25 +#define CLK_MM_DISP_AAL 26 +#define CLK_MM_DISP_GAMMA 27 +#define CLK_MM_DISP_OD 28 +#define CLK_MM_DISP_DITHER 29 +#define CLK_MM_DISP_UFOE 30 +#define CLK_MM_DISP_DSC 31 +#define CLK_MM_DISP_SPLIT 32 +#define CLK_MM_DSI0_MM_CLOCK 33 +#define CLK_MM_DSI1_MM_CLOCK 34 +#define CLK_MM_DPI_MM_CLOCK 35 +#define CLK_MM_DPI_INTERFACE_CLOCK 36 +#define CLK_MM_LARB4_AXI_ASIF_MM_CLOCK 37 +#define CLK_MM_LARB4_AXI_ASIF_MJC_CLOCK 38 +#define CLK_MM_DISP_OVL0_MOUT_CLOCK 39 +#define CLK_MM_FAKE_ENG2 40 +#define CLK_MM_DSI0_INTERFACE_CLOCK 41 +#define CLK_MM_DSI1_INTERFACE_CLOCK 42 +#define CLK_MM_NR 43 + +/* VDEC_SYS */ +#define CLK_VDEC_CKEN_ENG 1 +#define CLK_VDEC_ACTIVE 2 +#define CLK_VDEC_CKEN 3 +#define CLK_VDEC_LARB1_CKEN 4 +#define CLK_VDEC_NR 5 + +/* VENC_SYS */ +#define CLK_VENC_0 1 +#define CLK_VENC_1 2 +#define CLK_VENC_2 3 +#define CLK_VENC_3 4 +#define CLK_VENC_NR 5 + +#endif /* _DT_BINDINGS_CLK_MT6797_H */ diff --git a/include/dt-bindings/clock/r7s72100-clock.h b/include/dt-bindings/clock/r7s72100-clock.h index ce09915c298fd8179a3289ebac8b2fe101015649..bc256d31099a6eafa64f73078e87ca99b10ec0fa 100644 --- a/include/dt-bindings/clock/r7s72100-clock.h +++ b/include/dt-bindings/clock/r7s72100-clock.h @@ -29,6 +29,9 @@ #define R7S72100_CLK_OSTM0 1 #define R7S72100_CLK_OSTM1 0 +/* MSTP6 */ +#define R7S72100_CLK_RTC 0 + /* MSTP7 */ #define R7S72100_CLK_ETHER 4 @@ -49,7 +52,9 @@ #define R7S72100_CLK_SPI4 3 /* MSTP12 */ -#define R7S72100_CLK_SDHI0 3 -#define R7S72100_CLK_SDHI1 2 +#define R7S72100_CLK_SDHI00 3 +#define R7S72100_CLK_SDHI01 2 +#define R7S72100_CLK_SDHI10 1 +#define R7S72100_CLK_SDHI11 0 #endif /* __DT_BINDINGS_CLOCK_R7S72100_H__ */ diff --git a/include/dt-bindings/clock/r8a73a4-clock.h b/include/dt-bindings/clock/r8a73a4-clock.h index dd11ecdf837e8e65da9e241ef55cdfe376722eb2..4b36681572570c15874e7027b5d987dd61ecaa3d 100644 --- a/include/dt-bindings/clock/r8a73a4-clock.h +++ b/include/dt-bindings/clock/r8a73a4-clock.h @@ -54,6 +54,7 @@ #define R8A73A4_CLK_IIC3 11 #define R8A73A4_CLK_IIC4 10 #define R8A73A4_CLK_IIC5 9 +#define R8A73A4_CLK_INTC_SYS 8 #define R8A73A4_CLK_IRQC 7 /* MSTP5 */ diff --git a/include/dt-bindings/clock/r8a7790-clock.h b/include/dt-bindings/clock/r8a7790-clock.h index fa5e8da809f2e802f3d440dca3d23f1ecb666e82..20641fa68e731ec0a49f3c44b24a8629fca30dd8 100644 --- a/include/dt-bindings/clock/r8a7790-clock.h +++ b/include/dt-bindings/clock/r8a7790-clock.h @@ -82,6 +82,7 @@ /* MSTP4 */ #define R8A7790_CLK_IRQC 7 +#define R8A7790_CLK_INTC_SYS 8 /* MSTP5 */ #define R8A7790_CLK_AUDIO_DMAC1 1 diff --git a/include/dt-bindings/clock/r8a7791-clock.h b/include/dt-bindings/clock/r8a7791-clock.h index ffa11379b3f0ade901ff7d293695ca5f593cb066..adc50dc31ab3423e008efd9959655d2150749403 100644 --- a/include/dt-bindings/clock/r8a7791-clock.h +++ b/include/dt-bindings/clock/r8a7791-clock.h @@ -72,6 +72,7 @@ /* MSTP4 */ #define R8A7791_CLK_IRQC 7 +#define R8A7791_CLK_INTC_SYS 8 /* MSTP5 */ #define R8A7791_CLK_AUDIO_DMAC1 1 diff --git a/include/dt-bindings/clock/r8a7792-clock.h b/include/dt-bindings/clock/r8a7792-clock.h index 9a8b392ceb000fc3eebbca97d659a877542c0625..5be90bc23bd7909788e658069f385f6ebec7fc7e 100644 --- a/include/dt-bindings/clock/r8a7792-clock.h +++ b/include/dt-bindings/clock/r8a7792-clock.h @@ -17,7 +17,6 @@ #define R8A7792_CLK_PLL3 3 #define R8A7792_CLK_LB 4 #define R8A7792_CLK_QSPI 5 -#define R8A7792_CLK_Z 6 /* MSTP0 */ #define R8A7792_CLK_MSIOF0 0 @@ -45,6 +44,7 @@ /* MSTP4 */ #define R8A7792_CLK_IRQC 7 +#define R8A7792_CLK_INTC_SYS 8 /* MSTP5 */ #define R8A7792_CLK_AUDIO_DMAC0 2 diff --git a/include/dt-bindings/clock/r8a7793-clock.h b/include/dt-bindings/clock/r8a7793-clock.h index efcbc594fe82e6837a73d575c215b826dcddca6d..7318d45d4e7e9888c41aa1fb66def087b4cb6ff4 100644 --- a/include/dt-bindings/clock/r8a7793-clock.h +++ b/include/dt-bindings/clock/r8a7793-clock.h @@ -77,10 +77,11 @@ /* MSTP4 */ #define R8A7793_CLK_IRQC 7 +#define R8A7793_CLK_INTC_SYS 8 /* MSTP5 */ -#define R8A7793_CLK_AUDIO_DMAC1 1 -#define R8A7793_CLK_AUDIO_DMAC0 2 +#define R8A7793_CLK_AUDIO_DMAC1 1 +#define R8A7793_CLK_AUDIO_DMAC0 2 #define R8A7793_CLK_ADSP_MOD 6 #define R8A7793_CLK_THERMAL 22 #define R8A7793_CLK_PWM 23 diff --git a/include/dt-bindings/clock/r8a7794-clock.h b/include/dt-bindings/clock/r8a7794-clock.h index 88e64846cf370ff4a32c8423b5d00833c8cb5a81..93e99c3ffc8dafebef94d0644cb15532c86ab1b4 100644 --- a/include/dt-bindings/clock/r8a7794-clock.h +++ b/include/dt-bindings/clock/r8a7794-clock.h @@ -64,6 +64,7 @@ /* MSTP4 */ #define R8A7794_CLK_IRQC 7 +#define R8A7794_CLK_INTC_SYS 8 /* MSTP5 */ #define R8A7794_CLK_AUDIO_DMAC0 2 @@ -81,6 +82,7 @@ #define R8A7794_CLK_SCIF2 19 #define R8A7794_CLK_SCIF1 20 #define R8A7794_CLK_SCIF0 21 +#define R8A7794_CLK_DU1 23 #define R8A7794_CLK_DU0 24 /* MSTP8 */ diff --git a/include/dt-bindings/clock/r8a7795-cpg-mssr.h b/include/dt-bindings/clock/r8a7795-cpg-mssr.h index e864aae0a2561c4bc3fac807c0edfc9d25b0a8dd..f047eaf261f34ac783b2187997894daffe552572 100644 --- a/include/dt-bindings/clock/r8a7795-cpg-mssr.h +++ b/include/dt-bindings/clock/r8a7795-cpg-mssr.h @@ -60,4 +60,11 @@ #define R8A7795_CLK_R 45 #define R8A7795_CLK_OSC 46 +/* r8a7795 ES2.0 CPG Core Clocks */ +#define R8A7795_CLK_S0D2 47 +#define R8A7795_CLK_S0D3 48 +#define R8A7795_CLK_S0D6 49 +#define R8A7795_CLK_S0D8 50 +#define R8A7795_CLK_S0D12 51 + #endif /* __DT_BINDINGS_CLOCK_R8A7795_CPG_MSSR_H__ */ diff --git a/include/dt-bindings/clock/rk1108-cru.h b/include/dt-bindings/clock/rk1108-cru.h deleted file mode 100644 index 9350a5527a36c9dd05537b9f04792ebfbdb100ca..0000000000000000000000000000000000000000 --- a/include/dt-bindings/clock/rk1108-cru.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (c) 2016 Rockchip Electronics Co. Ltd. - * Author: Shawn Lin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H -#define _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H - -/* pll id */ -#define PLL_APLL 0 -#define PLL_DPLL 1 -#define PLL_GPLL 2 -#define ARMCLK 3 - -/* sclk gates (special clocks) */ -#define SCLK_SPI0 65 -#define SCLK_NANDC 67 -#define SCLK_SDMMC 68 -#define SCLK_SDIO 69 -#define SCLK_EMMC 71 -#define SCLK_UART0 72 -#define SCLK_UART1 73 -#define SCLK_UART2 74 -#define SCLK_I2S0 75 -#define SCLK_I2S1 76 -#define SCLK_I2S2 77 -#define SCLK_TIMER0 78 -#define SCLK_TIMER1 79 -#define SCLK_SFC 80 -#define SCLK_SDMMC_DRV 81 -#define SCLK_SDIO_DRV 82 -#define SCLK_EMMC_DRV 83 -#define SCLK_SDMMC_SAMPLE 84 -#define SCLK_SDIO_SAMPLE 85 -#define SCLK_EMMC_SAMPLE 86 - -/* aclk gates */ -#define ACLK_DMAC 192 -#define ACLK_PRE 193 -#define ACLK_CORE 194 -#define ACLK_ENMCORE 195 - -/* pclk gates */ -#define PCLK_GPIO1 256 -#define PCLK_GPIO2 257 -#define PCLK_GPIO3 258 -#define PCLK_GRF 259 -#define PCLK_I2C1 260 -#define PCLK_I2C2 261 -#define PCLK_I2C3 262 -#define PCLK_SPI 263 -#define PCLK_SFC 264 -#define PCLK_UART0 265 -#define PCLK_UART1 266 -#define PCLK_UART2 267 -#define PCLK_TSADC 268 -#define PCLK_PWM 269 -#define PCLK_TIMER 270 -#define PCLK_PERI 271 - -/* hclk gates */ -#define HCLK_I2S0_8CH 320 -#define HCLK_I2S1_8CH 321 -#define HCLK_I2S2_2CH 322 -#define HCLK_NANDC 323 -#define HCLK_SDMMC 324 -#define HCLK_SDIO 325 -#define HCLK_EMMC 326 -#define HCLK_PERI 327 -#define HCLK_SFC 328 - -#define CLK_NR_CLKS (HCLK_SFC + 1) - -/* reset id */ -#define SRST_CORE_PO_AD 0 -#define SRST_CORE_AD 1 -#define SRST_L2_AD 2 -#define SRST_CPU_NIU_AD 3 -#define SRST_CORE_PO 4 -#define SRST_CORE 5 -#define SRST_L2 6 -#define SRST_CORE_DBG 8 -#define PRST_DBG 9 -#define RST_DAP 10 -#define PRST_DBG_NIU 11 -#define ARST_STRC_SYS_AD 15 - -#define SRST_DDRPHY_CLKDIV 16 -#define SRST_DDRPHY 17 -#define PRST_DDRPHY 18 -#define PRST_HDMIPHY 19 -#define PRST_VDACPHY 20 -#define PRST_VADCPHY 21 -#define PRST_MIPI_CSI_PHY 22 -#define PRST_MIPI_DSI_PHY 23 -#define PRST_ACODEC 24 -#define ARST_BUS_NIU 25 -#define PRST_TOP_NIU 26 -#define ARST_INTMEM 27 -#define HRST_ROM 28 -#define ARST_DMAC 29 -#define SRST_MSCH_NIU 30 -#define PRST_MSCH_NIU 31 - -#define PRST_DDRUPCTL 32 -#define NRST_DDRUPCTL 33 -#define PRST_DDRMON 34 -#define HRST_I2S0_8CH 35 -#define MRST_I2S0_8CH 36 -#define HRST_I2S1_2CH 37 -#define MRST_IS21_2CH 38 -#define HRST_I2S2_2CH 39 -#define MRST_I2S2_2CH 40 -#define HRST_CRYPTO 41 -#define SRST_CRYPTO 42 -#define PRST_SPI 43 -#define SRST_SPI 44 -#define PRST_UART0 45 -#define PRST_UART1 46 -#define PRST_UART2 47 - -#define SRST_UART0 48 -#define SRST_UART1 49 -#define SRST_UART2 50 -#define PRST_I2C1 51 -#define PRST_I2C2 52 -#define PRST_I2C3 53 -#define SRST_I2C1 54 -#define SRST_I2C2 55 -#define SRST_I2C3 56 -#define PRST_PWM1 58 -#define SRST_PWM1 60 -#define PRST_WDT 61 -#define PRST_GPIO1 62 -#define PRST_GPIO2 63 - -#define PRST_GPIO3 64 -#define PRST_GRF 65 -#define PRST_EFUSE 66 -#define PRST_EFUSE512 67 -#define PRST_TIMER0 68 -#define SRST_TIMER0 69 -#define SRST_TIMER1 70 -#define PRST_TSADC 71 -#define SRST_TSADC 72 -#define PRST_SARADC 73 -#define SRST_SARADC 74 -#define HRST_SYSBUS 75 -#define PRST_USBGRF 76 - -#define ARST_PERIPH_NIU 80 -#define HRST_PERIPH_NIU 81 -#define PRST_PERIPH_NIU 82 -#define HRST_PERIPH 83 -#define HRST_SDMMC 84 -#define HRST_SDIO 85 -#define HRST_EMMC 86 -#define HRST_NANDC 87 -#define NRST_NANDC 88 -#define HRST_SFC 89 -#define SRST_SFC 90 -#define ARST_GMAC 91 -#define HRST_OTG 92 -#define SRST_OTG 93 -#define SRST_OTG_ADP 94 -#define HRST_HOST0 95 - -#define HRST_HOST0_AUX 96 -#define HRST_HOST0_ARB 97 -#define SRST_HOST0_EHCIPHY 98 -#define SRST_HOST0_UTMI 99 -#define SRST_USBPOR 100 -#define SRST_UTMI0 101 -#define SRST_UTMI1 102 - -#define ARST_VIO0_NIU 102 -#define ARST_VIO1_NIU 103 -#define HRST_VIO_NIU 104 -#define PRST_VIO_NIU 105 -#define ARST_VOP 106 -#define HRST_VOP 107 -#define DRST_VOP 108 -#define ARST_IEP 109 -#define HRST_IEP 110 -#define ARST_RGA 111 -#define HRST_RGA 112 -#define SRST_RGA 113 -#define PRST_CVBS 114 -#define PRST_HDMI 115 -#define SRST_HDMI 116 -#define PRST_MIPI_DSI 117 - -#define ARST_ISP_NIU 118 -#define HRST_ISP_NIU 119 -#define HRST_ISP 120 -#define SRST_ISP 121 -#define ARST_VIP0 122 -#define HRST_VIP0 123 -#define PRST_VIP0 124 -#define ARST_VIP1 125 -#define HRST_VIP1 126 -#define PRST_VIP1 127 -#define ARST_VIP2 128 -#define HRST_VIP2 129 -#define PRST_VIP2 120 -#define ARST_VIP3 121 -#define HRST_VIP3 122 -#define PRST_VIP4 123 - -#define PRST_CIF1TO4 124 -#define SRST_CVBS_CLK 125 -#define HRST_CVBS 126 - -#define ARST_VPU_NIU 140 -#define HRST_VPU_NIU 141 -#define ARST_VPU 142 -#define HRST_VPU 143 -#define ARST_RKVDEC_NIU 144 -#define HRST_RKVDEC_NIU 145 -#define ARST_RKVDEC 146 -#define HRST_RKVDEC 147 -#define SRST_RKVDEC_CABAC 148 -#define SRST_RKVDEC_CORE 149 -#define ARST_RKVENC_NIU 150 -#define HRST_RKVENC_NIU 151 -#define ARST_RKVENC 152 -#define HRST_RKVENC 153 -#define SRST_RKVENC_CORE 154 - -#define SRST_DSP_CORE 156 -#define SRST_DSP_SYS 157 -#define SRST_DSP_GLOBAL 158 -#define SRST_DSP_OECM 159 -#define PRST_DSP_IOP_NIU 160 -#define ARST_DSP_EPP_NIU 161 -#define ARST_DSP_EDP_NIU 162 -#define PRST_DSP_DBG_NIU 163 -#define PRST_DSP_CFG_NIU 164 -#define PRST_DSP_GRF 165 -#define PRST_DSP_MAILBOX 166 -#define PRST_DSP_INTC 167 -#define PRST_DSP_PFM_MON 169 -#define SRST_DSP_PFM_MON 170 -#define ARST_DSP_EDAP_NIU 171 - -#define SRST_PMU 172 -#define SRST_PMU_I2C0 173 -#define PRST_PMU_I2C0 174 -#define PRST_PMU_GPIO0 175 -#define PRST_PMU_INTMEM 176 -#define PRST_PMU_PWM0 177 -#define SRST_PMU_PWM0 178 -#define PRST_PMU_GRF 179 -#define SRST_PMU_NIU 180 -#define SRST_PMU_PVTM 181 -#define ARST_DSP_EDP_PERF 184 -#define ARST_DSP_EPP_PERF 185 - -#endif /* _DT_BINDINGS_CLK_ROCKCHIP_RK1108_H */ diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h index ee702c8e4c091532480561cd5759a32f5fad8c01..d2b26a4b43ebde2eae06c6fd596895e5147adc76 100644 --- a/include/dt-bindings/clock/rk3328-cru.h +++ b/include/dt-bindings/clock/rk3328-cru.h @@ -97,6 +97,7 @@ #define SCLK_MAC2IO_SRC 99 #define SCLK_MAC2IO 100 #define SCLK_MAC2PHY 101 +#define SCLK_MAC2IO_EXT 102 /* dclk gates */ #define DCLK_LCDC 120 diff --git a/include/dt-bindings/clock/rk3368-cru.h b/include/dt-bindings/clock/rk3368-cru.h index 9c5dd9ba2f6cd9fa06c1870b7b0704a7eebff004..aeb83e581a11ec5af82df30cdcf167c3a45b964b 100644 --- a/include/dt-bindings/clock/rk3368-cru.h +++ b/include/dt-bindings/clock/rk3368-cru.h @@ -44,13 +44,12 @@ #define SCLK_I2S_8CH 82 #define SCLK_SPDIF_8CH 83 #define SCLK_I2S_2CH 84 -#define SCLK_TIMER0 85 -#define SCLK_TIMER1 86 -#define SCLK_TIMER2 87 -#define SCLK_TIMER3 88 -#define SCLK_TIMER4 89 -#define SCLK_TIMER5 90 -#define SCLK_TIMER6 91 +#define SCLK_TIMER00 85 +#define SCLK_TIMER01 86 +#define SCLK_TIMER02 87 +#define SCLK_TIMER03 88 +#define SCLK_TIMER04 89 +#define SCLK_TIMER05 90 #define SCLK_OTGPHY0 93 #define SCLK_OTG_ADP 96 #define SCLK_HSICPHY480M 97 @@ -82,6 +81,12 @@ #define SCLK_SFC 126 #define SCLK_MAC 127 #define SCLK_MACREF_OUT 128 +#define SCLK_TIMER10 133 +#define SCLK_TIMER11 134 +#define SCLK_TIMER12 135 +#define SCLK_TIMER13 136 +#define SCLK_TIMER14 137 +#define SCLK_TIMER15 138 #define DCLK_VOP 190 #define MCLK_CRYPTO 191 diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h new file mode 100644 index 0000000000000000000000000000000000000000..ae26f81059142867a009197e1c37c8e00d3dbd21 --- /dev/null +++ b/include/dt-bindings/clock/rv1108-cru.h @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2016 Rockchip Electronics Co. Ltd. + * Author: Shawn Lin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H +#define _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H + +/* pll id */ +#define PLL_APLL 0 +#define PLL_DPLL 1 +#define PLL_GPLL 2 +#define ARMCLK 3 + +/* sclk gates (special clocks) */ +#define SCLK_SPI0 65 +#define SCLK_NANDC 67 +#define SCLK_SDMMC 68 +#define SCLK_SDIO 69 +#define SCLK_EMMC 71 +#define SCLK_UART0 72 +#define SCLK_UART1 73 +#define SCLK_UART2 74 +#define SCLK_I2S0 75 +#define SCLK_I2S1 76 +#define SCLK_I2S2 77 +#define SCLK_TIMER0 78 +#define SCLK_TIMER1 79 +#define SCLK_SFC 80 +#define SCLK_SDMMC_DRV 81 +#define SCLK_SDIO_DRV 82 +#define SCLK_EMMC_DRV 83 +#define SCLK_SDMMC_SAMPLE 84 +#define SCLK_SDIO_SAMPLE 85 +#define SCLK_EMMC_SAMPLE 86 + +/* aclk gates */ +#define ACLK_DMAC 192 +#define ACLK_PRE 193 +#define ACLK_CORE 194 +#define ACLK_ENMCORE 195 + +/* pclk gates */ +#define PCLK_GPIO1 256 +#define PCLK_GPIO2 257 +#define PCLK_GPIO3 258 +#define PCLK_GRF 259 +#define PCLK_I2C1 260 +#define PCLK_I2C2 261 +#define PCLK_I2C3 262 +#define PCLK_SPI 263 +#define PCLK_SFC 264 +#define PCLK_UART0 265 +#define PCLK_UART1 266 +#define PCLK_UART2 267 +#define PCLK_TSADC 268 +#define PCLK_PWM 269 +#define PCLK_TIMER 270 +#define PCLK_PERI 271 + +/* hclk gates */ +#define HCLK_I2S0_8CH 320 +#define HCLK_I2S1_8CH 321 +#define HCLK_I2S2_2CH 322 +#define HCLK_NANDC 323 +#define HCLK_SDMMC 324 +#define HCLK_SDIO 325 +#define HCLK_EMMC 326 +#define HCLK_PERI 327 +#define HCLK_SFC 328 + +#define CLK_NR_CLKS (HCLK_SFC + 1) + +/* reset id */ +#define SRST_CORE_PO_AD 0 +#define SRST_CORE_AD 1 +#define SRST_L2_AD 2 +#define SRST_CPU_NIU_AD 3 +#define SRST_CORE_PO 4 +#define SRST_CORE 5 +#define SRST_L2 6 +#define SRST_CORE_DBG 8 +#define PRST_DBG 9 +#define RST_DAP 10 +#define PRST_DBG_NIU 11 +#define ARST_STRC_SYS_AD 15 + +#define SRST_DDRPHY_CLKDIV 16 +#define SRST_DDRPHY 17 +#define PRST_DDRPHY 18 +#define PRST_HDMIPHY 19 +#define PRST_VDACPHY 20 +#define PRST_VADCPHY 21 +#define PRST_MIPI_CSI_PHY 22 +#define PRST_MIPI_DSI_PHY 23 +#define PRST_ACODEC 24 +#define ARST_BUS_NIU 25 +#define PRST_TOP_NIU 26 +#define ARST_INTMEM 27 +#define HRST_ROM 28 +#define ARST_DMAC 29 +#define SRST_MSCH_NIU 30 +#define PRST_MSCH_NIU 31 + +#define PRST_DDRUPCTL 32 +#define NRST_DDRUPCTL 33 +#define PRST_DDRMON 34 +#define HRST_I2S0_8CH 35 +#define MRST_I2S0_8CH 36 +#define HRST_I2S1_2CH 37 +#define MRST_IS21_2CH 38 +#define HRST_I2S2_2CH 39 +#define MRST_I2S2_2CH 40 +#define HRST_CRYPTO 41 +#define SRST_CRYPTO 42 +#define PRST_SPI 43 +#define SRST_SPI 44 +#define PRST_UART0 45 +#define PRST_UART1 46 +#define PRST_UART2 47 + +#define SRST_UART0 48 +#define SRST_UART1 49 +#define SRST_UART2 50 +#define PRST_I2C1 51 +#define PRST_I2C2 52 +#define PRST_I2C3 53 +#define SRST_I2C1 54 +#define SRST_I2C2 55 +#define SRST_I2C3 56 +#define PRST_PWM1 58 +#define SRST_PWM1 60 +#define PRST_WDT 61 +#define PRST_GPIO1 62 +#define PRST_GPIO2 63 + +#define PRST_GPIO3 64 +#define PRST_GRF 65 +#define PRST_EFUSE 66 +#define PRST_EFUSE512 67 +#define PRST_TIMER0 68 +#define SRST_TIMER0 69 +#define SRST_TIMER1 70 +#define PRST_TSADC 71 +#define SRST_TSADC 72 +#define PRST_SARADC 73 +#define SRST_SARADC 74 +#define HRST_SYSBUS 75 +#define PRST_USBGRF 76 + +#define ARST_PERIPH_NIU 80 +#define HRST_PERIPH_NIU 81 +#define PRST_PERIPH_NIU 82 +#define HRST_PERIPH 83 +#define HRST_SDMMC 84 +#define HRST_SDIO 85 +#define HRST_EMMC 86 +#define HRST_NANDC 87 +#define NRST_NANDC 88 +#define HRST_SFC 89 +#define SRST_SFC 90 +#define ARST_GMAC 91 +#define HRST_OTG 92 +#define SRST_OTG 93 +#define SRST_OTG_ADP 94 +#define HRST_HOST0 95 + +#define HRST_HOST0_AUX 96 +#define HRST_HOST0_ARB 97 +#define SRST_HOST0_EHCIPHY 98 +#define SRST_HOST0_UTMI 99 +#define SRST_USBPOR 100 +#define SRST_UTMI0 101 +#define SRST_UTMI1 102 + +#define ARST_VIO0_NIU 102 +#define ARST_VIO1_NIU 103 +#define HRST_VIO_NIU 104 +#define PRST_VIO_NIU 105 +#define ARST_VOP 106 +#define HRST_VOP 107 +#define DRST_VOP 108 +#define ARST_IEP 109 +#define HRST_IEP 110 +#define ARST_RGA 111 +#define HRST_RGA 112 +#define SRST_RGA 113 +#define PRST_CVBS 114 +#define PRST_HDMI 115 +#define SRST_HDMI 116 +#define PRST_MIPI_DSI 117 + +#define ARST_ISP_NIU 118 +#define HRST_ISP_NIU 119 +#define HRST_ISP 120 +#define SRST_ISP 121 +#define ARST_VIP0 122 +#define HRST_VIP0 123 +#define PRST_VIP0 124 +#define ARST_VIP1 125 +#define HRST_VIP1 126 +#define PRST_VIP1 127 +#define ARST_VIP2 128 +#define HRST_VIP2 129 +#define PRST_VIP2 120 +#define ARST_VIP3 121 +#define HRST_VIP3 122 +#define PRST_VIP4 123 + +#define PRST_CIF1TO4 124 +#define SRST_CVBS_CLK 125 +#define HRST_CVBS 126 + +#define ARST_VPU_NIU 140 +#define HRST_VPU_NIU 141 +#define ARST_VPU 142 +#define HRST_VPU 143 +#define ARST_RKVDEC_NIU 144 +#define HRST_RKVDEC_NIU 145 +#define ARST_RKVDEC 146 +#define HRST_RKVDEC 147 +#define SRST_RKVDEC_CABAC 148 +#define SRST_RKVDEC_CORE 149 +#define ARST_RKVENC_NIU 150 +#define HRST_RKVENC_NIU 151 +#define ARST_RKVENC 152 +#define HRST_RKVENC 153 +#define SRST_RKVENC_CORE 154 + +#define SRST_DSP_CORE 156 +#define SRST_DSP_SYS 157 +#define SRST_DSP_GLOBAL 158 +#define SRST_DSP_OECM 159 +#define PRST_DSP_IOP_NIU 160 +#define ARST_DSP_EPP_NIU 161 +#define ARST_DSP_EDP_NIU 162 +#define PRST_DSP_DBG_NIU 163 +#define PRST_DSP_CFG_NIU 164 +#define PRST_DSP_GRF 165 +#define PRST_DSP_MAILBOX 166 +#define PRST_DSP_INTC 167 +#define PRST_DSP_PFM_MON 169 +#define SRST_DSP_PFM_MON 170 +#define ARST_DSP_EDAP_NIU 171 + +#define SRST_PMU 172 +#define SRST_PMU_I2C0 173 +#define PRST_PMU_I2C0 174 +#define PRST_PMU_GPIO0 175 +#define PRST_PMU_INTMEM 176 +#define PRST_PMU_PWM0 177 +#define SRST_PMU_PWM0 178 +#define PRST_PMU_GRF 179 +#define SRST_PMU_NIU 180 +#define SRST_PMU_PVTM 181 +#define ARST_DSP_EDP_PERF 184 +#define ARST_DSP_EPP_PERF 185 + +#endif /* _DT_BINDINGS_CLK_ROCKCHIP_RV1108_H */ diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h index efb7ba2bd51510533006efe7f0c9f9dcb016e24c..c2afc41d69644af3d9f920a56341d62003630883 100644 --- a/include/dt-bindings/clock/sun8i-h3-ccu.h +++ b/include/dt-bindings/clock/sun8i-h3-ccu.h @@ -91,7 +91,7 @@ #define CLK_BUS_UART1 63 #define CLK_BUS_UART2 64 #define CLK_BUS_UART3 65 -#define CLK_BUS_SCR 66 +#define CLK_BUS_SCR0 66 #define CLK_BUS_EPHY 67 #define CLK_BUS_DBG 68 @@ -142,4 +142,7 @@ #define CLK_GPU 114 +/* New clocks imported in H5 */ +#define CLK_BUS_SCR1 115 + #endif /* _DT_BINDINGS_CLK_SUN8I_H3_H_ */ diff --git a/include/dt-bindings/clock/sun8i-r-ccu.h b/include/dt-bindings/clock/sun8i-r-ccu.h new file mode 100644 index 0000000000000000000000000000000000000000..779d20aa0d05cffd8d565f5d570be52f8b94cee6 --- /dev/null +++ b/include/dt-bindings/clock/sun8i-r-ccu.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016 Icenowy Zheng + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_CLK_SUN8I_R_CCU_H_ +#define _DT_BINDINGS_CLK_SUN8I_R_CCU_H_ + +#define CLK_AR100 0 + +#define CLK_APB0_PIO 3 +#define CLK_APB0_IR 4 +#define CLK_APB0_TIMER 5 +#define CLK_APB0_RSB 6 +#define CLK_APB0_UART 7 +/* 8 is reserved for CLK_APB0_W1 on A31 */ +#define CLK_APB0_I2C 9 +#define CLK_APB0_TWD 10 + +#define CLK_IR 11 + +#endif /* _DT_BINDINGS_CLK_SUN8I_R_CCU_H_ */ diff --git a/include/dt-bindings/clock/tegra114-car.h b/include/dt-bindings/clock/tegra114-car.h index 534c03f8ad72bc23675e0a4fe7761b2257f1b741..ed5ca218c8573e735ccb3fc96ebd9c0ad43dafc5 100644 --- a/include/dt-bindings/clock/tegra114-car.h +++ b/include/dt-bindings/clock/tegra114-car.h @@ -156,7 +156,7 @@ /* 133 */ /* 134 */ /* 135 */ -/* 136 */ +#define TEGRA114_CLK_CEC 136 /* 137 */ /* 138 */ /* 139 */ diff --git a/include/dt-bindings/clock/tegra124-car-common.h b/include/dt-bindings/clock/tegra124-car-common.h index a2156090563f357ae6076a08639f14cb79469671..9352c7e2ce0ba6349b1d298b8356e2c04ba742fe 100644 --- a/include/dt-bindings/clock/tegra124-car-common.h +++ b/include/dt-bindings/clock/tegra124-car-common.h @@ -156,7 +156,7 @@ /* 133 */ /* 134 */ /* 135 */ -/* 136 */ +#define TEGRA124_CLK_CEC 136 /* 137 */ /* 138 */ /* 139 */ diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h index 35288b20f2c9cffcaebdf5075946b25dce6fad4b..46689cd3750bf11aed2115f4d34df9f1642ce265 100644 --- a/include/dt-bindings/clock/tegra210-car.h +++ b/include/dt-bindings/clock/tegra210-car.h @@ -39,7 +39,7 @@ /* 20 (register bit affects vi and vi_sensor) */ /* 21 */ #define TEGRA210_CLK_USBD 22 -#define TEGRA210_CLK_ISP 23 +#define TEGRA210_CLK_ISPA 23 /* 24 */ /* 25 */ #define TEGRA210_CLK_DISP2 26 @@ -156,7 +156,7 @@ /* 133 */ /* 134 */ /* 135 */ -/* 136 */ +#define TEGRA210_CLK_CEC 136 /* 137 */ /* 138 */ /* 139 */ @@ -173,7 +173,7 @@ #define TEGRA210_CLK_ENTROPY 149 /* 150 */ /* 151 */ -/* 152 */ +#define TEGRA210_CLK_DP2 152 /* 153 */ /* 154 */ /* 155 (bit affects dfll_ref and dfll_soc) */ @@ -210,7 +210,7 @@ #define TEGRA210_CLK_DBGAPB 185 /* 186 */ #define TEGRA210_CLK_PLL_P_OUT_ADSP 187 -/* 188 */ +/* 188 ((bit affects pll_a_out_adsp and pll_a_out0_out_adsp)*/ #define TEGRA210_CLK_PLL_G_REF 189 /* 190 */ /* 191 */ @@ -222,7 +222,7 @@ /* 196 */ #define TEGRA210_CLK_DMIC3 197 #define TEGRA210_CLK_APE 198 -/* 199 */ +#define TEGRA210_CLK_ADSP 199 /* 200 */ /* 201 */ #define TEGRA210_CLK_MAUD 202 @@ -241,10 +241,10 @@ /* 215 */ /* 216 */ /* 217 */ -/* 218 */ +#define TEGRA210_CLK_ADSP_NEON 218 #define TEGRA210_CLK_NVENC 219 -/* 220 */ -/* 221 */ +#define TEGRA210_CLK_IQC2 220 +#define TEGRA210_CLK_IQC1 221 #define TEGRA210_CLK_SOR_SAFE 222 #define TEGRA210_CLK_PLL_P_OUT_CPU 223 @@ -349,9 +349,9 @@ #define TEGRA210_CLK_PLL_RE_OUT1 319 /* 320 */ /* 321 */ -/* 322 */ -/* 323 */ -/* 324 */ +#define TEGRA210_CLK_ISP 322 +#define TEGRA210_CLK_PLL_A_OUT_ADSP 323 +#define TEGRA210_CLK_PLL_A_OUT0_OUT_ADSP 324 /* 325 */ /* 326 */ /* 327 */ @@ -396,6 +396,15 @@ #define TEGRA210_CLK_PLL_C_UD 364 #define TEGRA210_CLK_SCLK_MUX 365 -#define TEGRA210_CLK_CLK_MAX 366 +#define TEGRA210_CLK_ACLK 370 + +#define TEGRA210_CLK_DMIC1_SYNC_CLK 388 +#define TEGRA210_CLK_DMIC1_SYNC_CLK_MUX 389 +#define TEGRA210_CLK_DMIC2_SYNC_CLK 390 +#define TEGRA210_CLK_DMIC2_SYNC_CLK_MUX 391 +#define TEGRA210_CLK_DMIC3_SYNC_CLK 392 +#define TEGRA210_CLK_DMIC3_SYNC_CLK_MUX 393 + +#define TEGRA210_CLK_CLK_MAX 394 #endif /* _DT_BINDINGS_CLOCK_TEGRA210_CAR_H */ diff --git a/include/dt-bindings/clock/tegra30-car.h b/include/dt-bindings/clock/tegra30-car.h index 889e49ba0aa3de3f3b83ad27b1d0f4b12521a05a..7213354b9652c77f99fbd41a9a4c26d0bfb4662f 100644 --- a/include/dt-bindings/clock/tegra30-car.h +++ b/include/dt-bindings/clock/tegra30-car.h @@ -156,7 +156,7 @@ /* 133 */ /* 134 */ /* 135 */ -/* 136 */ +#define TEGRA30_CLK_CEC 136 /* 137 */ /* 138 */ /* 139 */ diff --git a/include/dt-bindings/genpd/k2g.h b/include/dt-bindings/genpd/k2g.h new file mode 100644 index 0000000000000000000000000000000000000000..1f31f17e19ebca5e528ad9498047ec9bd973d3c4 --- /dev/null +++ b/include/dt-bindings/genpd/k2g.h @@ -0,0 +1,90 @@ +/* + * TI K2G SoC Device definitions + * + * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _DT_BINDINGS_GENPD_K2G_H +#define _DT_BINDINGS_GENPD_K2G_H + +/* Documented in http://processors.wiki.ti.com/index.php/TISCI */ + +#define K2G_DEV_PMMC0 0x0000 +#define K2G_DEV_MLB0 0x0001 +#define K2G_DEV_DSS0 0x0002 +#define K2G_DEV_MCBSP0 0x0003 +#define K2G_DEV_MCASP0 0x0004 +#define K2G_DEV_MCASP1 0x0005 +#define K2G_DEV_MCASP2 0x0006 +#define K2G_DEV_DCAN0 0x0008 +#define K2G_DEV_DCAN1 0x0009 +#define K2G_DEV_EMIF0 0x000a +#define K2G_DEV_MMCHS0 0x000b +#define K2G_DEV_MMCHS1 0x000c +#define K2G_DEV_GPMC0 0x000d +#define K2G_DEV_ELM0 0x000e +#define K2G_DEV_SPI0 0x0010 +#define K2G_DEV_SPI1 0x0011 +#define K2G_DEV_SPI2 0x0012 +#define K2G_DEV_SPI3 0x0013 +#define K2G_DEV_ICSS0 0x0014 +#define K2G_DEV_ICSS1 0x0015 +#define K2G_DEV_USB0 0x0016 +#define K2G_DEV_USB1 0x0017 +#define K2G_DEV_NSS0 0x0018 +#define K2G_DEV_PCIE0 0x0019 +#define K2G_DEV_GPIO0 0x001b +#define K2G_DEV_GPIO1 0x001c +#define K2G_DEV_TIMER64_0 0x001d +#define K2G_DEV_TIMER64_1 0x001e +#define K2G_DEV_TIMER64_2 0x001f +#define K2G_DEV_TIMER64_3 0x0020 +#define K2G_DEV_TIMER64_4 0x0021 +#define K2G_DEV_TIMER64_5 0x0022 +#define K2G_DEV_TIMER64_6 0x0023 +#define K2G_DEV_MSGMGR0 0x0025 +#define K2G_DEV_BOOTCFG0 0x0026 +#define K2G_DEV_ARM_BOOTROM0 0x0027 +#define K2G_DEV_DSP_BOOTROM0 0x0029 +#define K2G_DEV_DEBUGSS0 0x002b +#define K2G_DEV_UART0 0x002c +#define K2G_DEV_UART1 0x002d +#define K2G_DEV_UART2 0x002e +#define K2G_DEV_EHRPWM0 0x002f +#define K2G_DEV_EHRPWM1 0x0030 +#define K2G_DEV_EHRPWM2 0x0031 +#define K2G_DEV_EHRPWM3 0x0032 +#define K2G_DEV_EHRPWM4 0x0033 +#define K2G_DEV_EHRPWM5 0x0034 +#define K2G_DEV_EQEP0 0x0035 +#define K2G_DEV_EQEP1 0x0036 +#define K2G_DEV_EQEP2 0x0037 +#define K2G_DEV_ECAP0 0x0038 +#define K2G_DEV_ECAP1 0x0039 +#define K2G_DEV_I2C0 0x003a +#define K2G_DEV_I2C1 0x003b +#define K2G_DEV_I2C2 0x003c +#define K2G_DEV_EDMA0 0x003f +#define K2G_DEV_SEMAPHORE0 0x0040 +#define K2G_DEV_INTC0 0x0041 +#define K2G_DEV_GIC0 0x0042 +#define K2G_DEV_QSPI0 0x0043 +#define K2G_DEV_ARM_64B_COUNTER0 0x0044 +#define K2G_DEV_TETRIS0 0x0045 +#define K2G_DEV_CGEM0 0x0046 +#define K2G_DEV_MSMC0 0x0047 +#define K2G_DEV_CBASS0 0x0049 +#define K2G_DEV_BOARD0 0x004c +#define K2G_DEV_EDMA1 0x004f + +#endif diff --git a/include/dt-bindings/gpio/gpio.h b/include/dt-bindings/gpio/gpio.h index c673d2c87c604ae86fc6bfd523a580308752148a..b4f54da694eb4a65ecff5a6a34c3ed2db1c088db 100644 --- a/include/dt-bindings/gpio/gpio.h +++ b/include/dt-bindings/gpio/gpio.h @@ -17,11 +17,15 @@ #define GPIO_PUSH_PULL 0 #define GPIO_SINGLE_ENDED 2 +/* Bit 2 express Open drain or open source */ +#define GPIO_LINE_OPEN_SOURCE 0 +#define GPIO_LINE_OPEN_DRAIN 4 + /* - * Open Drain/Collector is the combination of single-ended active low, - * Open Source/Emitter is the combination of single-ended active high. + * Open Drain/Collector is the combination of single-ended open drain interface. + * Open Source/Emitter is the combination of single-ended open source interface. */ -#define GPIO_OPEN_DRAIN (GPIO_SINGLE_ENDED | GPIO_ACTIVE_LOW) -#define GPIO_OPEN_SOURCE (GPIO_SINGLE_ENDED | GPIO_ACTIVE_HIGH) +#define GPIO_OPEN_DRAIN (GPIO_SINGLE_ENDED | GPIO_LINE_OPEN_DRAIN) +#define GPIO_OPEN_SOURCE (GPIO_SINGLE_ENDED | GPIO_LINE_OPEN_SOURCE) #endif diff --git a/include/dt-bindings/mfd/stm32f7-rcc.h b/include/dt-bindings/mfd/stm32f7-rcc.h new file mode 100644 index 0000000000000000000000000000000000000000..e36cc69959c76bc3b75f3aac20176d1cbdbd3e0a --- /dev/null +++ b/include/dt-bindings/mfd/stm32f7-rcc.h @@ -0,0 +1,112 @@ +/* + * This header provides constants for the STM32F7 RCC IP + */ + +#ifndef _DT_BINDINGS_MFD_STM32F7_RCC_H +#define _DT_BINDINGS_MFD_STM32F7_RCC_H + +/* AHB1 */ +#define STM32F7_RCC_AHB1_GPIOA 0 +#define STM32F7_RCC_AHB1_GPIOB 1 +#define STM32F7_RCC_AHB1_GPIOC 2 +#define STM32F7_RCC_AHB1_GPIOD 3 +#define STM32F7_RCC_AHB1_GPIOE 4 +#define STM32F7_RCC_AHB1_GPIOF 5 +#define STM32F7_RCC_AHB1_GPIOG 6 +#define STM32F7_RCC_AHB1_GPIOH 7 +#define STM32F7_RCC_AHB1_GPIOI 8 +#define STM32F7_RCC_AHB1_GPIOJ 9 +#define STM32F7_RCC_AHB1_GPIOK 10 +#define STM32F7_RCC_AHB1_CRC 12 +#define STM32F7_RCC_AHB1_BKPSRAM 18 +#define STM32F7_RCC_AHB1_DTCMRAM 20 +#define STM32F7_RCC_AHB1_DMA1 21 +#define STM32F7_RCC_AHB1_DMA2 22 +#define STM32F7_RCC_AHB1_DMA2D 23 +#define STM32F7_RCC_AHB1_ETHMAC 25 +#define STM32F7_RCC_AHB1_ETHMACTX 26 +#define STM32F7_RCC_AHB1_ETHMACRX 27 +#define STM32FF_RCC_AHB1_ETHMACPTP 28 +#define STM32F7_RCC_AHB1_OTGHS 29 +#define STM32F7_RCC_AHB1_OTGHSULPI 30 + +#define STM32F7_AHB1_RESET(bit) (STM32F7_RCC_AHB1_##bit + (0x10 * 8)) +#define STM32F7_AHB1_CLOCK(bit) (STM32F7_RCC_AHB1_##bit) + + +/* AHB2 */ +#define STM32F7_RCC_AHB2_DCMI 0 +#define STM32F7_RCC_AHB2_CRYP 4 +#define STM32F7_RCC_AHB2_HASH 5 +#define STM32F7_RCC_AHB2_RNG 6 +#define STM32F7_RCC_AHB2_OTGFS 7 + +#define STM32F7_AHB2_RESET(bit) (STM32F7_RCC_AHB2_##bit + (0x14 * 8)) +#define STM32F7_AHB2_CLOCK(bit) (STM32F7_RCC_AHB2_##bit + 0x20) + +/* AHB3 */ +#define STM32F7_RCC_AHB3_FMC 0 +#define STM32F7_RCC_AHB3_QSPI 1 + +#define STM32F7_AHB3_RESET(bit) (STM32F7_RCC_AHB3_##bit + (0x18 * 8)) +#define STM32F7_AHB3_CLOCK(bit) (STM32F7_RCC_AHB3_##bit + 0x40) + +/* APB1 */ +#define STM32F7_RCC_APB1_TIM2 0 +#define STM32F7_RCC_APB1_TIM3 1 +#define STM32F7_RCC_APB1_TIM4 2 +#define STM32F7_RCC_APB1_TIM5 3 +#define STM32F7_RCC_APB1_TIM6 4 +#define STM32F7_RCC_APB1_TIM7 5 +#define STM32F7_RCC_APB1_TIM12 6 +#define STM32F7_RCC_APB1_TIM13 7 +#define STM32F7_RCC_APB1_TIM14 8 +#define STM32F7_RCC_APB1_LPTIM1 9 +#define STM32F7_RCC_APB1_WWDG 11 +#define STM32F7_RCC_APB1_SPI2 14 +#define STM32F7_RCC_APB1_SPI3 15 +#define STM32F7_RCC_APB1_SPDIFRX 16 +#define STM32F7_RCC_APB1_UART2 17 +#define STM32F7_RCC_APB1_UART3 18 +#define STM32F7_RCC_APB1_UART4 19 +#define STM32F7_RCC_APB1_UART5 20 +#define STM32F7_RCC_APB1_I2C1 21 +#define STM32F7_RCC_APB1_I2C2 22 +#define STM32F7_RCC_APB1_I2C3 23 +#define STM32F7_RCC_APB1_I2C4 24 +#define STM32F7_RCC_APB1_CAN1 25 +#define STM32F7_RCC_APB1_CAN2 26 +#define STM32F7_RCC_APB1_CEC 27 +#define STM32F7_RCC_APB1_PWR 28 +#define STM32F7_RCC_APB1_DAC 29 +#define STM32F7_RCC_APB1_UART7 30 +#define STM32F7_RCC_APB1_UART8 31 + +#define STM32F7_APB1_RESET(bit) (STM32F7_RCC_APB1_##bit + (0x20 * 8)) +#define STM32F7_APB1_CLOCK(bit) (STM32F7_RCC_APB1_##bit + 0x80) + +/* APB2 */ +#define STM32F7_RCC_APB2_TIM1 0 +#define STM32F7_RCC_APB2_TIM8 1 +#define STM32F7_RCC_APB2_USART1 4 +#define STM32F7_RCC_APB2_USART6 5 +#define STM32F7_RCC_APB2_ADC1 8 +#define STM32F7_RCC_APB2_ADC2 9 +#define STM32F7_RCC_APB2_ADC3 10 +#define STM32F7_RCC_APB2_SDMMC1 11 +#define STM32F7_RCC_APB2_SPI1 12 +#define STM32F7_RCC_APB2_SPI4 13 +#define STM32F7_RCC_APB2_SYSCFG 14 +#define STM32F7_RCC_APB2_TIM9 16 +#define STM32F7_RCC_APB2_TIM10 17 +#define STM32F7_RCC_APB2_TIM11 18 +#define STM32F7_RCC_APB2_SPI5 20 +#define STM32F7_RCC_APB2_SPI6 21 +#define STM32F7_RCC_APB2_SAI1 22 +#define STM32F7_RCC_APB2_SAI2 23 +#define STM32F7_RCC_APB2_LTDC 26 + +#define STM32F7_APB2_RESET(bit) (STM32F7_RCC_APB2_##bit + (0x24 * 8)) +#define STM32F7_APB2_CLOCK(bit) (STM32F7_RCC_APB2_##bit + 0xA0) + +#endif /* _DT_BINDINGS_MFD_STM32F7_RCC_H */ diff --git a/include/dt-bindings/pinctrl/hisi.h b/include/dt-bindings/pinctrl/hisi.h index 38f1ea879ea136685f6a2b03d1087f0cdf8dd7ae..0359bfdc9119720e80f6e703f39ac30f0091fb12 100644 --- a/include/dt-bindings/pinctrl/hisi.h +++ b/include/dt-bindings/pinctrl/hisi.h @@ -56,4 +56,19 @@ #define DRIVE4_08MA (4 << 4) #define DRIVE4_10MA (6 << 4) +/* drive strength definition for hi3660 */ +#define DRIVE6_MASK (15 << 4) +#define DRIVE6_04MA (0 << 4) +#define DRIVE6_12MA (4 << 4) +#define DRIVE6_19MA (8 << 4) +#define DRIVE6_27MA (10 << 4) +#define DRIVE6_32MA (15 << 4) +#define DRIVE7_02MA (0 << 4) +#define DRIVE7_04MA (1 << 4) +#define DRIVE7_06MA (2 << 4) +#define DRIVE7_08MA (3 << 4) +#define DRIVE7_10MA (4 << 4) +#define DRIVE7_12MA (5 << 4) +#define DRIVE7_14MA (6 << 4) +#define DRIVE7_16MA (7 << 4) #endif diff --git a/include/dt-bindings/pinctrl/mt7623-pinfunc.h b/include/dt-bindings/pinctrl/mt7623-pinfunc.h index 2f00bdc4244274143f39b4e43ef185d42ba77420..436a87be864afbbf857ffc0cbfaea4d39b4dc91d 100644 --- a/include/dt-bindings/pinctrl/mt7623-pinfunc.h +++ b/include/dt-bindings/pinctrl/mt7623-pinfunc.h @@ -185,6 +185,12 @@ #define MT7623_PIN_56_SPI0_MO_FUNC_SPI0_MO (MTK_PIN_NO(56) | 1) #define MT7623_PIN_56_SPI0_MO_FUNC_SPI0_MI (MTK_PIN_NO(56) | 2) +#define MT7623_PIN_57_SDA1_FUNC_GPIO57 (MTK_PIN_NO(57) | 0) +#define MT7623_PIN_57_SDA1_FUNC_SDA1 (MTK_PIN_NO(57) | 1) + +#define MT7623_PIN_58_SCL1_FUNC_GPIO58 (MTK_PIN_NO(58) | 0) +#define MT7623_PIN_58_SCL1_FUNC_SCL1 (MTK_PIN_NO(58) | 1) + #define MT7623_PIN_60_WB_RSTB_FUNC_GPIO60 (MTK_PIN_NO(60) | 0) #define MT7623_PIN_60_WB_RSTB_FUNC_WB_RSTB (MTK_PIN_NO(60) | 1) @@ -244,6 +250,22 @@ #define MT7623_PIN_76_SCL0_FUNC_GPIO76 (MTK_PIN_NO(76) | 0) #define MT7623_PIN_76_SCL0_FUNC_SCL0 (MTK_PIN_NO(76) | 1) +#define MT7623_PIN_79_URXD0_FUNC_GPIO79 (MTK_PIN_NO(79) | 0) +#define MT7623_PIN_79_URXD0_FUNC_URXD0 (MTK_PIN_NO(79) | 1) +#define MT7623_PIN_79_URXD0_FUNC_UTXD0 (MTK_PIN_NO(79) | 2) + +#define MT7623_PIN_80_UTXD0_FUNC_GPIO80 (MTK_PIN_NO(80) | 0) +#define MT7623_PIN_80_UTXD0_FUNC_UTXD0 (MTK_PIN_NO(80) | 1) +#define MT7623_PIN_80_UTXD0_FUNC_URXD0 (MTK_PIN_NO(80) | 2) + +#define MT7623_PIN_81_URXD1_FUNC_GPIO81 (MTK_PIN_NO(81) | 0) +#define MT7623_PIN_81_URXD1_FUNC_URXD1 (MTK_PIN_NO(81) | 1) +#define MT7623_PIN_81_URXD1_FUNC_UTXD1 (MTK_PIN_NO(81) | 2) + +#define MT7623_PIN_82_UTXD1_FUNC_GPIO82 (MTK_PIN_NO(82) | 0) +#define MT7623_PIN_82_UTXD1_FUNC_UTXD1 (MTK_PIN_NO(82) | 1) +#define MT7623_PIN_82_UTXD1_FUNC_URXD1 (MTK_PIN_NO(82) | 2) + #define MT7623_PIN_83_LCM_RST_FUNC_GPIO83 (MTK_PIN_NO(83) | 0) #define MT7623_PIN_83_LCM_RST_FUNC_LCM_RST (MTK_PIN_NO(83) | 1) @@ -351,10 +373,10 @@ #define MT7623_PIN_122_GPIO122_FUNC_SDA2 (MTK_PIN_NO(122) | 4) #define MT7623_PIN_122_GPIO122_FUNC_URXD0 (MTK_PIN_NO(122) | 5) -#define MT7623_PIN_123_GPIO123_FUNC_GPIO123 (MTK_PIN_NO(123) | 0) -#define MT7623_PIN_123_GPIO123_FUNC_TEST (MTK_PIN_NO(123) | 1) -#define MT7623_PIN_123_GPIO123_FUNC_SCL2 (MTK_PIN_NO(123) | 4) -#define MT7623_PIN_123_GPIO123_FUNC_UTXD0 (MTK_PIN_NO(123) | 5) +#define MT7623_PIN_123_HTPLG_FUNC_GPIO123 (MTK_PIN_NO(123) | 0) +#define MT7623_PIN_123_HTPLG_FUNC_HTPLG (MTK_PIN_NO(123) | 1) +#define MT7623_PIN_123_HTPLG_FUNC_SCL2 (MTK_PIN_NO(123) | 4) +#define MT7623_PIN_123_HTPLG_FUNC_UTXD0 (MTK_PIN_NO(123) | 5) #define MT7623_PIN_124_GPIO124_FUNC_GPIO124 (MTK_PIN_NO(124) | 0) #define MT7623_PIN_124_GPIO124_FUNC_TEST (MTK_PIN_NO(124) | 1) diff --git a/include/dt-bindings/power/imx7-power.h b/include/dt-bindings/power/imx7-power.h new file mode 100644 index 0000000000000000000000000000000000000000..3a181e4105171d5a039cee8ce27862254fd7444d --- /dev/null +++ b/include/dt-bindings/power/imx7-power.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2017 Impinj + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __DT_BINDINGS_IMX7_POWER_H__ +#define __DT_BINDINGS_IMX7_POWER_H__ + +#define IMX7_POWER_DOMAIN_MIPI_PHY 0 +#define IMX7_POWER_DOMAIN_PCIE_PHY 1 +#define IMX7_POWER_DOMAIN_USB_HSIC_PHY 2 + +#endif diff --git a/include/dt-bindings/power/r8a7795-sysc.h b/include/dt-bindings/power/r8a7795-sysc.h index ee2e26ba605ef9a37801d1b2858506f8d17864cb..ad679eeda1370a9dec9dd21c0fae84c1cb53a150 100644 --- a/include/dt-bindings/power/r8a7795-sysc.h +++ b/include/dt-bindings/power/r8a7795-sysc.h @@ -33,7 +33,7 @@ #define R8A7795_PD_CA53_SCU 21 #define R8A7795_PD_3DG_E 22 #define R8A7795_PD_A3IR 24 -#define R8A7795_PD_A2VC0 25 +#define R8A7795_PD_A2VC0 25 /* ES1.x only */ #define R8A7795_PD_A2VC1 26 /* Always-on power area */ diff --git a/include/dt-bindings/reset/altr,rst-mgr-a10sr.h b/include/dt-bindings/reset/altr,rst-mgr-a10sr.h new file mode 100644 index 0000000000000000000000000000000000000000..9855925e5256a7b9d5ece637b605b8ff937ec99a --- /dev/null +++ b/include/dt-bindings/reset/altr,rst-mgr-a10sr.h @@ -0,0 +1,33 @@ +/* + * Copyright Intel Corporation (C) 2017. All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * Reset binding definitions for Altera Arria10 MAX5 System Resource Chip + * + * Adapted from altr,rst-mgr-a10.h + */ + +#ifndef _DT_BINDINGS_RESET_ALTR_RST_MGR_A10SR_H +#define _DT_BINDINGS_RESET_ALTR_RST_MGR_A10SR_H + +/* Peripheral PHY resets */ +#define A10SR_RESET_ENET_HPS 0 +#define A10SR_RESET_PCIE 1 +#define A10SR_RESET_FILE 2 +#define A10SR_RESET_BQSPI 3 +#define A10SR_RESET_USB 4 + +#define A10SR_RESET_NUM 5 + +#endif diff --git a/include/dt-bindings/reset/imx7-reset.h b/include/dt-bindings/reset/imx7-reset.h new file mode 100644 index 0000000000000000000000000000000000000000..63948170c7b2d3ae5b62a219c83770aaa05be8ef --- /dev/null +++ b/include/dt-bindings/reset/imx7-reset.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2017 Impinj, Inc. + * + * Author: Andrey Smirnov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef DT_BINDING_RESET_IMX7_H +#define DT_BINDING_RESET_IMX7_H + +#define IMX7_RESET_A7_CORE_POR_RESET0 0 +#define IMX7_RESET_A7_CORE_POR_RESET1 1 +#define IMX7_RESET_A7_CORE_RESET0 2 +#define IMX7_RESET_A7_CORE_RESET1 3 +#define IMX7_RESET_A7_DBG_RESET0 4 +#define IMX7_RESET_A7_DBG_RESET1 5 +#define IMX7_RESET_A7_ETM_RESET0 6 +#define IMX7_RESET_A7_ETM_RESET1 7 +#define IMX7_RESET_A7_SOC_DBG_RESET 8 +#define IMX7_RESET_A7_L2RESET 9 +#define IMX7_RESET_SW_M4C_RST 10 +#define IMX7_RESET_SW_M4P_RST 11 +#define IMX7_RESET_EIM_RST 12 +#define IMX7_RESET_HSICPHY_PORT_RST 13 +#define IMX7_RESET_USBPHY1_POR 14 +#define IMX7_RESET_USBPHY1_PORT_RST 15 +#define IMX7_RESET_USBPHY2_POR 16 +#define IMX7_RESET_USBPHY2_PORT_RST 17 +#define IMX7_RESET_MIPI_PHY_MRST 18 +#define IMX7_RESET_MIPI_PHY_SRST 19 + +/* + * IMX7_RESET_PCIEPHY is a logical reset line combining PCIEPHY_BTN + * and PCIEPHY_G_RST + */ +#define IMX7_RESET_PCIEPHY 20 +#define IMX7_RESET_PCIEPHY_PERST 21 + +/* + * IMX7_RESET_PCIE_CTRL_APPS_EN is not strictly a reset line, but it + * can be used to inhibit PCIe LTTSM, so, in a way, it can be thoguht + * of as one + */ +#define IMX7_RESET_PCIE_CTRL_APPS_EN 22 +#define IMX7_RESET_DDRC_PRST 23 +#define IMX7_RESET_DDRC_CORE_RST 24 + +#define IMX7_RESET_NUM 25 + +#endif + diff --git a/include/dt-bindings/reset/mt2701-resets.h b/include/dt-bindings/reset/mt2701-resets.h index aaf03057f755c3657e779f24bb821c150a82f413..21deb547cfa404cdc544936dafbbe09b24a7ef4d 100644 --- a/include/dt-bindings/reset/mt2701-resets.h +++ b/include/dt-bindings/reset/mt2701-resets.h @@ -80,4 +80,11 @@ #define MT2701_HIFSYS_PCIE1_RST 25 #define MT2701_HIFSYS_PCIE2_RST 26 +/* ETHSYS resets */ +#define MT2701_ETHSYS_SYS_RST 0 +#define MT2701_ETHSYS_MCM_RST 2 +#define MT2701_ETHSYS_FE_RST 6 +#define MT2701_ETHSYS_GMAC_RST 23 +#define MT2701_ETHSYS_PPE_RST 31 + #endif /* _DT_BINDINGS_RESET_CONTROLLER_MT2701 */ diff --git a/include/dt-bindings/reset/sun8i-h3-ccu.h b/include/dt-bindings/reset/sun8i-h3-ccu.h index 6b7af80c26ec610d5db90a19efc05022340911e1..484c2a22919d7270aeaed71ec0bf3151c38fb873 100644 --- a/include/dt-bindings/reset/sun8i-h3-ccu.h +++ b/include/dt-bindings/reset/sun8i-h3-ccu.h @@ -98,6 +98,9 @@ #define RST_BUS_UART1 50 #define RST_BUS_UART2 51 #define RST_BUS_UART3 52 -#define RST_BUS_SCR 53 +#define RST_BUS_SCR0 53 + +/* New resets imported in H5 */ +#define RST_BUS_SCR1 54 #endif /* _DT_BINDINGS_RST_SUN8I_H3_H_ */ diff --git a/include/dt-bindings/reset/sun8i-r-ccu.h b/include/dt-bindings/reset/sun8i-r-ccu.h new file mode 100644 index 0000000000000000000000000000000000000000..4ba64f3d6fc9e9c6122d436e664b07f3f646a008 --- /dev/null +++ b/include/dt-bindings/reset/sun8i-r-ccu.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2016 Icenowy Zheng + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_RST_SUN8I_R_CCU_H_ +#define _DT_BINDINGS_RST_SUN8I_R_CCU_H_ + +#define RST_APB0_IR 0 +#define RST_APB0_TIMER 1 +#define RST_APB0_RSB 2 +#define RST_APB0_UART 3 +/* 4 is reserved for RST_APB0_W1 on A31 */ +#define RST_APB0_I2C 5 + +#endif /* _DT_BINDINGS_RST_SUN8I_R_CCU_H_ */ diff --git a/include/dt-bindings/reset/tegra210-car.h b/include/dt-bindings/reset/tegra210-car.h new file mode 100644 index 0000000000000000000000000000000000000000..296ec6e3f8c0b3ed22e22e71d28a72ea33a40c22 --- /dev/null +++ b/include/dt-bindings/reset/tegra210-car.h @@ -0,0 +1,13 @@ +/* + * This header provides Tegra210-specific constants for binding + * nvidia,tegra210-car. + */ + +#ifndef _DT_BINDINGS_RESET_TEGRA210_CAR_H +#define _DT_BINDINGS_RESET_TEGRA210_CAR_H + +#define TEGRA210_RESET(x) (7 * 32 + (x)) +#define TEGRA210_RST_DFLL_DVCO TEGRA210_RESET(0) +#define TEGRA210_RST_ADSP TEGRA210_RESET(1) + +#endif /* _DT_BINDINGS_RESET_TEGRA210_CAR_H */ diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index fbd4647767e9162f10c5741825796c5c2061cef0..359c2f936004b42d4e892170f6e686213d7e3d63 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -18,7 +18,8 @@ extern int restrict_link_by_builtin_trusted(struct key *keyring, const struct key_type *type, - const union key_payload *payload); + const union key_payload *payload, + struct key *restriction_key); #else #define restrict_link_by_builtin_trusted restrict_link_reject @@ -28,11 +29,24 @@ extern int restrict_link_by_builtin_trusted(struct key *keyring, extern int restrict_link_by_builtin_and_secondary_trusted( struct key *keyring, const struct key_type *type, - const union key_payload *payload); + const union key_payload *payload, + struct key *restriction_key); #else #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +#ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING +extern int mark_hash_blacklisted(const char *hash); +extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, + const char *type); +#else +static inline int is_hash_blacklisted(const u8 *hash, size_t hash_len, + const char *type) +{ + return 0; +} +#endif + #ifdef CONFIG_IMA_BLACKLIST_KEYRING extern struct key *ima_blacklist_keyring; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index fe797d6ef89d90e3163c0e4f927286b314c8d5c8..295584f31a4ef3b04a6e6bf0d68467ada190382c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -63,6 +63,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); +void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 92e7e97ca8ff0f95ef9dba95b3e7078dd8c73049..1ab4633adf4fd16971edfa32f197a9e7b24c2d7b 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -50,6 +50,8 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu); +bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu); +void kvm_pmu_update_run(struct kvm_vcpu *vcpu); void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, @@ -85,6 +87,11 @@ static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {} +static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) +{ + return false; +} +static inline void kvm_pmu_update_run(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index c0b3d999c266f2271d61bd58b042352580faf5d2..ef718586321ce0dea03aabe8b114beca9cb71a74 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -148,7 +148,6 @@ struct vgic_its { gpa_t vgic_its_base; bool enabled; - bool initialized; struct vgic_io_device iodev; struct kvm_device *dev; @@ -162,6 +161,9 @@ struct vgic_its { u32 creadr; u32 cwriter; + /* migration ABI revision in use */ + u32 abi_rev; + /* Protects the device and collection lists */ struct mutex its_lock; struct list_head device_list; @@ -193,7 +195,10 @@ struct vgic_dist { /* either a GICv2 CPU interface */ gpa_t vgic_cpu_base; /* or a number of GICv3 redistributor regions */ - gpa_t vgic_redist_base; + struct { + gpa_t vgic_redist_base; + gpa_t vgic_redist_free_offset; + }; }; /* distributor enabled */ @@ -225,8 +230,6 @@ struct vgic_dist { struct vgic_v2_cpu_if { u32 vgic_hcr; u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u64 vgic_eisr; /* Saved only */ u64 vgic_elrsr; /* Saved only */ u32 vgic_apr; u32 vgic_lr[VGIC_V2_MAX_LRS]; @@ -236,8 +239,6 @@ struct vgic_v3_cpu_if { u32 vgic_hcr; u32 vgic_vmcr; u32 vgic_sre; /* Restored only, change ignored */ - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr; /* Saved only */ u32 vgic_elrsr; /* Saved only */ u32 vgic_ap0r[4]; u32 vgic_ap1r[4]; @@ -264,8 +265,6 @@ struct vgic_cpu { */ struct list_head ap_list_head; - u64 live_lrs; - /* * Members below are used with GICv3 emulation only and represent * parts of the redistributor. @@ -289,6 +288,7 @@ extern struct static_key_false vgic_v2_cpuif_trap; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); void kvm_vgic_early_init(struct kvm *kvm); +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); @@ -307,6 +307,9 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); +void kvm_vgic_load(struct kvm_vcpu *vcpu); +void kvm_vgic_put(struct kvm_vcpu *vcpu); + #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) #define vgic_ready(k) ((k)->arch.vgic.ready) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773cde8439a0c3e21b39ad29460c440..137e4a3d89c5225dc6a9535265f13e874bdbb8eb 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -233,10 +233,6 @@ int acpi_numa_init (void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); -int __init acpi_parse_entries(char *id, unsigned long table_size, - acpi_tbl_entry_handler handler, - struct acpi_table_header *table_header, - int entry_id, unsigned int max_entries); int __init acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, @@ -595,6 +591,13 @@ enum acpi_reconfig_event { int acpi_reconfig_notifier_register(struct notifier_block *nb); int acpi_reconfig_notifier_unregister(struct notifier_block *nb); +#ifdef CONFIG_ACPI_GTDT +int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); +int acpi_gtdt_map_ppi(int type); +bool acpi_gtdt_c3stop(int type); +int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); +#endif + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -611,6 +614,11 @@ static inline bool acpi_dev_found(const char *hid) return false; } +static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) +{ + return false; +} + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; @@ -762,8 +770,11 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline void acpi_dma_configure(struct device *dev, - enum dev_dma_attr attr) { } +static inline int acpi_dma_configure(struct device *dev, + enum dev_dma_attr attr) +{ + return 0; +} static inline void acpi_dma_deconfigure(struct device *dev) { } @@ -949,6 +960,10 @@ static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) adev->driver_gpios = NULL; } +int devm_acpi_dev_add_driver_gpios(struct device *dev, + const struct acpi_gpio_mapping *gpios); +void devm_acpi_dev_remove_driver_gpios(struct device *dev); + int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); #else static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, @@ -958,6 +973,13 @@ static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, } static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) {} +static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, + const struct acpi_gpio_mapping *gpios) +{ + return -ENXIO; +} +static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {} + static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return -ENXIO; @@ -997,8 +1019,16 @@ int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); -struct fwnode_handle *acpi_get_next_subnode(struct device *dev, - struct fwnode_handle *subnode); +struct fwnode_handle *acpi_get_next_subnode(struct fwnode_handle *fwnode, + struct fwnode_handle *child); +struct fwnode_handle *acpi_node_get_parent(struct fwnode_handle *fwnode); + +struct fwnode_handle *acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle *prev); +int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle **remote, + struct fwnode_handle **port, + struct fwnode_handle **endpoint); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, @@ -1115,12 +1145,34 @@ static inline int acpi_dev_prop_read(struct acpi_device *adev, return -ENXIO; } -static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, - struct fwnode_handle *subnode) +static inline struct fwnode_handle * +acpi_get_next_subnode(struct fwnode_handle *fwnode, struct fwnode_handle *child) { return NULL; } +static inline struct fwnode_handle * +acpi_node_get_parent(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct fwnode_handle * +acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle *prev) +{ + return ERR_PTR(-ENXIO); +} + +static inline int +acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle **remote, + struct fwnode_handle **port, + struct fwnode_handle **endpoint) +{ + return -ENXIO; +} + #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 77e08099e55404b53225e71f7bd0b9ca34caec4a..3ff9acea86161ae51fde3953de6bf27b8f5ebca0 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -34,6 +34,8 @@ void acpi_iort_init(void); bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); +void acpi_configure_pmsi_domain(struct device *dev); +int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ void iort_set_dma_mask(struct device *dev); const struct iommu_ops *iort_iommu_configure(struct device *dev); @@ -45,6 +47,7 @@ static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) static inline struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id) { return NULL; } +static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ static inline void iort_set_dma_mask(struct device *dev) { } static inline @@ -52,7 +55,4 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) { return NULL; } #endif -#define IORT_ACPI_DECLARE(name, table_id, fn) \ - ACPI_DECLARE_PROBE_ENTRY(iort, name, table_id, 0, NULL, 0, fn) - #endif /* __ACPI_IORT_H__ */ diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h index 91b84a7f053933089e63224aea1f616809b8af57..580b5323a717bfe720919ffba7f093d8e389c957 100644 --- a/include/linux/amba/pl080.h +++ b/include/linux/amba/pl080.h @@ -38,24 +38,16 @@ #define PL080_SOFT_LSREQ (0x2C) #define PL080_CONFIG (0x30) -#define PL080_CONFIG_M2_BE (1 << 2) -#define PL080_CONFIG_M1_BE (1 << 1) -#define PL080_CONFIG_ENABLE (1 << 0) +#define PL080_CONFIG_M2_BE BIT(2) +#define PL080_CONFIG_M1_BE BIT(1) +#define PL080_CONFIG_ENABLE BIT(0) #define PL080_SYNC (0x34) /* Per channel configuration registers */ -#define PL080_Cx_STRIDE (0x20) +/* Per channel configuration registers */ #define PL080_Cx_BASE(x) ((0x100 + (x * 0x20))) -#define PL080_Cx_SRC_ADDR(x) ((0x100 + (x * 0x20))) -#define PL080_Cx_DST_ADDR(x) ((0x104 + (x * 0x20))) -#define PL080_Cx_LLI(x) ((0x108 + (x * 0x20))) -#define PL080_Cx_CONTROL(x) ((0x10C + (x * 0x20))) -#define PL080_Cx_CONFIG(x) ((0x110 + (x * 0x20))) -#define PL080S_Cx_CONTROL2(x) ((0x110 + (x * 0x20))) -#define PL080S_Cx_CONFIG(x) ((0x114 + (x * 0x20))) - #define PL080_CH_SRC_ADDR (0x00) #define PL080_CH_DST_ADDR (0x04) #define PL080_CH_LLI (0x08) @@ -66,18 +58,18 @@ #define PL080_LLI_ADDR_MASK (0x3fffffff << 2) #define PL080_LLI_ADDR_SHIFT (2) -#define PL080_LLI_LM_AHB2 (1 << 0) +#define PL080_LLI_LM_AHB2 BIT(0) -#define PL080_CONTROL_TC_IRQ_EN (1 << 31) +#define PL080_CONTROL_TC_IRQ_EN BIT(31) #define PL080_CONTROL_PROT_MASK (0x7 << 28) #define PL080_CONTROL_PROT_SHIFT (28) -#define PL080_CONTROL_PROT_CACHE (1 << 30) -#define PL080_CONTROL_PROT_BUFF (1 << 29) -#define PL080_CONTROL_PROT_SYS (1 << 28) -#define PL080_CONTROL_DST_INCR (1 << 27) -#define PL080_CONTROL_SRC_INCR (1 << 26) -#define PL080_CONTROL_DST_AHB2 (1 << 25) -#define PL080_CONTROL_SRC_AHB2 (1 << 24) +#define PL080_CONTROL_PROT_CACHE BIT(30) +#define PL080_CONTROL_PROT_BUFF BIT(29) +#define PL080_CONTROL_PROT_SYS BIT(28) +#define PL080_CONTROL_DST_INCR BIT(27) +#define PL080_CONTROL_SRC_INCR BIT(26) +#define PL080_CONTROL_DST_AHB2 BIT(25) +#define PL080_CONTROL_SRC_AHB2 BIT(24) #define PL080_CONTROL_DWIDTH_MASK (0x7 << 21) #define PL080_CONTROL_DWIDTH_SHIFT (21) #define PL080_CONTROL_SWIDTH_MASK (0x7 << 18) @@ -103,20 +95,20 @@ #define PL080_WIDTH_16BIT (0x1) #define PL080_WIDTH_32BIT (0x2) -#define PL080N_CONFIG_ITPROT (1 << 20) -#define PL080N_CONFIG_SECPROT (1 << 19) -#define PL080_CONFIG_HALT (1 << 18) -#define PL080_CONFIG_ACTIVE (1 << 17) /* RO */ -#define PL080_CONFIG_LOCK (1 << 16) -#define PL080_CONFIG_TC_IRQ_MASK (1 << 15) -#define PL080_CONFIG_ERR_IRQ_MASK (1 << 14) +#define PL080N_CONFIG_ITPROT BIT(20) +#define PL080N_CONFIG_SECPROT BIT(19) +#define PL080_CONFIG_HALT BIT(18) +#define PL080_CONFIG_ACTIVE BIT(17) /* RO */ +#define PL080_CONFIG_LOCK BIT(16) +#define PL080_CONFIG_TC_IRQ_MASK BIT(15) +#define PL080_CONFIG_ERR_IRQ_MASK BIT(14) #define PL080_CONFIG_FLOW_CONTROL_MASK (0x7 << 11) #define PL080_CONFIG_FLOW_CONTROL_SHIFT (11) #define PL080_CONFIG_DST_SEL_MASK (0xf << 6) #define PL080_CONFIG_DST_SEL_SHIFT (6) #define PL080_CONFIG_SRC_SEL_MASK (0xf << 1) #define PL080_CONFIG_SRC_SEL_SHIFT (1) -#define PL080_CONFIG_ENABLE (1 << 0) +#define PL080_CONFIG_ENABLE BIT(0) #define PL080_FLOW_MEM2MEM (0x0) #define PL080_FLOW_MEM2PER (0x1) diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h deleted file mode 100644 index fe93758e84036b8abd8a2b5c5191c0545050094b..0000000000000000000000000000000000000000 --- a/include/linux/amba/pl330.h +++ /dev/null @@ -1,35 +0,0 @@ -/* linux/include/linux/amba/pl330.h - * - * Copyright (C) 2010 Samsung Electronics Co. Ltd. - * Jaswinder Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __AMBA_PL330_H_ -#define __AMBA_PL330_H_ - -#include - -struct dma_pl330_platdata { - /* - * Number of valid peripherals connected to DMAC. - * This may be different from the value read from - * CR0, as the PL330 implementation might have 'holes' - * in the peri list or the peri could also be reached - * from another DMAC which the platform prefers. - */ - u8 nr_valid_peri; - /* Array of valid peripherals */ - u8 *peri_id; - /* Operational capabilities */ - dma_cap_mask_t cap_mask; - /* Bytes to allocate for MC buffer */ - unsigned mcbuf_sz; -}; - -extern bool pl330_filter(struct dma_chan *chan, void *param); -#endif /* __AMBA_PL330_H_ */ diff --git a/include/linux/ata.h b/include/linux/ata.h index af6859b3a93d75194eab0817607fd02366a8a61d..ad7d9ee89ff043eaf2d8db8589d2acaccc07c855 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -817,11 +817,6 @@ static inline bool ata_id_sct_error_recovery_ctrl(const u16 *id) return id[ATA_ID_SCT_CMD_XPORT] & (1 << 3) ? true : false; } -static inline bool ata_id_sct_write_same(const u16 *id) -{ - return id[ATA_ID_SCT_CMD_XPORT] & (1 << 2) ? true : false; -} - static inline bool ata_id_sct_long_sector_access(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 1) ? true : false; diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e71835bf60a977a37277d44c6357a02e70c3a41d..c56be74101305f74524bc816d9c8fdb67ccb895d 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -423,6 +423,29 @@ #endif #endif /* atomic_cmpxchg_relaxed */ +#ifndef atomic_try_cmpxchg + +#define __atomic_try_cmpxchg(type, _p, _po, _n) \ +({ \ + typeof(_po) __po = (_po); \ + typeof(*(_po)) __r, __o = *__po; \ + __r = atomic_cmpxchg##type((_p), __o, (_n)); \ + if (unlikely(__r != __o)) \ + *__po = __r; \ + likely(__r == __o); \ +}) + +#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) +#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) +#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) +#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) + +#else /* atomic_try_cmpxchg */ +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#define atomic_try_cmpxchg_release atomic_try_cmpxchg +#endif /* atomic_try_cmpxchg */ + /* cmpxchg_relaxed */ #ifndef cmpxchg_relaxed #define cmpxchg_relaxed cmpxchg @@ -996,6 +1019,29 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_cmpxchg_relaxed */ +#ifndef atomic64_try_cmpxchg + +#define __atomic64_try_cmpxchg(type, _p, _po, _n) \ +({ \ + typeof(_po) __po = (_po); \ + typeof(*(_po)) __r, __o = *__po; \ + __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ + if (unlikely(__r != __o)) \ + *__po = __r; \ + likely(__r == __o); \ +}) + +#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) +#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) +#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) +#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) + +#else /* atomic64_try_cmpxchg */ +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg +#endif /* atomic64_try_cmpxchg */ + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { diff --git a/include/linux/audit.h b/include/linux/audit.h index 504e784b7ffa6da23212a0a0d110efe91774eb27..2150bdccfbab2953c33cd56d6d64eb798ca180ea 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -163,8 +163,7 @@ extern void audit_log_task_info(struct audit_buffer *ab, extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ -extern int audit_rule_change(int type, __u32 portid, int seq, - void *data, size_t datasz); +extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; @@ -332,7 +331,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial); + struct timespec64 *t, unsigned int *serial); extern int audit_set_loginuid(kuid_t loginuid); static inline kuid_t audit_get_loginuid(struct task_struct *tsk) @@ -511,7 +510,7 @@ static inline void __audit_seccomp(unsigned long syscall, long signr, int code) static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { return 0; } diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ad955817916d00c35b6b41d204ec61013c88a10f..866c433e7d3220b29170ed98ccd1b70803a43964 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -21,6 +21,7 @@ struct dentry; */ enum wb_state { WB_registered, /* bdi_register() was done */ + WB_shutting_down, /* wb_shutdown() in progress */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ }; @@ -54,7 +55,9 @@ struct bdi_writeback_congested { atomic_t refcnt; /* nr of attached wb's and blkg */ #ifdef CONFIG_CGROUP_WRITEBACK - struct backing_dev_info *bdi; /* the associated bdi */ + struct backing_dev_info *__bdi; /* the associated bdi, set to NULL + * on bdi unregistration. For memcg-wb + * internal use only! */ int blkcg_id; /* ID of the associated blkcg */ struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ #endif @@ -143,7 +146,7 @@ struct backing_dev_info { congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ - char *name; + const char *name; struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ @@ -161,7 +164,6 @@ struct backing_dev_info { #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ - atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ #else struct bdi_writeback_congested *wb_congested; #endif diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c52a48cb9a66379971be13008bd8cdfba087807e..557d84063934c65c2aa96c2b6141425af8a567bf 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,8 +17,6 @@ #include #include -int __must_check bdi_init(struct backing_dev_info *bdi); - static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); @@ -27,16 +25,18 @@ static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) void bdi_put(struct backing_dev_info *bdi); -__printf(3, 4) -int bdi_register(struct backing_dev_info *bdi, struct device *parent, - const char *fmt, ...); -int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); +__printf(2, 3) +int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); +int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, + va_list args); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); -int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); -void bdi_destroy(struct backing_dev_info *bdi); struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); +static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask) +{ + return bdi_alloc_node(gfp_mask, NUMA_NO_NODE); +} void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 9657f11d48a7547cbf63639c23f0de9bc07920c4..bca6a5e4ca3db3a6370b83621c80bb5ca876f27a 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -80,7 +80,7 @@ struct pci_dev; #define BCMA_CORE_PCI_MDIODATA_DEV_TX 0x1e /* SERDES TX Dev */ #define BCMA_CORE_PCI_MDIODATA_DEV_RX 0x1f /* SERDES RX Dev */ #define BCMA_CORE_PCI_PCIEIND_ADDR 0x0130 /* indirect access to the internal register */ -#define BCMA_CORE_PCI_PCIEIND_DATA 0x0134 /* Data to/from the internal regsiter */ +#define BCMA_CORE_PCI_PCIEIND_DATA 0x0134 /* Data to/from the internal register */ #define BCMA_CORE_PCI_CLKREQENCTRL 0x0138 /* >= rev 6, Clkreq rdma control */ #define BCMA_CORE_PCI_PCICFG0 0x0400 /* PCI config space 0 (rev >= 8) */ #define BCMA_CORE_PCI_PCICFG1 0x0500 /* PCI config space 1 (rev >= 8) */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 8e521194f6fc4ad32138a51c962a365c74debaed..d1b04b0e99cf8c293d4ded6eccb2b0aa2fce0d41 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -183,7 +183,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) -static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) +static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; @@ -205,17 +205,12 @@ static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) break; } - __bio_for_each_segment(bv, bio, iter, *bvec) + bio_for_each_segment(bv, bio, iter) segs++; return segs; } -static inline unsigned bio_segments(struct bio *bio) -{ - return __bio_segments(bio, &bio->bi_iter); -} - /* * get a reference to a bio, so it won't disappear. the intended use is * something like: @@ -383,14 +378,12 @@ extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); +extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *); extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); -extern struct bio *bio_clone_bioset_partial(struct bio *, gfp_t, - struct bio_set *, int, int); extern struct bio_set *fs_bio_set; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9382c5da7a2edfbc7cbccca2ff1304cd042c3bd2..fcd641032f8d3a87162b0e9f1a63e08ae16d10df 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -15,7 +15,7 @@ struct blk_mq_hw_ctx { unsigned long state; /* BLK_MQ_S_* flags */ } ____cacheline_aligned_in_smp; - struct work_struct run_work; + struct delayed_work run_work; cpumask_var_t cpumask; int next_cpu; int next_cpu_batch; @@ -51,15 +51,17 @@ struct blk_mq_hw_ctx { atomic_t nr_active; - struct delayed_work delayed_run_work; - struct delayed_work delay_work; - struct hlist_node cpuhp_dead; struct kobject kobj; unsigned long poll_considered; unsigned long poll_invoked; unsigned long poll_success; + +#ifdef CONFIG_BLK_DEBUG_FS + struct dentry *debugfs_dir; + struct dentry *sched_debugfs_dir; +#endif }; struct blk_mq_tag_set { @@ -82,7 +84,6 @@ struct blk_mq_tag_set { struct blk_mq_queue_data { struct request *rq; - struct list_head *list; bool last; }; @@ -90,9 +91,9 @@ typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(void *, struct request *, unsigned int, +typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); -typedef void (exit_request_fn)(void *, struct request *, unsigned int, +typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); typedef int (reinit_request_fn)(void *, struct request *); @@ -143,6 +144,14 @@ struct blk_mq_ops { reinit_request_fn *reinit_request; map_queues_fn *map_queues; + +#ifdef CONFIG_BLK_DEBUG_FS + /* + * Used by the debugfs implementation to show driver-specific + * information about a request. + */ + void (*show_rq)(struct seq_file *m, struct request *rq); +#endif }; enum { @@ -153,7 +162,6 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, - BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, @@ -163,6 +171,7 @@ enum { BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, BLK_MQ_S_TAG_WAITING = 3, + BLK_MQ_S_START_ON_RUN = 4, BLK_MQ_MAX_DEPTH = 10240, @@ -229,8 +238,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_abort_requeue_list(struct request_queue *q); -void blk_mq_complete_request(struct request *rq, int error); +void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); @@ -240,13 +248,14 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); -void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d703acb55d0f0d196296ef4d82f4e97a5efd9c3a..61339bc444006a477bf3e84b32a1d8199a9e5f01 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,6 +17,10 @@ struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +struct blk_issue_stat { + u64 stat; +}; + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -29,7 +33,7 @@ struct bio { * top bits REQ_OP. Use * accessors. */ - unsigned short bi_flags; /* status, command, etc */ + unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; struct bvec_iter bi_iter; @@ -58,6 +62,10 @@ struct bio { */ struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + void *bi_cg_private; + struct blk_issue_stat bi_issue_stat; +#endif #endif union { #if defined(CONFIG_BLK_DEV_INTEGRITY) @@ -102,12 +110,9 @@ struct bio { #define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ #define BIO_THROTTLED 9 /* This bio has already been subjected to * throttling rules. Don't do it again. */ - -/* - * Flags starting here get preserved by bio_reset() - this includes - * BVEC_POOL_IDX() - */ -#define BIO_RESET_BITS 10 +#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion + * of this bio. */ +/* See BVEC_POOL_OFFSET below before adding new flags */ /* * We support 6 different bvec pools, the last one is magic in that it @@ -117,13 +122,22 @@ struct bio { #define BVEC_POOL_MAX (BVEC_POOL_NR - 1) /* - * Top 4 bits of bio flags indicate the pool the bvecs came from. We add + * Top 3 bits of bio flags indicate the pool the bvecs came from. We add * 1 to the actual index so that 0 indicates that there are no bvecs to be * freed. */ -#define BVEC_POOL_BITS (4) +#define BVEC_POOL_BITS (3) #define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) +#if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1) +# error "BVEC_POOL_BITS is too small" +#endif + +/* + * Flags starting here get preserved by bio_reset() - this includes + * only BVEC_POOL_IDX() + */ +#define BIO_RESET_BITS BVEC_POOL_OFFSET /* * Operations and flags common to the bio and request structures. @@ -160,7 +174,7 @@ enum req_opf { /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ - REQ_OP_WRITE_ZEROES = 8, + REQ_OP_WRITE_ZEROES = 9, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, @@ -187,6 +201,10 @@ enum req_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ + + /* command specific flags for REQ_OP_WRITE_ZEROES: */ + __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_NR_BITS, /* stops here */ }; @@ -204,6 +222,8 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) +#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) + #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) @@ -283,12 +303,6 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie) return (cookie & BLK_QC_T_INTERNAL) != 0; } -struct blk_issue_stat { - u64 time; -}; - -#define BLK_RQ_STAT_BATCH 64 - struct blk_rq_stat { s64 mean; u64 min; @@ -296,7 +310,6 @@ struct blk_rq_stat { s32 nr_samples; s32 nr_batch; u64 batch; - s64 time; }; #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 01a696b0a4d3ae0118a2742c96d68775ef9f637a..b74a3edcb3da82903568981a5b49fbbf1f4269cb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -40,15 +40,20 @@ struct blkcg_gq; struct blk_flush_queue; struct pr_ops; struct rq_wb; +struct blk_queue_stats; +struct blk_stat_callback; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ +/* Must be consisitent with blk_mq_poll_stats_bkt() */ +#define BLK_MQ_POLL_STATS_BKTS 16 + /* * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 2 +#define BLKCG_MAX_POLS 3 typedef void (rq_end_io_fn)(struct request *, int); @@ -173,6 +178,7 @@ struct request { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; void *completion_data; + int error_count; /* for legacy drivers, don't use */ }; /* @@ -213,16 +219,14 @@ struct request { unsigned short ioprio; - void *special; /* opaque pointer available for LLD use */ + unsigned int timeout; - int errors; + void *special; /* opaque pointer available for LLD use */ unsigned int extra_len; /* length of alignment and padding */ unsigned long deadline; struct list_head timeout_list; - unsigned int timeout; - int retries; /* * completion callback. @@ -337,7 +341,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char cluster; - unsigned char discard_zeroes_data; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; }; @@ -388,6 +391,7 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + struct blk_queue_stats *stats; struct rq_wb *rq_wb; /* @@ -505,8 +509,6 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; - struct blk_rq_stat rq_stats[2]; - /* * Number of active block driver functions for which blk_drain_queue() * must wait. Must be incremented around functions that unlock the @@ -516,6 +518,10 @@ struct request_queue { unsigned int rq_timeout; int poll_nsec; + + struct blk_stat_callback *poll_cb; + struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS]; + struct timer_list timeout; struct work_struct timeout_work; struct list_head timeout_list; @@ -573,13 +579,15 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; - struct dentry *mq_debugfs_dir; + struct dentry *sched_debugfs_dir; #endif bool mq_sysfs_init_done; size_t cmd_size; void *rq_alloc_data; + + struct work_struct release_work; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -610,6 +618,8 @@ struct request_queue { #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ +#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ +#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -918,6 +928,7 @@ extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); +extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); @@ -963,7 +974,7 @@ extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, uns extern int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); -extern int blk_execute_rq(struct request_queue *, struct gendisk *, +extern void blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); @@ -1080,20 +1091,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq) return nr_bios; } -/* - * blk_rq_set_prio - associate a request with prio from ioc - * @rq: request of interest - * @ioc: target iocontext - * - * Assocate request prio with ioc prio so request based drivers - * can leverage priority information. - */ -static inline void blk_rq_set_prio(struct request *rq, struct io_context *ioc) -{ - if (ioc) - rq->ioprio = ioc->ioprio; -} - /* * Request issue related functions. */ @@ -1120,13 +1117,10 @@ extern void blk_finish_request(struct request *rq, int error); extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); -extern bool blk_end_request_cur(struct request *rq, int error); -extern bool blk_end_request_err(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, int error); extern bool __blk_end_request_cur(struct request *rq, int error); -extern bool __blk_end_request_err(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); @@ -1329,23 +1323,27 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return bqt->tag_index[tag]; } +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); +extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct page *page); #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ -#define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */ -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, int flags, struct bio **biop); -extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, struct page *page); + +#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ +#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ + extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, - bool discard); + unsigned flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, bool discard); + sector_t nr_sects, gfp_t gfp_mask, unsigned flags); + static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { @@ -1359,7 +1357,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, return blkdev_issue_zeroout(sb->s_bdev, block << (sb->s_blocksize_bits - 9), nr_blocks << (sb->s_blocksize_bits - 9), - gfp_mask, true); + gfp_mask, 0); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); @@ -1529,19 +1527,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev) return q->limits.discard_alignment; } -static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) -{ - if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) - return 1; - - return 0; -} - -static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) -{ - return queue_discard_zeroes_data(bdev_get_queue(bdev)); -} - static inline unsigned int bdev_write_same(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1726,6 +1711,7 @@ int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); +int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1939,28 +1925,12 @@ static inline bool integrity_req_gap_front_merge(struct request *req, #endif /* CONFIG_BLK_DEV_INTEGRITY */ -/** - * struct blk_dax_ctl - control and output parameters for ->direct_access - * @sector: (input) offset relative to a block_device - * @addr: (output) kernel virtual address for @sector populated by driver - * @pfn: (output) page frame number for @addr populated by driver - * @size: (input) number of bytes requested - */ -struct blk_dax_ctl { - sector_t sector; - void *addr; - long size; - pfn_t pfn; -}; - struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, bool); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *, - long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ @@ -1979,9 +1949,6 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); -extern int bdev_dax_supported(struct super_block *, int); -extern bool bdev_dax_capable(struct block_device *); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 909fc033173a7c893ffe7113f0e32568392b76ae..6bb38d76faf42a18437eb7565380b75a71096f78 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -35,6 +35,7 @@ struct bpf_map_ops { void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); void (*map_fd_put_ptr)(void *ptr); + u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); }; struct bpf_map { @@ -49,12 +50,7 @@ struct bpf_map { const struct bpf_map_ops *ops; struct work_struct work; atomic_t usercnt; -}; - -struct bpf_map_type_list { - struct list_head list_node; - const struct bpf_map_ops *ops; - enum bpf_map_type type; + struct bpf_map *inner_map_meta; }; /* function argument constraints */ @@ -167,12 +163,8 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog); -}; - -struct bpf_prog_type_list { - struct list_head list_node; - const struct bpf_verifier_ops *ops; - enum bpf_prog_type type; + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); }; struct bpf_prog_aux { @@ -231,11 +223,21 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -void bpf_register_prog_type(struct bpf_prog_type_list *tl); -void bpf_register_map_type(struct bpf_map_type_list *tl); +#define BPF_PROG_TYPE(_id, _ops) \ + extern const struct bpf_verifier_ops _ops; +#define BPF_MAP_TYPE(_id, _ops) \ + extern const struct bpf_map_ops _ops; +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); @@ -275,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); void bpf_fd_array_map_clear(struct bpf_map *map); +int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. @@ -295,10 +299,6 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else -static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) -{ -} - static inline struct bpf_prog *bpf_prog_get(u32 ufd) { return ERR_PTR(-EOPNOTSUPP); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h new file mode 100644 index 0000000000000000000000000000000000000000..03bf223f18be0001c80f4307ed4f521361984104 --- /dev/null +++ b/include/linux/bpf_types.h @@ -0,0 +1,36 @@ +/* internal file - do not include directly */ + +#ifdef CONFIG_NET +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) +#endif +#ifdef CONFIG_BPF_EVENTS +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) +#endif + +BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) +#ifdef CONFIG_CGROUPS +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +#endif +BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) +#ifdef CONFIG_PERF_EVENTS +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) +#endif +BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a13b031dc6b807f38c0e7e687ba2cfbfe4c65fb7..d5093b52b4855f5ec0fbd6d29df265da80945a68 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -40,6 +40,9 @@ struct bpf_reg_state { */ s64 min_value; u64 max_value; + u32 min_align; + u32 aux_off; + u32 aux_off_align; }; enum bpf_stack_slot_type { @@ -66,7 +69,10 @@ struct bpf_verifier_state_list { }; struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -84,6 +90,7 @@ struct bpf_verifier_env { struct bpf_prog *prog; /* eBPF program being verified */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ + bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 55e517130311980b36ad628054043130f88626af..abcda9b458ab65143acb1fb28b50225adbec83fd 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -25,6 +25,9 @@ #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7260 0xae025190 +#define PHY_ID_BCM7268 0xae025090 +#define PHY_ID_BCM7271 0xae0253b0 #define PHY_ID_BCM7278 0xae0251a0 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 79591c3660cc1dc61e1e3f4b501c470161c41eb9..bd029e52ef5ee232f77ff28ef3a7b4ec391f1d71 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -196,8 +196,6 @@ void ll_rw_block(int, int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int _submit_bh(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags); int submit_bh(int, int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/bug.h b/include/linux/bug.h index 5828489309bbd22a255f11064418dc3a6b9366de..687b557fc5eb9fca13f4dadd3643eb769886da67 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -105,7 +105,7 @@ static inline int is_warning_bug(const struct bug_entry *bug) return bug->flags & BUGFLAG_WARNING; } -const struct bug_entry *find_bug(unsigned long bugaddr); +struct bug_entry *find_bug(unsigned long bugaddr); enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); diff --git a/include/linux/can/core.h b/include/linux/can/core.h index df08a41d5be5f26cfa4cdc74935f5eae7fa51385..c9a17bb1221c326f7569d423d93304fe241f72e0 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -5,7 +5,7 @@ * * Authors: Oliver Hartkopp * Urs Thuermann - * Copyright (c) 2002-2007 Volkswagen Group Electronic Research + * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * */ @@ -17,7 +17,7 @@ #include #include -#define CAN_VERSION "20120528" +#define CAN_VERSION "20170425" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "9" @@ -45,12 +45,13 @@ struct can_proto { extern int can_proto_register(const struct can_proto *cp); extern void can_proto_unregister(const struct can_proto *cp); -int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, +int can_rx_register(struct net *net, struct net_device *dev, + canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, char *ident, struct sock *sk); -extern void can_rx_unregister(struct net_device *dev, canid_t can_id, - canid_t mask, +extern void can_rx_unregister(struct net *net, struct net_device *dev, + canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data); diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h new file mode 100644 index 0000000000000000000000000000000000000000..46dceef2cfa6c0efa6d4a52df5467dba6b6a7c2c --- /dev/null +++ b/include/linux/can/dev/peak_canfd.h @@ -0,0 +1,308 @@ +/* + * CAN driver for PEAK System micro-CAN based adapters + * + * Copyright (C) 2003-2011 PEAK System-Technik GmbH + * Copyright (C) 2011-2013 Stephane Grosjean + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef PUCAN_H +#define PUCAN_H + +/* uCAN commands opcodes list (low-order 10 bits) */ +#define PUCAN_CMD_NOP 0x000 +#define PUCAN_CMD_RESET_MODE 0x001 +#define PUCAN_CMD_NORMAL_MODE 0x002 +#define PUCAN_CMD_LISTEN_ONLY_MODE 0x003 +#define PUCAN_CMD_TIMING_SLOW 0x004 +#define PUCAN_CMD_TIMING_FAST 0x005 +#define PUCAN_CMD_SET_STD_FILTER 0x006 +#define PUCAN_CMD_RESERVED2 0x007 +#define PUCAN_CMD_FILTER_STD 0x008 +#define PUCAN_CMD_TX_ABORT 0x009 +#define PUCAN_CMD_WR_ERR_CNT 0x00a +#define PUCAN_CMD_SET_EN_OPTION 0x00b +#define PUCAN_CMD_CLR_DIS_OPTION 0x00c +#define PUCAN_CMD_RX_BARRIER 0x010 +#define PUCAN_CMD_END_OF_COLLECTION 0x3ff + +/* uCAN received messages list */ +#define PUCAN_MSG_CAN_RX 0x0001 +#define PUCAN_MSG_ERROR 0x0002 +#define PUCAN_MSG_STATUS 0x0003 +#define PUCAN_MSG_BUSLOAD 0x0004 + +#define PUCAN_MSG_CACHE_CRITICAL 0x0102 + +/* uCAN transmitted messages */ +#define PUCAN_MSG_CAN_TX 0x1000 + +/* uCAN command common header */ +struct __packed pucan_command { + __le16 opcode_channel; + u16 args[3]; +}; + +/* return the opcode from the opcode_channel field of a command */ +static inline u16 pucan_cmd_get_opcode(struct pucan_command *c) +{ + return le16_to_cpu(c->opcode_channel) & 0x3ff; +} + +#define PUCAN_TSLOW_BRP_BITS 10 +#define PUCAN_TSLOW_TSGEG1_BITS 8 +#define PUCAN_TSLOW_TSGEG2_BITS 7 +#define PUCAN_TSLOW_SJW_BITS 7 + +#define PUCAN_TSLOW_BRP_MASK ((1 << PUCAN_TSLOW_BRP_BITS) - 1) +#define PUCAN_TSLOW_TSEG1_MASK ((1 << PUCAN_TSLOW_TSGEG1_BITS) - 1) +#define PUCAN_TSLOW_TSEG2_MASK ((1 << PUCAN_TSLOW_TSGEG2_BITS) - 1) +#define PUCAN_TSLOW_SJW_MASK ((1 << PUCAN_TSLOW_SJW_BITS) - 1) + +/* uCAN TIMING_SLOW command fields */ +#define PUCAN_TSLOW_SJW_T(s, t) (((s) & PUCAN_TSLOW_SJW_MASK) | \ + ((!!(t)) << 7)) +#define PUCAN_TSLOW_TSEG2(t) ((t) & PUCAN_TSLOW_TSEG2_MASK) +#define PUCAN_TSLOW_TSEG1(t) ((t) & PUCAN_TSLOW_TSEG1_MASK) +#define PUCAN_TSLOW_BRP(b) ((b) & PUCAN_TSLOW_BRP_MASK) + +struct __packed pucan_timing_slow { + __le16 opcode_channel; + + u8 ewl; /* Error Warning limit */ + u8 sjw_t; /* Sync Jump Width + Triple sampling */ + u8 tseg2; /* Timing SEGment 2 */ + u8 tseg1; /* Timing SEGment 1 */ + + __le16 brp; /* BaudRate Prescaler */ +}; + +#define PUCAN_TFAST_BRP_BITS 10 +#define PUCAN_TFAST_TSGEG1_BITS 5 +#define PUCAN_TFAST_TSGEG2_BITS 4 +#define PUCAN_TFAST_SJW_BITS 4 + +#define PUCAN_TFAST_BRP_MASK ((1 << PUCAN_TFAST_BRP_BITS) - 1) +#define PUCAN_TFAST_TSEG1_MASK ((1 << PUCAN_TFAST_TSGEG1_BITS) - 1) +#define PUCAN_TFAST_TSEG2_MASK ((1 << PUCAN_TFAST_TSGEG2_BITS) - 1) +#define PUCAN_TFAST_SJW_MASK ((1 << PUCAN_TFAST_SJW_BITS) - 1) + +/* uCAN TIMING_FAST command fields */ +#define PUCAN_TFAST_SJW(s) ((s) & PUCAN_TFAST_SJW_MASK) +#define PUCAN_TFAST_TSEG2(t) ((t) & PUCAN_TFAST_TSEG2_MASK) +#define PUCAN_TFAST_TSEG1(t) ((t) & PUCAN_TFAST_TSEG1_MASK) +#define PUCAN_TFAST_BRP(b) ((b) & PUCAN_TFAST_BRP_MASK) + +struct __packed pucan_timing_fast { + __le16 opcode_channel; + + u8 unused; + u8 sjw; /* Sync Jump Width */ + u8 tseg2; /* Timing SEGment 2 */ + u8 tseg1; /* Timing SEGment 1 */ + + __le16 brp; /* BaudRate Prescaler */ +}; + +/* uCAN FILTER_STD command fields */ +#define PUCAN_FLTSTD_ROW_IDX_BITS 6 + +struct __packed pucan_filter_std { + __le16 opcode_channel; + + __le16 idx; + __le32 mask; /* CAN-ID bitmask in idx range */ +}; + +#define PUCAN_FLTSTD_ROW_IDX_MAX ((1 << PUCAN_FLTSTD_ROW_IDX_BITS) - 1) + +/* uCAN SET_STD_FILTER command fields */ +struct __packed pucan_std_filter { + __le16 opcode_channel; + + u8 unused; + u8 idx; + __le32 mask; /* CAN-ID bitmask in idx range */ +}; + +/* uCAN TX_ABORT commands fields */ +#define PUCAN_TX_ABORT_FLUSH 0x0001 + +struct __packed pucan_tx_abort { + __le16 opcode_channel; + + __le16 flags; + u32 unused; +}; + +/* uCAN WR_ERR_CNT command fields */ +#define PUCAN_WRERRCNT_TE 0x4000 /* Tx error cntr write Enable */ +#define PUCAN_WRERRCNT_RE 0x8000 /* Rx error cntr write Enable */ + +struct __packed pucan_wr_err_cnt { + __le16 opcode_channel; + + __le16 sel_mask; + u8 tx_counter; /* Tx error counter new value */ + u8 rx_counter; /* Rx error counter new value */ + + u16 unused; +}; + +/* uCAN SET_EN/CLR_DIS _OPTION command fields */ +#define PUCAN_OPTION_ERROR 0x0001 +#define PUCAN_OPTION_BUSLOAD 0x0002 +#define PUCAN_OPTION_CANDFDISO 0x0004 + +struct __packed pucan_options { + __le16 opcode_channel; + + __le16 options; + u32 unused; +}; + +/* uCAN received messages global format */ +struct __packed pucan_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; +}; + +/* uCAN flags for CAN/CANFD messages */ +#define PUCAN_MSG_SELF_RECEIVE 0x80 +#define PUCAN_MSG_ERROR_STATE_IND 0x40 /* error state indicator */ +#define PUCAN_MSG_BITRATE_SWITCH 0x20 /* bitrate switch */ +#define PUCAN_MSG_EXT_DATA_LEN 0x10 /* extended data length */ +#define PUCAN_MSG_SINGLE_SHOT 0x08 +#define PUCAN_MSG_LOOPED_BACK 0x04 +#define PUCAN_MSG_EXT_ID 0x02 +#define PUCAN_MSG_RTR 0x01 + +struct __packed pucan_rx_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; + __le32 tag_low; + __le32 tag_high; + u8 channel_dlc; + u8 client; + __le16 flags; + __le32 can_id; + u8 d[0]; +}; + +/* uCAN error types */ +#define PUCAN_ERMSG_BIT_ERROR 0 +#define PUCAN_ERMSG_FORM_ERROR 1 +#define PUCAN_ERMSG_STUFF_ERROR 2 +#define PUCAN_ERMSG_OTHER_ERROR 3 +#define PUCAN_ERMSG_ERR_CNT_DEC 4 + +struct __packed pucan_error_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; + u8 channel_type_d; + u8 code_g; + u8 tx_err_cnt; + u8 rx_err_cnt; +}; + +static inline int pucan_error_get_channel(const struct pucan_error_msg *msg) +{ + return msg->channel_type_d & 0x0f; +} + +#define PUCAN_RX_BARRIER 0x10 +#define PUCAN_BUS_PASSIVE 0x20 +#define PUCAN_BUS_WARNING 0x40 +#define PUCAN_BUS_BUSOFF 0x80 + +struct __packed pucan_status_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; + u8 channel_p_w_b; + u8 unused[3]; +}; + +static inline int pucan_status_get_channel(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & 0x0f; +} + +static inline int pucan_status_is_rx_barrier(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_RX_BARRIER; +} + +static inline int pucan_status_is_passive(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_BUS_PASSIVE; +} + +static inline int pucan_status_is_warning(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_BUS_WARNING; +} + +static inline int pucan_status_is_busoff(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_BUS_BUSOFF; +} + +/* uCAN transmitted message format */ +#define PUCAN_MSG_CHANNEL_DLC(c, d) (((c) & 0xf) | ((d) << 4)) + +struct __packed pucan_tx_msg { + __le16 size; + __le16 type; + __le32 tag_low; + __le32 tag_high; + u8 channel_dlc; + u8 client; + __le16 flags; + __le32 can_id; + u8 d[0]; +}; + +/* build the cmd opcode_channel field with respect to the correct endianness */ +static inline __le16 pucan_cmd_opcode_channel(int index, int opcode) +{ + return cpu_to_le16(((index) << 12) | ((opcode) & 0x3ff)); +} + +/* return the channel number part from any received message channel_dlc field */ +static inline int pucan_msg_get_channel(const struct pucan_rx_msg *msg) +{ + return msg->channel_dlc & 0xf; +} + +/* return the dlc value from any received message channel_dlc field */ +static inline int pucan_msg_get_dlc(const struct pucan_rx_msg *msg) +{ + return msg->channel_dlc >> 4; +} + +static inline int pucan_ermsg_get_channel(const struct pucan_error_msg *msg) +{ + return msg->channel_type_d & 0x0f; +} + +static inline int pucan_stmsg_get_channel(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & 0x0f; +} + +#endif diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h deleted file mode 100644 index a52f47ca6c8ad9c5159c34b4f568b5d84eccbd18..0000000000000000000000000000000000000000 --- a/include/linux/can/platform/ti_hecc.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _CAN_PLATFORM_TI_HECC_H -#define _CAN_PLATFORM_TI_HECC_H - -/* - * TI HECC (High End CAN Controller) driver platform header - * - * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed as is WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -/** - * struct hecc_platform_data - HECC Platform Data - * - * @scc_hecc_offset: mostly 0 - should really never change - * @scc_ram_offset: SCC RAM offset - * @hecc_ram_offset: HECC RAM offset - * @mbx_offset: Mailbox RAM offset - * @int_line: Interrupt line to use - 0 or 1 - * @version: version for future use - * @transceiver_switch: platform specific callback fn for transceiver control - * - * Platform data structure to get all platform specific settings. - * this structure also accounts the fact that the IP may have different - * RAM and mailbox offsets for different SOC's - */ -struct ti_hecc_platform_data { - u32 scc_hecc_offset; - u32 scc_ram_offset; - u32 hecc_ram_offset; - u32 mbx_offset; - u32 int_line; - u32 version; - void (*transceiver_switch) (int); -}; -#endif /* !_CAN_PLATFORM_TI_HECC_H */ diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c41b8d99dd0e7f352bc8e57e4e2ffc4dd08c63b7..3285c944194adcccdde5ae31745c939471260da6 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -123,6 +123,10 @@ enum ccp_aes_mode { CCP_AES_MODE_CFB, CCP_AES_MODE_CTR, CCP_AES_MODE_CMAC, + CCP_AES_MODE_GHASH, + CCP_AES_MODE_GCTR, + CCP_AES_MODE_GCM, + CCP_AES_MODE_GMAC, CCP_AES_MODE__LAST, }; @@ -137,6 +141,9 @@ enum ccp_aes_action { CCP_AES_ACTION_ENCRYPT, CCP_AES_ACTION__LAST, }; +/* Overloaded field */ +#define CCP_AES_GHASHAAD CCP_AES_ACTION_DECRYPT +#define CCP_AES_GHASHFINAL CCP_AES_ACTION_ENCRYPT /** * struct ccp_aes_engine - CCP AES operation @@ -181,6 +188,8 @@ struct ccp_aes_engine { struct scatterlist *cmac_key; /* K1/K2 cmac key required for * final cmac cmd */ u32 cmac_key_len; /* In bytes */ + + u32 aad_len; /* In bytes */ }; /***** XTS-AES engine *****/ @@ -249,6 +258,8 @@ enum ccp_sha_type { CCP_SHA_TYPE_1 = 1, CCP_SHA_TYPE_224, CCP_SHA_TYPE_256, + CCP_SHA_TYPE_384, + CCP_SHA_TYPE_512, CCP_SHA_TYPE__LAST, }; @@ -290,6 +301,60 @@ struct ccp_sha_engine { * final sha cmd */ }; +/***** 3DES engine *****/ +enum ccp_des3_mode { + CCP_DES3_MODE_ECB = 0, + CCP_DES3_MODE_CBC, + CCP_DES3_MODE_CFB, + CCP_DES3_MODE__LAST, +}; + +enum ccp_des3_type { + CCP_DES3_TYPE_168 = 1, + CCP_DES3_TYPE__LAST, + }; + +enum ccp_des3_action { + CCP_DES3_ACTION_DECRYPT = 0, + CCP_DES3_ACTION_ENCRYPT, + CCP_DES3_ACTION__LAST, +}; + +/** + * struct ccp_des3_engine - CCP SHA operation + * @type: Type of 3DES operation + * @mode: cipher mode + * @action: 3DES operation (decrypt/encrypt) + * @key: key to be used for this 3DES operation + * @key_len: length of key (in bytes) + * @iv: IV to be used for this AES operation + * @iv_len: length in bytes of iv + * @src: input data to be used for this operation + * @src_len: length of input data used for this operation (in bytes) + * @dst: output data produced by this operation + * + * Variables required to be set when calling ccp_enqueue_cmd(): + * - type, mode, action, key, key_len, src, dst, src_len + * - iv, iv_len for any mode other than ECB + * + * The iv variable is used as both input and output. On completion of the + * 3DES operation the new IV overwrites the old IV. + */ +struct ccp_des3_engine { + enum ccp_des3_type type; + enum ccp_des3_mode mode; + enum ccp_des3_action action; + + struct scatterlist *key; + u32 key_len; /* In bytes */ + + struct scatterlist *iv; + u32 iv_len; /* In bytes */ + + struct scatterlist *src, *dst; + u64 src_len; /* In bytes */ +}; + /***** RSA engine *****/ /** * struct ccp_rsa_engine - CCP RSA operation @@ -539,7 +604,7 @@ struct ccp_ecc_engine { enum ccp_engine { CCP_ENGINE_AES = 0, CCP_ENGINE_XTS_AES_128, - CCP_ENGINE_RSVD1, + CCP_ENGINE_DES3, CCP_ENGINE_SHA, CCP_ENGINE_RSA, CCP_ENGINE_PASSTHRU, @@ -587,6 +652,7 @@ struct ccp_cmd { union { struct ccp_aes_engine aes; struct ccp_xts_aes_engine xts; + struct ccp_des3_engine des3; struct ccp_sha_engine sha; struct ccp_rsa_engine rsa; struct ccp_passthru_engine passthru; diff --git a/include/linux/cdev.h b/include/linux/cdev.h index f8763615a5f2dc87073cbaa49e64a9e6a1ebe5e4..408bc09ce497bb14fdd058b11debbdda7c22fa78 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -4,6 +4,7 @@ #include #include #include +#include struct file_operations; struct inode; @@ -26,6 +27,10 @@ void cdev_put(struct cdev *p); int cdev_add(struct cdev *, dev_t, unsigned); +void cdev_set_parent(struct cdev *p, struct kobject *kobj); +int cdev_device_add(struct cdev *cdev, struct device *dev); +void cdev_device_del(struct cdev *cdev, struct device *dev); + void cdev_del(struct cdev *); void cd_forget(struct inode *); diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index aa2e19182d990eedda70de69bf638db5856811d1..51c5bd64bd0022c8a3f197d846c23448e7bb6a6e 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -3,6 +3,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include + #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG /* @@ -12,12 +14,10 @@ */ # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) -extern const char *ceph_file_part(const char *s, int len); # define dout(fmt, ...) \ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ - ceph_file_part(__FILE__, sizeof(__FILE__)), \ - __LINE__, ##__VA_ARGS__) + kbasename(__FILE__), __LINE__, ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) do { \ diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index ae2f66833762cd7d63bd77eafb9aed13b6ab2308..fd8b2953c78f85539104fd3741e82a9f37e1e1b5 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -105,8 +105,10 @@ static inline u64 ceph_sanitize_features(u64 features) */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_FLOCK | \ CEPH_FEATURE_SUBSCRIBE2 | \ CEPH_FEATURE_RECONNECT_SEQ | \ + CEPH_FEATURE_DIRLAYOUTHASH | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ @@ -114,11 +116,13 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_MDSENC | \ CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ CEPH_FEATURE_OSDMAP_ENC | \ + CEPH_FEATURE_MDS_INLINE_DATA | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f4b2ee18f38cbd51d2ded16380e15da7697d9f89..ad078ebe25d6beaad4a80cc906be78c3dd26c06e 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -365,6 +365,19 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_READDIR_FRAG_END (1<<0) #define CEPH_READDIR_FRAG_COMPLETE (1<<8) #define CEPH_READDIR_HASH_ORDER (1<<9) +#define CEPH_READDIR_OFFSET_HASH (1<<10) + +/* + * open request flags + */ +#define CEPH_O_RDONLY 00000000 +#define CEPH_O_WRONLY 00000001 +#define CEPH_O_RDWR 00000002 +#define CEPH_O_CREAT 00000100 +#define CEPH_O_EXCL 00000200 +#define CEPH_O_TRUNC 00001000 +#define CEPH_O_DIRECTORY 00200000 +#define CEPH_O_NOFOLLOW 00400000 union ceph_mds_request_args { struct { @@ -384,6 +397,7 @@ union ceph_mds_request_args { __le32 max_entries; /* how many dentries to grab */ __le32 max_bytes; __le16 flags; + __le32 offset_hash; } __attribute__ ((packed)) readdir; struct { __le32 mode; diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h index 84884d8d4710bc568b1590e2ede3775c863e63a1..0594d3bba774c5a78c0bc7e4fde5fbd741bd1c17 100644 --- a/include/linux/ceph/cls_lock_client.h +++ b/include/linux/ceph/cls_lock_client.h @@ -37,6 +37,11 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc, struct ceph_object_locator *oloc, char *lock_name, char *cookie, struct ceph_entity_name *locker); +int ceph_cls_set_cookie(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 type, char *old_cookie, + char *tag, char *new_cookie); void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 88cd5dc8e238a2fa7e8c66a034a8e5b0ae248f21..3229ae6c78469019f0eb747c4e09a10137e26b0f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -161,7 +162,7 @@ struct ceph_client { * dirtied. */ struct ceph_snap_context { - atomic_t nref; + refcount_t nref; u64 seq; u32 num_snaps; u64 snaps[]; @@ -262,10 +263,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client); extern void ceph_destroy_options(struct ceph_options *opt); extern int ceph_compare_options(struct ceph_options *new_opt, struct ceph_client *client); -extern struct ceph_client *ceph_create_client(struct ceph_options *opt, - void *private, - u64 supported_features, - u64 required_features); +struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private); struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client); u64 ceph_client_gid(struct ceph_client *client); extern void ceph_destroy_client(struct ceph_client *client); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 8ed5dc505fbb2e0672efd81089d1e35caf8e062c..d5f783f3226a8deb43f9336e7fe3c1b955b0e137 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,6 +25,7 @@ struct ceph_mdsmap { u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; u32 m_max_mds; /* size of m_addr, m_state arrays */ + int m_num_mds; struct ceph_mds_info *m_info; /* which object pools file data can be stored in */ @@ -40,7 +41,7 @@ struct ceph_mdsmap { static inline struct ceph_entity_addr * ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) { - if (w >= m->m_max_mds) + if (w >= m->m_num_mds) return NULL; return &m->m_info[w].addr; } @@ -48,14 +49,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) { BUG_ON(w < 0); - if (w >= m->m_max_mds) + if (w >= m->m_num_mds) return CEPH_MDS_STATE_DNE; return m->m_info[w].state; } static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) { - if (w >= 0 && w < m->m_max_mds) + if (w >= 0 && w < m->m_num_mds) return m->m_info[w].laggy; return false; } diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c125b5d9e13ceddacd921286f19133607835dee1..85650b415e73ff38487a6b2e6be7253466411a14 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); /* a given osd we're communicating with */ struct ceph_osd { - atomic_t o_ref; + refcount_t o_ref; struct ceph_osd_client *o_osdc; int o_osd; int o_incarnation; @@ -186,12 +187,12 @@ struct ceph_osd_request { struct timespec r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ bool r_linger; /* don't resend on failure */ + bool r_abort_on_full; /* return ENOSPC when full */ /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ int r_attempts; - struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; u32 r_map_dne_bound; @@ -266,6 +267,7 @@ struct ceph_osd_client { struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ spinlock_t osd_lru_lock; + u32 epoch_barrier; struct ceph_osd homeless_osd; atomic64_t last_tid; /* tid of last request */ u64 last_linger_id; @@ -304,6 +306,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); +void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); extern void osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 13d71fe18b0cf54f6097242fd08b9a30c6c48bbe..75a7db21457de5e6fce1e75cfca3749ff539057c 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -2,7 +2,7 @@ #define __FS_CEPH_PAGELIST_H #include -#include +#include #include #include @@ -13,7 +13,7 @@ struct ceph_pagelist { size_t room; struct list_head free_list; size_t num_pages_free; - atomic_t refcnt; + refcount_t refcnt; }; struct ceph_pagelist_cursor { @@ -30,7 +30,7 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) pl->room = 0; INIT_LIST_HEAD(&pl->free_list); pl->num_pages_free = 0; - atomic_set(&pl->refcnt, 1); + refcount_set(&pl->refcnt, 1); } extern void ceph_pagelist_release(struct ceph_pagelist *pl); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6a3f850cababb6f96130503014d83d256f2b9213..ec47101cb1bf80f0867dbcff1d6aa10878df7418 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ enum { CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ CSS_VISIBLE = (1 << 3), /* css is visible to userland */ + CSS_DYING = (1 << 4), /* css is dying */ }; /* bits in struct cgroup flags field */ @@ -106,9 +108,6 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* PI: the parent css */ - struct cgroup_subsys_state *parent; - /* siblings list anchored at the parent's ->children */ struct list_head sibling; struct list_head children; @@ -138,6 +137,12 @@ struct cgroup_subsys_state { /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; + + /* + * PI: the parent css. Placed here for cache proximity to following + * fields of the containing structure. + */ + struct cgroup_subsys_state *parent; }; /* @@ -156,7 +161,7 @@ struct css_set { struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; /* reference count */ - atomic_t refcount; + refcount_t refcount; /* the default cgroup associated with this css_set */ struct cgroup *dfl_cgrp; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index af9c86e958bdad3be90cfb5f19aad99ede457339..710a005c6b7a652bb9c32b5457dbd64196f6f4a0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,11 +17,11 @@ #include #include #include -#include #include #include #include #include +#include #include @@ -343,6 +343,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css) return true; } +/** + * css_is_dying - test whether the specified css is dying + * @css: target css + * + * Test whether @css is in the process of offlining or already offline. In + * most cases, ->css_online() and ->css_offline() callbacks should be + * enough; however, the actual offline operations are RCU delayed and this + * test returns %true also when @css is scheduled to be offlined. + * + * This is useful, for example, when the use case requires synchronous + * behavior with respect to cgroup removal. cgroup removal schedules css + * offlining but the css can seem alive while the operation is being + * delayed. If the delay affects user visible semantics, this test can be + * used to resolve the situation. + */ +static inline bool css_is_dying(struct cgroup_subsys_state *css) +{ + return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); +} + /** * css_put - put a css reference * @css: target css @@ -661,7 +681,7 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { - atomic_t count; + refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -696,12 +716,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (ns && atomic_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->count)) free_cgroup_ns(ns); } diff --git a/include/linux/clk.h b/include/linux/clk.h index e9d36b3e49de5b1bfa0f01277c093222e1ae2a6d..024cd07870d0e240c48f1eeb2793659150a57eef 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -132,8 +132,8 @@ int clk_get_phase(struct clk *clk); * @q: clk compared against p * * Returns true if the two struct clk pointers both point to the same hardware - * clock node. Put differently, returns true if struct clk *p and struct clk *q - * share the same struct clk_core object. + * clock node. Put differently, returns true if @p and @q + * share the same &struct clk_core object. * * Returns false otherwise. Note that two NULL clks are treated as matching. */ diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 7007a5f480802ebcad8951b113f976801981a063..d23c9cf26993b5f4518710c2495f426cb48abf85 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -125,5 +125,8 @@ extern void tegra210_xusb_pll_hw_control_enable(void); extern void tegra210_xusb_pll_hw_sequence_start(void); extern void tegra210_sata_pll_hw_control_enable(void); extern void tegra210_sata_pll_hw_sequence_start(void); +extern void tegra210_set_sata_pll_seq_sw(bool state); +extern void tegra210_put_utmipll_in_iddq(void); +extern void tegra210_put_utmipll_out_iddq(void); #endif /* __LINUX_CLK_TEGRA_H_ */ diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 6110fe09ed18c289881e63d3b8e1ecb62dfb08c9..d18da839b81013b0624bfcddb11451460e39bee9 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -18,6 +18,18 @@ #include #include +/** + * struct clk_omap_reg - OMAP register declaration + * @offset: offset from the master IP module base address + * @index: index of the master IP module + */ +struct clk_omap_reg { + void __iomem *ptr; + u16 offset; + u8 index; + u8 flags; +}; + /** * struct dpll_data - DPLL registers and integration data * @mult_div1_reg: register containing the DPLL M and N bitfields @@ -67,12 +79,12 @@ * can be placed into read-only space. */ struct dpll_data { - void __iomem *mult_div1_reg; + struct clk_omap_reg mult_div1_reg; u32 mult_mask; u32 div1_mask; struct clk_hw *clk_bypass; struct clk_hw *clk_ref; - void __iomem *control_reg; + struct clk_omap_reg control_reg; u32 enable_mask; unsigned long last_rounded_rate; u16 last_rounded_m; @@ -84,8 +96,8 @@ struct dpll_data { u16 max_divider; unsigned long max_rate; u8 modes; - void __iomem *autoidle_reg; - void __iomem *idlest_reg; + struct clk_omap_reg autoidle_reg; + struct clk_omap_reg idlest_reg; u32 autoidle_mask; u32 freqsel_mask; u32 idlest_mask; @@ -113,10 +125,10 @@ struct clk_hw_omap; */ struct clk_hw_omap_ops { void (*find_idlest)(struct clk_hw_omap *oclk, - void __iomem **idlest_reg, + struct clk_omap_reg *idlest_reg, u8 *idlest_bit, u8 *idlest_val); void (*find_companion)(struct clk_hw_omap *oclk, - void __iomem **other_reg, + struct clk_omap_reg *other_reg, u8 *other_bit); void (*allow_idle)(struct clk_hw_omap *oclk); void (*deny_idle)(struct clk_hw_omap *oclk); @@ -129,8 +141,6 @@ struct clk_hw_omap_ops { * @enable_bit: bitshift to write to enable/disable the clock (see @enable_reg) * @flags: see "struct clk.flags possibilities" above * @clksel_reg: for clksel clks, register va containing src/divisor select - * @clksel_mask: bitmask in @clksel_reg for the src/divisor selector - * @clksel: for clksel clks, pointer to struct clksel for this clock * @dpll_data: for DPLLs, pointer to struct dpll_data for this clock * @clkdm_name: clockdomain name that this clock is contained in * @clkdm: pointer to struct clockdomain, resolved from @clkdm_name at runtime @@ -141,12 +151,10 @@ struct clk_hw_omap { struct list_head node; unsigned long fixed_rate; u8 fixed_div; - void __iomem *enable_reg; + struct clk_omap_reg enable_reg; u8 enable_bit; u8 flags; - void __iomem *clksel_reg; - u32 clksel_mask; - const struct clksel *clksel; + struct clk_omap_reg clksel_reg; struct dpll_data *dpll_data; const char *clkdm_name; struct clockdomain *clkdm; @@ -172,7 +180,6 @@ struct clk_hw_omap { * should be used. This is a temporary solution - a better approach * would be to associate clock type-specific data with the clock, * similar to the struct dpll_data approach. - * MEMMAP_ADDRESSING: Use memmap addressing to access clock registers. */ #define ENABLE_REG_32BIT (1 << 0) /* Use 32-bit access */ #define CLOCK_IDLE_CONTROL (1 << 1) @@ -180,7 +187,6 @@ struct clk_hw_omap { #define ENABLE_ON_INIT (1 << 3) /* Enable upon framework init */ #define INVERT_ENABLE (1 << 4) /* 0 enables, 1 disables */ #define CLOCK_CLKOUTX2 (1 << 5) -#define MEMMAP_ADDRESSING (1 << 6) /* CM_CLKEN_PLL*.EN* bit values - not all are available for every DPLL */ #define DPLL_LOW_POWER_STOP 0x1 @@ -201,22 +207,13 @@ enum { CLK_MAX_MEMMAPS }; -/** - * struct clk_omap_reg - OMAP register declaration - * @offset: offset from the master IP module base address - * @index: index of the master IP module - */ -struct clk_omap_reg { - u16 offset; - u16 index; -}; - /** * struct ti_clk_ll_ops - low-level ops for clocks * @clk_readl: pointer to register read function * @clk_writel: pointer to register write function * @clkdm_clk_enable: pointer to clockdomain enable function * @clkdm_clk_disable: pointer to clockdomain disable function + * @clkdm_lookup: pointer to clockdomain lookup function * @cm_wait_module_ready: pointer to CM module wait ready function * @cm_split_idlest_reg: pointer to CM module function to split idlest reg * @@ -227,20 +224,20 @@ struct clk_omap_reg { * operations not provided directly by clock drivers. */ struct ti_clk_ll_ops { - u32 (*clk_readl)(void __iomem *reg); - void (*clk_writel)(u32 val, void __iomem *reg); + u32 (*clk_readl)(const struct clk_omap_reg *reg); + void (*clk_writel)(u32 val, const struct clk_omap_reg *reg); int (*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk); int (*clkdm_clk_disable)(struct clockdomain *clkdm, struct clk *clk); + struct clockdomain * (*clkdm_lookup)(const char *name); int (*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg, u8 idlest_shift); - int (*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst, - u8 *idlest_reg_id); + int (*cm_split_idlest_reg)(struct clk_omap_reg *idlest_reg, + s16 *prcm_inst, u8 *idlest_reg_id); }; #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) -void omap2_init_clk_clkdm(struct clk_hw *clk); int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 6d7edc3082f98466566cd1ae67640d7e0bd976d3..acc9ce05e5f022cdee1dd9e22019e6461ddf6475 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -182,7 +182,6 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *e extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); -extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index cfc75848a35d2c58ed85720fcd344fdc2c6f04ad..f2b10d9ebd04e7d49d032b5cb097e7f2ae630e56 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -120,7 +120,7 @@ struct clocksource { #define CLOCK_SOURCE_RESELECT 0x100 /* simplify initialization of mask field */ -#define CLOCKSOURCE_MASK(bits) (u64)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) +#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0) static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from) { diff --git a/include/linux/cma.h b/include/linux/cma.h index 03f32d0bd1d8a7be5a0fddbc058e0a7f6b059efe..3e8fbf5a5c73ae1b0a40308de1158df65988c2fa 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -21,15 +21,19 @@ struct cma; extern unsigned long totalcma_pages; extern phys_addr_t cma_get_base(const struct cma *cma); extern unsigned long cma_get_size(const struct cma *cma); +extern const char *cma_get_name(const struct cma *cma); extern int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, struct cma **res_cma); + bool fixed, const char *name, struct cma **res_cma); extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, + const char *name, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); + +extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); #endif diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 5b8721efa948ec7d93fa7caeb79a535c51e8d69b..31e4e1f1547cc1698514d854898110446ba4a4f7 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -15,7 +15,6 @@ struct venus_comm { struct list_head vc_processing; int vc_inuse; struct super_block *vc_sb; - struct backing_dev_info bdi; struct mutex vc_mutex; }; diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1a3fd3ea75161f5bc93627772515c..1c5f3152cbb57796caf1a587103ed3d092324a04 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -295,6 +295,13 @@ struct compat_old_sigaction { }; #endif +struct compat_keyctl_kdf_params { + compat_uptr_t hashname; + compat_uptr_t otherinfo; + __u32 otherinfolen; + __u32 __spare[8]; +}; + struct compat_statfs; struct compat_statfs64; struct compat_old_linux_dirent; @@ -528,11 +535,6 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); -#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 -asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user *dirent, - unsigned int count); -#endif asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); asmlinkage long compat_sys_open(const char __user *filename, int flags, @@ -723,6 +725,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index de179993e039d41d7e9034b5744be40954f81c09..d614c5ea1b5ea4a1772594b35525f1e64aec68ff 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -15,3 +15,11 @@ * with any version that can compile the kernel */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +/* + * GCC does not warn about unused static inline functions for + * -Wunused-function. This turns out to avoid the need for complex #ifdef + * directives. Suppress the warning in clang as well. + */ +#undef inline +#define inline inline __attribute__((unused)) notrace diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2319b8c108e87b9e87c11cc4c9aa314d24eb0364..c9670904968329ce3032825d55b28a72cb8b223c 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -74,7 +74,8 @@ extern void config_item_init_type_name(struct config_item *item, const char *name, struct config_item_type *type); -extern struct config_item * config_item_get(struct config_item *); +extern struct config_item *config_item_get(struct config_item *); +extern struct config_item *config_item_get_unless_zero(struct config_item *); extern void config_item_put(struct config_item *); struct config_item_type { diff --git a/include/linux/console.h b/include/linux/console.h index 5949d185558990daed3b4c76bbdfdad20fb34608..b8920a031a3e357d71101905e3396ba245160781 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -212,4 +212,6 @@ extern bool vgacon_text_force(void); static inline bool vgacon_text_force(void) { return false; } #endif +extern void console_init(void); + #endif /* _LINUX_CONSOLE_H */ diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 2a5982c37dfb660d344777c79dcde044ca304f1c..035c16c9a5051afccd8e77db5417335866812c8d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -201,7 +201,7 @@ struct coresight_ops_sink { void *sink_config); unsigned long (*reset_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost); + void *sink_config); void (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 87165f06a3079dcfe507674e495859c5627f74ca..a5ce0bbeadb5de5a1d28bb085258d6e4ae94abbf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -120,6 +120,13 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Preferred average time interval between consecutive invocations of + * the driver to set the frequency for this policy. To be set by the + * scaling driver (0, which is the default, means no preference). + */ + unsigned int transition_delay_us; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 62d240e962f036ad72d1ddf96933dd41f41acaf2..0f2a80377520ec1d12299c78775d7162c63ac8c2 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -94,6 +94,7 @@ enum cpuhp_state { CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, + CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, @@ -137,6 +138,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_CCN_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, + CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 96f1e88b767c0058bee7f3dbcc0d175e68735cfe..2404ad238c0b8c0309e8c1480339a19967968edf 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -40,9 +40,9 @@ extern int nr_cpu_ids; #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ -#define nr_cpumask_bits nr_cpu_ids +#define nr_cpumask_bits ((unsigned int)nr_cpu_ids) #else -#define nr_cpumask_bits NR_CPUS +#define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif /* @@ -667,6 +667,11 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); +static inline bool cpumask_available(cpumask_var_t mask) +{ + return mask != NULL; +} + #else typedef struct cpumask cpumask_var_t[1]; @@ -708,6 +713,11 @@ static inline void free_cpumask_var(cpumask_var_t mask) static inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } + +static inline bool cpumask_available(cpumask_var_t mask) +{ + return true; +} #endif /* CONFIG_CPUMASK_OFFSTACK */ /* It's common to want to use cpu_all_mask in struct member initializers, diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 611fce58d67039387215073978c02fda572c93fd..119a3f9604b0d90b499bdbd6627d30fbc936469b 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -42,7 +42,7 @@ static inline void cpuset_dec(void) extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_update_active_cpus(bool cpu_online); +extern void cpuset_update_active_cpus(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -155,7 +155,7 @@ static inline bool cpusets_enabled(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_update_active_cpus(bool cpu_online) +static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h new file mode 100644 index 0000000000000000000000000000000000000000..541a197ba4a2107af28dfed68a2625e7bebf8f7c --- /dev/null +++ b/include/linux/crash_core.h @@ -0,0 +1,69 @@ +#ifndef LINUX_CRASH_CORE_H +#define LINUX_CRASH_CORE_H + +#include +#include +#include + +#define CRASH_CORE_NOTE_NAME "CORE" +#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) + +#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + CRASH_CORE_NOTE_NAME_BYTES + \ + CRASH_CORE_NOTE_DESC_BYTES) + +#define VMCOREINFO_BYTES (4096) +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" +#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + VMCOREINFO_NOTE_NAME_BYTES + \ + VMCOREINFO_BYTES) + +typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; + +void crash_save_vmcoreinfo(void); +void arch_crash_save_vmcoreinfo(void); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); +phys_addr_t paddr_vmcoreinfo_note(void); + +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(name)) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_LENGTH(name, value) \ + vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) +#define VMCOREINFO_NUMBER(name) \ + vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) +#define VMCOREINFO_CONFIG(name) \ + vmcoreinfo_append_str("CONFIG_%s=y\n", #name) + +extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; +extern size_t vmcoreinfo_size; +extern size_t vmcoreinfo_max_size; + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len); +void final_note(Elf_Word *buf); + +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); + +#endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index c0b0cf3d2d2fac2ec58ac9f0b39f7fd30b552ffc..84da9978e9516a591fea2c1ad33af4c7db98a7b5 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -123,7 +123,7 @@ /* * Miscellaneous stuff. */ -#define CRYPTO_MAX_ALG_NAME 64 +#define CRYPTO_MAX_ALG_NAME 128 /* * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h index 3252799832cf933cba189710fb443cdf89061902..df4d3e943d288ed7acd8b27defe890cdd345c8f4 100644 --- a/include/linux/cryptohash.h +++ b/include/linux/cryptohash.h @@ -10,9 +10,4 @@ void sha_init(__u32 *buf); void sha_transform(__u32 *digest, const char *data, __u32 *W); -#define MD5_DIGEST_WORDS 4 -#define MD5_MESSAGE_BYTES 64 - -void md5_transform(__u32 *hash, __u32 const *in); - #endif diff --git a/include/linux/dax.h b/include/linux/dax.h index d8a3dc042e1cbb81f1c3a906ee6fc0d28fead8f8..5ec1f6c47716d6fe7c750456cef81cc0e321b1bc 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,6 +7,74 @@ #include struct iomap_ops; +struct dax_device; +struct dax_operations { + /* + * direct_access: translate a device-relative + * logical-page-offset into an absolute physical pfn. Return the + * number of pages available for DAX at that pfn. + */ + long (*direct_access)(struct dax_device *, pgoff_t, long, + void **, pfn_t *); +}; + +#if IS_ENABLED(CONFIG_DAX) +struct dax_device *dax_get_by_host(const char *host); +void put_dax(struct dax_device *dax_dev); +#else +static inline struct dax_device *dax_get_by_host(const char *host) +{ + return NULL; +} + +static inline void put_dax(struct dax_device *dax_dev) +{ +} +#endif + +int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); +#if IS_ENABLED(CONFIG_FS_DAX) +int __bdev_dax_supported(struct super_block *sb, int blocksize); +static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +{ + return __bdev_dax_supported(sb, blocksize); +} + +static inline struct dax_device *fs_dax_get_by_host(const char *host) +{ + return dax_get_by_host(host); +} + +static inline void fs_put_dax(struct dax_device *dax_dev) +{ + put_dax(dax_dev); +} + +#else +static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +{ + return -EOPNOTSUPP; +} + +static inline struct dax_device *fs_dax_get_by_host(const char *host) +{ + return NULL; +} + +static inline void fs_put_dax(struct dax_device *dax_dev) +{ +} +#endif + +int dax_read_lock(void); +void dax_read_unlock(int id); +struct dax_device *alloc_dax(void *private, const char *host, + const struct dax_operations *ops); +bool dax_alive(struct dax_device *dax_dev); +void kill_dax(struct dax_device *dax_dev); +void *dax_get_private(struct dax_device *dax_dev); +long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, + void **kaddr, pfn_t *pfn); /* * We use lowest available bit in exceptional entry for locking, one bit for @@ -41,24 +109,19 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); -int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); #ifdef CONFIG_FS_DAX -struct page *read_dax_sector(struct block_device *bdev, sector_t n); -int __dax_zero_page_range(struct block_device *bdev, sector_t sector, +int __dax_zero_page_range(struct block_device *bdev, + struct dax_device *dax_dev, sector_t sector, unsigned int offset, unsigned int length); #else -static inline struct page *read_dax_sector(struct block_device *bdev, - sector_t n) -{ - return ERR_PTR(-ENXIO); -} static inline int __dax_zero_page_range(struct block_device *bdev, - sector_t sector, unsigned int offset, unsigned int length) + struct dax_device *dax_dev, sector_t sector, + unsigned int offset, unsigned int length) { return -ENXIO; } diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 7dff776e6d160027b34d56fd7bf13030a07ebaea..9174b0d285824a879611e4ff93b5445d15cc7bfb 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -74,7 +74,7 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ .write = debugfs_attr_write, \ - .llseek = generic_file_llseek, \ + .llseek = no_llseek, \ } #if defined(CONFIG_DEBUG_FS) diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h index 7009b8bec77bb4bb305b79c4c1c8437ba61d7078..3f033c48071e65d6bb11af2897d3f234b47643c5 100644 --- a/include/linux/dell-led.h +++ b/include/linux/dell-led.h @@ -1,10 +1,6 @@ #ifndef __DELL_LED_H__ #define __DELL_LED_H__ -enum { - DELL_LED_MICMUTE, -}; - -int dell_app_wmi_led_set(int whichled, int on); +int dell_micmute_led_set(int on); #endif diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index e0acb0e5243b49552480ed8cfac2d037226f4304..6c220e4ebb6b9d8441a811b822a10333fee21498 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -27,6 +27,7 @@ #define DEVFREQ_POSTCHANGE (1) struct devfreq; +struct devfreq_governor; /** * struct devfreq_dev_status - Data given from devfreq user device to @@ -100,35 +101,6 @@ struct devfreq_dev_profile { unsigned int max_state; }; -/** - * struct devfreq_governor - Devfreq policy governor - * @node: list node - contains registered devfreq governors - * @name: Governor's name - * @immutable: Immutable flag for governor. If the value is 1, - * this govenror is never changeable to other governor. - * @get_target_freq: Returns desired operating frequency for the device. - * Basically, get_target_freq will run - * devfreq_dev_profile.get_dev_status() to get the - * status of the device (load = busy_time / total_time). - * If no_central_polling is set, this callback is called - * only with update_devfreq() notified by OPP. - * @event_handler: Callback for devfreq core framework to notify events - * to governors. Events include per device governor - * init and exit, opp changes out of devfreq, suspend - * and resume of per device devfreq during device idle. - * - * Note that the callbacks are called with devfreq->lock locked by devfreq. - */ -struct devfreq_governor { - struct list_head node; - - const char name[DEVFREQ_NAME_LEN]; - const unsigned int immutable; - int (*get_target_freq)(struct devfreq *this, unsigned long *freq); - int (*event_handler)(struct devfreq *devfreq, - unsigned int event, void *data); -}; - /** * struct devfreq - Device devfreq structure * @node: list node - contains the devices with devfreq that have been diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index c35d0c0e0ada7a4e99363d217a86fc4cad8dafaf..4635f95000a4c50bff074ac5bb2f174985f7477a 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -34,6 +34,23 @@ * If get_dynamic_power() is NULL, then the * dynamic power is calculated as * @dyn_power_coeff * frequency * voltage^2 + * @get_real_power: When this is set, the framework uses it to ask the + * device driver for the actual power. + * Some devices have more sophisticated methods + * (like power counters) to approximate the actual power + * that they use. + * This function provides more accurate data to the + * thermal governor. When the driver does not provide + * such function, framework just uses pre-calculated + * table and scale the power by 'utilization' + * (based on 'busy_time' and 'total_time' taken from + * devfreq 'last_status'). + * The value returned by this function must be lower + * or equal than the maximum power value + * for the current state + * (which can be found in power_table[state]). + * When this interface is used, the power_table holds + * max total (static + dynamic) power value for each OPP. */ struct devfreq_cooling_power { unsigned long (*get_static_power)(struct devfreq *devfreq, @@ -41,6 +58,8 @@ struct devfreq_cooling_power { unsigned long (*get_dynamic_power)(struct devfreq *devfreq, unsigned long freq, unsigned long voltage); + int (*get_real_power)(struct devfreq *df, u32 *power, + unsigned long freq, unsigned long voltage); unsigned long dyn_power_coeff; }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7e6903866fdc98689bb5189cffd4b8cb7a715fa..f4c639c0c362fd4c5106c5a6d2d6ceae1f3c795a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -22,11 +22,13 @@ struct bio_vec; /* * Type of table, mapped_device's mempool and request_queue */ -#define DM_TYPE_NONE 0 -#define DM_TYPE_BIO_BASED 1 -#define DM_TYPE_REQUEST_BASED 2 -#define DM_TYPE_MQ_REQUEST_BASED 3 -#define DM_TYPE_DAX_BIO_BASED 4 +enum dm_queue_mode { + DM_TYPE_NONE = 0, + DM_TYPE_BIO_BASED = 1, + DM_TYPE_REQUEST_BASED = 2, + DM_TYPE_MQ_REQUEST_BASED = 3, + DM_TYPE_DAX_BIO_BASED = 4, +}; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; @@ -128,13 +130,15 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); * < 0 : error * >= 0 : the number of bytes accessible at the address */ -typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, - void **kaddr, pfn_t *pfn, long size); +typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn); +#define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); struct dm_dev { struct block_device *bdev; + struct dax_device *dax_dev; fmode_t mode; char name[16]; }; @@ -176,7 +180,7 @@ struct target_type { dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; - dm_direct_access_fn direct_access; + dm_dax_direct_access_fn direct_access; /* For internal device-mapper use. */ struct list_head list; @@ -221,6 +225,18 @@ struct target_type { */ typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio); +/* + * A target implements own bio data integrity. + */ +#define DM_TARGET_INTEGRITY 0x00000010 +#define dm_target_has_integrity(type) ((type)->features & DM_TARGET_INTEGRITY) + +/* + * A target passes integrity data to the lower device. + */ +#define DM_TARGET_PASSES_INTEGRITY 0x00000020 +#define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY) + struct dm_target { struct dm_table *table; struct target_type *type; @@ -254,6 +270,12 @@ struct dm_target { */ unsigned num_write_same_bios; + /* + * The number of WRITE ZEROES bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. + */ + unsigned num_write_zeroes_bios; + /* * The minimum number of extra bytes allocated in each io for the * target to use. @@ -290,11 +312,6 @@ struct dm_target { * on max_io_len boundary. */ bool split_discard_bios:1; - - /* - * Set if this target does not return zeroes on discarded blocks. - */ - bool discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ @@ -464,7 +481,7 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback * Useful for "hybrid" target (supports both bio-based * and request-based). */ -void dm_table_set_type(struct dm_table *t, unsigned type); +void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type); /* * Finally call this to make the table ready for use. @@ -578,6 +595,7 @@ extern struct ratelimit_state dm_ratelimit_state; /* * Definitions of return values from target end_io function. */ +#define DM_ENDIO_DONE 0 #define DM_ENDIO_INCOMPLETE 1 #define DM_ENDIO_REQUEUE 2 @@ -588,6 +606,7 @@ extern struct ratelimit_state dm_ratelimit_state; #define DM_MAPIO_REMAPPED 1 #define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE #define DM_MAPIO_DELAY_REQUEUE 3 +#define DM_MAPIO_KILL 4 #define dm_sector_div64(x, y)( \ { \ diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 6048fa404e571165b40a7964cb1408f413bba9f0..a5195a7d6f77e40d23d29ba2793c7b393f4eb0bb 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) * * Function returns NULL if no refcount could be obtained, or the fence. * This function handles acquiring a reference to a fence that may be - * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), + * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU), * so long as the caller is using RCU on the pointer to the fence. * * An alternative mechanism is to employ a seqlock to protect a bunch of @@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) * have successfully acquire a reference to it. If it no * longer matches, we are holding a reference to some other * reallocated pointer. This is possible if the allocator - * is using a freelist like SLAB_DESTROY_BY_RCU where the + * is using a freelist like SLAB_TYPESAFE_BY_RCU where the * fence remains valid for the RCU grace period, but it * may be reallocated. When using such allocators, we are * responsible for ensuring the reference we get is to diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5725c94b1f121ece268c0fd8558cb94770c2c9cd..92f20832fd28770c16ab9d34c5560efce2a888c6 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ #include #ifdef CONFIG_IOMMU_DMA +#include #include #include @@ -71,11 +72,13 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); #else struct iommu_domain; struct msi_msg; +struct device; static inline int iommu_dma_init(void) { @@ -100,6 +103,10 @@ static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg) { } +static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __KERNEL__ */ #endif /* __DMA_IOMMU_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0977317c6835c2526428f61c12fcfab976650b99..4f3eecedca2d7c75c683cc2f08d64be9ac95118d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -728,6 +728,18 @@ dma_mark_declared_memory_occupied(struct device *dev, } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ +#ifdef CONFIG_HAS_DMA +int dma_configure(struct device *dev); +void dma_deconfigure(struct device *dev); +#else +static inline int dma_configure(struct device *dev) +{ + return 0; +} + +static inline void dma_deconfigure(struct device *dev) {} +#endif + /* * Managed DMA API */ diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 187c102997226d4107f2dd703413c105e980e39f..90884072fa73254f5da8bb2091c6c3d2bded2f20 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -39,6 +39,7 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 5e9c74cf889481ddbf6ae3a9caef166bd6ae6dee..9bbf21a516e4aa339aff03e9c2091043bb265fba 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -136,7 +136,7 @@ static inline int dmi_name_in_vendors(const char *s) { return 0; } static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *), - void *private_data) { return -1; } + void *private_data) { return -ENXIO; } static inline bool dmi_match(enum dmi_field f, const char *str) { return false; } static inline void dmi_memdev_name(u16 handle, const char **bank, diff --git a/include/linux/edac.h b/include/linux/edac.h index 5b6adf964248dd478dd6d266ccc59468d26951ce..8ae0f45fafd650f57b10b9db50cef9d91578506c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -28,12 +28,10 @@ struct device; #define EDAC_OPSTATE_INT 2 extern int edac_op_state; -extern int edac_err_assert; -extern atomic_t edac_handlers; -extern int edac_handler_set(void); -extern void edac_atomic_assert_error(void); -extern struct bus_type *edac_get_sysfs_subsys(void); +struct bus_type *edac_get_sysfs_subsys(void); +int edac_get_report_status(void); +void edac_set_report_status(int new); enum { EDAC_REPORTING_ENABLED, @@ -41,28 +39,6 @@ enum { EDAC_REPORTING_FORCE }; -extern int edac_report_status; -#ifdef CONFIG_EDAC -static inline int get_edac_report_status(void) -{ - return edac_report_status; -} - -static inline void set_edac_report_status(int new) -{ - edac_report_status = new; -} -#else -static inline int get_edac_report_status(void) -{ - return EDAC_REPORTING_DISABLED; -} - -static inline void set_edac_report_status(int new) -{ -} -#endif - static inline void opstate_init(void) { switch (edac_op_state) { diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h index 2fd3993c370b273c9ae8c0f58604a1280527f832..e6f624b53c3d89c6ba575bb61a3d868a251473ae 100644 --- a/include/linux/efi-bgrt.h +++ b/include/linux/efi-bgrt.h @@ -6,6 +6,7 @@ #ifdef CONFIG_ACPI_BGRT void efi_bgrt_init(struct acpi_table_header *table); +int __init acpi_parse_bgrt(struct acpi_table_header *table); /* The BGRT data itself; only valid if bgrt_image != NULL. */ extern size_t bgrt_image_size; @@ -14,6 +15,10 @@ extern struct acpi_table_bgrt bgrt_tab; #else /* !CONFIG_ACPI_BGRT */ static inline void efi_bgrt_init(struct acpi_table_header *table) {} +static inline int __init acpi_parse_bgrt(struct acpi_table_header *table) +{ + return 0; +} #endif /* !CONFIG_ACPI_BGRT */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 94d34e0be24f55477573f48806de2d2817fe9722..ec36f42a2adde07cd9ca9b37ddaaf9442dccee31 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1435,9 +1435,6 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) /* prototypes shared between arch specific and generic stub code */ -#define pr_efi(sys_table, msg) efi_printk(sys_table, "EFI stub: "msg) -#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) - void efi_printk(efi_system_table_t *sys_table_arg, char *str); void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, @@ -1471,7 +1468,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, unsigned long *load_addr, unsigned long *load_size); -efi_status_t efi_parse_options(char *cmdline); +efi_status_t efi_parse_options(char const *cmdline); efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 22d39e8d4de16be2b69762cf24c92799dfb445d6..0e306c5a86d6ee90debc824aa5d18e8f6d078f4d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -8,6 +8,9 @@ struct io_cq; struct elevator_type; +#ifdef CONFIG_BLK_DEBUG_FS +struct blk_mq_debugfs_attr; +#endif /* * Return values from elevator merger @@ -93,6 +96,8 @@ struct blk_mq_hw_ctx; struct elevator_mq_ops { int (*init_sched)(struct request_queue *, struct elevator_type *); void (*exit_sched)(struct elevator_queue *); + int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); + void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); @@ -104,7 +109,7 @@ struct elevator_mq_ops { void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); - void (*completed_request)(struct blk_mq_hw_ctx *, struct request *); + void (*completed_request)(struct request *); void (*started_request)(struct request *); void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); @@ -142,9 +147,13 @@ struct elevator_type char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; bool uses_mq; +#ifdef CONFIG_BLK_DEBUG_FS + const struct blk_mq_debugfs_attr *queue_debugfs_attrs; + const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; +#endif /* managed by elevator core */ - char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ + char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */ struct list_head list; }; @@ -212,7 +221,6 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct request_queue *, struct elevator_queue *); -extern int elevator_change(struct request_queue *, const char *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, struct elevator_type *); diff --git a/include/linux/elf.h b/include/linux/elf.h index 20fa8d8ae31335744c4566eb8b6305997b44db8b..ba069e8f4f7873b002da2d632d81520eb2a662fd 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -29,6 +29,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_note elf32_note #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half +#define Elf_Word Elf32_Word #else @@ -39,6 +40,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_note elf64_note #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half +#define Elf_Word Elf64_Word #endif diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c62b709b1ce087b7891f5d9c76aa2940b7f4a9a9..2d9f80848d4bd2b6e60dc06f1053ba91256c37ac 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -446,21 +446,6 @@ static inline void eth_addr_dec(u8 *addr) u64_to_ether_addr(u, addr); } -/** - * ether_addr_greater - Compare two Ethernet addresses - * @addr1: Pointer to a six-byte array containing the Ethernet address - * @addr2: Pointer other six-byte array containing the Ethernet address - * - * Compare two Ethernet addresses, returns true addr1 is greater than addr2 - */ -static inline bool ether_addr_greater(const u8 *addr1, const u8 *addr2) -{ - u64 u1 = ether_addr_to_u64(addr1); - u64 u2 = ether_addr_to_u64(addr2); - - return u1 > u2; -} - /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9ded8c6d8176b909cf68da0e125eef4441b7c9a9..83cc9863444b078765255b9010b015578768be09 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -60,6 +60,7 @@ enum ethtool_phys_id_state { enum { ETH_RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */ ETH_RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */ + ETH_RSS_HASH_CRC32_BIT, /* Configurable RSS hash function - Crc32 */ /* * Add your fresh new hash function bits above and remember to update @@ -73,6 +74,7 @@ enum { #define ETH_RSS_HASH_TOP __ETH_RSS_HASH(TOP) #define ETH_RSS_HASH_XOR __ETH_RSS_HASH(XOR) +#define ETH_RSS_HASH_CRC32 __ETH_RSS_HASH(CRC32) #define ETH_RSS_HASH_UNKNOWN 0 #define ETH_RSS_HASH_NO_CHANGE 0 diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 7010fb01a81a342e20abd9ac917d5c3f7aa3de14..7e206a9f88db81d2feb152ef4a98a9f0119af09e 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -236,11 +236,11 @@ extern int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); /* - * Following APIs are to monitor every action of a notifier. - * Registrar gets notified for every external port of a connection device. - * Probably this could be used to debug an action of notifier; however, - * we do not recommend to use this for normal 'notifiee' device drivers who - * want to be notified by a specific external port of the notifier. + * Following APIs are to monitor the status change of the external connectors. + * extcon_register_notifier(*edev, id, *nb) : Register a notifier block + * for specific external connector of the extcon. + * extcon_register_notifier_all(*edev, *nb) : Register a notifier block + * for all supported external connectors of the extcon. */ extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); @@ -253,6 +253,17 @@ extern void devm_extcon_unregister_notifier(struct device *dev, struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); +extern int extcon_register_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb); +extern int extcon_unregister_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb); +extern int devm_extcon_register_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb); +extern void devm_extcon_unregister_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb); + /* * Following API get the extcon device from devicetree. * This function use phandle of devicetree to get extcon device directly. diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index e2d239ed4c60cb05fbf6072e1a3a3ea4e34594f3..b6feed6547ce92862acaf3153c5859d3eb132ccb 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -32,9 +32,9 @@ /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) -#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) -#define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num) +#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) +#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ @@ -114,6 +114,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 @@ -161,7 +162,7 @@ struct f2fs_checkpoint { */ #define F2FS_ORPHANS_PER_BLOCK 1020 -#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \ +#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ F2FS_ORPHANS_PER_BLOCK) struct f2fs_orphan_block { @@ -301,6 +302,12 @@ struct f2fs_nat_block { #define SIT_VBLOCK_MAP_SIZE 64 #define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +/* + * F2FS uses 4 bytes to represent block address. As a result, supported size of + * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. + */ +#define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) + /* * Note that f2fs_sit_entry->vblocks has the following bit-field information. * [15:10] : allocation type such as CURSEG_XXXX_TYPE @@ -449,7 +456,7 @@ typedef __le32 f2fs_hash_t; #define F2FS_SLOT_LEN 8 #define F2FS_SLOT_LEN_BITS 3 -#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) +#define GET_DENTRY_SLOTS(x) (((x) + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) /* MAX level for dir lookup */ #define MAX_DIR_HASH_DEPTH 63 diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 76ce329e656d1eaca5a6d6792f97d3c067fa37c6..1b48d9c9a5613cc1b619444e2046c36dcfeda872 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -3,6 +3,12 @@ #include +/* list of all valid flags for the open/openat flags argument: */ +#define VALID_OPEN_FLAGS \ + (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ + O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \ + FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) #ifndef force_o_largefile #define force_o_largefile() (BITS_PER_LONG != 32) diff --git a/include/linux/filter.h b/include/linux/filter.h index fbf7b39e81035506b73ddc860e3029119104b255..62d948f80730fdd94c587b4f0235c72cf5d3399a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,9 @@ #include -#include +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +#include +#endif #include #include @@ -269,6 +272,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ @@ -412,8 +425,7 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - xdp_adjust_head:1; /* Adjusting pkt head? */ + dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ @@ -430,7 +442,7 @@ struct bpf_prog { }; struct sk_filter { - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; @@ -693,6 +705,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool ebpf_jit_enabled(void) +{ + return bpf_jit_enable && bpf_jit_is_ebpf(); +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return fp->jited && bpf_jit_is_ebpf(); @@ -753,6 +770,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp); #else /* CONFIG_BPF_JIT */ +static inline bool ebpf_jit_enabled(void) +{ + return false; +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 8e953c6f394ae58ef5902c2ae4fd44fd5f3c1173..37a5eaea69dde299a18855ffc99b1227513b25da 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -25,7 +25,7 @@ int meson_sm_call(unsigned int cmd_index, u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); -int meson_sm_call_read(void *buffer, unsigned int cmd_index, u32 arg0, u32 arg1, - u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call_read(void *buffer, unsigned int bsize, unsigned int cmd_index, + u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); #endif /* _MESON_SM_FW_H_ */ diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index b6efb0c64408b35b7874726ca22c72f2e5fe21f3..11366b3ff0b4fda30d8ff466c180ed3e23beef5e 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,16 +61,83 @@ struct flex_array { FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ } +/** + * flex_array_alloc() - Creates a flexible array. + * @element_size: individual object size. + * @total: maximum number of objects which can be stored. + * @flags: GFP flags + * + * Return: Returns an object of structure flex_array. + */ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); + +/** + * flex_array_prealloc() - Ensures that memory for the elements indexed in the + * range defined by start and nr_elements has been allocated. + * @fa: array to allocate memory to. + * @start: start address + * @nr_elements: number of elements to be allocated. + * @flags: GFP flags + * + */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, unsigned int nr_elements, gfp_t flags); + +/** + * flex_array_free() - Removes all elements of a flexible array. + * @fa: array to be freed. + */ void flex_array_free(struct flex_array *fa); + +/** + * flex_array_free_parts() - Removes all elements of a flexible array, but + * leaves the array itself in place. + * @fa: array to be emptied. + */ void flex_array_free_parts(struct flex_array *fa); + +/** + * flex_array_put() - Stores data into a flexible array. + * @fa: array where element is to be stored. + * @element_nr: position to copy, must be less than the maximum specified when + * the array was created. + * @src: data source to be copied into the array. + * @flags: GFP flags + * + * Return: Returns zero on success, a negative error code otherwise. + */ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); + +/** + * flex_array_clear() - Clears an individual element in the array, sets the + * given element to FLEX_ARRAY_FREE. + * @element_nr: element position to clear. + * @fa: array to which element to be cleared belongs. + * + * Return: Returns zero on success, -EINVAL otherwise. + */ int flex_array_clear(struct flex_array *fa, unsigned int element_nr); + +/** + * flex_array_get() - Retrieves data into a flexible array. + * + * @element_nr: Element position to retrieve data from. + * @fa: array from which data is to be retrieved. + * + * Return: Returns a pointer to the data element, or NULL if that + * particular element has never been allocated. + */ void *flex_array_get(struct flex_array *fa, unsigned int element_nr); + +/** + * flex_array_shrink() - Reduces the allocated size of an array. + * @fa: array to shrink. + * + * Return: Returns number of pages of memory actually freed. + * + */ int flex_array_shrink(struct flex_array *fa); #define flex_array_put_ptr(fa, nr, src, gfp) \ diff --git a/include/linux/fpga/altera-pr-ip-core.h b/include/linux/fpga/altera-pr-ip-core.h new file mode 100644 index 0000000000000000000000000000000000000000..3810a9033f496c0ca61715e13736602e37c61e96 --- /dev/null +++ b/include/linux/fpga/altera-pr-ip-core.h @@ -0,0 +1,29 @@ +/* + * Driver for Altera Partial Reconfiguration IP Core + * + * Copyright (C) 2016 Intel Corporation + * + * Based on socfpga-a10.c Copyright (C) 2015-2016 Altera Corporation + * by Alan Tull + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef _ALT_PR_IP_CORE_H +#define _ALT_PR_IP_CORE_H +#include + +int alt_pr_register(struct device *dev, void __iomem *reg_base); +int alt_pr_unregister(struct device *dev); + +#endif /* _ALT_PR_IP_CORE_H */ diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 57beb5d09bfcb2b876266ba89c25e7a9ed81734b..b4ac24c4411d0fae1a77aa40024f7b9a5285ad5c 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -70,17 +70,21 @@ enum fpga_mgr_states { */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) #define FPGA_MGR_EXTERNAL_CONFIG BIT(1) +#define FPGA_MGR_ENCRYPTED_BITSTREAM BIT(2) /** * struct fpga_image_info - information specific to a FPGA image * @flags: boolean flags as defined above * @enable_timeout_us: maximum time to enable traffic through bridge (uSec) * @disable_timeout_us: maximum time to disable traffic through bridge (uSec) + * @config_complete_timeout_us: maximum time for FPGA to switch to operating + * status in the write_complete op. */ struct fpga_image_info { u32 flags; u32 enable_timeout_us; u32 disable_timeout_us; + u32 config_complete_timeout_us; }; /** diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8b80b44f28c2051c5bef8e947e6bb..803e5a9b265422d2c2034678331b6d4dd353d1de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -250,9 +249,8 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; -#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ -#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ -#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct +#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */ +#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct * helper code (eg buffer layer) * to clear GFP_FS from alloc */ @@ -546,6 +544,8 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 +struct fsnotify_mark_connector; + /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -645,7 +645,7 @@ struct inode { #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ - struct hlist_head i_fsnotify_marks; + struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #if IS_ENABLED(CONFIG_FS_ENCRYPTION) @@ -909,6 +909,8 @@ static inline struct file *get_file(struct file *f) #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) + /* * Special return value from posix_lock_file() and vfs_lock_file() for * asynchronous locking. @@ -1429,7 +1431,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); } -extern struct timespec current_fs_time(struct super_block *sb); extern struct timespec current_time(struct inode *inode); /* @@ -2121,6 +2122,9 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); +extern __printf(2, 3) +int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); +extern int super_setup_bdi(struct super_block *sb); extern int current_umask(void); @@ -2921,17 +2925,19 @@ extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { - return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); + return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { - return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { @@ -2996,9 +3002,10 @@ extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); -struct tree_descr { char *name; const struct file_operations *ops; int mode; }; +struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); -extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); +extern int simple_fill_super(struct super_block *, unsigned long, + const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 10c1abfbac6c45d1049fdf9f1b1f133371521461..0a30c106c1e535188637d7947a74f77c933968a1 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -46,17 +46,6 @@ struct fscrypt_symlink_data { char encrypted_path[1]; } __packed; -/** - * This function is used to calculate the disk space required to - * store a filename of length l in encrypted symlink format. - */ -static inline u32 fscrypt_symlink_data_len(u32 l) -{ - if (l < FS_CRYPTO_BLOCK_SIZE) - l = FS_CRYPTO_BLOCK_SIZE; - return (l + sizeof(struct fscrypt_symlink_data) - 1); -} - struct fscrypt_str { unsigned char *name; u32 len; diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 3511ca7988043e75440886f0530abae2dde1f1d8..ec406aed2f2f8c3331d0002fba8a83036e75c08b 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -147,6 +147,15 @@ static inline int fscrypt_fname_usr_to_disk(struct inode *inode, return -EOPNOTSUPP; } +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + /* Encryption support disabled; use standard comparison */ + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + /* bio.c */ static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index a140f47e9b271212ebc0a6ecc6a1db0527e37afe..cd4e82c17304ffbe97868b4aec146ca623ec2d78 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -57,6 +57,80 @@ extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, struct fscrypt_str *); +#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 + +/* Extracts the second-to-last ciphertext block; see explanation below */ +#define FSCRYPT_FNAME_DIGEST(name, len) \ + ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ + FS_CRYPTO_BLOCK_SIZE)) + +#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE + +/** + * fscrypt_digested_name - alternate identifier for an on-disk filename + * + * When userspace lists an encrypted directory without access to the key, + * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE + * bytes are shown in this abbreviated form (base64-encoded) rather than as the + * full ciphertext (base64-encoded). This is necessary to allow supporting + * filenames up to NAME_MAX bytes, since base64 encoding expands the length. + * + * To make it possible for filesystems to still find the correct directory entry + * despite not knowing the full on-disk name, we encode any filesystem-specific + * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, + * followed by the second-to-last ciphertext block of the filename. Due to the + * use of the CBC-CTS encryption mode, the second-to-last ciphertext block + * depends on the full plaintext. (Note that ciphertext stealing causes the + * last two blocks to appear "flipped".) This makes accidental collisions very + * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they + * share the same filesystem-specific hashes. + * + * However, this scheme isn't immune to intentional collisions, which can be + * created by anyone able to create arbitrary plaintext filenames and view them + * without the key. Making the "digest" be a real cryptographic hash like + * SHA-256 over the full ciphertext would prevent this, although it would be + * less efficient and harder to implement, especially since the filesystem would + * need to calculate it for each directory entry examined during a search. + */ +struct fscrypt_digested_name { + u32 hash; + u32 minor_hash; + u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; +}; + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and a filename in it is + * very long, then we won't have the full disk_name and we'll instead need to + * match against the fscrypt_digested_name. + * + * Return: %true if the name matches, otherwise %false. + */ +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + if (unlikely(!fname->disk_name.name)) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) + return false; + if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) + return false; + return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), + n->digest, FSCRYPT_FNAME_DIGEST_SIZE); + } + + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + /* bio.c */ extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); extern void fscrypt_pullback_bio_page(struct page **, bool); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e6e689b5569e6f2abf00859874645620fadd3155..c6c69318752bd7b8caeecee48496bb39227c2754 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -80,6 +80,7 @@ struct fsnotify_event; struct fsnotify_mark; struct fsnotify_event_private_data; struct fsnotify_fname; +struct fsnotify_iter_info; /* * Each group much define these ops. The fsnotify infrastructure will call @@ -98,10 +99,13 @@ struct fsnotify_ops { struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); + /* called on final put+free to free memory */ + void (*free_mark)(struct fsnotify_mark *mark); }; /* @@ -163,6 +167,8 @@ struct fsnotify_group { struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ + atomic_t user_waits; /* Number of tasks waiting for user + * response */ /* groups can define private fields here or use the void *private */ union { @@ -194,6 +200,30 @@ struct fsnotify_group { #define FSNOTIFY_EVENT_PATH 1 #define FSNOTIFY_EVENT_INODE 2 +/* + * Inode / vfsmount point to this structure which tracks all marks attached to + * the inode / vfsmount. The reference to inode / vfsmount is held by this + * structure. We destroy this structure when there are no more marks attached + * to it. The structure is protected by fsnotify_mark_srcu. + */ +struct fsnotify_mark_connector { + spinlock_t lock; +#define FSNOTIFY_OBJ_TYPE_INODE 0x01 +#define FSNOTIFY_OBJ_TYPE_VFSMOUNT 0x02 +#define FSNOTIFY_OBJ_ALL_TYPES (FSNOTIFY_OBJ_TYPE_INODE | \ + FSNOTIFY_OBJ_TYPE_VFSMOUNT) + unsigned int flags; /* Type of object [lock] */ + union { /* Object pointer [lock] */ + struct inode *inode; + struct vfsmount *mnt; + }; + union { + struct hlist_head list; + /* Used listing heads to free after srcu period expires */ + struct fsnotify_mark_connector *destroy_next; + }; +}; + /* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events @@ -223,22 +253,16 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [obj_lock] */ + /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; - union { /* Object pointer [mark->lock, group->mark_mutex] */ - struct inode *inode; /* inode this mark is associated with */ - struct vfsmount *mnt; /* vfsmount this mark is associated with */ - }; + /* Head of list of marks for an object [mark ref] */ + struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -#define FSNOTIFY_MARK_FLAG_INODE 0x01 -#define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 -#define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04 -#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 -#define FSNOTIFY_MARK_FLAG_ALIVE 0x10 -#define FSNOTIFY_MARK_FLAG_ATTACHED 0x20 +#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 +#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 +#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 unsigned int flags; /* flags [mark->lock] */ - void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ }; #ifdef CONFIG_FSNOTIFY @@ -315,23 +339,18 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* functions used to manipulate the marks attached to inodes */ -/* run all marks associated with a vfsmount and update mnt->mnt_fsnotify_mask */ -extern void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt); -/* run all marks associated with an inode and update inode->i_fsnotify_mask */ -extern void fsnotify_recalc_inode_mask(struct inode *inode); -extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark)); -/* find (and take a reference) to a mark associated with group and inode */ -extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); -/* find (and take a reference) to a mark associated with group and vfsmount */ -extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt); -/* set the ignored_mask of a mark */ -extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask); -/* set the mask of a mark (might pin the object into memory */ -extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask); -/* attach the mark to both the group and the inode */ -extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, +/* Calculate mask of events for a list of marks */ +extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); +extern void fsnotify_init_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group); +/* Find mark belonging to given group in the list of marks */ +extern struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group); +/* attach the mark to the inode or vfsmount */ +extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, + struct vfsmount *mnt, int allow_dups); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, @@ -340,15 +359,23 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); +/* run all the marks in a group, and clear all of the marks attached to given object type */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); /* run all the marks in a group, and clear all of the vfsmount marks */ -extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); +static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); +} /* run all the marks in a group, and clear all of the inode marks */ -extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); -/* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/ -extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); +static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); +} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); +extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); +extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); /* put here because inotify does some weird stuff when destroying watches */ extern void fsnotify_init_event(struct fsnotify_event *event, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3633e8beff39e3ad67409a133109350128607ddd..473f088aabeac7b3899c155c778628e389e4d3fe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,8 +42,10 @@ /* Main tracing buffer and events set up */ #ifdef CONFIG_TRACING void trace_init(void); +void early_trace_init(void); #else static inline void trace_init(void) { } +static inline void early_trace_init(void) { } #endif struct module; @@ -70,7 +72,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and * IPMODIFY are a kind of attribute flags which can be set only before * registering the ftrace_ops, and can not be modified while registered. - * Changing those attribute flags after regsitering ftrace_ops will + * Changing those attribute flags after registering ftrace_ops will * cause unexpected results. * * ENABLED - set/unset when ftrace_ops is registered/unregistered @@ -144,6 +146,10 @@ struct ftrace_ops_hash { struct ftrace_hash *filter_hash; struct mutex regex_lock; }; + +void ftrace_free_init_mem(void); +#else +static inline void ftrace_free_init_mem(void) { } #endif /* @@ -260,6 +266,7 @@ static inline int ftrace_nr_registered_ops(void) } static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } +static inline void ftrace_free_init_mem(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER @@ -279,15 +286,45 @@ int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#endif -struct ftrace_func_command { - struct list_head list; - char *name; - int (*func)(struct ftrace_hash *hash, - char *func, char *cmd, - char *params, int enable); -}; +/* DO NOT MODIFY THIS VARIABLE DIRECTLY! */ +DECLARE_PER_CPU(int, disable_stack_tracer); + +/** + * stack_tracer_disable - temporarily disable the stack tracer + * + * There's a few locations (namely in RCU) where stack tracing + * cannot be executed. This function is used to disable stack + * tracing during those critical sections. + * + * This function must be called with preemption or interrupts + * disabled and stack_tracer_enable() must be called shortly after + * while preemption or interrupts are still disabled. + */ +static inline void stack_tracer_disable(void) +{ + /* Preemption or interupts must be disabled */ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_inc(disable_stack_tracer); +} + +/** + * stack_tracer_enable - re-enable the stack tracer + * + * After stack_tracer_disable() is called, stack_tracer_enable() + * must be called shortly afterward. + */ +static inline void stack_tracer_enable(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_dec(disable_stack_tracer); +} +#else +static inline void stack_tracer_disable(void) { } +static inline void stack_tracer_enable(void) { } +#endif #ifdef CONFIG_DYNAMIC_FTRACE @@ -315,30 +352,6 @@ void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; -struct ftrace_probe_ops { - void (*func)(unsigned long ip, - unsigned long parent_ip, - void **data); - int (*init)(struct ftrace_probe_ops *ops, - unsigned long ip, void **data); - void (*free)(struct ftrace_probe_ops *ops, - unsigned long ip, void **data); - int (*print)(struct seq_file *m, - unsigned long ip, - struct ftrace_probe_ops *ops, - void *data); -}; - -extern int -register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data); -extern void -unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data); -extern void -unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); -extern void unregister_ftrace_function_probe_all(char *glob); - extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); @@ -400,9 +413,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); void ftrace_free_filter(struct ftrace_ops *ops); void ftrace_ops_set_global_filter(struct ftrace_ops *ops); -int register_ftrace_command(struct ftrace_func_command *cmd); -int unregister_ftrace_command(struct ftrace_func_command *cmd); - enum { FTRACE_UPDATE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -433,8 +443,8 @@ enum { FTRACE_ITER_FILTER = (1 << 0), FTRACE_ITER_NOTRACE = (1 << 1), FTRACE_ITER_PRINTALL = (1 << 2), - FTRACE_ITER_DO_HASH = (1 << 3), - FTRACE_ITER_HASH = (1 << 4), + FTRACE_ITER_DO_PROBES = (1 << 3), + FTRACE_ITER_PROBE = (1 << 4), FTRACE_ITER_ENABLED = (1 << 5), }; @@ -618,14 +628,6 @@ static inline void ftrace_enable_daemon(void) { } static inline void ftrace_module_init(struct module *mod) { } static inline void ftrace_module_enable(struct module *mod) { } static inline void ftrace_release_mod(struct module *mod) { } -static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) -{ - return -EINVAL; -} -static inline __init int unregister_ftrace_command(char *cmd_name) -{ - return -EINVAL; -} static inline int ftrace_text_reserved(const void *start, const void *end) { return 0; diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 8bd28ce6d76e2491d10942f60ad77cc15e1edea0..3dff2398a5f0beb8cf0e5f4dd1d139bb727bf480 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -27,4 +27,16 @@ struct fwnode_handle { struct fwnode_handle *secondary; }; +/** + * struct fwnode_endpoint - Fwnode graph endpoint + * @port: Port number + * @id: Endpoint id + * @local_fwnode: reference to the related fwnode + */ +struct fwnode_endpoint { + unsigned int port; + unsigned int id; + const struct fwnode_handle *local_fwnode; +}; + #endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 76f39754e7b0299df616bc3cb909f9a35fce9ea1..acff9437e5c3776e48a9357af40291165719416d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -159,11 +159,11 @@ struct badblocks; #if defined(CONFIG_BLK_DEV_INTEGRITY) struct blk_integrity { - struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -722,11 +722,9 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) #if defined(CONFIG_BLK_DEV_INTEGRITY) extern void blk_integrity_add(struct gendisk *); extern void blk_integrity_del(struct gendisk *); -extern void blk_integrity_revalidate(struct gendisk *); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline void blk_integrity_add(struct gendisk *disk) { } static inline void blk_integrity_del(struct gendisk *disk) { } -static inline void blk_integrity_revalidate(struct gendisk *disk) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ #else /* CONFIG_BLOCK */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index db373b9d322361f7553cfbe36026d918fb9d09ee..a89d37e8b3873cc8e6fa79389a6c0d3a2f4843ab 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,6 +40,11 @@ struct vm_area_struct; #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP 0x2000000u +#else +#define ___GFP_NOLOCKDEP 0 +#endif /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* @@ -179,8 +184,11 @@ struct vm_area_struct; #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT 25 +#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* @@ -202,8 +210,16 @@ struct vm_area_struct; * * GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. * * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. * * GFP_USER is for userspace allocations that also need to be directly * accessibly by the kernel or hardware. It is typically used by hardware @@ -297,8 +313,8 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) /* * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the - * zone to use given the lowest 4 bits of gfp_t. Entries are ZONE_SHIFT long - * and there are 16 of them to cover all possible combinations of + * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT + * bits long and there are 16 of them to cover all possible combinations of * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 933d936566055de430f9db64ae152eb31785b7ff..8f702fcbe4853e349235813c3438493b362fea3c 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -170,14 +170,14 @@ static inline struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_descs *__must_check @@ -191,7 +191,7 @@ static inline struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void gpiod_put(struct gpio_desc *desc) @@ -231,14 +231,14 @@ static inline struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_descs *__must_check @@ -252,7 +252,7 @@ static inline struct gpio_descs *__must_check devm_gpiod_get_array_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 846f3b9894805c482242c8b0117fe2537842e5bb..393582867afddb17e2969a659bedcb975f710208 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -168,7 +168,7 @@ struct gpio_chip { unsigned int irq_base; irq_flow_handler_t irq_handler; unsigned int irq_default_type; - int irq_chained_parent; + unsigned int irq_chained_parent; bool irq_nested; bool irq_need_valid_mask; unsigned long *irq_valid_mask; @@ -244,12 +244,12 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, - int parent_irq, + unsigned int parent_irq, irq_flow_handler_t parent_handler); void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, - int parent_irq); + unsigned int parent_irq); int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, struct irq_chip *irqchip, diff --git a/include/linux/gpio/gpio-reg.h b/include/linux/gpio/gpio-reg.h new file mode 100644 index 0000000000000000000000000000000000000000..90e0b9060e6dfc4c6d4ecf5781f0487ee283b657 --- /dev/null +++ b/include/linux/gpio/gpio-reg.h @@ -0,0 +1,13 @@ +#ifndef GPIO_REG_H +#define GPIO_REG_H + +struct device; +struct irq_domain; + +struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg, + int base, int num, const char *label, u32 direction, u32 def_out, + const char *const *names, struct irq_domain *irqdom, const int *irqs); + +int gpio_reg_resume(struct gpio_chip *gc); + +#endif diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index c0d712d22b079ebc16129ef2618f41762276cf5e..f738d50cc17d3fcaa0b9b7cf681b70dd2646897d 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -56,7 +56,14 @@ struct gpiod_lookup_table { .flags = _flags, \ } +#ifdef CONFIG_GPIOLIB void gpiod_add_lookup_table(struct gpiod_lookup_table *table); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); +#else +static inline +void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} +static inline +void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} +#endif #endif /* __LINUX_GPIO_MACHINE_H */ diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 7ef111d3ecc5561c419dc16ff4c08d88016e9085..f32d7c392c1ec9532a51e801e45a22e4942d0661 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -231,6 +231,8 @@ struct hid_sensor_common { unsigned usage_id; atomic_t data_ready; atomic_t user_requested_state; + int poll_interval; + int raw_hystersis; struct iio_trigger *trigger; int timestamp_ns_scale; struct hid_sensor_hub_attribute_info poll; diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 30c7dc45e45fcc0110dfaae19ff69d523621414b..761f86242473609ed94cee70672e9e639e32e818 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -45,6 +45,14 @@ #define HID_USAGE_SENSOR_DATA_ATMOSPHERIC_PRESSURE 0x200430 #define HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE 0x200431 +/* Tempreture (200033) */ +#define HID_USAGE_SENSOR_TEMPERATURE 0x200033 +#define HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE 0x200434 + +/* humidity */ +#define HID_USAGE_SENSOR_HUMIDITY 0x200032 +#define HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY 0x200433 + /* Gyro 3D: (200076) */ #define HID_USAGE_SENSOR_GYRO_3D 0x200076 #define HID_USAGE_SENSOR_DATA_ANGL_VELOCITY 0x200456 diff --git a/include/linux/hid.h b/include/linux/hid.h index 28f38e2b8f309387b7a983937b50908c3095f7d9..5be325d890d96f9d823753e92296c379e3751076 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -268,6 +268,8 @@ struct hid_item { #define HID_CP_APPLICATIONLAUNCHBUTTONS 0x000c0180 #define HID_CP_GENERICGUIAPPLICATIONCONTROLS 0x000c0200 +#define HID_DG_DEVICECONFIG 0x000d000e +#define HID_DG_DEVICESETTINGS 0x000d0023 #define HID_DG_CONFIDENCE 0x000d0047 #define HID_DG_WIDTH 0x000d0048 #define HID_DG_HEIGHT 0x000d0049 @@ -322,7 +324,7 @@ struct hid_item { #define HID_QUIRK_MULTI_INPUT 0x00000040 #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 #define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 -#define HID_QUIRK_NO_INIT_INPUT_REPORTS 0x00000200 +/* 0x00000200 reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL 0x00000400 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID 0x00020000 @@ -541,7 +543,6 @@ struct hid_device { /* device report descriptor */ struct list_head inputs; /* The list of inputs */ void *hiddev; /* The hiddev structure */ void *hidraw; - int minor; /* Hiddev minor number */ int open; /* is the device open by anyone? */ char name[128]; /* Device name */ diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h index a5dd8148660b7eb15482c99b1a602226007ec840..9216222229578075168262ac1c0cec5f88619f57 100644 --- a/include/linux/hiddev.h +++ b/include/linux/hiddev.h @@ -32,6 +32,18 @@ * In-kernel definitions. */ +struct hiddev { + int minor; + int exist; + int open; + struct mutex existancelock; + wait_queue_head_t wait; + struct hid_device *hid; + struct list_head list; + spinlock_t list_lock; + bool initialized; +}; + struct hid_device; struct hid_usage; struct hid_field; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 1ffbf2a8cb997a7986e0c44c2f72ce52814445f2..3d04aa1dc83e48207bc97bf2bce32f5b53291d24 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -26,6 +26,7 @@ enum host1x_class { HOST1X_CLASS_HOST1X = 0x1, HOST1X_CLASS_GR2D = 0x51, HOST1X_CLASS_GR2D_SB = 0x52, + HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, }; diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 249e579ecd4c4d7ecf06f38e3c0fad75ba46000b..8c5b10eb7265fb221360cc9bb1a4eebf0dde06ca 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -276,8 +276,6 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) return timer->base->cpu_base->hres_active; } -extern void hrtimer_peek_ahead_timers(void); - /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -300,8 +298,6 @@ extern unsigned int hrtimer_resolution; #define hrtimer_resolution (unsigned int)LOW_RES_NSEC -static inline void hrtimer_peek_ahead_timers(void) { } - static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; @@ -456,7 +452,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer, } /* Precise sleep: */ -extern long hrtimer_nanosleep(struct timespec *rqtp, +extern long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid); diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 88b6737491210aaf7a2ba66756672188992902ac..ceb751987c401e4015fc0c70ffb13d8d5b3eef27 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -337,7 +337,7 @@ struct hwmon_ops { int (*read)(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val); int (*read_string)(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, char **str); + u32 attr, int channel, const char **str); int (*write)(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val); }; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 970771a5f7390268abb5c43ba1e1c8df48f2caa4..e09fc8290c2f02d92689f4499bcf2a22ae8ce8be 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -491,6 +491,12 @@ struct vmbus_channel_rescind_offer { u32 child_relid; } __packed; +static inline u32 +hv_ringbuffer_pending_size(const struct hv_ring_buffer_info *rbi) +{ + return rbi->ring_buffer->pending_send_sz; +} + /* * Request Offer -- no parameters, SynIC message contains the partition ID * Set Snoop -- no parameters, SynIC message contains the partition ID @@ -524,10 +530,10 @@ struct vmbus_channel_open_channel { u32 target_vp; /* - * The upstream ring buffer begins at offset zero in the memory - * described by RingBufferGpadlHandle. The downstream ring buffer - * follows it at this offset (in pages). - */ + * The upstream ring buffer begins at offset zero in the memory + * described by RingBufferGpadlHandle. The downstream ring buffer + * follows it at this offset (in pages). + */ u32 downstream_ringbuffer_pageoffset; /* User-specific data to be passed along to the server endpoint. */ @@ -1013,7 +1019,7 @@ extern int vmbus_open(struct vmbus_channel *channel, u32 recv_ringbuffersize, void *userdata, u32 userdatalen, - void(*onchannel_callback)(void *context), + void (*onchannel_callback)(void *context), void *context); extern void vmbus_close(struct vmbus_channel *channel); @@ -1155,6 +1161,17 @@ static inline void *hv_get_drvdata(struct hv_device *dev) return dev_get_drvdata(&dev->device); } +struct hv_ring_buffer_debug_info { + u32 current_interrupt_mask; + u32 current_read_index; + u32 current_write_index; + u32 bytes_avail_toread; + u32 bytes_avail_towrite; +}; + +void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); + /* Vmbus interface */ #define vmbus_driver_register(driver) \ __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) @@ -1428,7 +1445,7 @@ struct hyperv_service_callback { char *log_msg; uuid_le data; struct vmbus_channel *channel; - void (*callback) (void *context); + void (*callback)(void *context); }; #define MAX_SRV_VER 0x7ffffff @@ -1504,16 +1521,6 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) cached_write_sz = hv_get_cached_bytes_to_write(rbi); if (cached_write_sz < pending_sz) vmbus_setevent(channel); - - return; -} - -static inline void -init_cached_read_index(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *rbi = &channel->inbound; - - rbi->cached_read_index = rbi->ring_buffer->read_index; } /* @@ -1549,76 +1556,48 @@ static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi) /* * An API to support in-place processing of incoming VMBUS packets. */ -#define VMBUS_PKT_TRAILER 8 -static inline struct vmpacket_descriptor * -get_next_pkt_raw(struct vmbus_channel *channel) +/* Get data payload associated with descriptor */ +static inline void *hv_pkt_data(const struct vmpacket_descriptor *desc) { - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 priv_read_loc = ring_info->priv_read_index; - void *ring_buffer = hv_get_ring_buffer(ring_info); - u32 dsize = ring_info->ring_datasize; - /* - * delta is the difference between what is available to read and - * what was already consumed in place. We commit read index after - * the whole batch is processed. - */ - u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? - priv_read_loc - ring_info->ring_buffer->read_index : - (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; - u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); - - if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) - return NULL; - - return ring_buffer + priv_read_loc; + return (void *)((unsigned long)desc + (desc->offset8 << 3)); } -/* - * A helper function to step through packets "in-place" - * This API is to be called after each successful call - * get_next_pkt_raw(). - */ -static inline void put_pkt_raw(struct vmbus_channel *channel, - struct vmpacket_descriptor *desc) +/* Get data size associated with descriptor */ +static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc) { - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 packetlen = desc->len8 << 3; - u32 dsize = ring_info->ring_datasize; - - /* - * Include the packet trailer. - */ - ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; - ring_info->priv_read_index %= dsize; + return (desc->len8 << 3) - (desc->offset8 << 3); } + +struct vmpacket_descriptor * +hv_pkt_iter_first(struct vmbus_channel *channel); + +struct vmpacket_descriptor * +__hv_pkt_iter_next(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt); + +void hv_pkt_iter_close(struct vmbus_channel *channel); + /* - * This call commits the read index and potentially signals the host. - * Here is the pattern for using the "in-place" consumption APIs: - * - * init_cached_read_index(); - * - * while (get_next_pkt_raw() { - * process the packet "in-place"; - * put_pkt_raw(); - * } - * if (packets processed in place) - * commit_rd_index(); + * Get next packet descriptor from iterator + * If at end of list, return NULL and update host. */ -static inline void commit_rd_index(struct vmbus_channel *channel) +static inline struct vmpacket_descriptor * +hv_pkt_iter_next(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt) { - struct hv_ring_buffer_info *ring_info = &channel->inbound; - /* - * Make sure all reads are done before we update the read index since - * the writer may start writing to the read area once the read index - * is updated. - */ - virt_rmb(); - ring_info->ring_buffer->read_index = ring_info->priv_read_index; + struct vmpacket_descriptor *nxt; + + nxt = __hv_pkt_iter_next(channel, pkt); + if (!nxt) + hv_pkt_iter_close(channel); - hv_signal_on_read(channel); + return nxt; } +#define foreach_vmbus_pkt(pkt, channel) \ + for (pkt = hv_pkt_iter_first(channel); pkt; \ + pkt = hv_pkt_iter_next(channel, pkt)) #endif /* _HYPERV_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 6b183521c61697aa3af56082b830247f960d06fb..72d0ece70ed30dba632f8df51a4cc7d229f02300 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -38,6 +38,7 @@ extern struct bus_type i2c_bus_type; extern struct device_type i2c_adapter_type; +extern struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ @@ -149,6 +150,7 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) + * @disable_i2c_core_irq_mapping: Tell the i2c-core to not do irq-mapping * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. @@ -212,6 +214,8 @@ struct i2c_driver { int (*detect)(struct i2c_client *, struct i2c_board_info *); const unsigned short *address_list; struct list_head clients; + + bool disable_i2c_core_irq_mapping; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) @@ -303,6 +307,8 @@ static inline int i2c_slave_event(struct i2c_client *client, * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware * @properties: additional device properties for the device + * @resources: resources associated with the device + * @num_resources: number of resources in the @resources array * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and @@ -325,6 +331,8 @@ struct i2c_board_info { struct device_node *of_node; struct fwnode_handle *fwnode; const struct property_entry *properties; + const struct resource *resources; + unsigned int num_resources; int irq; }; @@ -824,11 +832,18 @@ static inline const struct of_device_id #if IS_ENABLED(CONFIG_ACPI) u32 i2c_acpi_find_bus_speed(struct device *dev); +struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, + struct i2c_board_info *info); #else static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; } +static inline struct i2c_client *i2c_acpi_new_device(struct device *dev, + int index, struct i2c_board_info *info) +{ + return NULL; +} #endif /* CONFIG_ACPI */ #endif /* _LINUX_I2C_H */ diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/i2c/i2c-hid.h index 7aa901d920585949b2ecf6c546d65cb004daceea..1fb088239d12b9f4a5a849a2ce662343743c8bd9 100644 --- a/include/linux/i2c/i2c-hid.h +++ b/include/linux/i2c/i2c-hid.h @@ -14,9 +14,13 @@ #include +struct regulator; + /** * struct i2chid_platform_data - used by hid over i2c implementation. * @hid_descriptor_address: i2c register where the HID descriptor is stored. + * @supply: regulator for powering on the device. + * @post_power_delay_ms: delay after powering on before device is usable. * * Note that it is the responsibility of the platform driver (or the acpi 5.0 * driver, or the flattened device tree) to setup the irq related to the gpio in @@ -31,6 +35,8 @@ */ struct i2c_hid_platform_data { u16 hid_descriptor_address; + struct regulator *supply; + int post_power_delay_ms; }; #endif /* __LINUX_I2C_HID_H */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 2f51c1724b5af647423770dfadafaa7a8837b600..6980ca322074b9cd80f69517195a02ea837c3f34 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -88,7 +88,7 @@ static inline bool ata_pm_request(struct request *rq) ide_req(rq)->type == ATA_PRIV_PM_RESUME); } -/* Error codes returned in rq->errors to the higher part of the driver. */ +/* Error codes returned in result to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, IDE_DRV_ERROR_FILEMARK = 102, diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0dd9498c694f95c166508e40896c94ccc724075c..69033353d0d1c2bc28cc831b4cea67ca49c0bfdf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -7,7 +7,7 @@ * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright (c) 2016 Intel Deutschland GmbH + * Copyright (c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1411,6 +1411,8 @@ struct ieee80211_ht_operation { #define IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED 3 #define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT 0x0010 +#define IEEE80211_HT_OP_MODE_CCFS2_SHIFT 5 +#define IEEE80211_HT_OP_MODE_CCFS2_MASK 0x1fe0 /* for stbc_param */ #define IEEE80211_HT_STBC_PARAM_DUAL_BEACON 0x0040 @@ -1525,14 +1527,14 @@ enum ieee80211_vht_chanwidth { * This structure is the "VHT operation element" as * described in 802.11ac D3.0 8.4.2.161 * @chan_width: Operating channel width + * @center_freq_seg0_idx: center freq segment 0 index * @center_freq_seg1_idx: center freq segment 1 index - * @center_freq_seg2_idx: center freq segment 2 index * @basic_mcs_set: VHT Basic MCS rate set */ struct ieee80211_vht_operation { u8 chan_width; + u8 center_freq_seg0_idx; u8 center_freq_seg1_idx; - u8 center_freq_seg2_idx; __le16 basic_mcs_set; } __packed; @@ -1721,6 +1723,9 @@ enum ieee80211_statuscode { WLAN_STATUS_REJECT_DSE_BAND = 96, WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99, WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103, + /* 802.11ai */ + WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 108, + WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, }; @@ -2102,6 +2107,12 @@ enum ieee80211_key_len { #define FILS_NONCE_LEN 16 #define FILS_MAX_KEK_LEN 64 +#define FILS_ERP_MAX_USERNAME_LEN 16 +#define FILS_ERP_MAX_REALM_LEN 253 +#define FILS_ERP_MAX_RRK_LEN 64 + +#define PMK_MAX_LEN 48 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, @@ -2166,37 +2177,37 @@ enum ieee80211_tdls_actioncode { #define WLAN_BSS_COEX_INFORMATION_REQUEST BIT(0) /** - * enum - mesh synchronization method identifier + * enum ieee80211_mesh_sync_method - mesh synchronization method identifier * * @IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET: the default synchronization method * @IEEE80211_SYNC_METHOD_VENDOR: a vendor specific synchronization method * that will be specified in a vendor specific information element */ -enum { +enum ieee80211_mesh_sync_method { IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET = 1, IEEE80211_SYNC_METHOD_VENDOR = 255, }; /** - * enum - mesh path selection protocol identifier + * enum ieee80211_mesh_path_protocol - mesh path selection protocol identifier * * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will * be specified in a vendor specific information element */ -enum { +enum ieee80211_mesh_path_protocol { IEEE80211_PATH_PROTOCOL_HWMP = 1, IEEE80211_PATH_PROTOCOL_VENDOR = 255, }; /** - * enum - mesh path selection metric identifier + * enum ieee80211_mesh_path_metric - mesh path selection metric identifier * * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be * specified in a vendor specific information element */ -enum { +enum ieee80211_mesh_path_metric { IEEE80211_PATH_METRIC_AIRTIME = 1, IEEE80211_PATH_METRIC_VENDOR = 255, }; @@ -2305,6 +2316,32 @@ struct ieee80211_timeout_interval_ie { __le32 value; } __packed; +/** + * enum ieee80211_idle_options - BSS idle options + * @WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE: the station should send an RSN + * protected frame to the AP to reset the idle timer at the AP for + * the station. + */ +enum ieee80211_idle_options { + WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE = BIT(0), +}; + +/** + * struct ieee80211_bss_max_idle_period_ie + * + * This structure refers to "BSS Max idle period element" + * + * @max_idle_period: indicates the time period during which a station can + * refrain from transmitting frames to its associated AP without being + * disassociated. In units of 1000 TUs. + * @idle_options: indicates the options associated with the BSS idle capability + * as specified in &enum ieee80211_idle_options. + */ +struct ieee80211_bss_max_idle_period_ie { + __le16 max_idle_period; + u8 idle_options; +} __packed; + /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, @@ -2345,13 +2382,21 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_SMS4 SUITE(0x001472, 1) /* AKM suite selectors */ -#define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) -#define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) -#define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) -#define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) -#define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) -#define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) -#define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) +#define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) +#define WLAN_AKM_SUITE_FT_8021X SUITE(0x000FAC, 3) +#define WLAN_AKM_SUITE_FT_PSK SUITE(0x000FAC, 4) +#define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) +#define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) +#define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) +#define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) +#define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) +#define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) +#define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) +#define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) +#define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) +#define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_MAX_KEY_LEN 32 diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index c5847dc75a937a5ceed4a762ffcc9f2644b499bc..0c16866a7aacc0cbc361e8d5b902252f71323f3b 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -48,6 +48,7 @@ struct br_ip_list { #define BR_MCAST_FLOOD BIT(11) #define BR_MULTICAST_TO_UNICAST BIT(12) #define BR_VLAN_TUNNEL BIT(13) +#define BR_BCAST_FLOOD BIT(14) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8d5fcd6284ce0f4702d9eb28703bf2ae80d7e399..283dc2f5364d77c13df5a16ed0474154ee6724ff 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_interesect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support. + */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } diff --git a/include/linux/inet.h b/include/linux/inet.h index 4cca05c9678e760a07c0c8dcd88f73cc90ddfdc1..636ebe87e6f886a2edbb6e3e8bd01b360998127a 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -43,6 +43,8 @@ #define _LINUX_INET_H #include +#include +#include /* * These mimic similar macros defined in user-space for inet_ntop(3). @@ -54,4 +56,8 @@ extern __be32 in_aton(const char *str); extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); + +extern int inet_pton_with_scope(struct net *net, unsigned short af, + const char *src, const char *port, struct sockaddr_storage *addr); + #endif /* _LINUX_INET_H */ diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ee971f335a8b659f04e5a3048ca70e5bc361ee5f..a2e9d6ea1349fb85418a9ebafc55dd08d16ca6b0 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -153,8 +153,8 @@ struct in_ifaddr { int register_inetaddr_notifier(struct notifier_block *nb); int unregister_inetaddr_notifier(struct notifier_block *nb); -void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf); +void inet_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv4_devconf *devconf); struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) diff --git a/include/linux/init.h b/include/linux/init.h index 79af0962fd525980064241c653d748f2f9be63c0..94769d687cf07d8b41081f3e966d7319b5a71ee9 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -39,7 +39,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold notrace __latent_entropy +#define __init __section(.init.text) __cold __inittrace __latent_entropy #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) @@ -68,8 +68,10 @@ #ifdef MODULE #define __exitused +#define __inittrace notrace #else #define __exitused __used +#define __inittrace #endif #define __exit __section(.exit.text) __exitused __cold notrace diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 91d9049f003938a77002d76f8a2f0b3313437294..e049526bc188f765c7521653cf6fb90a574bc673 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -181,6 +182,7 @@ extern struct cred init_cred; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = RB_ROOT, \ + .pi_top_task = NULL, \ .pi_waiters_leftmost = NULL, #else # define INIT_RT_MUTEXES(tsk) @@ -202,6 +204,13 @@ extern struct cred init_cred; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_LIVEPATCH +# define INIT_LIVEPATCH(tsk) \ + .patch_state = KLP_UNDEFINED, +#else +# define INIT_LIVEPATCH(tsk) +#endif + #ifdef CONFIG_THREAD_INFO_IN_TASK # define INIT_TASK_TI(tsk) \ .thread_info = INIT_THREAD_INFO(tsk), \ @@ -210,6 +219,12 @@ extern struct cred init_cred; # define INIT_TASK_TI(tsk) #endif +#ifdef CONFIG_SECURITY +#define INIT_TASK_SECURITY .security = NULL, +#else +#define INIT_TASK_SECURITY +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -288,6 +303,8 @@ extern struct cred init_cred; INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ INIT_KASAN(tsk) \ + INIT_LIVEPATCH(tsk) \ + INIT_TASK_SECURITY \ } diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h deleted file mode 100644 index 16625d799b6fc1ca85f89d5d9f81415332c8e86e..0000000000000000000000000000000000000000 --- a/include/linux/input/eeti_ts.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef LINUX_INPUT_EETI_TS_H -#define LINUX_INPUT_EETI_TS_H - -struct eeti_ts_platform_data { - int irq_gpio; - unsigned int irq_active_high; -}; - -#endif /* LINUX_INPUT_EETI_TS_H */ - diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 37b04a0fdea4e004a63c053bfb3c90d5de3612f1..6174733a57ebc60b68fe674d1c271bb8af6d9d51 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -49,6 +49,8 @@ struct matrix_keymap_data { * @wakeup: controls whether the device should be set up as wakeup * source * @no_autorepeat: disable key autorepeat + * @drive_inactive_cols: drive inactive columns during scan, rather than + * making them inputs. * * This structure represents platform-specific data that use used by * matrix_keypad driver to perform proper initialization. @@ -73,6 +75,7 @@ struct matrix_keypad_platform_data { bool active_low; bool wakeup; bool no_autorepeat; + bool drive_inactive_cols; }; int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c573a52ae440e83894709328820bacb79391d15c..485a5b48f0380460fa0b46243e34aa6c83796c07 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -30,6 +30,8 @@ #include #include #include +#include + #include #include @@ -72,24 +74,8 @@ #define OFFSET_STRIDE (9) -#ifdef CONFIG_64BIT #define dmar_readq(a) readq(a) #define dmar_writeq(a,v) writeq(v,a) -#else -static inline u64 dmar_readq(void __iomem *addr) -{ - u32 lo, hi; - lo = readl(addr); - hi = readl(addr + 4); - return (((u64) hi) << 32) + lo; -} - -static inline void dmar_writeq(void __iomem *addr, u64 val) -{ - writel((u32)val, addr); - writel((u32)(val >> 32), addr + 4); -} -#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 53144e78a3691d2d0ea46b48319c5af337f0fda2..a6fba4804672858b87430b81c0af52378bd7ed89 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -155,7 +155,7 @@ extern int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id); -extern void free_irq(unsigned int, void *); +extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); struct device; diff --git a/include/linux/io.h b/include/linux/io.h index 82ef36eac8a16a8fc2b7b021f345910ac04b04b4..2195d9ea4aaae0c054f04aab2da7cff851d2b997 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -90,6 +90,27 @@ void devm_memunmap(struct device *dev, void *addr); void *__devm_memremap_pages(struct device *dev, struct resource *res); +#ifdef CONFIG_PCI +/* + * The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and + * Posting") mandate non-posted configuration transactions. There is + * no ioremap API in the kernel that can guarantee non-posted write + * semantics across arches so provide a default implementation for + * mapping PCI config space that defaults to ioremap_nocache(); arches + * should override it if they have memory mapping implementations that + * guarantee non-posted writes semantics to make the memory mapping + * compliant with the PCI specification. + */ +#ifndef pci_remap_cfgspace +#define pci_remap_cfgspace pci_remap_cfgspace +static inline void __iomem *pci_remap_cfgspace(phys_addr_t offset, + size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif +#endif + /* * Some systems do not have legacy ISA devices. * /dev/port is not a valid interface on these systems. diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 7291810067eb636dfa622b8b385ac45d7a520395..f753e788da3109cc85cd07ac44b2f7e47fcde9b6 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -41,6 +41,7 @@ struct iomap { u16 type; /* type of mapping */ u16 flags; /* flags for mapping */ struct block_device *bdev; /* block device for I/O */ + struct dax_device *dax_dev; /* dax_dev for dax operations */ }; /* diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2e4de0deee531adbd7c1cd8ff1d25e0d0eb98d47..2cb54adc4a334aa3f3a1732c35eaf9749d64b9e8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -19,12 +19,12 @@ #ifndef __LINUX_IOMMU_H #define __LINUX_IOMMU_H +#include +#include +#include #include #include #include -#include -#include -#include #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -32,10 +32,13 @@ #define IOMMU_NOEXEC (1 << 3) #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ /* - * This is to make the IOMMU API setup privileged - * mapppings accessible by the master only at higher - * privileged execution level and inaccessible at - * less privileged levels. + * Where the bus hardware includes a privilege level as part of its access type + * markings, and certain devices are capable of issuing transactions marked as + * either 'supervisor' or 'user', the IOMMU_PRIV flag requests that the other + * given permission flags only apply to accesses at the higher privilege level, + * and that unprivileged transactions should have as little access as possible. + * This would usually imply the same permissions as kernel mappings on the CPU, + * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) @@ -336,46 +339,9 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); -/** - * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework - * @domain: the iommu domain where the fault has happened - * @dev: the device where the fault has happened - * @iova: the faulting address - * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) - * - * This function should be called by the low-level IOMMU implementations - * whenever IOMMU faults happen, to allow high-level users, that are - * interested in such events, to know about them. - * - * This event may be useful for several possible use cases: - * - mere logging of the event - * - dynamic TLB/PTE loading - * - if restarting of the faulting device is required - * - * Returns 0 on success and an appropriate error code otherwise (if dynamic - * PTE/TLB loading will one day be supported, implementations will be able - * to tell whether it succeeded or not according to this return value). - * - * Specifically, -ENOSYS is returned if a fault handler isn't installed - * (though fault handlers can also return -ENOSYS, in case they want to - * elicit the default behavior of the IOMMU drivers). - */ -static inline int report_iommu_fault(struct iommu_domain *domain, - struct device *dev, unsigned long iova, int flags) -{ - int ret = -ENOSYS; - - /* - * if upper layers showed interest and installed a fault handler, - * invoke it. - */ - if (domain->handler) - ret = domain->handler(domain, dev, iova, flags, - domain->handler_token); - trace_io_page_fault(dev, iova, flags); - return ret; -} +extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags); static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, diff --git a/include/linux/iova.h b/include/linux/iova.h index f27bb2c62fca5cf19a74f3f03c64b8cb9036bd19..e0a892ae45c0834a82d42e06a6bfab65b424c899 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -82,6 +82,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } +#if IS_ENABLED(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); @@ -106,5 +107,95 @@ void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); +#else +static inline int iova_cache_get(void) +{ + return -ENOTSUPP; +} + +static inline void iova_cache_put(void) +{ +} + +static inline struct iova *alloc_iova_mem(void) +{ + return NULL; +} + +static inline void free_iova_mem(struct iova *iova) +{ +} + +static inline void free_iova(struct iova_domain *iovad, unsigned long pfn) +{ +} + +static inline void __free_iova(struct iova_domain *iovad, struct iova *iova) +{ +} + +static inline struct iova *alloc_iova(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn, + bool size_aligned) +{ + return NULL; +} + +static inline void free_iova_fast(struct iova_domain *iovad, + unsigned long pfn, + unsigned long size) +{ +} + +static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn) +{ + return 0; +} + +static inline struct iova *reserve_iova(struct iova_domain *iovad, + unsigned long pfn_lo, + unsigned long pfn_hi) +{ + return NULL; +} + +static inline void copy_reserved_iova(struct iova_domain *from, + struct iova_domain *to) +{ +} + +static inline void init_iova_domain(struct iova_domain *iovad, + unsigned long granule, + unsigned long start_pfn, + unsigned long pfn_32bit) +{ +} + +static inline struct iova *find_iova(struct iova_domain *iovad, + unsigned long pfn) +{ + return NULL; +} + +static inline void put_iova_domain(struct iova_domain *iovad) +{ +} + +static inline struct iova *split_and_remove_iova(struct iova_domain *iovad, + struct iova *iova, + unsigned long pfn_lo, + unsigned long pfn_hi) +{ + return NULL; +} + +static inline void free_cpu_cached_iovas(unsigned int cpu, + struct iova_domain *iovad) +{ +} +#endif #endif diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 9d84942ae2e577835a338b8a352f9c93eabe148b..71fd92d81b266edf1781c542de550159bc33949a 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -8,8 +8,7 @@ #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ /* used by in-kernel data structures */ -struct kern_ipc_perm -{ +struct kern_ipc_perm { spinlock_t lock; bool deleted; int id; @@ -18,9 +17,9 @@ struct kern_ipc_perm kgid_t gid; kuid_t cuid; kgid_t cgid; - umode_t mode; + umode_t mode; unsigned long seq; void *security; -}; +} ____cacheline_aligned_in_smp; #endif /* _LINUX_IPC_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 71be5b330d21305af23f8f7e1779988930755ed8..e1b442996f810529a755533270b6d21c350fbd5a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -37,6 +37,7 @@ struct ipv6_devconf { __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO + __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif @@ -70,6 +71,7 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; + __s32 disable_policy; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 97cbca19430d82aa2b4c835db1edf2d6620517ba..1fa293a37f4a7e18bff0b2989764454dd0732df7 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -132,6 +132,9 @@ #define GIC_BASER_SHAREABILITY(reg, type) \ (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + #define GICR_PROPBASER_SHAREABILITY_SHIFT (10) #define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) #define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) @@ -156,6 +159,8 @@ #define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) #define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) #define GICR_PENDBASER_SHAREABILITY_SHIFT (10) #define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) @@ -232,12 +237,18 @@ #define GITS_CTLR_QUIESCENT (1U << 31) #define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) #define GITS_TYPER_HWCOLLCNT_SHIFT 24 +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 + #define GITS_CBASER_VALID (1ULL << 63) #define GITS_CBASER_SHAREABILITY_SHIFT (10) #define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) @@ -290,6 +301,7 @@ #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) @@ -337,9 +349,11 @@ #define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 #define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 #define E_ITS_MAPC_PROCNUM_OOR 0x010902 #define E_ITS_MAPC_COLLECTION_OOR 0x010903 #define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 #define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 #define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 #define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 @@ -403,6 +417,10 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) #define ICH_VMCR_CBPR_SHIFT 4 #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) #define ICH_VMCR_EOIM_SHIFT 9 diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index dc30f3d057eb0801e9ae8d22fc3ce11943d188d1..d3453ee072fc8aa859544e07598398884239d56f 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -25,7 +25,18 @@ #define GICC_ENABLE 0x1 #define GICC_INT_PRI_THRESHOLD 0xf0 -#define GIC_CPU_CTRL_EOImodeNS (1 << 9) +#define GIC_CPU_CTRL_EnableGrp0_SHIFT 0 +#define GIC_CPU_CTRL_EnableGrp0 (1 << GIC_CPU_CTRL_EnableGrp0_SHIFT) +#define GIC_CPU_CTRL_EnableGrp1_SHIFT 1 +#define GIC_CPU_CTRL_EnableGrp1 (1 << GIC_CPU_CTRL_EnableGrp1_SHIFT) +#define GIC_CPU_CTRL_AckCtl_SHIFT 2 +#define GIC_CPU_CTRL_AckCtl (1 << GIC_CPU_CTRL_AckCtl_SHIFT) +#define GIC_CPU_CTRL_FIQEn_SHIFT 3 +#define GIC_CPU_CTRL_FIQEn (1 << GIC_CPU_CTRL_FIQEn_SHIFT) +#define GIC_CPU_CTRL_CBPR_SHIFT 4 +#define GIC_CPU_CTRL_CBPR (1 << GIC_CPU_CTRL_CBPR_SHIFT) +#define GIC_CPU_CTRL_EOImodeNS_SHIFT 9 +#define GIC_CPU_CTRL_EOImodeNS (1 << GIC_CPU_CTRL_EOImodeNS_SHIFT) #define GICC_IAR_INT_ID_MASK 0x3ff #define GICC_INT_SPURIOUS 1023 @@ -84,8 +95,19 @@ #define GICH_LR_EOI (1 << 19) #define GICH_LR_HW (1 << 31) -#define GICH_VMCR_CTRL_SHIFT 0 -#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) +#define GICH_VMCR_ENABLE_GRP0_SHIFT 0 +#define GICH_VMCR_ENABLE_GRP0_MASK (1 << GICH_VMCR_ENABLE_GRP0_SHIFT) +#define GICH_VMCR_ENABLE_GRP1_SHIFT 1 +#define GICH_VMCR_ENABLE_GRP1_MASK (1 << GICH_VMCR_ENABLE_GRP1_SHIFT) +#define GICH_VMCR_ACK_CTL_SHIFT 2 +#define GICH_VMCR_ACK_CTL_MASK (1 << GICH_VMCR_ACK_CTL_SHIFT) +#define GICH_VMCR_FIQ_EN_SHIFT 3 +#define GICH_VMCR_FIQ_EN_MASK (1 << GICH_VMCR_FIQ_EN_SHIFT) +#define GICH_VMCR_CBPR_SHIFT 4 +#define GICH_VMCR_CBPR_MASK (1 << GICH_VMCR_CBPR_SHIFT) +#define GICH_VMCR_EOI_MODE_SHIFT 9 +#define GICH_VMCR_EOI_MODE_MASK (1 << GICH_VMCR_EOI_MODE_SHIFT) + #define GICH_VMCR_PRIMASK_SHIFT 27 #define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) #define GICH_VMCR_BINPOINT_SHIFT 21 diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 7b49c71c968ba0935a5482afb71055107563542b..2b0e56619e5377064e9dfe7e701c09b36c200bf2 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -258,7 +258,6 @@ extern unsigned int gic_present; extern void gic_init(unsigned long gic_base_addr, unsigned long gic_addrspace_size, unsigned int cpu_vec, unsigned int irqbase); -extern void gic_clocksource_init(unsigned int); extern u64 gic_read_count(void); extern unsigned int gic_get_count_width(void); extern u64 gic_read_compare(void); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index dfaa1f4dcb0c54911c16a8683a38dabbfb4dcb0f..606b6bce3a5bb062d12b35d84bf8a022cbe0abe8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -491,6 +491,8 @@ struct jbd2_journal_handle unsigned long h_start_jiffies; unsigned int h_requested_credits; + + unsigned int saved_alloc_context; }; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 624215cebee5235a3375549f62e026b3c7e41f74..734377ad42e9f0e4719edc4750b57245fa2d3f17 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -1,6 +1,7 @@ #ifndef _LINUX_JIFFIES_H #define _LINUX_JIFFIES_H +#include #include #include #include @@ -63,19 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) -/* some arch's have a small-data section that can be accessed register-relative - * but that can only take up to, say, 4-byte variables. jiffies being part of - * an 8-byte variable may not be correctly accessed unless we force the issue - */ -#define __jiffy_data __attribute__((section(".data"))) +#ifndef __jiffy_arch_data +#define __jiffy_arch_data +#endif /* * The 64-bit value is not atomic - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. */ -extern u64 __jiffy_data jiffies_64; -extern unsigned long volatile __jiffy_data jiffies; +extern u64 __cacheline_aligned_in_smp jiffies_64; +extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h index 22a72198c14b7e338f4f10ed334f7c73be8cfcae..4e80f3a9ad58abc815926b701a8fbc83c153b5a9 100644 --- a/include/linux/kbuild.h +++ b/include/linux/kbuild.h @@ -2,14 +2,14 @@ #define __LINUX_KBUILD_H #define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val)) -#define BLANK() asm volatile("\n->" : : ) +#define BLANK() asm volatile("\n.ascii \"->\"" : : ) #define OFFSET(sym, str, mem) \ DEFINE(sym, offsetof(struct str, mem)) #define COMMENT(x) \ - asm volatile("\n->#" x) + asm volatile("\n.ascii \"->#" x "\"") #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c26dc3a8295a9fb2910dcb18946ee86f54ceac8..13bc08aba70470dfa4ab5a5112e11035ac3fbc99 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -47,6 +47,7 @@ /* @a is a power of 2 value */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) @@ -438,6 +439,7 @@ extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern bool parse_option_str(const char *str, const char *option); +extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d419d0e51fe55172e5cc5b42bb4c8c2fd9694141..c9481ebcbc0cab09429bde237f21e41cc7dcb061 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -14,17 +14,15 @@ #if !defined(__ASSEMBLY__) +#include #include #include #ifdef CONFIG_KEXEC_CORE #include -#include #include #include -#include -#include #include #include @@ -62,19 +60,15 @@ #define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE #endif -#define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define KEXEC_CORE_NOTE_NAME "CORE" -#define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4) -#define KEXEC_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) +#define KEXEC_CORE_NOTE_NAME CRASH_CORE_NOTE_NAME + /* * The per-cpu notes area is a list of notes terminated by a "NULL" * note header. For kdump, the code in vmcore.c runs in the context * of the second kernel to combine them into one note. */ #ifndef KEXEC_NOTE_BYTES -#define KEXEC_NOTE_BYTES ( (KEXEC_NOTE_HEAD_BYTES * 2) + \ - KEXEC_CORE_NOTE_NAME_BYTES + \ - KEXEC_CORE_NOTE_DESC_BYTES ) +#define KEXEC_NOTE_BYTES CRASH_CORE_NOTE_BYTES #endif /* @@ -256,33 +250,6 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); -void crash_save_vmcoreinfo(void); -void arch_crash_save_vmcoreinfo(void); -__printf(1, 2) -void vmcoreinfo_append_str(const char *fmt, ...); -phys_addr_t paddr_vmcoreinfo_note(void); - -#define VMCOREINFO_OSRELEASE(value) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(name)) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) -#define VMCOREINFO_NUMBER(name) \ - vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) -#define VMCOREINFO_CONFIG(name) \ - vmcoreinfo_append_str("CONFIG_%s=y\n", #name) extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; @@ -303,31 +270,15 @@ extern int kexec_load_disabled; #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS) -#define VMCOREINFO_BYTES (4096) -#define VMCOREINFO_NOTE_NAME "VMCOREINFO" -#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) -#define VMCOREINFO_NOTE_SIZE (KEXEC_NOTE_HEAD_BYTES*2 + VMCOREINFO_BYTES \ - + VMCOREINFO_NOTE_NAME_BYTES) - /* Location of a reserved region to hold the crash kernel. */ extern struct resource crashk_res; extern struct resource crashk_low_res; -typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; extern note_buf_t __percpu *crash_notes; -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; -extern size_t vmcoreinfo_size; -extern size_t vmcoreinfo_max_size; /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); diff --git a/include/linux/key-type.h b/include/linux/key-type.h index eaee981c55584bc0aa8ee72365449e376713a536..8496cf64575c679b49a0eeedf0c0d3c4d1e2dd1c 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -147,6 +147,14 @@ struct key_type { */ request_key_actor_t request_key; + /* Look up a keyring access restriction (optional) + * + * - NULL is a valid return value (meaning the requested restriction + * is known but will never block addition of a key) + * - should return -EINVAL if the restriction is unknown + */ + struct key_restriction *(*lookup_restriction)(const char *params); + /* internal fields */ struct list_head link; /* link in types list */ struct lock_class_key lock_class; /* key->sem lock class */ diff --git a/include/linux/key.h b/include/linux/key.h index e45212f2777e36f531efe35ad7d56fcf67105c0b..78e25aabedaf6696772347bb25b20c4cb61254db 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include @@ -126,6 +127,17 @@ static inline bool is_key_possessed(const key_ref_t key_ref) return (unsigned long) key_ref & 1UL; } +typedef int (*key_restrict_link_func_t)(struct key *dest_keyring, + const struct key_type *type, + const union key_payload *payload, + struct key *restriction_key); + +struct key_restriction { + key_restrict_link_func_t check; + struct key *key; + struct key_type *keytype; +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -135,7 +147,7 @@ static inline bool is_key_possessed(const key_ref_t key_ref) * - Kerberos TGTs and tickets */ struct key { - atomic_t usage; /* number of references */ + refcount_t usage; /* number of references */ key_serial_t serial; /* key serial number */ union { struct list_head graveyard_link; @@ -161,7 +173,6 @@ struct key { #ifdef KEY_DEBUGGING unsigned magic; #define KEY_DEBUG_MAGIC 0x18273645u -#define KEY_DEBUG_MAGIC_X 0xf8e9dacbu #endif unsigned long flags; /* status flags (change with bitops) */ @@ -205,18 +216,17 @@ struct key { }; /* This is set on a keyring to restrict the addition of a link to a key - * to it. If this method isn't provided then it is assumed that the + * to it. If this structure isn't provided then it is assumed that the * keyring is open to any addition. It is ignored for non-keyring - * keys. + * keys. Only set this value using keyring_restrict(), keyring_alloc(), + * or key_alloc(). * * This is intended for use with rings of trusted keys whereby addition * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION * overrides this, allowing the kernel to add extra keys without * restriction. */ - int (*restrict_link)(struct key *keyring, - const struct key_type *type, - const union key_payload *payload); + struct key_restriction *restrict_link; }; extern struct key *key_alloc(struct key_type *type, @@ -225,9 +235,7 @@ extern struct key *key_alloc(struct key_type *type, const struct cred *cred, key_perm_t perm, unsigned long flags, - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *)); + struct key_restriction *restrict_link); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ @@ -242,7 +250,7 @@ extern void key_put(struct key *key); static inline struct key *__key_get(struct key *key) { - atomic_inc(&key->usage); + refcount_inc(&key->usage); return key; } @@ -303,14 +311,13 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid const struct cred *cred, key_perm_t perm, unsigned long flags, - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *), + struct key_restriction *restrict_link, struct key *dest); extern int restrict_link_reject(struct key *keyring, const struct key_type *type, - const union key_payload *payload); + const union key_payload *payload, + struct key *restriction_key); extern int keyring_clear(struct key *keyring); @@ -321,6 +328,9 @@ extern key_ref_t keyring_search(key_ref_t keyring, extern int keyring_add_key(struct key *keyring, struct key *key); +extern int keyring_restrict(key_ref_t keyring, const char *type, + const char *restriction); + extern struct key *key_lookup(key_serial_t id); static inline key_serial_t key_serial(const struct key *key) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e6284591599ec5361b9abcffb2f731512b76eec9..ca85cb80e99a60e33df79e2ed0d8815953d1d21d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -108,6 +108,8 @@ extern int __must_check kobject_rename(struct kobject *, const char *new_name); extern int __must_check kobject_move(struct kobject *, struct kobject *); extern struct kobject *kobject_get(struct kobject *kobj); +extern struct kobject * __must_check kobject_get_unless_zero( + struct kobject *kobj); extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c328e4f7dcadb4276bddcfbcc97caa323c48a0a3..541df0b5b81530741586d6eaf382e9d1a425ac06 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -267,6 +267,8 @@ extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); +extern bool arch_function_offset_within_entry(unsigned long offset); +extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); @@ -347,6 +349,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif +extern void wait_for_kprobe_optimizer(void); +#else +static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, @@ -379,6 +384,7 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); diff --git a/include/linux/kref.h b/include/linux/kref.h index f4156f88f5575dceb23a78807a12e5fde037c701..29220724bf1cb43df4cc6840f4016fe7ba30d54c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -66,8 +66,6 @@ static inline void kref_get(struct kref *kref) */ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { - WARN_ON(release == NULL); - if (refcount_dec_and_test(&kref->refcount)) { release(kref); return 1; @@ -79,8 +77,6 @@ static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) { - WARN_ON(release == NULL); - if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { release(kref); return 1; @@ -92,8 +88,6 @@ static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) { - WARN_ON(release == NULL); - if (refcount_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index e1cfda4bee588d726e2cfe9089ccc20baa031864..78b44a024eaae8e9a9a0e2c448b4b3e2b7f48719 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -61,7 +61,7 @@ static inline void set_page_stable_node(struct page *page, struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); -int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); void ksm_migrate_page(struct page *newpage, struct page *oldpage); #else /* !CONFIG_KSM */ @@ -94,10 +94,9 @@ static inline int page_referenced_ksm(struct page *page, return 0; } -static inline int rmap_walk_ksm(struct page *page, +static inline void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) { - return 0; } static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d0250744507a284138d0e7e702bb64e1d205e769..8c0664309815ff95b0308b379e863125866f31f6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -115,14 +115,17 @@ static inline bool is_error_page(struct page *page) return IS_ERR(page); } +#define KVM_REQUEST_MASK GENMASK(7,0) +#define KVM_REQUEST_NO_WAKEUP BIT(8) +#define KVM_REQUEST_WAIT BIT(9) /* * Architecture-independent vcpu->requests bit members * Bits 4-7 are reserved for more arch-independent bits. */ -#define KVM_REQ_TLB_FLUSH 0 -#define KVM_REQ_MMU_RELOAD 1 -#define KVM_REQ_PENDING_TIMER 2 -#define KVM_REQ_UNHALT 3 +#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_PENDING_TIMER 2 +#define KVM_REQ_UNHALT 3 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -268,6 +271,12 @@ struct kvm_vcpu { static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { + /* + * The memory barrier ensures a previous write to vcpu->requests cannot + * be reordered with the read of vcpu->mode. It pairs with the general + * memory barrier following the write of vcpu->mode in VCPU RUN. + */ + smp_mb__before_atomic(); return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); } @@ -375,8 +384,6 @@ struct kvm { struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; - struct srcu_struct srcu; - struct srcu_struct irq_srcu; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; /* @@ -403,7 +410,7 @@ struct kvm { struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; struct list_head coalesced_zones; @@ -429,6 +436,8 @@ struct kvm { struct list_head devices; struct dentry *debugfs_dentry; struct kvm_stat_data **debugfs_stat_data; + struct srcu_struct srcu; + struct srcu_struct irq_srcu; }; #define kvm_err(fmt, ...) \ @@ -490,6 +499,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } +static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *tmp; + int idx; + + kvm_for_each_vcpu(idx, tmp, vcpu->kvm) + if (tmp == vcpu) + return idx; + BUG(); +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ @@ -502,10 +522,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else -static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) @@ -641,18 +661,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); -int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); -int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len); -int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len); +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); @@ -682,7 +702,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); -void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); +bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); @@ -767,8 +787,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void *kvm_kvzalloc(unsigned long size); - #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { @@ -877,22 +895,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT -int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -#else -static inline int kvm_iommu_map_pages(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - return 0; -} - -static inline void kvm_iommu_unmap_pages(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} -#endif - /* * search_memslots() and __gfn_to_memslot() are here because they are * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. @@ -1025,6 +1027,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #define KVM_MAX_IRQ_ROUTES 1024 #endif +bool kvm_arch_can_set_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, @@ -1092,13 +1095,23 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) * caller. Paired with the smp_mb__after_atomic in kvm_check_request. */ smp_wmb(); - set_bit(req, &vcpu->requests); + set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); +} + +static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) +{ + return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); +} + +static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) +{ + clear_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { - if (test_bit(req, &vcpu->requests)) { - clear_bit(req, &vcpu->requests); + if (kvm_test_request(req, vcpu)) { + kvm_clear_request(req, vcpu); /* * Ensure the rest of the request is visible to kvm_check_request's @@ -1165,7 +1178,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; -extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index d215b45611809ddc0c0d1e6d51b622b0db91cdbb..5e240b2b4d581b29c792de8ce8d6bff9ad045c91 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -22,7 +22,8 @@ enum pca9532_state { PCA9532_OFF = 0x0, PCA9532_ON = 0x1, PCA9532_PWM0 = 0x2, - PCA9532_PWM1 = 0x3 + PCA9532_PWM1 = 0x3, + PCA9532_KEEP = 0xff, }; struct pca9532_led { @@ -44,4 +45,3 @@ struct pca9532_platform_data { }; #endif /* __LINUX_PCA9532_H */ - diff --git a/include/linux/leds.h b/include/linux/leds.h index 38c0bd7ca1074af234d516275791d05f945ce1f0..64c56d454f7df9f864a5242ce4212df586f66886 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -122,10 +122,16 @@ struct led_classdev { struct mutex led_access; }; -extern int led_classdev_register(struct device *parent, - struct led_classdev *led_cdev); -extern int devm_led_classdev_register(struct device *parent, - struct led_classdev *led_cdev); +extern int of_led_classdev_register(struct device *parent, + struct device_node *np, + struct led_classdev *led_cdev); +#define led_classdev_register(parent, led_cdev) \ + of_led_classdev_register(parent, NULL, led_cdev) +extern int devm_of_led_classdev_register(struct device *parent, + struct device_node *np, + struct led_classdev *led_cdev); +#define devm_led_classdev_register(parent, led_cdev) \ + devm_of_led_classdev_register(parent, NULL, led_cdev) extern void led_classdev_unregister(struct led_classdev *led_cdev); extern void devm_led_classdev_unregister(struct device *parent, struct led_classdev *led_cdev); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 77e7af32543f698d0413286dc8702d7124bcf81b..6c807017128d4645d423b6ded0737c452fa3a3e6 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -20,9 +20,11 @@ enum { /* when a dimm supports both PMEM and BLK access a label is required */ - NDD_ALIASING = 1 << 0, + NDD_ALIASING = 0, /* unarmed memory devices may not persist writes */ - NDD_UNARMED = 1 << 1, + NDD_UNARMED = 1, + /* locked memory devices should not be accessed */ + NDD_LOCKED = 2, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, @@ -120,7 +122,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); -void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, +void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start, unsigned int len); struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ca45e4a088a91eb929a208de5ffb061b33c9ff2c..7dfa56ebbc6d0f1bd4e215c93d5e642b3231334b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -56,7 +56,6 @@ typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); -typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, @@ -70,7 +69,6 @@ struct nvm_dev_ops { nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; - nvm_erase_blk_fn *erase_block; nvm_create_dma_pool_fn *create_dma_pool; nvm_destroy_dma_pool_fn *destroy_dma_pool; @@ -125,7 +123,7 @@ enum { /* NAND Access Modes */ NVM_IO_SUSPEND = 0x80, NVM_IO_SLC_MODE = 0x100, - NVM_IO_SCRAMBLE_DISABLE = 0x200, + NVM_IO_SCRAMBLE_ENABLE = 0x200, /* Block Types */ NVM_BLK_T_FREE = 0x0, @@ -438,7 +436,8 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); +typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, + int flags); typedef void (nvm_tgt_exit_fn)(void *); typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); @@ -479,10 +478,10 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); -extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, +extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); +extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); -extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); -extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); +extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 9072f04db616ee2467333411fdd287a0fe6b3d84..194991ef93476743d0a2c57d13de101ba1b9ac12 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -23,15 +23,16 @@ #include #include +#include #if IS_ENABLED(CONFIG_LIVEPATCH) #include -enum klp_state { - KLP_DISABLED, - KLP_ENABLED -}; +/* task patch states */ +#define KLP_UNDEFINED -1 +#define KLP_UNPATCHED 0 +#define KLP_PATCHED 1 /** * struct klp_func - function structure for live patching @@ -39,10 +40,29 @@ enum klp_state { * @new_func: pointer to the patched function code * @old_sympos: a hint indicating which symbol position the old function * can be found (optional) + * @immediate: patch the func immediately, bypassing safety mechanisms * @old_addr: the address of the function being patched * @kobj: kobject for sysfs resources - * @state: tracks function-level patch application state * @stack_node: list node for klp_ops func_stack list + * @old_size: size of the old function + * @new_size: size of the new function + * @patched: the func has been added to the klp_ops list + * @transition: the func is currently being applied or reverted + * + * The patched and transition variables define the func's patching state. When + * patching, a func is always in one of the following states: + * + * patched=0 transition=0: unpatched + * patched=0 transition=1: unpatched, temporary starting state + * patched=1 transition=1: patched, may be visible to some tasks + * patched=1 transition=0: patched, visible to all tasks + * + * And when unpatching, it goes in the reverse order: + * + * patched=1 transition=0: patched, visible to all tasks + * patched=1 transition=1: patched, may be visible to some tasks + * patched=0 transition=1: unpatched, temporary ending state + * patched=0 transition=0: unpatched */ struct klp_func { /* external */ @@ -56,12 +76,15 @@ struct klp_func { * in kallsyms for the given object is used. */ unsigned long old_sympos; + bool immediate; /* internal */ unsigned long old_addr; struct kobject kobj; - enum klp_state state; struct list_head stack_node; + unsigned long old_size, new_size; + bool patched; + bool transition; }; /** @@ -70,8 +93,8 @@ struct klp_func { * @funcs: function entries for functions to be patched in the object * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object - * (NULL for vmlinux) - * @state: tracks object-level patch application state + * (NULL for vmlinux) + * @patched: the object's funcs have been added to the klp_ops list */ struct klp_object { /* external */ @@ -81,26 +104,30 @@ struct klp_object { /* internal */ struct kobject kobj; struct module *mod; - enum klp_state state; + bool patched; }; /** * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched + * @immediate: patch all funcs immediately, bypassing safety mechanisms * @list: list node for global list of registered patches * @kobj: kobject for sysfs resources - * @state: tracks patch-level application state + * @enabled: the patch is enabled (but operation may be incomplete) + * @finish: for waiting till it is safe to remove the patch module */ struct klp_patch { /* external */ struct module *mod; struct klp_object *objs; + bool immediate; /* internal */ struct list_head list; struct kobject kobj; - enum klp_state state; + bool enabled; + struct completion finish; }; #define klp_for_each_object(patch, obj) \ @@ -123,10 +150,27 @@ void arch_klp_init_object_loaded(struct klp_patch *patch, int klp_module_coming(struct module *mod); void klp_module_going(struct module *mod); +void klp_copy_process(struct task_struct *child); +void klp_update_patch_state(struct task_struct *task); + +static inline bool klp_patch_pending(struct task_struct *task) +{ + return test_tsk_thread_flag(task, TIF_PATCH_PENDING); +} + +static inline bool klp_have_reliable_stack(void) +{ + return IS_ENABLED(CONFIG_STACKTRACE) && + IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE); +} + #else /* !CONFIG_LIVEPATCH */ static inline int klp_module_coming(struct module *mod) { return 0; } -static inline void klp_module_going(struct module *mod) { } +static inline void klp_module_going(struct module *mod) {} +static inline bool klp_patch_pending(struct task_struct *task) { return false; } +static inline void klp_update_patch_state(struct task_struct *task) {} +static inline void klp_copy_process(struct task_struct *child) {} #endif /* CONFIG_LIVEPATCH */ diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 140edab644462051f23d813514fe9a60404aa0e7..05728396a1a18f701f5bba8075be5301e6a89e4e 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -18,6 +18,7 @@ /* Dummy declarations */ struct svc_rqst; +struct rpc_task; /* * This is the set of functions for lockd->nfsd communication @@ -43,6 +44,7 @@ struct nlmclnt_initdata { u32 nfs_version; int noresvport; struct net *net; + const struct nlmclnt_operations *nlmclnt_ops; }; /* @@ -52,8 +54,26 @@ struct nlmclnt_initdata { extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); extern void nlmclnt_done(struct nlm_host *host); -extern int nlmclnt_proc(struct nlm_host *host, int cmd, - struct file_lock *fl); +/* + * NLM client operations provide a means to modify RPC processing of NLM + * requests. Callbacks receive a pointer to data passed into the call to + * nlmclnt_proc(). + */ +struct nlmclnt_operations { + /* Called on successful allocation of nlm_rqst, use for allocation or + * reference counting. */ + void (*nlmclnt_alloc_call)(void *); + + /* Called in rpc_task_prepare for unlock. A return value of true + * indicates the callback has put the task to sleep on a waitqueue + * and NLM should not call rpc_call_start(). */ + bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *); + + /* Called when the nlm_rqst is freed, callbacks should clean up here */ + void (*nlmclnt_release_call)(void *); +}; + +extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data); extern int lockd_up(struct net *net); extern void lockd_down(struct net *net); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b37dee3acaba4c152fc2c0c928f4f312c74cc778..41f7b6a04d6914841a155d4734d308f6b4cc8d00 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -69,6 +69,7 @@ struct nlm_host { char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ char nodename[UNX_MAXNODENAME + 1]; + const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ }; /* @@ -142,6 +143,7 @@ struct nlm_rqst { struct nlm_block * a_block; unsigned int a_retries; /* Retry count */ u8 a_owner[NLMCLNT_OHSIZE]; + void * a_callback_data; /* sent to nlmclnt_operations callbacks */ }; /* diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1e327bb80838d86202276a63fa715686fb6eff7e..fffe49f188e6d22dfb53ad86e7c53da44a287974 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -361,6 +361,8 @@ static inline void lock_set_subclass(struct lockdep_map *lock, lock_set_class(lock, lock->name, lock->key, subclass, ip); } +extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); + extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); extern void lockdep_clear_current_reclaim_state(void); extern void lockdep_trace_alloc(gfp_t mask); @@ -411,6 +413,7 @@ static inline void lockdep_on(void) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) +# define lock_downgrade(l, i) do { } while (0) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_set_current_reclaim_state(g) do { } while (0) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e29d4c62a3c8e268695d9963c2250f3eaf375399..080f34e66017a8299d10bc4276b1acfe8a1f94cc 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -533,8 +533,13 @@ * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. + * @task_alloc: + * @task task being allocated. + * @clone_flags contains the flags indicating what should be shared. + * Handle allocation of task-related resources. + * Returns a zero on success, negative values on failure. * @task_free: - * @task task being freed + * @task task about to be freed. * Handle release of task-related resources. (Note that this can be called * from interrupt context.) * @cred_alloc_blank: @@ -630,10 +635,19 @@ * Check permission before getting the ioprio value of @p. * @p contains the task_struct of process. * Return 0 if permission is granted. + * @task_prlimit: + * Check permission before getting and/or setting the resource limits of + * another task. + * @cred points to the cred structure for the current task. + * @tcred points to the cred structure for the target task. + * @flags contains the LSM_PRLIMIT_* flag bits indicating whether the + * resource limits are being read, modified, or both. + * Return 0 if permission is granted. * @task_setrlimit: - * Check permission before setting the resource limits of the current - * process for @resource to @new_rlim. The old resource limit values can - * be examined by dereferencing (current->signal->rlim + resource). + * Check permission before setting the resource limits of process @p + * for @resource to @new_rlim. The old resource limit values can + * be examined by dereferencing (p->signal->rlim + resource). + * @p points to the task_struct for the target task's group leader. * @resource contains the resource whose limit is being set. * @new_rlim contains the new limits for @resource. * Return 0 if permission is granted. @@ -1473,6 +1487,7 @@ union security_list_options { int (*file_open)(struct file *file, const struct cred *cred); int (*task_create)(unsigned long clone_flags); + int (*task_alloc)(struct task_struct *task, unsigned long clone_flags); void (*task_free)(struct task_struct *task); int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); void (*cred_free)(struct cred *cred); @@ -1494,6 +1509,8 @@ union security_list_options { int (*task_setnice)(struct task_struct *p, int nice); int (*task_setioprio)(struct task_struct *p, int ioprio); int (*task_getioprio)(struct task_struct *p); + int (*task_prlimit)(const struct cred *cred, const struct cred *tcred, + unsigned int flags); int (*task_setrlimit)(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); int (*task_setscheduler)(struct task_struct *p); @@ -1737,6 +1754,7 @@ struct security_hook_heads { struct list_head file_receive; struct list_head file_open; struct list_head task_create; + struct list_head task_alloc; struct list_head task_free; struct list_head cred_alloc_blank; struct list_head cred_free; @@ -1755,6 +1773,7 @@ struct security_hook_heads { struct list_head task_setnice; struct list_head task_setioprio; struct list_head task_getioprio; + struct list_head task_prlimit; struct list_head task_setrlimit; struct list_head task_setscheduler; struct list_head task_getscheduler; @@ -1908,6 +1927,13 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, } #endif /* CONFIG_SECURITY_SELINUX_DISABLE */ +/* Currently required to handle SELinux runtime hook disable. */ +#ifdef CONFIG_SECURITY_WRITABLE_HOOKS +#define __lsm_ro_after_init +#else +#define __lsm_ro_after_init __ro_after_init +#endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ + extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); #ifdef CONFIG_SECURITY_YAMA diff --git a/include/linux/mailbox/brcm-message.h b/include/linux/mailbox/brcm-message.h index 6b55c938b4015f23ae2f3deee224817902a958c8..c20b4843fc2d60ab9a0249ff7d08799a6bcab1d8 100644 --- a/include/linux/mailbox/brcm-message.h +++ b/include/linux/mailbox/brcm-message.h @@ -16,6 +16,7 @@ enum brcm_message_type { BRCM_MESSAGE_UNKNOWN = 0, + BRCM_MESSAGE_BATCH, BRCM_MESSAGE_SPU, BRCM_MESSAGE_SBA, BRCM_MESSAGE_MAX, @@ -23,23 +24,28 @@ enum brcm_message_type { struct brcm_sba_command { u64 cmd; + u64 *cmd_dma; + dma_addr_t cmd_dma_addr; #define BRCM_SBA_CMD_TYPE_A BIT(0) #define BRCM_SBA_CMD_TYPE_B BIT(1) #define BRCM_SBA_CMD_TYPE_C BIT(2) #define BRCM_SBA_CMD_HAS_RESP BIT(3) #define BRCM_SBA_CMD_HAS_OUTPUT BIT(4) u64 flags; - dma_addr_t input; - size_t input_len; dma_addr_t resp; size_t resp_len; - dma_addr_t output; - size_t output_len; + dma_addr_t data; + size_t data_len; }; struct brcm_message { enum brcm_message_type type; union { + struct { + struct brcm_message *msgs; + unsigned int msgs_queued; + unsigned int msgs_count; + } batch; struct { struct scatterlist *src; struct scatterlist *dst; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index bdfc65af4152047c7b34f9b459f095a6d675cd11..8098695e5d8d9dfba3815fd359823f92833a5d61 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -93,6 +93,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); +int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); ulong choose_memblock_flags(void); /* Low level functions */ @@ -335,6 +336,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); +void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size); void memblock_mem_limit_remove_map(phys_addr_t limit); bool memblock_is_memory(phys_addr_t addr); int memblock_is_map_memory(phys_addr_t addr); @@ -423,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) } #endif +extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr); #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { return 0; } +static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr) +{ + return 0; +} + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bb7250c45cb8356b03df2d4b9f1325dc6e30069b..899949bbb2f9362182e7fd28dc34833cca7de78e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,48 +35,43 @@ struct page; struct mm_struct; struct kmem_cache; -/* - * The corresponding mem_cgroup_stat_names is defined in mm/memcontrol.c, - * These two lists should keep in accord with each other. - */ -enum mem_cgroup_stat_index { - /* - * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. - */ - MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ - MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ - MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ - MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ - MEM_CGROUP_STAT_DIRTY, /* # of dirty pages in page cache */ - MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ - MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ - MEM_CGROUP_STAT_NSTATS, - /* default hierarchy stats */ - MEMCG_KERNEL_STACK_KB = MEM_CGROUP_STAT_NSTATS, +/* Cgroup-specific page state, on top of universal node page state */ +enum memcg_stat_item { + MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS, + MEMCG_RSS, + MEMCG_RSS_HUGE, + MEMCG_SWAP, + MEMCG_SOCK, + /* XXX: why are these zone and not node counters? */ + MEMCG_KERNEL_STACK_KB, MEMCG_SLAB_RECLAIMABLE, MEMCG_SLAB_UNRECLAIMABLE, - MEMCG_SOCK, MEMCG_NR_STAT, }; +/* Cgroup-specific events, on top of universal VM events */ +enum memcg_event_item { + MEMCG_LOW = NR_VM_EVENT_ITEMS, + MEMCG_HIGH, + MEMCG_MAX, + MEMCG_OOM, + MEMCG_NR_EVENTS, +}; + struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; int priority; unsigned int generation; }; -enum mem_cgroup_events_index { - MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ - MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ - MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ - MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ - MEM_CGROUP_EVENTS_NSTATS, - /* default hierarchy events */ - MEMCG_LOW = MEM_CGROUP_EVENTS_NSTATS, - MEMCG_HIGH, - MEMCG_MAX, - MEMCG_OOM, - MEMCG_NR_EVENTS, +#ifdef CONFIG_MEMCG + +#define MEM_CGROUP_ID_SHIFT 16 +#define MEM_CGROUP_ID_MAX USHRT_MAX + +struct mem_cgroup_id { + int id; + atomic_t ref; }; /* @@ -92,16 +87,6 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -#ifdef CONFIG_MEMCG - -#define MEM_CGROUP_ID_SHIFT 16 -#define MEM_CGROUP_ID_MAX USHRT_MAX - -struct mem_cgroup_id { - int id; - atomic_t ref; -}; - struct mem_cgroup_stat_cpu { long count[MEMCG_NR_STAT]; unsigned long events[MEMCG_NR_EVENTS]; @@ -283,17 +268,10 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } -/** - * mem_cgroup_events - count memory events against a cgroup - * @memcg: the memory cgroup - * @idx: the event index - * @nr: the number of events to account for - */ -static inline void mem_cgroup_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx, - unsigned int nr) +static inline void mem_cgroup_event(struct mem_cgroup *memcg, + enum memcg_event_item event) { - this_cpu_add(memcg->stat->events[idx], nr); + this_cpu_inc(memcg->stat->events[event]); cgroup_file_notify(&memcg->events_file); } @@ -494,8 +472,42 @@ extern int do_swap_account; void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + long val = 0; + int cpu; + + for_each_possible_cpu(cpu) + val += per_cpu(memcg->stat->count[idx], cpu); + + if (val < 0) + val = 0; + + return val; +} + +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val) +{ + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->count[idx], val); +} + +static inline void inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + mod_memcg_state(memcg, idx, 1); +} + +static inline void dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + mod_memcg_state(memcg, idx, -1); +} + /** - * mem_cgroup_update_page_stat - update page state statistics + * mod_memcg_page_state - update page state statistics * @page: the page * @idx: page state item to account * @val: number of pages (positive or negative) @@ -506,28 +518,28 @@ void unlock_page_memcg(struct page *page); * * lock_page(page) or lock_page_memcg(page) * if (TestClearPageState(page)) - * mem_cgroup_update_page_stat(page, state, -1); + * mod_memcg_page_state(page, state, -1); * unlock_page(page) or unlock_page_memcg(page) + * + * Kernel pages are an exception to this, since they'll never move. */ -static inline void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, int val) +static inline void mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, int val) { - VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); - if (page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); + mod_memcg_state(page->mem_cgroup, idx, val); } -static inline void mem_cgroup_inc_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) +static inline void inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { - mem_cgroup_update_page_stat(page, idx, 1); + mod_memcg_page_state(page, idx, 1); } -static inline void mem_cgroup_dec_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) +static inline void dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { - mem_cgroup_update_page_stat(page, idx, -1); + mod_memcg_page_state(page, idx, -1); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -544,20 +556,8 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (unlikely(!memcg)) - goto out; - - switch (idx) { - case PGFAULT: - this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]); - break; - case PGMAJFAULT: - this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]); - break; - default: - BUG(); - } -out: + if (likely(memcg)) + this_cpu_inc(memcg->stat->events[idx]); rcu_read_unlock(); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -576,9 +576,8 @@ static inline bool mem_cgroup_disabled(void) return true; } -static inline void mem_cgroup_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx, - unsigned int nr) +static inline void mem_cgroup_event(struct mem_cgroup *memcg, + enum memcg_event_item event) { } @@ -740,19 +739,41 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -static inline void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, - int nr) +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ + return 0; +} + +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, + int nr) +{ +} + +static inline void inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ +} + +static inline void dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) +{ +} + +static inline void mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, + int nr) { } -static inline void mem_cgroup_inc_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) +static inline void inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { } -static inline void mem_cgroup_dec_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) +static inline void dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { } @@ -872,7 +893,7 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) * @val: number of pages (positive or negative) */ static inline void memcg_kmem_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { if (memcg_kmem_enabled() && page->mem_cgroup) this_cpu_add(page->mem_cgroup->stat->count[idx], val); @@ -901,7 +922,7 @@ static inline void memcg_put_cache_ids(void) } static inline void memcg_kmem_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { } #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 64faeeff698c2275bab11c9680f4665126a50494..bfeecf179895b09edfb2ed5489e6293ca5a100d7 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -12,6 +12,8 @@ #define _ARIZONA_PDATA_H #include +#include +#include #define ARIZONA_GPN_DIR_MASK 0x8000 /* GPN_DIR */ #define ARIZONA_GPN_DIR_SHIFT 15 /* GPN_DIR */ @@ -76,13 +78,12 @@ struct arizona_micd_range { struct arizona_pdata { int reset; /** GPIO controlling /RESET, if any */ - int ldoena; /** GPIO controlling LODENA, if any */ /** Regulator configuration for MICVDD */ - struct regulator_init_data *micvdd; + struct arizona_micsupp_pdata micvdd; /** Regulator configuration for LDO1 */ - struct regulator_init_data *ldo1; + struct arizona_ldo1_pdata ldo1; /** If a direct 32kHz clock is provided on an MCLK specify it here */ int clk32k_src; diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 0d9a1ff383933e375dedf820f7ba5ef7d55494c7..cde56cfe8446b17b9696b2e1182c13cb29f9e727 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -20,6 +20,7 @@ enum axp20x_variants { AXP221_ID, AXP223_ID, AXP288_ID, + AXP803_ID, AXP806_ID, AXP809_ID, NR_AXP20X_VARIANTS, @@ -228,13 +229,13 @@ enum axp20x_variants { #define AXP20X_OCV_MAX 0xf /* AXP22X specific registers */ -#define AXP22X_PMIC_ADC_H 0x56 -#define AXP22X_PMIC_ADC_L 0x57 +#define AXP22X_PMIC_TEMP_H 0x56 +#define AXP22X_PMIC_TEMP_L 0x57 #define AXP22X_TS_ADC_H 0x58 #define AXP22X_TS_ADC_L 0x59 #define AXP22X_BATLOW_THRES1 0xe6 -/* AXP288 specific registers */ +/* AXP288/AXP803 specific registers */ #define AXP288_POWER_REASON 0x02 #define AXP288_BC_GLOBAL 0x2c #define AXP288_BC_VBUS_CNTL 0x2d @@ -475,6 +476,43 @@ enum axp288_irqs { AXP288_IRQ_BC_USB_CHNG, }; +enum axp803_irqs { + AXP803_IRQ_ACIN_OVER_V = 1, + AXP803_IRQ_ACIN_PLUGIN, + AXP803_IRQ_ACIN_REMOVAL, + AXP803_IRQ_VBUS_OVER_V, + AXP803_IRQ_VBUS_PLUGIN, + AXP803_IRQ_VBUS_REMOVAL, + AXP803_IRQ_BATT_PLUGIN, + AXP803_IRQ_BATT_REMOVAL, + AXP803_IRQ_BATT_ENT_ACT_MODE, + AXP803_IRQ_BATT_EXIT_ACT_MODE, + AXP803_IRQ_CHARG, + AXP803_IRQ_CHARG_DONE, + AXP803_IRQ_BATT_CHG_TEMP_HIGH, + AXP803_IRQ_BATT_CHG_TEMP_HIGH_END, + AXP803_IRQ_BATT_CHG_TEMP_LOW, + AXP803_IRQ_BATT_CHG_TEMP_LOW_END, + AXP803_IRQ_BATT_ACT_TEMP_HIGH, + AXP803_IRQ_BATT_ACT_TEMP_HIGH_END, + AXP803_IRQ_BATT_ACT_TEMP_LOW, + AXP803_IRQ_BATT_ACT_TEMP_LOW_END, + AXP803_IRQ_DIE_TEMP_HIGH, + AXP803_IRQ_GPADC, + AXP803_IRQ_LOW_PWR_LVL1, + AXP803_IRQ_LOW_PWR_LVL2, + AXP803_IRQ_TIMER, + AXP803_IRQ_PEK_RIS_EDGE, + AXP803_IRQ_PEK_FAL_EDGE, + AXP803_IRQ_PEK_SHORT, + AXP803_IRQ_PEK_LONG, + AXP803_IRQ_PEK_OVER_OFF, + AXP803_IRQ_GPIO1_INPUT, + AXP803_IRQ_GPIO0_INPUT, + AXP803_IRQ_BC_USB_CHNG, + AXP803_IRQ_MV_CHNG, +}; + enum axp806_irqs { AXP806_IRQ_DIE_TEMP_HIGH_LV1, AXP806_IRQ_DIE_TEMP_HIGH_LV2, diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 3eef9fb9968ae730716a79bc9e3aef8be4e2e650..28baee63eaf62f11dd31417db8aed7ba34a5a63b 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -305,4 +305,22 @@ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; extern struct attribute_group cros_ec_vbc_attr_group; +/* ACPI GPE handler */ +#ifdef CONFIG_ACPI + +int cros_ec_acpi_install_gpe_handler(struct device *dev); +void cros_ec_acpi_remove_gpe_handler(void); +void cros_ec_acpi_clear_gpe(void); + +#else /* CONFIG_ACPI */ + +static inline int cros_ec_acpi_install_gpe_handler(struct device *dev) +{ + return -ENODEV; +} +static inline void cros_ec_acpi_remove_gpe_handler(void) {} +static inline void cros_ec_acpi_clear_gpe(void) {} + +#endif /* CONFIG_ACPI */ + #endif /* __LINUX_MFD_CROS_EC_H */ diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index f1ef6388c2337e55310a3dd6b8244f792022580c..c93e7e0300efa6507cd96925c2130378a0573fca 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2041,6 +2041,9 @@ enum ec_mkbp_event { /* The state of the switches have changed. */ EC_MKBP_EVENT_SWITCH = 4, + /* EC sent a sysrq command */ + EC_MKBP_EVENT_SYSRQ = 6, + /* Number of MKBP events */ EC_MKBP_EVENT_COUNT, }; @@ -2053,6 +2056,7 @@ union ec_response_get_next_data { uint32_t buttons; uint32_t switches; + uint32_t sysrq; } __packed; struct ec_response_get_next_event { diff --git a/include/linux/mfd/da9062/core.h b/include/linux/mfd/da9062/core.h index 376ba84366a09628b93dc6dc173d3e606326ecef..74d33a01ddae1241eb3595f2fbf240ac623f846b 100644 --- a/include/linux/mfd/da9062/core.h +++ b/include/linux/mfd/da9062/core.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Dialog Semiconductor Ltd. + * Copyright (C) 2015-2017 Dialog Semiconductor * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,7 +18,31 @@ #include #include -/* Interrupts */ +enum da9062_compatible_types { + COMPAT_TYPE_DA9061 = 1, + COMPAT_TYPE_DA9062, +}; + +enum da9061_irqs { + /* IRQ A */ + DA9061_IRQ_ONKEY, + DA9061_IRQ_WDG_WARN, + DA9061_IRQ_SEQ_RDY, + /* IRQ B*/ + DA9061_IRQ_TEMP, + DA9061_IRQ_LDO_LIM, + DA9061_IRQ_DVC_RDY, + DA9061_IRQ_VDD_WARN, + /* IRQ C */ + DA9061_IRQ_GPI0, + DA9061_IRQ_GPI1, + DA9061_IRQ_GPI2, + DA9061_IRQ_GPI3, + DA9061_IRQ_GPI4, + + DA9061_NUM_IRQ, +}; + enum da9062_irqs { /* IRQ A */ DA9062_IRQ_ONKEY, @@ -45,6 +69,7 @@ struct da9062 { struct device *dev; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irq; + enum da9062_compatible_types chip_type; }; #endif /* __MFD_DA9062_CORE_H__ */ diff --git a/include/linux/mfd/da9062/registers.h b/include/linux/mfd/da9062/registers.h index 97790d1b02c5e23984095354a57e85fd42d9a53c..18d576aed90282dabe7ac6caf29ac843d8df09a2 100644 --- a/include/linux/mfd/da9062/registers.h +++ b/include/linux/mfd/da9062/registers.h @@ -1,6 +1,5 @@ /* - * registers.h - REGISTERS H for DA9062 - * Copyright (C) 2015 Dialog Semiconductor Ltd. + * Copyright (C) 2015-2017 Dialog Semiconductor * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,6 +17,8 @@ #define DA9062_PMIC_DEVICE_ID 0x62 #define DA9062_PMIC_VARIANT_MRC_AA 0x01 +#define DA9062_PMIC_VARIANT_VRC_DA9061 0x01 +#define DA9062_PMIC_VARIANT_VRC_DA9062 0x02 #define DA9062_I2C_PAGE_SEL_SHIFT 1 diff --git a/include/linux/mfd/intel_bxtwc.h b/include/linux/mfd/intel_bxtwc.h deleted file mode 100644 index 1a0ee9d6efe9df4cfe88af60bdd8bb3608401bdf..0000000000000000000000000000000000000000 --- a/include/linux/mfd/intel_bxtwc.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * intel_bxtwc.h - Header file for Intel Broxton Whiskey Cove PMIC - * - * Copyright (C) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include - -#ifndef __INTEL_BXTWC_H__ -#define __INTEL_BXTWC_H__ - -/* BXT WC devices */ -#define BXTWC_DEVICE1_ADDR 0x4E -#define BXTWC_DEVICE2_ADDR 0x4F -#define BXTWC_DEVICE3_ADDR 0x5E - -/* device1 Registers */ -#define BXTWC_CHIPID 0x4E00 -#define BXTWC_CHIPVER 0x4E01 - -#define BXTWC_SCHGRIRQ0_ADDR 0x5E1A -#define BXTWC_CHGRCTRL0_ADDR 0x5E16 -#define BXTWC_CHGRCTRL1_ADDR 0x5E17 -#define BXTWC_CHGRCTRL2_ADDR 0x5E18 -#define BXTWC_CHGRSTATUS_ADDR 0x5E19 -#define BXTWC_THRMBATZONE_ADDR 0x4F22 - -#define BXTWC_USBPATH_ADDR 0x5E19 -#define BXTWC_USBPHYCTRL_ADDR 0x5E07 -#define BXTWC_USBIDCTRL_ADDR 0x5E05 -#define BXTWC_USBIDEN_MASK 0x01 -#define BXTWC_USBIDSTAT_ADDR 0x00FF -#define BXTWC_USBSRCDETSTATUS_ADDR 0x5E29 - -#define BXTWC_DBGUSBBC1_ADDR 0x5FE0 -#define BXTWC_DBGUSBBC2_ADDR 0x5FE1 -#define BXTWC_DBGUSBBCSTAT_ADDR 0x5FE2 - -#define BXTWC_WAKESRC_ADDR 0x4E22 -#define BXTWC_WAKESRC2_ADDR 0x4EE5 -#define BXTWC_CHRTTADDR_ADDR 0x5E22 -#define BXTWC_CHRTTDATA_ADDR 0x5E23 - -#define BXTWC_STHRMIRQ0_ADDR 0x4F19 -#define WC_MTHRMIRQ1_ADDR 0x4E12 -#define WC_STHRMIRQ1_ADDR 0x4F1A -#define WC_STHRMIRQ2_ADDR 0x4F1B - -#define BXTWC_THRMZN0H_ADDR 0x4F44 -#define BXTWC_THRMZN0L_ADDR 0x4F45 -#define BXTWC_THRMZN1H_ADDR 0x4F46 -#define BXTWC_THRMZN1L_ADDR 0x4F47 -#define BXTWC_THRMZN2H_ADDR 0x4F48 -#define BXTWC_THRMZN2L_ADDR 0x4F49 -#define BXTWC_THRMZN3H_ADDR 0x4F4A -#define BXTWC_THRMZN3L_ADDR 0x4F4B -#define BXTWC_THRMZN4H_ADDR 0x4F4C -#define BXTWC_THRMZN4L_ADDR 0x4F4D - -#endif diff --git a/include/linux/mfd/intel_soc_pmic_bxtwc.h b/include/linux/mfd/intel_soc_pmic_bxtwc.h new file mode 100644 index 0000000000000000000000000000000000000000..0c351bc85d2d86de2c980a550c3cb76ab28ad814 --- /dev/null +++ b/include/linux/mfd/intel_soc_pmic_bxtwc.h @@ -0,0 +1,67 @@ +/* + * Header file for Intel Broxton Whiskey Cove PMIC + * + * Copyright (C) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_BXTWC_H__ +#define __INTEL_BXTWC_H__ + +/* BXT WC devices */ +#define BXTWC_DEVICE1_ADDR 0x4E +#define BXTWC_DEVICE2_ADDR 0x4F +#define BXTWC_DEVICE3_ADDR 0x5E + +/* device1 Registers */ +#define BXTWC_CHIPID 0x4E00 +#define BXTWC_CHIPVER 0x4E01 + +#define BXTWC_SCHGRIRQ0_ADDR 0x5E1A +#define BXTWC_CHGRCTRL0_ADDR 0x5E16 +#define BXTWC_CHGRCTRL1_ADDR 0x5E17 +#define BXTWC_CHGRCTRL2_ADDR 0x5E18 +#define BXTWC_CHGRSTATUS_ADDR 0x5E19 +#define BXTWC_THRMBATZONE_ADDR 0x4F22 + +#define BXTWC_USBPATH_ADDR 0x5E19 +#define BXTWC_USBPHYCTRL_ADDR 0x5E07 +#define BXTWC_USBIDCTRL_ADDR 0x5E05 +#define BXTWC_USBIDEN_MASK 0x01 +#define BXTWC_USBIDSTAT_ADDR 0x00FF +#define BXTWC_USBSRCDETSTATUS_ADDR 0x5E29 + +#define BXTWC_DBGUSBBC1_ADDR 0x5FE0 +#define BXTWC_DBGUSBBC2_ADDR 0x5FE1 +#define BXTWC_DBGUSBBCSTAT_ADDR 0x5FE2 + +#define BXTWC_WAKESRC_ADDR 0x4E22 +#define BXTWC_WAKESRC2_ADDR 0x4EE5 +#define BXTWC_CHRTTADDR_ADDR 0x5E22 +#define BXTWC_CHRTTDATA_ADDR 0x5E23 + +#define BXTWC_STHRMIRQ0_ADDR 0x4F19 +#define WC_MTHRMIRQ1_ADDR 0x4E12 +#define WC_STHRMIRQ1_ADDR 0x4F1A +#define WC_STHRMIRQ2_ADDR 0x4F1B + +#define BXTWC_THRMZN0H_ADDR 0x4F44 +#define BXTWC_THRMZN0L_ADDR 0x4F45 +#define BXTWC_THRMZN1H_ADDR 0x4F46 +#define BXTWC_THRMZN1L_ADDR 0x4F47 +#define BXTWC_THRMZN2H_ADDR 0x4F48 +#define BXTWC_THRMZN2L_ADDR 0x4F49 +#define BXTWC_THRMZN3H_ADDR 0x4F4A +#define BXTWC_THRMZN3L_ADDR 0x4F4B +#define BXTWC_THRMZN4H_ADDR 0x4F4C +#define BXTWC_THRMZN4L_ADDR 0x4F4D + +#endif diff --git a/include/linux/mfd/motorola-cpcap.h b/include/linux/mfd/motorola-cpcap.h index b4031c2b2214fc2f5417f14756e14f6cd5ff63d2..aefc49cb7ba9955ebb8b7115103e43a2de89fff5 100644 --- a/include/linux/mfd/motorola-cpcap.h +++ b/include/linux/mfd/motorola-cpcap.h @@ -14,6 +14,9 @@ * published by the Free Software Foundation. */ +#include +#include + #define CPCAP_VENDOR_ST 0 #define CPCAP_VENDOR_TI 1 @@ -290,3 +293,5 @@ static inline int cpcap_get_vendor(struct device *dev, return 0; } + +extern int cpcap_sense_virq(struct regmap *regmap, int virq); diff --git a/include/linux/mfd/mxs-lradc.h b/include/linux/mfd/mxs-lradc.h new file mode 100644 index 0000000000000000000000000000000000000000..661a4521f72396fa58a3b2321d41c181b00b99e3 --- /dev/null +++ b/include/linux/mfd/mxs-lradc.h @@ -0,0 +1,187 @@ +/* + * Freescale MXS Low Resolution Analog-to-Digital Converter driver + * + * Copyright (c) 2012 DENX Software Engineering, GmbH. + * Copyright (c) 2016 Ksenija Stanojevic + * + * Author: Marek Vasut + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MFD_MXS_LRADC_H +#define __MFD_MXS_LRADC_H + +#include +#include +#include + +#define LRADC_MAX_DELAY_CHANS 4 +#define LRADC_MAX_MAPPED_CHANS 8 +#define LRADC_MAX_TOTAL_CHANS 16 + +#define LRADC_DELAY_TIMER_HZ 2000 + +#define LRADC_CTRL0 0x00 +# define LRADC_CTRL0_MX28_TOUCH_DETECT_ENABLE BIT(23) +# define LRADC_CTRL0_MX28_TOUCH_SCREEN_TYPE BIT(22) +# define LRADC_CTRL0_MX28_YNNSW /* YM */ BIT(21) +# define LRADC_CTRL0_MX28_YPNSW /* YP */ BIT(20) +# define LRADC_CTRL0_MX28_YPPSW /* YP */ BIT(19) +# define LRADC_CTRL0_MX28_XNNSW /* XM */ BIT(18) +# define LRADC_CTRL0_MX28_XNPSW /* XM */ BIT(17) +# define LRADC_CTRL0_MX28_XPPSW /* XP */ BIT(16) + +# define LRADC_CTRL0_MX23_TOUCH_DETECT_ENABLE BIT(20) +# define LRADC_CTRL0_MX23_YM BIT(19) +# define LRADC_CTRL0_MX23_XM BIT(18) +# define LRADC_CTRL0_MX23_YP BIT(17) +# define LRADC_CTRL0_MX23_XP BIT(16) + +# define LRADC_CTRL0_MX28_PLATE_MASK \ + (LRADC_CTRL0_MX28_TOUCH_DETECT_ENABLE | \ + LRADC_CTRL0_MX28_YNNSW | LRADC_CTRL0_MX28_YPNSW | \ + LRADC_CTRL0_MX28_YPPSW | LRADC_CTRL0_MX28_XNNSW | \ + LRADC_CTRL0_MX28_XNPSW | LRADC_CTRL0_MX28_XPPSW) + +# define LRADC_CTRL0_MX23_PLATE_MASK \ + (LRADC_CTRL0_MX23_TOUCH_DETECT_ENABLE | \ + LRADC_CTRL0_MX23_YM | LRADC_CTRL0_MX23_XM | \ + LRADC_CTRL0_MX23_YP | LRADC_CTRL0_MX23_XP) + +#define LRADC_CTRL1 0x10 +#define LRADC_CTRL1_TOUCH_DETECT_IRQ_EN BIT(24) +#define LRADC_CTRL1_LRADC_IRQ_EN(n) (1 << ((n) + 16)) +#define LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK (0x1fff << 16) +#define LRADC_CTRL1_MX23_LRADC_IRQ_EN_MASK (0x01ff << 16) +#define LRADC_CTRL1_LRADC_IRQ_EN_OFFSET 16 +#define LRADC_CTRL1_TOUCH_DETECT_IRQ BIT(8) +#define LRADC_CTRL1_LRADC_IRQ(n) BIT(n) +#define LRADC_CTRL1_MX28_LRADC_IRQ_MASK 0x1fff +#define LRADC_CTRL1_MX23_LRADC_IRQ_MASK 0x01ff +#define LRADC_CTRL1_LRADC_IRQ_OFFSET 0 + +#define LRADC_CTRL2 0x20 +#define LRADC_CTRL2_DIVIDE_BY_TWO_OFFSET 24 +#define LRADC_CTRL2_TEMPSENSE_PWD BIT(15) + +#define LRADC_STATUS 0x40 +#define LRADC_STATUS_TOUCH_DETECT_RAW BIT(0) + +#define LRADC_CH(n) (0x50 + (0x10 * (n))) +#define LRADC_CH_ACCUMULATE BIT(29) +#define LRADC_CH_NUM_SAMPLES_MASK (0x1f << 24) +#define LRADC_CH_NUM_SAMPLES_OFFSET 24 +#define LRADC_CH_NUM_SAMPLES(x) \ + ((x) << LRADC_CH_NUM_SAMPLES_OFFSET) +#define LRADC_CH_VALUE_MASK 0x3ffff +#define LRADC_CH_VALUE_OFFSET 0 + +#define LRADC_DELAY(n) (0xd0 + (0x10 * (n))) +#define LRADC_DELAY_TRIGGER_LRADCS_MASK (0xffUL << 24) +#define LRADC_DELAY_TRIGGER_LRADCS_OFFSET 24 +#define LRADC_DELAY_TRIGGER(x) \ + (((x) << LRADC_DELAY_TRIGGER_LRADCS_OFFSET) & \ + LRADC_DELAY_TRIGGER_LRADCS_MASK) +#define LRADC_DELAY_KICK BIT(20) +#define LRADC_DELAY_TRIGGER_DELAYS_MASK (0xf << 16) +#define LRADC_DELAY_TRIGGER_DELAYS_OFFSET 16 +#define LRADC_DELAY_TRIGGER_DELAYS(x) \ + (((x) << LRADC_DELAY_TRIGGER_DELAYS_OFFSET) & \ + LRADC_DELAY_TRIGGER_DELAYS_MASK) +#define LRADC_DELAY_LOOP_COUNT_MASK (0x1f << 11) +#define LRADC_DELAY_LOOP_COUNT_OFFSET 11 +#define LRADC_DELAY_LOOP(x) \ + (((x) << LRADC_DELAY_LOOP_COUNT_OFFSET) & \ + LRADC_DELAY_LOOP_COUNT_MASK) +#define LRADC_DELAY_DELAY_MASK 0x7ff +#define LRADC_DELAY_DELAY_OFFSET 0 +#define LRADC_DELAY_DELAY(x) \ + (((x) << LRADC_DELAY_DELAY_OFFSET) & \ + LRADC_DELAY_DELAY_MASK) + +#define LRADC_CTRL4 0x140 +#define LRADC_CTRL4_LRADCSELECT_MASK(n) (0xf << ((n) * 4)) +#define LRADC_CTRL4_LRADCSELECT_OFFSET(n) ((n) * 4) +#define LRADC_CTRL4_LRADCSELECT(n, x) \ + (((x) << LRADC_CTRL4_LRADCSELECT_OFFSET(n)) & \ + LRADC_CTRL4_LRADCSELECT_MASK(n)) + +#define LRADC_RESOLUTION 12 +#define LRADC_SINGLE_SAMPLE_MASK ((1 << LRADC_RESOLUTION) - 1) + +#define BUFFER_VCHANS_LIMITED 0x3f +#define BUFFER_VCHANS_ALL 0xff + + /* + * Certain LRADC channels are shared between touchscreen + * and/or touch-buttons and generic LRADC block. Therefore when using + * either of these, these channels are not available for the regular + * sampling. The shared channels are as follows: + * + * CH0 -- Touch button #0 + * CH1 -- Touch button #1 + * CH2 -- Touch screen XPUL + * CH3 -- Touch screen YPLL + * CH4 -- Touch screen XNUL + * CH5 -- Touch screen YNLR + * CH6 -- Touch screen WIPER (5-wire only) + * + * The bit fields below represents which parts of the LRADC block are + * switched into special mode of operation. These channels can not + * be sampled as regular LRADC channels. The driver will refuse any + * attempt to sample these channels. + */ +#define CHAN_MASK_TOUCHBUTTON (BIT(1) | BIT(0)) +#define CHAN_MASK_TOUCHSCREEN_4WIRE (0xf << 2) +#define CHAN_MASK_TOUCHSCREEN_5WIRE (0x1f << 2) + +enum mxs_lradc_id { + IMX23_LRADC, + IMX28_LRADC, +}; + +enum mxs_lradc_ts_wires { + MXS_LRADC_TOUCHSCREEN_NONE = 0, + MXS_LRADC_TOUCHSCREEN_4WIRE, + MXS_LRADC_TOUCHSCREEN_5WIRE, +}; + +/** + * struct mxs_lradc + * @soc: soc type (IMX23 or IMX28) + * @clk: 2 kHz clock for delay units + * @buffer_vchans: channels that can be used during buffered capture + * @touchscreen_wire: touchscreen type (4-wire or 5-wire) + * @use_touchbutton: button state (on or off) + */ +struct mxs_lradc { + enum mxs_lradc_id soc; + struct clk *clk; + u8 buffer_vchans; + + enum mxs_lradc_ts_wires touchscreen_wire; + bool use_touchbutton; +}; + +static inline u32 mxs_lradc_irq_mask(struct mxs_lradc *lradc) +{ + switch (lradc->soc) { + case IMX23_LRADC: + return LRADC_CTRL1_MX23_LRADC_IRQ_MASK; + case IMX28_LRADC: + return LRADC_CTRL1_MX28_LRADC_IRQ_MASK; + default: + return 0; + } +} + +#endif /* __MXS_LRADC_H */ diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index d0300045f04ab84efabd0a78e1af19d22b0e773f..4a0abbc10ef6c2f4cd5c6ce0dbf7902024f55e05 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -21,6 +21,7 @@ #define TIM_CCMR1 0x18 /* Capt/Comp 1 Mode Reg */ #define TIM_CCMR2 0x1C /* Capt/Comp 2 Mode Reg */ #define TIM_CCER 0x20 /* Capt/Comp Enable Reg */ +#define TIM_CNT 0x24 /* Counter */ #define TIM_PSC 0x28 /* Prescaler */ #define TIM_ARR 0x2c /* Auto-Reload Register */ #define TIM_CCR1 0x34 /* Capt/Comp Register 1 */ @@ -30,6 +31,7 @@ #define TIM_BDTR 0x44 /* Break and Dead-Time Reg */ #define TIM_CR1_CEN BIT(0) /* Counter Enable */ +#define TIM_CR1_DIR BIT(4) /* Counter Direction */ #define TIM_CR1_ARPE BIT(7) /* Auto-reload Preload Ena */ #define TIM_CR2_MMS (BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */ #define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h index d7a29f246d647fb6c7be225bc47fb42f2e295c8d..139872c2e0fe0ffe9517d8f13db65efcd5bc42ff 100644 --- a/include/linux/mfd/sun4i-gpadc.h +++ b/include/linux/mfd/sun4i-gpadc.h @@ -28,6 +28,7 @@ #define SUN4I_GPADC_CTRL1_TP_MODE_EN BIT(4) #define SUN4I_GPADC_CTRL1_TP_ADC_SELECT BIT(3) #define SUN4I_GPADC_CTRL1_ADC_CHAN_SELECT(x) (GENMASK(2, 0) & (x)) +#define SUN4I_GPADC_CTRL1_ADC_CHAN_MASK GENMASK(2, 0) /* TP_CTRL1 bits for sun6i SOCs */ #define SUN6I_GPADC_CTRL1_TOUCH_PAN_CALI_EN BIT(7) @@ -35,6 +36,11 @@ #define SUN6I_GPADC_CTRL1_TP_MODE_EN BIT(5) #define SUN6I_GPADC_CTRL1_TP_ADC_SELECT BIT(4) #define SUN6I_GPADC_CTRL1_ADC_CHAN_SELECT(x) (GENMASK(3, 0) & BIT(x)) +#define SUN6I_GPADC_CTRL1_ADC_CHAN_MASK GENMASK(3, 0) + +/* TP_CTRL1 bits for sun8i SoCs */ +#define SUN8I_GPADC_CTRL1_CHOP_TEMP_EN BIT(8) +#define SUN8I_GPADC_CTRL1_GPADC_CALI_EN BIT(7) #define SUN4I_GPADC_CTRL2 0x08 diff --git a/include/linux/mfd/syscon/atmel-smc.h b/include/linux/mfd/syscon/atmel-smc.h index be6ebe64eebe1964573a2b963004a8055dff7abf..afa2661698004fed5792374ac68d8d8dd9bb0c1f 100644 --- a/include/linux/mfd/syscon/atmel-smc.h +++ b/include/linux/mfd/syscon/atmel-smc.h @@ -17,157 +17,92 @@ #include #include -#define AT91SAM9_SMC_GENERIC 0x00 -#define AT91SAM9_SMC_GENERIC_BLK_SZ 0x10 - -#define SAMA5_SMC_GENERIC 0x600 -#define SAMA5_SMC_GENERIC_BLK_SZ 0x14 - -#define AT91SAM9_SMC_SETUP(o) ((o) + 0x00) -#define AT91SAM9_SMC_NWESETUP(x) (x) -#define AT91SAM9_SMC_NCS_WRSETUP(x) ((x) << 8) -#define AT91SAM9_SMC_NRDSETUP(x) ((x) << 16) -#define AT91SAM9_SMC_NCS_NRDSETUP(x) ((x) << 24) - -#define AT91SAM9_SMC_PULSE(o) ((o) + 0x04) -#define AT91SAM9_SMC_NWEPULSE(x) (x) -#define AT91SAM9_SMC_NCS_WRPULSE(x) ((x) << 8) -#define AT91SAM9_SMC_NRDPULSE(x) ((x) << 16) -#define AT91SAM9_SMC_NCS_NRDPULSE(x) ((x) << 24) - -#define AT91SAM9_SMC_CYCLE(o) ((o) + 0x08) -#define AT91SAM9_SMC_NWECYCLE(x) (x) -#define AT91SAM9_SMC_NRDCYCLE(x) ((x) << 16) - -#define AT91SAM9_SMC_MODE(o) ((o) + 0x0c) -#define SAMA5_SMC_MODE(o) ((o) + 0x10) -#define AT91_SMC_READMODE BIT(0) -#define AT91_SMC_READMODE_NCS (0 << 0) -#define AT91_SMC_READMODE_NRD (1 << 0) -#define AT91_SMC_WRITEMODE BIT(1) -#define AT91_SMC_WRITEMODE_NCS (0 << 1) -#define AT91_SMC_WRITEMODE_NWE (1 << 1) -#define AT91_SMC_EXNWMODE GENMASK(5, 4) -#define AT91_SMC_EXNWMODE_DISABLE (0 << 4) -#define AT91_SMC_EXNWMODE_FROZEN (2 << 4) -#define AT91_SMC_EXNWMODE_READY (3 << 4) -#define AT91_SMC_BAT BIT(8) -#define AT91_SMC_BAT_SELECT (0 << 8) -#define AT91_SMC_BAT_WRITE (1 << 8) -#define AT91_SMC_DBW GENMASK(13, 12) -#define AT91_SMC_DBW_8 (0 << 12) -#define AT91_SMC_DBW_16 (1 << 12) -#define AT91_SMC_DBW_32 (2 << 12) -#define AT91_SMC_TDF GENMASK(19, 16) -#define AT91_SMC_TDF_(x) ((((x) - 1) << 16) & AT91_SMC_TDF) -#define AT91_SMC_TDF_MAX 16 -#define AT91_SMC_TDFMODE_OPTIMIZED BIT(20) -#define AT91_SMC_PMEN BIT(24) -#define AT91_SMC_PS GENMASK(29, 28) -#define AT91_SMC_PS_4 (0 << 28) -#define AT91_SMC_PS_8 (1 << 28) -#define AT91_SMC_PS_16 (2 << 28) -#define AT91_SMC_PS_32 (3 << 28) - - -/* - * This function converts a setup timing expressed in nanoseconds into an - * encoded value that can be written in the SMC_SETUP register. - * - * The following formula is described in atmel datasheets (section - * "SMC Setup Register"): - * - * setup length = (128* SETUP[5] + SETUP[4:0]) - * - * where setup length is the timing expressed in cycles. - */ -static inline u32 at91sam9_smc_setup_ns_to_cycles(unsigned int clk_rate, - u32 timing_ns) -{ - u32 clk_period = DIV_ROUND_UP(NSEC_PER_SEC, clk_rate); - u32 coded_cycles = 0; - u32 cycles; - - cycles = DIV_ROUND_UP(timing_ns, clk_period); - if (cycles / 32) { - coded_cycles |= 1 << 5; - if (cycles < 128) - cycles = 0; - } - - coded_cycles |= cycles % 32; - - return coded_cycles; -} - -/* - * This function converts a pulse timing expressed in nanoseconds into an - * encoded value that can be written in the SMC_PULSE register. - * - * The following formula is described in atmel datasheets (section - * "SMC Pulse Register"): - * - * pulse length = (256* PULSE[6] + PULSE[5:0]) - * - * where pulse length is the timing expressed in cycles. +#define ATMEL_SMC_SETUP(cs) (((cs) * 0x10)) +#define ATMEL_HSMC_SETUP(cs) (0x600 + ((cs) * 0x14)) +#define ATMEL_SMC_PULSE(cs) (((cs) * 0x10) + 0x4) +#define ATMEL_HSMC_PULSE(cs) (0x600 + ((cs) * 0x14) + 0x4) +#define ATMEL_SMC_CYCLE(cs) (((cs) * 0x10) + 0x8) +#define ATMEL_HSMC_CYCLE(cs) (0x600 + ((cs) * 0x14) + 0x8) +#define ATMEL_SMC_NWE_SHIFT 0 +#define ATMEL_SMC_NCS_WR_SHIFT 8 +#define ATMEL_SMC_NRD_SHIFT 16 +#define ATMEL_SMC_NCS_RD_SHIFT 24 + +#define ATMEL_SMC_MODE(cs) (((cs) * 0x10) + 0xc) +#define ATMEL_HSMC_MODE(cs) (0x600 + ((cs) * 0x14) + 0x10) +#define ATMEL_SMC_MODE_READMODE_MASK BIT(0) +#define ATMEL_SMC_MODE_READMODE_NCS (0 << 0) +#define ATMEL_SMC_MODE_READMODE_NRD (1 << 0) +#define ATMEL_SMC_MODE_WRITEMODE_MASK BIT(1) +#define ATMEL_SMC_MODE_WRITEMODE_NCS (0 << 1) +#define ATMEL_SMC_MODE_WRITEMODE_NWE (1 << 1) +#define ATMEL_SMC_MODE_EXNWMODE_MASK GENMASK(5, 4) +#define ATMEL_SMC_MODE_EXNWMODE_DISABLE (0 << 4) +#define ATMEL_SMC_MODE_EXNWMODE_FROZEN (2 << 4) +#define ATMEL_SMC_MODE_EXNWMODE_READY (3 << 4) +#define ATMEL_SMC_MODE_BAT_MASK BIT(8) +#define ATMEL_SMC_MODE_BAT_SELECT (0 << 8) +#define ATMEL_SMC_MODE_BAT_WRITE (1 << 8) +#define ATMEL_SMC_MODE_DBW_MASK GENMASK(13, 12) +#define ATMEL_SMC_MODE_DBW_8 (0 << 12) +#define ATMEL_SMC_MODE_DBW_16 (1 << 12) +#define ATMEL_SMC_MODE_DBW_32 (2 << 12) +#define ATMEL_SMC_MODE_TDF_MASK GENMASK(19, 16) +#define ATMEL_SMC_MODE_TDF(x) (((x) - 1) << 16) +#define ATMEL_SMC_MODE_TDF_MAX 16 +#define ATMEL_SMC_MODE_TDF_MIN 1 +#define ATMEL_SMC_MODE_TDFMODE_OPTIMIZED BIT(20) +#define ATMEL_SMC_MODE_PMEN BIT(24) +#define ATMEL_SMC_MODE_PS_MASK GENMASK(29, 28) +#define ATMEL_SMC_MODE_PS_4 (0 << 28) +#define ATMEL_SMC_MODE_PS_8 (1 << 28) +#define ATMEL_SMC_MODE_PS_16 (2 << 28) +#define ATMEL_SMC_MODE_PS_32 (3 << 28) + +#define ATMEL_HSMC_TIMINGS(cs) (0x600 + ((cs) * 0x14) + 0xc) +#define ATMEL_HSMC_TIMINGS_OCMS BIT(12) +#define ATMEL_HSMC_TIMINGS_RBNSEL(x) ((x) << 28) +#define ATMEL_HSMC_TIMINGS_NFSEL BIT(31) +#define ATMEL_HSMC_TIMINGS_TCLR_SHIFT 0 +#define ATMEL_HSMC_TIMINGS_TADL_SHIFT 4 +#define ATMEL_HSMC_TIMINGS_TAR_SHIFT 8 +#define ATMEL_HSMC_TIMINGS_TRR_SHIFT 16 +#define ATMEL_HSMC_TIMINGS_TWB_SHIFT 24 + +/** + * struct atmel_smc_cs_conf - SMC CS config as described in the datasheet. + * @setup: NCS/NWE/NRD setup timings (not applicable to at91rm9200) + * @pulse: NCS/NWE/NRD pulse timings (not applicable to at91rm9200) + * @cycle: NWE/NRD cycle timings (not applicable to at91rm9200) + * @timings: advanced NAND related timings (only applicable to HSMC) + * @mode: all kind of config parameters (see the fields definition above). + * The mode fields are different on at91rm9200 */ -static inline u32 at91sam9_smc_pulse_ns_to_cycles(unsigned int clk_rate, - u32 timing_ns) -{ - u32 clk_period = DIV_ROUND_UP(NSEC_PER_SEC, clk_rate); - u32 coded_cycles = 0; - u32 cycles; - - cycles = DIV_ROUND_UP(timing_ns, clk_period); - if (cycles / 64) { - coded_cycles |= 1 << 6; - if (cycles < 256) - cycles = 0; - } - - coded_cycles |= cycles % 64; - - return coded_cycles; -} - -/* - * This function converts a cycle timing expressed in nanoseconds into an - * encoded value that can be written in the SMC_CYCLE register. - * - * The following formula is described in atmel datasheets (section - * "SMC Cycle Register"): - * - * cycle length = (CYCLE[8:7]*256 + CYCLE[6:0]) - * - * where cycle length is the timing expressed in cycles. - */ -static inline u32 at91sam9_smc_cycle_ns_to_cycles(unsigned int clk_rate, - u32 timing_ns) -{ - u32 clk_period = DIV_ROUND_UP(NSEC_PER_SEC, clk_rate); - u32 coded_cycles = 0; - u32 cycles; - - cycles = DIV_ROUND_UP(timing_ns, clk_period); - if (cycles / 128) { - coded_cycles = cycles / 256; - cycles %= 256; - if (cycles >= 128) { - coded_cycles++; - cycles = 0; - } - - if (coded_cycles > 0x3) { - coded_cycles = 0x3; - cycles = 0x7f; - } - - coded_cycles <<= 7; - } - - coded_cycles |= cycles % 128; - - return coded_cycles; -} +struct atmel_smc_cs_conf { + u32 setup; + u32 pulse; + u32 cycle; + u32 timings; + u32 mode; +}; + +void atmel_smc_cs_conf_init(struct atmel_smc_cs_conf *conf); +int atmel_smc_cs_conf_set_timing(struct atmel_smc_cs_conf *conf, + unsigned int shift, + unsigned int ncycles); +int atmel_smc_cs_conf_set_setup(struct atmel_smc_cs_conf *conf, + unsigned int shift, unsigned int ncycles); +int atmel_smc_cs_conf_set_pulse(struct atmel_smc_cs_conf *conf, + unsigned int shift, unsigned int ncycles); +int atmel_smc_cs_conf_set_cycle(struct atmel_smc_cs_conf *conf, + unsigned int shift, unsigned int ncycles); +void atmel_smc_cs_conf_apply(struct regmap *regmap, int cs, + const struct atmel_smc_cs_conf *conf); +void atmel_hsmc_cs_conf_apply(struct regmap *regmap, int cs, + const struct atmel_smc_cs_conf *conf); +void atmel_smc_cs_conf_get(struct regmap *regmap, int cs, + struct atmel_smc_cs_conf *conf); +void atmel_hsmc_cs_conf_get(struct regmap *regmap, int cs, + struct atmel_smc_cs_conf *conf); #endif /* _LINUX_MFD_SYSCON_ATMEL_SMC_H_ */ diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index c28ff21ca4d207c484c196eda1986ce8b4458b9b..b4942a32b81df8e9b4b21b92f68e408a6f80d4c2 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -12,41 +12,8 @@ #ifndef _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ #define _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ -/* Exynos5 PMU register definitions */ -#define EXYNOS5_HDMI_PHY_CONTROL (0x700) -#define EXYNOS5_USBDRD_PHY_CONTROL (0x704) - -/* Exynos5250 specific register definitions */ -#define EXYNOS5_USBHOST_PHY_CONTROL (0x708) -#define EXYNOS5_EFNAND_PHY_CONTROL (0x70c) -#define EXYNOS5_MIPI_PHY0_CONTROL (0x710) -#define EXYNOS5_MIPI_PHY1_CONTROL (0x714) -#define EXYNOS5_ADC_PHY_CONTROL (0x718) -#define EXYNOS5_MTCADC_PHY_CONTROL (0x71c) -#define EXYNOS5_DPTX_PHY_CONTROL (0x720) -#define EXYNOS5_SATA_PHY_CONTROL (0x724) - -/* Exynos5420 specific register definitions */ -#define EXYNOS5420_USBDRD1_PHY_CONTROL (0x708) -#define EXYNOS5420_USBHOST_PHY_CONTROL (0x70c) -#define EXYNOS5420_MIPI_PHY0_CONTROL (0x714) -#define EXYNOS5420_MIPI_PHY1_CONTROL (0x718) -#define EXYNOS5420_MIPI_PHY2_CONTROL (0x71c) -#define EXYNOS5420_ADC_PHY_CONTROL (0x720) -#define EXYNOS5420_MTCADC_PHY_CONTROL (0x724) -#define EXYNOS5420_DPTX_PHY_CONTROL (0x728) - -/* Exynos5433 specific register definitions */ -#define EXYNOS5433_USBHOST30_PHY_CONTROL (0x728) -#define EXYNOS5433_MIPI_PHY0_CONTROL (0x710) -#define EXYNOS5433_MIPI_PHY1_CONTROL (0x714) -#define EXYNOS5433_MIPI_PHY2_CONTROL (0x718) - #define EXYNOS5_PHY_ENABLE BIT(0) #define EXYNOS5_MIPI_PHY_S_RESETN BIT(1) #define EXYNOS5_MIPI_PHY_M_RESETN BIT(2) -#define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) -#define EXYNOS5433_PAD_INITIATE_WAKEUP_FROM_LOWPWR BIT(28) - #endif /* _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ */ diff --git a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h index 4585d6105d68af19f72f1fd90663b670d45a8f8e..abbd52466573f9994ea25eab5e6d2b12c00f564e 100644 --- a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h @@ -44,4 +44,8 @@ #define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI (0x1 << 4) +#define IMX7D_GPR12_PCIE_PHY_REFCLK_SEL BIT(5) + +#define IMX7D_GPR22_PCIE_PHY_PLL_LOCKED BIT(31) + #endif /* __LINUX_IMX7_IOMUXC_GPR_H */ diff --git a/include/linux/mfd/ti-lmu-register.h b/include/linux/mfd/ti-lmu-register.h new file mode 100644 index 0000000000000000000000000000000000000000..2125c7c02818b3c5c30eff67111e6e512ccfb065 --- /dev/null +++ b/include/linux/mfd/ti-lmu-register.h @@ -0,0 +1,280 @@ +/* + * TI LMU (Lighting Management Unit) Device Register Map + * + * Copyright 2017 Texas Instruments + * + * Author: Milo Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MFD_TI_LMU_REGISTER_H__ +#define __MFD_TI_LMU_REGISTER_H__ + +#include + +/* LM3532 */ +#define LM3532_REG_OUTPUT_CFG 0x10 +#define LM3532_ILED1_CFG_MASK 0x03 +#define LM3532_ILED2_CFG_MASK 0x0C +#define LM3532_ILED3_CFG_MASK 0x30 +#define LM3532_ILED1_CFG_SHIFT 0 +#define LM3532_ILED2_CFG_SHIFT 2 +#define LM3532_ILED3_CFG_SHIFT 4 + +#define LM3532_REG_RAMPUP 0x12 +#define LM3532_REG_RAMPDN LM3532_REG_RAMPUP +#define LM3532_RAMPUP_MASK 0x07 +#define LM3532_RAMPUP_SHIFT 0 +#define LM3532_RAMPDN_MASK 0x38 +#define LM3532_RAMPDN_SHIFT 3 + +#define LM3532_REG_ENABLE 0x1D + +#define LM3532_REG_PWM_A_CFG 0x13 +#define LM3532_PWM_A_MASK 0x05 /* zone 0 */ +#define LM3532_PWM_ZONE_0 BIT(2) + +#define LM3532_REG_PWM_B_CFG 0x14 +#define LM3532_PWM_B_MASK 0x09 /* zone 1 */ +#define LM3532_PWM_ZONE_1 BIT(3) + +#define LM3532_REG_PWM_C_CFG 0x15 +#define LM3532_PWM_C_MASK 0x11 /* zone 2 */ +#define LM3532_PWM_ZONE_2 BIT(4) + +#define LM3532_REG_ZONE_CFG_A 0x16 +#define LM3532_REG_ZONE_CFG_B 0x18 +#define LM3532_REG_ZONE_CFG_C 0x1A +#define LM3532_ZONE_MASK (BIT(2) | BIT(3) | BIT(4)) +#define LM3532_ZONE_0 0 +#define LM3532_ZONE_1 BIT(2) +#define LM3532_ZONE_2 BIT(3) + +#define LM3532_REG_BRT_A 0x70 /* zone 0 */ +#define LM3532_REG_BRT_B 0x76 /* zone 1 */ +#define LM3532_REG_BRT_C 0x7C /* zone 2 */ + +#define LM3532_MAX_REG 0x7E + +/* LM3631 */ +#define LM3631_REG_DEVCTRL 0x00 +#define LM3631_LCD_EN_MASK BIT(1) +#define LM3631_BL_EN_MASK BIT(0) + +#define LM3631_REG_BRT_LSB 0x01 +#define LM3631_REG_BRT_MSB 0x02 + +#define LM3631_REG_BL_CFG 0x06 +#define LM3631_BL_CHANNEL_MASK BIT(3) +#define LM3631_BL_DUAL_CHANNEL 0 +#define LM3631_BL_SINGLE_CHANNEL BIT(3) +#define LM3631_MAP_MASK BIT(5) +#define LM3631_EXPONENTIAL_MAP 0 + +#define LM3631_REG_BRT_MODE 0x08 +#define LM3631_MODE_MASK (BIT(1) | BIT(2) | BIT(3)) +#define LM3631_DEFAULT_MODE (BIT(1) | BIT(3)) + +#define LM3631_REG_SLOPE 0x09 +#define LM3631_SLOPE_MASK 0xF0 +#define LM3631_SLOPE_SHIFT 4 + +#define LM3631_REG_LDO_CTRL1 0x0A +#define LM3631_EN_OREF_MASK BIT(0) +#define LM3631_EN_VNEG_MASK BIT(1) +#define LM3631_EN_VPOS_MASK BIT(2) + +#define LM3631_REG_LDO_CTRL2 0x0B +#define LM3631_EN_CONT_MASK BIT(0) + +#define LM3631_REG_VOUT_CONT 0x0C +#define LM3631_VOUT_CONT_MASK (BIT(6) | BIT(7)) + +#define LM3631_REG_VOUT_BOOST 0x0C +#define LM3631_REG_VOUT_POS 0x0D +#define LM3631_REG_VOUT_NEG 0x0E +#define LM3631_REG_VOUT_OREF 0x0F +#define LM3631_VOUT_MASK 0x3F + +#define LM3631_REG_ENTIME_VCONT 0x0B +#define LM3631_ENTIME_CONT_MASK 0x70 + +#define LM3631_REG_ENTIME_VOREF 0x0F +#define LM3631_REG_ENTIME_VPOS 0x10 +#define LM3631_REG_ENTIME_VNEG 0x11 +#define LM3631_ENTIME_MASK 0xF0 +#define LM3631_ENTIME_SHIFT 4 + +#define LM3631_MAX_REG 0x16 + +/* LM3632 */ +#define LM3632_REG_CONFIG1 0x02 +#define LM3632_OVP_MASK (BIT(5) | BIT(6) | BIT(7)) +#define LM3632_OVP_25V BIT(6) + +#define LM3632_REG_CONFIG2 0x03 +#define LM3632_SWFREQ_MASK BIT(7) +#define LM3632_SWFREQ_1MHZ BIT(7) + +#define LM3632_REG_BRT_LSB 0x04 +#define LM3632_REG_BRT_MSB 0x05 + +#define LM3632_REG_IO_CTRL 0x09 +#define LM3632_PWM_MASK BIT(6) +#define LM3632_I2C_MODE 0 +#define LM3632_PWM_MODE BIT(6) + +#define LM3632_REG_ENABLE 0x0A +#define LM3632_BL_EN_MASK BIT(0) +#define LM3632_BL_CHANNEL_MASK (BIT(3) | BIT(4)) +#define LM3632_BL_SINGLE_CHANNEL BIT(4) +#define LM3632_BL_DUAL_CHANNEL BIT(3) + +#define LM3632_REG_BIAS_CONFIG 0x0C +#define LM3632_EXT_EN_MASK BIT(0) +#define LM3632_EN_VNEG_MASK BIT(1) +#define LM3632_EN_VPOS_MASK BIT(2) + +#define LM3632_REG_VOUT_BOOST 0x0D +#define LM3632_REG_VOUT_POS 0x0E +#define LM3632_REG_VOUT_NEG 0x0F +#define LM3632_VOUT_MASK 0x3F + +#define LM3632_MAX_REG 0x10 + +/* LM3633 */ +#define LM3633_REG_HVLED_OUTPUT_CFG 0x10 +#define LM3633_HVLED1_CFG_MASK BIT(0) +#define LM3633_HVLED2_CFG_MASK BIT(1) +#define LM3633_HVLED3_CFG_MASK BIT(2) +#define LM3633_HVLED1_CFG_SHIFT 0 +#define LM3633_HVLED2_CFG_SHIFT 1 +#define LM3633_HVLED3_CFG_SHIFT 2 + +#define LM3633_REG_BANK_SEL 0x11 + +#define LM3633_REG_BL0_RAMP 0x12 +#define LM3633_REG_BL1_RAMP 0x13 +#define LM3633_BL_RAMPUP_MASK 0xF0 +#define LM3633_BL_RAMPUP_SHIFT 4 +#define LM3633_BL_RAMPDN_MASK 0x0F +#define LM3633_BL_RAMPDN_SHIFT 0 + +#define LM3633_REG_BL_RAMP_CONF 0x1B +#define LM3633_BL_RAMP_MASK 0x0F +#define LM3633_BL_RAMP_EACH 0x05 + +#define LM3633_REG_PTN0_RAMP 0x1C +#define LM3633_REG_PTN1_RAMP 0x1D +#define LM3633_PTN_RAMPUP_MASK 0x70 +#define LM3633_PTN_RAMPUP_SHIFT 4 +#define LM3633_PTN_RAMPDN_MASK 0x07 +#define LM3633_PTN_RAMPDN_SHIFT 0 + +#define LM3633_REG_LED_MAPPING_MODE 0x1F +#define LM3633_LED_EXPONENTIAL BIT(1) + +#define LM3633_REG_IMAX_HVLED_A 0x20 +#define LM3633_REG_IMAX_HVLED_B 0x21 +#define LM3633_REG_IMAX_LVLED_BASE 0x22 + +#define LM3633_REG_BL_FEEDBACK_ENABLE 0x28 + +#define LM3633_REG_ENABLE 0x2B +#define LM3633_LED_BANK_OFFSET 2 + +#define LM3633_REG_PATTERN 0x2C + +#define LM3633_REG_BOOST_CFG 0x2D +#define LM3633_OVP_MASK (BIT(1) | BIT(2)) +#define LM3633_OVP_40V 0x6 + +#define LM3633_REG_PWM_CFG 0x2F +#define LM3633_PWM_A_MASK BIT(0) +#define LM3633_PWM_B_MASK BIT(1) + +#define LM3633_REG_BRT_HVLED_A_LSB 0x40 +#define LM3633_REG_BRT_HVLED_A_MSB 0x41 +#define LM3633_REG_BRT_HVLED_B_LSB 0x42 +#define LM3633_REG_BRT_HVLED_B_MSB 0x43 + +#define LM3633_REG_BRT_LVLED_BASE 0x44 + +#define LM3633_REG_PTN_DELAY 0x50 + +#define LM3633_REG_PTN_LOWTIME 0x51 + +#define LM3633_REG_PTN_HIGHTIME 0x52 + +#define LM3633_REG_PTN_LOWBRT 0x53 + +#define LM3633_REG_PTN_HIGHBRT LM3633_REG_BRT_LVLED_BASE + +#define LM3633_REG_BL_OPEN_FAULT_STATUS 0xB0 + +#define LM3633_REG_BL_SHORT_FAULT_STATUS 0xB2 + +#define LM3633_REG_MONITOR_ENABLE 0xB4 + +#define LM3633_MAX_REG 0xB4 + +/* LM3695 */ +#define LM3695_REG_GP 0x10 +#define LM3695_BL_CHANNEL_MASK BIT(3) +#define LM3695_BL_DUAL_CHANNEL 0 +#define LM3695_BL_SINGLE_CHANNEL BIT(3) +#define LM3695_BRT_RW_MASK BIT(2) +#define LM3695_BL_EN_MASK BIT(0) + +#define LM3695_REG_BRT_LSB 0x13 +#define LM3695_REG_BRT_MSB 0x14 + +#define LM3695_MAX_REG 0x14 + +/* LM3697 */ +#define LM3697_REG_HVLED_OUTPUT_CFG 0x10 +#define LM3697_HVLED1_CFG_MASK BIT(0) +#define LM3697_HVLED2_CFG_MASK BIT(1) +#define LM3697_HVLED3_CFG_MASK BIT(2) +#define LM3697_HVLED1_CFG_SHIFT 0 +#define LM3697_HVLED2_CFG_SHIFT 1 +#define LM3697_HVLED3_CFG_SHIFT 2 + +#define LM3697_REG_BL0_RAMP 0x11 +#define LM3697_REG_BL1_RAMP 0x12 +#define LM3697_RAMPUP_MASK 0xF0 +#define LM3697_RAMPUP_SHIFT 4 +#define LM3697_RAMPDN_MASK 0x0F +#define LM3697_RAMPDN_SHIFT 0 + +#define LM3697_REG_RAMP_CONF 0x14 +#define LM3697_RAMP_MASK 0x0F +#define LM3697_RAMP_EACH 0x05 + +#define LM3697_REG_PWM_CFG 0x1C +#define LM3697_PWM_A_MASK BIT(0) +#define LM3697_PWM_B_MASK BIT(1) + +#define LM3697_REG_IMAX_A 0x17 +#define LM3697_REG_IMAX_B 0x18 + +#define LM3697_REG_FEEDBACK_ENABLE 0x19 + +#define LM3697_REG_BRT_A_LSB 0x20 +#define LM3697_REG_BRT_A_MSB 0x21 +#define LM3697_REG_BRT_B_LSB 0x22 +#define LM3697_REG_BRT_B_MSB 0x23 + +#define LM3697_REG_ENABLE 0x24 + +#define LM3697_REG_OPEN_FAULT_STATUS 0xB0 + +#define LM3697_REG_SHORT_FAULT_STATUS 0xB2 + +#define LM3697_REG_MONITOR_ENABLE 0xB4 + +#define LM3697_MAX_REG 0xB4 +#endif diff --git a/include/linux/mfd/ti-lmu.h b/include/linux/mfd/ti-lmu.h new file mode 100644 index 0000000000000000000000000000000000000000..09d5f30384e56666607be51b2e95b02ba54afd45 --- /dev/null +++ b/include/linux/mfd/ti-lmu.h @@ -0,0 +1,87 @@ +/* + * TI LMU (Lighting Management Unit) Devices + * + * Copyright 2017 Texas Instruments + * + * Author: Milo Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MFD_TI_LMU_H__ +#define __MFD_TI_LMU_H__ + +#include +#include +#include + +/* Notifier event */ +#define LMU_EVENT_MONITOR_DONE 0x01 + +enum ti_lmu_id { + LM3532, + LM3631, + LM3632, + LM3633, + LM3695, + LM3697, + LMU_MAX_ID, +}; + +enum ti_lmu_max_current { + LMU_IMAX_5mA, + LMU_IMAX_6mA, + LMU_IMAX_7mA = 0x03, + LMU_IMAX_8mA, + LMU_IMAX_9mA, + LMU_IMAX_10mA = 0x07, + LMU_IMAX_11mA, + LMU_IMAX_12mA, + LMU_IMAX_13mA, + LMU_IMAX_14mA, + LMU_IMAX_15mA = 0x0D, + LMU_IMAX_16mA, + LMU_IMAX_17mA, + LMU_IMAX_18mA, + LMU_IMAX_19mA, + LMU_IMAX_20mA = 0x13, + LMU_IMAX_21mA, + LMU_IMAX_22mA, + LMU_IMAX_23mA = 0x17, + LMU_IMAX_24mA, + LMU_IMAX_25mA, + LMU_IMAX_26mA, + LMU_IMAX_27mA = 0x1C, + LMU_IMAX_28mA, + LMU_IMAX_29mA, + LMU_IMAX_30mA, +}; + +enum lm363x_regulator_id { + LM3631_BOOST, /* Boost output */ + LM3631_LDO_CONT, /* Display panel controller */ + LM3631_LDO_OREF, /* Gamma reference */ + LM3631_LDO_POS, /* Positive display bias output */ + LM3631_LDO_NEG, /* Negative display bias output */ + LM3632_BOOST, /* Boost output */ + LM3632_LDO_POS, /* Positive display bias output */ + LM3632_LDO_NEG, /* Negative display bias output */ +}; + +/** + * struct ti_lmu + * + * @dev: Parent device pointer + * @regmap: Used for i2c communcation on accessing registers + * @en_gpio: GPIO for HWEN pin [Optional] + * @notifier: Notifier for reporting hwmon event + */ +struct ti_lmu { + struct device *dev; + struct regmap *regmap; + int en_gpio; + struct blocking_notifier_head notifier; +}; +#endif diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index 76c22648436f22dad8ee59cec7f69ee984322a47..b49fa67612f19ba52bf8a3e518f18f15298d3811 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Register values. @@ -367,6 +369,9 @@ struct wm831x { struct regmap *regmap; + struct wm831x_pdata pdata; + enum wm831x_parent type; + int irq; /* Our chip IRQ */ struct mutex irq_lock; struct irq_domain *irq_domain; @@ -412,7 +417,7 @@ int wm831x_set_bits(struct wm831x *wm831x, unsigned short reg, int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg, int count, u16 *buf); -int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq); +int wm831x_device_init(struct wm831x *wm831x, int irq); void wm831x_device_exit(struct wm831x *wm831x); int wm831x_device_suspend(struct wm831x *wm831x); void wm831x_device_shutdown(struct wm831x *wm831x); @@ -427,4 +432,6 @@ static inline int wm831x_irq(struct wm831x *wm831x, int irq) extern struct regmap_config wm831x_regmap_config; +extern const struct of_device_id wm831x_of_match[]; + #endif diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h deleted file mode 100644 index e11f4d9f1c2e0b19a7b00e204dac73f53fc64bea..0000000000000000000000000000000000000000 --- a/include/linux/mg_disk.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * include/linux/mg_disk.c - * - * Private data for mflash platform driver - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MG_DISK_H__ -#define __MG_DISK_H__ - -/* name for platform device */ -#define MG_DEV_NAME "mg_disk" - -/* names of GPIO resource */ -#define MG_RST_PIN "mg_rst" -/* except MG_BOOT_DEV, reset-out pin should be assigned */ -#define MG_RSTOUT_PIN "mg_rstout" - -/* device attribution */ -/* use mflash as boot device */ -#define MG_BOOT_DEV (1 << 0) -/* use mflash as storage device */ -#define MG_STORAGE_DEV (1 << 1) -/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ -#define MG_STORAGE_DEV_SKIP_RST (1 << 2) - -/* private driver data */ -struct mg_drv_data { - /* disk resource */ - u32 use_polling; - - /* device attribution */ - u32 dev_attr; - - /* internally used */ - void *host; -}; - -#endif diff --git a/include/linux/migrate.h b/include/linux/migrate.h index fa76b516fa473bdfd803d09e0206923153613d65..48e24844b3c5074c8bf8c26dcf2be11a32c657e0 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -33,8 +33,9 @@ extern char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -extern int migrate_page(struct address_space *, - struct page *, struct page *, enum migrate_mode); +extern int migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1beb1ec2fbdf339b34affc69508a5f5462b409b0..d5bed0875d309ce95025dbcab502f51815ca431b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -108,7 +108,7 @@ enum { MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; -/* Driver supports 3 diffrent device methods to manage traffic steering: +/* Driver supports 3 different device methods to manage traffic steering: * -device managed - High level API for ib and eth flow steering. FW is * managing flow steering tables. * - B0 steering mode - Common low level API for ib and (if supported) eth. @@ -1011,8 +1011,7 @@ struct mlx4_mad_ifc { #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ - ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) + ((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_ETH)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index b4ee8f62ce8da82720cb79e7a9ea3ec97703b849..8e2828d48d7fcf6a7ba683bf51c8c91a1ad28ec0 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -470,6 +470,7 @@ struct mlx4_update_qp_params { u16 rate_val; }; +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn); int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dd9a263ed368d5476b06a3464bbfada0436cf6e2..a940ec6a046cfd0dc8c3016226c71ea9e2c343e5 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -787,8 +787,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI + */ }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2fcff6b4503f6a4824bea50c189b072ef6c486cb..93273d9ea4d145f125846f0c9a56a5a6e74915e4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -540,6 +540,7 @@ struct mlx5_fc_stats { struct workqueue_struct *wq; struct delayed_work work; unsigned long next_query; + unsigned long sampling_interval; /* jiffies */ }; struct mlx5_eswitch; @@ -728,6 +729,7 @@ struct mlx5e_resources { u32 pdn; struct mlx5_td td; struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; }; struct mlx5_core_dev { @@ -785,7 +787,12 @@ enum { typedef void (*mlx5_cmd_cbk_t)(int status, void *context); +enum { + MLX5_CMD_ENT_STATE_PENDING_COMP, +}; + struct mlx5_cmd_work_ent { + unsigned long state; struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; void *uout; @@ -890,12 +897,7 @@ static inline u16 cmdif_rev(struct mlx5_core_dev *dev) static inline void *mlx5_vzalloc(unsigned long size) { - void *rtn; - - rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!rtn) - rtn = vzalloc(size); - return rtn; + return kvzalloc(size, GFP_KERNEL); } static inline u32 mlx5_base_mkey(const u32 key) @@ -979,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, @@ -1100,6 +1102,25 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); +#ifndef CONFIG_MLX5_CORE_IPOIB +static inline +struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {} +#else +struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)); +void mlx5_rdma_netdev_free(struct net_device *netdev); +#endif /* CONFIG_MLX5_CORE_IPOIB */ + struct mlx5_profile { u64 mask; u8 log_max_qp; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 949b24b6c4794ce14909d779b7dbfd2534aa53db..b25e7baa273e8d9db99fb1e2f33e3086ea875e89 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -104,12 +104,17 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, u32 level, u32 flags); +struct mlx5_flow_table_attr { + int prio; + int max_fte; + u32 level; + u32 flags; +}; + struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - u32 level, - u32 flags); + struct mlx5_flow_table_attr *ft_attr); + struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, @@ -134,8 +139,13 @@ struct mlx5_flow_act { u32 action; u32 flow_tag; u32 encap_id; + u32 modify_id; }; +#define MLX5_DECLARE_FLOW_ACT(name) \ + struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ + MLX5_FS_DEFAULT_FLOW_TAG, 0, 0} + /* Single destination per rule. * Group ID is implied by the match criteria. */ @@ -156,5 +166,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 838242697541a28fdda4d90bf7b604e25f3bfba2..edafedb7b509010c904fccf2cdeffea1afa9317b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -227,6 +227,8 @@ enum { MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_MAX }; @@ -234,7 +236,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; u8 outer_ether_type[0x1]; - u8 reserved_at_3[0x1]; + u8 outer_ip_version[0x1]; u8 outer_first_prio[0x1]; u8 outer_first_cfi[0x1]; u8 outer_first_vid[0x1]; @@ -263,7 +265,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_dmac[0x1]; u8 inner_smac[0x1]; u8 inner_ether_type[0x1]; - u8 reserved_at_23[0x1]; + u8 inner_ip_version[0x1]; u8 inner_first_prio[0x1]; u8 inner_first_cfi[0x1]; u8 inner_first_vid[0x1]; @@ -302,7 +304,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; - u8 reserved_at_28[0x10]; + u8 log_max_modify_header_context[0x8]; + u8 max_modify_header_actions[0x8]; u8 max_ft_level[0x8]; u8 reserved_at_40[0x20]; @@ -368,7 +371,7 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 cvlan_tag[0x1]; u8 svlan_tag[0x1]; u8 frag[0x1]; - u8 reserved_at_93[0x4]; + u8 ip_version[0x4]; u8 tcp_flags[0x9]; u8 tcp_sport[0x10]; @@ -763,6 +766,12 @@ enum { MLX5_CAP_PORT_TYPE_ETH = 0x1, }; +enum { + MLX5_CAP_UMR_FENCE_STRONG = 0x0, + MLX5_CAP_UMR_FENCE_SMALL = 0x1, + MLX5_CAP_UMR_FENCE_NONE = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x80]; @@ -869,9 +878,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 compact_address_vector[0x1]; u8 striding_rq[0x1]; - u8 reserved_at_202[0x2]; + u8 reserved_at_202[0x1]; + u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0xa]; + u8 reserved_at_205[0x5]; + u8 umr_fence[0x2]; + u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; @@ -1452,7 +1464,9 @@ struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { u8 vl_15_dropped[0x10]; - u8 reserved_at_a0[0xa0]; + u8 reserved_at_a0[0x80]; + + u8 port_xmit_wait[0x20]; }; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { @@ -2190,6 +2204,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, }; struct mlx5_ifc_flow_context_bits { @@ -2211,7 +2226,9 @@ struct mlx5_ifc_flow_context_bits { u8 encap_id[0x20]; - u8 reserved_at_e0[0x120]; + u8 modify_header_id[0x20]; + + u8 reserved_at_100[0x100]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -2287,7 +2304,9 @@ struct mlx5_ifc_tisc_bits { u8 reserved_at_120[0x8]; u8 transport_domain[0x18]; - u8 reserved_at_140[0x3c0]; + u8 reserved_at_140[0x8]; + u8 underlay_qpn[0x18]; + u8 reserved_at_160[0x3a0]; }; enum { @@ -4534,6 +4553,109 @@ struct mlx5_ifc_dealloc_encap_header_in_bits { u8 reserved_60[0x20]; }; +struct mlx5_ifc_set_action_in_bits { + u8 action_type[0x4]; + u8 field[0xc]; + u8 reserved_at_10[0x3]; + u8 offset[0x5]; + u8 reserved_at_18[0x3]; + u8 length[0x5]; + + u8 data[0x20]; +}; + +struct mlx5_ifc_add_action_in_bits { + u8 action_type[0x4]; + u8 field[0xc]; + u8 reserved_at_10[0x10]; + + u8 data[0x20]; +}; + +union mlx5_ifc_set_action_in_add_action_in_auto_bits { + struct mlx5_ifc_set_action_in_bits set_action_in; + struct mlx5_ifc_add_action_in_bits add_action_in; + u8 reserved_at_0[0x40]; +}; + +enum { + MLX5_ACTION_TYPE_SET = 0x1, + MLX5_ACTION_TYPE_ADD = 0x2, +}; + +enum { + MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16 = 0x1, + MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0 = 0x2, + MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE = 0x3, + MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16 = 0x4, + MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0 = 0x5, + MLX5_ACTION_IN_FIELD_OUT_IP_DSCP = 0x6, + MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS = 0x7, + MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT = 0x8, + MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT = 0x9, + MLX5_ACTION_IN_FIELD_OUT_IP_TTL = 0xa, + MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT = 0xb, + MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT = 0xc, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96 = 0xd, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64 = 0xe, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32 = 0xf, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0 = 0x10, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96 = 0x11, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64 = 0x12, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32 = 0x13, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0 = 0x14, + MLX5_ACTION_IN_FIELD_OUT_SIPV4 = 0x15, + MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, +}; + +struct mlx5_ifc_alloc_modify_header_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 modify_header_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_alloc_modify_header_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 table_type[0x8]; + u8 reserved_at_68[0x10]; + u8 num_of_actions[0x8]; + + union mlx5_ifc_set_action_in_add_action_in_auto_bits actions[0]; +}; + +struct mlx5_ifc_dealloc_modify_header_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_modify_header_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 modify_header_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4623,17 +4745,17 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 reserved_at_40[0x40]; - u8 cur_flows[0x20]; + u8 rp_cur_flows[0x20]; u8 sum_flows[0x20]; - u8 cnp_ignored_high[0x20]; + u8 rp_cnp_ignored_high[0x20]; - u8 cnp_ignored_low[0x20]; + u8 rp_cnp_ignored_low[0x20]; - u8 cnp_handled_high[0x20]; + u8 rp_cnp_handled_high[0x20]; - u8 cnp_handled_low[0x20]; + u8 rp_cnp_handled_low[0x20]; u8 reserved_at_140[0x100]; @@ -4643,13 +4765,13 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 accumulators_period[0x20]; - u8 ecn_marked_roce_packets_high[0x20]; + u8 np_ecn_marked_roce_packets_high[0x20]; - u8 ecn_marked_roce_packets_low[0x20]; + u8 np_ecn_marked_roce_packets_low[0x20]; - u8 cnps_sent_high[0x20]; + u8 np_cnp_sent_high[0x20]; - u8 cnps_sent_low[0x20]; + u8 np_cnp_sent_low[0x20]; u8 reserved_at_320[0x560]; }; @@ -5013,6 +5135,7 @@ struct mlx5_ifc_modify_rq_out_bits { enum { MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS = 1ULL << 2, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3, }; @@ -8108,7 +8231,9 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_at_c0[0x140]; + u8 reserved_at_c0[0x8]; + u8 underlay_qpn[0x18]; + u8 reserved_at_e0[0x120]; }; enum { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 3096370fe8319ec76f823b920ded745a10ebb6fd..bef80d0a0e30bcfb446abf14417789a34eb28af8 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -295,6 +295,16 @@ struct mlx5_av { u8 rgid[16]; }; +struct mlx5_ib_ah { + struct ib_ah ibah; + struct mlx5_av av; +}; + +static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx5_ib_ah, ibah); +} + struct mlx5_wqe_datagram_seg { struct mlx5_av av; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 00a8fa7e366a0320210941ca39dd53fed97d4e2c..6f543a47fc92ecc89cdd49c00f4c0e9acf0ea2eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -432,6 +432,10 @@ static inline int pud_devmap(pud_t pud) { return 0; } +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} #endif /* @@ -514,6 +518,28 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + +static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + + return kvmalloc(n * size, flags); +} + extern void kvfree(const void *addr); static inline atomic_t *compound_mapcount_ptr(struct page *page) @@ -758,19 +784,11 @@ static inline enum zone_type page_zonenum(const struct page *page) } #ifdef CONFIG_ZONE_DEVICE -void get_zone_device_page(struct page *page); -void put_zone_device_page(struct page *page); static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } #else -static inline void get_zone_device_page(struct page *page) -{ -} -static inline void put_zone_device_page(struct page *page) -{ -} static inline bool is_zone_device_page(const struct page *page) { return false; @@ -786,9 +804,6 @@ static inline void get_page(struct page *page) */ VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); page_ref_inc(page); - - if (unlikely(is_zone_device_page(page))) - get_zone_device_page(page); } static inline void put_page(struct page *page) @@ -797,9 +812,6 @@ static inline void put_page(struct page *page) if (put_page_testzero(page)) __put_page(page); - - if (unlikely(is_zone_device_page(page))) - put_zone_device_page(page); } #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) @@ -1381,12 +1393,6 @@ int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -/* Is the vma a continuation of the stack vma above it? */ -static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline bool vma_is_anonymous(struct vm_area_struct *vma) { return !vma->vm_ops; @@ -1402,28 +1408,6 @@ bool vma_is_shmem(struct vm_area_struct *vma); static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } #endif -static inline int stack_guard_page_start(struct vm_area_struct *vma, - unsigned long addr) -{ - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_growsdown(vma->vm_prev, addr); -} - -/* Is the vma a continuation of the stack vma below it? */ -static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); -} - -static inline int stack_guard_page_end(struct vm_area_struct *vma, - unsigned long addr) -{ - return (vma->vm_flags & VM_GROWSUP) && - (vma->vm_end == addr) && - !vma_growsup(vma->vm_next, addr); -} - int vma_is_stack_for_current(struct vm_area_struct *vma); extern unsigned long move_page_tables(struct vm_area_struct *vma, @@ -2210,6 +2194,7 @@ void page_cache_async_readahead(struct address_space *mapping, pgoff_t offset, unsigned long size); +extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); @@ -2238,6 +2223,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m return vma; } +static inline unsigned long vm_start_gap(struct vm_area_struct *vma) +{ + unsigned long vm_start = vma->vm_start; + + if (vma->vm_flags & VM_GROWSDOWN) { + vm_start -= stack_guard_gap; + if (vm_start > vma->vm_start) + vm_start = 0; + } + return vm_start; +} + +static inline unsigned long vm_end_gap(struct vm_area_struct *vma) +{ + unsigned long vm_end = vma->vm_end; + + if (vma->vm_flags & VM_GROWSUP) { + vm_end += stack_guard_gap; + if (vm_end < vma->vm_end) + vm_end = -PAGE_SIZE; + } + return vm_end; +} + static inline unsigned long vma_pages(struct vm_area_struct *vma) { return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; @@ -2315,6 +2324,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ +static inline int vm_fault_to_errno(int vm_fault, int foll_flags) +{ + if (vm_fault & VM_FAULT_OOM) + return -ENOMEM; + if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT; + if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) + return -EFAULT; + return 0; +} + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, @@ -2497,7 +2517,6 @@ extern long copy_huge_page_from_user(struct page *dst_page, #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ extern struct page_ext_operations debug_guardpage_ops; -extern struct page_ext_operations page_poisoning_ops; #ifdef CONFIG_DEBUG_PAGEALLOC extern unsigned int _debug_guardpage_minorder; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f60f45fe226fcad85590b1eda3fafb1d170e943a..45cdb27791a33ff8d494549ce92f080a93434889 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -367,6 +367,11 @@ struct mm_struct { #endif unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ +#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES + /* Base adresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; +#endif unsigned long task_size; /* size of task vm space */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 77e61e0a216a2728dd5cfecf55299402ac03c384..aad015e0152b7f1d32f92c500825b723498d1be9 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -89,6 +89,7 @@ struct mmc_ext_csd { unsigned int boot_ro_lock; /* ro lock support */ bool boot_ro_lockable; bool ffu_capable; /* Firmware upgrade support */ + bool cmdq_en; /* Command Queue enabled */ bool cmdq_support; /* Command Queue supported */ unsigned int cmdq_depth; /* Command Queue depth */ #define MMC_FIRMWARE_LEN 8 @@ -208,6 +209,7 @@ struct sdio_cis { struct mmc_host; struct sdio_func; struct sdio_func_tuple; +struct mmc_queue_req; #define SDIO_MAX_FUNCS 7 @@ -267,6 +269,8 @@ struct mmc_card { #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ + bool reenable_cmdq; /* Re-enable Command Queue */ + unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ unsigned int pref_erase; /* in sectors */ @@ -300,6 +304,10 @@ struct mmc_card { struct dentry *debugfs_root; struct mmc_part part[MMC_NUM_PHY_PARTITION]; /* physical partitions */ unsigned int nr_parts; + + struct mmc_queue_req *mqrq; /* Shared queue structure */ + unsigned int bouncesz; /* Bounce buffer size */ + int qdepth; /* Shared queue depth */ }; static inline bool mmc_large_sector(struct mmc_card *card) @@ -307,6 +315,8 @@ static inline bool mmc_large_sector(struct mmc_card *card) return card->ext_csd.data_sector_size == 4096; } +bool mmc_card_is_blockaddr(struct mmc_card *card); + #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) #define mmc_card_sd(c) ((c)->type == MMC_TYPE_SD) #define mmc_card_sdio(c) ((c)->type == MMC_TYPE_SDIO) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 83f1c4a9f03b7e36b4cb9ed23346d443c2cb4f4c..21385ac0c9b1c9cebea155571bbbb9047af50685 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -17,6 +17,7 @@ #include #include #include +#include struct mmc_ios { unsigned int clock; /* clock rate */ @@ -499,6 +500,11 @@ static inline bool mmc_can_retune(struct mmc_host *host) return host->can_retune == 1; } +static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data) +{ + return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE; +} + int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_abort_tuning(struct mmc_host *host, u32 opcode); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index aab032a6ae6124de63b7934f49d0dc564b0d2dc3..97ca105347a6c5e608297ae1a3562925dc6f6834 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -53,7 +53,7 @@ struct sdio_func { unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ - u8 tmpbuf[4]; /* DMA:able scratch buffer */ + u8 *tmpbuf; /* DMA:able scratch buffer */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8e02b3750fe0f6e18afeb8cfc096705f23405728..ef6a13b7bd3e851385bea32434e207a5cf6eec7f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,7 +35,7 @@ */ #define PAGE_ALLOC_COSTLY_ORDER 3 -enum { +enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, @@ -74,6 +74,11 @@ extern char * const migratetype_names[MIGRATE_TYPES]; # define is_migrate_cma_page(_page) false #endif +static inline bool is_migrate_movable(int mt) +{ + return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; +} + #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) @@ -149,7 +154,6 @@ enum node_stat_item { NR_UNEVICTABLE, /* " " " " " */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ - NR_PAGES_SCANNED, /* pages scanned since last reclaim */ WORKINGSET_REFAULT, WORKINGSET_ACTIVATE, WORKINGSET_NODERECLAIM, @@ -226,6 +230,8 @@ struct lruvec { struct zone_reclaim_stat reclaim_stat; /* Evictions & activations on the inactive file list */ atomic_long_t inactive_age; + /* Refaults at the time of last reclaim cycle */ + unsigned long refaults; #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif @@ -630,6 +636,8 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_classzone_idx; + int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; @@ -670,6 +678,7 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; + unsigned long static_init_size; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8850fcaf50dba05d9440b958c5d95cd9330e727a..3f74ef2281e8afac1e4667b4fbf4abcc5f89ff17 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -428,6 +428,16 @@ struct i2c_device_id { kernel_ulong_t driver_data; /* Data private to the driver */ }; +/* pci_epf */ + +#define PCI_EPF_NAME_SIZE 20 +#define PCI_EPF_MODULE_PREFIX "pci_epf:" + +struct pci_epf_device_id { + char name[PCI_EPF_NAME_SIZE]; + kernel_ulong_t driver_data; +}; + /* spi */ #define SPI_NAME_SIZE 32 @@ -457,6 +467,7 @@ enum dmi_field { DMI_PRODUCT_VERSION, DMI_PRODUCT_SERIAL, DMI_PRODUCT_UUID, + DMI_PRODUCT_FAMILY, DMI_BOARD_VENDOR, DMI_BOARD_NAME, DMI_BOARD_VERSION, diff --git a/include/linux/module.h b/include/linux/module.h index 0297c5cd7cdfc6692a400cac545d1a07637ab8b2..21f56393602f2000b901238cd9a7911e23e04448 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -493,6 +493,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); @@ -582,7 +583,7 @@ extern bool try_module_get(struct module *module); extern void module_put(struct module *module); #else /*!CONFIG_MODULE_UNLOAD*/ -static inline int try_module_get(struct module *module) +static inline bool try_module_get(struct module *module) { return !module || module_is_live(module); } @@ -660,6 +661,11 @@ static inline bool is_module_percpu_address(unsigned long addr) return false; } +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) +{ + return false; +} + static inline bool is_module_text_address(unsigned long addr) { return false; @@ -674,9 +680,9 @@ static inline void __module_get(struct module *module) { } -static inline int try_module_get(struct module *module) +static inline bool try_module_get(struct module *module) { - return 1; + return true; } static inline void module_put(struct module *module) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 52666d90ca9452251c539383ecc7b038ba9afbea..6be1949ebcdff2dd2dcb5270618c7a56a42fdc40 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -60,9 +60,11 @@ struct kernel_param_ops { * Flags available for kernel_param * * UNSAFE - the parameter is dangerous and setting it will taint the kernel + * HWPARAM - Hardware param not permitted in lockdown mode */ enum { - KERNEL_PARAM_FL_UNSAFE = (1 << 0) + KERNEL_PARAM_FL_UNSAFE = (1 << 0), + KERNEL_PARAM_FL_HWPARAM = (1 << 1), }; struct kernel_param { @@ -451,6 +453,67 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) +enum hwparam_type { + hwparam_ioport, /* Module parameter configures an I/O port */ + hwparam_iomem, /* Module parameter configures an I/O mem address */ + hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */ + hwparam_irq, /* Module parameter configures an I/O port */ + hwparam_dma, /* Module parameter configures a DMA channel */ + hwparam_dma_addr, /* Module parameter configures a DMA buffer address */ + hwparam_other, /* Module parameter configures some other value */ +}; + +/** + * module_param_hw_named - A parameter representing a hw parameters + * @name: a valid C identifier which is the parameter name. + * @value: the actual lvalue to alter. + * @type: the type of the parameter + * @hwtype: what the value represents (enum hwparam_type) + * @perm: visibility in sysfs. + * + * Usually it's a good idea to have variable names and user-exposed names the + * same, but that's harder if the variable must be non-static or is inside a + * structure. This allows exposure under a different name. + */ +#define module_param_hw_named(name, value, type, hwtype, perm) \ + param_check_##type(name, &(value)); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + ¶m_ops_##type, &value, \ + perm, -1, \ + KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0)); \ + __MODULE_PARM_TYPE(name, #type) + +#define module_param_hw(name, type, hwtype, perm) \ + module_param_hw_named(name, name, type, hwtype, perm) + +/** + * module_param_hw_array - A parameter representing an array of hw parameters + * @name: the name of the array variable + * @type: the type, as per module_param() + * @hwtype: what the value represents (enum hwparam_type) + * @nump: optional pointer filled in with the number written + * @perm: visibility in sysfs + * + * Input and output are as comma-separated values. Commas inside values + * don't work properly (eg. an array of charp). + * + * ARRAY_SIZE(@name) is used to determine the number of elements in the + * array, so the definition must be visible. + */ +#define module_param_hw_array(name, type, hwtype, nump, perm) \ + param_check_##type(name, &(name)[0]); \ + static const struct kparam_array __param_arr_##name \ + = { .max = ARRAY_SIZE(name), .num = nump, \ + .ops = ¶m_ops_##type, \ + .elemsize = sizeof(name[0]), .elem = name }; \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + ¶m_array_ops, \ + .arr = &__param_arr_##name, \ + perm, -1, \ + KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0)); \ + __MODULE_PARM_TYPE(name, "array of " #type) + + extern const struct kernel_param_ops param_array_ops; extern const struct kernel_param_ops param_ops_string; diff --git a/include/linux/mpls.h b/include/linux/mpls.h index 9999145bc19007105db13aef9d473296f2f0c5a9..384fb22b6c436e357919656232342c0d538b8de3 100644 --- a/include/linux/mpls.h +++ b/include/linux/mpls.h @@ -3,4 +3,9 @@ #include +#define MPLS_TTL_MASK (MPLS_LS_TTL_MASK >> MPLS_LS_TTL_SHIFT) +#define MPLS_BOS_MASK (MPLS_LS_S_MASK >> MPLS_LS_S_SHIFT) +#define MPLS_TC_MASK (MPLS_LS_TC_MASK >> MPLS_LS_TC_SHIFT) +#define MPLS_LABEL_MASK (MPLS_LS_LABEL_MASK >> MPLS_LS_LABEL_SHIFT) + #endif /* _LINUX_MPLS_H */ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eebdc63cf6af94a5ea9a5c4703c416f87e0079a8..f8a2ef239c60adf6ce49f5ebcdbf576db0bd434d 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -334,11 +334,6 @@ struct mtd_info { int (*_get_device) (struct mtd_info *mtd); void (*_put_device) (struct mtd_info *mtd); - /* Backing device capabilities for this device - * - provides mmap capabilities - */ - struct backing_dev_info *backing_dev_info; - struct notifier_block reboot_notifier; /* default mode before reboot */ /* ECC status information */ @@ -393,7 +388,7 @@ static inline void mtd_set_of_node(struct mtd_info *mtd, static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) { - return mtd->dev.of_node; + return dev_of_node(&mtd->dev); } static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9591e0fbe5bd76a1c47e2deb4292469e0a2cf8e6..8f67b15816836a1127fddae062c1ad76774e4534 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -366,26 +366,6 @@ struct onfi_ext_param_page { */ } __packed; -struct nand_onfi_vendor_micron { - u8 two_plane_read; - u8 read_cache; - u8 read_unique_id; - u8 dq_imped; - u8 dq_imped_num_settings; - u8 dq_imped_feat_addr; - u8 rb_pulldown_strength; - u8 rb_pulldown_strength_feat_addr; - u8 rb_pulldown_strength_num_settings; - u8 otp_mode; - u8 otp_page_start; - u8 otp_data_prot_addr; - u8 otp_num_pages; - u8 otp_feat_addr; - u8 read_retry_options; - u8 reserved[72]; - u8 param_revision; -} __packed; - struct jedec_ecc_info { u8 ecc_bits; u8 codeword_size; @@ -464,6 +444,17 @@ struct nand_jedec_params { __le16 crc; } __packed; +/** + * struct nand_id - NAND id structure + * @data: buffer containing the id bytes. Currently 8 bytes large, but can + * be extended if required. + * @len: ID length. + */ +struct nand_id { + u8 data[8]; + int len; +}; + /** * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices * @lock: protection lock @@ -525,7 +516,7 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc) * out-of-band data). * @read_page: function to read a page according to the ECC generator * requirements; returns maximum number of bitflips corrected in - * any single ECC step, 0 if bitflips uncorrectable, -EIO hw error + * any single ECC step, -EIO hw error * @read_subpage: function to read parts of the page covered by ECC; * returns same as read_page() * @write_subpage: function to write parts of the page covered by ECC. @@ -720,6 +711,20 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) return &conf->timings.sdr; } +/** + * struct nand_manufacturer_ops - NAND Manufacturer operations + * @detect: detect the NAND memory organization and capabilities + * @init: initialize all vendor specific fields (like the ->read_retry() + * implementation) if any. + * @cleanup: the ->init() function may have allocated resources, ->cleanup() + * is here to let vendor specific code release those resources. + */ +struct nand_manufacturer_ops { + void (*detect)(struct nand_chip *chip); + int (*init)(struct nand_chip *chip); + void (*cleanup)(struct nand_chip *chip); +}; + /** * struct nand_chip - NAND Private Flash Chip Data * @mtd: MTD device registered to the MTD framework @@ -750,6 +755,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure * @buffers: buffer structure for read/write + * @buf_align: minimum buffer alignment required by a platform * @hwcontrol: platform-specific hardware control structure * @erase: [REPLACEABLE] erase function * @scan_bbt: [REPLACEABLE] function to scan bad block table @@ -793,6 +799,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * @pagebuf_bitflips: [INTERN] holds the bitflip count for the page which is * currently in data_buf. * @subpagesize: [INTERN] holds the subpagesize + * @id: [INTERN] holds NAND ID * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), * non 0 if ONFI supported. * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), @@ -822,7 +829,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * @errstat: [OPTIONAL] hardware specific function to perform * additional error status checks (determine if errors are * correctable). - * @write_page: [REPLACEABLE] High-level page write function + * @manufacturer: [INTERN] Contains manufacturer information */ struct nand_chip { @@ -847,9 +854,6 @@ struct nand_chip { int (*scan_bbt)(struct mtd_info *mtd); int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, int status, int page); - int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offset, int data_len, const uint8_t *buf, - int oob_required, int page, int cached, int raw); int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, @@ -881,6 +885,7 @@ struct nand_chip { int badblockpos; int badblockbits; + struct nand_id id; int onfi_version; int jedec_version; union { @@ -901,6 +906,7 @@ struct nand_chip { struct nand_ecc_ctrl ecc; struct nand_buffers *buffers; + unsigned long buf_align; struct nand_hw_control hwcontrol; uint8_t *bbt; @@ -910,6 +916,11 @@ struct nand_chip { struct nand_bbt_descr *badblock_pattern; void *priv; + + struct { + const struct nand_manufacturer *desc; + void *priv; + } manufacturer; }; extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; @@ -946,6 +957,17 @@ static inline void nand_set_controller_data(struct nand_chip *chip, void *priv) chip->priv = priv; } +static inline void nand_set_manufacturer_data(struct nand_chip *chip, + void *priv) +{ + chip->manufacturer.priv = priv; +} + +static inline void *nand_get_manufacturer_data(struct nand_chip *chip) +{ + return chip->manufacturer.priv; +} + /* * NAND Flash Manufacturer ID Codes */ @@ -1049,17 +1071,33 @@ struct nand_flash_dev { }; /** - * struct nand_manufacturers - NAND Flash Manufacturer ID Structure + * struct nand_manufacturer - NAND Flash Manufacturer structure * @name: Manufacturer name * @id: manufacturer ID code of device. + * @ops: manufacturer operations */ -struct nand_manufacturers { +struct nand_manufacturer { int id; char *name; + const struct nand_manufacturer_ops *ops; }; +const struct nand_manufacturer *nand_get_manufacturer(u8 id); + +static inline const char * +nand_manufacturer_name(const struct nand_manufacturer *manufacturer) +{ + return manufacturer ? manufacturer->name : "Unknown"; +} + extern struct nand_flash_dev nand_flash_ids[]; -extern struct nand_manufacturers nand_manuf_ids[]; + +extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops; +extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; +extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; +extern const struct nand_manufacturer_ops micron_nand_manuf_ops; +extern const struct nand_manufacturer_ops amd_nand_manuf_ops; +extern const struct nand_manufacturer_ops macronix_nand_manuf_ops; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); @@ -1226,4 +1264,6 @@ int nand_reset(struct nand_chip *chip, int chipnr); /* Free resources held by the NAND device */ void nand_cleanup(struct nand_chip *chip); +/* Default extended ID decoding function */ +void nand_decode_ext_id(struct nand_chip *chip); #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/namei.h b/include/linux/namei.h index f29abda31e6dc8b32376a86c6c52e9e9ca6da0a0..8b4794e83196db62997ef5aea81886c3ad800bbd 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +#define LOOKUP_DOWN 0x8000 extern int path_pts(struct path *path); diff --git a/include/linux/nd.h b/include/linux/nd.h index fa66aeed441a5882a1dcad4ad7a8c89faca1abf3..194b8e002ea74d9e0cef5ec184cf23833c0b6215 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -48,7 +48,7 @@ struct nd_namespace_common { struct device dev; struct device *claim; int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, - void *buf, size_t size, int rw); + void *buf, size_t size, int rw, unsigned long flags); }; static inline struct nd_namespace_common *to_ndns(struct device *dev) @@ -134,9 +134,10 @@ static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device * * @buf is up-to-date upon return from this routine. */ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size) + resource_size_t offset, void *buf, size_t size, + unsigned long flags) { - return ndns->rw_bytes(ndns, offset, buf, size, READ); + return ndns->rw_bytes(ndns, offset, buf, size, READ, flags); } /** @@ -152,9 +153,10 @@ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, * to media is handled internal to the @ndns driver, if at all. */ static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size) + resource_size_t offset, void *buf, size_t size, + unsigned long flags) { - return ndns->rw_bytes(ndns, offset, buf, size, WRITE); + return ndns->rw_bytes(ndns, offset, buf, size, WRITE, flags); } #define MODULE_ALIAS_ND_DEVICE(type) \ diff --git a/include/linux/net.h b/include/linux/net.h index 0620f5e18c96b706b70fb71005b29008a11fffb1..abcfa46a2bd9a6f9eb242dacb199070a05d61014 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); +/* Routine returns the IP overhead imposed by a (caller-protected) socket. */ +u32 kernel_sock_ip_overhead(struct sock *sk); + #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9a0419594e842ca00a5ecfca53823b38bad207bb..1d4737cffc7116c898b42787680b2e9b0dd46812 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -54,8 +54,9 @@ enum { */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ + NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_SCTP_BIT, + NETIF_F_GSO_ESP_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -73,6 +74,8 @@ enum { NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ + NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */ + NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ /* * Add your fresh new feature above and remember to update @@ -129,11 +132,14 @@ enum { #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) +#define NETIF_F_GSO_ESP __NETIF_F(GSO_ESP) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_HW_TC __NETIF_F(HW_TC) +#define NETIF_F_HW_ESP __NETIF_F(HW_ESP) +#define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97456b2539e46d6232dda804f6a434db6fd7134f..4ed952c17fc7757965c26d2eba1c56c39d5b4ccc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -41,7 +41,6 @@ #include #include -#include #ifdef CONFIG_DCB #include #endif @@ -57,6 +56,8 @@ struct netpoll_info; struct device; struct phy_device; +struct dsa_switch_tree; + /* 802.11 specific */ struct wireless_dev; /* 802.15.4 specific */ @@ -236,8 +237,7 @@ struct netdev_hw_addr_list { netdev_hw_addr_list_for_each(ha, &(dev)->mc) struct hh_cache { - u16 hh_len; - u16 __pad; + unsigned int hh_len; seqlock_t hh_lock; /* cached hardware header; allow for machine alignment needs. */ @@ -786,11 +786,11 @@ struct tc_cls_u32_offload; struct tc_to_netdev { unsigned int type; union { - u8 tc; struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; struct tc_cls_bpf_offload *cls_bpf; + struct tc_mqprio_qopt *mqprio; }; bool egress_dev; }; @@ -813,16 +813,31 @@ enum xdp_netdev_command { XDP_QUERY_PROG, }; +struct netlink_ext_ack; + struct netdev_xdp { enum xdp_netdev_command command; union { /* XDP_SETUP_PROG */ - struct bpf_prog *prog; + struct { + struct bpf_prog *prog; + struct netlink_ext_ack *extack; + }; /* XDP_QUERY_PROG */ bool prog_attached; }; }; +#ifdef CONFIG_XFRM_OFFLOAD +struct xfrmdev_ops { + int (*xdo_dev_state_add) (struct xfrm_state *x); + void (*xdo_dev_state_delete) (struct xfrm_state *x); + void (*xdo_dev_state_free) (struct xfrm_state *x); + bool (*xdo_dev_offload_ok) (struct sk_buff *skb, + struct xfrm_state *x); +}; +#endif + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -899,8 +914,7 @@ struct netdev_xdp { * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); * Called when a user wants to change the Maximum Transfer Unit - * of a device. If not defined, any request to change MTU will - * will return an error. + * of a device. * * void (*ndo_tx_timeout)(struct net_device *dev); * Callback used when the transmitter has not made any progress @@ -1581,8 +1595,8 @@ enum netdev_priv_flags { * @rtnl_link_state: This enum represents the phases of creating * a new link * - * @destructor: Called from unregister, - * can be used to call free_netdev + * @needs_free_netdev: Should unregister perform free_netdev? + * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside * @@ -1696,6 +1710,10 @@ struct net_device { const struct ndisc_ops *ndisc_ops; #endif +#ifdef CONFIG_XFRM + const struct xfrmdev_ops *xfrmdev_ops; +#endif + const struct header_ops *header_ops; unsigned int flags; @@ -1715,7 +1733,7 @@ struct net_device { unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; - unsigned short min_header_len; + unsigned char min_header_len; unsigned short needed_headroom; unsigned short needed_tailroom; @@ -1776,6 +1794,7 @@ struct net_device { unsigned int real_num_rx_queues; #endif + struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -1838,7 +1857,8 @@ struct net_device { RTNL_LINK_INITIALIZING, } rtnl_link_state:16; - void (*destructor)(struct net_device *dev); + bool needs_free_netdev; + void (*priv_destructor)(struct net_device *dev); #ifdef CONFIG_NETPOLL struct netpoll_info __rcu *npinfo; @@ -1894,6 +1914,13 @@ struct net_device { }; #define to_net_dev(d) container_of(d, struct net_device, dev) +static inline bool netif_elide_gro(const struct net_device *dev) +{ + if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) + return true; + return false; +} + #define NETDEV_ALIGN 32 static inline @@ -2004,15 +2031,6 @@ void dev_net_set(struct net_device *dev, struct net *net) write_pnet(&dev->nd_net, net); } -static inline bool netdev_uses_dsa(struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_NET_DSA) - if (dev->dsa_ptr != NULL) - return dsa_uses_tagged_protocol(dev->dsa_ptr); -#endif - return false; -} - /** * netdev_priv - access network device private data * @dev: network device @@ -3278,10 +3296,15 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); + +typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, @@ -3305,6 +3328,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; +extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); @@ -3394,10 +3418,10 @@ static inline void netif_dormant_off(struct net_device *dev) } /** - * netif_dormant - test if carrier present + * netif_dormant - test if device is dormant * @dev: network device * - * Check if carrier is present on device + * Check if device is dormant. */ static inline bool netif_dormant(const struct net_device *dev) { @@ -4078,6 +4102,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } @@ -4180,6 +4205,11 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } +static inline bool netif_is_ovs_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_OVS_DATAPATH; +} + static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; @@ -4231,6 +4261,11 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } +static inline bool netdev_unregistering(const struct net_device *dev) +{ + return dev->reg_state == NETREG_UNREGISTERING; +} + static inline const char *netdev_reg_state(const struct net_device *dev) { switch (dev->reg_state) { diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1b49209dd5c7cd74644624007ce57df107c3744b..996711d8a7b4b536990c05d698243cdbb3413e44 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,6 +41,11 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, int flags); +static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) +{ + return subsys << 8 | msg_type; +} + void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be378cf47fcc93fa2c89c6cd870f0a9fecd65211..b3044c2c62cbe8e856605129aa17f111e2a21493 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m, int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size); + int usersize, int size, int aligned_size); void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 984b2112c77b642f75e3f121387e955aa3f22b93..e0cbf17af780e1d3e4c2be6bba351c9a27cebf88 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -109,8 +109,10 @@ struct ebt_table { #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ ~(__alignof__(struct _xt_align)-1)) extern struct ebt_table *ebt_register_table(struct net *net, - const struct ebt_table *table); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table); + const struct ebt_table *table, + const struct nf_hook_ops *); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table, + const struct nf_hook_ops *); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); @@ -123,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif diff --git a/include/linux/netlink.h b/include/linux/netlink.h index da14ab61f3634cae5c9ec1636c2774a6f388083d..5fff5ba5964e25e3aa5814509d08943409d471b5 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -62,11 +62,47 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } +/* this can be increased when necessary - don't expose to userland */ +#define NETLINK_MAX_COOKIE_LEN 20 + +/** + * struct netlink_ext_ack - netlink extended ACK report struct + * @_msg: message string to report - don't access directly, use + * %NL_SET_ERR_MSG + * @bad_attr: attribute with error + * @cookie: cookie data to return to userspace (for success) + * @cookie_len: actual cookie data length + */ +struct netlink_ext_ack { + const char *_msg; + const struct nlattr *bad_attr; + u8 cookie[NETLINK_MAX_COOKIE_LEN]; + u8 cookie_len; +}; + +/* Always use this macro, this allows later putting the + * message into a separate section or such for things + * like translation or listing all possible messages. + * Currently string formatting is not supported (due + * to the lack of an output buffer.) + */ +#define NL_SET_ERR_MSG(extack, msg) do { \ + static const char __msg[] = (msg); \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) \ + __extack->_msg = __msg; \ +} while (0) + +#define NL_SET_ERR_MSG_MOD(extack, msg) \ + NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); -extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); +extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 287f341610864f745e44fb4cbfb4ad44a26f4ea5..bb0eb2c9acca7d19ddf14c65480a9ee27715c561 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -76,6 +76,7 @@ struct nfs_open_context { #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) +#define NFS_CONTEXT_UNLOCK (3) int error; struct list_head list; @@ -499,24 +500,12 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); +extern int nfs_wb_page(struct inode *inode, struct page *page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_commit_data *nfs_commitdata_alloc(void); +extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); -static inline int -nfs_wb_launder_page(struct inode *inode, struct page *page) -{ - return nfs_wb_single_page(inode, page, true); -} - -static inline int -nfs_wb_page(struct inode *inode, struct page *page) -{ - return nfs_wb_single_page(inode, page, false); -} - static inline int nfs_have_writebacks(struct inode *inode) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b34097c6784864193e67053f8fa8e5bbbc092071..e418a1096662c387f1b96e834d90983c22e5106f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -133,7 +133,6 @@ struct nfs_server { struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ - struct backing_dev_info backing_dev_info; atomic_long_t writeback; /* number of writeback pages */ int flags; /* various flags */ unsigned int caps; /* server capabilities */ @@ -222,6 +221,7 @@ struct nfs_server { u32 mountd_version; unsigned short mountd_port; unsigned short mountd_protocol; + struct rpc_wait_queue uoc_rpcwaitq; }; /* Server capabilities */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 957049f72290d4b6c328845535c664aea4afc7c0..247cc3d3498f241b19f57b486685986b81d95e56 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -64,7 +64,6 @@ struct nfs_pageio_ops { }; struct nfs_rw_ops { - const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, @@ -124,7 +123,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int how); + int how, + gfp_t gfp_flags); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, @@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool); extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); /* * Lock the page of an asynchronous request diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 348f7c158084b8de71c2a0a2f6941b0db7bf2c3b..b28c83475ee8e9106c5f567551e4307156dfe465 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1383,6 +1383,7 @@ struct nfs42_copy_res { struct nfs42_write_res write_res; bool consecutive; bool synchronous; + struct nfs_commitres commit_res; }; struct nfs42_seek_args { @@ -1427,6 +1428,7 @@ struct nfs_pgio_header { struct list_head pages; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ + fmode_t rw_mode; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; @@ -1550,6 +1552,7 @@ struct nfs_rpc_ops { const struct inode_operations *dir_inode_ops; const struct inode_operations *file_inode_ops; const struct file_operations *file_ops; + const struct nlmclnt_operations *nlmclnt_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f21471f7ee407a80534047004ecd784ebdf347d5..6c8c5d8041b72ec01097d1c0563b793ea7449f1f 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -27,8 +27,8 @@ /* FC Port role bitmask - can merge with FC Port Roles in fc transport */ #define FC_PORT_ROLE_NVME_INITIATOR 0x10 -#define FC_PORT_ROLE_NVME_TARGET 0x11 -#define FC_PORT_ROLE_NVME_DISCOVERY 0x12 +#define FC_PORT_ROLE_NVME_TARGET 0x20 +#define FC_PORT_ROLE_NVME_DISCOVERY 0x40 /** @@ -137,9 +137,9 @@ enum nvmefc_fcp_datadir { * transferred. Should equal payload_length on success. * @rcv_rsplen: length, in bytes, of the FCP RSP IU received. * @status: Completion status of the FCP operation. must be 0 upon success, - * NVME_SC_FC_xxx value upon failure. Note: this is NOT a - * reflection of the NVME CQE completion status. Only the status - * of the FCP operation at the NVME-FC level. + * negative errno value upon failure (ex: -EIO). Note: this is + * NOT a reflection of the NVME CQE completion status. Only the + * status of the FCP operation at the NVME-FC level. */ struct nvmefc_fcp_req { void *cmdaddr; @@ -533,9 +533,6 @@ enum { * rsp as well */ NVMET_FCOP_RSP = 4, /* send rsp frame */ - NVMET_FCOP_ABORT = 5, /* abort exchange via ABTS */ - NVMET_FCOP_BA_ACC = 6, /* send BA_ACC */ - NVMET_FCOP_BA_RJT = 7, /* send BA_RJT */ }; /** @@ -572,8 +569,6 @@ enum { * upon compeletion of the operation. The nvmet-fc layer will also set a * private pointer for its own use in the done routine. * - * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !! - * * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op * entrypoint. * @op: Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx) @@ -647,13 +642,21 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1), - /* Bit 1: When 0, the LLDD will deliver FCP CMD - * on the CPU it should be affinitized to. Thus work will - * be scheduled on the cpu received on. When 1, the LLDD - * may not deliver the CMD on the CPU it should be worked - * on. The transport should pick a cpu to schedule the work - * on. + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), + /* Bit 2: When 0, the LLDD is calling the cmd rcv handler + * in a non-isr context, allowing the transport to finish + * op completion in the calling context. When 1, the LLDD + * is calling the cmd rcv handler in an ISR context, + * requiring the transport to transition to a workqueue + * for op completion. + */ + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), + /* Bit 3: When 0, the LLDD is calling the op done handler + * in a non-isr context, allowing the transport to finish + * op completion in the calling context. When 1, the LLDD + * is calling the op done handler in an ISR context, + * requiring the transport to transition to a workqueue + * for op completion. */ }; @@ -725,12 +728,12 @@ struct nvmet_fc_target_port { * be freed/released. * Entrypoint is Mandatory. * - * @fcp_op: Called to perform a data transfer, transmit a response, or - * abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same - * LLDD-supplied exchange structure specified in the - * nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received. - * The op field in the structure shall indicate the operation for - * the LLDD to perform relative to the io. + * @fcp_op: Called to perform a data transfer or transmit a response. + * The nvmefc_tgt_fcp_req structure is the same LLDD-supplied + * exchange structure specified in the nvmet_fc_rcv_fcp_req() call + * made when the FCP CMD IU was received. The op field in the + * structure shall indicate the operation for the LLDD to perform + * relative to the io. * NVMET_FCOP_READDATA operation: the LLDD is to send the * payload data (described by sglist) to the host in 1 or * more FC sequences (preferrably 1). Note: the fc-nvme layer @@ -752,29 +755,31 @@ struct nvmet_fc_target_port { * successfully, the LLDD is to update the nvmefc_tgt_fcp_req * transferred_length field and may subsequently transmit the * FCP_RSP iu payload (described by rspbuf, rspdma, rsplen). - * The LLDD is to await FCP_CONF reception to confirm the RSP - * reception by the host. The LLDD may retramsit the FCP_RSP iu - * if necessary per FC-NVME. Upon reception of FCP_CONF, or upon - * FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req - * fcp_error field and consider the operation complete.. + * If FCP_CONF is supported, the LLDD is to await FCP_CONF + * reception to confirm the RSP reception by the host. The LLDD + * may retramsit the FCP_RSP iu if necessary per FC-NVME. Upon + * transmission of the FCP_RSP iu if FCP_CONF is not supported, + * or upon success/failure of FCP_CONF if it is supported, the + * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and + * consider the operation complete. * NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload - * (described by rspbuf, rspdma, rsplen). The LLDD is to await - * FCP_CONF reception to confirm the RSP reception by the host. - * The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME. - * Upon reception of FCP_CONF, or upon FCP_CONF failure, the + * (described by rspbuf, rspdma, rsplen). If FCP_CONF is + * supported, the LLDD is to await FCP_CONF reception to confirm + * the RSP reception by the host. The LLDD may retramsit the + * FCP_RSP iu if FCP_CONF is not received per FC-NVME. Upon + * transmission of the FCP_RSP iu if FCP_CONF is not supported, + * or upon success/failure of FCP_CONF if it is supported, the * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and - * consider the operation complete.. - * NVMET_FCOP_ABORT: the LLDD is to terminate the exchange - * corresponding to the fcp operation. The LLDD shall send - * ABTS and follow FC exchange abort-multi rules, including - * ABTS retries and possible logout. + * consider the operation complete. * Upon completing the indicated operation, the LLDD is to set the * status fields for the operation (tranferred_length and fcp_error - * status) in the request, then all the "done" routine - * indicated in the fcp request. Upon return from the "done" - * routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation - * the fc-nvme layer will not longer reference the fcp request, - * allowing the LLDD to free/release the fcp request. + * status) in the request, then call the "done" routine + * indicated in the fcp request. After the operation completes, + * regardless of whether the FCP_RSP iu was successfully transmit, + * the LLDD-supplied exchange structure must remain valid until the + * transport calls the fcp_req_release() callback to return ownership + * of the exchange structure back to the LLDD so that it may be used + * for another fcp command. * Note: when calling the done routine for READDATA or WRITEDATA * operations, the fc-nvme layer may immediate convert, in the same * thread and before returning to the LLDD, the fcp operation to @@ -786,6 +791,22 @@ struct nvmet_fc_target_port { * Returns 0 on success, - on failure (Ex: -EIO) * Entrypoint is Mandatory. * + * @fcp_abort: Called by the transport to abort an active command. + * The command may be in-between operations (nothing active in LLDD) + * or may have an active WRITEDATA operation pending. The LLDD is to + * initiate the ABTS process for the command and return from the + * callback. The ABTS does not need to be complete on the command. + * The fcp_abort callback inherently cannot fail. After the + * fcp_abort() callback completes, the transport will wait for any + * outstanding operation (if there was one) to complete, then will + * call the fcp_req_release() callback to return the command's + * exchange context back to the LLDD. + * + * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req + * to the LLDD after all operations on the fcp operation are complete. + * This may be due to the command completing or upon completion of + * abort cleanup. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -820,7 +841,11 @@ struct nvmet_fc_target_template { int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_ls_req *tls_req); int (*fcp_op)(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_fcp_req *); + struct nvmefc_tgt_fcp_req *fcpreq); + void (*fcp_abort)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); + void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); u32 max_hw_queues; u16 max_sgl_segments; @@ -848,4 +873,7 @@ int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq, void *cmdiubuf, u32 cmdiubuf_len); +void nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); + #endif /* _NVME_FC_DRIVER_H */ diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 4b45226bd604c5630fe73015e3a9876ced1cef2e..e997c4a49a8884e3b1167a830e7fdf0431365a3d 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -16,8 +16,7 @@ */ /* - * This file contains definitions relative to FC-NVME r1.11 and a few - * newer items + * This file contains definitions relative to FC-NVME r1.14 (16-020vB). */ #ifndef _NVME_FC_H @@ -47,8 +46,15 @@ struct nvme_fc_cmd_iu { #define NVME_FC_SIZEOF_ZEROS_RSP 12 +enum { + FCNVME_SC_SUCCESS = 0, + FCNVME_SC_INVALID_FIELD = 1, + FCNVME_SC_INVALID_CONNID = 2, +}; + struct nvme_fc_ersp_iu { - __u8 rsvd0[2]; + __u8 status_code; + __u8 rsvd1; __be16 iu_len; __be32 rsn; __be32 xfrd_len; @@ -58,7 +64,7 @@ struct nvme_fc_ersp_iu { }; -/* FC-NVME r1.03/16-119v0 NVME Link Services */ +/* FC-NVME Link Services */ enum { FCNVME_LS_RSVD = 0, FCNVME_LS_RJT = 1, @@ -68,7 +74,7 @@ enum { FCNVME_LS_DISCONNECT = 5, }; -/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */ +/* FC-NVME Link Service Descriptors */ enum { FCNVME_LSDESC_RSVD = 0x0, FCNVME_LSDESC_RQST = 0x1, @@ -92,7 +98,6 @@ static inline __be32 fcnvme_lsdesc_len(size_t sz) return cpu_to_be32(sz - (2 * sizeof(u32))); } - struct fcnvme_ls_rqst_w0 { u8 ls_cmd; /* FCNVME_LS_xxx */ u8 zeros[3]; @@ -106,8 +111,53 @@ struct fcnvme_lsdesc_rqst { __be32 rsvd12; }; +/* FC-NVME LS RJT reason_code values */ +enum fcnvme_ls_rjt_reason { + FCNVME_RJT_RC_NONE = 0, + /* no reason - not to be sent */ + + FCNVME_RJT_RC_INVAL = 0x01, + /* invalid NVMe_LS command code */ + + FCNVME_RJT_RC_LOGIC = 0x03, + /* logical error */ + + FCNVME_RJT_RC_UNAB = 0x09, + /* unable to perform command request */ + + FCNVME_RJT_RC_UNSUP = 0x0b, + /* command not supported */ + + FCNVME_RJT_RC_INPROG = 0x0e, + /* command already in progress */ + FCNVME_RJT_RC_INV_ASSOC = 0x40, + /* Invalid Association ID*/ + FCNVME_RJT_RC_INV_CONN = 0x41, + /* Invalid Connection ID*/ + + FCNVME_RJT_RC_VENDOR = 0xff, + /* vendor specific error */ +}; + +/* FC-NVME LS RJT reason_explanation values */ +enum fcnvme_ls_rjt_explan { + FCNVME_RJT_EXP_NONE = 0x00, + /* No additional explanation */ + + FCNVME_RJT_EXP_OXID_RXID = 0x17, + /* invalid OX_ID-RX_ID combination */ + + FCNVME_RJT_EXP_INSUF_RES = 0x29, + /* insufficient resources */ + + FCNVME_RJT_EXP_UNAB_DATA = 0x2a, + /* unable to supply requested data */ + + FCNVME_RJT_EXP_INV_LEN = 0x2d, + /* Invalid payload length */ +}; /* FCNVME_LSDESC_RJT */ struct fcnvme_lsdesc_rjt { @@ -119,15 +169,15 @@ struct fcnvme_lsdesc_rjt { * Reject reason and explanaction codes are generic * to ELs's from LS-3. */ - u8 reason_code; - u8 reason_explanation; + u8 reason_code; /* fcnvme_ls_rjt_reason */ + u8 reason_explanation; /* fcnvme_ls_rjt_explan */ u8 vendor; __be32 rsvd12; }; -#define FCNVME_ASSOC_HOSTID_LEN 64 +#define FCNVME_ASSOC_HOSTID_LEN 16 #define FCNVME_ASSOC_HOSTNQN_LEN 256 #define FCNVME_ASSOC_SUBNQN_LEN 256 diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9061780b141ff44c58a0ab5d5429dfbba8f1d2fb..b625bacf37efaabd84e8735b2b304401cdb195fd 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -245,6 +245,7 @@ enum { NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, }; struct nvme_lbaf { @@ -603,6 +604,7 @@ enum nvme_admin_opcode { nvme_admin_download_fw = 0x11, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, + nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, nvme_admin_security_recv = 0x82, @@ -874,6 +876,16 @@ struct nvmf_property_get_command { __u8 resv4[16]; }; +struct nvme_dbbuf { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __u32 rsvd12[6]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -893,6 +905,7 @@ struct nvme_command { struct nvmf_connect_command connect; struct nvmf_property_set_command prop_set; struct nvmf_property_get_command prop_get; + struct nvme_dbbuf dbbuf; }; }; diff --git a/include/linux/of.h b/include/linux/of.h index 21e6323de0f3b7868a2c3a44471f92cdc6cba176..50fcdb54087fa02dcacc10820077771981f7c445 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -159,6 +159,8 @@ static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) container_of(fwnode, struct device_node, fwnode) : NULL; } +#define of_fwnode_handle(node) (&(node)->fwnode) + static inline bool of_have_populated_dt(void) { return of_root != NULL; @@ -292,6 +294,9 @@ extern int of_property_count_elems_of_size(const struct device_node *np, extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); +extern int of_property_read_u64_index(const struct device_node *np, + const char *propname, + u32 index, u64 *out_value); extern int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max); @@ -602,6 +607,8 @@ static inline struct device_node *of_find_node_with_property( return NULL; } +#define of_fwnode_handle(node) NULL + static inline bool of_have_populated_dt(void) { return false; diff --git a/include/linux/of_device.h b/include/linux/of_device.h index c12dace043f3c840a8bb4d4540e06f897eeca8cb..b4ad8b4f85065dae43ab10be81ac27851b4802e7 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -34,8 +34,7 @@ extern void of_device_unregister(struct platform_device *ofdev); extern const void *of_device_get_match_data(const struct device *dev); -extern ssize_t of_device_get_modalias(struct device *dev, - char *str, ssize_t len); +extern ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len); extern int of_device_request_module(struct device *dev); extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env); @@ -55,7 +54,8 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } -void of_dma_configure(struct device *dev, struct device_node *np); +int of_dma_configure(struct device *dev, struct device_node *np); +void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -72,8 +72,8 @@ static inline const void *of_device_get_match_data(const struct device *dev) return NULL; } -static inline int of_device_get_modalias(struct device *dev, - char *str, ssize_t len) +static inline int of_device_modalias(struct device *dev, + char *str, ssize_t len) { return -ENODEV; } @@ -103,7 +103,12 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } -static inline void of_dma_configure(struct device *dev, struct device_node *np) + +static inline int of_dma_configure(struct device *dev, struct device_node *np) +{ + return 0; +} +static inline void of_dma_deconfigure(struct device *dev) {} #endif /* CONFIG_OF */ diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 271b3fdf00709e2752115e0d371cfa397791eec4..1dfbfd0d8040cb56820e43f51fc9bf58b7d117e8 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -54,6 +54,11 @@ extern char __dtb_end[]; extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), void *data); +extern int of_scan_flat_dt_subnodes(unsigned long node, + int (*it)(unsigned long node, + const char *uname, + void *data), + void *data); extern int of_get_flat_dt_subnode_by_name(unsigned long node, const char *uname); extern const void *of_get_flat_dt_prop(unsigned long node, const char *name, @@ -62,6 +67,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); extern int of_get_flat_dt_size(void); +extern uint32_t of_get_flat_dt_phandle(unsigned long node); extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 3f87ea5b8bee24d0e5cfa78517cfaf338a2bf051..1e089d5a182beb076d9021e41ab9672820dcfb82 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -30,6 +30,7 @@ struct device_node; enum of_gpio_flags { OF_GPIO_ACTIVE_LOW = 0x1, OF_GPIO_SINGLE_ENDED = 0x2, + OF_GPIO_OPEN_DRAIN = 0x4, }; #ifdef CONFIG_OF_GPIO diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index a58cca8bcb29d8c9bdcda71538cfa8f25c913916..ba35ba5204871a69b5d5522f170432d24aa32e96 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -12,7 +12,7 @@ #include #include -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF_MDIO) extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); extern struct phy_device *of_phy_find_device(struct device_node *phy_np); extern struct phy_device *of_phy_connect(struct net_device *dev, @@ -32,7 +32,7 @@ extern int of_phy_register_fixed_link(struct device_node *np); extern void of_phy_deregister_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); -#else /* CONFIG_OF */ +#else /* CONFIG_OF_MDIO */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { /* diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 0e0974eceb803b64cee8f8f67a7bde55d58f8f4a..518c8d20647a560bf2f679b62b80364b040ba9ae 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -85,15 +85,4 @@ static inline int of_pci_get_host_bridge_resources(struct device_node *dev, } #endif -#if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI) -int of_pci_msi_chip_add(struct msi_controller *chip); -void of_pci_msi_chip_remove(struct msi_controller *chip); -struct msi_controller *of_pci_find_msi_chip_by_node(struct device_node *of_node); -#else -static inline int of_pci_msi_chip_add(struct msi_controller *chip) { return -EINVAL; } -static inline void of_pci_msi_chip_remove(struct msi_controller *chip) { } -static inline struct msi_controller * -of_pci_find_msi_chip_by_node(struct device_node *of_node) { return NULL; } -#endif - #endif diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index dc8224ae28d5d9e6106dc0d06a8a9bc2eb85fd0a..e0d1946270f38e5238ddf0a3bb25cf03c3f3ebe4 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np, const char *bus_id, struct device *parent); +extern int of_platform_device_destroy(struct device *dev, void *data); extern int of_platform_bus_probe(struct device_node *root, const struct of_device_id *matches, struct device *parent); diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 047d64706f2a298157cf0d54305aa8e49f6f7837..d4cd2014fa6f5a18f03f1b0babfccb21fa3ac55f 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -33,10 +33,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, bool skip_hwpoisoned_pages); void set_pageblock_migratetype(struct page *page, int migratetype); int move_freepages_block(struct zone *zone, struct page *page, - int migratetype); -int move_freepages(struct zone *zone, - struct page *start_page, struct page *end_page, - int migratetype); + int migratetype, int *num_movable); /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 84943e8057ef85a646a8f43155805fd8fa2273d8..316a19f6b635db760570a7e7b66af2a03b697e5e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -148,7 +148,7 @@ static inline int page_cache_get_speculative(struct page *page) #ifdef CONFIG_TINY_RCU # ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic()); + VM_BUG_ON(!in_atomic() && !irqs_disabled()); # endif /* * Preempt must be disabled here - we rely on rcu_read_lock doing @@ -186,7 +186,7 @@ static inline int page_cache_add_speculative(struct page *page, int count) #if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic()); + VM_BUG_ON(!in_atomic() && !irqs_disabled()); # endif VM_BUG_ON_PAGE(page_count(page) == 0, page); page_ref_add(page, count); diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index f0d2b94512701ccdb6c77d5b37c7ff4f49ea1d3e..809c2f1873ac2a04c6ebb262407196302d00ea46 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -16,6 +16,7 @@ #ifndef DRIVERS_PCI_ECAM_H #define DRIVERS_PCI_ECAM_H +#include #include #include @@ -68,7 +69,7 @@ extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ #endif -#ifdef CONFIG_PCI_HOST_GENERIC +#ifdef CONFIG_PCI_HOST_COMMON /* for DT-based PCI controllers that support ECAM */ int pci_host_common_probe(struct platform_device *pdev, struct pci_ecam_ops *ops); diff --git a/include/linux/pci-ep-cfs.h b/include/linux/pci-ep-cfs.h new file mode 100644 index 0000000000000000000000000000000000000000..263b89ea5705028c663c7792fd49380b17323386 --- /dev/null +++ b/include/linux/pci-ep-cfs.h @@ -0,0 +1,41 @@ +/** + * PCI Endpoint ConfigFS header file + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + */ + +#ifndef __LINUX_PCI_EP_CFS_H +#define __LINUX_PCI_EP_CFS_H + +#include + +#ifdef CONFIG_PCI_ENDPOINT_CONFIGFS +struct config_group *pci_ep_cfs_add_epc_group(const char *name); +void pci_ep_cfs_remove_epc_group(struct config_group *group); +struct config_group *pci_ep_cfs_add_epf_group(const char *name); +void pci_ep_cfs_remove_epf_group(struct config_group *group); +#else +static inline struct config_group *pci_ep_cfs_add_epc_group(const char *name) +{ + return 0; +} + +static inline void pci_ep_cfs_remove_epc_group(struct config_group *group) +{ +} + +static inline struct config_group *pci_ep_cfs_add_epf_group(const char *name) +{ + return 0; +} + +static inline void pci_ep_cfs_remove_epf_group(struct config_group *group) +{ +} +#endif +#endif /* __LINUX_PCI_EP_CFS_H */ diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h new file mode 100644 index 0000000000000000000000000000000000000000..af5edbf3eea3c2d1767bfaf660634ec63729b54f --- /dev/null +++ b/include/linux/pci-epc.h @@ -0,0 +1,144 @@ +/** + * PCI Endpoint *Controller* (EPC) header file + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + */ + +#ifndef __LINUX_PCI_EPC_H +#define __LINUX_PCI_EPC_H + +#include + +struct pci_epc; + +enum pci_epc_irq_type { + PCI_EPC_IRQ_UNKNOWN, + PCI_EPC_IRQ_LEGACY, + PCI_EPC_IRQ_MSI, +}; + +/** + * struct pci_epc_ops - set of function pointers for performing EPC operations + * @write_header: ops to populate configuration space header + * @set_bar: ops to configure the BAR + * @clear_bar: ops to reset the BAR + * @map_addr: ops to map CPU address to PCI address + * @unmap_addr: ops to unmap CPU address and PCI address + * @set_msi: ops to set the requested number of MSI interrupts in the MSI + * capability register + * @get_msi: ops to get the number of MSI interrupts allocated by the RC from + * the MSI capability register + * @raise_irq: ops to raise a legacy or MSI interrupt + * @start: ops to start the PCI link + * @stop: ops to stop the PCI link + * @owner: the module owner containing the ops + */ +struct pci_epc_ops { + int (*write_header)(struct pci_epc *pci_epc, + struct pci_epf_header *hdr); + int (*set_bar)(struct pci_epc *epc, enum pci_barno bar, + dma_addr_t bar_phys, size_t size, int flags); + void (*clear_bar)(struct pci_epc *epc, enum pci_barno bar); + int (*map_addr)(struct pci_epc *epc, phys_addr_t addr, + u64 pci_addr, size_t size); + void (*unmap_addr)(struct pci_epc *epc, phys_addr_t addr); + int (*set_msi)(struct pci_epc *epc, u8 interrupts); + int (*get_msi)(struct pci_epc *epc); + int (*raise_irq)(struct pci_epc *pci_epc, + enum pci_epc_irq_type type, u8 interrupt_num); + int (*start)(struct pci_epc *epc); + void (*stop)(struct pci_epc *epc); + struct module *owner; +}; + +/** + * struct pci_epc_mem - address space of the endpoint controller + * @phys_base: physical base address of the PCI address space + * @size: the size of the PCI address space + * @bitmap: bitmap to manage the PCI address space + * @pages: number of bits representing the address region + */ +struct pci_epc_mem { + phys_addr_t phys_base; + size_t size; + unsigned long *bitmap; + int pages; +}; + +/** + * struct pci_epc - represents the PCI EPC device + * @dev: PCI EPC device + * @pci_epf: list of endpoint functions present in this EPC device + * @ops: function pointers for performing endpoint operations + * @mem: address space of the endpoint controller + * @max_functions: max number of functions that can be configured in this EPC + * @group: configfs group representing the PCI EPC device + * @lock: spinlock to protect pci_epc ops + */ +struct pci_epc { + struct device dev; + struct list_head pci_epf; + const struct pci_epc_ops *ops; + struct pci_epc_mem *mem; + u8 max_functions; + struct config_group *group; + /* spinlock to protect against concurrent access of EP controller */ + spinlock_t lock; +}; + +#define to_pci_epc(device) container_of((device), struct pci_epc, dev) + +#define pci_epc_create(dev, ops) \ + __pci_epc_create((dev), (ops), THIS_MODULE) +#define devm_pci_epc_create(dev, ops) \ + __devm_pci_epc_create((dev), (ops), THIS_MODULE) + +static inline void epc_set_drvdata(struct pci_epc *epc, void *data) +{ + dev_set_drvdata(&epc->dev, data); +} + +static inline void *epc_get_drvdata(struct pci_epc *epc) +{ + return dev_get_drvdata(&epc->dev); +} + +struct pci_epc * +__devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, + struct module *owner); +struct pci_epc * +__pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, + struct module *owner); +void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc); +void pci_epc_destroy(struct pci_epc *epc); +int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf); +void pci_epc_linkup(struct pci_epc *epc); +void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf); +int pci_epc_write_header(struct pci_epc *epc, struct pci_epf_header *hdr); +int pci_epc_set_bar(struct pci_epc *epc, enum pci_barno bar, + dma_addr_t bar_phys, size_t size, int flags); +void pci_epc_clear_bar(struct pci_epc *epc, int bar); +int pci_epc_map_addr(struct pci_epc *epc, phys_addr_t phys_addr, + u64 pci_addr, size_t size); +void pci_epc_unmap_addr(struct pci_epc *epc, phys_addr_t phys_addr); +int pci_epc_set_msi(struct pci_epc *epc, u8 interrupts); +int pci_epc_get_msi(struct pci_epc *epc); +int pci_epc_raise_irq(struct pci_epc *epc, enum pci_epc_irq_type type, + u8 interrupt_num); +int pci_epc_start(struct pci_epc *epc); +void pci_epc_stop(struct pci_epc *epc); +struct pci_epc *pci_epc_get(const char *epc_name); +void pci_epc_put(struct pci_epc *epc); + +int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size); +void pci_epc_mem_exit(struct pci_epc *epc); +void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, + phys_addr_t *phys_addr, size_t size); +void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, + void __iomem *virt_addr, size_t size); +#endif /* __LINUX_PCI_EPC_H */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h new file mode 100644 index 0000000000000000000000000000000000000000..0d529cb9014398df67893a72efb7f2f171111345 --- /dev/null +++ b/include/linux/pci-epf.h @@ -0,0 +1,162 @@ +/** + * PCI Endpoint *Function* (EPF) header file + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + */ + +#ifndef __LINUX_PCI_EPF_H +#define __LINUX_PCI_EPF_H + +#include +#include + +struct pci_epf; + +enum pci_interrupt_pin { + PCI_INTERRUPT_UNKNOWN, + PCI_INTERRUPT_INTA, + PCI_INTERRUPT_INTB, + PCI_INTERRUPT_INTC, + PCI_INTERRUPT_INTD, +}; + +enum pci_barno { + BAR_0, + BAR_1, + BAR_2, + BAR_3, + BAR_4, + BAR_5, +}; + +/** + * struct pci_epf_header - represents standard configuration header + * @vendorid: identifies device manufacturer + * @deviceid: identifies a particular device + * @revid: specifies a device-specific revision identifier + * @progif_code: identifies a specific register-level programming interface + * @subclass_code: identifies more specifically the function of the device + * @baseclass_code: broadly classifies the type of function the device performs + * @cache_line_size: specifies the system cacheline size in units of DWORDs + * @subsys_vendor_id: vendor of the add-in card or subsystem + * @subsys_id: id specific to vendor + * @interrupt_pin: interrupt pin the device (or device function) uses + */ +struct pci_epf_header { + u16 vendorid; + u16 deviceid; + u8 revid; + u8 progif_code; + u8 subclass_code; + u8 baseclass_code; + u8 cache_line_size; + u16 subsys_vendor_id; + u16 subsys_id; + enum pci_interrupt_pin interrupt_pin; +}; + +/** + * struct pci_epf_ops - set of function pointers for performing EPF operations + * @bind: ops to perform when a EPC device has been bound to EPF device + * @unbind: ops to perform when a binding has been lost between a EPC device + * and EPF device + * @linkup: ops to perform when the EPC device has established a connection with + * a host system + */ +struct pci_epf_ops { + int (*bind)(struct pci_epf *epf); + void (*unbind)(struct pci_epf *epf); + void (*linkup)(struct pci_epf *epf); +}; + +/** + * struct pci_epf_driver - represents the PCI EPF driver + * @probe: ops to perform when a new EPF device has been bound to the EPF driver + * @remove: ops to perform when the binding between the EPF device and EPF + * driver is broken + * @driver: PCI EPF driver + * @ops: set of function pointers for performing EPF operations + * @owner: the owner of the module that registers the PCI EPF driver + * @group: configfs group corresponding to the PCI EPF driver + * @id_table: identifies EPF devices for probing + */ +struct pci_epf_driver { + int (*probe)(struct pci_epf *epf); + int (*remove)(struct pci_epf *epf); + + struct device_driver driver; + struct pci_epf_ops *ops; + struct module *owner; + struct config_group *group; + const struct pci_epf_device_id *id_table; +}; + +#define to_pci_epf_driver(drv) (container_of((drv), struct pci_epf_driver, \ + driver)) + +/** + * struct pci_epf_bar - represents the BAR of EPF device + * @phys_addr: physical address that should be mapped to the BAR + * @size: the size of the address space present in BAR + */ +struct pci_epf_bar { + dma_addr_t phys_addr; + size_t size; +}; + +/** + * struct pci_epf - represents the PCI EPF device + * @dev: the PCI EPF device + * @name: the name of the PCI EPF device + * @header: represents standard configuration header + * @bar: represents the BAR of EPF device + * @msi_interrupts: number of MSI interrupts required by this function + * @func_no: unique function number within this endpoint device + * @epc: the EPC device to which this EPF device is bound + * @driver: the EPF driver to which this EPF device is bound + * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc + */ +struct pci_epf { + struct device dev; + const char *name; + struct pci_epf_header *header; + struct pci_epf_bar bar[6]; + u8 msi_interrupts; + u8 func_no; + + struct pci_epc *epc; + struct pci_epf_driver *driver; + struct list_head list; +}; + +#define to_pci_epf(epf_dev) container_of((epf_dev), struct pci_epf, dev) + +#define pci_epf_register_driver(driver) \ + __pci_epf_register_driver((driver), THIS_MODULE) + +static inline void epf_set_drvdata(struct pci_epf *epf, void *data) +{ + dev_set_drvdata(&epf->dev, data); +} + +static inline void *epf_get_drvdata(struct pci_epf *epf) +{ + return dev_get_drvdata(&epf->dev); +} + +struct pci_epf *pci_epf_create(const char *name); +void pci_epf_destroy(struct pci_epf *epf); +int __pci_epf_register_driver(struct pci_epf_driver *driver, + struct module *owner); +void pci_epf_unregister_driver(struct pci_epf_driver *driver); +void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar); +void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar); +int pci_epf_bind(struct pci_epf *epf); +void pci_epf_unbind(struct pci_epf *epf); +void pci_epf_linkup(struct pci_epf *epf); +#endif /* __LINUX_PCI_EPF_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 5948cfdc984e2f849507fa1d0fb8f2bed3c01cfa..8039f9f0ca054ba20fd9992b13b7926859d029b5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -178,6 +179,15 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), /* Get VPD from function 0 VPD */ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), + /* a non-root bridge where translation occurs, stop alias search here */ + PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), + /* Do not use FLR even if device advertises PCI_AF_CAP */ + PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), + /* + * Resume before calling the driver's system suspend hooks, disabling + * the direct_complete optimization. + */ + PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), }; enum pci_irq_reroute_variant { @@ -397,6 +407,8 @@ struct pci_dev { phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */ size_t romlen; /* Length of ROM if it's not from the BAR */ char *driver_override; /* Driver name to force a match */ + + unsigned long priv_flags; /* Private flags for the pci driver */ }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) @@ -941,32 +953,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops); -static inline int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val) -{ - return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val); -} -static inline int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val) -{ - return pci_bus_read_config_word(dev->bus, dev->devfn, where, val); -} -static inline int pci_read_config_dword(const struct pci_dev *dev, int where, - u32 *val) -{ - return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val); -} -static inline int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val) -{ - return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val); -} -static inline int pci_write_config_word(const struct pci_dev *dev, int where, u16 val) -{ - return pci_bus_write_config_word(dev->bus, dev->devfn, where, val); -} -static inline int pci_write_config_dword(const struct pci_dev *dev, int where, - u32 val) -{ - return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val); -} +int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val); +int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val); +int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val); +int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val); +int pci_write_config_word(const struct pci_dev *dev, int where, u16 val); +int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val); int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val); @@ -1053,6 +1045,7 @@ int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +void pcie_flr(struct pci_dev *dev); int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); @@ -1073,6 +1066,11 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); void pci_ignore_hotplug(struct pci_dev *dev); +int __printf(6, 7) pci_request_irq(struct pci_dev *dev, unsigned int nr, + irq_handler_t handler, irq_handler_t thread_fn, void *dev_id, + const char *fmt, ...); +void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id); + /* ROM control related routines */ int pci_enable_rom(struct pci_dev *pdev); void pci_disable_rom(struct pci_dev *pdev); @@ -1200,6 +1198,11 @@ unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); void pci_unmap_iospace(struct resource *res); +void __iomem *devm_pci_remap_cfgspace(struct device *dev, + resource_size_t offset, + resource_size_t size); +void __iomem *devm_pci_remap_cfg_resource(struct device *dev, + struct resource *res); static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { @@ -1298,11 +1301,8 @@ struct msix_entry { #ifdef CONFIG_PCI_MSI int pci_msi_vec_count(struct pci_dev *dev); -void pci_msi_shutdown(struct pci_dev *dev); void pci_disable_msi(struct pci_dev *dev); int pci_msix_vec_count(struct pci_dev *dev); -int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); -void pci_msix_shutdown(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); int pci_msi_enabled(void); @@ -1328,13 +1328,8 @@ int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } -static inline void pci_msi_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msi(struct pci_dev *dev) { } static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; } -static inline int pci_enable_msix(struct pci_dev *dev, - struct msix_entry *entries, int nvec) -{ return -ENOSYS; } -static inline void pci_msix_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } static inline int pci_msi_enabled(void) { return 0; } @@ -1352,9 +1347,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, const struct irq_affinity *aff_desc) { - if (min_vecs > 1) - return -EINVAL; - return 1; + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) + return 1; + return -ENOSPC; } static inline void pci_free_irq_vectors(struct pci_dev *dev) @@ -1627,6 +1622,36 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #include +/* These two functions provide almost identical functionality. Depennding + * on the architecture, one will be implemented as a wrapper around the + * other (in drivers/pci/mmap.c). + * + * pci_mmap_resource_range() maps a specific BAR, and vm->vm_pgoff + * is expected to be an offset within that region. + * + * pci_mmap_page_range() is the legacy architecture-specific interface, + * which accepts a "user visible" resource address converted by + * pci_resource_to_user(), as used in the legacy mmap() interface in + * /proc/bus/pci/. + */ +int pci_mmap_resource_range(struct pci_dev *dev, int bar, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); +int pci_mmap_page_range(struct pci_dev *pdev, int bar, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + +#ifndef arch_can_pci_mmap_wc +#define arch_can_pci_mmap_wc() 0 +#endif + +#ifndef arch_can_pci_mmap_io +#define arch_can_pci_mmap_io() 0 +#define pci_iobar_pfn(pdev, bar, vma) (-EINVAL) +#else +int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma); +#endif + #ifndef pci_root_bus_fwnode #define pci_root_bus_fwnode(bus) NULL #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a4f77feecbb00f84fac0295a2b2745928e9e0472..5f6b71d15393a4d6df153589cee33e42d42ebb26 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -862,6 +862,8 @@ #define PCI_DEVICE_ID_TI_X620 0xac8d #define PCI_DEVICE_ID_TI_X420 0xac8e #define PCI_DEVICE_ID_TI_XX20_FM 0xac8f +#define PCI_DEVICE_ID_TI_DRA74x 0xb500 +#define PCI_DEVICE_ID_TI_DRA72x 0xb501 #define PCI_VENDOR_ID_SONY 0x104d diff --git a/include/linux/pe.h b/include/linux/pe.h index e170b95e763bbdf78d04c36372a2c9b4e45c0e1a..143ce75be5f01549ac60f6a8e91e8e614d5f3fe1 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -23,34 +23,6 @@ #define MZ_MAGIC 0x5a4d /* "MZ" */ -struct mz_hdr { - uint16_t magic; /* MZ_MAGIC */ - uint16_t lbsize; /* size of last used block */ - uint16_t blocks; /* pages in file, 0x3 */ - uint16_t relocs; /* relocations */ - uint16_t hdrsize; /* header size in "paragraphs" */ - uint16_t min_extra_pps; /* .bss */ - uint16_t max_extra_pps; /* runtime limit for the arena size */ - uint16_t ss; /* relative stack segment */ - uint16_t sp; /* initial %sp register */ - uint16_t checksum; /* word checksum */ - uint16_t ip; /* initial %ip register */ - uint16_t cs; /* initial %cs relative to load segment */ - uint16_t reloc_table_offset; /* offset of the first relocation */ - uint16_t overlay_num; /* overlay number. set to 0. */ - uint16_t reserved0[4]; /* reserved */ - uint16_t oem_id; /* oem identifier */ - uint16_t oem_info; /* oem specific */ - uint16_t reserved1[10]; /* reserved */ - uint32_t peaddr; /* address of pe header */ - char message[64]; /* message to print */ -}; - -struct mz_reloc { - uint16_t offset; - uint16_t segment; -}; - #define PE_MAGIC 0x00004550 /* "PE\0\0" */ #define PE_OPT_MAGIC_PE32 0x010b #define PE_OPT_MAGIC_PE32_ROM 0x0107 @@ -62,6 +34,7 @@ struct mz_reloc { #define IMAGE_FILE_MACHINE_AMD64 0x8664 #define IMAGE_FILE_MACHINE_ARM 0x01c0 #define IMAGE_FILE_MACHINE_ARMV7 0x01c4 +#define IMAGE_FILE_MACHINE_ARM64 0xaa64 #define IMAGE_FILE_MACHINE_EBC 0x0ebc #define IMAGE_FILE_MACHINE_I386 0x014c #define IMAGE_FILE_MACHINE_IA64 0x0200 @@ -98,17 +71,6 @@ struct mz_reloc { #define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 #define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 -struct pe_hdr { - uint32_t magic; /* PE magic */ - uint16_t machine; /* machine type */ - uint16_t sections; /* number of sections */ - uint32_t timestamp; /* time_t */ - uint32_t symbol_table; /* symbol table offset */ - uint32_t symbols; /* number of symbols */ - uint16_t opt_hdr_size; /* size of optional header */ - uint16_t flags; /* flags */ -}; - #define IMAGE_FILE_OPT_ROM_MAGIC 0x107 #define IMAGE_FILE_OPT_PE32_MAGIC 0x10b #define IMAGE_FILE_OPT_PE32_PLUS_MAGIC 0x20b @@ -134,6 +96,95 @@ struct pe_hdr { #define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 #define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 +/* they actually defined 0x00000000 as well, but I think we'll skip that one. */ +#define IMAGE_SCN_RESERVED_0 0x00000001 +#define IMAGE_SCN_RESERVED_1 0x00000002 +#define IMAGE_SCN_RESERVED_2 0x00000004 +#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ +#define IMAGE_SCN_RESERVED_3 0x00000010 +#define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ +#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ +#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ +#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ +#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ +#define IMAGE_SCN_RESERVED_4 0x00000400 +#define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ +#define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ +#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ +#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ +#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ +/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ +#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for "future" use */ +#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ +#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ +#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ +/* and here they just stuck a 1-byte integer in the middle of a bitfield */ +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ +#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 +#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 +#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 +#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 +#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 +#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 +#define IMAGE_SCN_ALIGN_128BYTES 0x00800000 +#define IMAGE_SCN_ALIGN_256BYTES 0x00900000 +#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000 +#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000 +#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 +#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 +#define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000 +#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ +#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ +#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ +#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ +#define IMAGE_SCN_MEM_SHARED 0x10000000 /* can be shared */ +#define IMAGE_SCN_MEM_EXECUTE 0x20000000 /* can be executed as code */ +#define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ +#define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ + +#define IMAGE_DEBUG_TYPE_CODEVIEW 2 + +#ifndef __ASSEMBLY__ + +struct mz_hdr { + uint16_t magic; /* MZ_MAGIC */ + uint16_t lbsize; /* size of last used block */ + uint16_t blocks; /* pages in file, 0x3 */ + uint16_t relocs; /* relocations */ + uint16_t hdrsize; /* header size in "paragraphs" */ + uint16_t min_extra_pps; /* .bss */ + uint16_t max_extra_pps; /* runtime limit for the arena size */ + uint16_t ss; /* relative stack segment */ + uint16_t sp; /* initial %sp register */ + uint16_t checksum; /* word checksum */ + uint16_t ip; /* initial %ip register */ + uint16_t cs; /* initial %cs relative to load segment */ + uint16_t reloc_table_offset; /* offset of the first relocation */ + uint16_t overlay_num; /* overlay number. set to 0. */ + uint16_t reserved0[4]; /* reserved */ + uint16_t oem_id; /* oem identifier */ + uint16_t oem_info; /* oem specific */ + uint16_t reserved1[10]; /* reserved */ + uint32_t peaddr; /* address of pe header */ + char message[64]; /* message to print */ +}; + +struct mz_reloc { + uint16_t offset; + uint16_t segment; +}; + +struct pe_hdr { + uint32_t magic; /* PE magic */ + uint16_t machine; /* machine type */ + uint16_t sections; /* number of sections */ + uint32_t timestamp; /* time_t */ + uint32_t symbol_table; /* symbol table offset */ + uint32_t symbols; /* number of symbols */ + uint16_t opt_hdr_size; /* size of optional header */ + uint16_t flags; /* flags */ +}; + /* the fact that pe32 isn't padded where pe32+ is 64-bit means union won't * work right. vomit. */ struct pe32_opt_hdr { @@ -243,52 +294,6 @@ struct section_header { uint32_t flags; }; -/* they actually defined 0x00000000 as well, but I think we'll skip that one. */ -#define IMAGE_SCN_RESERVED_0 0x00000001 -#define IMAGE_SCN_RESERVED_1 0x00000002 -#define IMAGE_SCN_RESERVED_2 0x00000004 -#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ -#define IMAGE_SCN_RESERVED_3 0x00000010 -#define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ -#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ -#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ -#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ -#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ -#define IMAGE_SCN_RESERVED_4 0x00000400 -#define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ -#define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ -#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ -#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ -#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ -/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ -#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for "future" use */ -#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ -#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ -#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ -/* and here they just stuck a 1-byte integer in the middle of a bitfield */ -#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ -#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 -#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 -#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 -#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 -#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 -#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 -#define IMAGE_SCN_ALIGN_128BYTES 0x00800000 -#define IMAGE_SCN_ALIGN_256BYTES 0x00900000 -#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000 -#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000 -#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 -#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 -#define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000 -#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ -#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ -#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ -#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ -#define IMAGE_SCN_MEM_SHARED 0x10000000 /* can be shared */ -#define IMAGE_SCN_MEM_EXECUTE 0x20000000 /* can be executed as code */ -#define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ -#define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ - enum x64_coff_reloc_type { IMAGE_REL_AMD64_ABSOLUTE = 0, IMAGE_REL_AMD64_ADDR64, @@ -445,4 +450,6 @@ struct win_certificate { uint16_t cert_type; }; +#endif /* !__ASSEMBLY__ */ + #endif /* __LINUX_PE_H */ diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3a481a49546ef1c85d8f88bf7668f8a2c8e8c0f1..c13dceb87b60adbe36c8ae2c7ba21824f33343dd 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -99,6 +99,7 @@ int __must_check percpu_ref_init(struct percpu_ref *ref, void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); +void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 56939d3f6e531baa21d7770e5ee8f256f3957c86..491b3f5a5f8a86ee9347affdb1b08013757235e9 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,6 +110,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, #endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 8462da2660899153552df914268ddaadd448fc2d..1360dd6d5e617a0fee746a0d530129e66530a0fb 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -75,6 +75,8 @@ struct pmu_hw_events { * already have to allocate this struct per cpu. */ struct arm_pmu *percpu_pmu; + + int irq; }; enum armpmu_attr_groups { @@ -88,7 +90,6 @@ struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; cpumask_t supported_cpus; - int *irq_affinity; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); @@ -104,12 +105,8 @@ struct arm_pmu { void (*start)(struct arm_pmu *); void (*stop)(struct arm_pmu *); void (*reset)(void *); - int (*request_irq)(struct arm_pmu *, irq_handler_t handler); - void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); int num_events; - atomic_t active_events; - struct mutex reserve_mutex; u64 max_period; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 @@ -120,6 +117,9 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; + + /* Only to be used by ACPI probing code */ + unsigned long acpi_cpuid; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) @@ -135,10 +135,12 @@ int armpmu_map_event(struct perf_event *event, [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask); +typedef int (*armpmu_init_fn)(struct arm_pmu *); + struct pmu_probe_info { unsigned int cpuid; unsigned int mask; - int (*init)(struct arm_pmu *); + armpmu_init_fn init; }; #define PMU_PROBE(_cpuid, _mask, _fn) \ @@ -160,6 +162,21 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table); +#ifdef CONFIG_ACPI +int arm_pmu_acpi_probe(armpmu_init_fn init_fn); +#else +static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } +#endif + +/* Internal functions only for core arm_pmu code */ +struct arm_pmu *armpmu_alloc(void); +void armpmu_free(struct arm_pmu *pmu); +int armpmu_register(struct arm_pmu *pmu); +int armpmu_request_irqs(struct arm_pmu *armpmu); +void armpmu_free_irqs(struct arm_pmu *armpmu); +int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); +void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); + #define ARMV8_PMU_PDEV_NAME "armv8-pmu" #endif /* CONFIG_ARM_PMU */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000fdb211c7d7e2c8fc7351a99dbf6b58fbcb135..24a635887f28df99379d8b683fd94154c7888a69 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -165,6 +165,13 @@ struct hw_perf_event { struct list_head bp_list; }; #endif + struct { /* amd_iommu */ + u8 iommu_bank; + u8 iommu_cntr; + u16 padding; + u64 conf; + u64 conf1; + }; }; /* * If the event is a per task event, this will point to the task in @@ -801,6 +808,7 @@ struct perf_output_handle { struct ring_buffer *rb; unsigned long wakeup; unsigned long size; + u64 aux_flags; union { void *addr; unsigned long head; @@ -849,10 +857,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) extern void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, - unsigned long size, bool truncated); + unsigned long size); extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void *perf_get_aux(struct perf_output_handle *handle); +extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); @@ -1112,6 +1121,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1267,8 +1277,8 @@ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } static inline void -perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) { } +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) + { } static inline int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { return -EINVAL; } @@ -1315,6 +1325,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 43a774873aa96d4af64d0cdebb579be572a6658a..e76e4adbc7c728efdbadae302af8259d5d55dc05 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -217,6 +217,13 @@ struct mii_bus { * matching its address */ int irq[PHY_MAX_ADDR]; + + /* GPIO reset pulse width in microseconds */ + int reset_delay_us; + /* Number of reset GPIOs */ + int num_reset_gpios; + /* Array of RESET GPIO descriptors */ + struct gpio_desc **reset_gpiod; }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) @@ -587,23 +594,29 @@ struct phy_driver { */ void (*link_change_notify)(struct phy_device *dev); - /* A function provided by a phy specific driver to override the - * the PHY driver framework support for reading a MMD register - * from the PHY. If not supported, return -1. This function is - * optional for PHY specific drivers, if not provided then the - * default MMD read function is used by the PHY framework. + /* + * Phy specific driver override for reading a MMD register. + * This function is optional for PHY specific drivers. When + * not provided, the default MMD read function will be used + * by phy_read_mmd(), which will use either a direct read for + * Clause 45 PHYs or an indirect read for Clause 22 PHYs. + * devnum is the MMD device number within the PHY device, + * regnum is the register within the selected MMD device. */ - int (*read_mmd_indirect)(struct phy_device *dev, int ptrad, - int devnum, int regnum); - - /* A function provided by a phy specific driver to override the - * the PHY driver framework support for writing a MMD register - * from the PHY. This function is optional for PHY specific drivers, - * if not provided then the default MMD read function is used by - * the PHY framework. + int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum); + + /* + * Phy specific driver override for writing a MMD register. + * This function is optional for PHY specific drivers. When + * not provided, the default MMD write function will be used + * by phy_write_mmd(), which will use either a direct write for + * Clause 45 PHYs, or an indirect write for Clause 22 PHYs. + * devnum is the MMD device number within the PHY device, + * regnum is the register within the selected MMD device. + * val is the value to be written. */ - void (*write_mmd_indirect)(struct phy_device *dev, int ptrad, - int devnum, int regnum, u32 val); + int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum, + u16 val); /* Get the size and type of the eeprom contained within a plug-in * module */ @@ -651,25 +664,7 @@ struct phy_fixup { * * Same rules as for phy_read(); */ -static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) -{ - if (!phydev->is_c45) - return -EOPNOTSUPP; - - return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, - MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); -} - -/** - * phy_read_mmd_indirect - reads data from the MMD registers - * @phydev: The PHY device bus - * @prtad: MMD Address - * @addr: PHY address on the MII bus - * - * Description: it reads data from the MMD registers (clause 22 to access to - * clause 45) of the specified phy address. - */ -int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad); +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); /** * phy_read - Convenience function for reading a given PHY register @@ -752,35 +747,29 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev) * * Same rules as for phy_write(); */ -static inline int phy_write_mmd(struct phy_device *phydev, int devad, - u32 regnum, u16 val) -{ - if (!phydev->is_c45) - return -EOPNOTSUPP; - - regnum = MII_ADDR_C45 | ((devad & 0x1f) << 16) | (regnum & 0xffff); - - return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val); -} - -/** - * phy_write_mmd_indirect - writes data to the MMD registers - * @phydev: The PHY device - * @prtad: MMD Address - * @devad: MMD DEVAD - * @data: data to write in the MMD register - * - * Description: Write data from the MMD registers of the specified - * phy address. - */ -void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, - int devad, u32 data); +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); +#if IS_ENABLED(CONFIG_PHYLIB) struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); +void phy_device_free(struct phy_device *phydev); +#else +static inline +struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) +{ + return NULL; +} + +static inline int phy_device_register(struct phy_device *phy) +{ + return 0; +} + +static inline void phy_device_free(struct phy_device *phydev) { } +#endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); @@ -852,6 +841,7 @@ void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); +void phy_trigger_machine(struct phy_device *phydev, bool sync); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_ksettings_get(struct phy_device *phydev, @@ -861,7 +851,6 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -void phy_device_free(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, @@ -888,8 +877,10 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); +#if IS_ENABLED(CONFIG_PHYLIB) int __init mdio_bus_init(void); void mdio_bus_exit(void); +#endif extern struct bus_type mdio_bus_type; @@ -900,7 +891,7 @@ struct mdio_board_info { const void *platform_data; }; -#if IS_ENABLED(CONFIG_PHYLIB) +#if IS_ENABLED(CONFIG_MDIO_DEVICE) int mdiobus_register_board_info(const struct mdio_board_info *info, unsigned int n); #else diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 0496d171700a767b13b89e8e73c50b043fcf6c64..e8b12dbf617024f488f078f746e87ce32eec839a 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -12,28 +12,8 @@ #include -#define MMU_REG_SIZE 256 - -/** - * struct iommu_arch_data - omap iommu private data - * @name: name of the iommu device - * @iommu_dev: handle of the iommu device - * - * This is an omap iommu private data object, which binds an iommu user - * to its iommu device. This object should be placed at the iommu user's - * dev_archdata so generic IOMMU API can be used without having to - * utilize omap-specific plumbing anymore. - */ -struct omap_iommu_arch_data { - const char *name; - struct omap_iommu *iommu_dev; -}; - struct iommu_platform_data { - const char *name; const char *reset_name; - int nr_tlb_entries; - int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); }; diff --git a/include/linux/platform_data/isl9305.h b/include/linux/platform_data/isl9305.h index 1419133fa69e9be9ad07e55690fb8abeff966e32..4ac1a070af0a1967f59c02e3b18363ba83e7552f 100644 --- a/include/linux/platform_data/isl9305.h +++ b/include/linux/platform_data/isl9305.h @@ -24,7 +24,7 @@ struct regulator_init_data; struct isl9305_pdata { - struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR]; + struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR + 1]; }; #endif diff --git a/include/linux/platform_data/itco_wdt.h b/include/linux/platform_data/itco_wdt.h index f16542c77ff794ad13a1e81a54a8d1d57b5d1c94..0e95527edf253db9ab55496f4e404743b6c04b35 100644 --- a/include/linux/platform_data/itco_wdt.h +++ b/include/linux/platform_data/itco_wdt.h @@ -14,6 +14,10 @@ struct itco_wdt_platform_data { char name[32]; unsigned int version; + /* private data to be passed to update_no_reboot_bit API */ + void *no_reboot_priv; + /* pointer for platform specific no reboot update function */ + int (*update_no_reboot_bit)(void *priv, bool set); }; #endif /* _ITCO_WDT_H_ */ diff --git a/include/linux/platform_data/pn544.h b/include/linux/platform_data/pn544.h deleted file mode 100644 index 5ce1ab983f44d1c0191fe0a29f403cb5f6482507..0000000000000000000000000000000000000000 --- a/include/linux/platform_data/pn544.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Driver include for the PN544 NFC chip. - * - * Copyright (C) Nokia Corporation - * - * Author: Jari Vanhala - * Contact: Matti Aaltoenn - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _PN544_H_ -#define _PN544_H_ - -#include - -enum { - NFC_GPIO_ENABLE, - NFC_GPIO_FW_RESET, - NFC_GPIO_IRQ -}; - -/* board config */ -struct pn544_nfc_platform_data { - int (*request_resources) (struct i2c_client *client); - void (*free_resources) (void); - void (*enable) (int fw); - int (*test) (void); - void (*disable) (void); - int (*get_gpio)(int type); -}; - -#endif /* _PN544_H_ */ diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h deleted file mode 100644 index cc2bdafb0c69cfe61d1ff99f69423b87d2322eee..0000000000000000000000000000000000000000 --- a/include/linux/platform_data/st21nfca.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Driver include for the ST21NFCA NFC chip. - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _ST21NFCA_HCI_H_ -#define _ST21NFCA_HCI_H_ - -#include - -#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci" - -struct st21nfca_nfc_platform_data { - unsigned int gpio_ena; - unsigned int irq_polarity; - bool is_ese_present; - bool is_uicc_present; -}; - -#endif /* _ST21NFCA_HCI_H_ */ diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h index a5c0a71ec9147692f6a4dc995b71375f977bb71c..cf9348b376ac50f99944c5bc0762afb3e0b65daf 100644 --- a/include/linux/platform_data/video-imxfb.h +++ b/include/linux/platform_data/video-imxfb.h @@ -50,6 +50,7 @@ struct imx_fb_videomode { struct fb_videomode mode; u32 pcr; + bool aus_mode; unsigned char bpp; }; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 5339ed5bd6f9d04af80cd7da068a776517d9c4ff..b7803a251044a0eb89142b7a32452bef61f2594b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -20,6 +20,7 @@ /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ #define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ +#define GENPD_FLAG_ALWAYS_ON (1U << 2) /* PM domain is always powered on */ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -117,6 +118,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + void *data; }; #ifdef CONFIG_PM_GENERIC_DOMAINS diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index a3447932df1ff0a00f417c847052a83a407f4631..4c2cba7ec1d44edec6881ed7ac1e80ee27312c44 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -106,8 +106,8 @@ extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); extern void __pm_relax(struct wakeup_source *ws); extern void pm_relax(struct device *dev); -extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec); -extern void pm_wakeup_event(struct device *dev, unsigned int msec); +extern void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard); +extern void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard); #else /* !CONFIG_PM_SLEEP */ @@ -182,9 +182,11 @@ static inline void __pm_relax(struct wakeup_source *ws) {} static inline void pm_relax(struct device *dev) {} -static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {} +static inline void pm_wakeup_ws_event(struct wakeup_source *ws, + unsigned int msec, bool hard) {} -static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} +static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, + bool hard) {} #endif /* !CONFIG_PM_SLEEP */ @@ -201,4 +203,19 @@ static inline void wakeup_source_trash(struct wakeup_source *ws) wakeup_source_drop(ws); } +static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) +{ + return pm_wakeup_ws_event(ws, msec, false); +} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) +{ + return pm_wakeup_dev_event(dev, msec, false); +} + +static inline void pm_wakeup_hard_event(struct device *dev) +{ + return pm_wakeup_dev_event(dev, 0, true); +} + #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/pmem.h b/include/linux/pmem.h index e856c2cb0fe86da91d55e2766b687b66ad02cc1e..71ecf3d46aacb210edfed793d5ab0ece06671d72 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,12 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) -{ - BUG(); - return -EFAULT; -} - static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { @@ -65,23 +59,6 @@ static inline bool arch_has_pmem_api(void) return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); } -/* - * memcpy_from_pmem - read from persistent memory with error handling - * @dst: destination buffer - * @src: source buffer - * @size: transfer length - * - * Returns 0 on success negative error code on failure. - */ -static inline int memcpy_from_pmem(void *dst, void const *src, size_t size) -{ - if (arch_has_pmem_api()) - return arch_memcpy_from_pmem(dst, src, size); - else - memcpy(dst, src, size); - return 0; -} - /** * memcpy_to_pmem - copy data to persistent memory * @dst: destination buffer for the copy diff --git a/include/linux/poll.h b/include/linux/poll.h index a46d6755035e3c086bea24a1bad5a37e5bd0f8b6..75ffc5729e4c654bdccd142df3d3fbac0018d6e4 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -98,64 +98,8 @@ extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); extern u64 select_estimate_accuracy(struct timespec64 *tv); - -static inline int poll_schedule(struct poll_wqueues *pwq, int state) -{ - return poll_schedule_timeout(pwq, state, NULL, 0); -} - -/* - * Scalable version of the fd_set. - */ - -typedef struct { - unsigned long *in, *out, *ex; - unsigned long *res_in, *res_out, *res_ex; -} fd_set_bits; - -/* - * How many longwords for "nr" bits? - */ -#define FDS_BITPERLONG (8*sizeof(long)) -#define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) -#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) - -/* - * We do a VERIFY_WRITE here even though we are only reading this time: - * we'll write to it eventually.. - * - * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. - */ -static inline -int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) -{ - nr = FDS_BYTES(nr); - if (ufdset) - return copy_from_user(fdset, ufdset, nr) ? -EFAULT : 0; - - memset(fdset, 0, nr); - return 0; -} - -static inline unsigned long __must_check -set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) -{ - if (ufdset) - return __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); - return 0; -} - -static inline -void zero_fd_set(unsigned long nr, unsigned long *fdset) -{ - memset(fdset, 0, FDS_BYTES(nr)); -} - #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) -extern int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time); -extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, - struct timespec64 *end_time); extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec64 *end_time); diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 34c4498b800fc7b288bedceb375a7788d375f4d4..83b22ae9ae12a27a2855f0559cd227036a2accf6 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -59,23 +59,23 @@ struct posix_clock_operations { int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); - int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts); + int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); - int (*clock_getres) (struct posix_clock *pc, struct timespec *ts); + int (*clock_getres) (struct posix_clock *pc, struct timespec64 *ts); int (*clock_settime)(struct posix_clock *pc, - const struct timespec *ts); + const struct timespec64 *ts); int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit); int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit); void (*timer_gettime)(struct posix_clock *pc, - struct k_itimer *kit, struct itimerspec *tsp); + struct k_itimer *kit, struct itimerspec64 *tsp); int (*timer_settime)(struct posix_clock *pc, struct k_itimer *kit, int flags, - struct itimerspec *tsp, struct itimerspec *old); + struct itimerspec64 *tsp, struct itimerspec64 *old); /* * Optional character device methods: */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 64aa189efe21d6792b07f2b9005fcf5345b2e98c..8c1e43ab14a97e8645b7f981730938cb4e257b70 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -87,22 +87,22 @@ struct k_itimer { }; struct k_clock { - int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); + int (*clock_getres) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_set) (const clockid_t which_clock, - const struct timespec *tp); - int (*clock_get) (const clockid_t which_clock, struct timespec * tp); + const struct timespec64 *tp); + int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, - struct timespec *, struct timespec __user *); + struct timespec64 *, struct timespec __user *); long (*nsleep_restart) (struct restart_block *restart_block); - int (*timer_set) (struct k_itimer * timr, int flags, - struct itimerspec * new_setting, - struct itimerspec * old_setting); - int (*timer_del) (struct k_itimer * timr); + int (*timer_set) (struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting); + int (*timer_del) (struct k_itimer *timr); #define TIMER_RETRY 1 - void (*timer_get) (struct k_itimer * timr, - struct itimerspec * cur_setting); + void (*timer_get) (struct k_itimer *timr, + struct itimerspec64 *cur_setting); }; extern struct k_clock clock_posix_cpu; diff --git a/include/linux/power/bq24190_charger.h b/include/linux/power/bq24190_charger.h deleted file mode 100644 index 9f0283721cbcbdae0d4379672469944780935598..0000000000000000000000000000000000000000 --- a/include/linux/power/bq24190_charger.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Platform data for the TI bq24190 battery charger driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _BQ24190_CHARGER_H_ -#define _BQ24190_CHARGER_H_ - -struct bq24190_platform_data { - unsigned int gpio_int; /* GPIO pin that's connected to INT# */ -}; - -#endif diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index 522757ac9cd4d9975a5fe8bbbf144efb54551cd4..a7ed29baf44a6ce6c4ee01ba381655884090d6f2 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -24,8 +24,15 @@ #define __MAX17042_BATTERY_H_ #define MAX17042_STATUS_BattAbsent (1 << 3) -#define MAX17042_BATTERY_FULL (100) +#define MAX17042_BATTERY_FULL (95) /* Recommend. FullSOCThr value */ #define MAX17042_DEFAULT_SNS_RESISTOR (10000) +#define MAX17042_DEFAULT_VMIN (3000) +#define MAX17042_DEFAULT_VMAX (4500) /* LiHV cell max */ +#define MAX17042_DEFAULT_TEMP_MIN (0) /* For sys without temp sensor */ +#define MAX17042_DEFAULT_TEMP_MAX (700) /* 70 degrees Celcius */ + +/* Consider RepCap which is less then 10 units below FullCAP full */ +#define MAX17042_FULL_THRESHOLD 10 #define MAX17042_CHARACTERIZATION_DATA_SIZE 48 diff --git a/include/linux/printk.h b/include/linux/printk.h index 571257e0f53dc669c4f59df19923bd50dfd31a59..e10f274683222358f892096c3bda5fbf7e896015 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -198,7 +198,7 @@ extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); -void log_buf_kexec_setup(void); +void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); @@ -246,7 +246,7 @@ static inline u32 log_buf_len_get(void) return 0; } -static inline void log_buf_kexec_setup(void) +static inline void log_buf_vmcoreinfo_setup(void) { } diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 12cb8bd81d2d12b734c83059fb0b0759ee0c92c3..58ab28d81fc2ecd4f78f37a81ac07a56d984f2e5 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -14,6 +14,7 @@ struct inode; struct proc_ns_operations { const char *name; + const char *real_ns_name; int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); @@ -26,6 +27,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations pidns_for_children_operations; extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; extern const struct proc_ns_operations cgroupns_operations; diff --git a/include/linux/property.h b/include/linux/property.h index 64e3a9c6d95fed6662f75290486ef0c01d245f28..2f482616a2f22e2e26192a0a47a1c1b22d2bf015 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -33,6 +33,8 @@ enum dev_dma_attr { DEV_DMA_COHERENT, }; +struct fwnode_handle *dev_fwnode(struct device *dev); + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); @@ -70,6 +72,15 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode, int fwnode_property_match_string(struct fwnode_handle *fwnode, const char *propname, const char *string); +struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode, + struct fwnode_handle *child); + +#define fwnode_for_each_child_node(fwnode, child) \ + for (child = fwnode_get_next_child_node(fwnode, NULL); child; \ + child = fwnode_get_next_child_node(fwnode, child)) + struct fwnode_handle *device_get_next_child_node(struct device *dev, struct fwnode_handle *child); @@ -77,9 +88,12 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) +struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode, + const char *childname); struct fwnode_handle *device_get_named_child_node(struct device *dev, const char *childname); +void fwnode_handle_get(struct fwnode_handle *fwnode); void fwnode_handle_put(struct fwnode_handle *fwnode); unsigned int device_get_child_node_count(struct device *dev); @@ -258,4 +272,16 @@ int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); +struct fwnode_handle *fwnode_graph_get_next_endpoint( + struct fwnode_handle *fwnode, struct fwnode_handle *prev); +struct fwnode_handle *fwnode_graph_get_remote_port_parent( + struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_graph_get_remote_port( + struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_graph_get_remote_endpoint( + struct fwnode_handle *fwnode); + +int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode, + struct fwnode_endpoint *endpoint); + #endif /* _LINUX_PROPERTY_H_ */ diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 0da29cae009b186a9010736b2c9b84210ff7b3a2..e2233f50f4284826d2462ea900a2f430c8db8019 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -30,7 +30,9 @@ #include #include -/* types */ +struct module; + +/* pstore record types (see fs/pstore/inode.c for filename templates) */ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, PSTORE_TYPE_MCE = 1, @@ -45,42 +47,146 @@ enum pstore_type_id { PSTORE_TYPE_UNKNOWN = 255 }; -struct module; +struct pstore_info; +/** + * struct pstore_record - details of a pstore record entry + * @psi: pstore backend driver information + * @type: pstore record type + * @id: per-type unique identifier for record + * @time: timestamp of the record + * @buf: pointer to record contents + * @size: size of @buf + * @ecc_notice_size: + * ECC information for @buf + * + * Valid for PSTORE_TYPE_DMESG @type: + * + * @count: Oops count since boot + * @reason: kdump reason for notification + * @part: position in a multipart record + * @compressed: whether the buffer is compressed + * + */ +struct pstore_record { + struct pstore_info *psi; + enum pstore_type_id type; + u64 id; + struct timespec time; + char *buf; + ssize_t size; + ssize_t ecc_notice_size; + int count; + enum kmsg_dump_reason reason; + unsigned int part; + bool compressed; +}; + +/** + * struct pstore_info - backend pstore driver structure + * + * @owner: module which is repsonsible for this backend driver + * @name: name of the backend driver + * + * @buf_lock: spinlock to serialize access to @buf + * @buf: preallocated crash dump buffer + * @bufsize: size of @buf available for crash dump writes + * + * @read_mutex: serializes @open, @read, @close, and @erase callbacks + * @flags: bitfield of frontends the backend can accept writes for + * @data: backend-private pointer passed back during callbacks + * + * Callbacks: + * + * @open: + * Notify backend that pstore is starting a full read of backend + * records. Followed by one or more @read calls, and a final @close. + * + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. + * + * @close: + * Notify backend that pstore has finished a full read of backend + * records. Always preceded by an @open call and one or more @read + * calls. + * + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. (Though pstore will + * ignore the error.) + * + * @read: + * Read next available backend record. Called after a successful + * @open. + * + * @record: + * pointer to record to populate. @buf should be allocated + * by the backend and filled. At least @type and @id should + * be populated, since these are used when creating pstorefs + * file names. + * + * Returns record size on success, zero when no more records are + * available, or negative on error. + * + * @write: + * A newly generated record needs to be written to backend storage. + * + * @record: + * pointer to record metadata. When @type is PSTORE_TYPE_DMESG, + * @buf will be pointing to the preallocated @psi.buf, since + * memory allocation may be broken during an Oops. Regardless, + * @buf must be proccesed or copied before returning. The + * backend is also expected to write @id with something that + 8 can help identify this record to a future @erase callback. + * + * Returns 0 on success, and non-zero on error. + * + * @write_user: + * Perform a frontend write to a backend record, using a specified + * buffer that is coming directly from userspace, instead of the + * @record @buf. + * + * @record: pointer to record metadata. + * @buf: pointer to userspace contents to write to backend + * + * Returns 0 on success, and non-zero on error. + * + * @erase: + * Delete a record from backend storage. Different backends + * identify records differently, so entire original record is + * passed back to assist in identification of what the backend + * should remove from storage. + * + * @record: pointer to record metadata. + * + * Returns 0 on success, and non-zero on error. + * + */ struct pstore_info { struct module *owner; char *name; - spinlock_t buf_lock; /* serialize access to 'buf' */ + + spinlock_t buf_lock; char *buf; size_t bufsize; - struct mutex read_mutex; /* serialize open/read/close */ + + struct mutex read_mutex; + int flags; + void *data; + int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); - ssize_t (*read)(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, char **buf, - bool *compressed, ssize_t *ecc_notice_size, - struct pstore_info *psi); - int (*write)(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, int count, bool compressed, - size_t size, struct pstore_info *psi); - int (*write_buf)(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, const char *buf, bool compressed, - size_t size, struct pstore_info *psi); - int (*write_buf_user)(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, const char __user *buf, - bool compressed, size_t size, struct pstore_info *psi); - int (*erase)(enum pstore_type_id type, u64 id, - int count, struct timespec time, - struct pstore_info *psi); - void *data; + ssize_t (*read)(struct pstore_record *record); + int (*write)(struct pstore_record *record); + int (*write_user)(struct pstore_record *record, + const char __user *buf); + int (*erase)(struct pstore_record *record); }; +/* Supported frontends */ #define PSTORE_FLAGS_DMESG (1 << 0) -#define PSTORE_FLAGS_FRAGILE PSTORE_FLAGS_DMESG #define PSTORE_FLAGS_CONSOLE (1 << 1) #define PSTORE_FLAGS_FTRACE (1 << 2) #define PSTORE_FLAGS_PMSG (1 << 3) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6c70444da3b9d3e5c31f04193b96986ff0e4b04b..6b2e0dd88569b13c66ef445609b2f12419fdd3ac 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -34,11 +34,13 @@ struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; - int consumer ____cacheline_aligned_in_smp; + int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ + int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ + int batch; /* number of entries to consume in a batch */ void **queue; }; @@ -170,7 +172,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) - return r->queue[r->consumer]; + return r->queue[r->consumer_head]; return NULL; } @@ -231,9 +233,38 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r) /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { - r->queue[r->consumer++] = NULL; - if (unlikely(r->consumer >= r->size)) - r->consumer = 0; + /* Fundamentally, what we want to do is update consumer + * index and zero out the entry so producer can reuse it. + * Doing it naively at each consume would be as simple as: + * r->queue[r->consumer++] = NULL; + * if (unlikely(r->consumer >= r->size)) + * r->consumer = 0; + * but that is suboptimal when the ring is full as producer is writing + * out new entries in the same cache line. Defer these updates until a + * batch of entries has been consumed. + */ + int head = r->consumer_head++; + + /* Once we have processed enough entries invalidate them in + * the ring all at once so producer can reuse their space in the ring. + * We also do this when we reach end of the ring - not mandatory + * but helps keep the implementation simple. + */ + if (unlikely(r->consumer_head - r->consumer_tail >= r->batch || + r->consumer_head >= r->size)) { + /* Zero out entries in the reverse order: this way we touch the + * cache line that producer might currently be reading the last; + * producer won't make progress and touch other cache lines + * besides the first one until we write out all entries. + */ + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + } + if (unlikely(r->consumer_head >= r->size)) { + r->consumer_head = 0; + r->consumer_tail = 0; + } } static inline void *__ptr_ring_consume(struct ptr_ring *r) @@ -345,14 +376,27 @@ static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); } +static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) +{ + r->size = size; + r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); + /* We need to set batch at least to 1 to make logic + * in __ptr_ring_discard_one work correctly. + * Batching too much (because ring is small) would cause a lot of + * burstiness. Needs tuning, for now disable batching. + */ + if (r->batch > r->size / 2 || !r->batch) + r->batch = 1; +} + static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; - r->size = size; - r->producer = r->consumer = 0; + __ptr_ring_set_size(r, size); + r->producer = r->consumer_head = r->consumer_tail = 0; spin_lock_init(&r->producer_lock); spin_lock_init(&r->consumer_lock); @@ -373,9 +417,10 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, else if (destroy) destroy(ptr); - r->size = size; + __ptr_ring_set_size(r, size); r->producer = producer; - r->consumer = 0; + r->consumer_head = 0; + r->consumer_tail = 0; old = r->queue; r->queue = queue; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a6fc47571d28cb0602d59477d7caf..ef3eb8bbfee482e04aa06c83b21fbc2ce02d50b3 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, - struct task_struct *new_parent); + struct task_struct *new_parent, + const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 @@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); @@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) set_tsk_thread_flag(child, TIF_SIGPENDING); } + else + child->ptracer_cred = NULL; } /** diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index d32f6f1a5225f38de2ff8456a09b236028eb1de5..e5380471c2cd2a42edb8d0bbeb231d8b7320ddf0 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -40,6 +40,9 @@ extern int qcom_scm_pas_shutdown(u32 peripheral); extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); extern int qcom_scm_set_remote_state(u32 state, u32 id); +extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -67,5 +70,8 @@ static inline void qcom_scm_cpu_power_down(u32 flags) {} static inline u32 qcom_scm_get_version(void) { return 0; } static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } +static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } +static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { return -ENODEV; } +static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } #endif #endif diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 52966b9bfde3740b506664bf2329596cf23695ef..fbab6e0514f07bf0f4a9ac481cb712c58d113b7f 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -100,8 +100,8 @@ #define MAX_NUM_LL2_TX_STATS_COUNTERS 32 #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 10 -#define FW_REVISION_VERSION 10 +#define FW_MINOR_VERSION 15 +#define FW_REVISION_VERSION 3 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -187,6 +187,9 @@ /* DEMS */ #define DQ_DEMS_LEGACY 0 +#define DQ_DEMS_TOE_MORE_TO_SEND 3 +#define DQ_DEMS_TOE_LOCAL_ADV_WND 4 +#define DQ_DEMS_ROCE_CQ_CONS 7 /* XCM agg val selection */ #define DQ_XCM_AGG_VAL_SEL_WORD2 0 @@ -214,6 +217,9 @@ #define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 #define DQ_XCM_ISCSI_EXP_STAT_SN_CMD DQ_XCM_AGG_VAL_SEL_REG6 #define DQ_XCM_ROCE_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_TOE_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_TOE_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 +#define DQ_XCM_TOE_LOCAL_ADV_WND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG4 /* UCM agg val selection (HW) */ #define DQ_UCM_AGG_VAL_SEL_WORD0 0 @@ -269,6 +275,8 @@ #define DQ_XCM_ISCSI_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) #define DQ_XCM_ISCSI_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) #define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_TOE_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_TOE_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) /* UCM agg counter flag selection (HW) */ #define DQ_UCM_AGG_FLG_SHIFT_CF0 0 @@ -285,6 +293,9 @@ #define DQ_UCM_ETH_PMD_RX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) #define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) #define DQ_UCM_ROCE_CQ_ARM_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_TOE_TIMER_STOP_ALL_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF3) +#define DQ_UCM_TOE_SLOW_PATH_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_TOE_DQ_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) /* TCM agg counter flag selection (HW) */ #define DQ_TCM_AGG_FLG_SHIFT_CF0 0 @@ -301,6 +312,9 @@ #define DQ_TCM_FCOE_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) #define DQ_TCM_ISCSI_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) #define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) +#define DQ_TCM_TOE_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) +#define DQ_TCM_TOE_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) +#define DQ_TCM_IWARP_POST_RQ_CF_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) /* PWM address mapping */ #define DQ_PWM_OFFSET_DPM_BASE 0x0 @@ -689,6 +703,16 @@ struct iscsi_eqe_data { #define ISCSI_EQE_DATA_RESERVED0_SHIFT 7 }; +struct rdma_eqe_destroy_qp { + __le32 cid; + u8 reserved[4]; +}; + +union rdma_eqe_data { + struct regpair async_handle; + struct rdma_eqe_destroy_qp rdma_destroy_qp_data; +}; + struct malicious_vf_eqe_data { u8 vf_id; u8 err_id; @@ -705,9 +729,9 @@ union event_ring_data { u8 bytes[8]; struct vf_pf_channel_eqe_data vf_pf_channel; struct iscsi_eqe_data iscsi_info; + union rdma_eqe_data rdma_data; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; - struct regpair roce_handle; }; /* Event Ring Entry */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 4b402fb0eaad5fdf7bd29221d95596f092c0e945..34d93eb5bfba346019ba1d2c9014ab8a2fa5fd8f 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -49,6 +49,9 @@ #define ETH_RX_CQE_PAGE_SIZE_BYTES 4096 #define ETH_RX_NUM_NEXT_PAGE_BDS 2 +#define ETH_MAX_TUNN_LSO_INNER_IPV4_OFFSET 253 +#define ETH_MAX_TUNN_LSO_INNER_IPV6_OFFSET 251 + #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 #define ETH_TX_MAX_BDS_PER_LSO_PACKET 255 diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h index 2e417a45c5f7028ba1fc2b6201fc828f1fef4247..947a635d04bb57ff15a61f1ee82c41ae27c1d4e5 100644 --- a/include/linux/qed/fcoe_common.h +++ b/include/linux/qed/fcoe_common.h @@ -109,13 +109,6 @@ struct fcoe_conn_terminate_ramrod_data { struct regpair terminate_params_addr; }; -struct fcoe_fast_sgl_ctx { - struct regpair sgl_start_addr; - __le32 sgl_byte_offset; - __le16 task_reuse_cnt; - __le16 init_offset_in_first_sge; -}; - struct fcoe_slow_sgl_ctx { struct regpair base_sgl_addr; __le16 curr_sge_off; @@ -124,23 +117,16 @@ struct fcoe_slow_sgl_ctx { __le16 reserved; }; -struct fcoe_sge { - struct regpair sge_addr; - __le16 size; - __le16 reserved0; - u8 reserved1[3]; - u8 is_valid_sge; -}; - -union fcoe_data_desc_ctx { - struct fcoe_fast_sgl_ctx fast; - struct fcoe_slow_sgl_ctx slow; - struct fcoe_sge single_sge; -}; - union fcoe_dix_desc_ctx { struct fcoe_slow_sgl_ctx dix_sgl; - struct fcoe_sge cached_dix_sge; + struct scsi_sge cached_dix_sge; +}; + +struct fcoe_fast_sgl_ctx { + struct regpair sgl_start_addr; + __le32 sgl_byte_offset; + __le16 task_reuse_cnt; + __le16 init_offset_in_first_sge; }; struct fcoe_fcp_cmd_payload { @@ -172,57 +158,6 @@ enum fcoe_mode_type { MAX_FCOE_MODE_TYPE }; -struct fcoe_mstorm_fcoe_task_st_ctx_fp { - __le16 flags; -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_MASK 0x7FFF -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_SHIFT 0 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_SHIFT 15 - __le16 difDataResidue; - __le16 parent_id; - __le16 single_sge_saved_offset; - __le32 data_2_trns_rem; - __le32 offset_in_io; - union fcoe_dix_desc_ctx dix_desc; - union fcoe_data_desc_ctx data_desc; -}; - -struct fcoe_mstorm_fcoe_task_st_ctx_non_fp { - __le16 flags; -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_MASK 0x3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_SHIFT 0 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_SHIFT 2 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_SHIFT 3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_MASK 0xF -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_SHIFT 4 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_MASK 0x3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_SHIFT 8 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_SHIFT 10 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_SHIFT 11 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_SHIFT 12 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_SHIFT 13 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_SHIFT 14 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_SHIFT 15 - u8 tx_rx_sgl_mode; -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_MASK 0x7 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_SHIFT 0 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_MASK 0x7 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_SHIFT 3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_MASK 0x3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_SHIFT 6 - u8 rsrv2; - __le32 num_prm_zero_read; - struct regpair rsp_buf_addr; -}; - struct fcoe_rx_stat { struct regpair fcoe_rx_byte_cnt; struct regpair fcoe_rx_data_pkt_cnt; @@ -236,16 +171,6 @@ struct fcoe_rx_stat { __le32 rsrv; }; -enum fcoe_sgl_mode { - FCOE_SLOW_SGL, - FCOE_SINGLE_FAST_SGE, - FCOE_2_FAST_SGE, - FCOE_3_FAST_SGE, - FCOE_4_FAST_SGE, - FCOE_MUL_FAST_SGES, - MAX_FCOE_SGL_MODE -}; - struct fcoe_stat_ramrod_data { struct regpair stat_params_addr; }; @@ -328,22 +253,24 @@ union fcoe_tx_info_union_ctx { struct ystorm_fcoe_task_st_ctx { u8 task_type; u8 sgl_mode; -#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x7 +#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x1 #define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 0 -#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK 0x1F -#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT 3 +#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK 0x7F +#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT 1 u8 cached_dix_sge; u8 expect_first_xfer; __le32 num_pbf_zero_write; union protection_info_union_ctx protection_info_union; __le32 data_2_trns_rem; + struct scsi_sgl_params sgl_params; + u8 reserved1[12]; union fcoe_tx_info_union_ctx tx_info_union; union fcoe_dix_desc_ctx dix_desc; - union fcoe_data_desc_ctx data_desc; + struct scsi_cached_sges data_desc; __le16 ox_id; __le16 rx_id; __le32 task_rety_identifier; - __le32 reserved1[2]; + u8 reserved2[8]; }; struct ystorm_fcoe_task_ag_ctx { @@ -484,22 +411,22 @@ struct tstorm_fcoe_task_ag_ctx { struct fcoe_tstorm_fcoe_task_st_ctx_read_write { union fcoe_cleanup_addr_exp_ro_union cleanup_addr_exp_ro_union; __le16 flags; -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK 0x7 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK 0x1 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_SHIFT 0 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT 3 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT 1 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT 4 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT 2 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT 5 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT 3 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 6 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 4 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT 7 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT 5 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_MASK 0x3 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT 8 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK 0x3F -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT 10 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT 6 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK 0xFF +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT 8 __le16 seq_cnt; u8 seq_id; u8 ooo_rx_seq_id; @@ -582,8 +509,34 @@ struct mstorm_fcoe_task_ag_ctx { }; struct mstorm_fcoe_task_st_ctx { - struct fcoe_mstorm_fcoe_task_st_ctx_non_fp non_fp; - struct fcoe_mstorm_fcoe_task_st_ctx_fp fp; + struct regpair rsp_buf_addr; + __le32 rsrv[2]; + struct scsi_sgl_params sgl_params; + __le32 data_2_trns_rem; + __le32 data_buffer_offset; + __le16 parent_id; + __le16 flags; +#define MSTORM_FCOE_TASK_ST_CTX_INTERVAL_SIZE_LOG_MASK 0xF +#define MSTORM_FCOE_TASK_ST_CTX_INTERVAL_SIZE_LOG_SHIFT 0 +#define MSTORM_FCOE_TASK_ST_CTX_HOST_INTERFACE_MASK 0x3 +#define MSTORM_FCOE_TASK_ST_CTX_HOST_INTERFACE_SHIFT 4 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_TO_PEER_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_TO_PEER_SHIFT 6 +#define MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER_SHIFT 7 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_BLOCK_SIZE_MASK 0x3 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_BLOCK_SIZE_SHIFT 8 +#define MSTORM_FCOE_TASK_ST_CTX_VALIDATE_DIX_REF_TAG_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_VALIDATE_DIX_REF_TAG_SHIFT 10 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_CACHED_SGE_FLG_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_CACHED_SGE_FLG_SHIFT 11 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_SUPPORTED_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_SUPPORTED_SHIFT 12 +#define MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 13 +#define MSTORM_FCOE_TASK_ST_CTX_RESERVED_MASK 0x3 +#define MSTORM_FCOE_TASK_ST_CTX_RESERVED_SHIFT 14 + struct scsi_cached_sges data_desc; }; struct ustorm_fcoe_task_ag_ctx { @@ -646,6 +599,7 @@ struct ustorm_fcoe_task_ag_ctx { struct fcoe_task_context { struct ystorm_fcoe_task_st_ctx ystorm_st_context; + struct regpair ystorm_st_padding[2]; struct tdif_task_context tdif_context; struct ystorm_fcoe_task_ag_ctx ystorm_ag_context; struct tstorm_fcoe_task_ag_ctx tstorm_ag_context; @@ -668,20 +622,20 @@ struct fcoe_tx_stat { struct fcoe_wqe { __le16 task_id; __le16 flags; -#define FCOE_WQE_REQ_TYPE_MASK 0xF -#define FCOE_WQE_REQ_TYPE_SHIFT 0 -#define FCOE_WQE_SGL_MODE_MASK 0x7 -#define FCOE_WQE_SGL_MODE_SHIFT 4 -#define FCOE_WQE_CONTINUATION_MASK 0x1 -#define FCOE_WQE_CONTINUATION_SHIFT 7 -#define FCOE_WQE_INVALIDATE_PTU_MASK 0x1 -#define FCOE_WQE_INVALIDATE_PTU_SHIFT 8 -#define FCOE_WQE_SUPER_IO_MASK 0x1 -#define FCOE_WQE_SUPER_IO_SHIFT 9 -#define FCOE_WQE_SEND_AUTO_RSP_MASK 0x1 -#define FCOE_WQE_SEND_AUTO_RSP_SHIFT 10 -#define FCOE_WQE_RESERVED0_MASK 0x1F -#define FCOE_WQE_RESERVED0_SHIFT 11 +#define FCOE_WQE_REQ_TYPE_MASK 0xF +#define FCOE_WQE_REQ_TYPE_SHIFT 0 +#define FCOE_WQE_SGL_MODE_MASK 0x1 +#define FCOE_WQE_SGL_MODE_SHIFT 4 +#define FCOE_WQE_CONTINUATION_MASK 0x1 +#define FCOE_WQE_CONTINUATION_SHIFT 5 +#define FCOE_WQE_SEND_AUTO_RSP_MASK 0x1 +#define FCOE_WQE_SEND_AUTO_RSP_SHIFT 6 +#define FCOE_WQE_RESERVED_MASK 0x1 +#define FCOE_WQE_RESERVED_SHIFT 7 +#define FCOE_WQE_NUM_SGES_MASK 0xF +#define FCOE_WQE_NUM_SGES_SHIFT 8 +#define FCOE_WQE_RESERVED1_MASK 0xF +#define FCOE_WQE_RESERVED1_SHIFT 12 union fcoe_additional_info_union additional_info_union; }; diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 4c5747babcf63ff32b8db664146df40545d986d2..69949f8e354b0447c7950884bd205622a4653ab2 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -39,17 +39,9 @@ /* iSCSI HSI constants */ #define ISCSI_DEFAULT_MTU (1500) -/* Current iSCSI HSI version number composed of two fields (16 bit) */ -#define ISCSI_HSI_MAJOR_VERSION (0) -#define ISCSI_HSI_MINOR_VERSION (0) - /* KWQ (kernel work queue) layer codes */ #define ISCSI_SLOW_PATH_LAYER_CODE (6) -/* CQE completion status */ -#define ISCSI_EQE_COMPLETION_SUCCESS (0x0) -#define ISCSI_EQE_RST_CONN_RCVD (0x1) - /* iSCSI parameter defaults */ #define ISCSI_DEFAULT_HEADER_DIGEST (0) #define ISCSI_DEFAULT_DATA_DIGEST (0) @@ -68,6 +60,10 @@ #define ISCSI_MIN_VAL_MAX_OUTSTANDING_R2T (1) #define ISCSI_MAX_VAL_MAX_OUTSTANDING_R2T (0xff) +#define ISCSI_AHS_CNTL_SIZE 4 + +#define ISCSI_WQE_NUM_SGES_SLOWIO (0xf) + /* iSCSI reserved params */ #define ISCSI_ITT_ALL_ONES (0xffffffff) #define ISCSI_TTT_ALL_ONES (0xffffffff) @@ -173,19 +169,6 @@ struct iscsi_async_msg_hdr { __le32 reserved7; }; -struct iscsi_sge { - struct regpair sge_addr; - __le16 sge_len; - __le16 reserved0; - __le32 reserved1; -}; - -struct iscsi_cached_sge_ctx { - struct iscsi_sge sge; - struct regpair reserved; - __le32 dsgl_curr_offset[2]; -}; - struct iscsi_cmd_hdr { __le16 reserved1; u8 flags_attr; @@ -229,8 +212,13 @@ struct iscsi_common_hdr { #define ISCSI_COMMON_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 lun_reserved[4]; - __le32 data[6]; + struct regpair lun_reserved; + __le32 itt; + __le32 ttt; + __le32 cmdstat_sn; + __le32 exp_statcmd_sn; + __le32 max_cmd_sn; + __le32 data[3]; }; struct iscsi_conn_offload_params { @@ -246,8 +234,10 @@ struct iscsi_conn_offload_params { #define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_SHIFT 0 #define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_MASK 0x1 #define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_SHIFT 1 -#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x3F -#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 2 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_MASK 0x1 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_SHIFT 2 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x1F +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 3 u8 pbl_page_size_log; u8 pbe_page_size_log; u8 default_cq; @@ -278,8 +268,12 @@ struct iscsi_conn_update_ramrod_params { #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_SHIFT 2 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_MASK 0x1 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_SHIFT 3 -#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0xF -#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT 4 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_BLOCK_SIZE_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_BLOCK_SIZE_SHIFT 4 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_HOST_EN_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_HOST_EN_SHIFT 5 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0x3 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT 6 u8 reserved0[3]; __le32 max_seq_size; __le32 max_send_pdu_length; @@ -312,7 +306,7 @@ struct iscsi_ext_cdb_cmd_hdr { __le32 expected_transfer_length; __le32 cmd_sn; __le32 exp_stat_sn; - struct iscsi_sge cdb_sge; + struct scsi_sge cdb_sge; }; struct iscsi_login_req_hdr { @@ -519,8 +513,8 @@ struct iscsi_logout_response_hdr { __le32 exp_cmd_sn; __le32 max_cmd_sn; __le32 reserved4; - __le16 time2retain; - __le16 time2wait; + __le16 time_2_retain; + __le16 time_2_wait; __le32 reserved5[1]; }; @@ -602,7 +596,7 @@ struct iscsi_tmf_response_hdr { #define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 struct regpair reserved0; __le32 itt; - __le32 rtt; + __le32 reserved1; __le32 stat_sn; __le32 exp_cmd_sn; __le32 max_cmd_sn; @@ -641,7 +635,7 @@ struct iscsi_reject_hdr { #define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_SHIFT 24 struct regpair reserved0; - __le32 reserved1; + __le32 all_ones; __le32 reserved2; __le32 stat_sn; __le32 exp_cmd_sn; @@ -688,7 +682,9 @@ struct iscsi_cqe_solicited { __le16 itid; u8 task_type; u8 fw_dbg_field; - __le32 reserved1[2]; + u8 caused_conn_err; + u8 reserved0[3]; + __le32 reserved1[1]; union iscsi_task_hdr iscsi_hdr; }; @@ -727,35 +723,6 @@ enum iscsi_cqe_unsolicited_type { MAX_ISCSI_CQE_UNSOLICITED_TYPE }; -struct iscsi_virt_sgl_ctx { - struct regpair sgl_base; - struct regpair dsgl_base; - __le32 sgl_initial_offset; - __le32 dsgl_initial_offset; - __le32 dsgl_curr_offset[2]; -}; - -struct iscsi_sgl_var_params { - u8 sgl_ptr; - u8 dsgl_ptr; - __le16 sge_offset; - __le16 dsge_offset; -}; - -struct iscsi_phys_sgl_ctx { - struct regpair sgl_base; - struct regpair dsgl_base; - u8 sgl_size; - u8 dsgl_size; - __le16 reserved; - struct iscsi_sgl_var_params var_params[2]; -}; - -union iscsi_data_desc_ctx { - struct iscsi_virt_sgl_ctx virt_sgl; - struct iscsi_phys_sgl_ctx phys_sgl; - struct iscsi_cached_sge_ctx cached_sge; -}; struct iscsi_debug_modes { u8 flags; @@ -771,8 +738,10 @@ struct iscsi_debug_modes { #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_SHIFT 4 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_MASK 0x1 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_SHIFT 5 -#define ISCSI_DEBUG_MODES_RESERVED0_MASK 0x3 -#define ISCSI_DEBUG_MODES_RESERVED0_SHIFT 6 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DATA_DIGEST_ERROR_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DATA_DIGEST_ERROR_SHIFT 6 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DIF_ERROR_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DIF_ERROR_SHIFT 7 }; struct iscsi_dif_flags { @@ -806,7 +775,6 @@ enum iscsi_eqe_opcode { ISCSI_EVENT_TYPE_ASYN_FIN_WAIT2, ISCSI_EVENT_TYPE_ISCSI_CONN_ERROR, ISCSI_EVENT_TYPE_TCP_CONN_ERROR, - ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES, MAX_ISCSI_EQE_OPCODE }; @@ -856,31 +824,11 @@ enum iscsi_error_types { ISCSI_CONN_ERROR_PROTOCOL_ERR_DIF_TX, ISCSI_CONN_ERROR_SENSE_DATA_LENGTH, ISCSI_CONN_ERROR_DATA_PLACEMENT_ERROR, + ISCSI_CONN_ERROR_INVALID_ITT, ISCSI_ERROR_UNKNOWN, MAX_ISCSI_ERROR_TYPES }; -struct iscsi_mflags { - u8 mflags; -#define ISCSI_MFLAGS_SLOW_IO_MASK 0x1 -#define ISCSI_MFLAGS_SLOW_IO_SHIFT 0 -#define ISCSI_MFLAGS_SINGLE_SGE_MASK 0x1 -#define ISCSI_MFLAGS_SINGLE_SGE_SHIFT 1 -#define ISCSI_MFLAGS_RESERVED_MASK 0x3F -#define ISCSI_MFLAGS_RESERVED_SHIFT 2 -}; - -struct iscsi_sgl { - struct regpair sgl_addr; - __le16 updated_sge_size; - __le16 updated_sge_offset; - __le32 byte_offset; -}; - -union iscsi_mstorm_sgl { - struct iscsi_sgl sgl_struct; - struct iscsi_sge single_sge; -}; enum iscsi_ramrod_cmd_id { ISCSI_RAMROD_CMD_ID_UNUSED = 0, @@ -896,10 +844,10 @@ enum iscsi_ramrod_cmd_id { struct iscsi_reg1 { __le32 reg1_map; -#define ISCSI_REG1_NUM_FAST_SGES_MASK 0x7 -#define ISCSI_REG1_NUM_FAST_SGES_SHIFT 0 -#define ISCSI_REG1_RESERVED1_MASK 0x1FFFFFFF -#define ISCSI_REG1_RESERVED1_SHIFT 3 +#define ISCSI_REG1_NUM_SGES_MASK 0xF +#define ISCSI_REG1_NUM_SGES_SHIFT 0 +#define ISCSI_REG1_RESERVED1_MASK 0xFFFFFFF +#define ISCSI_REG1_RESERVED1_SHIFT 4 }; union iscsi_seq_num { @@ -967,22 +915,33 @@ struct iscsi_spe_func_init { }; struct ystorm_iscsi_task_state { - union iscsi_data_desc_ctx sgl_ctx_union; - __le32 buffer_offset[2]; - __le16 bytes_nxt_dif; - __le16 rxmit_bytes_nxt_dif; - union iscsi_seq_num seq_num_union; - u8 dif_bytes_leftover; - u8 rxmit_dif_bytes_leftover; - __le16 reuse_count; - struct iscsi_dif_flags dif_flags; - u8 local_comp; + struct scsi_cached_sges data_desc; + struct scsi_sgl_params sgl_params; __le32 exp_r2t_sn; - __le32 sgl_offset[2]; + __le32 buffer_offset; + union iscsi_seq_num seq_num; + struct iscsi_dif_flags dif_flags; + u8 flags; +#define YSTORM_ISCSI_TASK_STATE_LOCAL_COMP_MASK 0x1 +#define YSTORM_ISCSI_TASK_STATE_LOCAL_COMP_SHIFT 0 +#define YSTORM_ISCSI_TASK_STATE_SLOW_IO_MASK 0x1 +#define YSTORM_ISCSI_TASK_STATE_SLOW_IO_SHIFT 1 +#define YSTORM_ISCSI_TASK_STATE_RESERVED0_MASK 0x3F +#define YSTORM_ISCSI_TASK_STATE_RESERVED0_SHIFT 2 +}; + +struct ystorm_iscsi_task_rxmit_opt { + __le32 fast_rxmit_sge_offset; + __le32 scan_start_buffer_offset; + __le32 fast_rxmit_buffer_offset; + u8 scan_start_sgl_index; + u8 fast_rxmit_sgl_index; + __le16 reserved; }; struct ystorm_iscsi_task_st_ctx { struct ystorm_iscsi_task_state state; + struct ystorm_iscsi_task_rxmit_opt rxmit_opt; union iscsi_task_hdr pdu_hdr; }; @@ -1152,25 +1111,16 @@ struct ustorm_iscsi_task_ag_ctx { }; struct mstorm_iscsi_task_st_ctx { - union iscsi_mstorm_sgl sgl_union; - struct iscsi_dif_flags dif_flags; - struct iscsi_mflags flags; - u8 sgl_size; - u8 host_sge_index; - __le16 dix_cur_sge_offset; - __le16 dix_cur_sge_size; - __le32 data_offset_rtid; - u8 dif_offset; - u8 dix_sgl_size; - u8 dix_sge_index; + struct scsi_cached_sges data_desc; + struct scsi_sgl_params sgl_params; + __le32 rem_task_size; + __le32 data_buffer_offset; u8 task_type; + struct iscsi_dif_flags dif_flags; + u8 reserved0[2]; struct regpair sense_db; - struct regpair dix_sgl_cur_sge; - __le32 rem_task_size; - __le16 reuse_count; - __le16 dif_data_residue; - u8 reserved0[4]; - __le32 reserved1[1]; + __le32 expected_itt; + __le32 reserved1; }; struct ustorm_iscsi_task_st_ctx { @@ -1184,7 +1134,7 @@ struct ustorm_iscsi_task_st_ctx { #define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_SHIFT 0 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_MASK 0x7F #define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_SHIFT 1 - u8 reserved2; + struct iscsi_dif_flags dif_flags; __le16 reserved3; __le32 reserved4; __le32 reserved5; @@ -1207,10 +1157,10 @@ struct ustorm_iscsi_task_st_ctx { #define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_SHIFT 2 #define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_MASK 0x1 #define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_SHIFT 3 -#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_MASK 0x1 -#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_SHIFT 4 -#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_MASK 0x1 -#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_SHIFT 5 +#define USTORM_ISCSI_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_SHIFT 4 +#define USTORM_ISCSI_TASK_ST_CTX_HQ_SCANNED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_HQ_SCANNED_DONE_SHIFT 5 #define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_MASK 0x1 #define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_SHIFT 6 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_MASK 0x1 @@ -1220,7 +1170,6 @@ struct ustorm_iscsi_task_st_ctx { struct iscsi_task_context { struct ystorm_iscsi_task_st_ctx ystorm_st_context; - struct regpair ystorm_st_padding[2]; struct ystorm_iscsi_task_ag_ctx ystorm_ag_context; struct regpair ystorm_ag_padding[2]; struct tdif_task_context tdif_context; @@ -1272,32 +1221,22 @@ struct iscsi_uhqe { #define ISCSI_UHQE_TASK_ID_LO_SHIFT 24 }; -struct iscsi_wqe_field { - __le32 contlen_cdbsize_field; -#define ISCSI_WQE_FIELD_CONT_LEN_MASK 0xFFFFFF -#define ISCSI_WQE_FIELD_CONT_LEN_SHIFT 0 -#define ISCSI_WQE_FIELD_CDB_SIZE_MASK 0xFF -#define ISCSI_WQE_FIELD_CDB_SIZE_SHIFT 24 -}; - -union iscsi_wqe_field_union { - struct iscsi_wqe_field cont_field; - __le32 prev_tid; -}; struct iscsi_wqe { __le16 task_id; u8 flags; #define ISCSI_WQE_WQE_TYPE_MASK 0x7 #define ISCSI_WQE_WQE_TYPE_SHIFT 0 -#define ISCSI_WQE_NUM_FAST_SGES_MASK 0x7 -#define ISCSI_WQE_NUM_FAST_SGES_SHIFT 3 -#define ISCSI_WQE_PTU_INVALIDATE_MASK 0x1 -#define ISCSI_WQE_PTU_INVALIDATE_SHIFT 6 +#define ISCSI_WQE_NUM_SGES_MASK 0xF +#define ISCSI_WQE_NUM_SGES_SHIFT 3 #define ISCSI_WQE_RESPONSE_MASK 0x1 #define ISCSI_WQE_RESPONSE_SHIFT 7 struct iscsi_dif_flags prot_flags; - union iscsi_wqe_field_union cont_prevtid_union; + __le32 contlen_cdbsize; +#define ISCSI_WQE_CONT_LEN_MASK 0xFFFFFF +#define ISCSI_WQE_CONT_LEN_SHIFT 0 +#define ISCSI_WQE_CDB_SIZE_MASK 0xFF +#define ISCSI_WQE_CDB_SIZE_SHIFT 24 }; enum iscsi_wqe_type { @@ -1318,17 +1257,15 @@ struct iscsi_xhqe { u8 total_ahs_length; u8 opcode; u8 flags; -#define ISCSI_XHQE_NUM_FAST_SGES_MASK 0x7 -#define ISCSI_XHQE_NUM_FAST_SGES_SHIFT 0 -#define ISCSI_XHQE_FINAL_MASK 0x1 -#define ISCSI_XHQE_FINAL_SHIFT 3 -#define ISCSI_XHQE_SUPER_IO_MASK 0x1 -#define ISCSI_XHQE_SUPER_IO_SHIFT 4 -#define ISCSI_XHQE_STATUS_BIT_MASK 0x1 -#define ISCSI_XHQE_STATUS_BIT_SHIFT 5 -#define ISCSI_XHQE_RESERVED_MASK 0x3 -#define ISCSI_XHQE_RESERVED_SHIFT 6 - union iscsi_seq_num seq_num_union; +#define ISCSI_XHQE_FINAL_MASK 0x1 +#define ISCSI_XHQE_FINAL_SHIFT 0 +#define ISCSI_XHQE_STATUS_BIT_MASK 0x1 +#define ISCSI_XHQE_STATUS_BIT_SHIFT 1 +#define ISCSI_XHQE_NUM_SGES_MASK 0xF +#define ISCSI_XHQE_NUM_SGES_SHIFT 2 +#define ISCSI_XHQE_RESERVED0_MASK 0x3 +#define ISCSI_XHQE_RESERVED0_SHIFT 6 + union iscsi_seq_num seq_num; __le16 reserved1; }; diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 4cd1f0ccfa367a5215dd121ca22efa6267e7c51e..d66d16a559e19e37516abe08939fa9e0a712aa60 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -158,15 +158,27 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; void (*force_mac) (void *dev, u8 *mac, bool forced); + void (*ports_update)(void *dev, u16 vxlan_port, u16 geneve_port); }; #define QED_MAX_PHC_DRIFT_PPB 291666666 enum qed_ptp_filter_type { - QED_PTP_FILTER_L2, - QED_PTP_FILTER_IPV4, - QED_PTP_FILTER_IPV4_IPV6, - QED_PTP_FILTER_L2_IPV4_IPV6 + QED_PTP_FILTER_NONE, + QED_PTP_FILTER_ALL, + QED_PTP_FILTER_V1_L4_EVENT, + QED_PTP_FILTER_V1_L4_GEN, + QED_PTP_FILTER_V2_L4_EVENT, + QED_PTP_FILTER_V2_L4_GEN, + QED_PTP_FILTER_V2_L2_EVENT, + QED_PTP_FILTER_V2_L2_GEN, + QED_PTP_FILTER_V2_EVENT, + QED_PTP_FILTER_V2_GEN +}; + +enum qed_ptp_hwtstamp_tx_type { + QED_PTP_HWTSTAMP_TX_OFF, + QED_PTP_HWTSTAMP_TX_ON, }; #ifdef CONFIG_DCB @@ -229,8 +241,8 @@ struct qed_eth_dcbnl_ops { #endif struct qed_eth_ptp_ops { - int (*hwtstamp_tx_on)(struct qed_dev *); - int (*cfg_rx_filters)(struct qed_dev *, enum qed_ptp_filter_type); + int (*cfg_filters)(struct qed_dev *, enum qed_ptp_filter_type, + enum qed_ptp_hwtstamp_tx_type); int (*read_rx_ts)(struct qed_dev *, u64 *); int (*read_tx_ts)(struct qed_dev *, u64 *); int (*read_cc)(struct qed_dev *, u64 *); @@ -301,6 +313,14 @@ struct qed_eth_ops { int (*tunn_config)(struct qed_dev *cdev, struct qed_tunn_params *params); + + int (*ntuple_filter_config)(struct qed_dev *cdev, void *cookie, + dma_addr_t mapping, u16 length, + u16 vport_id, u16 rx_queue_id, + bool add_filter); + + int (*configure_arfs_searcher)(struct qed_dev *cdev, + bool en_searcher); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index fde56c436f7177e27173656f4fa4480752351c6d..c70ac13a97e66e9ded8a81f2716228619f55a98c 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -144,6 +144,7 @@ struct qed_dcbx_operational_params { bool enabled; bool ieee; bool cee; + bool local; u32 err; }; @@ -178,6 +179,12 @@ struct qed_eth_pf_params { * to update_pf_params routine invoked before slowpath start */ u16 num_cons; + + /* To enable arfs, previous to HW-init a positive number needs to be + * set [as filters require allocated searcher ILT memory]. + * This will set the maximal number of configured steering-filters. + */ + u32 num_arfs_filters; }; struct qed_fcoe_pf_params { @@ -263,7 +270,6 @@ struct qed_rdma_pf_params { * the doorbell BAR). */ u32 min_dpis; /* number of requested DPIs */ - u32 num_mrs; /* number of requested memory regions */ u32 num_qps; /* number of requested Queue Pairs */ u32 num_srqs; /* number of requested SRQ */ u8 roce_edpm_mode; /* see QED_ROCE_EDPM_MODE_ENABLE */ @@ -300,6 +306,11 @@ struct qed_sb_info { struct qed_dev *cdev; }; +enum qed_dev_type { + QED_DEV_TYPE_BB, + QED_DEV_TYPE_AH, +}; + struct qed_dev_info { unsigned long pci_mem_start; unsigned long pci_mem_end; @@ -325,6 +336,13 @@ struct qed_dev_info { u16 mtu; bool wol_support; + + enum qed_dev_type dev_type; + + /* Output parameters for qede */ + bool vxlan_enable; + bool gre_enable; + bool geneve_enable; }; enum qed_sb_type { @@ -421,6 +439,7 @@ struct qed_int_info { }; struct qed_common_cb_ops { + void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); @@ -616,7 +635,7 @@ struct qed_common_ops { * @return 0 on success, error otherwise. */ int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u8 qid, u16 sb_id); + u16 qid, u16 sb_id); /** * @brief set_led - Configure LED mode @@ -752,7 +771,7 @@ enum qed_mf_mode { QED_MF_NPAR, }; -struct qed_eth_stats { +struct qed_eth_stats_common { u64 no_buff_discards; u64 packet_too_big_discard; u64 ttl0_discard; @@ -784,11 +803,6 @@ struct qed_eth_stats { u64 rx_256_to_511_byte_packets; u64 rx_512_to_1023_byte_packets; u64 rx_1024_to_1518_byte_packets; - u64 rx_1519_to_1522_byte_packets; - u64 rx_1519_to_2047_byte_packets; - u64 rx_2048_to_4095_byte_packets; - u64 rx_4096_to_9216_byte_packets; - u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; @@ -805,14 +819,8 @@ struct qed_eth_stats { u64 tx_256_to_511_byte_packets; u64 tx_512_to_1023_byte_packets; u64 tx_1024_to_1518_byte_packets; - u64 tx_1519_to_2047_byte_packets; - u64 tx_2048_to_4095_byte_packets; - u64 tx_4096_to_9216_byte_packets; - u64 tx_9217_to_16383_byte_packets; u64 tx_pause_frames; u64 tx_pfc_frames; - u64 tx_lpi_entry_count; - u64 tx_total_collisions; u64 brb_truncates; u64 brb_discards; u64 rx_mac_bytes; @@ -827,6 +835,34 @@ struct qed_eth_stats { u64 tx_mac_ctrl_frames; }; +struct qed_eth_stats_bb { + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; + u64 tx_1519_to_2047_byte_packets; + u64 tx_2048_to_4095_byte_packets; + u64 tx_4096_to_9216_byte_packets; + u64 tx_9217_to_16383_byte_packets; + u64 tx_lpi_entry_count; + u64 tx_total_collisions; +}; + +struct qed_eth_stats_ah { + u64 rx_1519_to_max_byte_packets; + u64 tx_1519_to_max_byte_packets; +}; + +struct qed_eth_stats { + struct qed_eth_stats_common common; + + union { + struct qed_eth_stats_bb bb; + struct qed_eth_stats_ah ah; + }; +}; + #define QED_SB_IDX 0x0002 #define RX_PI 0 diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index f70bb81b8b6acfda1701ffcd073163db05a82770..3414649133d2e44fe36b9c12e12f42ba6970ed9d 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -67,6 +67,8 @@ struct qed_dev_iscsi_info { void __iomem *primary_dbq_rq_addr; void __iomem *secondary_bdq_rq_addr; + + u8 num_cqs; }; struct qed_iscsi_id_params { diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index f742d4312c9d96f1498554edee09654e065e9e74..cbb2ff0ce4bc34c5179a5ad77167ac0b721f6a30 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -240,6 +240,7 @@ struct qed_rdma_add_user_out_params { u64 dpi_addr; u64 dpi_phys_addr; u32 dpi_size; + u16 wid_count; }; enum roce_mode { @@ -533,6 +534,7 @@ enum qed_rdma_type { struct qed_dev_rdma_info { struct qed_dev_info common; enum qed_rdma_type rdma_type; + u8 user_dpm_enabled; }; struct qed_rdma_ops { diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index f773aa5e746ff47bb886aa19f568a24108dd0d1e..72c770f9f6669a5169f1780f8cae524bc6c3e0b4 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -52,7 +52,8 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS -#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB +#define RDMA_NUM_STATISTIC_COUNTERS_K2 MAX_NUM_VPORTS_K2 +#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB #define RDMA_TASK_TYPE (PROTOCOLID_ROCE) diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index bad02df213dfccd11cd25fa1b1e0decaf17f92c6..866f063026dedc6540d87d595bcacf9071f14681 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -38,4 +38,21 @@ #define ROCE_MAX_QPS (32 * 1024) +enum roce_async_events_type { + ROCE_ASYNC_EVENT_NONE = 0, + ROCE_ASYNC_EVENT_COMM_EST = 1, + ROCE_ASYNC_EVENT_SQ_DRAINED, + ROCE_ASYNC_EVENT_SRQ_LIMIT, + ROCE_ASYNC_EVENT_LAST_WQE_REACHED, + ROCE_ASYNC_EVENT_CQ_ERR, + ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR, + ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR, + ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR, + ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR, + ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR, + ROCE_ASYNC_EVENT_SRQ_EMPTY, + ROCE_ASYNC_EVENT_DESTROY_QP_DONE, + MAX_ROCE_ASYNC_EVENTS_TYPE +}; + #endif /* __ROCE_COMMON__ */ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h index 03f3e37ab059d5e4b48aa2b7f80366016b27fdf5..08df82a096b62de80e51f34122f4809f0eb6d27e 100644 --- a/include/linux/qed/storage_common.h +++ b/include/linux/qed/storage_common.h @@ -40,6 +40,8 @@ #define BDQ_ID_IMM_DATA (1) #define BDQ_NUM_IDS (2) +#define SCSI_NUM_SGES_SLOW_SGL_THR 8 + #define BDQ_MAX_EXTERNAL_RING_SIZE (1 << 15) struct scsi_bd { @@ -52,6 +54,16 @@ struct scsi_bdq_ram_drv_data { __le16 reserved0[3]; }; +struct scsi_sge { + struct regpair sge_addr; + __le32 sge_len; + __le32 reserved; +}; + +struct scsi_cached_sges { + struct scsi_sge sge[4]; +}; + struct scsi_drv_cmdq { __le16 cmdq_cons; __le16 reserved0; @@ -99,11 +111,19 @@ struct scsi_ram_per_bdq_resource_drv_data { struct scsi_bdq_ram_drv_data drv_data_per_bdq_id[BDQ_NUM_IDS]; }; -struct scsi_sge { - struct regpair sge_addr; - __le16 sge_len; - __le16 reserved0; - __le32 reserved1; +enum scsi_sgl_mode { + SCSI_TX_SLOW_SGL, + SCSI_FAST_SGL, + MAX_SCSI_SGL_MODE +}; + +struct scsi_sgl_params { + struct regpair sgl_addr; + __le32 sgl_total_length; + __le32 sge_offset; + __le16 sgl_num_sges; + u8 sgl_index; + u8 reserved; }; struct scsi_terminate_extra_params { diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index 46fe7856f1b22c828474257ceedf03c182958ec5..a5e843268f0e9431eacd07ad5cc74e10be690b0d 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -173,6 +173,7 @@ enum tcp_seg_placement_event { TCP_EVENT_ADD_ISLE_RIGHT, TCP_EVENT_ADD_ISLE_LEFT, TCP_EVENT_JOIN, + TCP_EVENT_DELETE_ISLES, TCP_EVENT_NOP, MAX_TCP_SEG_PLACEMENT_EVENT }; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 799a63d0e1a823e38c0c54f52e5e297cde9bb60c..dda22f45fc1b2361b9b7c79d5fa39a98f09952be 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -44,6 +44,7 @@ void inode_sub_rsv_space(struct inode *inode, qsize_t number); void inode_reclaim_rsv_space(struct inode *inode, qsize_t number); int dquot_initialize(struct inode *inode); +bool dquot_initialize_needed(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, struct kqid qid); static inline struct dquot *dqgrab(struct dquot *dquot) @@ -162,7 +163,6 @@ static inline bool sb_has_quota_active(struct super_block *sb, int type) * Operations supported for diskquotas. */ extern const struct dquot_operations dquot_operations; -extern const struct quotactl_ops dquot_quotactl_ops; extern const struct quotactl_ops dquot_quotactl_sysfile_ops; #else @@ -208,6 +208,11 @@ static inline int dquot_initialize(struct inode *inode) return 0; } +static inline bool dquot_initialize_needed(struct inode *inode) +{ + return false; +} + static inline void dquot_drop(struct inode *inode) { } diff --git a/include/linux/ras.h b/include/linux/ras.h index 2aceeafd6fe51fb34f5b99a4c506f92e3aa0353c..ffb147185e8df8ae62e4566d7da1b666fc290e1a 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -1,14 +1,25 @@ #ifndef __RAS_H__ #define __RAS_H__ +#include + #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); void ras_debugfs_init(void); int ras_add_daemon_trace(void); #else static inline int ras_userspace_consumers(void) { return 0; } -static inline void ras_debugfs_init(void) { return; } +static inline void ras_debugfs_init(void) { } static inline int ras_add_daemon_trace(void) { return 0; } #endif +#ifdef CONFIG_RAS_CEC +void __init cec_init(void); +int __init parse_cec_param(char *str); +int cec_add_elem(u64 pfn); +#else +static inline void __init cec_init(void) { } +static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif + +#endif /* __RAS_H__ */ diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h new file mode 100644 index 0000000000000000000000000000000000000000..4b766b61e1a026c226bb86186a72009b8074929f --- /dev/null +++ b/include/linux/rcu_node_tree.h @@ -0,0 +1,99 @@ +/* + * RCU node combining tree definitions. These are used to compute + * global attributes while avoiding common-case global contention. A key + * property that these computations rely on is a tournament-style approach + * where only one of the tasks contending a lower level in the tree need + * advance to the next higher level. If properly configured, this allows + * unlimited scalability while maintaining a constant level of contention + * on the root node. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Author: Paul E. McKenney + */ + +#ifndef __LINUX_RCU_NODE_TREE_H +#define __LINUX_RCU_NODE_TREE_H + +/* + * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and + * CONFIG_RCU_FANOUT_LEAF. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this did work well going from three levels to four. + * Of course, your mileage may vary. + */ + +#ifdef CONFIG_RCU_FANOUT +#define RCU_FANOUT CONFIG_RCU_FANOUT +#else /* #ifdef CONFIG_RCU_FANOUT */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT 64 +# else +# define RCU_FANOUT 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT */ + +#ifdef CONFIG_RCU_FANOUT_LEAF +#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF +#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ +#define RCU_FANOUT_LEAF 16 +#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ + +#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) +#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) +#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) +#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT_1 +# define RCU_NUM_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_NODES NUM_RCU_LVL_0 +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 } +# define RCU_NODE_NAME_INIT { "rcu_node_0" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" } +#elif NR_CPUS <= RCU_FANOUT_2 +# define RCU_NUM_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1) +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 } +# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" } +#elif NR_CPUS <= RCU_FANOUT_3 +# define RCU_NUM_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2) +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 } +# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } +#elif NR_CPUS <= RCU_FANOUT_4 +# define RCU_NUM_LVLS 4 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) +# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 } +# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ + +#endif /* __LINUX_RCU_NODE_TREE_H */ diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h new file mode 100644 index 0000000000000000000000000000000000000000..ba4d2621d9cabccc8130ad0b7576dfac5930f3aa --- /dev/null +++ b/include/linux/rcu_segcblist.h @@ -0,0 +1,90 @@ +/* + * RCU segmented callback lists + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Authors: Paul E. McKenney + */ + +#ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H +#define __INCLUDE_LINUX_RCU_SEGCBLIST_H + +/* Simple unsegmented callback lists. */ +struct rcu_cblist { + struct rcu_head *head; + struct rcu_head **tail; + long len; + long len_lazy; +}; + +#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } + +/* Complicated segmented callback lists. ;-) */ + +/* + * Index values for segments in rcu_segcblist structure. + * + * The segments are as follows: + * + * [head, *tails[RCU_DONE_TAIL]): + * Callbacks whose grace period has elapsed, and thus can be invoked. + * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]): + * Callbacks waiting for the current GP from the current CPU's viewpoint. + * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]): + * Callbacks that arrived before the next GP started, again from + * the current CPU's viewpoint. These can be handled by the next GP. + * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]): + * Callbacks that might have arrived after the next GP started. + * There is some uncertainty as to when a given GP starts and + * ends, but a CPU knows the exact times if it is the one starting + * or ending the GP. Other CPUs know that the previous GP ends + * before the next one starts. + * + * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also + * empty. + * + * The ->gp_seq[] array contains the grace-period number at which the + * corresponding segment of callbacks will be ready to invoke. A given + * element of this array is meaningful only when the corresponding segment + * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks + * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have + * not yet been assigned a grace-period number). + */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_CBLIST_NSEGS 4 + +struct rcu_segcblist { + struct rcu_head *head; + struct rcu_head **tails[RCU_CBLIST_NSEGS]; + unsigned long gp_seq[RCU_CBLIST_NSEGS]; + long len; + long len_lazy; +}; + +#define RCU_SEGCBLIST_INITIALIZER(n) \ +{ \ + .head = NULL, \ + .tails[RCU_DONE_TAIL] = &n.head, \ + .tails[RCU_WAIT_TAIL] = &n.head, \ + .tails[RCU_NEXT_READY_TAIL] = &n.head, \ + .tails[RCU_NEXT_TAIL] = &n.head, \ +} + +#endif /* __INCLUDE_LINUX_RCU_SEGCBLIST_H */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4f7a9561b8c415d069505dab632b922903d87d2e..b1fd8bf85fdc430eaaa2195cd6dc18417bb64585 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -509,7 +509,8 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, { struct hlist_node *i, *last = NULL; - for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) + /* Note: write side code, so rcu accessors are not needed. */ + for (i = h->first; i; i = i->next) last = i; if (last) { diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de88b33c0974877bdaac25c0759f8f4d0ea3808b..e1e5d002fdb93a537110b68886656caf6c049fc4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -97,6 +97,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, unsigned long secs, unsigned long c_old, unsigned long c); +bool rcu_irq_enter_disabled(void); #else static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, @@ -113,6 +114,10 @@ static inline void rcutorture_record_test_transition(void) static inline void rcutorture_record_progress(unsigned long vernum) { } +static inline bool rcu_irq_enter_disabled(void) +{ + return false; +} #ifdef CONFIG_RCU_TRACE void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, @@ -363,15 +368,20 @@ static inline void rcu_init_nohz(void) #ifdef CONFIG_TASKS_RCU #define TASKS_RCU(x) x extern struct srcu_struct tasks_rcu_exit_srcu; -#define rcu_note_voluntary_context_switch(t) \ +#define rcu_note_voluntary_context_switch_lite(t) \ do { \ - rcu_all_qs(); \ if (READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) +#define rcu_note_voluntary_context_switch(t) \ + do { \ + rcu_all_qs(); \ + rcu_note_voluntary_context_switch_lite(t); \ + } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -#define rcu_note_voluntary_context_switch(t) rcu_all_qs() +#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) +#define rcu_note_voluntary_context_switch(t) rcu_all_qs() #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** @@ -1127,11 +1137,11 @@ do { \ * if the UNLOCK and LOCK are executed by the same CPU or if the * UNLOCK and LOCK operate on the same lock variable. */ -#ifdef CONFIG_PPC +#ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ -#else /* #ifdef CONFIG_PPC */ +#else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #define smp_mb__after_unlock_lock() do { } while (0) -#endif /* #else #ifdef CONFIG_PPC */ +#endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index b452953e21c8ae0b311a29d2af46a2a4d3036ce7..74d9c3a1feeec494b693501c6ec976c9dd487186 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) return 0; } +static inline bool rcu_eqs_special_set(int cpu) +{ + return false; /* Never flag non-existent other CPUs! */ +} + static inline unsigned long get_state_synchronize_rcu(void) { return 0; @@ -87,10 +92,11 @@ static inline void kfree_call_rcu(struct rcu_head *head, call_rcu(head, func); } -static inline void rcu_note_context_switch(void) -{ - rcu_sched_qs(); -} +#define rcu_note_context_switch(preempt) \ + do { \ + rcu_sched_qs(); \ + rcu_note_voluntary_context_switch_lite(current); \ + } while (0) /* * Take advantage of the fact that there is only one CPU, which @@ -212,14 +218,14 @@ static inline void exit_rcu(void) { } -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) extern int rcu_scheduler_active __read_mostly; void rcu_scheduler_starting(void); -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ static inline void rcu_scheduler_starting(void) { } -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) @@ -237,6 +243,10 @@ static inline bool rcu_is_watching(void) #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ +static inline void rcu_request_urgent_qs_task(struct task_struct *t) +{ +} + static inline void rcu_all_qs(void) { barrier(); /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 63a4e4cf40a54315705cec804c8f065bdcf2cdbb..0bacb6b2af6973681b3724a9e4d4311daf730c30 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,7 +30,7 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -void rcu_note_context_switch(void); +void rcu_note_context_switch(bool preempt); int rcu_needs_cpu(u64 basem, u64 *nextevt); void rcu_cpu_stall_reset(void); @@ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void); */ static inline void rcu_virt_note_context_switch(int cpu) { - rcu_note_context_switch(); + rcu_note_context_switch(false); } void synchronize_rcu_bh(void); @@ -108,6 +108,7 @@ void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; bool rcu_is_watching(void); +void rcu_request_urgent_qs_task(struct task_struct *t); void rcu_all_qs(void); diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 0023fee4bbbcb4f38c4d3f6f9181e57261c4188a..b34aa649d204887d723569d00da544e0eba463a7 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -6,17 +6,36 @@ #include #include +/** + * refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * + * The counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. + */ typedef struct refcount_struct { atomic_t refs; } refcount_t; #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +/** + * refcount_set - set a refcount's value + * @r: the refcount + * @n: value to which the refcount will be set + */ static inline void refcount_set(refcount_t *r, unsigned int n) { atomic_set(&r->refs, n); } +/** + * refcount_read - get a refcount's value + * @r: the refcount + * + * Return: the refcount's value + */ static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); diff --git a/include/linux/regulator/arizona-ldo1.h b/include/linux/regulator/arizona-ldo1.h new file mode 100644 index 0000000000000000000000000000000000000000..c685f1277c639187afdbead1bce86a2a681566a8 --- /dev/null +++ b/include/linux/regulator/arizona-ldo1.h @@ -0,0 +1,24 @@ +/* + * Platform data for Arizona LDO1 regulator + * + * Copyright 2017 Cirrus Logic + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARIZONA_LDO1_H +#define ARIZONA_LDO1_H + +struct regulator_init_data; + +struct arizona_ldo1_pdata { + /** GPIO controlling LDOENA, if any */ + int ldoena; + + /** Regulator configuration for LDO1 */ + const struct regulator_init_data *init_data; +}; + +#endif diff --git a/include/linux/regulator/arizona-micsupp.h b/include/linux/regulator/arizona-micsupp.h new file mode 100644 index 0000000000000000000000000000000000000000..616842619c0084928c4aaee5dbc4f441cfc60c6a --- /dev/null +++ b/include/linux/regulator/arizona-micsupp.h @@ -0,0 +1,21 @@ +/* + * Platform data for Arizona micsupp regulator + * + * Copyright 2017 Cirrus Logic + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARIZONA_MICSUPP_H +#define ARIZONA_MICSUPP_H + +struct regulator_init_data; + +struct arizona_micsupp_pdata { + /** Regulator configuration for micsupp */ + const struct regulator_init_data *init_data; +}; + +#endif diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index ea0fffa5faebb1688ecdb6166db8c7f4d2836c40..df176d7c2b87c00356a11b383a61185e7f3e47ae 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -119,6 +119,7 @@ struct regmap; #define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 #define REGULATOR_EVENT_PRE_DISABLE 0x400 #define REGULATOR_EVENT_ABORT_DISABLE 0x800 +#define REGULATOR_EVENT_ENABLE 0x1000 /* * Regulator errors that can be queried using regulator_get_error_flags diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index dac8e7b16bc679e8550ee7781cbda5194791c535..94417b4226bd88a42d5d53c845266ee97ec30879 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -292,6 +292,14 @@ enum regulator_type { * set_active_discharge * @active_discharge_reg: Register for control when using regmap * set_active_discharge + * @soft_start_reg: Register for control when using regmap set_soft_start + * @soft_start_mask: Mask for control when using regmap set_soft_start + * @soft_start_val_on: Enabling value for control when using regmap + * set_soft_start + * @pull_down_reg: Register for control when using regmap set_pull_down + * @pull_down_mask: Mask for control when using regmap set_pull_down + * @pull_down_val_on: Enabling value for control when using regmap + * set_pull_down * * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator @@ -345,6 +353,12 @@ struct regulator_desc { unsigned int active_discharge_off; unsigned int active_discharge_mask; unsigned int active_discharge_reg; + unsigned int soft_start_reg; + unsigned int soft_start_mask; + unsigned int soft_start_val_on; + unsigned int pull_down_reg; + unsigned int pull_down_mask; + unsigned int pull_down_val_on; unsigned int enable_time; @@ -429,6 +443,8 @@ struct regulator_dev { struct regulator_enable_gpio *ena_pin; unsigned int ena_gpio_state:1; + unsigned int is_switch:1; + /* time when this regulator was disabled last time */ unsigned long last_off_jiffy; }; @@ -476,6 +492,8 @@ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int new_selector); int regulator_set_bypass_regmap(struct regulator_dev *rdev, bool enable); int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable); +int regulator_set_soft_start_regmap(struct regulator_dev *rdev); +int regulator_set_pull_down_regmap(struct regulator_dev *rdev); int regulator_set_active_discharge_regmap(struct regulator_dev *rdev, bool enable); diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index c9f795e9a2ee26aaf562e9a97a2fe2f963a2f054..117699d1f7df4f72491bd65eccec39a160acf646 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -108,6 +108,8 @@ struct regulator_state { * @initial_state: Suspend state to set by default. * @initial_mode: Mode to set at startup. * @ramp_delay: Time to settle down after voltage change (unit: uV/us) + * @settling_time: Time to settle down after voltage change when voltage + * change is non-linear (unit: microseconds). * @active_discharge: Enable/disable active discharge. The enum * regulator_active_discharge values are used for * initialisation. @@ -149,6 +151,7 @@ struct regulation_constraints { unsigned int initial_mode; unsigned int ramp_delay; + unsigned int settling_time; unsigned int enable_time; unsigned int active_discharge; diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index 70c6c66c5bcf16cc4be324a1aaf40b1981e56413..e0ccf46f66cf34ae44296de9410fa107270aff4e 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -48,6 +48,7 @@ #define PFUZE200_VGEN4 10 #define PFUZE200_VGEN5 11 #define PFUZE200_VGEN6 12 +#define PFUZE200_COIN 13 #define PFUZE3000_SW1A 0 #define PFUZE3000_SW1B 1 diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 092292b6675e2cf08b1138410a8488bc87986495..7d56a7ea2b2e8cdcf471da6aaa6ac0be3c51acaa 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,6 +49,21 @@ /* Base bits plus 1 bit for nulls marker */ #define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) +/* Maximum chain length before rehash + * + * The maximum (not average) chain length grows with the size of the hash + * table, at a rate of (log N)/(log log N). + * + * The value of 16 is selected so that even if the hash table grew to + * 2^32 you would not expect the maximum chain length to exceed it + * unless we are under attack (or extremely unlucky). + * + * As this limit is only to detect attacks, we don't need to set it to a + * lower value as you'd need the chain length to vastly exceed 16 to have + * any real effect on the system. + */ +#define RHT_ELASTICITY 16u + struct rhash_head { struct rhash_head __rcu *next; }; @@ -110,29 +125,25 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @insecure_max_entries: Maximum number of entries (may be exceeded) * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking - * @nulls_base: Base value to generate nulls marker - * @insecure_elasticity: Set to true to disable chain length checks - * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) + * @automatic_shrinking: Enable automatic shrinking of tables + * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object */ struct rhashtable_params { - size_t nelem_hint; - size_t key_len; - size_t key_offset; - size_t head_offset; - unsigned int insecure_max_entries; + u16 nelem_hint; + u16 key_len; + u16 key_offset; + u16 head_offset; unsigned int max_size; - unsigned int min_size; - u32 nulls_base; - bool insecure_elasticity; + u16 min_size; bool automatic_shrinking; - size_t locks_mul; + u8 locks_mul; + u32 nulls_base; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; rht_obj_cmpfn_t obj_cmpfn; @@ -143,8 +154,8 @@ struct rhashtable_params { * @tbl: Bucket table * @nelems: Number of elements in table * @key_len: Key length for hashfn - * @elasticity: Maximum chain length before rehash * @p: Configuration parameters + * @max_elems: Maximum number of elements in table * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -154,8 +165,8 @@ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; unsigned int key_len; - unsigned int elasticity; struct rhashtable_params p; + unsigned int max_elems; bool rhlist; struct work_struct run_work; struct mutex mutex; @@ -318,8 +329,7 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { - return ht->p.insecure_max_entries && - atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; + return atomic_read(&ht->nelems) >= ht->max_elems; } /* The bucket lock is selected based on the hash and protects mutations @@ -726,7 +736,7 @@ static inline void *__rhashtable_insert_fast( return rhashtable_insert_slow(ht, key, obj); } - elasticity = ht->elasticity; + elasticity = RHT_ELASTICITY; pprev = rht_bucket_insert(ht, tbl, hash); data = ERR_PTR(-ENOMEM); if (!pprev) @@ -915,6 +925,28 @@ static inline int rhashtable_lookup_insert_fast( return ret == NULL ? 0 : -EEXIST; } +/** + * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Just like rhashtable_lookup_insert_fast(), but this function returns the + * object if it exists, NULL if it did not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); +} + /** * rhashtable_lookup_insert_key - search and insert object to hash table * with explicit key diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b6d4568795a78839f075c43681de5da1af0d0674..ee9b461af095389f7dd238b10e6cff120bcb773d 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -185,7 +185,7 @@ size_t ring_buffer_page_len(void *page); void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu); -void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); +void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 8c89e902df3e7ad35ecbff3009e2d88d06026b2a..43ef2c30cb0f59f83c05d23eea4a8aad87edefd1 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -83,19 +83,17 @@ struct anon_vma_chain { }; enum ttu_flags { - TTU_UNMAP = 1, /* unmap mode */ - TTU_MIGRATION = 2, /* migration mode */ - TTU_MUNLOCK = 4, /* munlock mode */ - TTU_LZFREE = 8, /* lazy free mode */ - TTU_SPLIT_HUGE_PMD = 16, /* split huge PMD if any */ - - TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ - TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ - TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ - TTU_BATCH_FLUSH = (1 << 11), /* Batch TLB flushes where possible + TTU_MIGRATION = 0x1, /* migration mode */ + TTU_MUNLOCK = 0x2, /* munlock mode */ + + TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ + TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ + TTU_IGNORE_ACCESS = 0x10, /* don't age */ + TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ - TTU_RMAP_LOCKED = (1 << 12) /* do not grab rmap lock: + TTU_RMAP_LOCKED = 0x80 /* do not grab rmap lock: * caller holds it */ }; @@ -193,9 +191,7 @@ static inline void page_dup_rmap(struct page *page, bool compound) int page_referenced(struct page *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) - -int try_to_unmap(struct page *, enum ttu_flags flags); +bool try_to_unmap(struct page *, enum ttu_flags flags); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) @@ -239,7 +235,7 @@ int page_mkclean(struct page *); * called in munlock()/munmap() path to check for other vmas holding * the page mlocked. */ -int try_to_munlock(struct page *); +void try_to_munlock(struct page *); void remove_migration_ptes(struct page *old, struct page *new, bool locked); @@ -261,15 +257,19 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); */ struct rmap_walk_control { void *arg; - int (*rmap_one)(struct page *page, struct vm_area_struct *vma, + /* + * Return false if page table scanning in rmap_walk should be stopped. + * Otherwise, return true. + */ + bool (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); struct anon_vma *(*anon_lock)(struct page *page); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -int rmap_walk(struct page *page, struct rmap_walk_control *rwc); -int rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ @@ -285,7 +285,7 @@ static inline int page_referenced(struct page *page, int is_locked, return 0; } -#define try_to_unmap(page, refs) SWAP_FAIL +#define try_to_unmap(page, refs) false static inline int page_mkclean(struct page *page) { @@ -295,13 +295,4 @@ static inline int page_mkclean(struct page *page) #endif /* CONFIG_MMU */ -/* - * Return values of try_to_unmap - */ -#define SWAP_SUCCESS 0 -#define SWAP_AGAIN 1 -#define SWAP_FAIL 2 -#define SWAP_MLOCK 3 -#define SWAP_LZFREE 4 - #endif /* _LINUX_RMAP_H */ diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h index ea05f6c514139c39e1c52abd7d45c7337d764152..84766bcdd01f934a632af261f998882cb46eeb27 100644 --- a/include/linux/rodata_test.h +++ b/include/linux/rodata_test.h @@ -14,7 +14,6 @@ #define _RODATA_TEST_H #ifdef CONFIG_DEBUG_RODATA_TEST -extern const int rodata_test_data; void rodata_test(void); #else static inline void rodata_test(void) {} diff --git a/include/linux/rpmsg/qcom_smd.h b/include/linux/rpmsg/qcom_smd.h index 8ec8b6439b25edb956bb06bcf116c190a46ce62c..f27917e0a10114f9c72e228c42b6f60efd51b541 100644 --- a/include/linux/rpmsg/qcom_smd.h +++ b/include/linux/rpmsg/qcom_smd.h @@ -6,7 +6,7 @@ struct qcom_smd_edge; -#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD) || IS_ENABLED(CONFIG_QCOM_SMD) +#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD) struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, struct device_node *node); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index d4e0a204c118c7244e7e5d167cc3d0429832de01..a1904aadbc45004ba18c8db06f184a164908cd46 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -175,6 +175,25 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); */ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); +/** + * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, + * limiting the depth used from each word. + * @sb: Bitmap to allocate from. + * @alloc_hint: Hint for where to start searching for a free bit. + * @shallow_depth: The maximum number of bits to allocate from a single word. + * + * This rather specific operation allows for having multiple users with + * different allocation limits. E.g., there can be a high-priority class that + * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() + * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * class can only allocate half of the total bits in the bitmap, preventing it + * from starving out the high-priority class. + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, + unsigned long shallow_depth); + /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. @@ -325,6 +344,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); */ int __sbitmap_queue_get(struct sbitmap_queue *sbq); +/** + * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue, limiting the depth used from each word, with preemption + * already disabled. + * @sbq: Bitmap queue to allocate from. + * @shallow_depth: The maximum number of bits to allocate from a single word. + * See sbitmap_get_shallow(). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth); + /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. @@ -345,6 +377,29 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, return nr; } +/** + * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue, limiting the depth used from each word. + * @sbq: Bitmap queue to allocate from. + * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to + * sbitmap_queue_clear()). + * @shallow_depth: The maximum number of bits to allocate from a single word. + * See sbitmap_get_shallow(). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int *cpu, + unsigned int shallow_depth) +{ + int nr; + + *cpu = get_cpu(); + nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); + put_cpu(); + return nr; +} + /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cf9a59a4d08ed181f30d7cefa56db92f48408d2..2b69fc65020131bed57e41d4b228ed0c59692a5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -186,7 +186,7 @@ extern long io_schedule_timeout(long timeout); extern void io_schedule(void); /** - * struct prev_cputime - snaphsot of system and user cputime + * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode * @lock: protects the above two fields @@ -779,6 +779,8 @@ struct task_struct { /* PI waiters blocked on a rt_mutex held by this task: */ struct rb_root pi_waiters; struct rb_node *pi_waiters_leftmost; + /* Updated under owner's pi_lock and rq lock */ + struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ struct rt_mutex_waiter *pi_blocked_on; #endif @@ -1041,6 +1043,13 @@ struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ atomic_t stack_refcount; +#endif +#ifdef CONFIG_LIVEPATCH + int patch_state; +#endif +#ifdef CONFIG_SECURITY + /* Used by LSM modules for access restriction: */ + void *security; #endif /* CPU-specific state of this task: */ struct thread_struct thread; @@ -1215,9 +1224,9 @@ extern struct pid *cad_pid; #define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_KSWAPD 0x00020000 /* I am kswapd */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ +#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ @@ -1260,7 +1269,6 @@ extern struct pid *cad_pid; #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ @@ -1286,14 +1294,11 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) -TASK_PFA_TEST(LMK_WAITING, lmk_waiting) -TASK_PFA_SET(LMK_WAITING, lmk_waiting) - static inline void -tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) +current_restore_flags(unsigned long orig_flags, unsigned long flags) { - task->flags &= ~flags; - task->flags |= orig_flags & flags; + current->flags &= ~flags; + current->flags |= orig_flags & flags; } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 830953ebb391fa80e39af4c4f61f1cdd766a3040..2b24a6974847664165bb323aa00c8fae6f3506b0 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -149,13 +149,21 @@ static inline bool in_vfork(struct task_struct *tsk) return ret; } -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. +/* + * Applies per-task gfp context to the given allocation flags. + * PF_MEMALLOC_NOIO implies GFP_NOIO + * PF_MEMALLOC_NOFS implies GFP_NOFS */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) +static inline gfp_t current_gfp_context(gfp_t flags) { + /* + * NOIO implies both NOIO and NOFS and it is a weaker context + * so always make sure it makes precendence + */ if (unlikely(current->flags & PF_MEMALLOC_NOIO)) flags &= ~(__GFP_IO | __GFP_FS); + else if (unlikely(current->flags & PF_MEMALLOC_NOFS)) + flags &= ~__GFP_FS; return flags; } @@ -171,4 +179,28 @@ static inline void memalloc_noio_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } +static inline unsigned int memalloc_nofs_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOFS; + current->flags |= PF_MEMALLOC_NOFS; + return flags; +} + +static inline void memalloc_nofs_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; +} + +static inline unsigned int memalloc_noreclaim_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC; + current->flags |= PF_MEMALLOC; + return flags; +} + +static inline void memalloc_noreclaim_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 3bd668414f61a593e9f40ff8364a3dce63d78d01..f93329aba31a9ebf814600447b4abc107827dd17 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,27 +18,20 @@ static inline int rt_task(struct task_struct *p) } #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); +/* + * Must hold either p->pi_lock or task_rq(p)->lock. + */ +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) +{ + return p->pi_top_task; +} +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} - -static inline int rt_mutex_get_effective_prio(struct task_struct *task, - int newprio) -{ - return newprio; -} - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 2cf446704cd4fce8c380d36df074f8822787b2e2..c06d63b3a58389b92483bb6a0f24f427324ac000 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -293,7 +293,6 @@ extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, const struct cred *, u32); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); diff --git a/include/linux/security.h b/include/linux/security.h index 96899fad701696ce9212055a5a4e049cfcf0f933..af675b57664592148e23691a5a3c15ade42ddd0f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,6 +133,10 @@ extern unsigned long dac_mmap_min_addr; /* setfsuid or setfsgid, id0 == fsuid or fsgid */ #define LSM_SETID_FS 8 +/* Flags for security_task_prlimit(). */ +#define LSM_PRLIMIT_READ 1 +#define LSM_PRLIMIT_WRITE 2 + /* forward declares to avoid warnings */ struct sched_param; struct request_sock; @@ -304,6 +308,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_file_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); +int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); void security_cred_free(struct cred *cred); @@ -324,6 +329,8 @@ void security_task_getsecid(struct task_struct *p, u32 *secid); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); +int security_task_prlimit(const struct cred *cred, const struct cred *tcred, + unsigned int flags); int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); int security_task_setscheduler(struct task_struct *p); @@ -855,6 +862,12 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } +static inline int security_task_alloc(struct task_struct *task, + unsigned long clone_flags) +{ + return 0; +} + static inline void security_task_free(struct task_struct *task) { } @@ -949,6 +962,13 @@ static inline int security_task_getioprio(struct task_struct *p) return 0; } +static inline int security_task_prlimit(const struct cred *cred, + const struct cred *tcred, + unsigned int flags) +{ + return 0; +} + static inline int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) diff --git a/include/linux/sem.h b/include/linux/sem.h index 4fc222f8755d82113deba19eea85e8c47a844818..9edec926e9d968a3900ec0f683b039a6cf6f6ef7 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -10,8 +10,7 @@ struct task_struct; /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { - struct kern_ipc_perm ____cacheline_aligned_in_smp - sem_perm; /* permissions .. see ipc.h */ + struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ struct list_head pending_alter; /* pending operations */ diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 9519da6253a83bac113219c41ecafda95be3a029..e69402d4a8aecbdc6fe4a40e8ad7e01957badbd6 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -15,6 +15,8 @@ #include #include +#include +#include struct serdev_controller; struct serdev_device; @@ -39,12 +41,16 @@ struct serdev_device_ops { * @nr: Device number on serdev bus. * @ctrl: serdev controller managing this device. * @ops: Device operations. + * @write_comp Completion used by serdev_device_write() internally + * @write_lock Lock to serialize access when writing data */ struct serdev_device { struct device dev; int nr; struct serdev_controller *ctrl; const struct serdev_device_ops *ops; + struct completion write_comp; + struct mutex write_lock; }; static inline struct serdev_device *to_serdev_device(struct device *d) @@ -81,6 +87,9 @@ struct serdev_controller_ops { void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); + void (*wait_until_sent)(struct serdev_controller *, long); + int (*get_tiocm)(struct serdev_controller *); + int (*set_tiocm)(struct serdev_controller *, unsigned int, unsigned int); }; /** @@ -165,7 +174,7 @@ static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl if (!serdev || !serdev->ops->write_wakeup) return; - serdev->ops->write_wakeup(ctrl->serdev); + serdev->ops->write_wakeup(serdev); } static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, @@ -177,7 +186,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, if (!serdev || !serdev->ops->receive_buf) return -EINVAL; - return serdev->ops->receive_buf(ctrl->serdev, data, count); + return serdev->ops->receive_buf(serdev, data, count); } #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) @@ -187,6 +196,11 @@ void serdev_device_close(struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); +void serdev_device_wait_until_sent(struct serdev_device *, long); +int serdev_device_get_tiocm(struct serdev_device *); +int serdev_device_set_tiocm(struct serdev_device *, int, int); +void serdev_device_write_wakeup(struct serdev_device *); +int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, unsigned long); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); @@ -223,7 +237,23 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev return 0; } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} -static inline int serdev_device_write_buf(struct serdev_device *sdev, const unsigned char *buf, size_t count) +static inline int serdev_device_write_buf(struct serdev_device *serdev, + const unsigned char *buf, + size_t count) +{ + return -ENODEV; +} +static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} +static inline int serdev_device_get_tiocm(struct serdev_device *serdev) +{ + return -ENOTSUPP; +} +static inline int serdev_device_set_tiocm(struct serdev_device *serdev, int set, int clear) +{ + return -ENOTSUPP; +} +static inline int serdev_device_write(struct serdev_device *sdev, const unsigned char *buf, + size_t count, unsigned long timeout) { return -ENODEV; } @@ -238,6 +268,36 @@ static inline int serdev_device_write_room(struct serdev_device *sdev) #endif /* CONFIG_SERIAL_DEV_BUS */ +static inline bool serdev_device_get_cts(struct serdev_device *serdev) +{ + int status = serdev_device_get_tiocm(serdev); + return !!(status & TIOCM_CTS); +} + +static inline int serdev_device_wait_for_cts(struct serdev_device *serdev, bool state, int timeout_ms) +{ + unsigned long timeout; + bool signal; + + timeout = jiffies + msecs_to_jiffies(timeout_ms); + while (time_is_after_jiffies(timeout)) { + signal = serdev_device_get_cts(serdev); + if (signal == state) + return 0; + usleep_range(1000, 2000); + } + + return -ETIMEDOUT; +} + +static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enable) +{ + if (enable) + return serdev_device_set_tiocm(serdev, TIOCM_RTS, 0); + else + return serdev_device_set_tiocm(serdev, 0, TIOCM_RTS); +} + /* * serdev hooks into TTY core */ @@ -248,7 +308,7 @@ struct tty_driver; struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx); -void serdev_tty_port_unregister(struct tty_port *port); +int serdev_tty_port_unregister(struct tty_port *port); #else static inline struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, @@ -256,7 +316,10 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port, { return ERR_PTR(-ENODEV); } -static inline void serdev_tty_port_unregister(struct tty_port *port) {} +static inline int serdev_tty_port_unregister(struct tty_port *port) +{ + return -ENODEV; +} #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ #endif /*_LINUX_SERDEV_H */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 58484fb35cc869b63fe0a1dec5fa39f1b9eace84..64d892f1e5cd833bd30003e6f64c99da3a73f5f7 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -247,6 +247,7 @@ struct uart_port { unsigned char suspended; unsigned char irq_wake; unsigned char unused[2]; + const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; diff --git a/include/linux/serio.h b/include/linux/serio.h index c733cff44e18a74f3949032d1413356df0a9e226..138a5efe863a3cb569de62fe00f93bbe38051279 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -77,6 +77,7 @@ struct serio_driver { irqreturn_t (*interrupt)(struct serio *, unsigned char, unsigned int); int (*connect)(struct serio *, struct serio_driver *drv); int (*reconnect)(struct serio *); + int (*fast_reconnect)(struct serio *); void (*disconnect)(struct serio *); void (*cleanup)(struct serio *); diff --git a/include/linux/signal.h b/include/linux/signal.h index 94ad6eea9550352d8e717a90cf50b462b238bfa9..1f5a16620693a644281d6e108c256de0800f977a 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -390,10 +390,6 @@ int unhandled_signal(struct task_struct *tsk, int sig); #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) -#define sig_user_defined(t, signr) \ - (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ - ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) - #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c776abd86937f52b002773728b5611746f199281..a098d95b3d84d0246adc88651af9978d6810fb01 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -413,14 +413,15 @@ struct ubuf_info { * the end of the header data, ie. at skb->end. */ struct skb_shared_info { + unsigned short _unused; unsigned char nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; - unsigned short gso_type; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + unsigned int gso_type; u32 tskey; __be32 ip6_frag_id; @@ -491,6 +492,8 @@ enum { SKB_GSO_TUNNEL_REMCSUM = 1 << 14, SKB_GSO_SCTP = 1 << 15, + + SKB_GSO_ESP = 1 << 16, }; #if BITS_PER_LONG > 32 @@ -3113,7 +3116,7 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; + return copy_from_iter_full(data, len, &msg->msg_iter) ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) diff --git a/include/linux/slab.h b/include/linux/slab.h index 3c37a8c5192159c88c892779ffa71c17d23b7736..04a7f7993e678d6454b6efaa608db3b88ea78eec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -28,7 +28,7 @@ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ /* - * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS! + * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() @@ -61,8 +61,10 @@ * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. + * + * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ +#define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 8e0cb7a0f836faadd10ee917f8fe2c76d8965fa6..68123c1fe54918c051292eb5ba3427df09f31c2f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); +extern int __boot_cpu_id; + +static inline int get_boot_cpu_id(void) +{ + return __boot_cpu_id; +} + #else /* !SMP */ static inline void smp_send_stop(void) { } @@ -158,6 +165,11 @@ static inline void smp_init(void) { up_late_init(); } static inline void smp_init(void) { } #endif +static inline int get_boot_cpu_id(void) +{ + return 0; +} + #endif /* !SMP */ /* diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h deleted file mode 100644 index f148e0ffbec75f443af5bdb23e8ca318b5743786..0000000000000000000000000000000000000000 --- a/include/linux/soc/qcom/smd.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef __QCOM_SMD_H__ -#define __QCOM_SMD_H__ - -#include -#include - -struct qcom_smd; -struct qcom_smd_channel; -struct qcom_smd_lookup; - -/** - * struct qcom_smd_id - struct used for matching a smd device - * @name: name of the channel - */ -struct qcom_smd_id { - char name[20]; -}; - -/** - * struct qcom_smd_device - smd device struct - * @dev: the device struct - * @channel: handle to the smd channel for this device - */ -struct qcom_smd_device { - struct device dev; - struct qcom_smd_channel *channel; -}; - -typedef int (*qcom_smd_cb_t)(struct qcom_smd_channel *, const void *, size_t); - -/** - * struct qcom_smd_driver - smd driver struct - * @driver: underlying device driver - * @smd_match_table: static channel match table - * @probe: invoked when the smd channel is found - * @remove: invoked when the smd channel is closed - * @callback: invoked when an inbound message is received on the channel, - * should return 0 on success or -EBUSY if the data cannot be - * consumed at this time - */ -struct qcom_smd_driver { - struct device_driver driver; - const struct qcom_smd_id *smd_match_table; - - int (*probe)(struct qcom_smd_device *dev); - void (*remove)(struct qcom_smd_device *dev); - qcom_smd_cb_t callback; -}; - -#if IS_ENABLED(CONFIG_QCOM_SMD) - -int qcom_smd_driver_register(struct qcom_smd_driver *drv); -void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); - -struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *channel, - const char *name, - qcom_smd_cb_t cb); -void qcom_smd_close_channel(struct qcom_smd_channel *channel); -void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel); -void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data); -int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); - - -struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, - struct device_node *node); -int qcom_smd_unregister_edge(struct qcom_smd_edge *edge); - -#else - -static inline int qcom_smd_driver_register(struct qcom_smd_driver *drv) -{ - return -ENXIO; -} - -static inline void qcom_smd_driver_unregister(struct qcom_smd_driver *drv) -{ - /* This shouldn't be possible */ - WARN_ON(1); -} - -static inline struct qcom_smd_channel * -qcom_smd_open_channel(struct qcom_smd_channel *channel, - const char *name, - qcom_smd_cb_t cb) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return NULL; -} - -static inline void qcom_smd_close_channel(struct qcom_smd_channel *channel) -{ - /* This shouldn't be possible */ - WARN_ON(1); -} - -static inline void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return NULL; -} - -static inline void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data) -{ - /* This shouldn't be possible */ - WARN_ON(1); -} - -static inline int qcom_smd_send(struct qcom_smd_channel *channel, - const void *data, int len) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return -ENXIO; -} - -static inline struct qcom_smd_edge * -qcom_smd_register_edge(struct device *parent, - struct device_node *node) -{ - return ERR_PTR(-ENXIO); -} - -static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return -ENXIO; -} - -#endif - -#define module_qcom_smd_driver(__smd_driver) \ - module_driver(__smd_driver, qcom_smd_driver_register, \ - qcom_smd_driver_unregister) - - -#endif diff --git a/include/linux/soc/qcom/wcnss_ctrl.h b/include/linux/soc/qcom/wcnss_ctrl.h index eab64976a73b0e1aa2c15de6a06ae65e1333de99..a4dd4d7c711dc000fc3f1fe93a794dbff245b418 100644 --- a/include/linux/soc/qcom/wcnss_ctrl.h +++ b/include/linux/soc/qcom/wcnss_ctrl.h @@ -1,16 +1,19 @@ #ifndef __WCNSS_CTRL_H__ #define __WCNSS_CTRL_H__ -#include +#include #if IS_ENABLED(CONFIG_QCOM_WCNSS_CTRL) -struct qcom_smd_channel *qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb); +struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, const char *name, + rpmsg_rx_cb_t cb, void *priv); #else -static inline struct qcom_smd_channel* -qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb) +static struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, + const char *name, + rpmsg_rx_cb_t cb, + void *priv) { WARN_ON(1); return ERR_PTR(-ENXIO); diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index a18e0783946b66eccba6a5b5faf77f2662d0a51a..787e7ad53d45f61cb045c047c610996de1141a7a 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -1,6 +1,11 @@ #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__ #define __LINUX_SOC_RENESAS_RCAR_RST_H__ +#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \ + defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796) int rcar_rst_read_mode_pins(u32 *mode); +#else +static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } +#endif #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 49df0a01a2cc203ef2e8ea50560e637e012dac98..bebdde5dccd69ff53112f12464ffa14ba7c971b7 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd. + * Copyright (c) 2010-2015 Samsung Electronics Co., Ltd. * http://www.samsung.com * * EXYNOS - Power management unit definition @@ -50,6 +50,14 @@ #define S5P_WAKEUP_MASK 0x0608 #define S5P_WAKEUP_MASK2 0x0614 +/* MIPI_PHYn_CONTROL, valid for Exynos3250, Exynos4, Exynos5250 and Exynos5433 */ +#define EXYNOS4_MIPI_PHY_CONTROL(n) (0x0710 + (n) * 4) +/* Phy enable bit, common for all phy registers, not only MIPI */ +#define EXYNOS4_PHY_ENABLE (1 << 0) +#define EXYNOS4_MIPI_PHY_SRESETN (1 << 1) +#define EXYNOS4_MIPI_PHY_MRESETN (1 << 2) +#define EXYNOS4_MIPI_PHY_RESET_MASK (3 << 1) + #define S5P_INFORM0 0x0800 #define S5P_INFORM1 0x0804 #define S5P_INFORM5 0x0814 @@ -342,6 +350,8 @@ #define EXYNOS5_AUTO_WDTRESET_DISABLE 0x0408 #define EXYNOS5_MASK_WDTRESET_REQUEST 0x040C +#define EXYNOS5_USBDRD_PHY_CONTROL 0x0704 +#define EXYNOS5_DPTX_PHY_CONTROL 0x0720 #define EXYNOS5_USE_RETENTION BIT(4) #define EXYNOS5_SYS_WDTRESET (1 << 20) @@ -495,6 +505,9 @@ #define EXYNOS5420_KFC_CORE_RESET(_nr) \ ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) +#define EXYNOS5420_USBDRD1_PHY_CONTROL 0x0708 +#define EXYNOS5420_MIPI_PHY_CONTROL(n) (0x0714 + (n) * 4) +#define EXYNOS5420_DPTX_PHY_CONTROL 0x0728 #define EXYNOS5420_ARM_CORE2_SYS_PWR_REG 0x1020 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG 0x1024 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG 0x1028 @@ -632,6 +645,7 @@ | EXYNOS5420_KFC_USE_STANDBY_WFI3) /* For EXYNOS5433 */ +#define EXYNOS5433_USBHOST30_PHY_CONTROL (0x0728) #define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) #define EXYNOS5433_PAD_RETENTION_MMC2_OPTION (0x30C8) #define EXYNOS5433_PAD_RETENTION_TOP_OPTION (0x3108) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index a0596ca0e80ac77aeb0afa29648532ef51a5deae..a2f8109bb215751427e99b3c026badfe7113b875 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -24,6 +24,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +u64 sock_gen_cookie(struct sock *sk); int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 75c6bd0ac605b2024509985214cdc3d451c6ab53..935bd2854ff19b2ab64e6d3786bc4ab53f7bc59c 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -23,6 +23,7 @@ #include struct dma_chan; +struct property_entry; struct spi_master; struct spi_transfer; struct spi_flash_read_message; @@ -375,6 +376,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @unprepare_message: undo any work done by prepare_message(). * @spi_flash_read: to support spi-controller hardwares that provide * accelerated interface to read from flash devices. + * @spi_flash_can_dma: analogous to can_dma() interface, but for + * controllers implementing spi_flash_read. * @flash_read_supported: spi device supports flash read * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that @@ -538,6 +541,8 @@ struct spi_master { struct spi_message *message); int (*spi_flash_read)(struct spi_device *spi, struct spi_flash_read_message *msg); + bool (*spi_flash_can_dma)(struct spi_device *spi, + struct spi_flash_read_message *msg); bool (*flash_read_supported)(struct spi_device *spi); /* @@ -891,7 +896,7 @@ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags unsigned i; struct spi_transfer *t = (struct spi_transfer *)(m + 1); - INIT_LIST_HEAD(&m->transfers); + spi_message_init_no_memset(m); for (i = 0; i < ntrans; i++, t++) spi_message_add_tail(t, m); } @@ -1209,6 +1214,7 @@ int spi_flash_read(struct spi_device *spi, * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. + * @properties: Additional device properties for the device. * @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. * @irq: Initializes spi_device.irq; depends on how the board is wired. @@ -1241,10 +1247,12 @@ struct spi_board_info { * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, + * device properties are copied and attached to spi_device, * irq is copied too */ char modalias[SPI_NAME_SIZE]; const void *platform_data; + const struct property_entry *properties; void *controller_data; int irq; diff --git a/include/linux/splice.h b/include/linux/splice.h index 00a21166e268b50f812807bc7e899360cb65febc..db42746bdfea54858ba2fae9fdaf5e4141b478a9 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -20,6 +20,8 @@ #define SPLICE_F_MORE (0x04) /* expect more data */ #define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */ +#define SPLICE_F_ALL (SPLICE_F_MOVE|SPLICE_F_NONBLOCK|SPLICE_F_MORE|SPLICE_F_GIFT) + /* * Passed to the actors */ @@ -55,7 +57,6 @@ struct splice_pipe_desc { struct partial_page *partial; /* pages[] may not be contig */ int nr_pages; /* number of populated pages in map */ unsigned int nr_pages_max; /* pages[] & partial[] arrays size */ - unsigned int flags; /* splice flags */ const struct pipe_buf_operations *ops;/* ops associated with output pipe */ void (*spd_release)(struct splice_pipe_desc *, unsigned int); }; @@ -82,7 +83,6 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, */ extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *); extern void splice_shrink_spd(struct splice_pipe_desc *); -extern void spd_release_page(struct splice_pipe_desc *, unsigned int); extern const struct pipe_buf_operations page_cache_pipe_buf_ops; extern const struct pipe_buf_operations default_pipe_buf_ops; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a598cf3ac70ca686ca60c77dbdfb986a554993af..4c1d5f7e62c4f6aef8eded0ec9db4d812a4acd29 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -22,7 +22,7 @@ * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt + * Documentation/RCU/ *.txt * */ @@ -32,35 +32,9 @@ #include #include #include +#include -struct srcu_array { - unsigned long lock_count[2]; - unsigned long unlock_count[2]; -}; - -struct rcu_batch { - struct rcu_head *head, **tail; -}; - -#define RCU_BATCH_INIT(name) { NULL, &(name.head) } - -struct srcu_struct { - unsigned long completed; - struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->batch_queue, ->running */ - bool running; - /* callbacks just queued */ - struct rcu_batch batch_queue; - /* callbacks try to do the first check_zero */ - struct rcu_batch batch_check0; - /* callbacks done with the first check_zero and the flip */ - struct rcu_batch batch_check1; - struct rcu_batch batch_done; - struct delayed_work work; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -}; +struct srcu_struct; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -82,46 +56,15 @@ int init_srcu_struct(struct srcu_struct *sp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -void process_srcu(struct work_struct *work); - -#define __SRCU_STRUCT_INIT(name) \ - { \ - .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ - .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .running = false, \ - .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ - .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ - .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ - .batch_done = RCU_BATCH_INIT(name.batch_done), \ - .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ - __SRCU_DEP_MAP_INIT(name) \ - } - -/* - * Define and initialize a srcu struct at build time. - * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. - * - * Note that although DEFINE_STATIC_SRCU() hides the name from other - * files, the per-CPU variable rules nevertheless require that the - * chosen name be globally unique. These rules also prohibit use of - * DEFINE_STATIC_SRCU() within a function. If these rules are too - * restrictive, declare the srcu_struct manually. For example, in - * each file: - * - * static struct srcu_struct my_srcu; - * - * Then, before the first use of each my_srcu, manually initialize it: - * - * init_srcu_struct(&my_srcu); - * - * See include/linux/percpu-defs.h for the rules on per-CPU variables. - */ -#define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) -#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) -#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) +#ifdef CONFIG_TINY_SRCU +#include +#elif defined(CONFIG_TREE_SRCU) +#include +#elif defined(CONFIG_CLASSIC_SRCU) +#include +#else +#error "Unknown SRCU implementation specified to kernel configuration" +#endif /** * call_srcu() - Queue a callback for invocation after an SRCU grace period @@ -147,9 +90,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp); int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); -void synchronize_srcu_expedited(struct srcu_struct *sp); -unsigned long srcu_batches_completed(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -232,9 +172,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { int retval; - preempt_disable(); retval = __srcu_read_lock(sp); - preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h new file mode 100644 index 0000000000000000000000000000000000000000..5753f73222629c1340d363eba4e50510e437c575 --- /dev/null +++ b/include/linux/srcuclassic.h @@ -0,0 +1,115 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * classic v4.11 variant. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#ifndef _LINUX_SRCU_CLASSIC_H +#define _LINUX_SRCU_CLASSIC_H + +struct srcu_array { + unsigned long lock_count[2]; + unsigned long unlock_count[2]; +}; + +struct rcu_batch { + struct rcu_head *head, **tail; +}; + +#define RCU_BATCH_INIT(name) { NULL, &(name.head) } + +struct srcu_struct { + unsigned long completed; + struct srcu_array __percpu *per_cpu_ref; + spinlock_t queue_lock; /* protect ->batch_queue, ->running */ + bool running; + /* callbacks just queued */ + struct rcu_batch batch_queue; + /* callbacks try to do the first check_zero */ + struct rcu_batch batch_check0; + /* callbacks done with the first check_zero and the flip */ + struct rcu_batch batch_check1; + struct rcu_batch batch_done; + struct delayed_work work; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +}; + +void process_srcu(struct work_struct *work); + +#define __SRCU_STRUCT_INIT(name) \ + { \ + .completed = -300, \ + .per_cpu_ref = &name##_srcu_array, \ + .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ + .running = false, \ + .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ + .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ + .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ + .batch_done = RCU_BATCH_INIT(name.batch_done), \ + .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ + __SRCU_DEP_MAP_INIT(name) \ + } + +/* + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. + */ +#define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) +#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) + +void synchronize_srcu_expedited(struct srcu_struct *sp); +void srcu_barrier(struct srcu_struct *sp); +unsigned long srcu_batches_completed(struct srcu_struct *sp); + +static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = sp->completed; + *gpnum = *completed; + if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head) + (*gpnum)++; +} + +#endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h new file mode 100644 index 0000000000000000000000000000000000000000..42311ee0334fde629280f6b92cdc065853e3ba14 --- /dev/null +++ b/include/linux/srcutiny.h @@ -0,0 +1,93 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * tiny variant. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#ifndef _LINUX_SRCU_TINY_H +#define _LINUX_SRCU_TINY_H + +#include + +struct srcu_struct { + int srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ + struct swait_queue_head srcu_wq; + /* Last srcu_read_unlock() wakes GP. */ + unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ + struct rcu_segcblist srcu_cblist; + /* Pending SRCU callbacks. */ + int srcu_idx; /* Current reader array element. */ + bool srcu_gp_running; /* GP workqueue running? */ + bool srcu_gp_waiting; /* GP waiting for readers? */ + struct work_struct srcu_work; /* For driving grace periods. */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +}; + +void srcu_drive_gp(struct work_struct *wp); + +#define __SRCU_STRUCT_INIT(name) \ +{ \ + .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ + .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist), \ + .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ + __SRCU_DEP_MAP_INIT(name) \ +} + +/* + * This odd _STATIC_ arrangement is needed for API compatibility with + * Tree SRCU, which needs some per-CPU data. + */ +#define DEFINE_SRCU(name) \ + struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_STATIC_SRCU(name) \ + static struct srcu_struct name = __SRCU_STRUCT_INIT(name) + +void synchronize_srcu(struct srcu_struct *sp); + +static inline void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + synchronize_srcu(sp); +} + +static inline void srcu_barrier(struct srcu_struct *sp) +{ + synchronize_srcu(sp); +} + +static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) +{ + return 0; +} + +static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = sp->srcu_gp_seq; + *gpnum = *completed; +} + +#endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h new file mode 100644 index 0000000000000000000000000000000000000000..32e86d85fd11f54bd973129a6428c4c09c104ace --- /dev/null +++ b/include/linux/srcutree.h @@ -0,0 +1,150 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * tree variant. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#ifndef _LINUX_SRCU_TREE_H +#define _LINUX_SRCU_TREE_H + +#include +#include + +struct srcu_node; +struct srcu_struct; + +/* + * Per-CPU structure feeding into leaf srcu_node, similar in function + * to rcu_node. + */ +struct srcu_data { + /* Read-side state. */ + unsigned long srcu_lock_count[2]; /* Locks per CPU. */ + unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + + /* Update-side state. */ + spinlock_t lock ____cacheline_internodealigned_in_smp; + struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ + unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ + bool srcu_cblist_invoking; /* Invoking these CBs? */ + struct delayed_work work; /* Context for CB invoking. */ + struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */ + struct srcu_node *mynode; /* Leaf srcu_node. */ + unsigned long grpmask; /* Mask for leaf srcu_node */ + /* ->srcu_data_have_cbs[]. */ + int cpu; + struct srcu_struct *sp; +}; + +/* + * Node in SRCU combining tree, similar in function to rcu_data. + */ +struct srcu_node { + spinlock_t lock; + unsigned long srcu_have_cbs[4]; /* GP seq for children */ + /* having CBs, but only */ + /* is > ->srcu_gq_seq. */ + unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs */ + /* have CBs for given GP? */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ + struct srcu_node *srcu_parent; /* Next up in tree. */ + int grplo; /* Least CPU for node. */ + int grphi; /* Biggest CPU for node. */ +}; + +/* + * Per-SRCU-domain structure, similar in function to rcu_state. + */ +struct srcu_struct { + struct srcu_node node[NUM_RCU_NODES]; /* Combining tree. */ + struct srcu_node *level[RCU_NUM_LVLS + 1]; + /* First node at each level. */ + struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ + spinlock_t gp_lock; /* protect ->srcu_cblist */ + struct mutex srcu_gp_mutex; /* Serialize GP work. */ + unsigned int srcu_idx; /* Current rdr array element. */ + unsigned long srcu_gp_seq; /* Grace-period seq #. */ + unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ + unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ + struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ + unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ + struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ + struct completion srcu_barrier_completion; + /* Awaken barrier rq at end. */ + atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */ + /* callback for the barrier */ + /* operation. */ + struct delayed_work work; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +}; + +/* Values for state variable (bottom bits of ->srcu_gp_seq). */ +#define SRCU_STATE_IDLE 0 +#define SRCU_STATE_SCAN1 1 +#define SRCU_STATE_SCAN2 2 + +void process_srcu(struct work_struct *work); + +#define __SRCU_STRUCT_INIT(name) \ + { \ + .sda = &name##_srcu_data, \ + .gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock), \ + .srcu_gp_seq_needed = 0 - 1, \ + __SRCU_DEP_MAP_INIT(name) \ + } + +/* + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. + */ +#define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) +#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) + +void synchronize_srcu_expedited(struct srcu_struct *sp); +void srcu_barrier(struct srcu_struct *sp); +unsigned long srcu_batches_completed(struct srcu_struct *sp); + +void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, unsigned long *completed); + +#endif diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 0a34489a46b62a4c526e27b39b53d3d571db6727..4205f71a5f0e99e9900ec47de977aed8f6453aaf 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -18,6 +18,8 @@ extern void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); +extern int save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); extern int snprint_stack_trace(char *buf, size_t size, @@ -29,12 +31,13 @@ extern void save_stack_trace_user(struct stack_trace *trace); # define save_stack_trace_user(trace) do { } while (0) #endif -#else +#else /* !CONFIG_STACKTRACE */ # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) # define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) # define snprint_stack_trace(buf, size, trace, spaces) do { } while (0) -#endif +# define save_stack_trace_tsk_reliable(tsk, trace) ({ -ENOSYS; }) +#endif /* CONFIG_STACKTRACE */ -#endif +#endif /* __LINUX_STACKTRACE_H */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fc273e9d5f67625b9ddf611746d1a09ff555e4ab..3921cb9dfadb9b4a9bbe30854ccada75725b8005 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -28,6 +28,9 @@ #include +#define MTL_MAX_RX_QUEUES 8 +#define MTL_MAX_TX_QUEUES 8 + #define STMMAC_RX_COE_NONE 0 #define STMMAC_RX_COE_TYPE1 1 #define STMMAC_RX_COE_TYPE2 2 @@ -44,6 +47,18 @@ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ #define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */ +/* MTL algorithms identifiers */ +#define MTL_TX_ALGORITHM_WRR 0x0 +#define MTL_TX_ALGORITHM_WFQ 0x1 +#define MTL_TX_ALGORITHM_DWRR 0x2 +#define MTL_TX_ALGORITHM_SP 0x3 +#define MTL_RX_ALGORITHM_SP 0x4 +#define MTL_RX_ALGORITHM_WSP 0x5 + +/* RX/TX Queue Mode */ +#define MTL_QUEUE_AVB 0x0 +#define MTL_QUEUE_DCB 0x1 + /* The MDC clock could be set higher than the IEEE 802.3 * specified frequency limit 0f 2.5 MHz, by programming a clock divider * of value different than the above defined values. The resultant MDIO @@ -109,6 +124,26 @@ struct stmmac_axi { bool axi_rb; }; +struct stmmac_rxq_cfg { + u8 mode_to_use; + u8 chan; + u8 pkt_route; + bool use_prio; + u32 prio; +}; + +struct stmmac_txq_cfg { + u8 weight; + u8 mode_to_use; + /* Credit Base Shaper parameters */ + u32 send_slope; + u32 idle_slope; + u32 high_credit; + u32 low_credit; + bool use_prio; + u32 prio; +}; + struct plat_stmmacenet_data { int bus_id; int phy_addr; @@ -133,6 +168,12 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; + u8 rx_queues_to_use; + u8 tx_queues_to_use; + u8 rx_sched_algorithm; + u8 tx_sched_algorithm; + struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; + struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*fix_mac_speed)(void *priv, unsigned int speed); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); diff --git a/include/linux/string.h b/include/linux/string.h index 26b6f6a66f835b5f3fcb44da978e3d75b57054ec..537918f8a98eec239a8f16c7ae64cadcbc847789 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -114,6 +114,14 @@ extern int memcmp(const void *,const void *,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_MEMCPY_MCSAFE +static inline __must_check int memcpy_mcsafe(void *dst, const void *src, + size_t cnt) +{ + memcpy(dst, src, cnt); + return 0; +} +#endif void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); @@ -135,6 +143,16 @@ static inline int strtobool(const char *s, bool *res) } int match_string(const char * const *array, size_t n, const char *string); +int __sysfs_match_string(const char * const *array, size_t n, const char *s); + +/** + * sysfs_match_string - matches given string in an array + * @_a: array of strings + * @_s: string to match with + * + * Helper for __sysfs_match_string(). Calculates the size of @a automatically. + */ +#define sysfs_match_string(_a, _s) __sysfs_match_string(_a, ARRAY_SIZE(_a), _s) #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 245fc59b73247d744682c128bfcae1270e146c26..b7e85b341a54f5498e8a2095f3aef84cf5a0fe63 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -143,6 +143,9 @@ enum rpcrdma_proc { #define rdma_done cpu_to_be32(RDMA_DONE) #define rdma_error cpu_to_be32(RDMA_ERROR) +#define err_vers cpu_to_be32(ERR_VERS) +#define err_chunk cpu_to_be32(ERR_CHUNK) + /* * Private extension to RPC-over-RDMA Version One. * Message passed during RDMA-CM connection set-up. diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e770abeed32d7117c4f2d363f9d7370a60d2c55f..11cef5a7bc87a9fe67a4bfbfca9f52e41d0abd88 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -474,6 +474,7 @@ void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); void svc_shutdown_net(struct svc_serv *, struct net *); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b105f73e3ca26355b2ee8b32651b48526a945899..f3787d800ba46b7cdae49e19584fe64cbdff2ef3 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,6 +48,12 @@ #include #define SVCRDMA_DEBUG +/* Default and maximum inline threshold sizes */ +enum { + RPCRDMA_DEF_INLINE_THRESH = 4096, + RPCRDMA_MAX_INLINE_THRESH = 65536 +}; + /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; @@ -85,27 +91,11 @@ struct svc_rdma_op_ctxt { enum dma_data_direction direction; int count; unsigned int mapped_sges; - struct ib_sge sge[RPCSVC_MAXPAGES]; + struct ib_send_wr send_wr; + struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE]; struct page *pages[RPCSVC_MAXPAGES]; }; -/* - * NFS_ requests are mapped on the client side by the chunk lists in - * the RPCRDMA header. During the fetching of the RPC from the client - * and the writing of the reply to the client, the memory in the - * client and the memory in the server must be mapped as contiguous - * vaddr/len for access by the hardware. These data strucures keep - * these mappings. - * - * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the - * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the - * 'ch' field maps the read-list of the RPCRDMA header to the 'sge' - * mapping of the reply. - */ -struct svc_rdma_chunk_sge { - int start; /* sge no for this chunk */ - int count; /* sge count for this chunk */ -}; struct svc_rdma_fastreg_mr { struct ib_mr *mr; struct scatterlist *sg; @@ -114,15 +104,7 @@ struct svc_rdma_fastreg_mr { enum dma_data_direction direction; struct list_head frmr_list; }; -struct svc_rdma_req_map { - struct list_head free; - unsigned long count; - union { - struct kvec sge[RPCSVC_MAXPAGES]; - struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; - unsigned long lkey[RPCSVC_MAXPAGES]; - }; -}; + #define RDMACTXT_F_LAST_CTXT 2 #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ @@ -144,14 +126,15 @@ struct svcxprt_rdma { u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ + u8 sc_port_num; struct ib_pd *sc_pd; spinlock_t sc_ctxt_lock; struct list_head sc_ctxts; int sc_ctxt_used; - spinlock_t sc_map_lock; - struct list_head sc_maps; + spinlock_t sc_rw_ctxt_lock; + struct list_head sc_rw_ctxts; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; @@ -181,9 +164,7 @@ struct svcxprt_rdma { /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ #define RPCRDMA_ORD (64/4) -#define RPCRDMA_SQ_DEPTH_MULT 8 #define RPCRDMA_MAX_REQUESTS 32 -#define RPCRDMA_MAX_REQ_SIZE 4096 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our * current NFSv4.1 implementation supports one backchannel slot. @@ -201,19 +182,11 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, /* svc_rdma_backchannel.c */ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, - struct rpcrdma_msg *rmsgp, + __be32 *rdma_resp, struct xdr_buf *rcvbuf); /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct xdr_buf *); -extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, - struct rpcrdma_msg *, - enum rpcrdma_errcode, __be32 *); -extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); -extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); -extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, - __be32, __be64, u32); -extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); @@ -224,16 +197,25 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, struct svc_rdma_op_ctxt *, int *, u32 *, u32, u32, u64, bool); +/* svc_rdma_rw.c */ +extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); +extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, + __be32 *wr_ch, struct xdr_buf *xdr); +extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, + __be32 *rp_ch, bool writelist, + struct xdr_buf *xdr); + /* svc_rdma_sendto.c */ -extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, - struct svc_rdma_req_map *, bool); +extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + __be32 *rdma_resp, unsigned int len); +extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + int num_sge, u32 inv_rkey); extern int svc_rdma_sendto(struct svc_rqst *); -extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - int); /* svc_rdma_transport.c */ extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *); -extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *); extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *); extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *); extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *); @@ -244,9 +226,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *); -extern void svc_rdma_put_req_map(struct svcxprt_rdma *, - struct svc_rdma_req_map *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/include/linux/swap.h b/include/linux/swap.h index 45e91dd6716d89b0ffa8f2d97481d12732d20173..ba5882419a7dbcebe0cbf7edee276346429d8b79 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -279,7 +279,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); -extern void deactivate_page(struct page *page); +extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); @@ -411,9 +411,6 @@ struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); -extern int get_swap_slots(int n, swp_entry_t *slots); -extern void swapcache_free_batch(swp_entry_t *entries, int n); - #else /* CONFIG_SWAP */ #define swap_address_space(entry) (NULL) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b7e82049fec754c7f2c88d3b50fd513395958660..80d07816def05bf0f7662c46d56387830dec3d57 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -180,7 +180,6 @@ extern void setup_sysctl_set(struct ctl_table_set *p, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); -void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, const char *path, struct ctl_table *table); diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 9fba9dd33544087dc5dfc55733ec3e4cdb9a317f..9375d23a24e7aba97504d743e77d3b78f4d72493 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -34,9 +34,9 @@ struct t10_pi_tuple { }; -extern struct blk_integrity_profile t10_pi_type1_crc; -extern struct blk_integrity_profile t10_pi_type1_ip; -extern struct blk_integrity_profile t10_pi_type3_crc; -extern struct blk_integrity_profile t10_pi_type3_ip; +extern const struct blk_integrity_profile t10_pi_type1_crc; +extern const struct blk_integrity_profile t10_pi_type1_ip; +extern const struct blk_integrity_profile t10_pi_type3_crc; +extern const struct blk_integrity_profile t10_pi_type3_ip; #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index cfc2d9506ce8077af1ec92eb7086fd52ce4fe1ac..b6d5adcee8fcb611de202993623cc80274d262e4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -233,12 +233,14 @@ struct tcp_sock { u8 syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ + syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ + struct skb_mstamp tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ @@ -331,16 +333,16 @@ struct tcp_sock { /* Receiver side RTT estimation */ struct { - u32 rtt; - u32 seq; - u32 time; + u32 rtt_us; + u32 seq; + struct skb_mstamp time; } rcv_rtt_est; /* Receiver queue space */ struct { - int space; - u32 seq; - u32 time; + int space; + u32 seq; + struct skb_mstamp time; } rcvq_space; /* TCP-specific MTU probe information. */ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h new file mode 100644 index 0000000000000000000000000000000000000000..0f175b8f6456c1d5ad14538deb66b9c3a9f3c5c7 --- /dev/null +++ b/include/linux/tee_drv.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __TEE_DRV_H +#define __TEE_DRV_H + +#include +#include +#include +#include + +/* + * The file describes the API provided by the generic TEE driver to the + * specific TEE driver. + */ + +#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ +#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ + +struct tee_device; +struct tee_shm; +struct tee_shm_pool; + +/** + * struct tee_context - driver specific context on file pointer data + * @teedev: pointer to this drivers struct tee_device + * @list_shm: List of shared memory object owned by this context + * @data: driver specific context data, managed by the driver + */ +struct tee_context { + struct tee_device *teedev; + struct list_head list_shm; + void *data; +}; + +struct tee_param_memref { + size_t shm_offs; + size_t size; + struct tee_shm *shm; +}; + +struct tee_param_value { + u64 a; + u64 b; + u64 c; +}; + +struct tee_param { + u64 attr; + union { + struct tee_param_memref memref; + struct tee_param_value value; + } u; +}; + +/** + * struct tee_driver_ops - driver operations vtable + * @get_version: returns version of driver + * @open: called when the device file is opened + * @release: release this open file + * @open_session: open a new session + * @close_session: close a session + * @invoke_func: invoke a trusted function + * @cancel_req: request cancel of an ongoing invoke or open + * @supp_revc: called for supplicant to get a command + * @supp_send: called for supplicant to send a response + */ +struct tee_driver_ops { + void (*get_version)(struct tee_device *teedev, + struct tee_ioctl_version_data *vers); + int (*open)(struct tee_context *ctx); + void (*release)(struct tee_context *ctx); + int (*open_session)(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + int (*close_session)(struct tee_context *ctx, u32 session); + int (*invoke_func)(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); + int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); + int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); + int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); +}; + +/** + * struct tee_desc - Describes the TEE driver to the subsystem + * @name: name of driver + * @ops: driver operations vtable + * @owner: module providing the driver + * @flags: Extra properties of driver, defined by TEE_DESC_* below + */ +#define TEE_DESC_PRIVILEGED 0x1 +struct tee_desc { + const char *name; + const struct tee_driver_ops *ops; + struct module *owner; + u32 flags; +}; + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data); + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev); + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. + */ +void tee_device_unregister(struct tee_device *teedev); + +/** + * struct tee_shm_pool_mem_info - holds information needed to create a shared + * memory pool + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + */ +struct tee_shm_pool_mem_info { + unsigned long vaddr; + phys_addr_t paddr; + size_t size; +}; + +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools will must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf, others will use the range provided by @priv. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * The must be no remaining shared memory allocated from this pool when + * this function is called. + */ +void tee_shm_pool_free(struct tee_shm_pool *pool); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @returns the driver_data pointer supplied to tee_register(). + */ +void *tee_get_drvdata(struct tee_device *teedev); + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set. If + * TEE_SHM_DMA_BUF global shared memory will be allocated and associated + * with a dma-buf handle, else driver private memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to tranlsate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to tranlsate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code. + */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +int tee_shm_get_id(struct tee_shm *shm); + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); + +#endif /*__TEE_DRV_H*/ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 58373875e8eec2aee668e5fdac4526796ebe78c8..d7d3ea637dd0a500875cbe64c00eb5e0188acd41 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -22,6 +22,18 @@ #endif #include + +/* + * For per-arch arch_within_stack_frames() implementations, defined in + * asm/thread_info.h. + */ +enum { + BAD_STACK = -1, + NOT_STACK = 0, + GOOD_FRAME, + GOOD_STACK, +}; + #include #ifdef __KERNEL__ @@ -101,6 +113,10 @@ static inline void check_object_size(const void *ptr, unsigned long n, { } #endif /* CONFIG_HARDENED_USERCOPY */ +#ifndef arch_setup_new_exec +static inline void arch_setup_new_exec(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index a04fea19676f30e07282231b6c14b4ce9ab40f1d..fe01e68bf520c792f8c06989dd3e811baf8e98b5 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -117,6 +117,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern unsigned long tick_nohz_get_idle_calls(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/include/linux/time.h b/include/linux/time.h index 23f0f5ce30909d7349a354086980e2ca9988744c..c0543f5f25de471594330d84d605c6f20ef4c565 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -151,9 +151,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) return true; } -#define CURRENT_TIME (current_kernel_time()) -#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) - /* Some architectures do not supply their own clocksource. * This is mainly the case in architectures that get their * inter-tick times by reading the counter on their interval diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b598cbc7b576847a4e3f58ebbc2c76a24f149540..ddc229ff6d1ee1dbd3e535d7c0a2f16225c29f47 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -19,21 +19,6 @@ extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday64(const struct timespec64 *ts); extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); -static inline int do_sys_settimeofday(const struct timespec *tv, - const struct timezone *tz) -{ - struct timespec64 ts64; - - if (!tv) - return do_sys_settimeofday64(NULL, tz); - - if (!timespec_valid(tv)) - return -EINVAL; - - ts64 = timespec_to_timespec64(*tv); - return do_sys_settimeofday64(&ts64, tz); -} - /* * Kernel time accessors */ @@ -273,6 +258,11 @@ static inline void timekeeping_clocktai(struct timespec *ts) *ts = ktime_to_timespec(ktime_get_clocktai()); } +static inline void timekeeping_clocktai64(struct timespec64 *ts) +{ + *ts = ktime_to_timespec64(ktime_get_clocktai()); +} + /* * RTC specific */ diff --git a/include/linux/tpm.h b/include/linux/tpm.h index da158f06e0b2fec35f10e2faea57d806696e4d20..5a090f5ab3356f5f26bb21581d9a3df8ea488aa0 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -48,7 +48,8 @@ struct tpm_class_ops { u8 (*status) (struct tpm_chip *chip); bool (*update_timeouts)(struct tpm_chip *chip, unsigned long *timeout_cap); - + int (*request_locality)(struct tpm_chip *chip, int loc); + void (*relinquish_locality)(struct tpm_chip *chip, int loc); }; #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0af63c4381b9799151317bafc1a800815d85876b..a556805eff8a8ebb5390dd6c9f025d85608cbbb9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -138,16 +138,7 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; -/* - * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq - * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function - * simplifies those functions and keeps them in sync. - */ -static inline enum print_line_t trace_handle_return(struct trace_seq *s) -{ - return trace_seq_has_overflowed(s) ? - TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; -} +enum print_line_t trace_handle_return(struct trace_seq *s); void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index f72fcfe0e66a80a80ae59a175e21abfba5b5ca42..cc48cb2ce20965a412037bfe3a8565991fcb9611 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -128,7 +128,7 @@ extern void syscall_unregfunc(void); * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". */ -#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ +#define __DO_TRACE(tp, proto, args, cond, rcucheck) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ @@ -136,7 +136,11 @@ extern void syscall_unregfunc(void); \ if (!(cond)) \ return; \ - prercu; \ + if (rcucheck) { \ + if (WARN_ON_ONCE(rcu_irq_enter_disabled())) \ + return; \ + rcu_irq_enter_irqson(); \ + } \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ @@ -147,20 +151,19 @@ extern void syscall_unregfunc(void); } while ((++it_func_ptr)->func); \ } \ rcu_read_unlock_sched_notrace(); \ - postrcu; \ + if (rcucheck) \ + rcu_irq_exit_irqson(); \ } while (0) #ifndef MODULE -#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name##_rcuidle(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ - TP_CONDITION(cond), \ - rcu_irq_enter_irqson(), \ - rcu_irq_exit_irqson()); \ + TP_CONDITION(cond), 1); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) @@ -186,7 +189,7 @@ extern void syscall_unregfunc(void); __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ - TP_CONDITION(cond),,); \ + TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ rcu_read_lock_sched_notrace(); \ rcu_dereference_sched(__tracepoint_##name.funcs);\ diff --git a/include/linux/tty.h b/include/linux/tty.h index 1017e904c0a3f90507183570b0897199b4ace563..eccb4ec30a8a7b94064ff1ede0645234b94ae49c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -390,7 +390,6 @@ static inline bool tty_throttled(struct tty_struct *tty) } #ifdef CONFIG_TTY -extern void console_init(void); extern void tty_kref_put(struct tty_struct *tty); extern struct pid *tty_get_pgrp(struct tty_struct *tty); extern void tty_vhangup_self(void); @@ -402,8 +401,6 @@ extern struct tty_struct *get_current_tty(void); extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); #else -static inline void console_init(void) -{ } static inline void tty_kref_put(struct tty_struct *tty) { } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) @@ -478,9 +475,13 @@ extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); +extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); +extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); +extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); +extern void session_clear_tty(struct pid *session); extern void no_tty(void); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); @@ -528,6 +529,8 @@ extern void tty_ldisc_flush(struct tty_struct *tty); extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); +extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, + struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); @@ -555,6 +558,15 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); +extern struct device *tty_port_register_device_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device); +extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device, void *drvdata, + const struct attribute_group **attr_grp); +extern void tty_port_unregister_device(struct tty_port *port, + struct tty_driver *driver, unsigned index); extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_destroy(struct tty_port *port); @@ -669,7 +681,11 @@ extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); +#ifdef CONFIG_TTY extern void __init n_tty_init(void); +#else +static inline void n_tty_init(void) { } +#endif /* tty_audit.c */ #ifdef CONFIG_AUDIT diff --git a/include/linux/types.h b/include/linux/types.h index 1e7bd24848fcb54cae26bdd10597f6ea80afd46f..258099a4ed82b1b3dd8199b7c905491c935a73a7 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -209,7 +209,7 @@ struct ustat { * naturally due ABI requirements, but some architectures (like CRIS) have * weird ABI and we need to ask it explicitly. * - * The alignment is required to guarantee that bits 0 and 1 of @next will be + * The alignment is required to guarantee that bit 0 of @next will be * clear under normal conditions -- as long as we use call_rcu(), * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. * diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index f30c187ed785366231e318a7beab151e0bba64b6..201418d5e15c2b92dc7d7bc73b54f469eb488a35 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,8 +2,199 @@ #define __LINUX_UACCESS_H__ #include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +#define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS) + #include +/* + * Architectures should provide two primitives (raw_copy_{to,from}_user()) + * and get rid of their private instances of copy_{to,from}_user() and + * __copy_{to,from}_user{,_inatomic}(). + * + * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and + * return the amount left to copy. They should assume that access_ok() has + * already been checked (and succeeded); they should *not* zero-pad anything. + * No KASAN or object size checks either - those belong here. + * + * Both of these functions should attempt to copy size bytes starting at from + * into the area starting at to. They must not fetch or store anything + * outside of those areas. Return value must be between 0 (everything + * copied successfully) and size (nothing copied). + * + * If raw_copy_{to,from}_user(to, from, size) returns N, size - N bytes starting + * at to must become equal to the bytes fetched from the corresponding area + * starting at from. All data past to + size - N must be left unmodified. + * + * If copying succeeds, the return value must be 0. If some data cannot be + * fetched, it is permitted to copy less than had been fetched; the only + * hard requirement is that not storing anything at all (i.e. returning size) + * should happen only when nothing could be copied. In other words, you don't + * have to squeeze as much as possible - it is allowed, but not necessary. + * + * For raw_copy_from_user() to always points to kernel memory and no faults + * on store should happen. Interpretation of from is affected by set_fs(). + * For raw_copy_to_user() it's the other way round. + * + * Both can be inlined - it's up to architectures whether it wants to bother + * with that. They should not be used directly; they are used to implement + * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic()) + * that are used instead. Out of those, __... ones are inlined. Plain + * copy_{to,from}_user() might or might not be inlined. If you want them + * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER. + * + * NOTE: only copy_from_user() zero-pads the destination in case of short copy. + * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything + * at all; their callers absolutely must check the return value. + * + * Biarch ones should also provide raw_copy_in_user() - similar to the above, + * but both source and destination are __user pointers (affected by set_fs() + * as usual) and both source and destination can trigger faults. + */ + +static __always_inline unsigned long +__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + check_object_size(to, n, false); + return raw_copy_from_user(to, from, n); +} + +static __always_inline unsigned long +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + might_fault(); + kasan_check_write(to, n); + check_object_size(to, n, false); + return raw_copy_from_user(to, from, n); +} + +/** + * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * The caller should also make sure he pins the user space address + * so that we don't result in page fault and sleep. + */ +static __always_inline unsigned long +__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + check_object_size(from, n, true); + return raw_copy_to_user(to, from, n); +} + +static __always_inline unsigned long +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + kasan_check_read(from, n); + check_object_size(from, n, true); + return raw_copy_to_user(to, from, n); +} + +#ifdef INLINE_COPY_FROM_USER +static inline unsigned long +_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned long res = n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = raw_copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +#else +extern unsigned long +_copy_from_user(void *, const void __user *, unsigned long); +#endif + +#ifdef INLINE_COPY_TO_USER +static inline unsigned long +_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = raw_copy_to_user(to, from, n); + return n; +} +#else +extern unsigned long +_copy_to_user(void __user *, const void *, unsigned long); +#endif + +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} + +static __always_inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + int sz = __compiletime_object_size(to); + + might_fault(); + kasan_check_write(to, n); + + if (likely(sz < 0 || sz >= n)) { + check_object_size(to, n, false); + n = _copy_from_user(to, from, n); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); + + return n; +} + +static __always_inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + int sz = __compiletime_object_size(from); + + kasan_check_read(from, n); + might_fault(); + + if (likely(sz < 0 || sz >= n)) { + check_object_size(from, n, true); + n = _copy_to_user(to, from, n); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); + + return n; +} +#ifdef CONFIG_COMPAT +static __always_inline unsigned long __must_check +__copy_in_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + return raw_copy_in_user(to, from, n); +} +static __always_inline unsigned long __must_check +copy_in_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n)) + n = raw_copy_in_user(to, from, n); + return n; +} +#endif + static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; @@ -12,7 +203,6 @@ static __always_inline void pagefault_disabled_inc(void) static __always_inline void pagefault_disabled_dec(void) { current->pagefault_disabled--; - WARN_ON(current->pagefault_disabled < 0); } /* @@ -67,12 +257,6 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to, return __copy_from_user_inatomic(to, from, n); } -static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) -{ - return __copy_from_user(to, from, n); -} - #endif /* ARCH_HAS_NOCACHE_UACCESS */ /* diff --git a/include/linux/udp.h b/include/linux/udp.h index c0f530809d1f3db7323e51a52224eb49d8f97da0..6cb4061a720d2df5e5f9467de8269529195ce827 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -115,6 +115,6 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) -#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) +#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) #endif /* _LINUX_UDP_H */ diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 32c0e83d62397355e1b6bb804c1d0381f56daca1..3c85c81b00279708eb6f816c153e9e4e2f735c1e 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -23,11 +23,13 @@ struct uio_map; /** * struct uio_mem - description of a UIO memory region * @name: name of the memory region for identification - * @addr: address of the device's memory (phys_addr is used since - * addr can be logical, virtual, or physical & phys_addr_t - * should always be large enough to handle any of the - * address types) - * @size: size of IO + * @addr: address of the device's memory rounded to page + * size (phys_addr is used since addr can be + * logical, virtual, or physical & phys_addr_t + * should always be large enough to handle any of + * the address types) + * @offs: offset of device memory within the page + * @size: size of IO (multiple of page size) * @memtype: type of memory addr points to * @internal_addr: ioremap-ped version of addr, for driver internal use * @map: for use by the UIO core only. @@ -35,6 +37,7 @@ struct uio_map; struct uio_mem { const char *name; phys_addr_t addr; + unsigned long offs; resource_size_t size; int memtype; void __iomem *internal_addr; diff --git a/include/linux/usb.h b/include/linux/usb.h index 7e68259360dec55f4578f6203615b60faed5227c..cb9fbd54386e286f58b6332490a8bd8dcb78471e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -99,6 +99,76 @@ enum usb_interface_condition { USB_INTERFACE_UNBINDING, }; +int __must_check +usb_find_common_endpoints(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out); + +int __must_check +usb_find_common_endpoints_reverse(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out); + +static inline int __must_check +usb_find_bulk_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in) +{ + return usb_find_common_endpoints(alt, bulk_in, NULL, NULL, NULL); +} + +static inline int __must_check +usb_find_bulk_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_out) +{ + return usb_find_common_endpoints(alt, NULL, bulk_out, NULL, NULL); +} + +static inline int __must_check +usb_find_int_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_in) +{ + return usb_find_common_endpoints(alt, NULL, NULL, int_in, NULL); +} + +static inline int __must_check +usb_find_int_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_out) +{ + return usb_find_common_endpoints(alt, NULL, NULL, NULL, int_out); +} + +static inline int __must_check +usb_find_last_bulk_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in) +{ + return usb_find_common_endpoints_reverse(alt, bulk_in, NULL, NULL, NULL); +} + +static inline int __must_check +usb_find_last_bulk_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_out) +{ + return usb_find_common_endpoints_reverse(alt, NULL, bulk_out, NULL, NULL); +} + +static inline int __must_check +usb_find_last_int_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_in) +{ + return usb_find_common_endpoints_reverse(alt, NULL, NULL, int_in, NULL); +} + +static inline int __must_check +usb_find_last_int_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_out) +{ + return usb_find_common_endpoints_reverse(alt, NULL, NULL, NULL, int_out); +} + /** * struct usb_interface - what usb device drivers talk to * @altsetting: array of interface structures, one for each alternate @@ -248,7 +318,7 @@ void usb_put_intf(struct usb_interface *intf); * struct usb_interface (which persists only as long as its configuration * is installed). The altsetting arrays can be accessed through these * structures at any time, permitting comparison of configurations and - * providing support for the /proc/bus/usb/devices pseudo-file. + * providing support for the /sys/kernel/debug/usb/devices pseudo-file. */ struct usb_interface_cache { unsigned num_altsetting; /* number of alternate settings */ @@ -354,6 +424,7 @@ struct usb_devmap { */ struct usb_bus { struct device *controller; /* host/master side hardware */ + struct device *sysdev; /* as seen from firmware or bus */ int busnum; /* Bus number (in order of reg) */ const char *bus_name; /* stable id (PCI slot_name etc) */ u8 uses_dma; /* Does the host controller use DMA? */ diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 4616a49a1c2e58c61e3bc602fa214e0be40f8820..f665d2ceac20587a285428202a7ba17de23b5734 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -451,6 +451,7 @@ static inline struct usb_composite_driver *to_cdriver( * sure doing that won't hurt too much. * * One notion for how to handle Wireless USB devices involves: + * * (a) a second gadget here, discovery mechanism TBD, but likely * needing separate "register/unregister WUSB gadget" calls; * (b) updates to usb_gadget to include flags "is it wireless", @@ -503,8 +504,9 @@ struct usb_composite_dev { /* protects deactivations and delayed_status counts*/ spinlock_t lock; - unsigned setup_pending:1; - unsigned os_desc_pending:1; + /* public: */ + unsigned int setup_pending:1; + unsigned int os_desc_pending:1; }; extern int usb_string_id(struct usb_composite_dev *c); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e4516e9ded0f437b73f171678833425d9f29a442..fbc22a39e7bc6898f3cc17cbc49b8b3f458697b0 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -188,7 +188,7 @@ struct usb_ep_caps { * @caps:The structure describing types and directions supported by endoint. * @maxpacket:The maximum packet size used on this endpoint. The initial * value can sometimes be reduced (hardware allowing), according to - * the endpoint descriptor used to configure the endpoint. + * the endpoint descriptor used to configure the endpoint. * @maxpacket_limit:The maximum packet size value which can be handled by this * endpoint. It's set once by UDC driver when endpoint is initialized, and * should not be changed. Should not be confused with maxpacket. diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 40edf6a8533e51fef324eb52b91d6038f74ab877..50398b69ca4497401b7fae3ebc716655003f408c 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -148,6 +148,7 @@ struct usb_hcd { unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ unsigned msix_enabled:1; /* driver has MSI-X enabled? */ + unsigned msi_enabled:1; /* driver has MSI enabled? */ unsigned remove_phy:1; /* auto-remove USB phy */ /* The next flag is a stopgap, to be removed when all the HCDs @@ -437,6 +438,9 @@ extern int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_host_interface *new_alt); extern int usb_hcd_get_frame_number(struct usb_device *udev); +struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, + struct device *sysdev, struct device *dev, const char *bus_name, + struct usb_hcd *primary_hcd); extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name); extern struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, @@ -453,7 +457,7 @@ extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI struct pci_dev; struct pci_device_id; extern int usb_hcd_pci_probe(struct pci_dev *dev, @@ -466,7 +470,7 @@ extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); #ifdef CONFIG_PM extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif -#endif /* CONFIG_PCI */ +#endif /* CONFIG_USB_PCI */ /* pci-ish (pdev null is ok) buffer alloc/mapping support */ void usb_init_pool_max(void); diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 5ff9032ee1b47b8cb8a7b969e93829ce63d1deb1..4031f47629ecf264113b3b3d402c6a78ce2a955a 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -18,6 +18,7 @@ int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); struct device_node *usb_of_get_child_node(struct device_node *parent, int portnum); +struct device *usb_of_get_companion_dev(struct device *dev); #else static inline enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) @@ -38,6 +39,10 @@ static inline struct device_node *usb_of_get_child_node { return NULL; } +static inline struct device *usb_of_get_companion_dev(struct device *dev) +{ + return NULL; +} #endif #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_USB_SUPPORT) diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 7a0350535cb10146ffd4e1936da7dced6bc477ca..a0a8f878503c959442608d09e1f0495a6319e03a 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -21,21 +21,6 @@ #include #include -#undef VERBOSE - -#ifdef VERBOSE -#define VDBG(fmt, args...) pr_debug("[%s] " fmt , \ - __func__, ## args) -#else -#define VDBG(stuff...) do {} while (0) -#endif - -#ifdef VERBOSE -#define MPC_LOC printk("Current Location [%s]:[%d]\n", __FILE__, __LINE__) -#else -#define MPC_LOC do {} while (0) -#endif - #define PROTO_UNDEF (0) #define PROTO_HOST (1) #define PROTO_GADGET (2) diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 704a1ab8240ca124f29c5ce361c871090d28ea5b..e2f0ab07eea5bd4d6b49abb7c563d7816934394d 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -20,7 +20,7 @@ #include /* The maximum number of ports one device can grab at once */ -#define MAX_NUM_PORTS 8 +#define MAX_NUM_PORTS 16 /* parity check flag */ #define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) @@ -159,10 +159,10 @@ struct usb_serial { unsigned char minors_reserved:1; unsigned char num_ports; unsigned char num_port_pointers; - char num_interrupt_in; - char num_interrupt_out; - char num_bulk_in; - char num_bulk_out; + unsigned char num_interrupt_in; + unsigned char num_interrupt_out; + unsigned char num_bulk_in; + unsigned char num_bulk_out; struct usb_serial_port *port[MAX_NUM_PORTS]; struct kref kref; struct mutex disc_mutex; @@ -181,6 +181,17 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) serial->private = data; } +struct usb_serial_endpoints { + unsigned char num_bulk_in; + unsigned char num_bulk_out; + unsigned char num_interrupt_in; + unsigned char num_interrupt_out; + struct usb_endpoint_descriptor *bulk_in[MAX_NUM_PORTS]; + struct usb_endpoint_descriptor *bulk_out[MAX_NUM_PORTS]; + struct usb_endpoint_descriptor *interrupt_in[MAX_NUM_PORTS]; + struct usb_endpoint_descriptor *interrupt_out[MAX_NUM_PORTS]; +}; + /** * usb_serial_driver - describes a usb serial driver * @description: pointer to a string that describes this driver. This string @@ -188,12 +199,17 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) * @id_table: pointer to a list of usb_device_id structures that define all * of the devices this structure can support. * @num_ports: the number of different ports this device will have. + * @num_bulk_in: minimum number of bulk-in endpoints + * @num_bulk_out: minimum number of bulk-out endpoints + * @num_interrupt_in: minimum number of interrupt-in endpoints + * @num_interrupt_out: minimum number of interrupt-out endpoints * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer * (0 = end-point size) * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size) * @calc_num_ports: pointer to a function to determine how many ports this - * device has dynamically. It will be called after the probe() - * callback is called, but before attach() + * device has dynamically. It can also be used to verify the number of + * endpoints or to modify the port-endpoint mapping. It will be called + * after the probe() callback is called, but before attach(). * @probe: pointer to the driver's probe function. * This will be called when the device is inserted into the system, * but before the device has been fully initialized by the usb_serial @@ -227,19 +243,26 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) struct usb_serial_driver { const char *description; const struct usb_device_id *id_table; - char num_ports; struct list_head driver_list; struct device_driver driver; struct usb_driver *usb_driver; struct usb_dynids dynids; + unsigned char num_ports; + + unsigned char num_bulk_in; + unsigned char num_bulk_out; + unsigned char num_interrupt_in; + unsigned char num_interrupt_out; + size_t bulk_in_size; size_t bulk_out_size; int (*probe)(struct usb_serial *serial, const struct usb_device_id *id); int (*attach)(struct usb_serial *serial); - int (*calc_num_ports) (struct usb_serial *serial); + int (*calc_num_ports)(struct usb_serial *serial, + struct usb_serial_endpoints *epds); void (*disconnect)(struct usb_serial *serial); void (*release)(struct usb_serial *serial); @@ -356,7 +379,6 @@ extern void usb_serial_handle_dcd_change(struct usb_serial_port *usb_port, extern int usb_serial_bus_register(struct usb_serial_driver *device); extern void usb_serial_bus_deregister(struct usb_serial_driver *device); -extern struct usb_serial_driver usb_serial_generic_device; extern struct bus_type usb_serial_bus_type; extern struct tty_driver *usb_serial_tty_driver; diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h new file mode 100644 index 0000000000000000000000000000000000000000..ec78204964ab806d1f807515e8976c385c661181 --- /dev/null +++ b/include/linux/usb/typec.h @@ -0,0 +1,243 @@ + +#ifndef __LINUX_USB_TYPEC_H +#define __LINUX_USB_TYPEC_H + +#include + +/* XXX: Once we have a header for USB Power Delivery, this belongs there */ +#define ALTMODE_MAX_MODES 6 + +/* USB Type-C Specification releases */ +#define USB_TYPEC_REV_1_0 0x100 /* 1.0 */ +#define USB_TYPEC_REV_1_1 0x110 /* 1.1 */ +#define USB_TYPEC_REV_1_2 0x120 /* 1.2 */ + +struct typec_altmode; +struct typec_partner; +struct typec_cable; +struct typec_plug; +struct typec_port; + +struct fwnode_handle; + +enum typec_port_type { + TYPEC_PORT_DFP, + TYPEC_PORT_UFP, + TYPEC_PORT_DRP, +}; + +enum typec_plug_type { + USB_PLUG_NONE, + USB_PLUG_TYPE_A, + USB_PLUG_TYPE_B, + USB_PLUG_TYPE_C, + USB_PLUG_CAPTIVE, +}; + +enum typec_data_role { + TYPEC_DEVICE, + TYPEC_HOST, +}; + +enum typec_role { + TYPEC_SINK, + TYPEC_SOURCE, +}; + +enum typec_pwr_opmode { + TYPEC_PWR_MODE_USB, + TYPEC_PWR_MODE_1_5A, + TYPEC_PWR_MODE_3_0A, + TYPEC_PWR_MODE_PD, +}; + +enum typec_accessory { + TYPEC_ACCESSORY_NONE, + TYPEC_ACCESSORY_AUDIO, + TYPEC_ACCESSORY_DEBUG, +}; + +#define TYPEC_MAX_ACCESSORY 3 + +/* + * struct usb_pd_identity - USB Power Delivery identity data + * @id_header: ID Header VDO + * @cert_stat: Cert Stat VDO + * @product: Product VDO + * + * USB power delivery Discover Identity command response data. + * + * REVISIT: This is USB Power Delivery specific information, so this structure + * probable belongs to USB Power Delivery header file once we have them. + */ +struct usb_pd_identity { + u32 id_header; + u32 cert_stat; + u32 product; +}; + +int typec_partner_set_identity(struct typec_partner *partner); +int typec_cable_set_identity(struct typec_cable *cable); + +/* + * struct typec_mode_desc - Individual Mode of an Alternate Mode + * @index: Index of the Mode within the SVID + * @vdo: VDO returned by Discover Modes USB PD command + * @desc: Optional human readable description of the mode + * @roles: Only for ports. DRP if the mode is available in both roles + * + * Description of a mode of an Alternate Mode which a connector, cable plug or + * partner supports. Every mode will have it's own sysfs group. The details are + * the VDO returned by discover modes command, description for the mode and + * active flag telling has the mode being entered or not. + */ +struct typec_mode_desc { + int index; + u32 vdo; + char *desc; + /* Only used with ports */ + enum typec_port_type roles; +}; + +/* + * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor + * @svid: Standard or Vendor ID + * @n_modes: Number of modes + * @modes: Array of modes supported by the Alternate Mode + * + * Representation of an Alternate Mode that has SVID assigned by USB-IF. The + * array of modes will list the modes of a particular SVID that are supported by + * a connector, partner of a cable plug. + */ +struct typec_altmode_desc { + u16 svid; + int n_modes; + struct typec_mode_desc modes[ALTMODE_MAX_MODES]; +}; + +struct typec_altmode +*typec_partner_register_altmode(struct typec_partner *partner, + struct typec_altmode_desc *desc); +struct typec_altmode +*typec_plug_register_altmode(struct typec_plug *plug, + struct typec_altmode_desc *desc); +struct typec_altmode +*typec_port_register_altmode(struct typec_port *port, + struct typec_altmode_desc *desc); +void typec_unregister_altmode(struct typec_altmode *altmode); + +struct typec_port *typec_altmode2port(struct typec_altmode *alt); + +void typec_altmode_update_active(struct typec_altmode *alt, int mode, + bool active); + +enum typec_plug_index { + TYPEC_PLUG_SOP_P, + TYPEC_PLUG_SOP_PP, +}; + +/* + * struct typec_plug_desc - USB Type-C Cable Plug Descriptor + * @index: SOP Prime for the plug connected to DFP and SOP Double Prime for the + * plug connected to UFP + * + * Represents USB Type-C Cable Plug. + */ +struct typec_plug_desc { + enum typec_plug_index index; +}; + +/* + * struct typec_cable_desc - USB Type-C Cable Descriptor + * @type: The plug type from USB PD Cable VDO + * @active: Is the cable active or passive + * @identity: Result of Discover Identity command + * + * Represents USB Type-C Cable attached to USB Type-C port. + */ +struct typec_cable_desc { + enum typec_plug_type type; + unsigned int active:1; + struct usb_pd_identity *identity; +}; + +/* + * struct typec_partner_desc - USB Type-C Partner Descriptor + * @usb_pd: USB Power Delivery support + * @accessory: Audio, Debug or none. + * @identity: Discover Identity command data + * + * Details about a partner that is attached to USB Type-C port. If @identity + * member exists when partner is registered, a directory named "identity" is + * created to sysfs for the partner device. + */ +struct typec_partner_desc { + unsigned int usb_pd:1; + enum typec_accessory accessory; + struct usb_pd_identity *identity; +}; + +/* + * struct typec_capability - USB Type-C Port Capabilities + * @role: DFP (Host-only), UFP (Device-only) or DRP (Dual Role) + * @revision: USB Type-C Specification release. Binary coded decimal + * @pd_revision: USB Power Delivery Specification revision if supported + * @prefer_role: Initial role preference + * @accessory: Supported Accessory Modes + * @fwnode: Optional fwnode of the port + * @try_role: Set data role preference for DRP port + * @dr_set: Set Data Role + * @pr_set: Set Power Role + * @vconn_set: Set VCONN Role + * @activate_mode: Enter/exit given Alternate Mode + * + * Static capabilities of a single USB Type-C port. + */ +struct typec_capability { + enum typec_port_type type; + u16 revision; /* 0120H = "1.2" */ + u16 pd_revision; /* 0300H = "3.0" */ + int prefer_role; + enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; + + struct fwnode_handle *fwnode; + + int (*try_role)(const struct typec_capability *, + int role); + + int (*dr_set)(const struct typec_capability *, + enum typec_data_role); + int (*pr_set)(const struct typec_capability *, + enum typec_role); + int (*vconn_set)(const struct typec_capability *, + enum typec_role); + + int (*activate_mode)(const struct typec_capability *, + int mode, int activate); +}; + +/* Specific to try_role(). Indicates the user want's to clear the preference. */ +#define TYPEC_NO_PREFERRED_ROLE (-1) + +struct typec_port *typec_register_port(struct device *parent, + const struct typec_capability *cap); +void typec_unregister_port(struct typec_port *port); + +struct typec_partner *typec_register_partner(struct typec_port *port, + struct typec_partner_desc *desc); +void typec_unregister_partner(struct typec_partner *partner); + +struct typec_cable *typec_register_cable(struct typec_port *port, + struct typec_cable_desc *desc); +void typec_unregister_cable(struct typec_cable *cable); + +struct typec_plug *typec_register_plug(struct typec_cable *cable, + struct typec_plug_desc *desc); +void typec_unregister_plug(struct typec_plug *plug); + +void typec_set_data_role(struct typec_port *port, enum typec_data_role role); +void typec_set_pwr_role(struct typec_port *port, enum typec_role role); +void typec_set_vconn_role(struct typec_port *port, enum typec_role role); +void typec_set_pwr_opmode(struct typec_port *port, enum typec_pwr_opmode mode); + +#endif /* __LINUX_USB_TYPEC_H */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 6e0ce8c7b8cb5a9fcb985a5a5078f82267d03092..97116379db5ff6a850e078c35047fe995f45b265 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -64,6 +64,8 @@ struct usbnet { struct usb_anchor deferred; struct tasklet_struct bh; + struct pcpu_sw_netstats __percpu *stats64; + struct work_struct kevent; unsigned long flags; # define EVENT_TX_HALT 0 @@ -204,6 +206,7 @@ struct cdc_state { }; extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *); +extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf); extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); @@ -261,10 +264,10 @@ extern void usbnet_pause_rx(struct usbnet *); extern void usbnet_resume_rx(struct usbnet *); extern void usbnet_purge_paused_rxq(struct usbnet *); -extern int usbnet_get_settings(struct net_device *net, - struct ethtool_cmd *cmd); -extern int usbnet_set_settings(struct net_device *net, - struct ethtool_cmd *cmd); +extern int usbnet_get_link_ksettings(struct net_device *net, + struct ethtool_link_ksettings *cmd); +extern int usbnet_set_link_ksettings(struct net_device *net, + const struct ethtool_link_ksettings *cmd); extern u32 usbnet_get_link(struct net_device *net); extern u32 usbnet_get_msglevel(struct net_device *); extern void usbnet_set_msglevel(struct net_device *, u32); @@ -278,5 +281,7 @@ extern int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags); extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); +extern void usbnet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); #endif /* __LINUX_USB_USBNET_H */ diff --git a/include/linux/usb/xhci-dbgp.h b/include/linux/usb/xhci-dbgp.h new file mode 100644 index 0000000000000000000000000000000000000000..80c1cca1f52976117a4f62928bd56d23fb191f5d --- /dev/null +++ b/include/linux/usb/xhci-dbgp.h @@ -0,0 +1,29 @@ +/* + * Standalone xHCI debug capability driver + * + * Copyright (C) 2016 Intel Corporation + * + * Author: Lu Baolu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_XHCI_DBGP_H +#define __LINUX_XHCI_DBGP_H + +#ifdef CONFIG_EARLY_PRINTK_USB_XDBC +int __init early_xdbc_parse_parameter(char *s); +int __init early_xdbc_setup_hardware(void); +void __init early_xdbc_register_console(void); +#else +static inline int __init early_xdbc_setup_hardware(void) +{ + return -ENODEV; +} +static inline void __init early_xdbc_register_console(void) +{ +} +#endif /* CONFIG_EARLY_PRINTK_USB_XDBC */ +#endif /* __LINUX_XHCI_DBGP_H */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 7edfbdb55a995d436bf9e999ce202d0ca0bf2550..28b0e965360ff1822a22252b1ecaaab52ca8ab2d 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -44,6 +44,12 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_inbuf_ctx(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + void *ctx, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, @@ -59,6 +65,9 @@ bool virtqueue_notify(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); +void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, + void **ctx); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); @@ -156,9 +165,13 @@ int virtio_device_restore(struct virtio_device *dev); * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. * @probe: the function to call when a device is found. Returns 0 or -errno. + * @scan: optional function to call after successful probe; intended + * for virtio-scsi to invoke a scan. * @remove: the function to call when a device is removed. * @config_changed: optional function to call when the device configuration * changes; may be called in interrupt context. + * @freeze: optional function to call during suspend/hibernation. + * @restore: optional function to call on resume. */ struct virtio_driver { struct device_driver driver; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8355bab175e1d8fb27ac9e0860465ba8afc36076..0133d8a12ccd468514a72fdf8eff64aa7d69551d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -72,7 +72,8 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc); + const char * const names[], const bool *ctx, + struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -173,12 +174,32 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, + NULL); if (err < 0) return ERR_PTR(err); return vq; } +static inline +int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], + struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); +} + +static inline +int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); +} + /** * virtio_device_ready - enable vq use in probe function * @vdev: the device diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e8d36938f09a56cfe4729c17b0ee8175b4991bbe..270cfa81830ee4a69bca0c4ccf914a7a94e5b3e1 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -71,6 +71,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, + bool ctx, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); @@ -80,6 +81,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring vring, struct virtio_device *vdev, bool weak_barriers, + bool ctx, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name); @@ -93,6 +95,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool ctx, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 584f9a647ad4acca191ff6116a47c14da1385fa3..ab13f0743da8ed095edb3be854f5c4430e828728 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -153,5 +153,6 @@ void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); +void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt); #endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index a80b7b59cf33418811217faca1b9c6b041dad814..d84ae90ccd5c40de69de9217aa7c4e23e4c26be8 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -25,7 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), FOR_ALL_ZONES(ALLOCSTALL), FOR_ALL_ZONES(PGSCAN_SKIP), - PGFREE, PGACTIVATE, PGDEACTIVATE, + PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, PGFAULT, PGMAJFAULT, PGLAZYFREED, PGREFILL, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d68edffbf142cec5f8987a7907b6f2e76cefdfd7..2d92dd002abdaf823d1676be3c1ed5315d18342b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -80,6 +80,17 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +#ifndef CONFIG_MMU +extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); +static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, + gfp_t flags, void *caller) +{ + return __vmalloc_node_flags(size, node, flags); +} +#else +extern void *__vmalloc_node_flags_caller(unsigned long size, + int node, gfp_t flags, void *caller); +#endif extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/include/linux/vme.h b/include/linux/vme.h index ec5e8bf6118e7b1d99be2a5d0293c9a9babe6b1e..25874da3f2e17965879b44aab7b08fb0ad62af89 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -92,7 +92,7 @@ extern struct bus_type vme_bus_type; #define VME_SLOT_ALL -2 /** - * Structure representing a VME device + * struct vme_dev - Structure representing a VME device * @num: The device number * @bridge: Pointer to the bridge device this device is on * @dev: Internal device structure @@ -107,6 +107,16 @@ struct vme_dev { struct list_head bridge_list; }; +/** + * struct vme_driver - Structure representing a VME driver + * @name: Driver name, should be unique among VME drivers and usually the same + * as the module name. + * @match: Callback used to determine whether probe should be run. + * @probe: Callback for device binding, called when new device is detected. + * @remove: Callback, called on device removal. + * @driver: Underlying generic device driver structure. + * @devices: List of VME devices (struct vme_dev) associated with this driver. + */ struct vme_driver { const char *name; int (*match)(struct vme_dev *); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bde063cefd047c1bb42ac02a02522ae88863c723..c102ef65cb64a94269b950ff09c47d90bbc2d315 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -608,8 +608,13 @@ static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } +static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} #else long work_on_cpu(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a3c0cbd7c88824a297cd725e36c931c223c3e106..d5815794416c9b8430c7ca9153e3be3271a76899 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -237,6 +237,7 @@ static inline void inode_attach_wb(struct inode *inode, struct page *page) static inline void inode_detach_wb(struct inode *inode) { if (inode->i_wb) { + WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); wb_put(inode->i_wb); inode->i_wb = NULL; } diff --git a/include/media/cec-edid.h b/include/media/cec-edid.h deleted file mode 100644 index bdf731ecba1a760438a3f4bcf4ccf6d91b9fb90e..0000000000000000000000000000000000000000 --- a/include/media/cec-edid.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * cec-edid - HDMI Consumer Electronics Control & EDID helpers - * - * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved. - * - * This program is free software; you may redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _MEDIA_CEC_EDID_H -#define _MEDIA_CEC_EDID_H - -#include - -#define CEC_PHYS_ADDR_INVALID 0xffff -#define cec_phys_addr_exp(pa) \ - ((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf - -/** - * cec_get_edid_phys_addr() - find and return the physical address - * - * @edid: pointer to the EDID data - * @size: size in bytes of the EDID data - * @offset: If not %NULL then the location of the physical address - * bytes in the EDID will be returned here. This is set to 0 - * if there is no physical address found. - * - * Return: the physical address or CEC_PHYS_ADDR_INVALID if there is none. - */ -u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, - unsigned int *offset); - -/** - * cec_set_edid_phys_addr() - find and set the physical address - * - * @edid: pointer to the EDID data - * @size: size in bytes of the EDID data - * @phys_addr: the new physical address - * - * This function finds the location of the physical address in the EDID - * and fills in the given physical address and updates the checksum - * at the end of the EDID block. It does nothing if the EDID doesn't - * contain a physical address. - */ -void cec_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr); - -/** - * cec_phys_addr_for_input() - calculate the PA for an input - * - * @phys_addr: the physical address of the parent - * @input: the number of the input port, must be between 1 and 15 - * - * This function calculates a new physical address based on the input - * port number. For example: - * - * PA = 0.0.0.0 and input = 2 becomes 2.0.0.0 - * - * PA = 3.0.0.0 and input = 1 becomes 3.1.0.0 - * - * PA = 3.2.1.0 and input = 5 becomes 3.2.1.5 - * - * PA = 3.2.1.3 and input = 5 becomes f.f.f.f since it maxed out the depth. - * - * Return: the new physical address or CEC_PHYS_ADDR_INVALID. - */ -u16 cec_phys_addr_for_input(u16 phys_addr, u8 input); - -/** - * cec_phys_addr_validate() - validate a physical address from an EDID - * - * @phys_addr: the physical address to validate - * @parent: if not %NULL, then this is filled with the parents PA. - * @port: if not %NULL, then this is filled with the input port. - * - * This validates a physical address as read from an EDID. If the - * PA is invalid (such as 1.0.1.0 since '0' is only allowed at the end), - * then it will return -EINVAL. - * - * The parent PA is passed into %parent and the input port is passed into - * %port. For example: - * - * PA = 0.0.0.0: has parent 0.0.0.0 and input port 0. - * - * PA = 1.0.0.0: has parent 0.0.0.0 and input port 1. - * - * PA = 3.2.0.0: has parent 3.0.0.0 and input port 2. - * - * PA = f.f.f.f: has parent f.f.f.f and input port 0. - * - * Return: 0 if the PA is valid, -EINVAL if not. - */ -int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port); - -#endif /* _MEDIA_CEC_EDID_H */ diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h new file mode 100644 index 0000000000000000000000000000000000000000..298f996969df632ba41b6c68f3d815678c89e079 --- /dev/null +++ b/include/media/cec-notifier.h @@ -0,0 +1,121 @@ +/* + * cec-notifier.h - notify CEC drivers of physical address changes + * + * Copyright 2016 Russell King + * Copyright 2016-2017 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef LINUX_CEC_NOTIFIER_H +#define LINUX_CEC_NOTIFIER_H + +#include +#include + +struct device; +struct edid; +struct cec_adapter; +struct cec_notifier; + +#if IS_REACHABLE(CONFIG_CEC_CORE) && IS_ENABLED(CONFIG_CEC_NOTIFIER) + +/** + * cec_notifier_get - find or create a new cec_notifier for the given device. + * @dev: device that sends the events. + * + * If a notifier for device @dev already exists, then increase the refcount + * and return that notifier. + * + * If it doesn't exist, then allocate a new notifier struct and return a + * pointer to that new struct. + * + * Return NULL if the memory could not be allocated. + */ +struct cec_notifier *cec_notifier_get(struct device *dev); + +/** + * cec_notifier_put - decrease refcount and delete when the refcount reaches 0. + * @n: notifier + */ +void cec_notifier_put(struct cec_notifier *n); + +/** + * cec_notifier_set_phys_addr - set a new physical address. + * @n: the CEC notifier + * @pa: the CEC physical address + * + * Set a new CEC physical address. + */ +void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa); + +/** + * cec_notifier_set_phys_addr_from_edid - set parse the PA from the EDID. + * @n: the CEC notifier + * @edid: the struct edid pointer + * + * Parses the EDID to obtain the new CEC physical address and set it. + */ +void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n, + const struct edid *edid); + +/** + * cec_notifier_register - register a callback with the notifier + * @n: the CEC notifier + * @adap: the CEC adapter, passed as argument to the callback function + * @callback: the callback function + */ +void cec_notifier_register(struct cec_notifier *n, + struct cec_adapter *adap, + void (*callback)(struct cec_adapter *adap, u16 pa)); + +/** + * cec_notifier_unregister - unregister the callback from the notifier. + * @n: the CEC notifier + */ +void cec_notifier_unregister(struct cec_notifier *n); + +#else +static inline struct cec_notifier *cec_notifier_get(struct device *dev) +{ + /* A non-NULL pointer is expected on success */ + return (struct cec_notifier *)0xdeadfeed; +} + +static inline void cec_notifier_put(struct cec_notifier *n) +{ +} + +static inline void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa) +{ +} + +static inline void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n, + const struct edid *edid) +{ +} + +static inline void cec_notifier_register(struct cec_notifier *n, + struct cec_adapter *adap, + void (*callback)(struct cec_adapter *adap, u16 pa)) +{ +} + +static inline void cec_notifier_unregister(struct cec_notifier *n) +{ +} + +#endif + +#endif diff --git a/include/media/cec.h b/include/media/cec.h index 96a0aa770d61ddd843fc418f7ae3df4b4df41f74..201f060978da2d5f8f49ad3fb31ac514c55e60c2 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** * struct cec_devnode - cec device node @@ -173,6 +173,10 @@ struct cec_adapter { bool passthrough; struct cec_log_addrs log_addrs; +#ifdef CONFIG_CEC_NOTIFIER + struct cec_notifier *notifier; +#endif + struct dentry *cec_dir; struct dentry *status_file; @@ -184,6 +188,11 @@ struct cec_adapter { char input_drv[32]; }; +static inline void *cec_get_drvdata(const struct cec_adapter *adap) +{ + return adap->priv; +} + static inline bool cec_has_log_addr(const struct cec_adapter *adap, u8 log_addr) { return adap->log_addrs.log_addr_mask & (1 << log_addr); @@ -194,7 +203,10 @@ static inline bool cec_is_sink(const struct cec_adapter *adap) return adap->phys_addr == 0; } -#if IS_ENABLED(CONFIG_MEDIA_CEC_SUPPORT) +#define cec_phys_addr_exp(pa) \ + ((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf + +#if IS_REACHABLE(CONFIG_CEC_CORE) struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops, void *priv, const char *name, u32 caps, u8 available_las); int cec_register_adapter(struct cec_adapter *adap, struct device *parent); @@ -213,6 +225,86 @@ void cec_transmit_done(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt, u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt); void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg); +/** + * cec_get_edid_phys_addr() - find and return the physical address + * + * @edid: pointer to the EDID data + * @size: size in bytes of the EDID data + * @offset: If not %NULL then the location of the physical address + * bytes in the EDID will be returned here. This is set to 0 + * if there is no physical address found. + * + * Return: the physical address or CEC_PHYS_ADDR_INVALID if there is none. + */ +u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, + unsigned int *offset); + +/** + * cec_set_edid_phys_addr() - find and set the physical address + * + * @edid: pointer to the EDID data + * @size: size in bytes of the EDID data + * @phys_addr: the new physical address + * + * This function finds the location of the physical address in the EDID + * and fills in the given physical address and updates the checksum + * at the end of the EDID block. It does nothing if the EDID doesn't + * contain a physical address. + */ +void cec_set_edid_phys_addr(u8 *edid, unsigned int size, u16 phys_addr); + +/** + * cec_phys_addr_for_input() - calculate the PA for an input + * + * @phys_addr: the physical address of the parent + * @input: the number of the input port, must be between 1 and 15 + * + * This function calculates a new physical address based on the input + * port number. For example: + * + * PA = 0.0.0.0 and input = 2 becomes 2.0.0.0 + * + * PA = 3.0.0.0 and input = 1 becomes 3.1.0.0 + * + * PA = 3.2.1.0 and input = 5 becomes 3.2.1.5 + * + * PA = 3.2.1.3 and input = 5 becomes f.f.f.f since it maxed out the depth. + * + * Return: the new physical address or CEC_PHYS_ADDR_INVALID. + */ +u16 cec_phys_addr_for_input(u16 phys_addr, u8 input); + +/** + * cec_phys_addr_validate() - validate a physical address from an EDID + * + * @phys_addr: the physical address to validate + * @parent: if not %NULL, then this is filled with the parents PA. + * @port: if not %NULL, then this is filled with the input port. + * + * This validates a physical address as read from an EDID. If the + * PA is invalid (such as 1.0.1.0 since '0' is only allowed at the end), + * then it will return -EINVAL. + * + * The parent PA is passed into %parent and the input port is passed into + * %port. For example: + * + * PA = 0.0.0.0: has parent 0.0.0.0 and input port 0. + * + * PA = 1.0.0.0: has parent 0.0.0.0 and input port 1. + * + * PA = 3.2.0.0: has parent 3.0.0.0 and input port 2. + * + * PA = f.f.f.f: has parent f.f.f.f and input port 0. + * + * Return: 0 if the PA is valid, -EINVAL if not. + */ +int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port); + +#ifdef CONFIG_CEC_NOTIFIER +void cec_register_cec_notifier(struct cec_adapter *adap, + struct cec_notifier *notifier); +#endif + #else static inline int cec_register_adapter(struct cec_adapter *adap, @@ -234,6 +326,29 @@ static inline void cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, { } +static inline u16 cec_get_edid_phys_addr(const u8 *edid, unsigned int size, + unsigned int *offset) +{ + if (offset) + *offset = 0; + return CEC_PHYS_ADDR_INVALID; +} + +static inline void cec_set_edid_phys_addr(u8 *edid, unsigned int size, + u16 phys_addr) +{ +} + +static inline u16 cec_phys_addr_for_input(u16 phys_addr, u8 input) +{ + return CEC_PHYS_ADDR_INVALID; +} + +static inline int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port) +{ + return 0; +} + #endif #endif /* _MEDIA_CEC_H */ diff --git a/include/media/davinci/vpif_types.h b/include/media/davinci/vpif_types.h index c49c306cba6177fc21e5f9765059a98935bddc9a..385597da20dc3791cfa0c0f29f9b9c53bcc6516d 100644 --- a/include/media/davinci/vpif_types.h +++ b/include/media/davinci/vpif_types.h @@ -53,6 +53,7 @@ struct vpif_display_config { int (*set_clock)(int, int); struct vpif_subdev_info *subdevinfo; int subdev_count; + int i2c_adapter_id; struct vpif_display_chan_config chan_config[VPIF_DISPLAY_MAX_CHANNELS]; const char *card_name; struct v4l2_async_subdev **asd; /* Flat array, arranged in groups */ diff --git a/include/media/rc-map.h b/include/media/rc-map.h index a704749280d2342d10f64fbd01b2b9a90604a045..1a815a572fa184a22cdf5c3240bea59f0514de4d 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -27,7 +27,8 @@ * @RC_TYPE_NECX: Extended NEC protocol * @RC_TYPE_NEC32: NEC 32 bit protocol * @RC_TYPE_SANYO: Sanyo protocol - * @RC_TYPE_MCE_KBD: RC6-ish MCE keyboard/mouse + * @RC_TYPE_MCIR2_KBD: RC6-ish MCE keyboard + * @RC_TYPE_MCIR2_MSE: RC6-ish MCE mouse * @RC_TYPE_RC6_0: Philips RC6-0-16 protocol * @RC_TYPE_RC6_6A_20: Philips RC6-6A-20 protocol * @RC_TYPE_RC6_6A_24: Philips RC6-6A-24 protocol @@ -51,48 +52,51 @@ enum rc_type { RC_TYPE_NECX = 10, RC_TYPE_NEC32 = 11, RC_TYPE_SANYO = 12, - RC_TYPE_MCE_KBD = 13, - RC_TYPE_RC6_0 = 14, - RC_TYPE_RC6_6A_20 = 15, - RC_TYPE_RC6_6A_24 = 16, - RC_TYPE_RC6_6A_32 = 17, - RC_TYPE_RC6_MCE = 18, - RC_TYPE_SHARP = 19, - RC_TYPE_XMP = 20, - RC_TYPE_CEC = 21, + RC_TYPE_MCIR2_KBD = 13, + RC_TYPE_MCIR2_MSE = 14, + RC_TYPE_RC6_0 = 15, + RC_TYPE_RC6_6A_20 = 16, + RC_TYPE_RC6_6A_24 = 17, + RC_TYPE_RC6_6A_32 = 18, + RC_TYPE_RC6_MCE = 19, + RC_TYPE_SHARP = 20, + RC_TYPE_XMP = 21, + RC_TYPE_CEC = 22, }; #define RC_BIT_NONE 0ULL -#define RC_BIT_UNKNOWN (1ULL << RC_TYPE_UNKNOWN) -#define RC_BIT_OTHER (1ULL << RC_TYPE_OTHER) -#define RC_BIT_RC5 (1ULL << RC_TYPE_RC5) -#define RC_BIT_RC5X_20 (1ULL << RC_TYPE_RC5X_20) -#define RC_BIT_RC5_SZ (1ULL << RC_TYPE_RC5_SZ) -#define RC_BIT_JVC (1ULL << RC_TYPE_JVC) -#define RC_BIT_SONY12 (1ULL << RC_TYPE_SONY12) -#define RC_BIT_SONY15 (1ULL << RC_TYPE_SONY15) -#define RC_BIT_SONY20 (1ULL << RC_TYPE_SONY20) -#define RC_BIT_NEC (1ULL << RC_TYPE_NEC) -#define RC_BIT_NECX (1ULL << RC_TYPE_NECX) -#define RC_BIT_NEC32 (1ULL << RC_TYPE_NEC32) -#define RC_BIT_SANYO (1ULL << RC_TYPE_SANYO) -#define RC_BIT_MCE_KBD (1ULL << RC_TYPE_MCE_KBD) -#define RC_BIT_RC6_0 (1ULL << RC_TYPE_RC6_0) -#define RC_BIT_RC6_6A_20 (1ULL << RC_TYPE_RC6_6A_20) -#define RC_BIT_RC6_6A_24 (1ULL << RC_TYPE_RC6_6A_24) -#define RC_BIT_RC6_6A_32 (1ULL << RC_TYPE_RC6_6A_32) -#define RC_BIT_RC6_MCE (1ULL << RC_TYPE_RC6_MCE) -#define RC_BIT_SHARP (1ULL << RC_TYPE_SHARP) -#define RC_BIT_XMP (1ULL << RC_TYPE_XMP) -#define RC_BIT_CEC (1ULL << RC_TYPE_CEC) +#define RC_BIT_UNKNOWN BIT_ULL(RC_TYPE_UNKNOWN) +#define RC_BIT_OTHER BIT_ULL(RC_TYPE_OTHER) +#define RC_BIT_RC5 BIT_ULL(RC_TYPE_RC5) +#define RC_BIT_RC5X_20 BIT_ULL(RC_TYPE_RC5X_20) +#define RC_BIT_RC5_SZ BIT_ULL(RC_TYPE_RC5_SZ) +#define RC_BIT_JVC BIT_ULL(RC_TYPE_JVC) +#define RC_BIT_SONY12 BIT_ULL(RC_TYPE_SONY12) +#define RC_BIT_SONY15 BIT_ULL(RC_TYPE_SONY15) +#define RC_BIT_SONY20 BIT_ULL(RC_TYPE_SONY20) +#define RC_BIT_NEC BIT_ULL(RC_TYPE_NEC) +#define RC_BIT_NECX BIT_ULL(RC_TYPE_NECX) +#define RC_BIT_NEC32 BIT_ULL(RC_TYPE_NEC32) +#define RC_BIT_SANYO BIT_ULL(RC_TYPE_SANYO) +#define RC_BIT_MCIR2_KBD BIT_ULL(RC_TYPE_MCIR2_KBD) +#define RC_BIT_MCIR2_MSE BIT_ULL(RC_TYPE_MCIR2_MSE) +#define RC_BIT_RC6_0 BIT_ULL(RC_TYPE_RC6_0) +#define RC_BIT_RC6_6A_20 BIT_ULL(RC_TYPE_RC6_6A_20) +#define RC_BIT_RC6_6A_24 BIT_ULL(RC_TYPE_RC6_6A_24) +#define RC_BIT_RC6_6A_32 BIT_ULL(RC_TYPE_RC6_6A_32) +#define RC_BIT_RC6_MCE BIT_ULL(RC_TYPE_RC6_MCE) +#define RC_BIT_SHARP BIT_ULL(RC_TYPE_SHARP) +#define RC_BIT_XMP BIT_ULL(RC_TYPE_XMP) +#define RC_BIT_CEC BIT_ULL(RC_TYPE_CEC) #define RC_BIT_ALL (RC_BIT_UNKNOWN | RC_BIT_OTHER | \ RC_BIT_RC5 | RC_BIT_RC5X_20 | RC_BIT_RC5_SZ | \ RC_BIT_JVC | \ RC_BIT_SONY12 | RC_BIT_SONY15 | RC_BIT_SONY20 | \ RC_BIT_NEC | RC_BIT_NECX | RC_BIT_NEC32 | \ - RC_BIT_SANYO | RC_BIT_MCE_KBD | RC_BIT_RC6_0 | \ - RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ + RC_BIT_SANYO | \ + RC_BIT_MCIR2_KBD | RC_BIT_MCIR2_MSE | \ + RC_BIT_RC6_0 | RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ RC_BIT_RC6_6A_32 | RC_BIT_RC6_MCE | RC_BIT_SHARP | \ RC_BIT_XMP | RC_BIT_CEC) /* All rc protocols for which we have decoders */ @@ -101,8 +105,8 @@ enum rc_type { RC_BIT_JVC | \ RC_BIT_SONY12 | RC_BIT_SONY15 | RC_BIT_SONY20 | \ RC_BIT_NEC | RC_BIT_NECX | RC_BIT_NEC32 | \ - RC_BIT_SANYO | RC_BIT_MCE_KBD | RC_BIT_RC6_0 | \ - RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ + RC_BIT_SANYO | RC_BIT_MCIR2_KBD | RC_BIT_MCIR2_MSE | \ + RC_BIT_RC6_0 | RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ RC_BIT_RC6_6A_32 | RC_BIT_RC6_MCE | RC_BIT_SHARP | \ RC_BIT_XMP) @@ -111,7 +115,7 @@ enum rc_type { RC_BIT_JVC | \ RC_BIT_SONY12 | RC_BIT_SONY15 | RC_BIT_SONY20 | \ RC_BIT_NEC | RC_BIT_NECX | RC_BIT_NEC32 | \ - RC_BIT_SANYO | \ + RC_BIT_SANYO | RC_BIT_MCIR2_KBD | RC_BIT_MCIR2_MSE | \ RC_BIT_RC6_0 | RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ RC_BIT_RC6_6A_32 | RC_BIT_RC6_MCE | \ RC_BIT_SHARP) @@ -255,7 +259,6 @@ struct rc_map *rc_map_get(const char *name); #define RC_MAP_KWORLD_PC150U "rc-kworld-pc150u" #define RC_MAP_KWORLD_PLUS_TV_ANALOG "rc-kworld-plus-tv-analog" #define RC_MAP_LEADTEK_Y04G0051 "rc-leadtek-y04g0051" -#define RC_MAP_LIRC "rc-lirc" #define RC_MAP_LME2510 "rc-lme2510" #define RC_MAP_MANLI "rc-manli" #define RC_MAP_MEDION_X10 "rc-medion-x10" diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h index 1a15c3e4efd387ce5641946aaa67183ba5daa3f7..4d8cb0796bc60d637cd69a7381cb93e1318a723d 100644 --- a/include/media/soc_camera.h +++ b/include/media/soc_camera.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -55,10 +54,7 @@ struct soc_camera_device { /* Asynchronous subdevice management */ struct soc_camera_async_client *sasc; /* video buffer queue */ - union { - struct videobuf_queue vb_vidq; - struct vb2_queue vb2_vidq; - }; + struct vb2_queue vb2_vidq; }; /* Host supports programmable stride */ @@ -114,11 +110,8 @@ struct soc_camera_host_ops { int (*set_liveselection)(struct soc_camera_device *, struct v4l2_selection *); int (*set_fmt)(struct soc_camera_device *, struct v4l2_format *); int (*try_fmt)(struct soc_camera_device *, struct v4l2_format *); - void (*init_videobuf)(struct videobuf_queue *, - struct soc_camera_device *); int (*init_videobuf2)(struct vb2_queue *, struct soc_camera_device *); - int (*reqbufs)(struct soc_camera_device *, struct v4l2_requestbuffers *); int (*querycap)(struct soc_camera_host *, struct v4l2_capability *); int (*set_bus_param)(struct soc_camera_device *); int (*get_parm)(struct soc_camera_device *, struct v4l2_streamparm *); @@ -396,11 +389,6 @@ static inline struct soc_camera_device *soc_camera_from_vb2q(const struct vb2_qu return container_of(vq, struct soc_camera_device, vb2_vidq); } -static inline struct soc_camera_device *soc_camera_from_vbq(const struct videobuf_queue *vq) -{ - return container_of(vq, struct soc_camera_device, vb_vidq); -} - static inline u32 soc_camera_grp_id(const struct soc_camera_device *icd) { return (icd->iface << 8) | (icd->devnum + 1); diff --git a/include/media/tveeprom.h b/include/media/tveeprom.h index c56501ee0484a1a1e0b7c6720499195637756b3d..630bcf3d8885b9e1eaf9da2a77b076c4a13c4d4e 100644 --- a/include/media/tveeprom.h +++ b/include/media/tveeprom.h @@ -94,13 +94,12 @@ struct tveeprom { * of the eeprom previously filled at * @eeprom_data field. * - * @c: I2C client struct * @tvee: Struct to where the eeprom parsed data will be filled; * @eeprom_data: Array with the contents of the eeprom_data. It should * contain 256 bytes filled with the contents of the * eeprom read from the Hauppauge device. */ -void tveeprom_hauppauge_analog(struct i2c_client *c, struct tveeprom *tvee, +void tveeprom_hauppauge_analog(struct tveeprom *tvee, unsigned char *eeprom_data); /** diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 6cd94e5ee113f0f5314f28e69e136bc9e35a4e9b..bd5312118013d3fbd2d88c698d15139d0b9155da 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -44,6 +44,9 @@ struct v4l2_fh; * @vidioc_enum_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_ENUM_FMT ` ioctl logic * for Software Defined Radio output + * @vidioc_enum_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_ENUM_FMT ` ioctl logic + * for metadata capture * @vidioc_g_fmt_vid_cap: pointer to the function that implements * :ref:`VIDIOC_G_FMT ` ioctl logic for video capture * in single plane mode @@ -74,6 +77,8 @@ struct v4l2_fh; * @vidioc_g_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_G_FMT ` ioctl logic for Software Defined * Radio output + * @vidioc_g_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_G_FMT ` ioctl logic for metadata capture * @vidioc_s_fmt_vid_cap: pointer to the function that implements * :ref:`VIDIOC_S_FMT ` ioctl logic for video capture * in single plane mode @@ -104,6 +109,8 @@ struct v4l2_fh; * @vidioc_s_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_S_FMT ` ioctl logic for Software Defined * Radio output + * @vidioc_s_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_S_FMT ` ioctl logic for metadata capture * @vidioc_try_fmt_vid_cap: pointer to the function that implements * :ref:`VIDIOC_TRY_FMT ` ioctl logic for video capture * in single plane mode @@ -136,6 +143,8 @@ struct v4l2_fh; * @vidioc_try_fmt_sdr_out: pointer to the function that implements * :ref:`VIDIOC_TRY_FMT ` ioctl logic for Software Defined * Radio output + * @vidioc_try_fmt_meta_cap: pointer to the function that implements + * :ref:`VIDIOC_TRY_FMT ` ioctl logic for metadata capture * @vidioc_reqbufs: pointer to the function that implements * :ref:`VIDIOC_REQBUFS ` ioctl * @vidioc_querybuf: pointer to the function that implements @@ -306,6 +315,8 @@ struct v4l2_ioctl_ops { struct v4l2_fmtdesc *f); int (*vidioc_enum_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_fmtdesc *f); + int (*vidioc_enum_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_fmtdesc *f); /* VIDIOC_G_FMT handlers */ int (*vidioc_g_fmt_vid_cap)(struct file *file, void *fh, @@ -332,6 +343,8 @@ struct v4l2_ioctl_ops { struct v4l2_format *f); int (*vidioc_g_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_format *f); + int (*vidioc_g_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_format *f); /* VIDIOC_S_FMT handlers */ int (*vidioc_s_fmt_vid_cap)(struct file *file, void *fh, @@ -358,6 +371,8 @@ struct v4l2_ioctl_ops { struct v4l2_format *f); int (*vidioc_s_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_format *f); + int (*vidioc_s_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_format *f); /* VIDIOC_TRY_FMT handlers */ int (*vidioc_try_fmt_vid_cap)(struct file *file, void *fh, @@ -384,6 +399,8 @@ struct v4l2_ioctl_ops { struct v4l2_format *f); int (*vidioc_try_fmt_sdr_out)(struct file *file, void *fh, struct v4l2_format *f); + int (*vidioc_try_fmt_meta_cap)(struct file *file, void *fh, + struct v4l2_format *f); /* Buffer handlers */ int (*vidioc_reqbufs)(struct file *file, void *fh, diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index ac5898a55fd967e6c051f8e837f9bdb6425752fb..cb97c224be730f5e3fd7076fd681e66b56df64e5 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -305,16 +305,16 @@ struct vb2_buffer { * buffer in \*num_planes, the size of each plane should be * set in the sizes\[\] array and optional per-plane * allocator specific device in the alloc_devs\[\] array. - * When called from VIDIOC_REQBUFS,() \*num_planes == 0, + * When called from VIDIOC_REQBUFS(), \*num_planes == 0, * the driver has to use the currently configured format to * determine the plane sizes and \*num_buffers is the total * number of buffers that are being allocated. When called - * from VIDIOC_CREATE_BUFS,() \*num_planes != 0 and it + * from VIDIOC_CREATE_BUFS(), \*num_planes != 0 and it * describes the requested number of planes and sizes\[\] - * contains the requested plane sizes. If either - * \*num_planes or the requested sizes are invalid callback - * must return %-EINVAL. In this case \*num_buffers are - * being allocated additionally to q->num_buffers. + * contains the requested plane sizes. In this case + * \*num_buffers are being allocated additionally to + * q->num_buffers. If either \*num_planes or the requested + * sizes are invalid callback must return %-EINVAL. * @wait_prepare: release any locks taken while calling vb2 functions; * it is called before an ioctl needs to wait for a new * buffer to arrive; required to avoid a deadlock in diff --git a/include/media/videobuf2-memops.h b/include/media/videobuf2-memops.h index 36565c7acb542b961fcbe38d8b978a6f802f234c..a6ed091b79ce501a183b2fb6cf6051d169f066f7 100644 --- a/include/media/videobuf2-memops.h +++ b/include/media/videobuf2-memops.h @@ -16,6 +16,7 @@ #include #include +#include /** * struct vb2_vmarea_handler - common vma refcount tracking handler @@ -25,7 +26,7 @@ * @arg: argument for @put callback */ struct vb2_vmarea_handler { - atomic_t *refcount; + refcount_t *refcount; void (*put)(void *arg); void *arg; }; diff --git a/include/misc/charlcd.h b/include/misc/charlcd.h new file mode 100644 index 0000000000000000000000000000000000000000..23f61850f3639ae1686f9f83e0bac6e6a5015ba5 --- /dev/null +++ b/include/misc/charlcd.h @@ -0,0 +1,42 @@ +/* + * Character LCD driver for Linux + * + * Copyright (C) 2000-2008, Willy Tarreau + * Copyright (C) 2016-2017 Glider bvba + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +struct charlcd { + const struct charlcd_ops *ops; + const unsigned char *char_conv; /* Optional */ + + int ifwidth; /* 4-bit or 8-bit (default) */ + int height; + int width; + int bwidth; /* Default set by charlcd_alloc() */ + int hwidth; /* Default set by charlcd_alloc() */ + + void *drvdata; /* Set by charlcd_alloc() */ +}; + +struct charlcd_ops { + /* Required */ + void (*write_cmd)(struct charlcd *lcd, int cmd); + void (*write_data)(struct charlcd *lcd, int data); + + /* Optional */ + void (*write_cmd_raw4)(struct charlcd *lcd, int cmd); /* 4-bit only */ + void (*clear_fast)(struct charlcd *lcd); + void (*backlight)(struct charlcd *lcd, int on); +}; + +struct charlcd *charlcd_alloc(unsigned int drvdata_size); + +int charlcd_register(struct charlcd *lcd); +int charlcd_unregister(struct charlcd *lcd); + +void charlcd_poke(struct charlcd *lcd); diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 5ab4c9901ccc12979e32d4788c35050717a00d7f..a71378007e61dc94e7f43bf671b275c291376db8 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -198,6 +198,21 @@ static inline void lowpan_iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, ipaddr->s6_addr[8] ^= 0x02; } +static inline void lowpan_iphc_uncompress_eui48_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + /* fe:80::XXXX:XXff:feXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, 3); + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + memcpy(&ipaddr->s6_addr[13], lladdr + 3, 3); +} + #ifdef DEBUG /* print data in line */ static inline void raw_dump_inline(const char *caller, char *msg, diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 17c6fd84e287808eca08503b87a56a5d6fc0cc5c..b43a4eec3ceca4f798bf7513ecd4aa999535c6e7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -20,6 +20,8 @@ #define ADDRCONF_TIMER_FUZZ (HZ / 4) #define ADDRCONF_TIMER_FUZZ_MAX (HZ) +#define ADDRCONF_NOTIFY_PRIORITY 0 + #include #include @@ -103,12 +105,24 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, u32 addr_flags, bool sllao, bool tokenized, __u32 valid_lft, u32 prefered_lft); +static inline void addrconf_addr_eui48_base(u8 *eui, const char *const addr) +{ + memcpy(eui, addr, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + memcpy(eui + 5, addr + 3, 3); +} + +static inline void addrconf_addr_eui48(u8 *eui, const char *const addr) +{ + addrconf_addr_eui48_base(eui, addr); + eui[0] ^= 2; +} + static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->addr_len != ETH_ALEN) return -1; - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); /* * The zSeries OSA network cards can be shared among various @@ -123,14 +137,16 @@ static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) * case. Hence the resulting interface identifier has local * scope according to RFC2373. */ + + addrconf_addr_eui48_base(eui, dev->dev_addr); + if (dev->dev_id) { eui[3] = (dev->dev_id >> 8) & 0xFF; eui[4] = dev->dev_id & 0xFF; } else { - eui[3] = 0xFF; - eui[4] = 0xFE; eui[0] ^= 2; } + return 0; } @@ -262,8 +278,8 @@ int register_inet6addr_notifier(struct notifier_block *nb); int unregister_inet6addr_notifier(struct notifier_block *nb); int inet6addr_notifier_call_chain(unsigned long val, void *v); -void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf); +void inet6_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv6_devconf *devconf); /** * __in6_dev_get - get inet6_dev pointer from netdevice diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 1061a472a3e35b88a7df587fdeefcdb310c793bc..b5f5187f488cc1e663912ec1e12811d49ecb3fd5 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -39,7 +39,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); -void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, +bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f32ed9ac181a47c00757596fc3b8c5733426c468..f9fb566e75cfd8ca1531dd733d443f9a6c929c62 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -188,4 +188,17 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +/**** TAP ****/ + +struct vsock_tap { + struct net_device *dev; + struct module *module; + struct list_head list; +}; + +int vsock_init_tap(void); +int vsock_add_tap(struct vsock_tap *vt); +int vsock_remove_tap(struct vsock_tap *vt); +void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); + #endif /* __AF_VSOCK_H__ */ diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 5ee3c689c86370b4b325309d337611c9b51af5d1..0697fd41308777a4801dcf8728f2ff5aa0210804 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -282,7 +282,7 @@ struct l2cap_conn_rsp { #define L2CAP_CR_BAD_KEY_SIZE 0x0007 #define L2CAP_CR_ENCRYPTION 0x0008 #define L2CAP_CR_INVALID_SCID 0x0009 -#define L2CAP_CR_SCID_IN_USE 0x0010 +#define L2CAP_CR_SCID_IN_USE 0x000A /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 4190af53a46ad1a5f7868f4add65363e5ada9b20..da4acefe39c81abbc11af58c88eb283159d1de26 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include + #ifndef __RFCOMM_H #define __RFCOMM_H @@ -174,7 +176,7 @@ struct rfcomm_dlc { struct mutex lock; unsigned long state; unsigned long flags; - atomic_t refcnt; + refcount_t refcnt; u8 dlci; u8 addr; u8 priority; @@ -247,12 +249,12 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel); static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d) { - atomic_inc(&d->refcnt); + refcount_inc(&d->refcnt); } static inline void rfcomm_dlc_put(struct rfcomm_dlc *d) { - if (atomic_dec_and_test(&d->refcnt)) + if (refcount_dec_and_test(&d->refcnt)) rfcomm_dlc_free(d); } diff --git a/include/net/bonding.h b/include/net/bonding.h index 3c857778a6ca6870f7e7d5604adcd263380e4708..b00508d22e0a6adc37ee4e69187377fea3bc102d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -153,7 +153,8 @@ struct slave { unsigned long last_link_up; unsigned long last_rx; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; - s8 link; /* one of BOND_LINK_XXXX */ + s8 link; /* one of BOND_LINK_XXXX */ + s8 link_new_state; /* one of BOND_LINK_XXXX */ s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ @@ -165,7 +166,7 @@ struct slave { u32 link_failure_count; u32 speed; u16 queue_id; - u8 perm_hwaddr[ETH_ALEN]; + u8 perm_hwaddr[MAX_ADDR_LEN]; struct ad_slave_info *ad_info; struct tlb_slave_info tlb_info; #ifdef CONFIG_NET_POLL_CONTROLLER @@ -401,6 +402,16 @@ static inline bool bond_slave_can_tx(struct slave *slave) bond_is_active_slave(slave); } +static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len) +{ + if (len == ETH_ALEN) { + ether_addr_copy(dst, src); + return; + } + + memcpy(dst, src, len); +} + #define BOND_PRI_RESELECT_ALWAYS 0 #define BOND_PRI_RESELECT_BETTER 1 #define BOND_PRI_RESELECT_FAILURE 2 @@ -504,13 +515,17 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline void bond_set_slave_link_state(struct slave *slave, int state, - bool notify) +static inline void bond_propose_link_state(struct slave *slave, int state) { - if (slave->link == state) + slave->link_new_state = state; +} + +static inline void bond_commit_link_state(struct slave *slave, bool notify) +{ + if (slave->link == slave->link_new_state) return; - slave->link = state; + slave->link = slave->link_new_state; if (notify) { bond_queue_slave_event(slave); bond_lower_state_changed(slave); @@ -523,6 +538,13 @@ static inline void bond_set_slave_link_state(struct slave *slave, int state, } } +static inline void bond_set_slave_link_state(struct slave *slave, int state, + bool notify) +{ + bond_propose_link_state(slave, state); + bond_commit_link_state(slave, notify); +} + static inline void bond_slave_link_notify(struct bonding *bond) { struct list_head *iter; @@ -592,6 +614,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); +void bond_work_init_all(struct bonding *bond); #ifdef CONFIG_PROC_FS void bond_create_proc_entry(struct bonding *bond); diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c0452de83086e51738a249bea5fa86d6fb121760..8ffd434676b7a270af73534f3dbcc26f370fe215 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -35,83 +35,101 @@ struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; +/* 0 - Reserved to indicate value not set + * 1..NR_CPUS - Reserved for sender_cpu + * NR_CPUS+1..~0 - Region available for NAPI IDs + */ +#define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) + static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; } -static inline u64 busy_loop_us_clock(void) +static inline bool sk_can_busy_loop(const struct sock *sk) { - return local_clock() >> 10; + return sk->sk_ll_usec && !signal_pending(current); } -static inline unsigned long sk_busy_loop_end_time(struct sock *sk) -{ - return busy_loop_us_clock() + ACCESS_ONCE(sk->sk_ll_usec); -} +bool sk_busy_loop_end(void *p, unsigned long start_time); -/* in poll/select we use the global sysctl_net_ll_poll value */ -static inline unsigned long busy_loop_end_time(void) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg); + +#else /* CONFIG_NET_RX_BUSY_POLL */ +static inline unsigned long net_busy_loop_on(void) { - return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); + return 0; } -static inline bool sk_can_busy_loop(const struct sock *sk) +static inline bool sk_can_busy_loop(struct sock *sk) { - return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current); + return false; } +#endif /* CONFIG_NET_RX_BUSY_POLL */ -static inline bool busy_loop_timeout(unsigned long end_time) +static inline unsigned long busy_loop_current_time(void) { - unsigned long now = busy_loop_us_clock(); - - return time_after(now, end_time); +#ifdef CONFIG_NET_RX_BUSY_POLL + return (unsigned long)(local_clock() >> 10); +#else + return 0; +#endif } -bool sk_busy_loop(struct sock *sk, int nonblock); - -/* used in the NIC receive handler to mark the skb */ -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) +/* in poll/select we use the global sysctl_net_ll_poll value */ +static inline bool busy_loop_timeout(unsigned long start_time) { - skb->napi_id = napi->napi_id; -} +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long bp_usec = READ_ONCE(sysctl_net_busy_poll); + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); -#else /* CONFIG_NET_RX_BUSY_POLL */ -static inline unsigned long net_busy_loop_on(void) -{ - return 0; + return time_after(now, end_time); + } +#endif + return true; } -static inline unsigned long busy_loop_end_time(void) +static inline bool sk_busy_loop_timeout(struct sock *sk, + unsigned long start_time) { - return 0; -} +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned long bp_usec = READ_ONCE(sk->sk_ll_usec); -static inline bool sk_can_busy_loop(struct sock *sk) -{ - return false; -} + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); -static inline void skb_mark_napi_id(struct sk_buff *skb, - struct napi_struct *napi) -{ + return time_after(now, end_time); + } +#endif + return true; } -static inline bool busy_loop_timeout(unsigned long end_time) +static inline void sk_busy_loop(struct sock *sk, int nonblock) { - return true; +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id = READ_ONCE(sk->sk_napi_id); + + if (napi_id >= MIN_NAPI_ID) + napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk); +#endif } -static inline bool sk_busy_loop(struct sock *sk, int nonblock) +/* used in the NIC receive handler to mark the skb */ +static inline void skb_mark_napi_id(struct sk_buff *skb, + struct napi_struct *napi) { - return false; +#ifdef CONFIG_NET_RX_BUSY_POLL + skb->napi_id = napi->napi_id; +#endif } -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* used in the protocol hanlder to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ead1aa6d003ef97b09847e8175b704da31c341d1..b083e6cbae8cb33f3c9d9078bab5468cd80d31f1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -363,6 +363,8 @@ static inline void wiphy_read_of_freq_limits(struct wiphy *wiphy) /** * struct vif_params - describes virtual interface parameters + * @flags: monitor interface flags, unchanged if 0, otherwise + * %MONITOR_FLAG_CHANGED will be set * @use_4addr: use 4-address frames * @macaddr: address to use for this virtual interface. * If this parameter is set to zero address the driver may @@ -370,13 +372,17 @@ static inline void wiphy_read_of_freq_limits(struct wiphy *wiphy) * This feature is only fully supported by drivers that enable the * %NL80211_FEATURE_MAC_ON_CREATE flag. Others may support creating ** only p2p devices with specified MAC. - * @vht_mumimo_groups: MU-MIMO groupID. used for monitoring only - * packets belonging to that MU-MIMO groupID. + * @vht_mumimo_groups: MU-MIMO groupID, used for monitoring MU-MIMO packets + * belonging to that MU-MIMO groupID; %NULL if not changed + * @vht_mumimo_follow_addr: MU-MIMO follow address, used for monitoring + * MU-MIMO packets going to the specified station; %NULL if not changed */ struct vif_params { + u32 flags; int use_4addr; u8 macaddr[ETH_ALEN]; - u8 vht_mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN]; + const u8 *vht_mumimo_groups; + const u8 *vht_mumimo_follow_addr; }; /** @@ -1007,9 +1013,9 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth */ enum rate_info_bw { + RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, - RATE_INFO_BW_20, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, @@ -1211,6 +1217,7 @@ static inline int cfg80211_get_station(struct net_device *dev, * Monitor interface configuration flags. Note that these must be the bits * according to the nl80211 flags. * + * @MONITOR_FLAG_CHANGED: set if the flags were changed * @MONITOR_FLAG_FCSFAIL: pass frames with bad FCS * @MONITOR_FLAG_PLCPFAIL: pass frames with bad PLCP * @MONITOR_FLAG_CONTROL: pass control frames @@ -1219,6 +1226,7 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { + MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, MONITOR_FLAG_FCSFAIL = 1<= 0). + */ +struct cfg80211_connect_resp_params { + int status; + const u8 *bssid; + struct cfg80211_bss *bss; + const u8 *req_ie; + size_t req_ie_len; + const u8 *resp_ie; + size_t resp_ie_len; + const u8 *fils_kek; + size_t fils_kek_len; + bool update_erp_next_seq_num; + u16 fils_erp_next_seq_num; + const u8 *pmk; + size_t pmk_len; + const u8 *pmkid; + enum nl80211_timeout_reason timeout_reason; +}; + +/** + * cfg80211_connect_done - notify cfg80211 of connection result + * + * @dev: network device + * @params: connection response parameters + * @gfp: allocation flags + * + * It should be called by the underlying driver once execution of the connection + * request from connect() has been completed. This is similar to + * cfg80211_connect_bss(), but takes a structure pointer for connection response + * parameters. Only one of the functions among cfg80211_connect_bss(), + * cfg80211_connect_result(), cfg80211_connect_timeout(), + * and cfg80211_connect_done() should be called. + */ +void cfg80211_connect_done(struct net_device *dev, + struct cfg80211_connect_resp_params *params, + gfp_t gfp); + /** * cfg80211_connect_bss - notify cfg80211 of connection result * @@ -5152,13 +5309,31 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * It should be called by the underlying driver once execution of the connection * request from connect() has been completed. This is similar to * cfg80211_connect_result(), but with the option of identifying the exact bss - * entry for the connection. Only one of these functions should be called. + * entry for the connection. Only one of the functions among + * cfg80211_connect_bss(), cfg80211_connect_result(), + * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ -void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, - struct cfg80211_bss *bss, const u8 *req_ie, - size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, int status, gfp_t gfp, - enum nl80211_timeout_reason timeout_reason); +static inline void +cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, + struct cfg80211_bss *bss, const u8 *req_ie, + size_t req_ie_len, const u8 *resp_ie, + size_t resp_ie_len, int status, gfp_t gfp, + enum nl80211_timeout_reason timeout_reason) +{ + struct cfg80211_connect_resp_params params; + + memset(¶ms, 0, sizeof(params)); + params.status = status; + params.bssid = bssid; + params.bss = bss; + params.req_ie = req_ie; + params.req_ie_len = req_ie_len; + params.resp_ie = resp_ie; + params.resp_ie_len = resp_ie_len; + params.timeout_reason = timeout_reason; + + cfg80211_connect_done(dev, ¶ms, gfp); +} /** * cfg80211_connect_result - notify cfg80211 of connection result @@ -5177,7 +5352,8 @@ void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, * It should be called by the underlying driver once execution of the connection * request from connect() has been completed. This is similar to * cfg80211_connect_bss() which allows the exact bss entry to be specified. Only - * one of these functions should be called. + * one of the functions among cfg80211_connect_bss(), cfg80211_connect_result(), + * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ static inline void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, @@ -5204,7 +5380,9 @@ cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * in a sequence where no explicit authentication/association rejection was * received from the AP. This could happen, e.g., due to not being able to send * out the Authentication or Association Request frame or timing out while - * waiting for the response. + * waiting for the response. Only one of the functions among + * cfg80211_connect_bss(), cfg80211_connect_result(), + * cfg80211_connect_timeout(), and cfg80211_connect_done() should be called. */ static inline void cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, @@ -5216,51 +5394,46 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, } /** - * cfg80211_roamed - notify cfg80211 of roaming + * struct cfg80211_roam_info - driver initiated roaming information * - * @dev: network device * @channel: the channel of the new AP - * @bssid: the BSSID of the new AP + * @bss: entry of bss to which STA got roamed (may be %NULL if %bssid is set) + * @bssid: the BSSID of the new AP (may be %NULL if %bss is set) * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @gfp: allocation flags - * - * It should be called by the underlying driver whenever it roamed - * from one AP to another while connected. */ -void cfg80211_roamed(struct net_device *dev, - struct ieee80211_channel *channel, - const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); +struct cfg80211_roam_info { + struct ieee80211_channel *channel; + struct cfg80211_bss *bss; + const u8 *bssid; + const u8 *req_ie; + size_t req_ie_len; + const u8 *resp_ie; + size_t resp_ie_len; +}; /** - * cfg80211_roamed_bss - notify cfg80211 of roaming + * cfg80211_roamed - notify cfg80211 of roaming * * @dev: network device - * @bss: entry of bss to which STA got roamed - * @req_ie: association request IEs (maybe be %NULL) - * @req_ie_len: association request IEs length - * @resp_ie: association response IEs (may be %NULL) - * @resp_ie_len: assoc response IEs length + * @info: information about the new BSS. struct &cfg80211_roam_info. * @gfp: allocation flags * - * This is just a wrapper to notify cfg80211 of roaming event with driver - * passing bss to avoid a race in timeout of the bss entry. It should be - * called by the underlying driver whenever it roamed from one AP to another - * while connected. Drivers which have roaming implemented in firmware - * may use this function to avoid a race in bss entry timeout where the bss - * entry of the new AP is seen in the driver, but gets timed out by the time - * it is accessed in __cfg80211_roamed() due to delay in scheduling + * This function may be called with the driver passing either the BSSID of the + * new AP or passing the bss entry to avoid a race in timeout of the bss entry. + * It should be called by the underlying driver whenever it roamed from one AP + * to another while connected. Drivers which have roaming implemented in + * firmware should pass the bss entry to avoid a race in bss entry timeout where + * the bss entry of the new AP is seen in the driver, but gets timed out by the + * time it is accessed in __cfg80211_roamed() due to delay in scheduling * rdev->event_work. In case of any failures, the reference is released - * either in cfg80211_roamed_bss() or in __cfg80211_romed(), Otherwise, - * it will be released while diconneting from the current bss. + * either in cfg80211_roamed() or in __cfg80211_romed(), Otherwise, it will be + * released while diconneting from the current bss. */ -void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); +void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, + gfp_t gfp); /** * cfg80211_disconnected - notify cfg80211 that connection was dropped diff --git a/include/net/devlink.h b/include/net/devlink.h index d29e5fc8258216b9d79604bc99b69b66cce3e443..ed7687bbf5d08f1bb5327f3f8c9e3e543e98e7c2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -25,6 +25,8 @@ struct devlink { struct list_head list; struct list_head port_list; struct list_head sb_list; + struct list_head dpipe_table_list; + struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; possible_net_t _net; @@ -49,6 +51,178 @@ struct devlink_sb_pool_info { enum devlink_sb_threshold_type threshold_type; }; +/** + * struct devlink_dpipe_field - dpipe field object + * @name: field name + * @id: index inside the headers field array + * @bitwidth: bitwidth + * @mapping_type: mapping type + */ +struct devlink_dpipe_field { + const char *name; + unsigned int id; + unsigned int bitwidth; + enum devlink_dpipe_field_mapping_type mapping_type; +}; + +/** + * struct devlink_dpipe_header - dpipe header object + * @name: header name + * @id: index, global/local detrmined by global bit + * @fields: fields + * @fields_count: number of fields + * @global: indicates if header is shared like most protocol header + * or driver specific + */ +struct devlink_dpipe_header { + const char *name; + unsigned int id; + struct devlink_dpipe_field *fields; + unsigned int fields_count; + bool global; +}; + +/** + * struct devlink_dpipe_match - represents match operation + * @type: type of match + * @header_index: header index (packets can have several headers of same + * type like in case of tunnels) + * @header: header + * @fieled_id: field index + */ +struct devlink_dpipe_match { + enum devlink_dpipe_match_type type; + unsigned int header_index; + struct devlink_dpipe_header *header; + unsigned int field_id; +}; + +/** + * struct devlink_dpipe_action - represents action operation + * @type: type of action + * @header_index: header index (packets can have several headers of same + * type like in case of tunnels) + * @header: header + * @fieled_id: field index + */ +struct devlink_dpipe_action { + enum devlink_dpipe_action_type type; + unsigned int header_index; + struct devlink_dpipe_header *header; + unsigned int field_id; +}; + +/** + * struct devlink_dpipe_value - represents value of match/action + * @action: action + * @match: match + * @mapping_value: in case the field has some mapping this value + * specified the mapping value + * @mapping_valid: specify if mapping value is valid + * @value_size: value size + * @value: value + * @mask: bit mask + */ +struct devlink_dpipe_value { + union { + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + }; + unsigned int mapping_value; + bool mapping_valid; + unsigned int value_size; + void *value; + void *mask; +}; + +/** + * struct devlink_dpipe_entry - table entry object + * @index: index of the entry in the table + * @match_values: match values + * @matche_values_count: count of matches tuples + * @action_values: actions values + * @action_values_count: count of actions values + * @counter: value of counter + * @counter_valid: Specify if value is valid from hardware + */ +struct devlink_dpipe_entry { + u64 index; + struct devlink_dpipe_value *match_values; + unsigned int match_values_count; + struct devlink_dpipe_value *action_values; + unsigned int action_values_count; + u64 counter; + bool counter_valid; +}; + +/** + * struct devlink_dpipe_dump_ctx - context provided to driver in order + * to dump + * @info: info + * @cmd: devlink command + * @skb: skb + * @nest: top attribute + * @hdr: hdr + */ +struct devlink_dpipe_dump_ctx { + struct genl_info *info; + enum devlink_command cmd; + struct sk_buff *skb; + struct nlattr *nest; + void *hdr; +}; + +struct devlink_dpipe_table_ops; + +/** + * struct devlink_dpipe_table - table object + * @priv: private + * @name: table name + * @size: maximum number of entries + * @counters_enabled: indicates if counters are active + * @counter_control_extern: indicates if counter control is in dpipe or + * external tool + * @table_ops: table operations + * @rcu: rcu + */ +struct devlink_dpipe_table { + void *priv; + struct list_head list; + const char *name; + u64 size; + bool counters_enabled; + bool counter_control_extern; + struct devlink_dpipe_table_ops *table_ops; + struct rcu_head rcu; +}; + +/** + * struct devlink_dpipe_table_ops - dpipe_table ops + * @actions_dump - dumps all tables actions + * @matches_dump - dumps all tables matches + * @entries_dump - dumps all active entries in the table + * @counters_set_update - when changing the counter status hardware sync + * maybe needed to allocate/free counter related + * resources + */ +struct devlink_dpipe_table_ops { + int (*actions_dump)(void *priv, struct sk_buff *skb); + int (*matches_dump)(void *priv, struct sk_buff *skb); + int (*entries_dump)(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx); + int (*counters_set_update)(void *priv, bool enable); +}; + +/** + * struct devlink_dpipe_headers - dpipe headers + * @headers - header array can be shared (global bit) or driver specific + * @headers_count - count of headers + */ +struct devlink_dpipe_headers { + struct devlink_dpipe_header **headers; + unsigned int headers_count; +}; + struct devlink_ops { int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); @@ -94,6 +268,8 @@ struct devlink_ops { int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); + int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode); }; static inline void *devlink_priv(struct devlink *devlink) @@ -132,6 +308,26 @@ int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u16 egress_pools_count, u16 ingress_tc_count, u16 egress_tc_count); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern); +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name); +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers); +void devlink_dpipe_headers_unregister(struct devlink *devlink); +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name); +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry); +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx); +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action); +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match); #else @@ -200,6 +396,71 @@ static inline void devlink_sb_unregister(struct devlink *devlink, { } +static inline int +devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + return 0; +} + +static inline void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ +} + +static inline int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers * + dpipe_headers) +{ + return 0; +} + +static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ +} + +static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + return false; +} + +static inline int +devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + return 0; +} + +static inline int +devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return 0; +} + +static inline int +devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + return 0; +} + +static inline int +devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + return 0; +} + +static inline int +devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + return 0; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dsa.h b/include/net/dsa.h index 4e13e695f0251d5c762c3089065f4eeb429033eb..8e24677b1c62a5842f81496d7d18c071d2cf0af6 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -19,6 +19,7 @@ #include #include #include +#include struct tc_action; struct phy_device; @@ -31,6 +32,8 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, DSA_TAG_PROTO_QCA, + DSA_TAG_PROTO_MTK, + DSA_TAG_PROTO_LAN9303, DSA_TAG_LAST, /* MUST BE LAST */ }; @@ -122,7 +125,7 @@ struct dsa_switch_tree { * protocol to use. */ struct net_device *master_netdev; - int (*rcv)(struct sk_buff *skb, + struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); @@ -182,6 +185,7 @@ struct dsa_port { unsigned int ageing_time; u8 stp_state; struct net_device *bridge_dev; + struct devlink_port devlink_port; }; struct dsa_switch { @@ -233,6 +237,13 @@ struct dsa_switch { u32 phys_mii_mask; struct mii_bus *slave_mii_bus; + /* Ageing Time limits in msecs */ + unsigned int ageing_time_min; + unsigned int ageing_time_max; + + /* devlink used to represent this switch device */ + struct devlink *devlink; + /* Dynamically allocated ports, keep last */ size_t num_ports; struct dsa_port ports[]; @@ -248,6 +259,11 @@ static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) return !!((ds->dsa_port_mask) & (1 << p)); } +static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) +{ + return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); +} + static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) { return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; @@ -287,7 +303,7 @@ struct dsa_notifier_bridge_info { struct dsa_switch_ops { /* - * Probing and setup. + * Legacy probing. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, @@ -442,6 +458,14 @@ struct dsa_switch_ops { bool ingress); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); + + /* + * Cross-chip operations + */ + int (*crosschip_bridge_join)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *br); + void (*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *br); }; struct dsa_switch_driver { @@ -449,9 +473,11 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; +/* Legacy driver registration */ void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); + struct net_device *dsa_dev_to_net_device(struct device *dev); static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) @@ -459,6 +485,15 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) return dst->rcv != NULL; } +static inline bool netdev_uses_dsa(struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_NET_DSA) + if (dev->dsa_ptr != NULL) + return dsa_uses_tagged_protocol(dev->dsa_ptr); +#endif + return false; +} + struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n); void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds, struct device *dev); diff --git a/include/net/dst.h b/include/net/dst.h index 049af33da3b6c95897d544670cea65c542317673..cfc0437841665d7ed46a714915c50d723c24901c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -107,10 +107,16 @@ struct dst_entry { }; }; +struct dst_metrics { + u32 metrics[RTAX_MAX]; + atomic_t refcnt; +}; +extern const struct dst_metrics dst_default_metrics; + u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL +#define DST_METRICS_REFCOUNTED 0x2UL #define DST_METRICS_FLAGS 0x3UL #define __DST_METRICS_PTR(Y) \ ((u32 *)((Y) & ~DST_METRICS_FLAGS)) diff --git a/include/net/esp.h b/include/net/esp.h index a43be85aedc44594265695e98452ae491b852d83..c41994d1bfef0bce761227828638d466a01d6ebc 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -10,4 +10,23 @@ static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) return (struct ip_esp_hdr *)skb_transport_header(skb); } +struct esp_info { + struct ip_esp_hdr *esph; + __be64 seqno; + int tfclen; + int tailen; + int plen; + int clen; + int len; + int nfrags; + __u8 proto; + bool inplace; +}; + +int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp_input_done2(struct sk_buff *skb, int err); +int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp); +int esp6_input_done2(struct sk_buff *skb, int err); #endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 8dbfdf728cd8ce901b3b05f0e58b4eeee25051fe..76c7300626d675bd70374efe45e5608c65f44f84 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -141,7 +141,10 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); +bool fib_rule_matchall(const struct fib_rule *rule); -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); #endif diff --git a/include/net/flow.h b/include/net/flow.h index 6984f1913dc124ab97627a4a5ecb2aba9f93e901..bae198b3039e6e66829c2717c1baf2baebbec6a2 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -202,7 +202,7 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) typedef unsigned long flow_compare_t; -static inline size_t flow_key_size(u16 family) +static inline unsigned int flow_key_size(u16 family) { switch (family) { case AF_INET: diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index ac9703018a3a63bd392aa37f31b1afdf54d69aa0..8d21d448daa9d19ad58a753e373035f2a1be59bb 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -41,6 +41,13 @@ struct flow_dissector_key_vlan { u16 padding; }; +struct flow_dissector_key_mpls { + u32 mpls_ttl:8, + mpls_bos:1, + mpls_tc:3, + mpls_label:20; +}; + struct flow_dissector_key_keyid { __be32 keyid; }; @@ -169,6 +176,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/flowcache.h b/include/net/flowcache.h index 9caf3bfc8d2dafcc89064ec54e78682244d71fb7..51eb971e897378e25329b06f80678f522eec2ea6 100644 --- a/include/net/flowcache.h +++ b/include/net/flowcache.h @@ -8,7 +8,7 @@ struct flow_cache_percpu { struct hlist_head *hash_table; - int hash_count; + unsigned int hash_count; u32 hash_rnd; int hash_rnd_recalc; struct tasklet_struct flush_tasklet; @@ -18,8 +18,8 @@ struct flow_cache { u32 hash_shift; struct flow_cache_percpu __percpu *percpu; struct hlist_node node; - int low_watermark; - int high_watermark; + unsigned int low_watermark; + unsigned int high_watermark; struct timer_list rnd_timer; }; #endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index a34275be360001e89740fe4325a53905683e9778..68b88192b00c6200aa0b8b2628554952d0d79a1c 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -84,6 +84,7 @@ struct nlattr **genl_family_attrbuf(const struct genl_family *family); * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers + * @extack: extended ACK report struct */ struct genl_info { u32 snd_seq; @@ -94,6 +95,7 @@ struct genl_info { struct nlattr ** attrs; possible_net_t _net; void * user_ptr[2]; + struct netlink_ext_ack *extack; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -106,6 +108,16 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) write_pnet(&info->_net, net); } +#define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) + +static inline int genl_err_attr(struct genl_info *info, int err, + struct nlattr *attr) +{ + info->extack->bad_attr = attr; + + return err; +} + /** * struct genl_ops - generic netlink operations * @cmd: command identifier @@ -162,14 +174,16 @@ genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy - * */ + * @extack: extended ACK report struct + */ static inline int genlmsg_parse(const struct nlmsghdr *nlh, const struct genl_family *family, struct nlattr *tb[], int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, - policy); + policy, extack); } /** diff --git a/include/net/ip.h b/include/net/ip.h index bf264a8db1ce3b1b29b9a5cc9732df323154e7e5..821cedcc8e73b68af72d1918242c25d757c63d24 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -33,6 +33,8 @@ #include #include +#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ + struct sock; struct inet_skb_parm { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9dc2c182a263218ad63ceb326893b4c86c21ff95..f5e625f5336799d88c15df94d8776680dddcfe19 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -84,6 +84,7 @@ struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, int flags); +void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 1b1cf33cbfb02eaf4eb1eeb92be474076fdeebe4..08fbc7f7d8d70ffc43f93881b0fbeab5b3fe8abb 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -33,6 +33,8 @@ struct __ip6_tnl_parm { __be16 o_flags; __be32 i_key; __be32 o_key; + + __u32 fwmark; }; /* IPv6 tunnel */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 368bb4024b78c411d02a842f340f9fa11a9b5f7e..f7f6aa789c6174c41ca9739206d586c559c1f3a1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -114,11 +114,11 @@ struct fib_info { __be32 fib_prefsrc; u32 fib_tb_id; u32 fib_priority; - u32 *fib_metrics; -#define fib_mtu fib_metrics[RTAX_MTU-1] -#define fib_window fib_metrics[RTAX_WINDOW-1] -#define fib_rtt fib_metrics[RTAX_RTT-1] -#define fib_advmss fib_metrics[RTAX_ADVMSS-1] + struct dst_metrics *fib_metrics; +#define fib_mtu fib_metrics->metrics[RTAX_MTU-1] +#define fib_window fib_metrics->metrics[RTAX_WINDOW-1] +#define fib_rtt fib_metrics->metrics[RTAX_RTT-1] +#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; @@ -213,6 +213,11 @@ struct fib_entry_notifier_info { u32 tb_id; }; +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; @@ -232,9 +237,21 @@ enum fib_event_type { int register_fib_notifier(struct notifier_block *nb, void (*cb)(struct notifier_block *nb)); int unregister_fib_notifier(struct notifier_block *nb); +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); +void fib_notify(struct net *net, struct notifier_block *nb); +#ifdef CONFIG_IP_MULTIPLE_TABLES +void fib_rules_notify(struct net *net, struct notifier_block *nb); +#else +static inline void fib_rules_notify(struct net *net, struct notifier_block *nb) +{ +} +#endif + struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -299,6 +316,11 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, return err; } +static inline bool fib4_rule_default(const struct fib_rule *rule) +{ + return true; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -343,6 +365,8 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, return err; } +bool fib4_rule_default(const struct fib_rule *rule); + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ @@ -371,17 +395,13 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); -extern u32 fib_multipath_secret __read_mostly; - -static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) -{ - return jhash_2words((__force u32)saddr, (__force u32)daddr, - fib_multipath_secret) >> 1; -} - +#ifdef CONFIG_IP_ROUTE_MULTIPATH +int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, + const struct sk_buff *skb); +#endif void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, - struct flowi4 *fl4, int mp_hash); + struct flowi4 *fl4, const struct sk_buff *skb); /* Exported by fib_trie.c */ void fib_trie_init(void); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 95056796657cf9b11c24ae1497f1d5c01a9fd178..520809912f0392e84b12cc8ce99d82e3e27f5d78 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -132,6 +132,7 @@ struct ip_tunnel { unsigned int prl_count; /* # of entries in PRL */ unsigned int ip_tnl_net_id; struct gro_cells gro_cells; + __u32 fwmark; bool collect_md; bool ignore_df; }; @@ -273,9 +274,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p); + struct ip_tunnel_parm *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p); + struct ip_tunnel_parm *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); struct ip_tunnel_encap_ops { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 7bdfa7d783639d8b65c18bd7f5a6ea5fa4fbb7da..4f4f786255efe16725cbdea7bb29362ea6023185 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -12,6 +12,8 @@ #include /* for struct list_head */ #include /* for struct rwlock_t */ #include /* for struct atomic_t */ +#include /* for struct refcount_t */ + #include #include #include @@ -525,7 +527,7 @@ struct ip_vs_conn { struct netns_ipvs *ipvs; /* counter and timer */ - atomic_t refcnt; /* reference count */ + refcount_t refcnt; /* reference count */ struct timer_list timer; /* Expiration timer */ volatile unsigned long timeout; /* timeout */ @@ -667,7 +669,7 @@ struct ip_vs_dest { atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ - atomic_t refcnt; /* reference counter */ + refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ unsigned long idle_start; /* start time, jiffies */ @@ -1211,14 +1213,14 @@ struct ip_vs_conn * ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, */ static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp) { - return atomic_inc_not_zero(&cp->refcnt); + return refcount_inc_not_zero(&cp->refcnt); } /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) { smp_mb__before_atomic(); - atomic_dec(&cp->refcnt); + refcount_dec(&cp->refcnt); } void ip_vs_conn_put(struct ip_vs_conn *cp); void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); @@ -1347,8 +1349,6 @@ int ip_vs_protocol_init(void); void ip_vs_protocol_cleanup(void); void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); int *ip_vs_create_timeout_table(int *table, int size); -int ip_vs_set_state_timeout(int *table, int num, const char *const *names, - const char *name, int to); void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -1410,18 +1410,18 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp); static inline void ip_vs_dest_hold(struct ip_vs_dest *dest) { - atomic_inc(&dest->refcnt); + refcount_inc(&dest->refcnt); } static inline void ip_vs_dest_put(struct ip_vs_dest *dest) { smp_mb__before_atomic(); - atomic_dec(&dest->refcnt); + refcount_dec(&dest->refcnt); } static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest) { - if (atomic_dec_and_test(&dest->refcnt)) + if (refcount_dec_and_test(&dest->refcnt)) kfree(dest); } @@ -1553,13 +1553,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (!ct || !nf_ct_is_untracked(ct)) { - struct nf_conn *untracked; - + if (ct) { nf_conntrack_put(&ct->ct_general); - untracked = nf_ct_untracked_get(); - nf_conntrack_get(&untracked->ct_general); - nf_ct_set(skb, untracked, IP_CT_NEW); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } #endif } @@ -1618,7 +1614,7 @@ static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, if (!(cp->flags & IP_VS_CONN_F_NFCT)) return false; ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) + if (ct) return true; #endif return false; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index dbf0abba33b8da21be05abf6e719f69542da80fc..3e505bbff8ca4a41f8d39fefcd59aa01b85424f4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1007,6 +1007,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, */ extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; +extern const struct proto_ops inet6_sockraw_ops; struct group_source_req; struct group_filter; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a3bab3c5ecfb302e5df2d9e5d5ec36e2c368ef9b..76ed24a201eb9075672e8d2f55fffe5ae147923d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015 - 2016 Intel Deutschland GmbH + * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -299,6 +299,8 @@ struct ieee80211_vif_chanctx_switch { * context had been assigned. * @BSS_CHANGED_OCB: OCB join status changed * @BSS_CHANGED_MU_GROUPS: VHT MU-MIMO group id or user position changed + * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected + * keep alive) changed. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -325,6 +327,7 @@ enum ieee80211_bss_change { BSS_CHANGED_BANDWIDTH = 1<<21, BSS_CHANGED_OCB = 1<<22, BSS_CHANGED_MU_GROUPS = 1<<23, + BSS_CHANGED_KEEP_ALIVE = 1<<24, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -501,6 +504,10 @@ struct ieee80211_mu_group_data { * implies disabled. As with the cfg80211 callback, a change here should * cause an event to be sent indicating where the current value is in * relation to the newly configured threshold. + * @cqm_rssi_low: Connection quality monitor RSSI lower threshold, a zero value + * implies disabled. This is an alternative mechanism to the single + * threshold event and can't be enabled simultaneously with it. + * @cqm_rssi_high: Connection quality monitor RSSI upper threshold. * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The * may filter ARP queries targeted for other addresses than listed here. @@ -529,6 +536,13 @@ struct ieee80211_mu_group_data { * @allow_p2p_go_ps: indication for AP or P2P GO interface, whether it's allowed * to use P2P PS mechanism or not. AP/P2P GO is not allowed to use P2P PS * if it has associated clients without P2P PS support. + * @max_idle_period: the time period during which the station can refrain from + * transmitting frames to its associated AP without being disassociated. + * In units of 1000 TUs. Zero value indicates that the AP did not include + * a (valid) BSS Max Idle Period Element. + * @protected_keep_alive: if set, indicates that the station should send an RSN + * protected frame to the AP to reset the idle timer at the AP for the + * station. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -553,6 +567,8 @@ struct ieee80211_bss_conf { u16 ht_operation_mode; s32 cqm_rssi_thold; u32 cqm_rssi_hyst; + s32 cqm_rssi_low; + s32 cqm_rssi_high; struct cfg80211_chan_def chandef; struct ieee80211_mu_group_data mu_group; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; @@ -567,6 +583,8 @@ struct ieee80211_bss_conf { enum nl80211_tx_power_setting txpower_type; struct ieee80211_p2p_noa_attr p2p_noa_attr; bool allow_p2p_go_ps; + u16 max_idle_period; + bool protected_keep_alive; }; /** @@ -942,6 +960,19 @@ struct ieee80211_tx_info { }; }; +/** + * struct ieee80211_tx_status - extended tx staus info for rate control + * + * @sta: Station that the packet was transmitted for + * @info: Basic tx status information + * @skb: Packet skb (can be NULL if not provided by the driver) + */ +struct ieee80211_tx_status { + struct ieee80211_sta *sta; + struct ieee80211_tx_info *info; + struct sk_buff *skb; +}; + /** * struct ieee80211_scan_ies - descriptors for different blocks of IEs * @@ -1039,16 +1070,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * (including FCS) was received. * @RX_FLAG_MACTIME_PLCP_START: The timestamp passed in the RX status (@mactime * field) is valid and contains the time the SYNC preamble was received. - * @RX_FLAG_SHORTPRE: Short preamble was used for this frame - * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index - * @RX_FLAG_VHT: VHT MCS was used and rate_index is MCS index - * @RX_FLAG_40MHZ: HT40 (40 MHz) was used - * @RX_FLAG_SHORT_GI: Short guard interval was used * @RX_FLAG_NO_SIGNAL_VAL: The signal strength value is not present. * Valid only for data frames (mainly A-MPDU) - * @RX_FLAG_HT_GF: This frame was received in a HT-greenfield transmission, if - * the driver fills this value it should add %IEEE80211_RADIOTAP_MCS_HAVE_FMT - * to hw.radiotap_mcs_details to advertise that fact * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference * number (@ampdu_reference) must be populated and be a distinct number for * each A-MPDU @@ -1061,7 +1084,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * is stored in the @ampdu_delimiter_crc field) * @RX_FLAG_MIC_STRIPPED: The mic was stripped of this packet. Decryption was * done by the hardware - * @RX_FLAG_LDPC: LDPC was used * @RX_FLAG_ONLY_MONITOR: Report frame only to monitor interfaces without * processing it in any regular way. * This is useful if drivers offload some frames but still want to report @@ -1070,9 +1092,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * monitor interfaces. * This is useful if drivers offload some frames but still want to report * them for sniffing purposes. - * @RX_FLAG_STBC_MASK: STBC 2 bit bitmask. 1 - Nss=1, 2 - Nss=2, 3 - Nss=3 - * @RX_FLAG_10MHZ: 10 MHz (half channel) was used - * @RX_FLAG_5MHZ: 5 MHz (quarter channel) was used * @RX_FLAG_AMSDU_MORE: Some drivers may prefer to report separate A-MSDU * subframes instead of a one huge frame for performance reasons. * All, but the last MSDU from an A-MSDU should have this flag set. E.g. @@ -1100,50 +1119,52 @@ enum mac80211_rx_flags { RX_FLAG_FAILED_FCS_CRC = BIT(5), RX_FLAG_FAILED_PLCP_CRC = BIT(6), RX_FLAG_MACTIME_START = BIT(7), - RX_FLAG_SHORTPRE = BIT(8), - RX_FLAG_HT = BIT(9), - RX_FLAG_40MHZ = BIT(10), - RX_FLAG_SHORT_GI = BIT(11), - RX_FLAG_NO_SIGNAL_VAL = BIT(12), - RX_FLAG_HT_GF = BIT(13), - RX_FLAG_AMPDU_DETAILS = BIT(14), - RX_FLAG_PN_VALIDATED = BIT(15), - RX_FLAG_DUP_VALIDATED = BIT(16), - RX_FLAG_AMPDU_LAST_KNOWN = BIT(17), - RX_FLAG_AMPDU_IS_LAST = BIT(18), - RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(19), - RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(20), - RX_FLAG_MACTIME_END = BIT(21), - RX_FLAG_VHT = BIT(22), - RX_FLAG_LDPC = BIT(23), - RX_FLAG_ONLY_MONITOR = BIT(24), - RX_FLAG_SKIP_MONITOR = BIT(25), - RX_FLAG_STBC_MASK = BIT(26) | BIT(27), - RX_FLAG_10MHZ = BIT(28), - RX_FLAG_5MHZ = BIT(29), - RX_FLAG_AMSDU_MORE = BIT(30), - RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31), - RX_FLAG_MIC_STRIPPED = BIT_ULL(32), - RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33), - RX_FLAG_ICV_STRIPPED = BIT_ULL(34), + RX_FLAG_NO_SIGNAL_VAL = BIT(8), + RX_FLAG_AMPDU_DETAILS = BIT(9), + RX_FLAG_PN_VALIDATED = BIT(10), + RX_FLAG_DUP_VALIDATED = BIT(11), + RX_FLAG_AMPDU_LAST_KNOWN = BIT(12), + RX_FLAG_AMPDU_IS_LAST = BIT(13), + RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14), + RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15), + RX_FLAG_MACTIME_END = BIT(16), + RX_FLAG_ONLY_MONITOR = BIT(17), + RX_FLAG_SKIP_MONITOR = BIT(18), + RX_FLAG_AMSDU_MORE = BIT(19), + RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(20), + RX_FLAG_MIC_STRIPPED = BIT(21), + RX_FLAG_ALLOW_SAME_PN = BIT(22), + RX_FLAG_ICV_STRIPPED = BIT(23), }; -#define RX_FLAG_STBC_SHIFT 26 - /** - * enum mac80211_rx_vht_flags - receive VHT flags + * enum mac80211_rx_encoding_flags - MCS & bandwidth flags * - * These flags are used with the @vht_flag member of - * &struct ieee80211_rx_status. - * @RX_VHT_FLAG_80MHZ: 80 MHz was used - * @RX_VHT_FLAG_160MHZ: 160 MHz was used - * @RX_VHT_FLAG_BF: packet was beamformed - */ + * @RX_ENC_FLAG_SHORTPRE: Short preamble was used for this frame + * @RX_ENC_FLAG_SHORT_GI: Short guard interval was used + * @RX_ENC_FLAG_HT_GF: This frame was received in a HT-greenfield transmission, + * if the driver fills this value it should add + * %IEEE80211_RADIOTAP_MCS_HAVE_FMT + * to hw.radiotap_mcs_details to advertise that fact + * @RX_ENC_FLAG_LDPC: LDPC was used + * @RX_ENC_FLAG_STBC_MASK: STBC 2 bit bitmask. 1 - Nss=1, 2 - Nss=2, 3 - Nss=3 + * @RX_ENC_FLAG_BF: packet was beamformed + */ +enum mac80211_rx_encoding_flags { + RX_ENC_FLAG_SHORTPRE = BIT(0), + RX_ENC_FLAG_SHORT_GI = BIT(2), + RX_ENC_FLAG_HT_GF = BIT(3), + RX_ENC_FLAG_STBC_MASK = BIT(4) | BIT(5), + RX_ENC_FLAG_LDPC = BIT(6), + RX_ENC_FLAG_BF = BIT(7), +}; -enum mac80211_rx_vht_flags { - RX_VHT_FLAG_80MHZ = BIT(0), - RX_VHT_FLAG_160MHZ = BIT(1), - RX_VHT_FLAG_BF = BIT(2), +#define RX_ENC_FLAG_STBC_SHIFT 4 + +enum mac80211_rx_encoding { + RX_ENC_LEGACY = 0, + RX_ENC_HT, + RX_ENC_VHT, }; /** @@ -1173,9 +1194,11 @@ enum mac80211_rx_vht_flags { * @antenna: antenna used * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) - * @vht_nss: number of streams (VHT only) + * @nss: number of streams (VHT and HE only) * @flag: %RX_FLAG_\* - * @vht_flag: %RX_VHT_FLAG_\* + * @encoding: &enum mac80211_rx_encoding + * @bw: &enum rate_info_bw + * @enc_flags: uses bits from &enum mac80211_rx_encoding_flags * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1186,11 +1209,12 @@ struct ieee80211_rx_status { u64 boottime_ns; u32 device_timestamp; u32 ampdu_reference; - u64 flag; + u32 flag; u16 freq; - u8 vht_flag; + u8 enc_flags; + u8 encoding:2, bw:3; u8 rate_idx; - u8 vht_nss; + u8 nss; u8 rx_flags; u8 band; u8 antenna; @@ -4199,6 +4223,23 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_tx_status_ext - extended transmit status callback + * + * This function can be used as a replacement for ieee80211_tx_status + * in drivers that may want to provide extra information that does not + * fit into &struct ieee80211_tx_info. + * + * Calls to this function for a single hardware must be synchronized + * against each other. Calls to this function, ieee80211_tx_status_ni() + * and ieee80211_tx_status_irqsafe() may not be mixed for a single hardware. + * + * @hw: the hardware the frame was transmitted by + * @status: tx status information + */ +void ieee80211_tx_status_ext(struct ieee80211_hw *hw, + struct ieee80211_tx_status *status); + /** * ieee80211_tx_status_noskb - transmit status callback without skb * @@ -4215,9 +4256,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, * (NULL for multicast packets) * @info: tx status information */ -void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, - struct ieee80211_tx_info *info); +static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, + struct ieee80211_tx_info *info) +{ + struct ieee80211_tx_status status = { + .sta = sta, + .info = info, + }; + + ieee80211_tx_status_ext(hw, &status); +} /** * ieee80211_tx_status_ni - transmit status callback (in process context) @@ -5438,9 +5487,6 @@ void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, * RTS threshold * @short_preamble: whether mac80211 will request short-preamble transmission * if the selected rate supports it - * @max_rate_idx: user-requested maximum (legacy) rate - * (deprecated; this will be removed once drivers get updated to use - * rate_idx_mask) * @rate_idx_mask: user-requested (legacy) rate mask * @rate_idx_mcs_mask: user-requested MCS rate mask (NULL if not in use) * @bss: whether this frame is sent out in AP or IBSS mode @@ -5452,7 +5498,6 @@ struct ieee80211_tx_rate_control { struct sk_buff *skb; struct ieee80211_tx_rate reported_rate; bool rts, short_preamble; - u8 max_rate_idx; u32 rate_idx_mask; u8 *rate_idx_mcs_mask; bool bss; @@ -5474,10 +5519,9 @@ struct rate_control_ops { void (*free_sta)(void *priv, struct ieee80211_sta *sta, void *priv_sta); - void (*tx_status_noskb)(void *priv, - struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct ieee80211_tx_info *info); + void (*tx_status_ext)(void *priv, + struct ieee80211_supported_band *sband, + void *priv_sta, struct ieee80211_tx_status *st); void (*tx_status)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb); diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h index 179253f9dcfd986ef806331044bc4973f1cc7d6e..9d22bf67ac86623eaaea2c49fbe1140747ce67e8 100644 --- a/include/net/mpls_iptunnel.h +++ b/include/net/mpls_iptunnel.h @@ -14,11 +14,12 @@ #ifndef _NET_MPLS_IPTUNNEL_H #define _NET_MPLS_IPTUNNEL_H 1 -#define MAX_NEW_LABELS 2 - struct mpls_iptunnel_encap { - u32 label[MAX_NEW_LABELS]; u8 labels; + u8 ttl_propagate; + u8 default_ttl; + u8 reserved1; + u32 label[0]; }; static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 8a0214654b6b10bc480d7e6dc8195555ca58dc9a..1036c902d2c9904ed084fcd5a4d8dc70b7902cbe 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -439,8 +439,10 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh, * IGMP */ int igmp6_init(void); +int igmp6_late_init(void); void igmp6_cleanup(void); +void igmp6_late_cleanup(void); int igmp6_event_query(struct sk_buff *skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 5ebf6949116097f60e668b0c2c4c48dd1639e5e8..e4dd3a2140341049fabfbff1ec55406fbe11d5b2 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -314,7 +314,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, } void neigh_destroy(struct neighbour *neigh); int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); -int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags); +int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, + u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); @@ -449,7 +450,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) { unsigned int seq; - int hh_len; + unsigned int hh_len; do { seq = read_seqbegin(&hh->hh_lock); @@ -458,7 +459,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb /* this is inlined by gcc */ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); } else { - int hh_alen = HH_DATA_ALIGN(hh_len); + unsigned int hh_alen = HH_DATA_ALIGN(hh_len); memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); } diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index af8fe8a909dc0ca62e54056cf4be9d0d4ea82477..fe80bb48ab1f0c7b1665a10a9bf2388b32eb5013 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +141,9 @@ struct net { #endif #if IS_ENABLED(CONFIG_MPLS) struct netns_mpls mpls; +#endif +#if IS_ENABLED(CONFIG_CAN) + struct netns_can can; #endif struct sock *diag_nlsk; atomic_t fnhe_genid; diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 6ff32815641b21ba0d3c20214806c4f16aa217ca..919e4e8af3272b3d66580e4c60c79bdc39b47616 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -14,7 +14,6 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index c59b82456f89cd421fde702f13c7cfe59f73266a..eaea968f86570db7010011b1af4a3608f2dee8f9 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -5,7 +5,6 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 19605878da4739d04f0642b20f8641ed8601d2eb..8ece3612d0cd6bf8fe4ae6c347c2cb30da2f553f 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -50,25 +50,6 @@ union nf_conntrack_expect_proto { #define NF_CT_ASSERT(x) #endif -struct nf_conntrack_helper; - -/* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 4 - -/* nf_conn feature for connections that have a helper */ -struct nf_conn_help { - /* Helper. if any */ - struct nf_conntrack_helper __rcu *helper; - - struct hlist_head expectations; - - /* Current number of expected connections */ - u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; - - /* private helper information. */ - char data[]; -}; - #include #include @@ -243,14 +224,6 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); -/* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); -static inline struct nf_conn *nf_ct_untracked_get(void) -{ - return raw_cpu_ptr(&nf_conntrack_untracked); -} -void nf_ct_untracked_status_or(unsigned long bits); - /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), @@ -281,11 +254,6 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct) return test_bit(IPS_DYING_BIT, &ct->status); } -static inline int nf_ct_is_untracked(const struct nf_conn *ct) -{ - return test_bit(IPS_UNTRACKED_BIT, &ct->status); -} - /* Packet is received from loopback */ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) { diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 84ec7ca5f195db411b7e07c6dce641a286b5ea0a..81d7f8a3094557c3cd518be809c524b4448bbe1b 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -65,7 +65,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { if (!nf_ct_is_confirmed(ct)) ret = __nf_conntrack_confirm(skb); if (likely(ret == NF_ACCEPT)) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 12d967b587264af71c84d62faca95a4e7ace096f..2a10c6570fccfcd633351a3b881bff4d51e2f45c 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -20,11 +20,11 @@ enum nf_ct_ecache_state { struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ + u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ + enum nf_ct_ecache_state state:8;/* ecache state */ u32 portid; /* netlink portid of destroyer */ - enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 5ed33ea4718ef5a101689432825ebb1a48fcc0d9..2ba54feaccd8d4a6ebccc84fd5e21f45bff665ca 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -5,6 +5,8 @@ #ifndef _NF_CONNTRACK_EXPECT_H #define _NF_CONNTRACK_EXPECT_H +#include + #include #include @@ -37,7 +39,7 @@ struct nf_conntrack_expect { struct timer_list timeout; /* Usage count. */ - atomic_t use; + refcount_t use; /* Flags */ unsigned int flags; @@ -71,6 +73,7 @@ struct nf_conntrack_expect_policy { }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 +#define NF_CT_EXPECT_MAX_CNT 255 int nf_conntrack_expect_pernet_init(struct net *net); void nf_conntrack_expect_pernet_fini(struct net *net); @@ -102,6 +105,7 @@ static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 1c3035dda31f66a33d50cf2617d3a70288812383..4944bc9153cf5d307c4168079d9eb4b6a2c09f21 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -43,8 +43,8 @@ enum nf_ct_ext_id { /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { struct rcu_head rcu; - u16 offset[NF_CT_EXT_NUM]; - u16 len; + u8 offset[NF_CT_EXT_NUM]; + u8 len; char data[0]; }; @@ -69,12 +69,7 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) /* Destroy all relationships */ -void __nf_ct_ext_destroy(struct nf_conn *ct); -static inline void nf_ct_ext_destroy(struct nf_conn *ct) -{ - if (ct->ext) - __nf_ct_ext_destroy(ct); -} +void nf_ct_ext_destroy(struct nf_conn *ct); /* Free operation. If you want to free a object referred from private area, * please implement __nf_ct_ext_free() and call it. @@ -86,15 +81,7 @@ static inline void nf_ct_ext_free(struct nf_conn *ct) } /* Add this type, returns pointer to data or NULL. */ -void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp); - -#define nf_ct_ext_add(ct, id, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp))) -#define nf_ct_ext_add_length(ct, id, len, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp))) - -#define NF_CT_EXT_F_PREALLOC 0x0001 +void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ @@ -102,15 +89,11 @@ struct nf_ct_ext_type { enum nf_ct_ext_id id; - unsigned int flags; - /* Length and min alignment. */ u8 len; u8 align; - /* initial size of nf_ct_ext. */ - u8 alloc_size; }; -int nf_ct_extend_register(struct nf_ct_ext_type *type); -void nf_ct_extend_unregister(struct nf_ct_ext_type *type); +int nf_ct_extend_register(const struct nf_ct_ext_type *type); +void nf_ct_extend_unregister(const struct nf_ct_ext_type *type); #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1eaac1f4cd6aef2b12cfe51621a5ce6da5c4ba13..c519bb5b5bb8806089886caacaca4846fe3eeb98 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -9,6 +9,7 @@ #ifndef _NF_CONNTRACK_HELPER_H #define _NF_CONNTRACK_HELPER_H +#include #include #include #include @@ -26,12 +27,10 @@ struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ + refcount_t refcnt; struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; - /* length of internal data, ie. sizeof(struct nf_ct_*_master) */ - size_t data_len; - /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -49,20 +48,46 @@ struct nf_conntrack_helper { unsigned int expect_class_max; unsigned int flags; - unsigned int queue_num; /* For user-space helpers. */ + + /* For user-space helpers: */ + unsigned int queue_num; + /* length of userspace private data stored in nf_conn_help->data */ + u16 data_len; +}; + +/* Must be kept in sync with the classes defined by helpers */ +#define NF_CT_MAX_EXPECT_CLASSES 4 + +/* nf_conn feature for connections that have a helper */ +struct nf_conn_help { + /* Helper. if any */ + struct nf_conntrack_helper __rcu *helper; + + struct hlist_head expectations; + + /* Current number of expected connections */ + u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[32] __aligned(8); }; +#define NF_CT_HELPER_BUILD_BUG_ON(structsize) \ + BUILD_BUG_ON((structsize) > FIELD_SIZEOF(struct nf_conn_help, data)) + struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper); + void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, - u32 expect_class_max, u32 data_len, + u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 85e993e278d5e1e7a886e772dd69f5031214410d..7032e044bbe2a364ae0cfdb629f2e0c20ac81e8a 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -58,6 +58,9 @@ struct nf_conntrack_l4proto { unsigned int dataoff, u_int8_t pf, unsigned int hooknum); + /* called by gc worker if table is full */ + bool (*can_early_drop)(const struct nf_conn *ct); + /* Print out the per-protocol part of the tuple. Return like seq_* */ void (*print_tuple)(struct seq_file *s, const struct nf_conntrack_tuple *); diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index b0ca402c1f72e20bae492c634f663b8846e788a5..a2fcb5271726d3f5c9e20ccfb521385b59951610 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -52,6 +52,8 @@ struct synproxy_stats { struct synproxy_net { struct nf_conn *tmpl; struct synproxy_stats __percpu *stats; + unsigned int hook_ref4; + unsigned int hook_ref6; }; extern unsigned int synproxy_net_id; diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index 5cc5e9e6171a03db471407c708578b4794c22b92..d40b89355fdd345617cf21593922753613190a11 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -12,7 +13,7 @@ struct ctnl_timeout { struct list_head head; struct rcu_head rcu_head; - atomic_t refcnt; + refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; __u16 l3num; struct nf_conntrack_l4proto *l4proto; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c327a431a6f38103598b82182d463dd728f70443..05c82a1a42679cf336c4ecc2addc30bdaceecbbe 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -67,7 +67,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, { #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) - return nat->masq_index && hooknum == NF_INET_POST_ROUTING && + return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && nat->masq_index != out->ifindex; #else diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 01bcc6bfbcc9034e399a938a6a6dd83b2bed6ef7..fbfa5acf4f14628f6334a36943bd1ab01844bb6f 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,31 +7,31 @@ struct sk_buff; /* These return true or false. */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned int match_offset, - unsigned int match_len, const char *rep_buffer, - unsigned int rep_len, bool adjust); +bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len, bool adjust); -static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +static inline bool nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) { return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, match_offset, match_len, rep_buffer, rep_len, true); } -int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned int match_offset, - unsigned int match_len, const char *rep_buffer, - unsigned int rep_len); +bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len); /* Setup NAT on this expected conntrack so it follows master, but goes * to port ct->master->saved_proto. */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 09948d10e38e0b939b9764caafd7f3b9a9be41d5..4454719ff849fde065f64eba042d7f58cd971078 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,8 +24,7 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - const struct nf_hook_entry *hooks); + unsigned int (*nf_hook_drop)(struct net *net); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0136028652bdb8b3c20813b01b2fa8cfb16ba012..8a8bab8d7b15a8e9c746a899dcf474740e9f6f25 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -176,7 +176,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type); +void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); @@ -413,10 +413,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv) return (void *)priv - offsetof(struct nft_set, data); } -struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla, u8 genmask); -struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla, u8 genmask); +struct nft_set *nft_set_lookup(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -910,6 +911,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } +static inline bool nft_is_base_chain(const struct nft_chain *chain) +{ + return chain->flags & NFT_BASE_CHAIN; +} + int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); @@ -1044,7 +1050,8 @@ struct nft_object_type { unsigned int maxattr; struct module *owner; const struct nla_policy *policy; - int (*init)(const struct nlattr * const tb[], + int (*init)(const struct nft_ctx *ctx, + const struct nlattr *const tb[], struct nft_object *obj); void (*destroy)(struct nft_object *obj); int (*dump)(struct sk_buff *skb, diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 5ceb2205e4e3ed93461ed4a3956b227f99ac9494..381af9469e6ada01e4acffd4efc3ebc88e66a019 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -32,6 +32,6 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); -void nft_fib_store_result(void *reg, enum nft_fib_result r, +void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct nft_pktinfo *pkt, int index); #endif diff --git a/include/net/netlink.h b/include/net/netlink.h index b239fcd33d8091268fd793fd45cc755264fa592f..01709172b3d38455e7e5510228d6b4a0ad6e94a6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -233,14 +233,17 @@ struct nl_info { }; int netlink_rcv_skb(struct sk_buff *skb, - int (*cb)(struct sk_buff *, struct nlmsghdr *)); + int (*cb)(struct sk_buff *, struct nlmsghdr *, + struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy); + const struct nla_policy *policy, + struct netlink_ext_ack *extack); int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy); + int len, const struct nla_policy *policy, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -374,18 +377,20 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct * * See nla_parse() */ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy); + nlmsg_attrlen(nlh, hdrlen), policy, extack); } /** @@ -409,16 +414,19 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * @hdrlen: length of familiy specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct */ static inline int nlmsg_validate(const struct nlmsghdr *nlh, int hdrlen, int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; return nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), maxtype, policy); + nlmsg_attrlen(nlh, hdrlen), maxtype, policy, + extack); } /** @@ -739,14 +747,17 @@ nla_find_nested(const struct nlattr *nla, int attrtype) * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy + * @extack: extended ACK report struct * * See nla_parse() */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, const struct nlattr *nla, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + extack); } /** @@ -1252,6 +1263,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct * * Validates all attributes in the nested attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be @@ -1260,9 +1272,11 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * Returns 0 on success or a negative error code. */ static inline int nla_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_validate(nla_data(start), nla_len(start), maxtype, policy); + return nla_validate(nla_data(start), nla_len(start), maxtype, policy, + extack); } /** diff --git a/include/net/netns/can.h b/include/net/netns/can.h new file mode 100644 index 0000000000000000000000000000000000000000..b106e6ae2e5b46dc119a322ebf22437e88158962 --- /dev/null +++ b/include/net/netns/can.h @@ -0,0 +1,40 @@ +/* + * can in net namespaces + */ + +#ifndef __NETNS_CAN_H__ +#define __NETNS_CAN_H__ + +#include + +struct dev_rcv_lists; +struct s_stats; +struct s_pstats; + +struct netns_can { +#if IS_ENABLED(CONFIG_PROC_FS) + struct proc_dir_entry *proc_dir; + struct proc_dir_entry *pde_version; + struct proc_dir_entry *pde_stats; + struct proc_dir_entry *pde_reset_stats; + struct proc_dir_entry *pde_rcvlist_all; + struct proc_dir_entry *pde_rcvlist_fil; + struct proc_dir_entry *pde_rcvlist_inv; + struct proc_dir_entry *pde_rcvlist_sff; + struct proc_dir_entry *pde_rcvlist_eff; + struct proc_dir_entry *pde_rcvlist_err; + struct proc_dir_entry *bcmproc_dir; +#endif + + /* receive filters subscribed for 'all' CAN devices */ + struct dev_rcv_lists *can_rx_alldev_list; + spinlock_t can_rcvlists_lock; + struct timer_list can_stattimer;/* timer for statistics update */ + struct s_stats *can_stats; /* packet statistics */ + struct s_pstats *can_pstats; /* receive list statistics */ + + /* CAN GW per-net gateway jobs */ + struct hlist_head cgw_list; +}; + +#endif /* __NETNS_CAN_H__ */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 622d2da27135586d164c228b81e71afb922d5d8c..cd686c4fb32dc5409a08f818d48228bffa6f6778 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -33,7 +33,6 @@ struct inet_timewait_death_row { atomic_t tw_count; struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; - int sysctl_tw_recycle; int sysctl_max_tw_buckets; }; @@ -96,6 +95,8 @@ struct netns_ipv4 { /* Shall we try to damage output packets if routing dev changes? */ int sysctl_ip_dynaddr; int sysctl_ip_early_demux; + int sysctl_tcp_early_demux; + int sysctl_udp_early_demux; int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; @@ -152,6 +153,7 @@ struct netns_ipv4 { #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH int sysctl_fib_multipath_use_neigh; + int sysctl_fib_multipath_hash_policy; #endif unsigned int fib_seq; /* protected by rtnl_mutex */ diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index d29203651c01700d9406157ef8dd016ea55a4cbb..6608b3693385e771147f78da13afa87cc6355d83 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -9,8 +9,11 @@ struct mpls_route; struct ctl_table_header; struct netns_mpls { + int ip_ttl_propagate; + int default_ttl; size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; + struct ctl_table_header *ctl; }; diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 1a3de8b34ad27a8f3f8cf74ddd3eb6af61ced72e..bbdc73a3239dffbcafe8cf19f5997ddbeb7ada04 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -27,6 +27,7 @@ #include #include +#define nfc_dbg(dev, fmt, ...) dev_dbg((dev), "NFC: " fmt, ##__VA_ARGS__) #define nfc_info(dev, fmt, ...) dev_info((dev), "NFC: " fmt, ##__VA_ARGS__) #define nfc_err(dev, fmt, ...) dev_err((dev), "NFC: " fmt, ##__VA_ARGS__) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f1b76b8e6d2d296177116d0ef0f254d175551cbe..bec46f63f10ced844f8aec2b19bebf8b3dc01167 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -92,7 +92,7 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -void qdisc_hash_add(struct Qdisc *q); +void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); diff --git a/include/net/protocol.h b/include/net/protocol.h index bf36ca34af7ad255b9eb821cbed0a70abad993f5..65ba335b0e7e66bb7f1b4bd279d31e616e0dd31e 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -40,6 +40,7 @@ /* This is used to register protocols. */ struct net_protocol { void (*early_demux)(struct sk_buff *skb); + void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, @@ -54,7 +55,7 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { void (*early_demux)(struct sk_buff *skb); - + void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, @@ -92,12 +93,12 @@ struct inet_protosw { #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ -extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; +extern struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS]; extern const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS]; extern const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS]; #if IS_ENABLED(CONFIG_IPV6) -extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; +extern struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS]; #endif int inet_add_protocol(const struct net_protocol *prot, unsigned char num); diff --git a/include/net/route.h b/include/net/route.h index c0874c87c173717f2c13c8af06d2482a76190243..2cc0e14c63598ce3d3be88bb04d2fd433d676129 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -113,13 +113,13 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp, - int mp_hash); +struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp, + const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { - return __ip_route_output_key_hash(net, flp, -1); + return __ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 106de5f7bf0675e29114cde65b7ea8a4e557c1ce..78fa5fe32947d590351917c6353caa243eb0cf1b 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -4,7 +4,8 @@ #include #include -typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *); +typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, + struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); @@ -158,7 +159,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, int rtnl_delete_link(struct net_device *dev); int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len); +int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, + struct netlink_ext_ack *exterr); #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index aeec4086afb2446dadb1fb8c54ad54a909634380..22e52093bfda7fe651f4c0aafc95d18e22b5f6fd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -66,6 +66,7 @@ struct Qdisc { #define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : * qdisc_tree_decrease_qlen() should stop. */ +#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; @@ -203,14 +204,14 @@ struct tcf_proto_ops { const struct tcf_proto *, struct tcf_result *); int (*init)(struct tcf_proto*); - bool (*destroy)(struct tcf_proto*, bool); + void (*destroy)(struct tcf_proto*); unsigned long (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, unsigned long *, bool); - int (*delete)(struct tcf_proto*, unsigned long); + int (*delete)(struct tcf_proto*, unsigned long, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); /* rtnetlink specific */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index b6f682ec184a62a1e7b9d2a0ea159293cca12a76..47113f2c4b0a2b6c596d2f28018a1e1941cb6ede 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -293,6 +293,22 @@ struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_association *asoc, union sctp_params param, struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_tsnreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_addstrm_out( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_addstrm_in( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); +struct sctp_chunk *sctp_process_strreset_resp( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp); /* Prototypes for statetable processing. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 138f8615acf0993d8015f6c1d2eee32966dccad0..a8b38e123f9744368cb6c0fa96d28556d44aee14 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1315,6 +1315,8 @@ struct sctp_inithdr_host { struct sctp_stream_out { __u16 ssn; __u8 state; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; struct sctp_stream_in { @@ -1887,6 +1889,7 @@ struct sctp_association { __u32 strreset_outseq; /* Update after receiving response */ __u32 strreset_inseq; /* Update after receiving request */ + __u32 strreset_result[2]; /* save the results of last 2 responses */ struct sctp_chunk *strreset_chunk; /* save request chunk */ diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 324b5965fc4de505ca98fbbb9aedc0d3a0039742..1060494ac230b80caca57f6963dca2692ae10b9d 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -132,6 +132,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, __u16 stream_num, __u16 *stream_list, gfp_t gfp); +struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( + const struct sctp_association *asoc, __u16 flags, + __u32 local_tsn, __u32 remote_tsn, gfp_t gfp); + +struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( + const struct sctp_association *asoc, __u16 flags, + __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp); + void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *); void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 0caee631a8364fe6e49ab8cacba864d019be8b47..b94006f6fbdde0d78fe33b9c2d86159e291c30cf 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,12 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff); -u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr); +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport); +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/sock.h b/include/net/sock.h index 03252d53975de7ad0da66d35802738830b0e3367..f33e3d134e0b7f66329f2122d7acc8b396c1787b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -995,7 +995,7 @@ struct smc_hashinfo; struct module; /* - * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes * un-modified. Special care is taken when initializing object to zero. */ static inline void sk_prot_clear_nulls(struct sock *sk, int size) @@ -1783,11 +1783,8 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) sk_tx_queue_clear(sk); sk->sk_dst_pending_confirm = 0; - /* - * This can be called while sk is owned by the caller only, - * with no state that can be checked in a rcu_dereference_check() cond - */ - old_dst = rcu_dereference_raw(sk->sk_dst_cache); + old_dst = rcu_dereference_protected(sk->sk_dst_cache, + lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); dst_release(old_dst); } @@ -2242,6 +2239,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); +#define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { @@ -2252,8 +2250,10 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) __sock_recv_ts_and_drops(msg, sk, skb); - else + else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sk->sk_stamp = skb->tstamp; + else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) + sk->sk_stamp = 0; } void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); @@ -2365,6 +2365,8 @@ bool sk_ns_capable(const struct sock *sk, bool sk_capable(const struct sock *sk, int cap); bool sk_net_capable(const struct sock *sk, int cap); +void sk_get_meminfo(const struct sock *sk, u32 *meminfo); + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h index f31fb6331a532eed7258177d5cfbb48fde9a594a..3248beaf16b0513283cd3c863695a77d09793cdc 100644 --- a/include/net/tc_act/tc_csum.h +++ b/include/net/tc_act/tc_csum.h @@ -3,6 +3,7 @@ #include #include +#include struct tcf_csum { struct tc_action common; @@ -11,4 +12,18 @@ struct tcf_csum { }; #define to_tcf_csum(a) ((struct tcf_csum *)a) +static inline bool is_tcf_csum(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_CSUM) + return true; +#endif + return false; +} + +static inline u32 tcf_csum_update_flags(const struct tc_action *a) +{ + return to_tcf_csum(a)->update_flags; +} + #endif /* __NET_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index dfbd6ee0bc7cd196c052e700da43deddb8d1dfef..a46c3f2ace702932dc95c3021e7b13d72a2a4777 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -2,6 +2,7 @@ #define __NET_TC_PED_H #include +#include struct tcf_pedit_key_ex { enum pedit_header_type htype; @@ -17,4 +18,48 @@ struct tcf_pedit { }; #define to_pedit(a) ((struct tcf_pedit *)a) +static inline bool is_tcf_pedit(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_PEDIT) + return true; +#endif + return false; +} + +static inline int tcf_pedit_nkeys(const struct tc_action *a) +{ + return to_pedit(a)->tcfp_nkeys; +} + +static inline u32 tcf_pedit_htype(const struct tc_action *a, int index) +{ + if (to_pedit(a)->tcfp_keys_ex) + return to_pedit(a)->tcfp_keys_ex[index].htype; + + return TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; +} + +static inline u32 tcf_pedit_cmd(const struct tc_action *a, int index) +{ + if (to_pedit(a)->tcfp_keys_ex) + return to_pedit(a)->tcfp_keys_ex[index].cmd; + + return __PEDIT_CMD_MAX; +} + +static inline u32 tcf_pedit_mask(const struct tc_action *a, int index) +{ + return to_pedit(a)->tcfp_keys[index].mask; +} + +static inline u32 tcf_pedit_val(const struct tc_action *a, int index) +{ + return to_pedit(a)->tcfp_keys[index].val; +} + +static inline u32 tcf_pedit_offset(const struct tc_action *a, int index) +{ + return to_pedit(a)->tcfp_keys[index].off; +} #endif /* __NET_TC_PED_H */ diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 48cca321ee6c4f3e44f8f546d05d5f32b470ccc1..c2090df944ff53224ef0500c05f62672f0d5e0b3 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -13,9 +13,6 @@ #include #include -#define VLAN_F_POP 0x1 -#define VLAN_F_PUSH 0x2 - struct tcf_vlan { struct tc_action common; int tcfv_action; @@ -49,4 +46,9 @@ static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) return to_vlan(a)->tcfv_push_proto; } +static inline u8 tcf_vlan_push_prio(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_prio; +} + #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ec4ea652f3f55e53675dbe09f29599af179c41a..be6223c586fa05b3ef1dbcb96e73cf7b5dae292d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -78,6 +78,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* Maximal number of ACKs sent quickly to accelerate slow-start. */ #define TCP_MAX_QUICKACKS 16U +/* Maximal number of window scale according to RFC1323 */ +#define TCP_MAX_WSCALE 14U + /* urg_data states */ #define TCP_URG_VALID 0x0100 #define TCP_URG_NOTYET 0x0200 @@ -406,11 +409,7 @@ void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, - bool paws_check, bool timestamps); -bool tcp_remember_stamp(struct sock *sk); -bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); -void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); @@ -471,7 +470,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst); + struct dst_entry *dst, u32 tsoff); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); @@ -925,7 +924,7 @@ struct tcp_congestion_ops { void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* call when ack arrives (optional) */ void (*in_ack_event)(struct sock *sk, u32 flags); - /* new value of cwnd after loss (optional) */ + /* new value of cwnd after loss (required) */ u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); @@ -1005,7 +1004,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs); + struct rate_sample *rs); void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK @@ -1235,10 +1234,12 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta); static inline void tcp_slow_start_after_idle_check(struct sock *sk) { + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sysctl_tcp_slow_start_after_idle || tp->packets_out) + if (!sysctl_tcp_slow_start_after_idle || tp->packets_out || + ca_ops->cong_control) return; delta = tcp_time_stamp - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1252,9 +1253,11 @@ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, static inline int tcp_win_from_space(int space) { - return sysctl_tcp_adv_win_scale<=0 ? - (space>>(-sysctl_tcp_adv_win_scale)) : - space - (space>>sysctl_tcp_adv_win_scale); + int tcp_adv_win_scale = sysctl_tcp_adv_win_scale; + + return tcp_adv_win_scale <= 0 ? + (space>>(-tcp_adv_win_scale)) : + space - (space>>tcp_adv_win_scale); } /* Note: caller must be prepared to deal with negative returns */ @@ -1505,6 +1508,12 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; +void tcp_fastopen_active_disable(struct sock *sk); +bool tcp_fastopen_active_should_disable(struct sock *sk); +void tcp_fastopen_active_disable_ofo_check(struct sock *sk); +void tcp_fastopen_active_timeout_reset(void); + /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. */ @@ -1814,9 +1823,9 @@ struct tcp_request_sock_ops { __u16 *mss); #endif struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict); - __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff); + const struct request_sock *req); + u32 (*init_seq)(const struct sk_buff *skb); + u32 (*init_ts_off)(const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@ -1847,10 +1856,9 @@ void tcp_v4_init(void); void tcp_init(void); /* tcp_recovery.c */ -extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now); +extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - const struct skb_mstamp *xmit_time, - const struct skb_mstamp *ack_time); + const struct skb_mstamp *xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); /* diff --git a/include/net/udp.h b/include/net/udp.h index c9d8b8e848e05c2e7228f287f88ccdb57b2e10c2..3391dbd739595a76150453c28468ce8bb55530f8 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -372,4 +372,5 @@ void udp_encap_enable(void); #if IS_ENABLED(CONFIG_IPV6) void udpv6_encap_enable(void); #endif + #endif /* _UDP_H */ diff --git a/include/net/x25.h b/include/net/x25.h index c383aa4edbf0c27980ef7aad22ff160c72b41bbd..6d30a01d281d4faa824129cb3921dbc3a77e7da3 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL -void x25_register_sysctl(void); +int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else -static inline void x25_register_sysctl(void) {}; +static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 14d82bf16692a6d0ed66721b1548ba152dbc18de..7e7e2b0d29157047fa0d3596b3f97cf501d754f9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -120,6 +120,13 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; +struct xfrm_state_offload { + struct net_device *dev; + unsigned long offload_handle; + unsigned int num_exthdrs; + u8 flags; +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@ -207,6 +214,8 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct tasklet_hrtimer mtimer; + struct xfrm_state_offload xso; + /* used to fix curlft->add_time when changing date */ long saved_tmo; @@ -222,6 +231,8 @@ struct xfrm_state { struct xfrm_mode *inner_mode_iaf; struct xfrm_mode *outer_mode; + const struct xfrm_type_offload *type_offload; + /* Security context */ struct xfrm_sec_ctx *security; @@ -314,12 +325,14 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 portid); int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { - unsigned int family; - unsigned int proto; - __be16 eth_proto; - struct module *owner; - const struct xfrm_type *type_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX]; + unsigned int family; + unsigned int proto; + __be16 eth_proto; + struct module *owner; + const struct xfrm_type *type_map[IPPROTO_MAX]; + const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; + struct xfrm_mode *mode_map[XFRM_MODE_MAX]; + int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_selector *sel, const struct flowi *fl); @@ -380,6 +393,18 @@ struct xfrm_type { int xfrm_register_type(const struct xfrm_type *type, unsigned short family); int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); +struct xfrm_type_offload { + char *description; + struct module *owner; + u8 proto; + void (*encap)(struct xfrm_state *, struct sk_buff *pskb); + int (*input_tail)(struct xfrm_state *x, struct sk_buff *skb); + int (*xmit)(struct xfrm_state *, struct sk_buff *pskb, netdev_features_t features); +}; + +int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); +int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); + struct xfrm_mode { /* * Remove encapsulation header. @@ -428,6 +453,16 @@ struct xfrm_mode { */ int (*output)(struct xfrm_state *x, struct sk_buff *skb); + /* + * Adjust pointers into the packet and do GSO segmentation. + */ + struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features); + + /* + * Adjust pointers into the packet when IPsec is done at layer2. + */ + void (*xmit)(struct xfrm_state *x, struct sk_buff *skb); + struct xfrm_state_afinfo *afinfo; struct module *owner; unsigned int encap; @@ -586,7 +621,6 @@ struct xfrm_migrate { struct xfrm_mgr { struct list_head list; - char *id; int (*notify)(struct xfrm_state *x, const struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp); struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); @@ -817,12 +851,12 @@ static inline void xfrm_state_hold(struct xfrm_state *x) } static inline bool addr_match(const void *token1, const void *token2, - int prefixlen) + unsigned int prefixlen) { const __be32 *a1 = token1; const __be32 *a2 = token2; - int pdw; - int pbi; + unsigned int pdw; + unsigned int pbi; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ pbi = prefixlen & 0x1f; /* num of bits in incomplete u32 in prefix */ @@ -846,9 +880,9 @@ static inline bool addr_match(const void *token1, const void *token2, static inline bool addr4_match(__be32 a1, __be32 a2, u8 prefixlen) { /* C99 6.5.7 (3): u32 << 32 is undefined behaviour */ - if (prefixlen == 0) + if (sizeof(long) == 4 && prefixlen == 0) return true; - return !((a1 ^ a2) & htonl(0xFFFFFFFFu << (32 - prefixlen))); + return !((a1 ^ a2) & htonl(~0UL << (32 - prefixlen))); } static __inline__ @@ -945,10 +979,6 @@ struct xfrm_dst { struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; -#ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; -#endif u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; @@ -964,12 +994,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); -#ifdef CONFIG_XFRM_SUB_POLICY - kfree(xdst->origin); - xdst->origin = NULL; - kfree(xdst->partner); - xdst->partner = NULL; -#endif } #endif @@ -1533,6 +1557,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); @@ -1615,6 +1640,11 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) } #endif +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family); + struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); @@ -1820,6 +1850,61 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) } #endif +#ifdef CONFIG_XFRM_OFFLOAD +void __net_init xfrm_dev_init(void); +int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo); +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + + if (xso->dev) + xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + struct net_device *dev = xso->dev; + + if (dev && dev->xfrmdev_ops) { + dev->xfrmdev_ops->xdo_dev_state_free(x); + xso->dev = NULL; + dev_put(dev); + } +} +#else +static inline void __net_init xfrm_dev_init(void) +{ +} + +static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +{ + return 0; +} + +static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) +{ + return 0; +} + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ +} + +static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + return false; +} +#endif + static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) { if (attrs[XFRMA_MARK]) diff --git a/include/rdma/ib.h b/include/rdma/ib.h index 9b4c22a36931884520b4b25e42d3b99c2bd8b733..66dbed0c146d290fb86bd0c14abcdb37945b530f 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -100,7 +100,7 @@ struct sockaddr_ib { */ static inline bool ib_safe_file_access(struct file *filp) { - return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); + return filp->f_cred == current_cred() && !uaccess_kernel(); } #endif /* _RDMA_IB_H */ diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index b49258b16f4ed5610cbb38b29d84f5c617afddb9..7979cb04f529449aaa6d825985e204c5cf7de660 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -117,8 +117,8 @@ struct ib_cm_req_event_param { u8 port; - struct ib_sa_path_rec *primary_path; - struct ib_sa_path_rec *alternate_path; + struct sa_path_rec *primary_path; + struct sa_path_rec *alternate_path; __be64 remote_ca_guid; u32 remote_qkey; @@ -197,7 +197,7 @@ struct ib_cm_mra_event_param { }; struct ib_cm_lap_event_param { - struct ib_sa_path_rec *alternate_path; + struct sa_path_rec *alternate_path; }; enum ib_cm_apr_status { @@ -363,8 +363,8 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, __be64 service_id); struct ib_cm_req_param { - struct ib_sa_path_rec *primary_path; - struct ib_sa_path_rec *alternate_path; + struct sa_path_rec *primary_path; + struct sa_path_rec *alternate_path; __be64 service_id; u32 qp_num; enum ib_qp_type qp_type; @@ -521,7 +521,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct ib_sa_path_rec *alternate_path, + struct sa_path_rec *alternate_path, const void *private_data, u8 private_data_len); @@ -565,7 +565,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, u8 private_data_len); struct ib_cm_sidr_req_param { - struct ib_sa_path_rec *path; + struct sa_path_rec *path; __be64 service_id; int timeout_ms; const void *private_data; diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index c755325f0831c56b7eca923707e2772c52aa08b9..5519f31f043a45b086e098407c4a15a2a7d02ca4 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -74,6 +74,12 @@ #define IB_GRH_FLOW_MASK 0xFFFFF #define IB_GRH_FLOW_SHIFT 0 #define IB_GRH_NEXT_HDR 0x1B +#define IB_FECN_SHIFT 31 +#define IB_FECN_MASK 1 +#define IB_FECN_SMASK BIT(IB_FECN_SHIFT) +#define IB_BECN_SHIFT 30 +#define IB_BECN_MASK 1 +#define IB_BECN_SMASK BIT(IB_BECN_SHIFT) #define IB_AETH_CREDIT_SHIFT 24 #define IB_AETH_CREDIT_MASK 0x1F @@ -181,4 +187,64 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) ib_u64_put(val, &ateth->compare_data); } +/* + * 9B/IB Packet Format + */ +#define IB_LNH_MASK 3 +#define IB_SC_MASK 0xf +#define IB_SC_SHIFT 12 +#define IB_SL_MASK 0xf +#define IB_SL_SHIFT 4 + +static inline u8 ib_get_lnh(struct ib_header *hdr) +{ + return (be16_to_cpu(hdr->lrh[0]) & IB_LNH_MASK); +} + +static inline u8 ib_get_sc(struct ib_header *hdr) +{ + return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); +} + +static inline u8 ib_get_sl(struct ib_header *hdr) +{ + return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); +} + +static inline u16 ib_get_dlid(struct ib_header *hdr) +{ + return (be16_to_cpu(hdr->lrh[1])); +} + +static inline u16 ib_get_slid(struct ib_header *hdr) +{ + return (be16_to_cpu(hdr->lrh[3])); +} + +/* + * BTH + */ +#define IB_BTH_OPCODE_MASK 0xff +#define IB_BTH_OPCODE_SHIFT 24 +#define IB_BTH_PAD_MASK 3 +#define IB_BTH_PKEY_MASK 0xffff +#define IB_BTH_PAD_SHIFT 20 + +static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) & + IB_BTH_PAD_MASK); +} + +static inline u16 ib_bth_get_pkey(struct ib_other_headers *ohdr) +{ + return (be32_to_cpu(ohdr->bth[0]) & IB_BTH_PKEY_MASK); +} + +static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_OPCODE_SHIFT) & + IB_BTH_OPCODE_MASK); +} + #endif /* IB_HDRS_H */ diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 981214b3790cf4226d68bd5010aa87e14506be27..d67b11b7202924c335345d5e0c78d9cc99ea187b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -262,6 +262,33 @@ struct ib_class_port_info { __be32 trap_qkey; }; +#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) + +struct opa_class_port_info { + u8 base_version; + u8 class_version; + __be16 cap_mask; + __be32 cap_mask2_resp_time; + + u8 redirect_gid[16]; + __be32 redirect_tc_fl; + __be32 redirect_lid; + __be32 redirect_sl_qp; + __be32 redirect_qkey; + + u8 trap_gid[16]; + __be32 trap_tc_fl; + __be32 trap_lid; + __be32 trap_hl_qp; + __be32 trap_qkey; + + __be16 trap_pkey; + __be16 redirect_pkey; + + u8 trap_sl_rsvd; + u8 reserved[3]; +} __packed; + /** * ib_get_cpi_resp_time - Returns the resp_time value from * cap_mask2_resp_time in ib_class_port_info. @@ -315,6 +342,17 @@ static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi, IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); } +/** + * opa_get_cpi_capmask2 - Returns the capmask2 value from + * cap_mask2_resp_time in ib_class_port_info. + * @cpi: A struct opa_class_port_info mad. + */ +static inline u32 opa_get_cpi_capmask2(struct opa_class_port_info *cpi) +{ + return (be32_to_cpu(cpi->cap_mask2_resp_time) >> + IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); +} + struct ib_mad_notice_attr { u8 generic_type; u8 prod_type_msb; @@ -673,7 +711,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, * After invoking this routine, MAD services are no longer usable by the * client on the associated QP. */ -int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); +void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h index db037205c9e8cb4c5883ba0720f4b30105006d69..68cef3bd50fb97730df5112d4dc88a8c9923f46d 100644 --- a/include/rdma/ib_marshall.h +++ b/include/rdma/ib_marshall.h @@ -42,12 +42,12 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src); void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, - struct ib_ah_attr *src); + struct rdma_ah_attr *src); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, - struct ib_sa_path_rec *src); + struct sa_path_rec *src); -void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, +void ib_copy_path_rec_from_user(struct sa_path_rec *dst, struct ib_user_path_rec *src); #endif /* IB_USER_MARSHALL_H */ diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index b13419ce99ff5b52d666c2af7a91f58457f58100..36655899ee028d568257f2b2cc8764b136ab0e20 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -80,6 +80,8 @@ enum { IB_OPCODE_UD = 0x60, /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */ IB_OPCODE_CNP = 0x80, + /* Manufacturer specific */ + IB_OPCODE_MSP = 0xe0, /* operations -- just used to define real constants */ IB_OPCODE_SEND_FIRST = 0x00, diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index fd0e53219f93e56b66643578c31f9172846a08c5..355b81f4242defd82a3802cd47e2a28abfb24ee5 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -43,6 +43,8 @@ #include #include +#include +#include enum { IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ @@ -56,6 +58,7 @@ enum { IB_SA_METHOD_GET_TRACE_TBL = 0x13 }; +#define OPA_SA_CLASS_VERSION 0x80 enum { IB_SA_ATTR_CLASS_PORTINFO = 0x01, IB_SA_ATTR_NOTICE = 0x02, @@ -147,13 +150,44 @@ enum ib_sa_mc_join_states { #define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) #define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) -struct ib_sa_path_rec { - __be64 service_id; - union ib_gid dgid; - union ib_gid sgid; +enum sa_path_rec_type { + SA_PATH_REC_TYPE_IB, + SA_PATH_REC_TYPE_ROCE_V1, + SA_PATH_REC_TYPE_ROCE_V2, + SA_PATH_REC_TYPE_OPA +}; + +struct sa_path_rec_ib { __be16 dlid; __be16 slid; u8 raw_traffic; +}; + +struct sa_path_rec_roce { + u8 dmac[ETH_ALEN]; + /* ignored in IB */ + int ifindex; + /* ignored in IB */ + struct net *net; + +}; + +struct sa_path_rec_opa { + __be32 dlid; + __be32 slid; + u8 raw_traffic; + u8 l2_8B; + u8 l2_10B; + u8 l2_9B; + u8 l2_16B; + u8 qos_type; + u8 qos_priority; +}; + +struct sa_path_rec { + union ib_gid dgid; + union ib_gid sgid; + __be64 service_id; /* reserved */ __be32 flow_label; u8 hop_limit; @@ -170,17 +204,109 @@ struct ib_sa_path_rec { u8 packet_life_time_selector; u8 packet_life_time; u8 preference; - u8 dmac[ETH_ALEN]; - /* ignored in IB */ - int ifindex; - /* ignored in IB */ - struct net *net; - enum ib_gid_type gid_type; + union { + struct sa_path_rec_ib ib; + struct sa_path_rec_roce roce; + struct sa_path_rec_opa opa; + }; + enum sa_path_rec_type rec_type; }; -static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec) +static inline enum ib_gid_type + sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec) +{ + switch (rec->rec_type) { + case SA_PATH_REC_TYPE_ROCE_V1: + return IB_GID_TYPE_ROCE; + case SA_PATH_REC_TYPE_ROCE_V2: + return IB_GID_TYPE_ROCE_UDP_ENCAP; + default: + return IB_GID_TYPE_IB; + } +} + +static inline enum sa_path_rec_type + sa_conv_gid_to_pathrec_type(enum ib_gid_type type) +{ + switch (type) { + case IB_GID_TYPE_ROCE: + return SA_PATH_REC_TYPE_ROCE_V1; + case IB_GID_TYPE_ROCE_UDP_ENCAP: + return SA_PATH_REC_TYPE_ROCE_V2; + default: + return SA_PATH_REC_TYPE_IB; + } +} + +static inline void path_conv_opa_to_ib(struct sa_path_rec *ib, + struct sa_path_rec *opa) +{ + if ((be32_to_cpu(opa->opa.dlid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) || + (be32_to_cpu(opa->opa.slid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE))) { + /* Create OPA GID and zero out the LID */ + ib->dgid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(opa->opa.dlid)); + ib->dgid.global.subnet_prefix + = opa->dgid.global.subnet_prefix; + ib->sgid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(opa->opa.slid)); + ib->dgid.global.subnet_prefix + = opa->dgid.global.subnet_prefix; + ib->ib.dlid = 0; + + ib->ib.slid = 0; + } else { + ib->ib.dlid = htons(ntohl(opa->opa.dlid)); + ib->ib.slid = htons(ntohl(opa->opa.slid)); + } + ib->service_id = opa->service_id; + ib->ib.raw_traffic = opa->opa.raw_traffic; +} + +static inline void path_conv_ib_to_opa(struct sa_path_rec *opa, + struct sa_path_rec *ib) +{ + __be32 slid, dlid; + + if ((ib_is_opa_gid(&ib->sgid)) || + (ib_is_opa_gid(&ib->dgid))) { + slid = htonl(opa_get_lid_from_gid(&ib->sgid)); + dlid = htonl(opa_get_lid_from_gid(&ib->dgid)); + } else { + slid = htonl(ntohs(ib->ib.slid)); + dlid = htonl(ntohs(ib->ib.dlid)); + } + opa->opa.slid = slid; + opa->opa.dlid = dlid; + opa->service_id = ib->service_id; + opa->opa.raw_traffic = ib->ib.raw_traffic; +} + +/* Convert from OPA to IB path record */ +static inline void sa_convert_path_opa_to_ib(struct sa_path_rec *dest, + struct sa_path_rec *src) { - return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL; + if (src->rec_type != SA_PATH_REC_TYPE_OPA) + return; + + *dest = *src; + dest->rec_type = SA_PATH_REC_TYPE_IB; + path_conv_opa_to_ib(dest, src); +} + +/* Convert from IB to OPA path record */ +static inline void sa_convert_path_ib_to_opa(struct sa_path_rec *dest, + struct sa_path_rec *src) +{ + if (src->rec_type != SA_PATH_REC_TYPE_IB) + return; + + /* Do a structure copy and overwrite the relevant fields */ + *dest = *src; + dest->rec_type = SA_PATH_REC_TYPE_OPA; + path_conv_ib_to_opa(dest, src); } #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) @@ -322,11 +448,11 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, + struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, - struct ib_sa_path_rec *resp, + struct sa_path_rec *resp, void *context), void *context, struct ib_sa_query **query); @@ -420,27 +546,27 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, - struct ib_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr); /** * ib_init_ah_from_path - Initialize address handle attributes based on an SA * path record. */ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, - struct ib_sa_path_rec *rec, - struct ib_ah_attr *ah_attr); + struct sa_path_rec *rec, + struct rdma_ah_attr *ah_attr); /** * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec * to IB MAD wire format. */ -void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute); +void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute); /** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. */ -void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); +void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec); /* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, @@ -454,14 +580,119 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context, struct ib_sa_query **sa_query); -/* Support get SA ClassPortInfo */ -int ib_sa_classport_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_class_port_info *resp, - void *context), - void *context, - struct ib_sa_query **sa_query); +bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, + struct ib_device *device, + u8 port_num); + +static inline bool sa_path_is_roce(struct sa_path_rec *rec) +{ + return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) || + (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); +} + +static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid) +{ + if (rec->rec_type == SA_PATH_REC_TYPE_IB) + rec->ib.slid = htons(ntohl(slid)); + else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) + rec->opa.slid = slid; +} + +static inline void sa_path_set_dlid(struct sa_path_rec *rec, __be32 dlid) +{ + if (rec->rec_type == SA_PATH_REC_TYPE_IB) + rec->ib.dlid = htons(ntohl(dlid)); + else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) + rec->opa.dlid = dlid; +} + +static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, + u8 raw_traffic) +{ + if (rec->rec_type == SA_PATH_REC_TYPE_IB) + rec->ib.raw_traffic = raw_traffic; + else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) + rec->opa.raw_traffic = raw_traffic; +} + +static inline __be32 sa_path_get_slid(struct sa_path_rec *rec) +{ + if (rec->rec_type == SA_PATH_REC_TYPE_IB) + return htonl(ntohs(rec->ib.slid)); + else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) + return rec->opa.slid; + return 0; +} + +static inline __be32 sa_path_get_dlid(struct sa_path_rec *rec) +{ + if (rec->rec_type == SA_PATH_REC_TYPE_IB) + return htonl(ntohs(rec->ib.dlid)); + else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) + return rec->opa.dlid; + return 0; +} + +static inline u8 sa_path_get_raw_traffic(struct sa_path_rec *rec) +{ + if (rec->rec_type == SA_PATH_REC_TYPE_IB) + return rec->ib.raw_traffic; + else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) + return rec->opa.raw_traffic; + return 0; +} + +static inline void sa_path_set_dmac(struct sa_path_rec *rec, u8 *dmac) +{ + if (sa_path_is_roce(rec)) + memcpy(rec->roce.dmac, dmac, ETH_ALEN); +} + +static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec) +{ + if (sa_path_is_roce(rec)) + eth_zero_addr(rec->roce.dmac); +} + +static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex) +{ + if (sa_path_is_roce(rec)) + rec->roce.ifindex = ifindex; +} + +static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net) +{ + if (sa_path_is_roce(rec)) + rec->roce.net = net; +} + +static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec) +{ + if (sa_path_is_roce(rec)) + return rec->roce.dmac; + return NULL; +} + +static inline int sa_path_get_ifindex(struct sa_path_rec *rec) +{ + if (sa_path_is_roce(rec)) + return rec->roce.ifindex; + return 0; +} + +static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec) +{ + if (sa_path_is_roce(rec)) + return rec->roce.net; + return NULL; +} + +static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec) +{ + return sa_path_get_ndev(rec) ? + dev_get_by_index(sa_path_get_ndev(rec), + sa_path_get_ifindex(rec)) + : NULL; +} #endif /* IB_SA_H */ diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 2d83cfd7e6ce20da3b1606cfe4335fada792fb51..23159dd5be184bc5db37f4d34a30653afe8b97fd 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -44,7 +44,7 @@ struct ib_umem { struct ib_ucontext *context; size_t length; unsigned long address; - int page_size; + int page_shift; int writable; int hugetlb; struct work_struct work; @@ -60,7 +60,7 @@ struct ib_umem { /* Returns the offset of the umem start relative to the first page. */ static inline int ib_umem_offset(struct ib_umem *umem) { - return umem->address & ((unsigned long)umem->page_size - 1); + return umem->address & (BIT(umem->page_shift) - 1); } /* Returns the first page of an ODP umem. */ @@ -72,12 +72,12 @@ static inline unsigned long ib_umem_start(struct ib_umem *umem) /* Returns the address of the page after the last one of an ODP umem. */ static inline unsigned long ib_umem_end(struct ib_umem *umem) { - return PAGE_ALIGN(umem->address + umem->length); + return ALIGN(umem->address + umem->length, BIT(umem->page_shift)); } static inline size_t ib_umem_num_pages(struct ib_umem *umem) { - return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT; + return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift; } #ifdef CONFIG_INFINIBAND_USER_MEM diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 542cd8b3414c14f5fc65aa85006836fffe7ed42a..fb67554aabd6e0f80c34f8966b035cbb4efba029 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -84,7 +84,8 @@ struct ib_umem_odp { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem); +int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, + int access); struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size); @@ -154,7 +155,8 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline int ib_umem_odp_get(struct ib_ucontext *context, - struct ib_umem *umem) + struct ib_umem *umem, + int access) { return -EINVAL; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 99e4423eb2b80b142024bed892ddc4a84ac5e576..ba8314ec576844ddb7148364617a768c81acd90f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -55,12 +55,14 @@ #include #include #include +#include #include #include #include #include #include +#include extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -224,6 +226,7 @@ enum ib_device_cap_flags { IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), + IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), }; enum ib_signature_prot_cap { @@ -431,7 +434,8 @@ enum ib_port_speed { IB_SPEED_QDR = 4, IB_SPEED_FDR10 = 8, IB_SPEED_FDR = 16, - IB_SPEED_EDR = 32 + IB_SPEED_EDR = 32, + IB_SPEED_HDR = 64 }; /** @@ -498,6 +502,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( /* Address format 0x000FF000 */ #define RDMA_CORE_CAP_AF_IB 0x00001000 #define RDMA_CORE_CAP_ETH_AH 0x00002000 +#define RDMA_CORE_CAP_OPA_AH 0x00004000 /* Protocol 0xFFF00000 */ #define RDMA_CORE_CAP_PROT_IB 0x00100000 @@ -836,15 +841,38 @@ struct ib_mr_status { */ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); +enum rdma_ah_attr_type { + RDMA_AH_ATTR_TYPE_IB, + RDMA_AH_ATTR_TYPE_ROCE, + RDMA_AH_ATTR_TYPE_OPA, +}; + struct ib_ah_attr { - struct ib_global_route grh; u16 dlid; - u8 sl; u8 src_path_bits; +}; + +struct roce_ah_attr { + u8 dmac[ETH_ALEN]; +}; + +struct opa_ah_attr { + u32 dlid; + u8 src_path_bits; +}; + +struct rdma_ah_attr { + struct ib_global_route grh; + u8 sl; u8 static_rate; - u8 ah_flags; u8 port_num; - u8 dmac[ETH_ALEN]; + u8 ah_flags; + enum rdma_ah_attr_type type; + union { + struct ib_ah_attr ib; + struct roce_ah_attr roce; + struct opa_ah_attr opa; + }; }; enum ib_wc_status { @@ -1163,8 +1191,8 @@ struct ib_qp_attr { u32 dest_qp_num; int qp_access_flags; struct ib_qp_cap cap; - struct ib_ah_attr ah_attr; - struct ib_ah_attr alt_ah_attr; + struct rdma_ah_attr ah_attr; + struct rdma_ah_attr alt_ah_attr; u16 pkey_index; u16 alt_pkey_index; u8 en_sqd_async_notify; @@ -1336,6 +1364,7 @@ enum ib_access_flags { IB_ACCESS_MW_BIND = (1<<4), IB_ZERO_BASED = (1<<5), IB_ACCESS_ON_DEMAND = (1<<6), + IB_ACCESS_HUGETLB = (1<<7), }; /* @@ -1357,6 +1386,17 @@ struct ib_fmr_attr { struct ib_umem; +enum rdma_remove_reason { + /* Userspace requested uobject deletion. Call could fail */ + RDMA_REMOVE_DESTROY, + /* Context deletion. This call should delete the actual object itself */ + RDMA_REMOVE_CLOSE, + /* Driver is being hot-unplugged. This call should delete the actual object itself */ + RDMA_REMOVE_DRIVER_REMOVE, + /* Context is being cleaned-up, but commit was just completed */ + RDMA_REMOVE_DURING_CLEANUP, +}; + struct ib_rdmacg_object { #ifdef CONFIG_CGROUP_RDMA struct rdma_cgroup *cg; /* owner rdma cgroup */ @@ -1365,19 +1405,16 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; - struct list_head pd_list; - struct list_head mr_list; - struct list_head mw_list; - struct list_head cq_list; - struct list_head qp_list; - struct list_head srq_list; - struct list_head ah_list; - struct list_head xrcd_list; - struct list_head rule_list; - struct list_head wq_list; - struct list_head rwq_ind_tbl_list; + struct ib_uverbs_file *ufile; int closing; + /* locking the uobjects_list */ + struct mutex uobjects_lock; + struct list_head uobjects; + /* protects cleanup process from other actions */ + struct rw_semaphore cleanup_rwsem; + enum rdma_remove_reason cleanup_reason; + struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct rb_root umem_tree; @@ -1407,9 +1444,16 @@ struct ib_uobject { struct ib_rdmacg_object cg_obj; /* rdmacg object */ int id; /* index into kernel idr */ struct kref ref; - struct rw_semaphore mutex; /* protects .live */ + atomic_t usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ - int live; + + const struct uverbs_obj_type *type; +}; + +struct ib_uobject_file { + struct ib_uobject uobj; + /* ufile contains the lock between context release and file close */ + struct ib_uverbs_file *ufile; }; struct ib_udata { @@ -1447,6 +1491,7 @@ struct ib_ah { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; + enum rdma_ah_attr_type type; }; typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); @@ -1662,6 +1707,7 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_INNER = 0x100, /* Actions */ IB_FLOW_SPEC_ACTION_TAG = 0x1000, + IB_FLOW_SPEC_ACTION_DROP = 0x1001, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 8 @@ -1790,6 +1836,11 @@ struct ib_flow_spec_action_tag { u32 tag_id; }; +struct ib_flow_spec_action_drop { + enum ib_flow_spec_type type; + u16 size; +}; + union ib_flow_spec { struct { u32 type; @@ -1802,6 +1853,7 @@ union ib_flow_spec { struct ib_flow_spec_ipv6 ipv6; struct ib_flow_spec_tunnel tunnel; struct ib_flow_spec_action_tag flow_tag; + struct ib_flow_spec_action_drop drop; }; struct ib_flow_attr { @@ -1838,8 +1890,6 @@ enum ib_mad_result { IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ }; -#define IB_DEVICE_NAME_MAX 64 - struct ib_port_cache { struct ib_pkey_cache *pkey; struct ib_gid_table *gid; @@ -1862,6 +1912,34 @@ struct ib_port_immutable { u32 max_mad_size; }; +/* rdma netdev type - specifies protocol type */ +enum rdma_netdev_t { + RDMA_NETDEV_OPA_VNIC, + RDMA_NETDEV_IPOIB, +}; + +/** + * struct rdma_netdev - rdma netdev + * For cases where netstack interfacing is required. + */ +struct rdma_netdev { + void *clnt_priv; + struct ib_device *hca; + u8 port_num; + + /* control functions */ + void (*set_id)(struct net_device *netdev, int id); + /* send packet */ + int (*send)(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn); + /* multicast */ + int (*attach_mcast)(struct net_device *dev, struct ib_device *hca, + union ib_gid *gid, u16 mlid, + int set_qkey, u32 qkey); + int (*detach_mcast)(struct net_device *dev, struct ib_device *hca, + union ib_gid *gid, u16 mlid); +}; + struct ib_device { /* Do not access @dma_device directly from ULP nor from HW drivers. */ struct device *dma_device; @@ -1977,12 +2055,12 @@ struct ib_device { struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, - struct ib_ah_attr *ah_attr, + struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, - struct ib_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, - struct ib_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah); struct ib_srq * (*create_srq)(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, @@ -2115,6 +2193,20 @@ struct ib_device { struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); + /** + * rdma netdev operations + * + * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it + * doesn't support the specified rdma netdev type. + */ + struct net_device *(*alloc_rdma_netdev)( + struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)); + void (*free_rdma_netdev)(struct net_device *netdev); struct module *owner; struct device dev; @@ -2536,6 +2628,21 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH; } +/** + * rdma_cap_opa_ah - Check if the port of device supports + * OPA Address handles + * @device: Device to check + * @port_num: Port number to check + * + * Return: true if we are running on an OPA device which supports + * the extended OPA addressing. + */ +static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num) +{ + return (device->port_immutable[port_num].core_cap_flags & + RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH; +} + /** * rdma_max_mad_size - Return the max MAD size required by this RDMA Port. * @@ -2636,14 +2743,14 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, void ib_dealloc_pd(struct ib_pd *pd); /** - * ib_create_ah - Creates an address handle for the given address vector. + * rdma_create_ah - Creates an address handle for the given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ -struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr); /** * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header @@ -2676,7 +2783,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); */ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, - struct ib_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr); /** * ib_create_ah_from_wc - Creates an address handle associated with the @@ -2694,28 +2801,28 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num); /** - * ib_modify_ah - Modifies the address vector associated with an address + * rdma_modify_ah - Modifies the address vector associated with an address * handle. * @ah: The address handle to modify. * @ah_attr: The new address vector attributes to associate with the * address handle. */ -int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); +int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); /** - * ib_query_ah - Queries the address vector associated with an address + * rdma_query_ah - Queries the address vector associated with an address * handle. * @ah: The address handle to query. * @ah_attr: The address vector attributes associated with the address * handle. */ -int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); +int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); /** - * ib_destroy_ah - Destroys an address handle. + * rdma_destroy_ah - Destroys an address handle. * @ah: The address handle to destroy. */ -int ib_destroy_ah(struct ib_ah *ah); +int rdma_destroy_ah(struct ib_ah *ah); /** * ib_create_srq - Creates a SRQ associated with the specified protection @@ -3380,5 +3487,156 @@ void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); int ib_resolve_eth_dmac(struct ib_device *device, - struct ib_ah_attr *ah_attr); + struct rdma_ah_attr *ah_attr); + +static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_ROCE) + return attr->roce.dmac; + return NULL; +} + +static inline void rdma_ah_set_dlid(struct rdma_ah_attr *attr, u32 dlid) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_IB) + attr->ib.dlid = (u16)dlid; + else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + attr->opa.dlid = dlid; +} + +static inline u32 rdma_ah_get_dlid(const struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_IB) + return attr->ib.dlid; + else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + return attr->opa.dlid; + return 0; +} + +static inline void rdma_ah_set_sl(struct rdma_ah_attr *attr, u8 sl) +{ + attr->sl = sl; +} + +static inline u8 rdma_ah_get_sl(const struct rdma_ah_attr *attr) +{ + return attr->sl; +} + +static inline void rdma_ah_set_path_bits(struct rdma_ah_attr *attr, + u8 src_path_bits) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_IB) + attr->ib.src_path_bits = src_path_bits; + else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + attr->opa.src_path_bits = src_path_bits; +} + +static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_IB) + return attr->ib.src_path_bits; + else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + return attr->opa.src_path_bits; + return 0; +} + +static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) +{ + attr->port_num = port_num; +} + +static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr) +{ + return attr->port_num; +} + +static inline void rdma_ah_set_static_rate(struct rdma_ah_attr *attr, + u8 static_rate) +{ + attr->static_rate = static_rate; +} + +static inline u8 rdma_ah_get_static_rate(const struct rdma_ah_attr *attr) +{ + return attr->static_rate; +} + +static inline void rdma_ah_set_ah_flags(struct rdma_ah_attr *attr, + enum ib_ah_flags flag) +{ + attr->ah_flags = flag; +} + +static inline enum ib_ah_flags + rdma_ah_get_ah_flags(const struct rdma_ah_attr *attr) +{ + return attr->ah_flags; +} + +static inline const struct ib_global_route + *rdma_ah_read_grh(const struct rdma_ah_attr *attr) +{ + return &attr->grh; +} + +/*To retrieve and modify the grh */ +static inline struct ib_global_route + *rdma_ah_retrieve_grh(struct rdma_ah_attr *attr) +{ + return &attr->grh; +} + +static inline void rdma_ah_set_dgid_raw(struct rdma_ah_attr *attr, void *dgid) +{ + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + memcpy(grh->dgid.raw, dgid, sizeof(grh->dgid)); +} + +static inline void rdma_ah_set_subnet_prefix(struct rdma_ah_attr *attr, + __be64 prefix) +{ + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + grh->dgid.global.subnet_prefix = prefix; +} + +static inline void rdma_ah_set_interface_id(struct rdma_ah_attr *attr, + __be64 if_id) +{ + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + grh->dgid.global.interface_id = if_id; +} + +static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr, + union ib_gid *dgid, u32 flow_label, + u8 sgid_index, u8 hop_limit, + u8 traffic_class) +{ + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + attr->ah_flags = IB_AH_GRH; + if (dgid) + grh->dgid = *dgid; + grh->flow_label = flow_label; + grh->sgid_index = sgid_index; + grh->hop_limit = hop_limit; + grh->traffic_class = traffic_class; +} + +/*Get AH type */ +static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, + u32 port_num) +{ + if ((rdma_protocol_roce(dev, port_num)) || + (rdma_protocol_iwarp(dev, port_num))) + return RDMA_AH_ATTR_TYPE_ROCE; + else if ((rdma_protocol_ib(dev, port_num)) && + (rdma_cap_opa_ah(dev, port_num))) + return RDMA_AH_ATTR_TYPE_OPA; + else + return RDMA_AH_ATTR_TYPE_IB; +} #endif /* IB_VERBS_H */ diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h new file mode 100644 index 0000000000000000000000000000000000000000..eace28f1555d958a896677977232b814c4642568 --- /dev/null +++ b/include/rdma/opa_addr.h @@ -0,0 +1,79 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef OPA_ADDR_H +#define OPA_ADDR_H + +#define OPA_SPECIAL_OUI (0x00066AULL) +#define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) + +/** + * ib_is_opa_gid: Returns true if the top 24 bits of the gid + * contains the OPA_STL_OUI identifier. This identifies that + * the provided gid is a special purpose GID meant to carry + * extended LID information. + * + * @gid: The Global identifier + */ +static inline bool ib_is_opa_gid(union ib_gid *gid) +{ + return ((be64_to_cpu(gid->global.interface_id) >> 40) == + OPA_SPECIAL_OUI); +} + +/** + * opa_get_lid_from_gid: Returns the last 32 bits of the gid. + * OPA devices use one of the gids in the gid table to also + * store the lid. + * + * @gid: The Global identifier + */ +static inline u32 opa_get_lid_from_gid(union ib_gid *gid) +{ + return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; +} +#endif /* OPA_ADDR_H */ diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 9303e0e4f508d8c524a34b979da7a359cb15b30c..b4f0ac02f28315afba4b7cb031858d35da887f49 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2017 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -127,6 +127,7 @@ #define OPA_LINK_WIDTH_3X 0x0004 #define OPA_LINK_WIDTH_4X 0x0008 +#define OPA_CAP_MASK3_IsEthOnFabricSupported (1 << 13) #define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) #define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) #define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5) diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h new file mode 100644 index 0000000000000000000000000000000000000000..39d6890616a6bf8c93ff98242c0cbabd532845ef --- /dev/null +++ b/include/rdma/opa_vnic.h @@ -0,0 +1,141 @@ +#ifndef _OPA_VNIC_H +#define _OPA_VNIC_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains Intel Omni-Path (OPA) Virtual Network Interface + * Controller (VNIC) specific declarations. + */ + +#include + +/* VNIC uses 16B header format */ +#define OPA_VNIC_L2_TYPE 0x2 + +/* 16 header bytes + 2 reserved bytes */ +#define OPA_VNIC_L2_HDR_LEN (16 + 2) + +#define OPA_VNIC_L4_HDR_LEN 2 + +#define OPA_VNIC_HDR_LEN (OPA_VNIC_L2_HDR_LEN + \ + OPA_VNIC_L4_HDR_LEN) + +#define OPA_VNIC_L4_ETHR 0x78 + +#define OPA_VNIC_ICRC_LEN 4 +#define OPA_VNIC_TAIL_LEN 1 +#define OPA_VNIC_ICRC_TAIL_LEN (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN) + +#define OPA_VNIC_SKB_MDATA_LEN 4 +#define OPA_VNIC_SKB_MDATA_ENCAP_ERR 0x1 + +/* opa vnic rdma netdev's private data structure */ +struct opa_vnic_rdma_netdev { + struct rdma_netdev rn; /* keep this first */ + /* followed by device private data */ + char *dev_priv[0]; +}; + +static inline void *opa_vnic_priv(const struct net_device *dev) +{ + struct rdma_netdev *rn = netdev_priv(dev); + + return rn->clnt_priv; +} + +static inline void *opa_vnic_dev_priv(const struct net_device *dev) +{ + struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev); + + return oparn->dev_priv; +} + +/* opa_vnic skb meta data structrue */ +struct opa_vnic_skb_mdata { + u8 vl; + u8 entropy; + u8 flags; + u8 rsvd; +} __packed; + +/* OPA VNIC group statistics */ +struct opa_vnic_grp_stats { + u64 unicast; + u64 mcastbcast; + u64 untagged; + u64 vlan; + u64 s_64; + u64 s_65_127; + u64 s_128_255; + u64 s_256_511; + u64 s_512_1023; + u64 s_1024_1518; + u64 s_1519_max; +}; + +struct opa_vnic_stats { + /* standard netdev statistics */ + struct rtnl_link_stats64 netstats; + + /* OPA VNIC statistics */ + struct opa_vnic_grp_stats tx_grp; + struct opa_vnic_grp_stats rx_grp; + u64 tx_dlid_zero; + u64 tx_drop_state; + u64 rx_drop_state; + u64 rx_runt; + u64 rx_oversize; +}; + +static inline bool rdma_cap_opa_vnic(struct ib_device *device) +{ + return !!(device->attrs.device_cap_flags & + IB_DEVICE_RDMA_NETDEV_OPA_VNIC); +} + +#endif /* _OPA_VNIC_H */ diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index d3968b561f868db47c9c98ed72ca96d44303bb16..3d2eed3c4e7512b6f6d1592d296e7187ba183404 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -85,7 +85,7 @@ struct rdma_addr { struct rdma_route { struct rdma_addr addr; - struct ib_sa_path_rec *path_rec; + struct sa_path_rec *path_rec; int num_paths; }; @@ -106,7 +106,7 @@ struct rdma_conn_param { struct rdma_ud_param { const void *private_data; u8 private_data_len; - struct ib_ah_attr ah_attr; + struct rdma_ah_attr ah_attr; u32 qp_num; u32 qkey; }; diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h index 2389c3b4540498718860ef04dd5297498257b09a..6947a6ba25579f06c5de1357c6d0a094cbb58d41 100644 --- a/include/rdma/rdma_cm_ib.h +++ b/include/rdma/rdma_cm_ib.h @@ -46,7 +46,7 @@ * connection and replaces the call to rdma_resolve_route. */ int rdma_set_ib_paths(struct rdma_cm_id *id, - struct ib_sa_path_rec *path_rec, int num_paths); + struct sa_path_rec *path_rec, int num_paths); /* Global qkey for UDP QPs and multicast groups. */ #define RDMA_UDP_QKEY 0x01234567 diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 5852661443290d52eb8a3e719716452d752b6eaa..348c102cb5f6afdbe9f90a7c788431bac0219226 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -10,9 +10,6 @@ struct ibnl_client_cbs { struct module *module; }; -int ibnl_init(void); -void ibnl_cleanup(void); - /** * Add a a client to the list of IB netlink exporters. * @index: Index of the added client @@ -77,11 +74,4 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags); -/** - * Check if there are any listeners to the netlink group - * @group: the netlink group ID - * Returns 0 on success or a negative for no listeners. - */ -int ibnl_chk_listeners(unsigned int group); - #endif /* _RDMA_NETLINK_H */ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 8fc1ca7b6f2329fd38f27577de827949a9697df7..4878aaf7bdffd871515bc826470ec77886a1c02c 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -170,7 +170,7 @@ struct rvt_pd { /* Address handle */ struct rvt_ah { struct ib_ah ibah; - struct ib_ah_attr attr; + struct rdma_ah_attr attr; atomic_t refcount; u8 vl; u8 log_pmtu; @@ -311,10 +311,10 @@ struct rvt_driver_provided { unsigned (*free_all_qps)(struct rvt_dev_info *rdi); /* Driver specific AH validation */ - int (*check_ah)(struct ib_device *, struct ib_ah_attr *); + int (*check_ah)(struct ib_device *, struct rdma_ah_attr *); /* Inform the driver a new AH has been created */ - void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, + void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *, struct rvt_ah *); /* Let the driver pick the next queue pair number*/ @@ -506,7 +506,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); -int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table); int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, @@ -516,6 +516,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); -struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, + u16 lid); #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f3816396c76acbc9646cd648b8ed2c493d98bdcf..be6472e5b06bd1ed1117e928e9b3d44748dfbfeb 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -269,8 +269,8 @@ struct rvt_qp { struct ib_qp ibqp; void *priv; /* Driver private data */ /* read mostly fields above and below */ - struct ib_ah_attr remote_ah_attr; - struct ib_ah_attr alt_ah_attr; + struct rdma_ah_attr remote_ah_attr; + struct rdma_ah_attr alt_ah_attr; struct rvt_qp __rcu *next; /* link list for QPN hash table */ struct rvt_swqe *s_wq; /* send work queue */ struct rvt_mmap_info *ip; @@ -324,6 +324,7 @@ struct rvt_qp { u8 r_state; /* opcode of last packet received */ u8 r_flags; u8 r_head_ack_queue; /* index into s_ack_queue[] */ + u8 r_adefered; /* defered ack count */ struct list_head rspwait; /* link for waiting to respond */ @@ -435,9 +436,14 @@ struct rvt_mcast_qp { struct rvt_qp *qp; }; +struct rvt_mcast_addr { + union ib_gid mgid; + u16 lid; +}; + struct rvt_mcast { struct rb_node rb_node; - union ib_gid mgid; + struct rvt_mcast_addr mcast_addr; struct list_head qp_list; wait_queue_head_t wait; atomic_t refcount; @@ -526,7 +532,6 @@ static inline void rvt_qp_wqe_reserve( struct rvt_qp *qp, struct rvt_swqe *wqe) { - wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; atomic_inc(&qp->s_reserved_used); } @@ -550,7 +555,6 @@ static inline void rvt_qp_wqe_unreserve( struct rvt_swqe *wqe) { if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { - wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; atomic_dec(&qp->s_reserved_used); /* insure no compiler re-order up to s_last change */ smp_mb__after_atomic(); @@ -574,6 +578,7 @@ extern const enum ib_wc_opcode ib_rvt_wc_opcode[]; static inline void rvt_qp_swqe_complete( struct rvt_qp *qp, struct rvt_swqe *wqe, + enum ib_wc_opcode opcode, enum ib_wc_status status) { if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) @@ -586,7 +591,7 @@ static inline void rvt_qp_swqe_complete( memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr.wr_id; wc.status = status; - wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode]; + wc.opcode = opcode; wc.qp = &qp->ibqp; wc.byte_len = wqe->length; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h new file mode 100644 index 0000000000000000000000000000000000000000..7771ce9669521912ddfcc611bf01dad5381dc58c --- /dev/null +++ b/include/rdma/uverbs_std_types.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_STD_TYPES__ +#define _UVERBS_STD_TYPES__ + +#include + +extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_wq; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_srq; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_ah; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_flow; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd; + +static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, + bool write, + struct ib_ucontext *ucontext, + int id) +{ + return rdma_lookup_get_uobject(type, ucontext, id, write); +} + +#define uobj_get_type(_type) uverbs_type_attrs_##_type.type + +#define uobj_get_read(_type, _id, _ucontext) \ + __uobj_get(&(_type), false, _ucontext, _id) + +#define uobj_get_obj_read(_type, _id, _ucontext) \ +({ \ + struct ib_uobject *uobj = \ + __uobj_get(&uobj_get_type(_type), \ + false, _ucontext, _id); \ + \ + (struct ib_##_type *)(IS_ERR(uobj) ? NULL : uobj->object); \ +}) + +#define uobj_get_write(_type, _id, _ucontext) \ + __uobj_get(&(_type), true, _ucontext, _id) + +static inline void uobj_put_read(struct ib_uobject *uobj) +{ + rdma_lookup_put_uobject(uobj, false); +} + +#define uobj_put_obj_read(_obj) \ + uobj_put_read((_obj)->uobject) + +static inline void uobj_put_write(struct ib_uobject *uobj) +{ + rdma_lookup_put_uobject(uobj, true); +} + +static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj) +{ + return rdma_remove_commit_uobject(uobj); +} + +static inline void uobj_alloc_commit(struct ib_uobject *uobj) +{ + rdma_alloc_commit_uobject(uobj); +} + +static inline void uobj_alloc_abort(struct ib_uobject *uobj) +{ + rdma_alloc_abort_uobject(uobj); +} + +static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + return rdma_alloc_begin_uobject(type, ucontext); +} + +#define uobj_alloc(_type, ucontext) \ + __uobj_alloc(&(_type), ucontext) + +#endif + diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h new file mode 100644 index 0000000000000000000000000000000000000000..351ea185df44fee1af734ae6cad770f1a5ba6c6d --- /dev/null +++ b/include/rdma/uverbs_types.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_TYPES_ +#define _UVERBS_TYPES_ + +#include +#include + +struct uverbs_obj_type; + +struct uverbs_obj_type_class { + /* + * Get an ib_uobject that corresponds to the given id from ucontext, + * These functions could create or destroy objects if required. + * The action will be finalized only when commit, abort or put fops are + * called. + * The flow of the different actions is: + * [alloc]: Starts with alloc_begin. The handlers logic is than + * executed. If the handler is successful, alloc_commit + * is called and the object is inserted to the repository. + * Once alloc_commit completes the object is visible to + * other threads and userspace. + e Otherwise, alloc_abort is called and the object is + * destroyed. + * [lookup]: Starts with lookup_get which fetches and locks the + * object. After the handler finished using the object, it + * needs to call lookup_put to unlock it. The exclusive + * flag indicates if the object is locked for exclusive + * access. + * [remove]: Starts with lookup_get with exclusive flag set. This + * locks the object for exclusive access. If the handler + * code completed successfully, remove_commit is called + * and the ib_uobject is removed from the context's + * uobjects repository and put. The object itself is + * destroyed as well. Once remove succeeds new krefs to + * the object cannot be acquired by other threads or + * userspace and the hardware driver is removed from the + * object. Other krefs on the object may still exist. + * If the handler code failed, lookup_put should be + * called. This callback is used when the context + * is destroyed as well (process termination, + * reset flow). + */ + struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext); + void (*alloc_commit)(struct ib_uobject *uobj); + void (*alloc_abort)(struct ib_uobject *uobj); + + struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, int id, + bool exclusive); + void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); + /* + * Must be called with the exclusive lock held. If successful uobj is + * invalid on return. On failure uobject is left completely + * unchanged + */ + int __must_check (*remove_commit)(struct ib_uobject *uobj, + enum rdma_remove_reason why); + u8 needs_kfree_rcu; +}; + +struct uverbs_obj_type { + const struct uverbs_obj_type_class * const type_class; + size_t obj_size; + unsigned int destroy_order; +}; + +/* + * Objects type classes which support a detach state (object is still alive but + * it's not attached to any context need to make sure: + * (a) no call through to a driver after a detach is called + * (b) detach isn't called concurrently with context_cleanup + */ + +struct uverbs_obj_idr_type { + /* + * In idr based objects, uverbs_obj_type_class points to a generic + * idr operations. In order to specialize the underlying types (e.g. CQ, + * QPs, etc.), we add destroy_object specific callbacks. + */ + struct uverbs_obj_type type; + + /* Free driver resources from the uobject, make the driver uncallable, + * and move the uobject to the detached state. If the object was + * destroyed by the user's request, a failure should leave the uobject + * completely unchanged. + */ + int __must_check (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); +}; + +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool exclusive); +void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext); +void rdma_alloc_abort_uobject(struct ib_uobject *uobj); +int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); +int rdma_alloc_commit_uobject(struct ib_uobject *uobj); + +struct uverbs_obj_fd_type { + /* + * In fd based objects, uverbs_obj_type_ops points to generic + * fd operations. In order to specialize the underlying types (e.g. + * completion_channel), we use fops, name and flags for fd creation. + * context_closed is called when the context is closed either when + * the driver is removed or the process terminated. + */ + struct uverbs_obj_type type; + int (*context_closed)(struct ib_uobject_file *uobj_file, + enum rdma_remove_reason why); + const struct file_operations *fops; + const char *name; + int flags; +}; + +extern const struct uverbs_obj_type_class uverbs_idr_class; +extern const struct uverbs_obj_type_class uverbs_fd_class; + +#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ + sizeof(char)) +#define UVERBS_TYPE_ALLOC_FD(_size, _order) \ + { \ + .destroy_order = _order, \ + .type_class = &uverbs_fd_class, \ + .obj_size = (_size) + \ + UVERBS_BUILD_BUG_ON((_size) < \ + sizeof(struct ib_uobject_file)),\ + } +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \ + { \ + .destroy_order = _order, \ + .type_class = &uverbs_idr_class, \ + .obj_size = (_size) + \ + UVERBS_BUILD_BUG_ON((_size) < \ + sizeof(struct ib_uobject)), \ + } +#define UVERBS_TYPE_ALLOC_IDR(_order) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order) +#endif diff --git a/include/scsi/fc/Kbuild b/include/scsi/fc/Kbuild deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index da5033dd8cbcab5bff1557452b7a55e888d46c30..2109844be53d132cf13e6b12b5d6ad35cefa10b0 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -321,7 +322,7 @@ struct fc_seq_els_data { */ struct fc_fcp_pkt { spinlock_t scsi_pkt_lock; - atomic_t ref_cnt; + refcount_t ref_cnt; /* SCSI command and data transfer information */ u32 data_len; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 583875ea136ab228ea14a727581c384f50527211..c9bd935f4fd1ca1ff7cd2ab824b1d0cd0441a143 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -139,7 +140,7 @@ struct iscsi_task { /* state set/tested under session->lock */ int state; - atomic_t refcount; + refcount_t refcount; struct list_head running; /* running cmd list */ void *dd_data; /* driver/transport data */ }; diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index dae99d7d2bc05a265faddb5a4f953d34f99e4ef1..dd0f72c95abe19bd09bcc7fbd34fb555f3b84e98 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -693,7 +693,6 @@ extern int sas_bios_param(struct scsi_device *, sector_t capacity, int *hsc); extern struct scsi_transport_template * sas_domain_attach_transport(struct sas_domain_function_template *); -extern void sas_domain_release_transport(struct scsi_transport_template *); int sas_discover_root_expander(struct domain_device *); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 080c7ce9bae8892a43838043d986282a1385283a..05641aebd1811f5141b2bdf63a0f1d82cdace9c1 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -316,7 +316,7 @@ extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); -extern int scsi_device_get(struct scsi_device *); +extern int __must_check scsi_device_get(struct scsi_device *); extern void scsi_device_put(struct scsi_device *); extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, uint, uint, u64); diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 891a658aa8673d03b5ad8ac848f3318662d39707..a5534ccad859d29069bff2b240ccfca4dd0c9567 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -16,6 +16,7 @@ struct scsi_driver { void (*uninit_command)(struct scsi_cmnd *); int (*done)(struct scsi_cmnd *); int (*eh_action)(struct scsi_cmnd *, int); + void (*eh_reset)(struct scsi_cmnd *); }; #define to_scsi_driver(drv) \ container_of((drv), struct scsi_driver, gendrv) diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 98d366b5577069201815ac77e0827fa6f32f664b..64d30d80dadb5f91ba73f2b020ede8d1e11e7ded 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -23,14 +23,15 @@ static inline bool scsi_sense_is_deferred(const struct scsi_sense_hdr *sshdr) return ((sshdr->response_code >= 0x70) && (sshdr->response_code & 1)); } -extern int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len, - u64 * info_out); +extern bool scsi_get_sense_info_fld(const u8 *sense_buffer, int sb_len, + u64 *info_out); extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { /* saved state */ int result; + int eh_eflags; enum dma_data_direction data_direction; unsigned underflow; unsigned char cmd_len; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 3cd8c3bec6384e02328bbb31aafb9aec84f78c37..afb04811b7b9c578f24d8360ace4f85efa07632c 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -451,11 +451,6 @@ struct scsi_host_template { /* True if the controller does not support WRITE SAME */ unsigned no_write_same:1; - /* - * True if asynchronous aborts are not supported - */ - unsigned no_async_abort:1; - /* * Countdown for host blocking with no commands outstanding. */ diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index 6ba66e01f6df33e37bee7f90c773e432ce75fc91..ce78ec8e367da1a34d75136339d9b7e2a088abdf 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -112,6 +112,7 @@ #define WRITE_16 0x8a #define READ_ATTRIBUTE 0x8c #define WRITE_ATTRIBUTE 0x8d +#define WRITE_VERIFY_16 0x8e #define VERIFY_16 0x8f #define SYNCHRONIZE_CACHE_16 0x91 #define WRITE_SAME_16 0x93 diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h index ba0aeb980f7e7cb654171697316f6089b9db01d0..f0c76f9dc28547301d4f67ac76e580560d081989 100644 --- a/include/scsi/scsi_request.h +++ b/include/scsi/scsi_request.h @@ -9,8 +9,10 @@ struct scsi_request { unsigned char __cmd[BLK_MAX_CDB]; unsigned char *cmd; unsigned short cmd_len; + int result; unsigned int sense_len; unsigned int resid_len; /* residual count */ + int retries; void *sense; }; diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index b21b8aa58c4d6b8890592754bab2439db5769188..6e208bb32c780d9c075903f5f23a9d2e0f4cb68d 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -162,6 +162,7 @@ enum fc_tgtid_binding_type { #define FC_PORT_ROLE_FCP_TARGET 0x01 #define FC_PORT_ROLE_FCP_INITIATOR 0x02 #define FC_PORT_ROLE_IP_PORT 0x04 +#define FC_PORT_ROLE_FCP_DUMMY_INITIATOR 0x08 /* The following are for compatibility */ #define FC_RPORT_ROLE_UNKNOWN FC_PORT_ROLE_UNKNOWN diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 3afec7032448c8927d2cd134111606ee0206ca3d..20bc71c3e0b81acf07af84fea30d6b530427bb70 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -197,7 +197,6 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ #define SG_DEFAULT_RETRIES 0 /* Defaults, commented if they differ from original sg driver */ -#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */ #define SG_DEF_FORCE_PACK_ID 0 #define SG_DEF_KEEP_ORPHAN 0 #define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */ diff --git a/include/soc/fsl/qe/immap_qe.h b/include/soc/fsl/qe/immap_qe.h index c76ef30b05ba38ff517972125282c33ecc41b8eb..7baaabd5ec2ced0bf227691bdfea94ba9c53a0e4 100644 --- a/include/soc/fsl/qe/immap_qe.h +++ b/include/soc/fsl/qe/immap_qe.h @@ -464,25 +464,6 @@ struct qe_immap { } __attribute__ ((packed)); extern struct qe_immap __iomem *qe_immr; -extern phys_addr_t get_qe_base(void); - -/* - * Returns the offset within the QE address space of the given pointer. - * - * Note that the QE does not support 36-bit physical addresses, so if - * get_qe_base() returns a number above 4GB, the caller will probably fail. - */ -static inline phys_addr_t immrbar_virt_to_phys(void *address) -{ - void *q = (void *)qe_immr; - - /* Is it a MURAM address? */ - if ((address >= q) && (address < (q + QE_IMMAP_SIZE))) - return get_qe_base() + (address - q); - - /* It's an address returned by kmalloc */ - return virt_to_phys(address); -} #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IMMAP_QE_H */ diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 70339d7958c02c2d8690509a5b62ef8a844bb7ff..0cd4c11479b1f4c4385c294295fdea4a7d82daa1 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -243,6 +243,7 @@ static inline int qe_alive_during_sleep(void) #define qe_muram_free cpm_muram_free #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset +#define qe_muram_dma cpm_muram_dma #define qe_setbits32(_addr, _v) iowrite32be(ioread32be(_addr) | (_v), (_addr)) #define qe_clrbits32(_addr, _v) iowrite32be(ioread32be(_addr) & ~(_v), (_addr)) diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h index 3d4df74a96dee6ca09e40d2e79bf9c21bdd90886..d4dfefdee6c10dfb607642e3a0c773144b08cd16 100644 --- a/include/soc/fsl/qman.h +++ b/include/soc/fsl/qman.h @@ -36,8 +36,11 @@ /* Hardware constants */ #define QM_CHANNEL_SWPORTAL0 0 #define QMAN_CHANNEL_POOL1 0x21 +#define QMAN_CHANNEL_CAAM 0x80 #define QMAN_CHANNEL_POOL1_REV3 0x401 +#define QMAN_CHANNEL_CAAM_REV3 0x840 extern u16 qm_channel_pool1; +extern u16 qm_channel_caam; /* Portal processing (interrupt) sources */ #define QM_PIRQ_CSCI 0x00100000 /* Congestion State Change */ @@ -165,6 +168,7 @@ static inline void qm_fd_set_param(struct qm_fd *fd, enum qm_fd_format fmt, #define qm_fd_set_contig_big(fd, len) \ qm_fd_set_param(fd, qm_fd_contig_big, 0, len) #define qm_fd_set_sg_big(fd, len) qm_fd_set_param(fd, qm_fd_sg_big, 0, len) +#define qm_fd_set_compound(fd, len) qm_fd_set_param(fd, qm_fd_compound, 0, len) static inline void qm_fd_clear_fd(struct qm_fd *fd) { @@ -639,6 +643,7 @@ struct qm_mcc_initcgr { #define QM_CGR_WE_MODE 0x0001 #define QMAN_CGR_FLAG_USE_INIT 0x00000001 +#define QMAN_CGR_MODE_FRAME 0x00000001 /* Portal and Frame Queues */ /* Represents a managed portal */ @@ -791,6 +796,84 @@ struct qman_cgr { #define QMAN_INITFQ_FLAG_SCHED 0x00000001 /* schedule rather than park */ #define QMAN_INITFQ_FLAG_LOCAL 0x00000004 /* set dest portal */ +/* + * For qman_volatile_dequeue(); Choose one PRECEDENCE. EXACT is optional. Use + * NUMFRAMES(n) (6-bit) or NUMFRAMES_TILLEMPTY to fill in the frame-count. Use + * FQID(n) to fill in the frame queue ID. + */ +#define QM_VDQCR_PRECEDENCE_VDQCR 0x0 +#define QM_VDQCR_PRECEDENCE_SDQCR 0x80000000 +#define QM_VDQCR_EXACT 0x40000000 +#define QM_VDQCR_NUMFRAMES_MASK 0x3f000000 +#define QM_VDQCR_NUMFRAMES_SET(n) (((n) & 0x3f) << 24) +#define QM_VDQCR_NUMFRAMES_GET(n) (((n) >> 24) & 0x3f) +#define QM_VDQCR_NUMFRAMES_TILLEMPTY QM_VDQCR_NUMFRAMES_SET(0) + +#define QMAN_VOLATILE_FLAG_WAIT 0x00000001 /* wait if VDQCR is in use */ +#define QMAN_VOLATILE_FLAG_WAIT_INT 0x00000002 /* if wait, interruptible? */ +#define QMAN_VOLATILE_FLAG_FINISH 0x00000004 /* wait till VDQCR completes */ + +/* "Query FQ Non-Programmable Fields" */ +struct qm_mcr_queryfq_np { + u8 verb; + u8 result; + u8 __reserved1; + u8 state; /* QM_MCR_NP_STATE_*** */ + u32 fqd_link; /* 24-bit, _res2[24-31] */ + u16 odp_seq; /* 14-bit, _res3[14-15] */ + u16 orp_nesn; /* 14-bit, _res4[14-15] */ + u16 orp_ea_hseq; /* 15-bit, _res5[15] */ + u16 orp_ea_tseq; /* 15-bit, _res6[15] */ + u32 orp_ea_hptr; /* 24-bit, _res7[24-31] */ + u32 orp_ea_tptr; /* 24-bit, _res8[24-31] */ + u32 pfdr_hptr; /* 24-bit, _res9[24-31] */ + u32 pfdr_tptr; /* 24-bit, _res10[24-31] */ + u8 __reserved2[5]; + u8 is; /* 1-bit, _res12[1-7] */ + u16 ics_surp; + u32 byte_cnt; + u32 frm_cnt; /* 24-bit, _res13[24-31] */ + u32 __reserved3; + u16 ra1_sfdr; /* QM_MCR_NP_RA1_*** */ + u16 ra2_sfdr; /* QM_MCR_NP_RA2_*** */ + u16 __reserved4; + u16 od1_sfdr; /* QM_MCR_NP_OD1_*** */ + u16 od2_sfdr; /* QM_MCR_NP_OD2_*** */ + u16 od3_sfdr; /* QM_MCR_NP_OD3_*** */ +} __packed; + +#define QM_MCR_NP_STATE_FE 0x10 +#define QM_MCR_NP_STATE_R 0x08 +#define QM_MCR_NP_STATE_MASK 0x07 /* Reads FQD::STATE; */ +#define QM_MCR_NP_STATE_OOS 0x00 +#define QM_MCR_NP_STATE_RETIRED 0x01 +#define QM_MCR_NP_STATE_TEN_SCHED 0x02 +#define QM_MCR_NP_STATE_TRU_SCHED 0x03 +#define QM_MCR_NP_STATE_PARKED 0x04 +#define QM_MCR_NP_STATE_ACTIVE 0x05 +#define QM_MCR_NP_PTR_MASK 0x07ff /* for RA[12] & OD[123] */ +#define QM_MCR_NP_RA1_NRA(v) (((v) >> 14) & 0x3) /* FQD::NRA */ +#define QM_MCR_NP_RA2_IT(v) (((v) >> 14) & 0x1) /* FQD::IT */ +#define QM_MCR_NP_OD1_NOD(v) (((v) >> 14) & 0x3) /* FQD::NOD */ +#define QM_MCR_NP_OD3_NPC(v) (((v) >> 14) & 0x3) /* FQD::NPC */ + +enum qm_mcr_queryfq_np_masks { + qm_mcr_fqd_link_mask = BIT(24) - 1, + qm_mcr_odp_seq_mask = BIT(14) - 1, + qm_mcr_orp_nesn_mask = BIT(14) - 1, + qm_mcr_orp_ea_hseq_mask = BIT(15) - 1, + qm_mcr_orp_ea_tseq_mask = BIT(15) - 1, + qm_mcr_orp_ea_hptr_mask = BIT(24) - 1, + qm_mcr_orp_ea_tptr_mask = BIT(24) - 1, + qm_mcr_pfdr_hptr_mask = BIT(24) - 1, + qm_mcr_pfdr_tptr_mask = BIT(24) - 1, + qm_mcr_is_mask = BIT(1) - 1, + qm_mcr_frm_cnt_mask = BIT(24) - 1, +}; + +#define qm_mcr_np_get(np, field) \ + ((np)->field & (qm_mcr_##field##_mask)) + /* Portal Management */ /** * qman_p_irqsource_add - add processing sources to be interrupt-driven @@ -963,6 +1046,25 @@ int qman_retire_fq(struct qman_fq *fq, u32 *flags); */ int qman_oos_fq(struct qman_fq *fq); +/* + * qman_volatile_dequeue - Issue a volatile dequeue command + * @fq: the frame queue object to dequeue from + * @flags: a bit-mask of QMAN_VOLATILE_FLAG_*** options + * @vdqcr: bit mask of QM_VDQCR_*** options, as per qm_dqrr_vdqcr_set() + * + * Attempts to lock access to the portal's VDQCR volatile dequeue functionality. + * The function will block and sleep if QMAN_VOLATILE_FLAG_WAIT is specified and + * the VDQCR is already in use, otherwise returns non-zero for failure. If + * QMAN_VOLATILE_FLAG_FINISH is specified, the function will only return once + * the VDQCR command has finished executing (ie. once the callback for the last + * DQRR entry resulting from the VDQCR command has been called). If not using + * the FINISH flag, completion can be determined either by detecting the + * presence of the QM_DQRR_STAT_UNSCHEDULED and QM_DQRR_STAT_DQCR_EXPIRED bits + * in the "stat" parameter passed to the FQ's dequeue callback, or by waiting + * for the QMAN_FQ_STATE_VDQCR bit to disappear. + */ +int qman_volatile_dequeue(struct qman_fq *fq, u32 flags, u32 vdqcr); + /** * qman_enqueue - Enqueue a frame to a frame queue * @fq: the frame queue object to enqueue to @@ -994,6 +1096,13 @@ int qman_alloc_fqid_range(u32 *result, u32 count); */ int qman_release_fqid(u32 fqid); +/** + * qman_query_fq_np - Queries non-programmable FQD fields + * @fq: the frame queue object to be queried + * @np: storage for the queried FQD fields + */ +int qman_query_fq_np(struct qman_fq *fq, struct qm_mcr_queryfq_np *np); + /* Pool-channel management */ /** * qman_alloc_pool_range - Allocate a contiguous range of pool-channel IDs diff --git a/arch/arm/mach-tegra/flowctrl.h b/include/soc/tegra/flowctrl.h similarity index 77% rename from arch/arm/mach-tegra/flowctrl.h rename to include/soc/tegra/flowctrl.h index 73a9c5016c1ab1ae9414887eb3fdfa16a1b32293..8f86aea4024b2f41ad9f28b9cbf39d397940dbac 100644 --- a/arch/arm/mach-tegra/flowctrl.h +++ b/include/soc/tegra/flowctrl.h @@ -1,7 +1,5 @@ /* - * arch/arm/mach-tegra/flowctrl.h - * - * functions and macros to control the flowcontroller + * Functions and macros to control the flowcontroller * * Copyright (c) 2010-2012, NVIDIA Corporation. All rights reserved. * @@ -18,8 +16,8 @@ * along with this program. If not, see . */ -#ifndef __MACH_TEGRA_FLOWCTRL_H -#define __MACH_TEGRA_FLOWCTRL_H +#ifndef __SOC_TEGRA_FLOWCTRL_H__ +#define __SOC_TEGRA_FLOWCTRL_H__ #define FLOW_CTRL_HALT_CPU0_EVENTS 0x0 #define FLOW_CTRL_WAITEVENT (2 << 29) @@ -53,14 +51,32 @@ #define TEGRA30_FLOW_CTRL_CSR_WFI_BITMAP (0xF << 8) #ifndef __ASSEMBLY__ +#ifdef CONFIG_SOC_TEGRA_FLOWCTRL u32 flowctrl_read_cpu_csr(unsigned int cpuid); void flowctrl_write_cpu_csr(unsigned int cpuid, u32 value); void flowctrl_write_cpu_halt(unsigned int cpuid, u32 value); void flowctrl_cpu_suspend_enter(unsigned int cpuid); void flowctrl_cpu_suspend_exit(unsigned int cpuid); +#else +static inline u32 flowctrl_read_cpu_csr(unsigned int cpuid) +{ + return 0; +} + +static inline void flowctrl_write_cpu_csr(unsigned int cpuid, u32 value) +{ +} + +static inline void flowctrl_write_cpu_halt(unsigned int cpuid, u32 value) {} -void tegra_flowctrl_init(void); -#endif +static inline void flowctrl_cpu_suspend_enter(unsigned int cpuid) +{ +} -#endif +static inline void flowctrl_cpu_suspend_exit(unsigned int cpuid) +{ +} +#endif /* CONFIG_SOC_TEGRA_FLOWCTRL */ +#endif /* __ASSEMBLY */ +#endif /* __SOC_TEGRA_FLOWCTRL_H__ */ diff --git a/include/soc/tegra/pmc.h b/include/soc/tegra/pmc.h index 2f271d1b9cea6fdc9bb1678077eab3222ea3bf92..1c3982bc558fb1d98ec3c90f6663c1a8189dca8e 100644 --- a/include/soc/tegra/pmc.h +++ b/include/soc/tegra/pmc.h @@ -26,12 +26,6 @@ struct clk; struct reset_control; -#ifdef CONFIG_PM_SLEEP -enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void); -void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode); -void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode); -#endif /* CONFIG_PM_SLEEP */ - #ifdef CONFIG_SMP bool tegra_pmc_cpu_is_powered(unsigned int cpuid); int tegra_pmc_cpu_power_on(unsigned int cpuid); @@ -144,7 +138,7 @@ enum tegra_io_pad_voltage { TEGRA_IO_PAD_3300000UV, }; -#ifdef CONFIG_ARCH_TEGRA +#ifdef CONFIG_SOC_TEGRA_PMC int tegra_powergate_is_powered(unsigned int id); int tegra_powergate_power_on(unsigned int id); int tegra_powergate_power_off(unsigned int id); @@ -163,6 +157,11 @@ int tegra_io_pad_get_voltage(enum tegra_io_pad id); /* deprecated, use tegra_io_pad_power_{enable,disable}() instead */ int tegra_io_rail_power_on(unsigned int id); int tegra_io_rail_power_off(unsigned int id); + +enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void); +void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode); +void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode); + #else static inline int tegra_powergate_is_powered(unsigned int id) { @@ -221,6 +220,20 @@ static inline int tegra_io_rail_power_off(unsigned int id) { return -ENOSYS; } -#endif /* CONFIG_ARCH_TEGRA */ + +static inline enum tegra_suspend_mode tegra_pmc_get_suspend_mode(void) +{ + return TEGRA_SUSPEND_NONE; +} + +static inline void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode) +{ +} + +static inline void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode) +{ +} + +#endif /* CONFIG_SOC_TEGRA_PMC */ #endif /* __SOC_TEGRA_PMC_H__ */ diff --git a/include/sound/cs35l35.h b/include/sound/cs35l35.h new file mode 100644 index 0000000000000000000000000000000000000000..29da899e17e4def3dfe72e1b67eabba795f2fa8c --- /dev/null +++ b/include/sound/cs35l35.h @@ -0,0 +1,108 @@ +/* + * linux/sound/cs35l35.h -- Platform data for CS35l35 + * + * Copyright (c) 2016 Cirrus Logic Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __CS35L35_H +#define __CS35L35_H + +struct classh_cfg { + /* + * Class H Algorithm Control Variables + * You can either have it done + * automatically or you can adjust + * these variables for tuning + * + * if you do not enable the internal algorithm + * you will get a set of mixer controls for + * Class H tuning + * + * Section 4.3 of the datasheet + */ + bool classh_bst_override; + bool classh_algo_enable; + int classh_bst_max_limit; + int classh_mem_depth; + int classh_release_rate; + int classh_headroom; + int classh_wk_fet_disable; + int classh_wk_fet_delay; + int classh_wk_fet_thld; + int classh_vpch_auto; + int classh_vpch_rate; + int classh_vpch_man; +}; + +struct monitor_cfg { + /* + * Signal Monitor Data + * highly configurable signal monitoring + * data positioning and different types of + * monitoring data. + * + * Section 4.8.2 - 4.8.4 of the datasheet + */ + bool is_present; + bool imon_specs; + bool vmon_specs; + bool vpmon_specs; + bool vbstmon_specs; + bool vpbrstat_specs; + bool zerofill_specs; + u8 imon_dpth; + u8 imon_loc; + u8 imon_frm; + u8 imon_scale; + u8 vmon_dpth; + u8 vmon_loc; + u8 vmon_frm; + u8 vpmon_dpth; + u8 vpmon_loc; + u8 vpmon_frm; + u8 vbstmon_dpth; + u8 vbstmon_loc; + u8 vbstmon_frm; + u8 vpbrstat_dpth; + u8 vpbrstat_loc; + u8 vpbrstat_frm; + u8 zerofill_dpth; + u8 zerofill_loc; + u8 zerofill_frm; +}; + +struct cs35l35_platform_data { + + /* Stereo (2 Device) */ + bool stereo; + /* serial port drive strength */ + int sp_drv_str; + /* serial port drive in unused slots */ + int sp_drv_unused; + /* Boost Power Down with FET */ + bool bst_pdn_fet_on; + /* Boost Voltage : used if ClassH Algo Enabled */ + int bst_vctl; + /* Boost Converter Peak Current CTRL */ + int bst_ipk; + /* Amp Gain Zero Cross */ + bool gain_zc; + /* Audio Input Location */ + int aud_channel; + /* Advisory Input Location */ + int adv_channel; + /* Shared Boost for stereo */ + bool shared_bst; + /* Specifies this amp is using an external boost supply */ + bool ext_bst; + /* ClassH Algorithm */ + struct classh_cfg classh_algo; + /* Monitor Config */ + struct monitor_cfg mon_cfg; +}; + +#endif /* __CS35L35_H */ diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h index 0013063db7f2bfe22faaf7b7f0d66695581254f9..15fc6daf909657ac7237c896cbc2de9b4ff04b18 100644 --- a/include/sound/hda_register.h +++ b/include/sound/hda_register.h @@ -106,8 +106,26 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define AZX_REG_HSW_EM4 0x100c #define AZX_REG_HSW_EM5 0x1010 -/* Skylake/Broxton display HD-A controller Extended Mode registers */ -#define AZX_REG_SKL_EM4L 0x1040 +/* Skylake/Broxton vendor-specific registers */ +#define AZX_REG_VS_EM1 0x1000 +#define AZX_REG_VS_INRC 0x1004 +#define AZX_REG_VS_OUTRC 0x1008 +#define AZX_REG_VS_FIFOTRK 0x100C +#define AZX_REG_VS_FIFOTRK2 0x1010 +#define AZX_REG_VS_EM2 0x1030 +#define AZX_REG_VS_EM3L 0x1038 +#define AZX_REG_VS_EM3U 0x103C +#define AZX_REG_VS_EM4L 0x1040 +#define AZX_REG_VS_EM4U 0x1044 +#define AZX_REG_VS_LTRC 0x1048 +#define AZX_REG_VS_D0I3C 0x104A +#define AZX_REG_VS_PCE 0x104B +#define AZX_REG_VS_L2MAGC 0x1050 +#define AZX_REG_VS_L2LAHPT 0x1054 +#define AZX_REG_VS_SDXDPIB_XBASE 0x1084 +#define AZX_REG_VS_SDXDPIB_XINTERVAL 0x20 +#define AZX_REG_VS_SDXEFIFOS_XBASE 0x1094 +#define AZX_REG_VS_SDXEFIFOS_XINTERVAL 0x20 /* PCI space */ #define AZX_PCIREG_TCSEL 0x44 @@ -243,9 +261,11 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 }; #define AZX_REG_ML_LOUTPAY 0x20 #define AZX_REG_ML_LINPAY 0x30 -#define AZX_MLCTL_SPA (1<<16) -#define AZX_MLCTL_CPA 23 - +#define ML_LCTL_SCF_MASK 0xF +#define AZX_MLCTL_SPA (0x1 << 16) +#define AZX_MLCTL_CPA (0x1 << 23) +#define AZX_MLCTL_SPA_SHIFT 16 +#define AZX_MLCTL_CPA_SHIFT 23 /* registers for DMA Resume Capability Structure */ #define AZX_DRSM_CAP_ID 0x5 diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index 56004ec8d4418b00e1d62b62e5b057bf964f81a5..96546b30e90075f394ed5e8be608ddeb1bec519c 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -368,24 +368,32 @@ void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus); /* * macros for easy use */ -#define _snd_hdac_chip_write(type, chip, reg, value) \ - ((chip)->io_ops->reg_write ## type(value, (chip)->remap_addr + (reg))) -#define _snd_hdac_chip_read(type, chip, reg) \ - ((chip)->io_ops->reg_read ## type((chip)->remap_addr + (reg))) +#define _snd_hdac_chip_writeb(chip, reg, value) \ + ((chip)->io_ops->reg_writeb(value, (chip)->remap_addr + (reg))) +#define _snd_hdac_chip_readb(chip, reg) \ + ((chip)->io_ops->reg_readb((chip)->remap_addr + (reg))) +#define _snd_hdac_chip_writew(chip, reg, value) \ + ((chip)->io_ops->reg_writew(value, (chip)->remap_addr + (reg))) +#define _snd_hdac_chip_readw(chip, reg) \ + ((chip)->io_ops->reg_readw((chip)->remap_addr + (reg))) +#define _snd_hdac_chip_writel(chip, reg, value) \ + ((chip)->io_ops->reg_writel(value, (chip)->remap_addr + (reg))) +#define _snd_hdac_chip_readl(chip, reg) \ + ((chip)->io_ops->reg_readl((chip)->remap_addr + (reg))) /* read/write a register, pass without AZX_REG_ prefix */ #define snd_hdac_chip_writel(chip, reg, value) \ - _snd_hdac_chip_write(l, chip, AZX_REG_ ## reg, value) + _snd_hdac_chip_writel(chip, AZX_REG_ ## reg, value) #define snd_hdac_chip_writew(chip, reg, value) \ - _snd_hdac_chip_write(w, chip, AZX_REG_ ## reg, value) + _snd_hdac_chip_writew(chip, AZX_REG_ ## reg, value) #define snd_hdac_chip_writeb(chip, reg, value) \ - _snd_hdac_chip_write(b, chip, AZX_REG_ ## reg, value) + _snd_hdac_chip_writeb(chip, AZX_REG_ ## reg, value) #define snd_hdac_chip_readl(chip, reg) \ - _snd_hdac_chip_read(l, chip, AZX_REG_ ## reg) + _snd_hdac_chip_readl(chip, AZX_REG_ ## reg) #define snd_hdac_chip_readw(chip, reg) \ - _snd_hdac_chip_read(w, chip, AZX_REG_ ## reg) + _snd_hdac_chip_readw(chip, AZX_REG_ ## reg) #define snd_hdac_chip_readb(chip, reg) \ - _snd_hdac_chip_read(b, chip, AZX_REG_ ## reg) + _snd_hdac_chip_readb(chip, AZX_REG_ ## reg) /* update a register, pass without AZX_REG_ prefix */ #define snd_hdac_chip_updatel(chip, reg, mask, val) \ diff --git a/include/sound/soc.h b/include/sound/soc.h index cdfb55f7aedeac2b0ee6a7f5f7b4bf81da123ddf..5170fd81e1fd0886c465225b72748c44792240d5 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -434,6 +434,8 @@ int snd_soc_codec_set_sysclk(struct snd_soc_codec *codec, int clk_id, int source, unsigned int freq, int dir); int snd_soc_codec_set_pll(struct snd_soc_codec *codec, int pll_id, int source, unsigned int freq_in, unsigned int freq_out); +int snd_soc_codec_set_jack(struct snd_soc_codec *codec, + struct snd_soc_jack *jack, void *data); int snd_soc_register_card(struct snd_soc_card *card); int snd_soc_unregister_card(struct snd_soc_card *card); @@ -497,7 +499,15 @@ void snd_soc_runtime_deactivate(struct snd_soc_pcm_runtime *rtd, int stream); int snd_soc_runtime_set_dai_fmt(struct snd_soc_pcm_runtime *rtd, unsigned int dai_fmt); +#ifdef CONFIG_DMI int snd_soc_set_dmi_name(struct snd_soc_card *card, const char *flavour); +#else +static inline int snd_soc_set_dmi_name(struct snd_soc_card *card, + const char *flavour) +{ + return 0; +} +#endif /* Utility functions to get clock rates from various things */ int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots); @@ -721,6 +731,7 @@ struct snd_soc_jack_gpio { /* private: */ struct snd_soc_jack *jack; struct delayed_work work; + struct notifier_block pm_notifier; struct gpio_desc *desc; void *data; @@ -812,7 +823,6 @@ struct snd_soc_component { unsigned int ignore_pmdown_time:1; /* pmdown_time is ignored at stop */ unsigned int registered_as_component:1; - unsigned int auxiliary:1; /* for auxiliary component of the card */ unsigned int suspended:1; /* is in suspend PM state */ struct list_head list; @@ -913,6 +923,8 @@ struct snd_soc_codec_driver { int clk_id, int source, unsigned int freq, int dir); int (*set_pll)(struct snd_soc_codec *codec, int pll_id, int source, unsigned int freq_in, unsigned int freq_out); + int (*set_jack)(struct snd_soc_codec *codec, + struct snd_soc_jack *jack, void *data); /* codec IO */ struct regmap *(*get_regmap)(struct device *); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 275581d483ddd90d97c550ee8bf44d705833ecf8..5f17fb770477bbdfa2729a7b35cf21f70493515e 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -557,6 +557,7 @@ struct iscsi_conn { #define LOGIN_FLAGS_READ_ACTIVE 1 #define LOGIN_FLAGS_CLOSED 2 #define LOGIN_FLAGS_READY 4 +#define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; struct delayed_work login_cleanup_work; diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 1b0f447ce850f015e64dd27e47751fe945cbb2ec..e475531565fdfb6808de54bf9b4c84de7a501584 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -10,6 +10,7 @@ * backend module. */ #define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 +#define TRANSPORT_FLAG_PASSTHROUGH_PGR 0x4 struct request_queue; struct scatterlist; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index ccfad0e9c2cdbd68f13c809c7ed6414b2c0c97c1..0c1dce2ac6f0239a90d3e86bff2afd3b0f318128 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -664,6 +664,7 @@ struct se_dev_attrib { int pi_prot_format; enum target_prot_type pi_prot_type; enum target_prot_type hw_pi_prot_type; + int pi_prot_verify; int enforce_pr_isids; int force_pr_aptpl; int is_nonrot; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index a88ed13446ff88e200ed642db63badf4c9a4b680..d0dbe60d8a6dd5ccbb89029796fcabd23d970ceb 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -61,7 +61,16 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, TP_ARGS(bh) ); -DECLARE_EVENT_CLASS(block_rq_with_error, +/** + * block_rq_requeue - place block IO request back on a queue + * @q: queue holding operation + * @rq: block IO operation request + * + * The block operation request @rq is being placed back into queue + * @q. For some reason the request was not completed and needs to be + * put back in the queue. + */ +TRACE_EVENT(block_rq_requeue, TP_PROTO(struct request_queue *q, struct request *rq), @@ -71,7 +80,6 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __field( int, errors ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), @@ -80,7 +88,6 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); - __entry->errors = rq->errors; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); __get_str(cmd)[0] = '\0'; @@ -90,46 +97,13 @@ DECLARE_EVENT_CLASS(block_rq_with_error, MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) -); - -/** - * block_rq_abort - abort block operation request - * @q: queue containing the block operation request - * @rq: block IO operation request - * - * Called immediately after pending block IO operation request @rq in - * queue @q is aborted. The fields in the operation request @rq - * can be examined to determine which device and sectors the pending - * operation would access. - */ -DEFINE_EVENT(block_rq_with_error, block_rq_abort, - - TP_PROTO(struct request_queue *q, struct request *rq), - - TP_ARGS(q, rq) -); - -/** - * block_rq_requeue - place block IO request back on a queue - * @q: queue holding operation - * @rq: block IO operation request - * - * The block operation request @rq is being placed back into queue - * @q. For some reason the request was not completed and needs to be - * put back in the queue. - */ -DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - - TP_PROTO(struct request_queue *q, struct request *rq), - - TP_ARGS(q, rq) + __entry->nr_sector, 0) ); /** * block_rq_complete - block IO operation completed by device driver - * @q: queue containing the block operation request * @rq: block operations request + * @error: status code * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion @@ -140,16 +114,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, */ TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq, - unsigned int nr_bytes), + TP_PROTO(struct request *rq, int error, unsigned int nr_bytes), - TP_ARGS(q, rq, nr_bytes), + TP_ARGS(rq, error, nr_bytes), TP_STRUCT__entry( __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __field( int, errors ) + __field( int, error ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), @@ -158,7 +131,7 @@ TRACE_EVENT(block_rq_complete, __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; - __entry->errors = rq->errors; + __entry->error = error; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); __get_str(cmd)[0] = '\0'; @@ -168,7 +141,7 @@ TRACE_EVENT(block_rq_complete, MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + __entry->nr_sector, __entry->error) ); DECLARE_EVENT_CLASS(block_rq, diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h index c3a53fd47ff1a20a322c71e1f9a4fd89c3462ed6..52c8425d144b349988a16366ab69d0366bd0c065 100644 --- a/include/trace/events/bpf.h +++ b/include/trace/events/bpf.h @@ -321,11 +321,14 @@ TRACE_EVENT(bpf_map_next_key, __dynamic_array(u8, key, map->key_size) __dynamic_array(u8, nxt, map->key_size) __field(bool, key_trunc) + __field(bool, key_null) __field(int, ufd) ), TP_fast_assign( - memcpy(__get_dynamic_array(key), key, map->key_size); + if (key) + memcpy(__get_dynamic_array(key), key, map->key_size); + __entry->key_null = !key; memcpy(__get_dynamic_array(nxt), key_next, map->key_size); __entry->type = map->map_type; __entry->key_len = min(map->key_size, 16U); @@ -336,8 +339,9 @@ TRACE_EVENT(bpf_map_next_key, TP_printk("map type=%s ufd=%d key=[%s%s] next=[%s%s]", __print_symbolic(__entry->type, __MAP_TYPE_SYM_TAB), __entry->ufd, - __print_hex(__get_dynamic_array(key), __entry->key_len), - __entry->key_trunc ? " ..." : "", + __entry->key_null ? "NULL" : __print_hex(__get_dynamic_array(key), + __entry->key_len), + __entry->key_trunc && !__entry->key_null ? " ..." : "", __print_hex(__get_dynamic_array(nxt), __entry->key_len), __entry->key_trunc ? " ..." : "") ); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index a3c3cab643a9528dd5f8ad50dfac29b53384f1db..e37973526153a83c2d96169b77e507525c53cc53 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -12,6 +12,7 @@ struct btrfs_root; struct btrfs_fs_info; struct btrfs_inode; struct extent_map; +struct btrfs_file_extent_item; struct btrfs_ordered_extent; struct btrfs_delayed_ref_node; struct btrfs_delayed_tree_ref; @@ -24,6 +25,7 @@ struct extent_buffer; struct btrfs_work; struct __btrfs_workqueue; struct btrfs_qgroup_extent_record; +struct btrfs_qgroup; #define show_ref_type(type) \ __print_symbolic(type, \ @@ -54,6 +56,12 @@ struct btrfs_qgroup_extent_record; (obj >= BTRFS_ROOT_TREE_OBJECTID && \ obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-" +#define show_fi_type(type) \ + __print_symbolic(type, \ + { BTRFS_FILE_EXTENT_INLINE, "INLINE" }, \ + { BTRFS_FILE_EXTENT_REG, "REG" }, \ + { BTRFS_FILE_EXTENT_PREALLOC, "PREALLOC"}) + #define BTRFS_GROUP_FLAGS \ { BTRFS_BLOCK_GROUP_DATA, "DATA"}, \ { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \ @@ -213,7 +221,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->block_start = map->block_start; __entry->block_len = map->block_len; __entry->flags = map->flags; - __entry->refs = atomic_read(&map->refs); + __entry->refs = refcount_read(&map->refs); __entry->compress_type = map->compress_type; ), @@ -232,6 +240,138 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->refs, __entry->compress_type) ); +/* file extent item */ +DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular, + + TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l, + struct btrfs_file_extent_item *fi, u64 start), + + TP_ARGS(bi, l, fi, start), + + TP_STRUCT__entry_btrfs( + __field( u64, root_obj ) + __field( u64, ino ) + __field( loff_t, isize ) + __field( u64, disk_isize ) + __field( u64, num_bytes ) + __field( u64, ram_bytes ) + __field( u64, disk_bytenr ) + __field( u64, disk_num_bytes ) + __field( u64, extent_offset ) + __field( u8, extent_type ) + __field( u8, compression ) + __field( u64, extent_start ) + __field( u64, extent_end ) + ), + + TP_fast_assign_btrfs(bi->root->fs_info, + __entry->root_obj = bi->root->objectid; + __entry->ino = btrfs_ino(bi); + __entry->isize = bi->vfs_inode.i_size; + __entry->disk_isize = bi->disk_i_size; + __entry->num_bytes = btrfs_file_extent_num_bytes(l, fi); + __entry->ram_bytes = btrfs_file_extent_ram_bytes(l, fi); + __entry->disk_bytenr = btrfs_file_extent_disk_bytenr(l, fi); + __entry->disk_num_bytes = btrfs_file_extent_disk_num_bytes(l, fi); + __entry->extent_offset = btrfs_file_extent_offset(l, fi); + __entry->extent_type = btrfs_file_extent_type(l, fi); + __entry->compression = btrfs_file_extent_compression(l, fi); + __entry->extent_start = start; + __entry->extent_end = (start + __entry->num_bytes); + ), + + TP_printk_btrfs( + "root=%llu(%s) inode=%llu size=%llu disk_isize=%llu " + "file extent range=[%llu %llu] " + "(num_bytes=%llu ram_bytes=%llu disk_bytenr=%llu " + "disk_num_bytes=%llu extent_offset=%llu type=%s " + "compression=%u", + show_root_type(__entry->root_obj), __entry->ino, + __entry->isize, + __entry->disk_isize, __entry->extent_start, + __entry->extent_end, __entry->num_bytes, __entry->ram_bytes, + __entry->disk_bytenr, __entry->disk_num_bytes, + __entry->extent_offset, show_fi_type(__entry->extent_type), + __entry->compression) +); + +DECLARE_EVENT_CLASS( + btrfs__file_extent_item_inline, + + TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l, + struct btrfs_file_extent_item *fi, int slot, u64 start), + + TP_ARGS(bi, l, fi, slot, start), + + TP_STRUCT__entry_btrfs( + __field( u64, root_obj ) + __field( u64, ino ) + __field( loff_t, isize ) + __field( u64, disk_isize ) + __field( u8, extent_type ) + __field( u8, compression ) + __field( u64, extent_start ) + __field( u64, extent_end ) + ), + + TP_fast_assign_btrfs( + bi->root->fs_info, + __entry->root_obj = bi->root->objectid; + __entry->ino = btrfs_ino(bi); + __entry->isize = bi->vfs_inode.i_size; + __entry->disk_isize = bi->disk_i_size; + __entry->extent_type = btrfs_file_extent_type(l, fi); + __entry->compression = btrfs_file_extent_compression(l, fi); + __entry->extent_start = start; + __entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi)); + ), + + TP_printk_btrfs( + "root=%llu(%s) inode=%llu size=%llu disk_isize=%llu " + "file extent range=[%llu %llu] " + "extent_type=%s compression=%u", + show_root_type(__entry->root_obj), __entry->ino, __entry->isize, + __entry->disk_isize, __entry->extent_start, + __entry->extent_end, show_fi_type(__entry->extent_type), + __entry->compression) +); + +DEFINE_EVENT( + btrfs__file_extent_item_regular, btrfs_get_extent_show_fi_regular, + + TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l, + struct btrfs_file_extent_item *fi, u64 start), + + TP_ARGS(bi, l, fi, start) +); + +DEFINE_EVENT( + btrfs__file_extent_item_regular, btrfs_truncate_show_fi_regular, + + TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l, + struct btrfs_file_extent_item *fi, u64 start), + + TP_ARGS(bi, l, fi, start) +); + +DEFINE_EVENT( + btrfs__file_extent_item_inline, btrfs_get_extent_show_fi_inline, + + TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l, + struct btrfs_file_extent_item *fi, int slot, u64 start), + + TP_ARGS(bi, l, fi, slot, start) +); + +DEFINE_EVENT( + btrfs__file_extent_item_inline, btrfs_truncate_show_fi_inline, + + TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l, + struct btrfs_file_extent_item *fi, int slot, u64 start), + + TP_ARGS(bi, l, fi, slot, start) +); + #define show_ordered_flags(flags) \ __print_flags(flags, "|", \ { (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" }, \ @@ -275,7 +415,7 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, __entry->bytes_left = ordered->bytes_left; __entry->flags = ordered->flags; __entry->compress_type = ordered->compress_type; - __entry->refs = atomic_read(&ordered->refs); + __entry->refs = refcount_read(&ordered->refs); __entry->root_objectid = BTRFS_I(inode)->root->root_key.objectid; __entry->truncated_len = ordered->truncated_len; @@ -1475,6 +1615,49 @@ TRACE_EVENT(qgroup_update_counters, __entry->cur_new_count) ); +TRACE_EVENT(qgroup_update_reserve, + + TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup, + s64 diff), + + TP_ARGS(fs_info, qgroup, diff), + + TP_STRUCT__entry_btrfs( + __field( u64, qgid ) + __field( u64, cur_reserved ) + __field( s64, diff ) + ), + + TP_fast_assign_btrfs(fs_info, + __entry->qgid = qgroup->qgroupid; + __entry->cur_reserved = qgroup->reserved; + __entry->diff = diff; + ), + + TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld", + __entry->qgid, __entry->cur_reserved, __entry->diff) +); + +TRACE_EVENT(qgroup_meta_reserve, + + TP_PROTO(struct btrfs_root *root, s64 diff), + + TP_ARGS(root, diff), + + TP_STRUCT__entry_btrfs( + __field( u64, refroot ) + __field( s64, diff ) + ), + + TP_fast_assign_btrfs(root->fs_info, + __entry->refroot = root->objectid; + __entry->diff = diff; + ), + + TP_printk_btrfs("refroot=%llu(%s) diff=%lld", + show_root_type(__entry->refroot), __entry->diff) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 09c71e9aaebf7d7bf83857294b9ec37a13d0920e..dfae175ddebc2340c68e9145d588f57f2f45ebce 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -15,6 +15,7 @@ struct ext4_inode_info; struct mpage_da_data; struct ext4_map_blocks; struct extent_status; +struct ext4_fsmap; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) @@ -2529,6 +2530,79 @@ TRACE_EVENT(ext4_es_shrink, __entry->scan_time, __entry->nr_skipped, __entry->retried) ); +/* fsmap traces */ +DECLARE_EVENT_CLASS(ext4_fsmap_class, + TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, + u64 owner), + TP_ARGS(sb, keydev, agno, bno, len, owner), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, keydev) + __field(u32, agno) + __field(u64, bno) + __field(u64, len) + __field(u64, owner) + ), + TP_fast_assign( + __entry->dev = sb->s_bdev->bd_dev; + __entry->keydev = new_decode_dev(keydev); + __entry->agno = agno; + __entry->bno = bno; + __entry->len = len; + __entry->owner = owner; + ), + TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->keydev), MINOR(__entry->keydev), + __entry->agno, + __entry->bno, + __entry->len, + __entry->owner) +) +#define DEFINE_FSMAP_EVENT(name) \ +DEFINE_EVENT(ext4_fsmap_class, name, \ + TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \ + u64 owner), \ + TP_ARGS(sb, keydev, agno, bno, len, owner)) +DEFINE_FSMAP_EVENT(ext4_fsmap_low_key); +DEFINE_FSMAP_EVENT(ext4_fsmap_high_key); +DEFINE_FSMAP_EVENT(ext4_fsmap_mapping); + +DECLARE_EVENT_CLASS(ext4_getfsmap_class, + TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), + TP_ARGS(sb, fsmap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(dev_t, keydev) + __field(u64, block) + __field(u64, len) + __field(u64, owner) + __field(u64, flags) + ), + TP_fast_assign( + __entry->dev = sb->s_bdev->bd_dev; + __entry->keydev = new_decode_dev(fsmap->fmr_device); + __entry->block = fsmap->fmr_physical; + __entry->len = fsmap->fmr_length; + __entry->owner = fsmap->fmr_owner; + __entry->flags = fsmap->fmr_flags; + ), + TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld flags 0x%llx\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + MAJOR(__entry->keydev), MINOR(__entry->keydev), + __entry->block, + __entry->len, + __entry->owner, + __entry->flags) +) +#define DEFINE_GETFSMAP_EVENT(name) \ +DEFINE_EVENT(ext4_getfsmap_class, name, \ + TP_PROTO(struct super_block *sb, struct ext4_fsmap *fsmap), \ + TP_ARGS(sb, fsmap)) +DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key); +DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key); +DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index c80fcad0a6c97b6975628fe6dcd236863f295c51..15da88c5c3a4d12ef12ff56a7e6cde8a039920b7 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -15,6 +15,8 @@ TRACE_DEFINE_ENUM(META); TRACE_DEFINE_ENUM(META_FLUSH); TRACE_DEFINE_ENUM(INMEM); TRACE_DEFINE_ENUM(INMEM_DROP); +TRACE_DEFINE_ENUM(INMEM_INVALIDATE); +TRACE_DEFINE_ENUM(INMEM_REVOKE); TRACE_DEFINE_ENUM(IPU); TRACE_DEFINE_ENUM(OPU); TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); @@ -42,6 +44,7 @@ TRACE_DEFINE_ENUM(CP_FASTBOOT); TRACE_DEFINE_ENUM(CP_SYNC); TRACE_DEFINE_ENUM(CP_RECOVERY); TRACE_DEFINE_ENUM(CP_DISCARD); +TRACE_DEFINE_ENUM(CP_TRIMMED); #define show_block_type(type) \ __print_symbolic(type, \ @@ -51,12 +54,13 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { META_FLUSH, "META_FLUSH" }, \ { INMEM, "INMEM" }, \ { INMEM_DROP, "INMEM_DROP" }, \ + { INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \ { INMEM_REVOKE, "INMEM_REVOKE" }, \ { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\ - REQ_PRIO) +#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ + REQ_PREFLUSH | REQ_FUA) #define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) #define show_bio_type(op,op_flags) show_bio_op(op), \ @@ -75,16 +79,13 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" }) #define show_bio_op_flags(flags) \ - __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ - { REQ_RAHEAD, "(RA)" }, \ - { REQ_SYNC, "(S)" }, \ - { REQ_SYNC | REQ_PRIO, "(SP)" }, \ - { REQ_META, "(M)" }, \ - { REQ_META | REQ_PRIO, "(MP)" }, \ - { REQ_SYNC | REQ_PREFLUSH , "(SF)" }, \ - { REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" }, \ - { REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" }, \ - { 0, " \b" }) + __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ + { REQ_RAHEAD, "R" }, \ + { REQ_SYNC, "S" }, \ + { REQ_META, "M" }, \ + { REQ_PRIO, "P" }, \ + { REQ_PREFLUSH, "PF" }, \ + { REQ_FUA, "FUA" }) #define show_data_type(type) \ __print_symbolic(type, \ @@ -117,12 +118,14 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { GC_CB, "Cost-Benefit" }) #define show_cpreason(type) \ - __print_symbolic(type, \ + __print_flags(type, "|", \ { CP_UMOUNT, "Umount" }, \ { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ { CP_RECOVERY, "Recovery" }, \ - { CP_DISCARD, "Discard" }) + { CP_DISCARD, "Discard" }, \ + { CP_UMOUNT, "Umount" }, \ + { CP_TRIMMED, "Trimmed" }) struct victim_sel_policy; struct f2fs_map_blocks; @@ -769,7 +772,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s%s, type = %s", + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long long)__entry->old_blkaddr, @@ -822,7 +825,7 @@ DECLARE_EVENT_CLASS(f2fs__bio, __entry->size = bio->bi_iter.bi_size; ), - TP_printk("dev = (%d,%d)/(%d,%d), rw = %s%s, %s, sector = %lld, size = %u", + TP_printk("dev = (%d,%d)/(%d,%d), rw = %s(%s), %s, sector = %lld, size = %u", show_dev(__entry->target), show_dev(__entry->dev), show_bio_type(__entry->op, __entry->op_flags), @@ -1126,7 +1129,7 @@ TRACE_EVENT(f2fs_write_checkpoint, __entry->msg) ); -TRACE_EVENT(f2fs_issue_discard, +DECLARE_EVENT_CLASS(f2fs_discard, TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), @@ -1150,6 +1153,20 @@ TRACE_EVENT(f2fs_issue_discard, (unsigned long long)__entry->blklen) ); +DEFINE_EVENT(f2fs_discard, f2fs_queue_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + +DEFINE_EVENT(f2fs_discard, f2fs_issue_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + TRACE_EVENT(f2fs_issue_reset_zone, TP_PROTO(struct block_device *dev, block_t blkstart), @@ -1174,26 +1191,29 @@ TRACE_EVENT(f2fs_issue_reset_zone, TRACE_EVENT(f2fs_issue_flush, TP_PROTO(struct block_device *dev, unsigned int nobarrier, - unsigned int flush_merge), + unsigned int flush_merge, int ret), - TP_ARGS(dev, nobarrier, flush_merge), + TP_ARGS(dev, nobarrier, flush_merge, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned int, nobarrier) __field(unsigned int, flush_merge) + __field(int, ret) ), TP_fast_assign( __entry->dev = dev->bd_dev; __entry->nobarrier = nobarrier; __entry->flush_merge = flush_merge; + __entry->ret = ret; ), - TP_printk("dev = (%d,%d), %s %s", + TP_printk("dev = (%d,%d), %s %s, ret = %d", show_dev(__entry->dev), __entry->nobarrier ? "skip (nobarrier)" : "issue", - __entry->flush_merge ? " with flush_merge" : "") + __entry->flush_merge ? " with flush_merge" : "", + __entry->ret) ); TRACE_EVENT(f2fs_lookup_extent_tree_start, diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index c566ddc87f73e5885841204d5ae7d160374545a6..08bb3ed18dcc213dbbdcfad0832a83a8fbb2ed4e 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -150,6 +150,136 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \ DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping); DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback); +DECLARE_EVENT_CLASS(dax_pte_fault_class, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), + TP_ARGS(inode, vmf, result), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(pgoff_t, pgoff) + __field(dev_t, dev) + __field(unsigned int, flags) + __field(int, result) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->flags = vmf->flags; + __entry->pgoff = vmf->pgoff; + __entry->result = result; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx pgoff %#lx %s", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __print_flags(__entry->flags, "|", FAULT_FLAG_TRACE), + __entry->address, + __entry->pgoff, + __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE) + ) +) + +#define DEFINE_PTE_FAULT_EVENT(name) \ +DEFINE_EVENT(dax_pte_fault_class, name, \ + TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result), \ + TP_ARGS(inode, vmf, result)) + +DEFINE_PTE_FAULT_EVENT(dax_pte_fault); +DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); +DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); +DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); +DEFINE_PTE_FAULT_EVENT(dax_load_hole); + +TRACE_EVENT(dax_insert_mapping, + TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry), + TP_ARGS(inode, vmf, radix_entry), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(unsigned long, vm_flags) + __field(unsigned long, address) + __field(void *, radix_entry) + __field(dev_t, dev) + __field(int, write) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->vm_flags = vmf->vma->vm_flags; + __entry->address = vmf->address; + __entry->write = vmf->flags & FAULT_FLAG_WRITE; + __entry->radix_entry = radix_entry; + ), + TP_printk("dev %d:%d ino %#lx %s %s address %#lx radix_entry %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->vm_flags & VM_SHARED ? "shared" : "private", + __entry->write ? "write" : "read", + __entry->address, + (unsigned long)__entry->radix_entry + ) +) + +DECLARE_EVENT_CLASS(dax_writeback_range_class, + TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index), + TP_ARGS(inode, start_index, end_index), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(pgoff_t, start_index) + __field(pgoff_t, end_index) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->start_index = start_index; + __entry->end_index = end_index; + ), + TP_printk("dev %d:%d ino %#lx pgoff %#lx-%#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->start_index, + __entry->end_index + ) +) + +#define DEFINE_WRITEBACK_RANGE_EVENT(name) \ +DEFINE_EVENT(dax_writeback_range_class, name, \ + TP_PROTO(struct inode *inode, pgoff_t start_index, pgoff_t end_index),\ + TP_ARGS(inode, start_index, end_index)) + +DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range); +DEFINE_WRITEBACK_RANGE_EVENT(dax_writeback_range_done); + +TRACE_EVENT(dax_writeback_one, + TP_PROTO(struct inode *inode, pgoff_t pgoff, pgoff_t pglen), + TP_ARGS(inode, pgoff, pglen), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(pgoff_t, pgoff) + __field(pgoff_t, pglen) + __field(dev_t, dev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pgoff = pgoff; + __entry->pglen = pglen; + ), + TP_printk("dev %d:%d ino %#lx pgoff %#lx pglen %#lx", + MAJOR(__entry->dev), + MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->pglen + ) +) + #endif /* _TRACE_FS_DAX_H */ /* This part must be outside protection */ diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 2c7befb10f13e3b0175385e501e40ddedaab9094..99254ed89212b0036c7afecb1fcd318d6cac3b8b 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -11,7 +11,6 @@ #define _TRACE_IOMMU_H #include -#include struct device; diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 39123c06a5661316a80dd677b43b1e581a17e2e7..29a3d53a401535ef1e602a2dd82d79d66bc68bea 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -683,6 +683,57 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks) ); +TRACE_EVENT(rxrpc_rx_abort, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + u32 abort_code), + + TP_ARGS(call, serial, abort_code), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_serial_t, serial ) + __field(u32, abort_code ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->serial = serial; + __entry->abort_code = abort_code; + ), + + TP_printk("c=%p ABORT %08x ac=%d", + __entry->call, + __entry->serial, + __entry->abort_code) + ); + +TRACE_EVENT(rxrpc_rx_rwind_change, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + u32 rwind, bool wake), + + TP_ARGS(call, serial, rwind, wake), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_serial_t, serial ) + __field(u32, rwind ) + __field(bool, wake ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->serial = serial; + __entry->rwind = rwind; + __entry->wake = wake; + ), + + TP_printk("c=%p %08x rw=%u%s", + __entry->call, + __entry->serial, + __entry->rwind, + __entry->wake ? " wake" : "") + ); + TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_serial_t serial, u8 flags, bool retrans, bool lose), @@ -1087,6 +1138,56 @@ TRACE_EVENT(rxrpc_improper_term, __entry->abort_code) ); +TRACE_EVENT(rxrpc_rx_eproto, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + const char *why), + + TP_ARGS(call, serial, why), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_serial_t, serial ) + __field(const char *, why ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->serial = serial; + __entry->why = why; + ), + + TP_printk("c=%p EPROTO %08x %s", + __entry->call, + __entry->serial, + __entry->why) + ); + +TRACE_EVENT(rxrpc_connect_call, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(unsigned long, user_call_ID ) + __field(u32, cid ) + __field(u32, call_id ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->user_call_ID = call->user_call_ID; + __entry->cid = call->cid; + __entry->call_id = call->call_id; + ), + + TP_printk("c=%p u=%p %08x:%08x", + __entry->call, + (void *)__entry->user_call_ID, + __entry->cid, + __entry->call_id) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9e3ef6c99e4b0db058a312f0405475b44cef8354..ae1409ffe99a00817f02a2cc51771840712fc1c3 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), @@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch, memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; + /* XXX SCHED_DEADLINE */ ), TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", @@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), @@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template, TP_fast_assign( memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; - __entry->prio = p->prio; + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", @@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait, TP_fast_assign( memcpy(__entry->comm, current->comm, TASK_COMM_LEN); __entry->pid = pid_nr(pid); - __entry->prio = current->prio; + __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ ), TP_printk("comm=%s pid=%d prio=%d", @@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, */ TRACE_EVENT(sched_pi_setprio, - TP_PROTO(struct task_struct *tsk, int newprio), + TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), - TP_ARGS(tsk, newprio), + TP_ARGS(tsk, pi_task), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = newprio; + __entry->newprio = pi_task ? pi_task->prio : tsk->prio; + /* XXX SCHED_DEADLINE bits missing */ ), TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 2b4a8ff72d0dd54647255f6f8dc5ec8b61981872..6cde5b3514c2793cbaaffd3420c7217feafbad7a 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -151,9 +151,9 @@ TRACE_EVENT(thermal_power_cpu_limit, TRACE_EVENT(thermal_power_devfreq_get_power, TP_PROTO(struct thermal_cooling_device *cdev, struct devfreq_dev_status *status, unsigned long freq, - u32 dynamic_power, u32 static_power), + u32 dynamic_power, u32 static_power, u32 power), - TP_ARGS(cdev, status, freq, dynamic_power, static_power), + TP_ARGS(cdev, status, freq, dynamic_power, static_power, power), TP_STRUCT__entry( __string(type, cdev->type ) @@ -161,6 +161,7 @@ TRACE_EVENT(thermal_power_devfreq_get_power, __field(u32, load ) __field(u32, dynamic_power ) __field(u32, static_power ) + __field(u32, power) ), TP_fast_assign( @@ -169,11 +170,13 @@ TRACE_EVENT(thermal_power_devfreq_get_power, __entry->load = (100 * status->busy_time) / status->total_time; __entry->dynamic_power = dynamic_power; __entry->static_power = static_power; + __entry->power = power; ), - TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u", + TP_printk("type=%s freq=%lu load=%u dynamic_power=%u static_power=%u power=%u", __get_str(type), __entry->freq, - __entry->load, __entry->dynamic_power, __entry->static_power) + __entry->load, __entry->dynamic_power, __entry->static_power, + __entry->power) ); TRACE_EVENT(thermal_power_devfreq_limit, diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h index ee7754c6e4a1279e87974f62c8d9c7c3fa3d152c..b3a85b3df53e4eb0fd4dc62219a99c7dac42e01e 100644 --- a/include/trace/events/v4l2.h +++ b/include/trace/events/v4l2.h @@ -29,6 +29,7 @@ EM( V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, "VIDEO_OUTPUT_MPLANE" ) \ EM( V4L2_BUF_TYPE_SDR_CAPTURE, "SDR_CAPTURE" ) \ EM( V4L2_BUF_TYPE_SDR_OUTPUT, "SDR_OUTPUT" ) \ + EM( V4L2_BUF_TYPE_META_CAPTURE, "META_CAPTURE" ) \ EMe(V4L2_BUF_TYPE_PRIVATE, "PRIVATE" ) SHOW_TYPE diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index bce990f5a35d77615bb536009940f6146d1e7d82..31acce9019a63e7b4123dd89d9daa7f31111f74a 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -241,21 +241,21 @@ TRACE_EVENT(xen_mmu_set_pud, (int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval) ); -TRACE_EVENT(xen_mmu_set_pgd, - TP_PROTO(pgd_t *pgdp, pgd_t *user_pgdp, pgd_t pgdval), - TP_ARGS(pgdp, user_pgdp, pgdval), +TRACE_EVENT(xen_mmu_set_p4d, + TP_PROTO(p4d_t *p4dp, p4d_t *user_p4dp, p4d_t p4dval), + TP_ARGS(p4dp, user_p4dp, p4dval), TP_STRUCT__entry( - __field(pgd_t *, pgdp) - __field(pgd_t *, user_pgdp) - __field(pgdval_t, pgdval) - ), - TP_fast_assign(__entry->pgdp = pgdp; - __entry->user_pgdp = user_pgdp; - __entry->pgdval = pgdval.pgd), - TP_printk("pgdp %p user_pgdp %p pgdval %0*llx (raw %0*llx)", - __entry->pgdp, __entry->user_pgdp, - (int)sizeof(pgdval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pgdval)), - (int)sizeof(pgdval_t) * 2, (unsigned long long)__entry->pgdval) + __field(p4d_t *, p4dp) + __field(p4d_t *, user_p4dp) + __field(p4dval_t, p4dval) + ), + TP_fast_assign(__entry->p4dp = p4dp; + __entry->user_p4dp = user_p4dp; + __entry->p4dval = p4d_val(p4dval)), + TP_printk("p4dp %p user_p4dp %p p4dval %0*llx (raw %0*llx)", + __entry->p4dp, __entry->user_p4dp, + (int)sizeof(p4dval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->p4dval)), + (int)sizeof(p4dval_t) * 2, (unsigned long long)__entry->p4dval) ); TRACE_EVENT(xen_mmu_pud_clear, diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild deleted file mode 100644 index 245aa6e05e6adef3f0c0cc4ea2e3a017a2231481..0000000000000000000000000000000000000000 --- a/include/uapi/Kbuild +++ /dev/null @@ -1,15 +0,0 @@ -# UAPI Header export list -# Top-level Makefile calls into asm-$(ARCH) -# List only non-arch directories below - - -header-y += asm-generic/ -header-y += linux/ -header-y += sound/ -header-y += mtd/ -header-y += rdma/ -header-y += video/ -header-y += drm/ -header-y += xen/ -header-y += scsi/ -header-y += misc/ diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild deleted file mode 100644 index b73de7bb7a62ae9eff4d0445ae848c5d9b94999d..0000000000000000000000000000000000000000 --- a/include/uapi/asm-generic/Kbuild +++ /dev/null @@ -1,36 +0,0 @@ -# UAPI Header export list -header-y += auxvec.h -header-y += bitsperlong.h -header-y += errno-base.h -header-y += errno.h -header-y += fcntl.h -header-y += int-l64.h -header-y += int-ll64.h -header-y += ioctl.h -header-y += ioctls.h -header-y += ipcbuf.h -header-y += kvm_para.h -header-y += mman-common.h -header-y += mman.h -header-y += msgbuf.h -header-y += param.h -header-y += poll.h -header-y += posix_types.h -header-y += resource.h -header-y += sembuf.h -header-y += setup.h -header-y += shmbuf.h -header-y += shmparam.h -header-y += siginfo.h -header-y += signal-defs.h -header-y += signal.h -header-y += socket.h -header-y += sockios.h -header-y += stat.h -header-y += statfs.h -header-y += swab.h -header-y += termbits.h -header-y += termios.h -header-y += types.h -header-y += ucontext.h -header-y += unistd.h diff --git a/include/uapi/asm-generic/Kbuild.asm b/include/uapi/asm-generic/Kbuild.asm index fcd50b759217615088767688661c4ece3259b5cf..21381449d98a88f0b5f68942b307128a4269926a 100644 --- a/include/uapi/asm-generic/Kbuild.asm +++ b/include/uapi/asm-generic/Kbuild.asm @@ -1,49 +1,33 @@ -# -# Headers that are optional in usr/include/asm/ -# -opt-header += kvm.h -opt-header += kvm_para.h -opt-header += a.out.h - # # Headers that are mandatory in usr/include/asm/ # -header-y += auxvec.h -header-y += bitsperlong.h -header-y += byteorder.h -header-y += errno.h -header-y += fcntl.h -header-y += ioctl.h -header-y += ioctls.h -header-y += ipcbuf.h -header-y += mman.h -header-y += msgbuf.h -header-y += param.h -header-y += poll.h -header-y += posix_types.h -header-y += ptrace.h -header-y += resource.h -header-y += sembuf.h -header-y += setup.h -header-y += shmbuf.h -header-y += sigcontext.h -header-y += siginfo.h -header-y += signal.h -header-y += socket.h -header-y += sockios.h -header-y += stat.h -header-y += statfs.h -header-y += swab.h -header-y += termbits.h -header-y += termios.h -header-y += types.h -header-y += unistd.h - -header-y += $(foreach hdr,$(opt-header), \ - $(if \ - $(wildcard \ - $(srctree)/arch/$(SRCARCH)/include/uapi/asm/$(hdr) \ - $(srctree)/arch/$(SRCARCH)/include/asm/$(hdr) \ - ), \ - $(hdr) \ - )) +mandatory-y += auxvec.h +mandatory-y += bitsperlong.h +mandatory-y += byteorder.h +mandatory-y += errno.h +mandatory-y += fcntl.h +mandatory-y += ioctl.h +mandatory-y += ioctls.h +mandatory-y += ipcbuf.h +mandatory-y += mman.h +mandatory-y += msgbuf.h +mandatory-y += param.h +mandatory-y += poll.h +mandatory-y += posix_types.h +mandatory-y += ptrace.h +mandatory-y += resource.h +mandatory-y += sembuf.h +mandatory-y += setup.h +mandatory-y += shmbuf.h +mandatory-y += sigcontext.h +mandatory-y += siginfo.h +mandatory-y += signal.h +mandatory-y += socket.h +mandatory-y += sockios.h +mandatory-y += stat.h +mandatory-y += statfs.h +mandatory-y += swab.h +mandatory-y += termbits.h +mandatory-y += termios.h +mandatory-y += types.h +mandatory-y += unistd.h diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 2c748ddad5f875711ed66f91eae9bc69b9a41fe0..2b488565599daf73b8942fe6482890830af94dff 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -94,4 +94,10 @@ #define SCM_TIMESTAMPING_OPT_STATS 54 +#define SO_MEMINFO 55 + +#define SO_INCOMING_NAPI_ID 56 + +#define SO_COOKIE 57 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index a076cf1a3a23be2fbee73dab483e051b37b2370c..061185a5eb51390c68edd38f09ea55284df8ae88 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -194,8 +194,7 @@ __SYSCALL(__NR_quotactl, sys_quotactl) /* fs/readdir.c */ #define __NR_getdents64 61 -#define __ARCH_WANT_COMPAT_SYS_GETDENTS64 -__SC_COMP(__NR_getdents64, sys_getdents64, compat_sys_getdents64) +__SYSCALL(__NR_getdents64, sys_getdents64) /* fs/read_write.c */ #define __NR3264_lseek 62 diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild deleted file mode 100644 index c97addd08f8c9545314e6d3519dc16aeb6f917d5..0000000000000000000000000000000000000000 --- a/include/uapi/drm/Kbuild +++ /dev/null @@ -1,23 +0,0 @@ -# UAPI Header export list -header-y += drm.h -header-y += drm_fourcc.h -header-y += drm_mode.h -header-y += drm_sarea.h -header-y += amdgpu_drm.h -header-y += exynos_drm.h -header-y += i810_drm.h -header-y += i915_drm.h -header-y += mga_drm.h -header-y += nouveau_drm.h -header-y += omap_drm.h -header-y += qxl_drm.h -header-y += r128_drm.h -header-y += radeon_drm.h -header-y += savage_drm.h -header-y += sis_drm.h -header-y += tegra_drm.h -header-y += via_drm.h -header-y += vmwgfx_drm.h -header-y += msm_drm.h -header-y += vc4_drm.h -header-y += virtgpu_drm.h diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 516a9f2857307fd672557e2761692fd1b9fbbbb2..6c249e5cfb09d4d74cfe7c18aea7a46208273487 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -295,7 +295,10 @@ union drm_amdgpu_gem_wait_idle { }; struct drm_amdgpu_wait_cs_in { - /** Command submission handle */ + /* Command submission handle + * handle equals 0 means none to wait for + * handle equals ~0ull means wait for the latest sequence number + */ __u64 handle; /** Absolute timeout to wait */ __u64 timeout; @@ -764,6 +767,25 @@ struct drm_amdgpu_info_device { __u64 cntl_sb_buf_gpu_addr; /* NGG Parameter Cache */ __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + __u32 _pad1; }; struct drm_amdgpu_info_hw_ip { diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 995c8f9c692fb36a12056541d944edd7e3590026..55e301047b3e94a4d0eee18c6bb7732cc162b695 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -306,6 +306,51 @@ extern "C" { */ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) + +/* NVIDIA Tegra frame buffer modifiers */ + +/* + * Some modifiers take parameters, for example the number of vertical GOBs in + * a block. Reserve the lower 32 bits for parameters + */ +#define __fourcc_mod_tegra_mode_shift 32 +#define fourcc_mod_tegra_code(val, params) \ + fourcc_mod_code(NV, ((((__u64)val) << __fourcc_mod_tegra_mode_shift) | params)) +#define fourcc_mod_tegra_mod(m) \ + (m & ~((1ULL << __fourcc_mod_tegra_mode_shift) - 1)) +#define fourcc_mod_tegra_param(m) \ + (m & ((1ULL << __fourcc_mod_tegra_mode_shift) - 1)) + +/* + * Tegra Tiled Layout, used by Tegra 2, 3 and 4. + * + * Pixels are arranged in simple tiles of 16 x 16 bytes. + */ +#define NV_FORMAT_MOD_TEGRA_TILED fourcc_mod_tegra_code(1, 0) + +/* + * Tegra 16Bx2 Block Linear layout, used by TK1/TX1 + * + * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked + * vertically by a power of 2 (1 to 32 GOBs) to form a block. + * + * Within a GOB, data is ordered as 16B x 2 lines sectors laid in Z-shape. + * + * Parameter 'v' is the log2 encoding of the number of GOBs stacked vertically. + * Valid values are: + * + * 0 == ONE_GOB + * 1 == TWO_GOBS + * 2 == FOUR_GOBS + * 3 == EIGHT_GOBS + * 4 == SIXTEEN_GOBS + * 5 == THIRTYTWO_GOBS + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + */ +#define NV_FORMAT_MOD_TEGRA_16BX2_BLOCK(v) fourcc_mod_tegra_code(2, v) + #if defined(__cplusplus) } #endif diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index f8d9fed17ba99418d858ceb57a864b31a00d078a..ca2787d9bf0f87598dc827faf40baf7e92403fba 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -1,493 +1,13 @@ # UAPI Header export list -header-y += android/ -header-y += byteorder/ -header-y += can/ -header-y += caif/ -header-y += dvb/ -header-y += hdlc/ -header-y += hsi/ -header-y += iio/ -header-y += isdn/ -header-y += mmc/ -header-y += nfsd/ -header-y += raid/ -header-y += spi/ -header-y += sunrpc/ -header-y += tc_act/ -header-y += tc_ematch/ -header-y += netfilter/ -header-y += netfilter_arp/ -header-y += netfilter_bridge/ -header-y += netfilter_ipv4/ -header-y += netfilter_ipv6/ -header-y += usb/ -header-y += wimax/ -genhdr-y += version.h - -ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h \ - $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h),) -header-y += a.out.h +ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/a.out.h),) +no-export-headers += a.out.h endif -header-y += acct.h -header-y += adb.h -header-y += adfs_fs.h -header-y += affs_hardblocks.h -header-y += agpgart.h -header-y += aio_abi.h -header-y += am437x-vpfe.h -header-y += apm_bios.h -header-y += arcfb.h -header-y += atalk.h -header-y += atmapi.h -header-y += atmarp.h -header-y += atmbr2684.h -header-y += atmclip.h -header-y += atmdev.h -header-y += atm_eni.h -header-y += atm.h -header-y += atm_he.h -header-y += atm_idt77105.h -header-y += atmioc.h -header-y += atmlec.h -header-y += atmmpc.h -header-y += atm_nicstar.h -header-y += atmppp.h -header-y += atmsap.h -header-y += atmsvc.h -header-y += atm_tcp.h -header-y += atm_zatm.h -header-y += audit.h -header-y += auto_fs4.h -header-y += auto_fs.h -header-y += auxvec.h -header-y += ax25.h -header-y += b1lli.h -header-y += batman_adv.h -header-y += baycom.h -header-y += bcm933xx_hcs.h -header-y += bfs_fs.h -header-y += binfmts.h -header-y += blkpg.h -header-y += blktrace_api.h -header-y += blkzoned.h -header-y += bpf_common.h -header-y += bpf_perf_event.h -header-y += bpf.h -header-y += bpqether.h -header-y += bsg.h -header-y += bt-bmc.h -header-y += btrfs.h -header-y += can.h -header-y += capability.h -header-y += capi.h -header-y += cciss_defs.h -header-y += cciss_ioctl.h -header-y += cdrom.h -header-y += cec.h -header-y += cec-funcs.h -header-y += cgroupstats.h -header-y += chio.h -header-y += cm4000_cs.h -header-y += cn_proc.h -header-y += coda.h -header-y += coda_psdev.h -header-y += coff.h -header-y += connector.h -header-y += const.h -header-y += cramfs_fs.h -header-y += cuda.h -header-y += cyclades.h -header-y += cycx_cfm.h -header-y += dcbnl.h -header-y += dccp.h -header-y += devlink.h -header-y += dlmconstants.h -header-y += dlm_device.h -header-y += dlm.h -header-y += dlm_netlink.h -header-y += dlm_plock.h -header-y += dm-ioctl.h -header-y += dm-log-userspace.h -header-y += dma-buf.h -header-y += dn.h -header-y += dqblk_xfs.h -header-y += edd.h -header-y += efs_fs_sb.h -header-y += elfcore.h -header-y += elf-em.h -header-y += elf-fdpic.h -header-y += elf.h -header-y += errno.h -header-y += errqueue.h -header-y += ethtool.h -header-y += eventpoll.h -header-y += fadvise.h -header-y += falloc.h -header-y += fanotify.h -header-y += fb.h -header-y += fcntl.h -header-y += fd.h -header-y += fdreg.h -header-y += fib_rules.h -header-y += fiemap.h -header-y += filter.h -header-y += firewire-cdev.h -header-y += firewire-constants.h -header-y += flat.h -header-y += fou.h -header-y += fs.h -header-y += fsl_hypervisor.h -header-y += fuse.h -header-y += futex.h -header-y += gameport.h -header-y += genetlink.h -header-y += gen_stats.h -header-y += gfs2_ondisk.h -header-y += gigaset_dev.h -header-y += gpio.h -header-y += gsmmux.h -header-y += gtp.h -header-y += hdlcdrv.h -header-y += hdlc.h -header-y += hdreg.h -header-y += hiddev.h -header-y += hid.h -header-y += hidraw.h -header-y += hpet.h -header-y += hsr_netlink.h -header-y += hyperv.h -header-y += hysdn_if.h -header-y += i2c-dev.h -header-y += i2c.h -header-y += i2o-dev.h -header-y += i8k.h -header-y += icmp.h -header-y += icmpv6.h -header-y += if_addr.h -header-y += if_addrlabel.h -header-y += if_alg.h -header-y += if_arcnet.h -header-y += if_arp.h -header-y += if_bonding.h -header-y += if_bridge.h -header-y += if_cablemodem.h -header-y += if_eql.h -header-y += if_ether.h -header-y += if_fc.h -header-y += if_fddi.h -header-y += if_frad.h -header-y += if.h -header-y += if_hippi.h -header-y += if_infiniband.h -header-y += if_link.h -header-y += if_ltalk.h -header-y += if_macsec.h -header-y += if_packet.h -header-y += if_phonet.h -header-y += if_plip.h -header-y += if_ppp.h -header-y += if_pppol2tp.h -header-y += if_pppox.h -header-y += if_slip.h -header-y += if_team.h -header-y += if_tun.h -header-y += if_tunnel.h -header-y += if_vlan.h -header-y += if_x25.h -header-y += ife.h -header-y += igmp.h -header-y += ila.h -header-y += in6.h -header-y += inet_diag.h -header-y += in.h -header-y += inotify.h -header-y += input.h -header-y += input-event-codes.h -header-y += in_route.h -header-y += ioctl.h -header-y += ip6_tunnel.h -header-y += ipc.h -header-y += ip.h -header-y += ipmi.h -header-y += ipmi_msgdefs.h -header-y += ipsec.h -header-y += ipv6.h -header-y += ipv6_route.h -header-y += ip_vs.h -header-y += ipx.h -header-y += irda.h -header-y += irqnr.h -header-y += isdn_divertif.h -header-y += isdn.h -header-y += isdnif.h -header-y += isdn_ppp.h -header-y += iso_fs.h -header-y += ivtvfb.h -header-y += ivtv.h -header-y += ixjuser.h -header-y += jffs2.h -header-y += joystick.h -header-y += kcmp.h -header-y += kdev_t.h -header-y += kd.h -header-y += kernelcapi.h -header-y += kernel.h -header-y += kernel-page-flags.h -header-y += kexec.h -header-y += keyboard.h -header-y += keyctl.h - -ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm.h \ - $(srctree)/arch/$(SRCARCH)/include/asm/kvm.h),) -header-y += kvm.h +ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm.h),) +no-export-headers += kvm.h endif - -ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h \ - $(srctree)/arch/$(SRCARCH)/include/asm/kvm_para.h),) -header-y += kvm_para.h +ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),) +no-export-headers += kvm_para.h endif - -header-y += hw_breakpoint.h -header-y += l2tp.h -header-y += libc-compat.h -header-y += lirc.h -header-y += limits.h -header-y += llc.h -header-y += loop.h -header-y += lp.h -header-y += lwtunnel.h -header-y += magic.h -header-y += major.h -header-y += map_to_7segment.h -header-y += matroxfb.h -header-y += mdio.h -header-y += media.h -header-y += media-bus-format.h -header-y += mei.h -header-y += membarrier.h -header-y += memfd.h -header-y += mempolicy.h -header-y += meye.h -header-y += mic_common.h -header-y += mic_ioctl.h -header-y += mii.h -header-y += minix_fs.h -header-y += mman.h -header-y += mmtimer.h -header-y += mpls.h -header-y += mpls_iptunnel.h -header-y += mqueue.h -header-y += mroute6.h -header-y += mroute.h -header-y += msdos_fs.h -header-y += msg.h -header-y += mtio.h -header-y += nbd.h -header-y += ncp_fs.h -header-y += ncp.h -header-y += ncp_mount.h -header-y += ncp_no.h -header-y += ndctl.h -header-y += neighbour.h -header-y += netconf.h -header-y += netdevice.h -header-y += net_dropmon.h -header-y += netfilter_arp.h -header-y += netfilter_bridge.h -header-y += netfilter_decnet.h -header-y += netfilter.h -header-y += netfilter_ipv4.h -header-y += netfilter_ipv6.h -header-y += net.h -header-y += netlink_diag.h -header-y += netlink.h -header-y += netrom.h -header-y += net_namespace.h -header-y += net_tstamp.h -header-y += nfc.h -header-y += psample.h -header-y += nfs2.h -header-y += nfs3.h -header-y += nfs4.h -header-y += nfs4_mount.h -header-y += nfsacl.h -header-y += nfs_fs.h -header-y += nfs.h -header-y += nfs_idmap.h -header-y += nfs_mount.h -header-y += nl80211.h -header-y += n_r3964.h -header-y += nubus.h -header-y += nvme_ioctl.h -header-y += nvram.h -header-y += omap3isp.h -header-y += omapfb.h -header-y += oom.h -header-y += openvswitch.h -header-y += packet_diag.h -header-y += param.h -header-y += parport.h -header-y += patchkey.h -header-y += pci.h -header-y += pci_regs.h -header-y += perf_event.h -header-y += personality.h -header-y += pfkeyv2.h -header-y += pg.h -header-y += phantom.h -header-y += phonet.h -header-y += pktcdvd.h -header-y += pkt_cls.h -header-y += pkt_sched.h -header-y += pmu.h -header-y += poll.h -header-y += posix_acl.h -header-y += posix_acl_xattr.h -header-y += posix_types.h -header-y += ppdev.h -header-y += ppp-comp.h -header-y += ppp_defs.h -header-y += ppp-ioctl.h -header-y += pps.h -header-y += prctl.h -header-y += psci.h -header-y += ptp_clock.h -header-y += ptrace.h -header-y += qnx4_fs.h -header-y += qnxtypes.h -header-y += quota.h -header-y += radeonfb.h -header-y += random.h -header-y += raw.h -header-y += rds.h -header-y += reboot.h -header-y += reiserfs_fs.h -header-y += reiserfs_xattr.h -header-y += resource.h -header-y += rfkill.h -header-y += rio_cm_cdev.h -header-y += rio_mport_cdev.h -header-y += romfs_fs.h -header-y += rose.h -header-y += route.h -header-y += rtc.h -header-y += rtnetlink.h -header-y += scc.h -header-y += sched.h -header-y += scif_ioctl.h -header-y += screen_info.h -header-y += sctp.h -header-y += sdla.h -header-y += seccomp.h -header-y += securebits.h -header-y += seg6_genl.h -header-y += seg6.h -header-y += seg6_hmac.h -header-y += seg6_iptunnel.h -header-y += selinux_netlink.h -header-y += sem.h -header-y += serial_core.h -header-y += serial.h -header-y += serial_reg.h -header-y += serio.h -header-y += shm.h -header-y += signalfd.h -header-y += signal.h -header-y += smiapp.h -header-y += snmp.h -header-y += sock_diag.h -header-y += socket.h -header-y += sockios.h -header-y += sonet.h -header-y += sonypi.h -header-y += soundcard.h -header-y += sound.h -header-y += stat.h -header-y += stddef.h -header-y += string.h -header-y += suspend_ioctls.h -header-y += swab.h -header-y += synclink.h -header-y += sync_file.h -header-y += sysctl.h -header-y += sysinfo.h -header-y += target_core_user.h -header-y += taskstats.h -header-y += tcp.h -header-y += tcp_metrics.h -header-y += telephony.h -header-y += termios.h -header-y += thermal.h -header-y += time.h -header-y += timerfd.h -header-y += times.h -header-y += timex.h -header-y += tiocl.h -header-y += tipc_config.h -header-y += tipc_netlink.h -header-y += tipc.h -header-y += toshiba.h -header-y += tty_flags.h -header-y += tty.h -header-y += types.h -header-y += udf_fs_i.h -header-y += udp.h -header-y += uhid.h -header-y += uinput.h -header-y += uio.h -header-y += uleds.h -header-y += ultrasound.h -header-y += un.h -header-y += unistd.h -header-y += unix_diag.h -header-y += usbdevice_fs.h -header-y += usbip.h -header-y += userio.h -header-y += utime.h -header-y += utsname.h -header-y += uuid.h -header-y += uvcvideo.h -header-y += v4l2-common.h -header-y += v4l2-controls.h -header-y += v4l2-dv-timings.h -header-y += v4l2-mediabus.h -header-y += v4l2-subdev.h -header-y += veth.h -header-y += vfio.h -header-y += vhost.h -header-y += videodev2.h -header-y += virtio_9p.h -header-y += virtio_balloon.h -header-y += virtio_blk.h -header-y += virtio_config.h -header-y += virtio_console.h -header-y += virtio_gpu.h -header-y += virtio_ids.h -header-y += virtio_input.h -header-y += virtio_mmio.h -header-y += virtio_net.h -header-y += virtio_pci.h -header-y += virtio_ring.h -header-y += virtio_rng.h -header-y += virtio_scsi.h -header-y += virtio_types.h -header-y += virtio_vsock.h -header-y += virtio_crypto.h -header-y += vm_sockets.h -header-y += vt.h -header-y += vtpm_proxy.h -header-y += wait.h -header-y += wanrouter.h -header-y += watchdog.h -header-y += wimax.h -header-y += wireless.h -header-y += x25.h -header-y += xattr.h -header-y += xfrm.h -header-y += xilinx-v4l2-controls.h -header-y += zorro.h -header-y += zorro_ids.h -header-y += userfaultfd.h diff --git a/include/uapi/linux/android/Kbuild b/include/uapi/linux/android/Kbuild deleted file mode 100644 index ca011eec252a8c2fd962e24ca6542a771dcf9284..0000000000000000000000000000000000000000 --- a/include/uapi/linux/android/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += binder.h diff --git a/include/uapi/linux/aspeed-lpc-ctrl.h b/include/uapi/linux/aspeed-lpc-ctrl.h new file mode 100644 index 0000000000000000000000000000000000000000..c328c976c684cb0b5add21c26de7c96c273dc787 --- /dev/null +++ b/include/uapi/linux/aspeed-lpc-ctrl.h @@ -0,0 +1,61 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_ASPEED_LPC_CTRL_H +#define _UAPI_LINUX_ASPEED_LPC_CTRL_H + +#include +#include + +/* Window types */ +#define ASPEED_LPC_CTRL_WINDOW_FLASH 1 +#define ASPEED_LPC_CTRL_WINDOW_MEMORY 2 + +/* + * This driver provides a window for the host to access a BMC resource + * across the BMC <-> Host LPC bus. + * + * window_type: The BMC resource that the host will access through the + * window. BMC flash and BMC RAM. + * + * window_id: For each window type there may be multiple windows, + * these are referenced by ID. + * + * flags: Reserved for future use, this field is expected to be + * zeroed. + * + * addr: Address on the host LPC bus that the specified window should + * be mapped. This address must be power of two aligned. + * + * offset: Offset into the BMC window that should be mapped to the + * host (at addr). This must be a multiple of size. + * + * size: The size of the mapping. The smallest possible size is 64K. + * This must be power of two aligned. + * + */ + +struct aspeed_lpc_ctrl_mapping { + __u8 window_type; + __u8 window_id; + __u16 flags; + __u32 addr; + __u32 offset; + __u32 size; +}; + +#define __ASPEED_LPC_CTRL_IOCTL_MAGIC 0xb2 + +#define ASPEED_LPC_CTRL_IOCTL_GET_SIZE _IOWR(__ASPEED_LPC_CTRL_IOCTL_MAGIC, \ + 0x00, struct aspeed_lpc_ctrl_mapping) + +#define ASPEED_LPC_CTRL_IOCTL_MAP _IOW(__ASPEED_LPC_CTRL_IOCTL_MAGIC, \ + 0x01, struct aspeed_lpc_ctrl_mapping) + +#endif /* _UAPI_LINUX_ASPEED_LPC_CTRL_H */ diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 22b6ad31c706dae59544286faea5808d7b303342..e3bb0635e94ae238481fece501d33b9b6c0ed937 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -5,7 +5,7 @@ * Bcache on disk data structures */ -#include +#include #define BITMASK(name, type, field, offset, size) \ static inline __u64 name(const type *k) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0539a0ceef38155835552360667070552ebce641..94dfa9def355f7f52805bba5ade3a32c304f989a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,7 @@ enum bpf_cmd { BPF_OBJ_GET, BPF_PROG_ATTACH, BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, }; enum bpf_map_type { @@ -96,6 +97,8 @@ enum bpf_map_type { BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, }; enum bpf_prog_type { @@ -129,6 +132,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -152,6 +162,7 @@ union bpf_attr { __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ __u32 map_flags; /* prealloc or not */ + __u32 inner_map_fd; /* fd pointing to the inner map */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -173,6 +184,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -186,6 +198,17 @@ union bpf_attr { __u32 attach_type; __u32 attach_flags; }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; + __u32 data_size_out; + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + } test; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -456,6 +479,17 @@ union bpf_attr { * Return: * > 0 length of the string including the trailing NUL on success * < 0 error + * + * u64 bpf_get_socket_cookie(skb) + * Get the cookie for the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: 8 Bytes non-decreasing number on success or 0 if the socket + * field is missing inside sk_buff + * + * u32 bpf_get_socket_uid(skb) + * Get the owner uid of the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: uid of the socket owner on success or overflowuid if failed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -503,7 +537,9 @@ union bpf_attr { FN(get_numa_node_id), \ FN(skb_change_head), \ FN(xdp_adjust_head), \ - FN(probe_read_str), + FN(probe_read_str), \ + FN(get_socket_cookie), \ + FN(get_socket_uid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -574,6 +610,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index dcfc3a5a9cb1d20f29bbac00c6ef315006e9d208..a456e5309238bbd78466abee16a0da6ab0248e50 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -291,10 +291,10 @@ struct btrfs_ioctl_feature_flags { struct btrfs_balance_args { __u64 profiles; union { - __le64 usage; + __u64 usage; struct { - __le32 usage_min; - __le32 usage_max; + __u32 usage_min; + __u32 usage_max; }; }; __u64 devid; @@ -324,8 +324,8 @@ struct btrfs_balance_args { * Process chunks that cross stripes_min..stripes_max devices, * BTRFS_BALANCE_ARGS_STRIPES_RANGE */ - __le32 stripes_min; - __le32 stripes_max; + __u32 stripes_min; + __u32 stripes_max; __u64 unused[6]; } __attribute__ ((__packed__)); diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index d5ad15a106a707c13fa77c5733ea96b8e750a5c8..10689e1fdf11d1e232bb3e9dc572a693542dd7b3 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -1,6 +1,9 @@ #ifndef _BTRFS_CTREE_H_ #define _BTRFS_CTREE_H_ +#include +#include + /* * This header contains the structure definitions and constants used * by file system objects that can be retrieved using diff --git a/include/uapi/linux/byteorder/Kbuild b/include/uapi/linux/byteorder/Kbuild deleted file mode 100644 index 619225b9ff2ef8d610f5ac6160d51e93561e1ef2..0000000000000000000000000000000000000000 --- a/include/uapi/linux/byteorder/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# UAPI Header export list -header-y += big_endian.h -header-y += little_endian.h diff --git a/include/uapi/linux/caif/Kbuild b/include/uapi/linux/caif/Kbuild deleted file mode 100644 index 43396612d3a322dae3cb99f00719bfb15001c476..0000000000000000000000000000000000000000 --- a/include/uapi/linux/caif/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# UAPI Header export list -header-y += caif_socket.h -header-y += if_caif.h diff --git a/include/uapi/linux/can/Kbuild b/include/uapi/linux/can/Kbuild deleted file mode 100644 index 21c91bf25a298c9090d103e1fe41406e72a7ae20..0000000000000000000000000000000000000000 --- a/include/uapi/linux/can/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -# UAPI Header export list -header-y += bcm.h -header-y += error.h -header-y += gw.h -header-y += netlink.h -header-y += raw.h diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h new file mode 100644 index 0000000000000000000000000000000000000000..ffb0b7156f7e030b9c65a0b5e96f931fa8187eea --- /dev/null +++ b/include/uapi/linux/can/vxcan.h @@ -0,0 +1,12 @@ +#ifndef _UAPI_CAN_VXCAN_H +#define _UAPI_CAN_VXCAN_H + +enum { + VXCAN_INFO_UNSPEC, + VXCAN_INFO_PEER, + + __VXCAN_INFO_MAX +#define VXCAN_INFO_MAX (__VXCAN_INFO_MAX - 1) +}; + +#endif diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 49bc0629539879c8edc3ae83e4b1933bdfc1dcbd..6fe14d001f68f77b98991d8650df0fcc8a6f3988 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -205,7 +205,7 @@ struct vfs_cap_data { #define CAP_SYS_MODULE 16 /* Allow ioperm/iopl access */ -/* Allow sending USB messages to any device via /proc/bus/usb */ +/* Allow sending USB messages to any device via /dev/bus/usb */ #define CAP_SYS_RAWIO 17 diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index 14b6f24b189ef31e59b7cd354dda260f8f98aa28..a0dfe27bc6c7c0d07bf6abca546105b7d94773fb 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -223,7 +223,7 @@ static inline int cec_msg_status_is_ok(const struct cec_msg *msg) #define CEC_LOG_ADDR_BACKUP_2 13 #define CEC_LOG_ADDR_SPECIFIC 14 #define CEC_LOG_ADDR_UNREGISTERED 15 /* as initiator address */ -#define CEC_LOG_ADDR_BROADCAST 15 /* ad destination address */ +#define CEC_LOG_ADDR_BROADCAST 15 /* as destination address */ /* The logical address types that the CEC device wants to claim */ #define CEC_LOG_ADDR_TYPE_TV 0 diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 11d21fce14d6e08147c94c53b88c66435ac374e8..fdcbb3c29083bb58531f8992bbdd47d5fa0fc23b 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -18,6 +18,8 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#include + /* Netlink configuration messages. */ enum { CRYPTO_MSG_BASE = 0x10, @@ -31,7 +33,7 @@ enum { #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) #define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE) -#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME +#define CRYPTO_MAX_NAME 64 /* Netlink message attributes. */ enum crypto_attr_type_t { @@ -53,9 +55,9 @@ enum crypto_attr_type_t { }; struct crypto_user_alg { - char cru_name[CRYPTO_MAX_ALG_NAME]; - char cru_driver_name[CRYPTO_MAX_ALG_NAME]; - char cru_module_name[CRYPTO_MAX_ALG_NAME]; + char cru_name[CRYPTO_MAX_NAME]; + char cru_driver_name[CRYPTO_MAX_NAME]; + char cru_module_name[CRYPTO_MAX_NAME]; __u32 cru_type; __u32 cru_mask; __u32 cru_refcnt; @@ -73,7 +75,7 @@ struct crypto_report_hash { }; struct crypto_report_cipher { - char type[CRYPTO_MAX_ALG_NAME]; + char type[CRYPTO_MAX_NAME]; unsigned int blocksize; unsigned int min_keysize; unsigned int max_keysize; diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0f1f3a12e23c30e511cdb332059e30ee8d3d5efb..b0e807ac53bbfd3b088533381ecbf25a689633b0 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -65,8 +65,12 @@ enum devlink_command { #define DEVLINK_CMD_ESWITCH_MODE_SET /* obsolete, never use this! */ \ DEVLINK_CMD_ESWITCH_SET - /* add new commands above here */ + DEVLINK_CMD_DPIPE_TABLE_GET, + DEVLINK_CMD_DPIPE_ENTRIES_GET, + DEVLINK_CMD_DPIPE_HEADERS_GET, + DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 }; @@ -115,6 +119,11 @@ enum devlink_eswitch_inline_mode { DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT, }; +enum devlink_eswitch_encap_mode { + DEVLINK_ESWITCH_ENCAP_MODE_NONE, + DEVLINK_ESWITCH_ENCAP_MODE_BASIC, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -148,10 +157,73 @@ enum devlink_attr { DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ DEVLINK_ATTR_ESWITCH_INLINE_MODE, /* u8 */ + DEVLINK_ATTR_DPIPE_TABLES, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_NAME, /* string */ + DEVLINK_ATTR_DPIPE_TABLE_SIZE, /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_MATCHES, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_ACTIONS, /* nested */ + DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, /* u8 */ + + DEVLINK_ATTR_DPIPE_ENTRIES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_INDEX, /* u64 */ + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES, /* nested */ + DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, /* u64 */ + + DEVLINK_ATTR_DPIPE_MATCH, /* nested */ + DEVLINK_ATTR_DPIPE_MATCH_VALUE, /* nested */ + DEVLINK_ATTR_DPIPE_MATCH_TYPE, /* u32 */ + + DEVLINK_ATTR_DPIPE_ACTION, /* nested */ + DEVLINK_ATTR_DPIPE_ACTION_VALUE, /* nested */ + DEVLINK_ATTR_DPIPE_ACTION_TYPE, /* u32 */ + + DEVLINK_ATTR_DPIPE_VALUE, + DEVLINK_ATTR_DPIPE_VALUE_MASK, + DEVLINK_ATTR_DPIPE_VALUE_MAPPING, /* u32 */ + + DEVLINK_ATTR_DPIPE_HEADERS, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER_NAME, /* string */ + DEVLINK_ATTR_DPIPE_HEADER_ID, /* u32 */ + DEVLINK_ATTR_DPIPE_HEADER_FIELDS, /* nested */ + DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, /* u8 */ + DEVLINK_ATTR_DPIPE_HEADER_INDEX, /* u32 */ + + DEVLINK_ATTR_DPIPE_FIELD, /* nested */ + DEVLINK_ATTR_DPIPE_FIELD_NAME, /* string */ + DEVLINK_ATTR_DPIPE_FIELD_ID, /* u32 */ + DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, /* u32 */ + DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, /* u32 */ + + DEVLINK_ATTR_PAD, + + DEVLINK_ATTR_ESWITCH_ENCAP_MODE, /* u8 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 }; +/* Mapping between internal resource described by the field and system + * structure + */ +enum devlink_dpipe_field_mapping_type { + DEVLINK_DPIPE_FIELD_MAPPING_TYPE_NONE, + DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, +}; + +/* Match type - specify the type of the match */ +enum devlink_dpipe_match_type { + DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT, +}; + +/* Action type - specify the action type */ +enum devlink_dpipe_action_type { + DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ diff --git a/include/uapi/linux/dvb/Kbuild b/include/uapi/linux/dvb/Kbuild deleted file mode 100644 index d40942cfc627f43b4b0da73a925d72264f778be0..0000000000000000000000000000000000000000 --- a/include/uapi/linux/dvb/Kbuild +++ /dev/null @@ -1,9 +0,0 @@ -# UAPI Header export list -header-y += audio.h -header-y += ca.h -header-y += dmx.h -header-y += frontend.h -header-y += net.h -header-y += osd.h -header-y += version.h -header-y += video.h diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index cb5d1a51920276d567f7c84b20001bda2f9c76ef..9cd1de954c0aca32718e3f3bd4c95f209cdeb0ae 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -42,7 +42,6 @@ #define EM_TILEGX 191 /* Tilera TILE-Gx */ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ -#define EM_AVR32 0x18ad /* Atmel AVR32 */ /* * This is an interim value that we will use until the committee comes diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b59ee077a5964a888be764fdb5b817e3e377bcc0..b5280db9ef6a8c27f92ee89ffd0432df525e5039 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -409,6 +409,8 @@ typedef struct elf64_shdr { #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ @@ -417,7 +419,7 @@ typedef struct elf64_shdr { #define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */ #define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */ #define NT_METAG_TLS 0x502 /* Metag TLS pointer */ - +#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ /* Note header in a PT_NOTE section */ typedef struct elf32_note { diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3dc91a46e8b8da0b243a12a168bbf205e5a87916..7d4a594d5d58147e3cf4b56eb992268a619badf3 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1486,13 +1486,17 @@ enum ethtool_link_mode_bit_indices { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */ +/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Update drivers/net/phy/phy.c:phy_speed_to_str() and + * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values. + */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 #define SPEED_5000 5000 #define SPEED_10000 10000 +#define SPEED_14000 14000 #define SPEED_20000 20000 #define SPEED_25000 25000 #define SPEED_40000 40000 diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 1c3154913a391fb0f67ade68150fd8af939d791d..f4d5c998cc2bc0b941cc3218a7c512464f450790 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,8 +26,21 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Epoll event masks */ +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 + /* Set exclusive wakeup mode for the target file descriptor */ -#define EPOLLEXCLUSIVE (1 << 28) +#define EPOLLEXCLUSIVE (1U << 28) /* * Request the handling of system wakeup events so as to prevent system suspends @@ -39,13 +52,13 @@ * * Requires CAP_BLOCK_SUSPEND */ -#define EPOLLWAKEUP (1 << 29) +#define EPOLLWAKEUP (1U << 29) /* Set the One Shot behaviour for the target file descriptor */ -#define EPOLLONESHOT (1 << 30) +#define EPOLLONESHOT (1U << 30) /* Set the Edge Triggered behaviour for the target file descriptor */ -#define EPOLLET (1 << 31) +#define EPOLLET (1U << 31) /* * On x86-64 make the 64bit structure have the same alignment as the diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 048a85e9f0174170bc9cf861a388d08e19d1f36c..24e61a54feaaab505b2d2e74e5462a8638d0c897 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -279,12 +279,25 @@ struct fscrypt_policy { __u8 filenames_encryption_mode; __u8 flags; __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; -} __packed; +}; #define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) #define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +/* Parameters for passing an encryption key into the kernel keyring */ +#define FS_KEY_DESC_PREFIX "fscrypt:" +#define FS_KEY_DESC_PREFIX_SIZE 8 + +/* Structure that userspace passes to the kernel keyring */ +#define FS_MAX_KEY_SIZE 64 + +struct fscrypt_key { + __u32 mode; + __u8 raw[FS_MAX_KEY_SIZE]; + __u32 size; +}; + /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) * diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h new file mode 100644 index 0000000000000000000000000000000000000000..7e8e5f0bd6d263c7a9e583a255810f5b85f96441 --- /dev/null +++ b/include/uapi/linux/fsmap.h @@ -0,0 +1,112 @@ +/* + * FS_IOC_GETFSMAP ioctl infrastructure. + * + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + */ +#ifndef _LINUX_FSMAP_H +#define _LINUX_FSMAP_H + +#include + +/* + * Structure for FS_IOC_GETFSMAP. + * + * The memory layout for this call are the scalar values defined in + * struct fsmap_head, followed by two struct fsmap that describe + * the lower and upper bound of mappings to return, followed by an + * array of struct fsmap mappings. + * + * fmh_iflags control the output of the call, whereas fmh_oflags report + * on the overall record output. fmh_count should be set to the + * length of the fmh_recs array, and fmh_entries will be set to the + * number of entries filled out during each call. If fmh_count is + * zero, the number of reverse mappings will be returned in + * fmh_entries, though no mappings will be returned. fmh_reserved + * must be set to zero. + * + * The two elements in the fmh_keys array are used to constrain the + * output. The first element in the array should represent the + * lowest disk mapping ("low key") that the user wants to learn + * about. If this value is all zeroes, the filesystem will return + * the first entry it knows about. For a subsequent call, the + * contents of fsmap_head.fmh_recs[fsmap_head.fmh_count - 1] should be + * copied into fmh_keys[0] to have the kernel start where it left off. + * + * The second element in the fmh_keys array should represent the + * highest disk mapping ("high key") that the user wants to learn + * about. If this value is all ones, the filesystem will not stop + * until it runs out of mapping to return or runs out of space in + * fmh_recs. + * + * fmr_device can be either a 32-bit cookie representing a device, or + * a 32-bit dev_t if the FMH_OF_DEV_T flag is set. fmr_physical, + * fmr_offset, and fmr_length are expressed in units of bytes. + * fmr_owner is either an inode number, or a special value if + * FMR_OF_SPECIAL_OWNER is set in fmr_flags. + */ +struct fsmap { + __u32 fmr_device; /* device id */ + __u32 fmr_flags; /* mapping flags */ + __u64 fmr_physical; /* device offset of segment */ + __u64 fmr_owner; /* owner id */ + __u64 fmr_offset; /* file offset of segment */ + __u64 fmr_length; /* length of segment */ + __u64 fmr_reserved[3]; /* must be zero */ +}; + +struct fsmap_head { + __u32 fmh_iflags; /* control flags */ + __u32 fmh_oflags; /* output flags */ + __u32 fmh_count; /* # of entries in array incl. input */ + __u32 fmh_entries; /* # of entries filled in (output). */ + __u64 fmh_reserved[6]; /* must be zero */ + + struct fsmap fmh_keys[2]; /* low and high keys for the mapping search */ + struct fsmap fmh_recs[]; /* returned records */ +}; + +/* Size of an fsmap_head with room for nr records. */ +static inline size_t +fsmap_sizeof( + unsigned int nr) +{ + return sizeof(struct fsmap_head) + nr * sizeof(struct fsmap); +} + +/* Start the next fsmap query at the end of the current query results. */ +static inline void +fsmap_advance( + struct fsmap_head *head) +{ + head->fmh_keys[0] = head->fmh_recs[head->fmh_entries - 1]; +} + +/* fmh_iflags values - set by FS_IOC_GETFSMAP caller in the header. */ +/* no flags defined yet */ +#define FMH_IF_VALID 0 + +/* fmh_oflags values - returned in the header segment only. */ +#define FMH_OF_DEV_T 0x1 /* fmr_device values will be dev_t */ + +/* fmr_flags values - returned for each non-header segment */ +#define FMR_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ +#define FMR_OF_ATTR_FORK 0x2 /* segment = attribute fork */ +#define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */ +#define FMR_OF_SHARED 0x8 /* segment = shared with another file */ +#define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */ +#define FMR_OF_LAST 0x20 /* segment is the last in the FS */ + +/* Each FS gets to define its own special owner codes. */ +#define FMR_OWNER(type, code) (((__u64)type << 32) | \ + ((__u64)code & 0xFFFFFFFFULL)) +#define FMR_OWNER_TYPE(owner) ((__u32)((__u64)owner >> 32)) +#define FMR_OWNER_CODE(owner) ((__u32)(((__u64)owner & 0xFFFFFFFFULL))) +#define FMR_OWN_FREE FMR_OWNER(0, 1) /* free space */ +#define FMR_OWN_UNKNOWN FMR_OWNER(0, 2) /* unknown owner */ +#define FMR_OWN_METADATA FMR_OWNER(0, 3) /* metadata */ + +#define FS_IOC_GETFSMAP _IOWR('X', 59, struct fsmap_head) + +#endif /* _LINUX_FSMAP_H */ diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 72a04a0e8ccef1e5560378af9177a6c47954daaf..57d1edb8efd9bae86d0b11779f59de9d18f35f1a 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -19,7 +19,8 @@ enum gtp_attrs { GTPA_LINK, GTPA_VERSION, GTPA_TID, /* for GTPv0 only */ - GTPA_SGSN_ADDRESS, + GTPA_PEER_ADDRESS, /* Remote GSN peer, either SGSN or GGSN */ +#define GTPA_SGSN_ADDRESS GTPA_PEER_ADDRESS /* maintain legacy attr name */ GTPA_MS_ADDRESS, GTPA_FLOW, GTPA_NET_NS_FD, diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild deleted file mode 100644 index 8c1d2cb75e330ae605277aa70ec44415a53f3428..0000000000000000000000000000000000000000 --- a/include/uapi/linux/hdlc/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += ioctl.h diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild deleted file mode 100644 index a16a00544258c2e99e46fd1632548f79d875075f..0000000000000000000000000000000000000000 --- a/include/uapi/linux/hsi/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += hsi_char.h cs-protocol.h diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index 4d024d75d64ba8b7831cdb1d61384e47ac14ef33..cf73510b923864fd1b0c20bd3de47ba874a9b855 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -95,6 +95,7 @@ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_NETLINK 824 /* Netlink header */ #define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ +#define ARPHRD_VSOCKMON 826 /* Vsock monitor header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 320fc1e747ee9623db56fbaf26b2a514b5d5a3d1..15ac20382ababeecafe2a336aa59a45e50d879e6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -323,6 +323,7 @@ enum { IFLA_BRPORT_MCAST_FLOOD, IFLA_BRPORT_MCAST_TO_UCAST, IFLA_BRPORT_VLAN_TUNNEL, + IFLA_BRPORT_BCAST_FLOOD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -538,11 +539,18 @@ enum { #define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1) /* GTP section */ + +enum ifla_gtp_role { + GTP_ROLE_GGSN = 0, + GTP_ROLE_SGSN, +}; + enum { IFLA_GTP_UNSPEC, IFLA_GTP_FD0, IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, + IFLA_GTP_ROLE, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) @@ -880,7 +888,18 @@ enum { /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) +#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ + XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE) + +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, +}; enum { IFLA_XDP_UNSPEC, diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 9e7edfd8141e5dea69129807f46b89b2b637ead9..4df96a7dd4fae591d834735046911337221c60d3 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -66,6 +66,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_CBPF 6 #define PACKET_FANOUT_EBPF 7 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 +#define PACKET_FANOUT_FLAG_UNIQUEID 0x2000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 struct tpacket_stats { diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 92f3c8677523237ea0db55b0269aa0b73b9c2009..6792d1967d3145f3f2092d3b1bf052bf13ce04fb 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -75,6 +75,7 @@ enum { IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, IFLA_IPTUN_COLLECT_METADATA, + IFLA_IPTUN_FWMARK, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) @@ -132,6 +133,7 @@ enum { IFLA_GRE_ENCAP_DPORT, IFLA_GRE_COLLECT_METADATA, IFLA_GRE_IGNORE_DF, + IFLA_GRE_FWMARK, __IFLA_GRE_MAX, }; @@ -147,6 +149,7 @@ enum { IFLA_VTI_OKEY, IFLA_VTI_LOCAL, IFLA_VTI_REMOTE, + IFLA_VTI_FWMARK, __IFLA_VTI_MAX, }; diff --git a/include/uapi/linux/iio/Kbuild b/include/uapi/linux/iio/Kbuild deleted file mode 100644 index 86f76d84c44f41fec38b49b8f1fa4d11dadfd648..0000000000000000000000000000000000000000 --- a/include/uapi/linux/iio/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# UAPI Header export list -header-y += events.h -header-y += types.h diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 3af60ee69053322b9c6ca72d3120ce9f23da1c80..f5a8d96e1e098543d7cad6af1d7a77b117701f40 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -641,6 +641,7 @@ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ #define KEY_DATA 0x277 +#define KEY_ONSCREEN_KEYBOARD 0x278 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index e794f7bee22fd82db5595d047203b871778cbc3e..f561c0eb7d63645f341f3d75604c075d5e534319 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -61,9 +61,14 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * Resolution for main axes (ABS_X, ABS_Y, ABS_Z) is reported in - * units per millimeter (units/mm), resolution for rotational axes - * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) + * is reported in units per millimeter (units/mm), resolution + * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported + * in units per radian. + * When INPUT_PROP_ACCELEROMETER is set the resolution changes. + * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in + * in units per g (units/g) and in units per degree per second + * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). */ struct input_absinfo { __s32 value; diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h index 7b26a62e570734219a9a493c75c9abb909ceef1a..b9095a27a08a55fdf2e029178e1ce472637bd877 100644 --- a/include/uapi/linux/ipmi.h +++ b/include/uapi/linux/ipmi.h @@ -355,7 +355,7 @@ struct ipmi_cmdspec { #define IPMICTL_REGISTER_FOR_CMD _IOR(IPMI_IOC_MAGIC, 14, \ struct ipmi_cmdspec) /* - * Unregister a regsitered command. error values: + * Unregister a registered command. error values: * - EFAULT - an address supplied was invalid. * - ENOENT - The netfn/cmd was not found registered for this user. */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8ef9e75e004ebbf951cb50b4b4b17f2c5b907dd4..2ae59178189d7983fb9ed99ae1cd5f40197cfb04 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -183,6 +183,8 @@ enum { DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_ENHANCED_DAD, DEVCONF_ADDR_GEN_MODE, + DEVCONF_DISABLE_POLICY, + DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_MAX }; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 85bbb1799df3f93cb1eacbfec497278c2c20c098..d496c02e14bc44327fd3ab6c3faad0c1d7ac1e12 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,7 +35,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 -#define RTF_PCPU 0x40000000 +#define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 diff --git a/include/uapi/linux/isdn/Kbuild b/include/uapi/linux/isdn/Kbuild deleted file mode 100644 index 89e52850bf29c400d72541991b4877a9d499581d..0000000000000000000000000000000000000000 --- a/include/uapi/linux/isdn/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += capicmd.h diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 86eddd6241f36eb21897d65c6bb51f15bbb4ec58..ef16df06642a93462e1bd5d535ec3119af3adc22 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -60,6 +60,7 @@ #define KEYCTL_INVALIDATE 21 /* invalidate a key */ #define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ #define KEYCTL_DH_COMPUTE 23 /* Compute Diffie-Hellman values */ +#define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */ /* keyctl structures */ struct keyctl_dh_params { @@ -68,4 +69,11 @@ struct keyctl_dh_params { __s32 base; }; +struct keyctl_kdf_params { + char __user *hashname; + char __user *otherinfo; + __u32 otherinfolen; + __u32 __spare[8]; +}; + #endif /* _LINUX_KEYCTL_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f51d5082a377ec1b6f2a062e54fe8d4503702e3b..577429a95ad887ca98dd31d7f54b0a6a6da8b7db 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -702,6 +702,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 +/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ +#define KVM_VM_MIPS_TE 0 +#define KVM_VM_MIPS_VZ 1 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* @@ -883,6 +887,14 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 #define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_MIPS_VZ 137 +#define KVM_CAP_MIPS_TE 138 +#define KVM_CAP_MIPS_64BIT 139 +#define KVM_CAP_S390_GS 140 +#define KVM_CAP_S390_AIS 141 +#define KVM_CAP_SPAPR_TCE_VFIO 142 +#define KVM_CAP_X86_GUEST_MWAIT 143 +#define KVM_CAP_ARM_USER_IRQ 144 #ifdef KVM_CAP_IRQ_ROUTING @@ -1087,6 +1099,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { KVM_DEV_TYPE_FSL_MPIC_20 = 1, @@ -1108,6 +1121,11 @@ enum kvm_device_type { KVM_DEV_TYPE_MAX, }; +struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; +}; + /* * ioctls for VM fds */ @@ -1354,4 +1372,11 @@ struct kvm_assigned_msix_entry { #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +/* Available with KVM_CAP_ARM_USER_IRQ */ + +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + #endif /* __LINUX_KVM_H */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index fd19f36b3129278343f37ee5e59dfa280713eb9c..c8aec4b9e73b8c85189eb9b62fa98cf1fb6e7bfa 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -85,6 +85,10 @@ struct nvm_ioctl_create_conf { }; }; +enum { + NVM_TARGET_FACTORY = 1 << 0, /* Init target in factory mode */ +}; + struct nvm_ioctl_create { char dev[DISK_NAME_LEN]; /* open-channel SSD device */ char tgttype[NVM_TTYPE_NAME_MAX]; /* target type name */ diff --git a/include/uapi/linux/mmc/Kbuild b/include/uapi/linux/mmc/Kbuild deleted file mode 100644 index 8c1d2cb75e330ae605277aa70ec44415a53f3428..0000000000000000000000000000000000000000 --- a/include/uapi/linux/mmc/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += ioctl.h diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h index d80a0498f77ed2d4dac3b61a6eef1906eb8b0626..f5e45095b0bb5c17af6515012587d6d805a7d79c 100644 --- a/include/uapi/linux/mpls_iptunnel.h +++ b/include/uapi/linux/mpls_iptunnel.h @@ -16,11 +16,13 @@ /* MPLS tunnel attributes * [RTA_ENCAP] = { * [MPLS_IPTUNNEL_DST] + * [MPLS_IPTUNNEL_TTL] * } */ enum { MPLS_IPTUNNEL_UNSPEC, MPLS_IPTUNNEL_DST, + MPLS_IPTUNNEL_TTL, __MPLS_IPTUNNEL_MAX, }; #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1) diff --git a/include/uapi/linux/nbd-netlink.h b/include/uapi/linux/nbd-netlink.h new file mode 100644 index 0000000000000000000000000000000000000000..6f7ca3d63a653a113745aa6e1c6fab228c78f610 --- /dev/null +++ b/include/uapi/linux/nbd-netlink.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2017 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#ifndef _UAPILINUX_NBD_NETLINK_H +#define _UAPILINUX_NBD_NETLINK_H + +#define NBD_GENL_FAMILY_NAME "nbd" +#define NBD_GENL_VERSION 0x1 +#define NBD_GENL_MCAST_GROUP_NAME "nbd_mc_group" + +/* Configuration policy attributes, used for CONNECT */ +enum { + NBD_ATTR_UNSPEC, + NBD_ATTR_INDEX, + NBD_ATTR_SIZE_BYTES, + NBD_ATTR_BLOCK_SIZE_BYTES, + NBD_ATTR_TIMEOUT, + NBD_ATTR_SERVER_FLAGS, + NBD_ATTR_CLIENT_FLAGS, + NBD_ATTR_SOCKETS, + NBD_ATTR_DEAD_CONN_TIMEOUT, + NBD_ATTR_DEVICE_LIST, + __NBD_ATTR_MAX, +}; +#define NBD_ATTR_MAX (__NBD_ATTR_MAX - 1) + +/* + * This is the format for multiple devices with NBD_ATTR_DEVICE_LIST + * + * [NBD_ATTR_DEVICE_LIST] + * [NBD_DEVICE_ITEM] + * [NBD_DEVICE_INDEX] + * [NBD_DEVICE_CONNECTED] + */ +enum { + NBD_DEVICE_ITEM_UNSPEC, + NBD_DEVICE_ITEM, + __NBD_DEVICE_ITEM_MAX, +}; +#define NBD_DEVICE_ITEM_MAX (__NBD_DEVICE_ITEM_MAX - 1) + +enum { + NBD_DEVICE_UNSPEC, + NBD_DEVICE_INDEX, + NBD_DEVICE_CONNECTED, + __NBD_DEVICE_MAX, +}; +#define NBD_DEVICE_ATTR_MAX (__NBD_DEVICE_MAX - 1) + +/* + * This is the format for multiple sockets with NBD_ATTR_SOCKETS + * + * [NBD_ATTR_SOCKETS] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + * [NBD_SOCK_ITEM] + * [NBD_SOCK_FD] + */ +enum { + NBD_SOCK_ITEM_UNSPEC, + NBD_SOCK_ITEM, + __NBD_SOCK_ITEM_MAX, +}; +#define NBD_SOCK_ITEM_MAX (__NBD_SOCK_ITEM_MAX - 1) + +enum { + NBD_SOCK_UNSPEC, + NBD_SOCK_FD, + __NBD_SOCK_MAX, +}; +#define NBD_SOCK_MAX (__NBD_SOCK_MAX - 1) + +enum { + NBD_CMD_UNSPEC, + NBD_CMD_CONNECT, + NBD_CMD_DISCONNECT, + NBD_CMD_RECONFIGURE, + NBD_CMD_LINK_DEAD, + NBD_CMD_STATUS, + __NBD_CMD_MAX, +}; +#define NBD_CMD_MAX (__NBD_CMD_MAX - 1) + +#endif /* _UAPILINUX_NBD_NETLINK_H */ diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index c91c642ea9003ed3ec5ba1be70b9899af3847948..155e33f819134a3aecdccf3c139974ef0c54c68c 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -37,7 +37,7 @@ enum { NBD_CMD_TRIM = 4 }; -/* values for flags field */ +/* values for flags field, these are server interaction specific. */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ #define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ @@ -45,6 +45,10 @@ enum { #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ +/* These are client behavior specific flags. */ +#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on + disconnect. */ + /* userspace doesn't need the nbd_device structure */ /* These are sent over the network in the request/reply magic fields */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index ede5c6a62164ae182f4495724bf4a518e5b1b93b..7ad3863cb88b5b86766521bcee795e1a794faba3 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -169,6 +169,7 @@ enum { enum { ND_ARS_VOLATILE = 1, ND_ARS_PERSISTENT = 2, + ND_CONFIG_LOCKED = 1, }; static inline const char *nvdimm_bus_cmd_name(unsigned cmd) diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild deleted file mode 100644 index 03f194aeadc51b56d468236c9bba67afbe0b86bd..0000000000000000000000000000000000000000 --- a/include/uapi/linux/netfilter/Kbuild +++ /dev/null @@ -1,89 +0,0 @@ -# UAPI Header export list -header-y += ipset/ -header-y += nf_conntrack_common.h -header-y += nf_conntrack_ftp.h -header-y += nf_conntrack_sctp.h -header-y += nf_conntrack_tcp.h -header-y += nf_conntrack_tuple_common.h -header-y += nf_log.h -header-y += nf_tables.h -header-y += nf_tables_compat.h -header-y += nf_nat.h -header-y += nfnetlink.h -header-y += nfnetlink_acct.h -header-y += nfnetlink_compat.h -header-y += nfnetlink_conntrack.h -header-y += nfnetlink_cthelper.h -header-y += nfnetlink_cttimeout.h -header-y += nfnetlink_log.h -header-y += nfnetlink_queue.h -header-y += x_tables.h -header-y += xt_AUDIT.h -header-y += xt_CHECKSUM.h -header-y += xt_CLASSIFY.h -header-y += xt_CONNMARK.h -header-y += xt_CONNSECMARK.h -header-y += xt_CT.h -header-y += xt_DSCP.h -header-y += xt_HMARK.h -header-y += xt_IDLETIMER.h -header-y += xt_LED.h -header-y += xt_LOG.h -header-y += xt_MARK.h -header-y += xt_NFLOG.h -header-y += xt_NFQUEUE.h -header-y += xt_RATEEST.h -header-y += xt_SECMARK.h -header-y += xt_SYNPROXY.h -header-y += xt_TCPMSS.h -header-y += xt_TCPOPTSTRIP.h -header-y += xt_TEE.h -header-y += xt_TPROXY.h -header-y += xt_addrtype.h -header-y += xt_bpf.h -header-y += xt_cgroup.h -header-y += xt_cluster.h -header-y += xt_comment.h -header-y += xt_connbytes.h -header-y += xt_connlabel.h -header-y += xt_connlimit.h -header-y += xt_connmark.h -header-y += xt_conntrack.h -header-y += xt_cpu.h -header-y += xt_dccp.h -header-y += xt_devgroup.h -header-y += xt_dscp.h -header-y += xt_ecn.h -header-y += xt_esp.h -header-y += xt_hashlimit.h -header-y += xt_helper.h -header-y += xt_ipcomp.h -header-y += xt_iprange.h -header-y += xt_ipvs.h -header-y += xt_l2tp.h -header-y += xt_length.h -header-y += xt_limit.h -header-y += xt_mac.h -header-y += xt_mark.h -header-y += xt_multiport.h -header-y += xt_nfacct.h -header-y += xt_osf.h -header-y += xt_owner.h -header-y += xt_physdev.h -header-y += xt_pkttype.h -header-y += xt_policy.h -header-y += xt_quota.h -header-y += xt_rateest.h -header-y += xt_realm.h -header-y += xt_recent.h -header-y += xt_rpfilter.h -header-y += xt_sctp.h -header-y += xt_set.h -header-y += xt_socket.h -header-y += xt_state.h -header-y += xt_statistic.h -header-y += xt_string.h -header-y += xt_tcpmss.h -header-y += xt_tcpudp.h -header-y += xt_time.h -header-y += xt_u32.h diff --git a/include/uapi/linux/netfilter/ipset/Kbuild b/include/uapi/linux/netfilter/ipset/Kbuild deleted file mode 100644 index d2680423d9abbe0a3e4bd755f0c27e4eb66dacad..0000000000000000000000000000000000000000 --- a/include/uapi/linux/netfilter/ipset/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -# UAPI Header export list -header-y += ip_set.h -header-y += ip_set_bitmap.h -header-y += ip_set_hash.h -header-y += ip_set_list.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6a8e33dd4ecbd83b0d254d4cf2486dfe5cacfd34..dc947e59d03a62c5dd6fccf27c1db0a88270ea4f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -28,12 +28,14 @@ enum ip_conntrack_info { /* only for userspace compatibility */ #ifndef __KERNEL__ IP_CT_NEW_REPLY = IP_CT_NUMBER, +#else + IP_CT_UNTRACKED = 7, #endif }; #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) /* Bitset representing status of connection. */ enum ip_conntrack_status { @@ -82,10 +84,6 @@ enum ip_conntrack_status { IPS_DYING_BIT = 9, IPS_DYING = (1 << IPS_DYING_BIT), - /* Bits that cannot be altered from userland. */ - IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | - IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING), - /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), @@ -94,13 +92,22 @@ enum ip_conntrack_status { IPS_TEMPLATE_BIT = 11, IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), - /* Conntrack is a fake untracked entry */ + /* Conntrack is a fake untracked entry. Obsolete and not used anymore */ IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), /* Conntrack got a helper explicitly attached via CT target. */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), + + /* Be careful here, modifying these bits can make things messy, + * so don't let users modify them directly. + */ + IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | + IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | + IPS_SEQ_ADJUST | IPS_TEMPLATE), + + __IPS_MAX_BIT = 14, }; /* Connection tracking event types */ @@ -117,6 +124,9 @@ enum ip_conntrack_events { IPCT_NATSEQADJ = IPCT_SEQADJ, IPCT_SECMARK, /* new security mark has been set */ IPCT_LABEL, /* new connlabel has been set */ +#ifdef __KERNEL__ + __IPCT_MAX +#endif }; enum ip_conntrack_expect_events { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 05215d30fe5c9853b7871e799ccdce4878a04ef1..683f6f88fcacefa0898e3898cd75f31422fc0f9a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -815,6 +815,17 @@ enum nft_rt_keys { NFT_RT_NEXTHOP6, }; +/** + * enum nft_hash_types - nf_tables hash expression types + * + * @NFT_HASH_JENKINS: Jenkins Hash + * @NFT_HASH_SYM: Symmetric Hash + */ +enum nft_hash_types { + NFT_HASH_JENKINS, + NFT_HASH_SYM, +}; + /** * enum nft_hash_attributes - nf_tables hash expression netlink attributes * @@ -824,6 +835,7 @@ enum nft_rt_keys { * @NFTA_HASH_MODULUS: modulus value (NLA_U32) * @NFTA_HASH_SEED: seed value (NLA_U32) * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) + * @NFTA_HASH_TYPE: hash operation (NLA_U32: nft_hash_types) */ enum nft_hash_attributes { NFTA_HASH_UNSPEC, @@ -833,6 +845,7 @@ enum nft_hash_attributes { NFTA_HASH_MODULUS, NFTA_HASH_SEED, NFTA_HASH_OFFSET, + NFTA_HASH_TYPE, __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) @@ -888,6 +901,7 @@ enum nft_rt_attributes { * @NFT_CT_BYTES: conntrack bytes * @NFT_CT_AVGPKT: conntrack average bytes per packet * @NFT_CT_ZONE: conntrack zone + * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack */ enum nft_ct_keys { NFT_CT_STATE, @@ -908,6 +922,7 @@ enum nft_ct_keys { NFT_CT_BYTES, NFT_CT_AVGPKT, NFT_CT_ZONE, + NFT_CT_EVENTMASK, }; /** @@ -1244,12 +1259,23 @@ enum nft_fib_flags { NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ + NFTA_FIB_F_PRESENT = 1 << 5, /* check existence only */ +}; + +enum nft_ct_helper_attributes { + NFTA_CT_HELPER_UNSPEC, + NFTA_CT_HELPER_NAME, + NFTA_CT_HELPER_L3PROTO, + NFTA_CT_HELPER_L4PROTO, + __NFTA_CT_HELPER_MAX, }; +#define NFTA_CT_HELPER_MAX (__NFTA_CT_HELPER_MAX - 1) #define NFT_OBJECT_UNSPEC 0 #define NFT_OBJECT_COUNTER 1 #define NFT_OBJECT_QUOTA 2 -#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_CT_HELPER 3 +#define __NFT_OBJECT_MAX 4 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** diff --git a/include/uapi/linux/netfilter_arp/Kbuild b/include/uapi/linux/netfilter_arp/Kbuild deleted file mode 100644 index 62d5637cc0ac5c40cbca78d70b1fb3184cd31427..0000000000000000000000000000000000000000 --- a/include/uapi/linux/netfilter_arp/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# UAPI Header export list -header-y += arp_tables.h -header-y += arpt_mangle.h diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild deleted file mode 100644 index 0fbad8ef96de9ecd00262800e4dda07564bbb13b..0000000000000000000000000000000000000000 --- a/include/uapi/linux/netfilter_bridge/Kbuild +++ /dev/null @@ -1,18 +0,0 @@ -# UAPI Header export list -header-y += ebt_802_3.h -header-y += ebt_among.h -header-y += ebt_arp.h -header-y += ebt_arpreply.h -header-y += ebt_ip.h -header-y += ebt_ip6.h -header-y += ebt_limit.h -header-y += ebt_log.h -header-y += ebt_mark_m.h -header-y += ebt_mark_t.h -header-y += ebt_nat.h -header-y += ebt_nflog.h -header-y += ebt_pkttype.h -header-y += ebt_redirect.h -header-y += ebt_stp.h -header-y += ebt_vlan.h -header-y += ebtables.h diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild deleted file mode 100644 index ecb291df390e584a756e057fbfc8baba9f17f63d..0000000000000000000000000000000000000000 --- a/include/uapi/linux/netfilter_ipv4/Kbuild +++ /dev/null @@ -1,10 +0,0 @@ -# UAPI Header export list -header-y += ip_tables.h -header-y += ipt_CLUSTERIP.h -header-y += ipt_ECN.h -header-y += ipt_LOG.h -header-y += ipt_REJECT.h -header-y += ipt_TTL.h -header-y += ipt_ah.h -header-y += ipt_ecn.h -header-y += ipt_ttl.h diff --git a/include/uapi/linux/netfilter_ipv6/Kbuild b/include/uapi/linux/netfilter_ipv6/Kbuild deleted file mode 100644 index 75a668ca2353ad9abfa07bbbd6e515aa49ec37fb..0000000000000000000000000000000000000000 --- a/include/uapi/linux/netfilter_ipv6/Kbuild +++ /dev/null @@ -1,13 +0,0 @@ -# UAPI Header export list -header-y += ip6_tables.h -header-y += ip6t_HL.h -header-y += ip6t_LOG.h -header-y += ip6t_NPT.h -header-y += ip6t_REJECT.h -header-y += ip6t_ah.h -header-y += ip6t_frag.h -header-y += ip6t_hl.h -header-y += ip6t_ipv6header.h -header-y += ip6t_mh.h -header-y += ip6t_opts.h -header-y += ip6t_rt.h diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f3946a27bd07d5164fac6964785c86044f031790..f86127a46cfc80697df711c847d6a07b5a722347 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -50,12 +50,12 @@ struct nlmsghdr { /* Flags values */ -#define NLM_F_REQUEST 1 /* It is request message. */ -#define NLM_F_MULTI 2 /* Multipart message, terminated by NLMSG_DONE */ -#define NLM_F_ACK 4 /* Reply with ack, with zero or error code */ -#define NLM_F_ECHO 8 /* Echo this request */ -#define NLM_F_DUMP_INTR 16 /* Dump was inconsistent due to sequence change */ -#define NLM_F_DUMP_FILTERED 32 /* Dump was filtered as requested */ +#define NLM_F_REQUEST 0x01 /* It is request message. */ +#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ +#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ +#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ +#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ /* Modifiers to GET request */ #define NLM_F_ROOT 0x100 /* specify tree root */ @@ -69,6 +69,10 @@ struct nlmsghdr { #define NLM_F_CREATE 0x400 /* Create, if it does not exist */ #define NLM_F_APPEND 0x800 /* Add to end of list */ +/* Flags for ACK message */ +#define NLM_F_CAPPED 0x100 /* request was capped */ +#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ + /* 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL 4.4BSD CHANGE NLM_F_REPLACE @@ -101,6 +105,37 @@ struct nlmsghdr { struct nlmsgerr { int error; struct nlmsghdr msg; + /* + * followed by the message contents unless NETLINK_CAP_ACK was set + * or the ACK indicates success (error == 0) + * message length is aligned with NLMSG_ALIGN() + */ + /* + * followed by TLVs defined in enum nlmsgerr_attrs + * if NETLINK_EXT_ACK was set + */ +}; + +/** + * enum nlmsgerr_attrs - nlmsgerr attributes + * @NLMSGERR_ATTR_UNUSED: unused + * @NLMSGERR_ATTR_MSG: error message string (string) + * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original + * message, counting from the beginning of the header (u32) + * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to + * be used - in the success case - to identify a created + * object or operation or similar (binary) + * @__NLMSGERR_ATTR_MAX: number of attributes + * @NLMSGERR_ATTR_MAX: highest attribute number + */ +enum nlmsgerr_attrs { + NLMSGERR_ATTR_UNUSED, + NLMSGERR_ATTR_MSG, + NLMSGERR_ATTR_OFFS, + NLMSGERR_ATTR_COOKIE, + + __NLMSGERR_ATTR_MAX, + NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 }; #define NETLINK_ADD_MEMBERSHIP 1 @@ -115,6 +150,7 @@ struct nlmsgerr { #define NETLINK_LISTEN_ALL_NSID 8 #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 +#define NETLINK_EXT_ACK 11 struct nl_pktinfo { __u32 group; diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index 76b4d87c83a863dc7b0e809f5ab318b9656d1dfb..6dcd4de3397b393eb64c9da83e898484af65fda9 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -38,6 +38,7 @@ enum { NETLINK_DIAG_GROUPS, NETLINK_DIAG_RX_RING, NETLINK_DIAG_TX_RING, + NETLINK_DIAG_FLAGS, __NETLINK_DIAG_MAX, }; @@ -52,5 +53,14 @@ enum { /* deprecated since 4.6 */ #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ #endif +#define NDIAG_SHOW_FLAGS 0x00000008 /* show flags of a netlink socket */ + +/* flags */ +#define NDIAG_FLAG_CB_RUNNING 0x00000001 +#define NDIAG_FLAG_PKTINFO 0x00000002 +#define NDIAG_FLAG_BROADCAST_ERROR 0x00000004 +#define NDIAG_FLAG_NO_ENOBUFS 0x00000008 +#define NDIAG_FLAG_LISTEN_ALL_NSID 0x00000010 +#define NDIAG_FLAG_CAP_ACK 0x00000020 #endif diff --git a/include/uapi/linux/nfsd/Kbuild b/include/uapi/linux/nfsd/Kbuild deleted file mode 100644 index c11bc404053c121fc82604514fd24e6506e1717f..0000000000000000000000000000000000000000 --- a/include/uapi/linux/nfsd/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -# UAPI Header export list -header-y += cld.h -header-y += debug.h -header-y += export.h -header-y += nfsfh.h -header-y += stats.h diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h index f14a9ab06f1f705677a797883bf26a431f1b9c65..ec260274be0ced9fd057cb2d3f78c9c7b6b622e9 100644 --- a/include/uapi/linux/nfsd/cld.h +++ b/include/uapi/linux/nfsd/cld.h @@ -22,6 +22,8 @@ #ifndef _NFSD_CLD_H #define _NFSD_CLD_H +#include + /* latest upcall version available */ #define CLD_UPCALL_VERSION 1 @@ -37,18 +39,18 @@ enum cld_command { /* representation of long-form NFSv4 client ID */ struct cld_name { - uint16_t cn_len; /* length of cm_id */ + __u16 cn_len; /* length of cm_id */ unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ } __attribute__((packed)); /* message struct for communication with userspace */ struct cld_msg { - uint8_t cm_vers; /* upcall version */ - uint8_t cm_cmd; /* upcall command */ - int16_t cm_status; /* return code */ - uint32_t cm_xid; /* transaction id */ + __u8 cm_vers; /* upcall version */ + __u8 cm_cmd; /* upcall command */ + __s16 cm_status; /* return code */ + __u32 cm_xid; /* transaction id */ union { - int64_t cm_gracetime; /* grace period start time */ + __s64 cm_gracetime; /* grace period start time */ struct cld_name cm_name; } __attribute__((packed)) cm_u; } __attribute__((packed)); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5ed257c4cd4eaad8fcbd8dbde4ca88f385629271..b8c44b98f12d4c42d4f88a73049da962be2678b2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -172,6 +172,42 @@ * Multiple such rules can be created. */ +/** + * DOC: FILS shared key authentication offload + * + * FILS shared key authentication offload can be advertized by drivers by + * setting @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD flag. The drivers that support + * FILS shared key authentication offload should be able to construct the + * authentication and association frames for FILS shared key authentication and + * eventually do a key derivation as per IEEE 802.11ai. The below additional + * parameters should be given to driver in %NL80211_CMD_CONNECT. + * %NL80211_ATTR_FILS_ERP_USERNAME - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_REALM - used to construct keyname_nai + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used to construct erp message + * %NL80211_ATTR_FILS_ERP_RRK - used to generate the rIK and rMSK + * rIK should be used to generate an authentication tag on the ERP message and + * rMSK should be used to derive a PMKSA. + * rIK, rMSK should be generated and keyname_nai, sequence number should be used + * as specified in IETF RFC 6696. + * + * When FILS shared key authentication is completed, driver needs to provide the + * below additional parameters to userspace. + * %NL80211_ATTR_FILS_KEK - used for key renewal + * %NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM - used in further EAP-RP exchanges + * %NL80211_ATTR_PMKID - used to identify the PMKSA used/generated + * %Nl80211_ATTR_PMK - used to update PMKSA cache in userspace + * The PMKSA can be maintained in userspace persistently so that it can be used + * later after reboots or wifi turn off/on also. + * + * %NL80211_ATTR_FILS_CACHE_ID is the cache identifier advertized by a FILS + * capable AP supporting PMK caching. It specifies the scope within which the + * PMKSAs are cached in an ESS. %NL80211_CMD_SET_PMKSA and + * %NL80211_CMD_DEL_PMKSA are enhanced to allow support for PMKSA caching based + * on FILS cache identifier. Additionally %NL80211_ATTR_PMK is used with + * %NL80211_SET_PMKSA to specify the PMK corresponding to a PMKSA for driver to + * use in a FILS shared key connection with PMKSA caching. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -351,7 +387,9 @@ * are used. Extra IEs can also be passed from the userspace by * using the %NL80211_ATTR_IE attribute. The first cycle of the * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY - * is supplied. + * is supplied. If the device supports multiple concurrent scheduled + * scans, it will allow such when the caller provides the flag attribute + * %NL80211_ATTR_SCHED_SCAN_MULTI to indicate user-space support for it. * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if * scheduled scan is not running. The caller may assume that as soon * as the call returns, it is safe to start a new scheduled scan again. @@ -370,10 +408,18 @@ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) * - * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry, using %NL80211_ATTR_MAC - * (for the BSSID) and %NL80211_ATTR_PMKID. + * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry using %NL80211_ATTR_MAC + * (for the BSSID), %NL80211_ATTR_PMKID, and optionally %NL80211_ATTR_PMK + * (PMK is used for PTKSA derivation in case of FILS shared key offload) or + * using %NL80211_ATTR_SSID, %NL80211_ATTR_FILS_CACHE_ID, + * %NL80211_ATTR_PMKID, and %NL80211_ATTR_PMK in case of FILS + * authentication where %NL80211_ATTR_FILS_CACHE_ID is the identifier + * advertized by a FILS capable AP identifying the scope of PMKSA in an + * ESS. * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC - * (for the BSSID) and %NL80211_ATTR_PMKID. + * (for the BSSID) and %NL80211_ATTR_PMKID or using %NL80211_ATTR_SSID, + * %NL80211_ATTR_FILS_CACHE_ID, and %NL80211_ATTR_PMKID in case of FILS + * authentication. * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain @@ -2012,6 +2058,36 @@ enum nl80211_commands { * u32 attribute with an &enum nl80211_timeout_reason value. This is used, * e.g., with %NL80211_CMD_CONNECT event. * + * @NL80211_ATTR_FILS_ERP_USERNAME: EAP Re-authentication Protocol (ERP) + * username part of NAI used to refer keys rRK and rIK. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_REALM: EAP Re-authentication Protocol (ERP) realm part + * of NAI specifying the domain name of the ER server. This is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM: Unsigned 16-bit ERP next sequence number + * to use in ERP messages. This is used in generating the FILS wrapped data + * for FILS authentication and is used with %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_ERP_RRK: ERP re-authentication Root Key (rRK) for the + * NAI specified by %NL80211_ATTR_FILS_ERP_USERNAME and + * %NL80211_ATTR_FILS_ERP_REALM. This is used for generating rIK and rMSK + * from successful FILS authentication and is used with + * %NL80211_CMD_CONNECT. + * + * @NL80211_ATTR_FILS_CACHE_ID: A 2-octet identifier advertized by a FILS AP + * identifying the scope of PMKSAs. This is used with + * @NL80211_CMD_SET_PMKSA and @NL80211_CMD_DEL_PMKSA. + * + * @NL80211_ATTR_PMK: PMK for the PMKSA identified by %NL80211_ATTR_PMKID. + * This is used with @NL80211_CMD_SET_PMKSA. + * + * @NL80211_ATTR_SCHED_SCAN_MULTI: flag attribute which user-space shall use to + * indicate that it supports multiple active scheduled scan requests. + * @NL80211_ATTR_SCHED_SCAN_MAX_REQS: indicates maximum number of scheduled + * scan request that may be active for the device (u32). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2423,6 +2499,17 @@ enum nl80211_attrs { NL80211_ATTR_TIMEOUT_REASON, + NL80211_ATTR_FILS_ERP_USERNAME, + NL80211_ATTR_FILS_ERP_REALM, + NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, + NL80211_ATTR_FILS_ERP_RRK, + NL80211_ATTR_FILS_CACHE_ID, + + NL80211_ATTR_PMK, + + NL80211_ATTR_SCHED_SCAN_MULTI, + NL80211_ATTR_SCHED_SCAN_MAX_REQS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3107,6 +3194,7 @@ enum nl80211_reg_rule_attr { * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching, * only report BSS with matching SSID. + * (This cannot be used together with BSSID.) * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a * BSS in scan results. Filtering is turned off if not specified. Note that * if this attribute is in a match set of its own, then it is treated as @@ -3122,6 +3210,8 @@ enum nl80211_reg_rule_attr { * BSS-es in the specified band is to be adjusted before doing * RSSI-based BSS selection. The attribute value is a packed structure * value as specified by &struct nl80211_bss_select_rssi_adjust. + * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching + * (this cannot be used together with SSID). * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -3133,6 +3223,7 @@ enum nl80211_sched_scan_match_attr { NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, + NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, @@ -3942,7 +4033,10 @@ enum nl80211_ps_state { * @__NL80211_ATTR_CQM_INVALID: invalid * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies * the threshold for the RSSI level at which an event will be sent. Zero - * to disable. + * to disable. Alternatively, if %NL80211_EXT_FEATURE_CQM_RSSI_LIST is + * set, multiple values can be supplied as a low-to-high sorted array of + * threshold values in dBm. Events will be sent when the RSSI value + * crosses any of the thresholds. * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies * the minimum amount the RSSI level must change after an event before a * new event may be issued (to reduce effects of RSSI oscillation). @@ -4753,6 +4847,11 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI: The driver supports sched_scan * for reporting BSSs with better RSSI than the current connected BSS * (%NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI). + * @NL80211_EXT_FEATURE_CQM_RSSI_LIST: With this driver the + * %NL80211_ATTR_CQM_RSSI_THOLD attribute accepts a list of zero or more + * RSSI threshold values to monitor rather than exactly one threshold. + * @NL80211_EXT_FEATURE_FILS_SK_OFFLOAD: Driver SME supports FILS shared key + * authentication with %NL80211_CMD_CONNECT. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4771,6 +4870,8 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED, NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI, + NL80211_EXT_FEATURE_CQM_RSSI_LIST, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -4906,12 +5007,17 @@ enum nl80211_smps_mode { * change to the channel status. * @NL80211_RADAR_NOP_FINISHED: The Non-Occupancy Period for this channel is * over, channel becomes usable. + * @NL80211_RADAR_PRE_CAC_EXPIRED: Channel Availability Check done on this + * non-operating channel is expired and no longer valid. New CAC must + * be done on this channel before starting the operation. This is not + * applicable for ETSI dfs domain where pre-CAC is valid for ever. */ enum nl80211_radar_event { NL80211_RADAR_DETECTED, NL80211_RADAR_CAC_FINISHED, NL80211_RADAR_CAC_ABORTED, NL80211_RADAR_NOP_FINISHED, + NL80211_RADAR_PRE_CAC_EXPIRED, }; /** diff --git a/include/uapi/linux/nubus.h b/include/uapi/linux/nubus.h index 77513d2b5638ec9ff6d21272b203927041575e32..ac516064f0ee7882227520a201067e91c648c8e4 100644 --- a/include/uapi/linux/nubus.h +++ b/include/uapi/linux/nubus.h @@ -113,13 +113,15 @@ enum nubus_drhw { NUBUS_DRHW_SIGMA_CLRMAX = 0x0007, /* Sigma Design ColorMax */ NUBUS_DRHW_APPLE_SE30 = 0x0009, /* Apple SE/30 video */ NUBUS_DRHW_APPLE_HRVC = 0x0013, /* Mac II High-Res Video Card */ + NUBUS_DRHW_APPLE_MVC = 0x0014, /* Mac II Monochrome Video Card */ NUBUS_DRHW_APPLE_PVC = 0x0017, /* Mac II Portrait Video Card */ NUBUS_DRHW_APPLE_RBV1 = 0x0018, /* IIci RBV video */ NUBUS_DRHW_APPLE_MDC = 0x0019, /* Macintosh Display Card */ + NUBUS_DRHW_APPLE_VSC = 0x0020, /* Duo MiniDock ViSC framebuffer */ NUBUS_DRHW_APPLE_SONORA = 0x0022, /* Sonora built-in video */ + NUBUS_DRHW_APPLE_JET = 0x0029, /* Jet framebuffer (DuoDock) */ NUBUS_DRHW_APPLE_24AC = 0x002b, /* Mac 24AC Video Card */ NUBUS_DRHW_APPLE_VALKYRIE = 0x002e, - NUBUS_DRHW_APPLE_JET = 0x0029, /* Jet framebuffer (DuoDock) */ NUBUS_DRHW_SMAC_GFX = 0x0105, /* SuperMac GFX */ NUBUS_DRHW_RASTER_CB264 = 0x013B, /* RasterOps ColorBoard 264 */ NUBUS_DRHW_MICRON_XCEED = 0x0146, /* Micron Exceed color */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7f41f7d0000f9f0ee36c274d88ad0d330fa8f5d6..156ee4cab82e5e73bc2705a4707c04c1cf788b98 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -343,6 +343,7 @@ enum ovs_key_attr { #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) enum ovs_tunnel_key_attr { + /* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */ OVS_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ OVS_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ OVS_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ @@ -578,10 +579,25 @@ enum ovs_sample_attr { OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ __OVS_SAMPLE_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_SAMPLE_ATTR_ARG /* struct sample_arg */ +#endif }; #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) +#ifdef __KERNEL__ +struct sample_arg { + bool exec; /* When true, actions in sample will not + * change flow keys. False otherwise. + */ + u32 probability; /* Same value as + * 'OVS_SAMPLE_ATTR_PROBABILITY'. + */ +}; +#endif + /** * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION @@ -678,6 +694,17 @@ struct ovs_action_hash { * nothing if the connection is already committed will check that the current * packet is in conntrack entry's original direction. If directionality does * not match, will delete the existing conntrack entry and commit a new one. + * @OVS_CT_ATTR_EVENTMASK: Mask of bits indicating which conntrack event types + * (enum ip_conntrack_events IPCT_*) should be reported. For any bit set to + * zero, the corresponding event type is not generated. Default behavior + * depends on system configuration, but typically all event types are + * generated, hence listening on NFNLGRP_CONNTRACK_UPDATE events may get a lot + * of events. Explicitly passing this attribute allows limiting the updates + * received to the events of interest. The bit 1 << IPCT_NEW, 1 << + * IPCT_RELATED, and 1 << IPCT_DESTROY must be set to ones for those events to + * be received on NFNLGRP_CONNTRACK_NEW and NFNLGRP_CONNTRACK_DESTROY groups, + * respectively. Remaining bits control the changes for which an event is + * delivered on the NFNLGRP_CONNTRACK_UPDATE group. */ enum ovs_ct_attr { OVS_CT_ATTR_UNSPEC, @@ -689,6 +716,7 @@ enum ovs_ct_attr { related connections. */ OVS_CT_ATTR_NAT, /* Nested OVS_NAT_ATTR_* */ OVS_CT_ATTR_FORCE_COMMIT, /* No argument */ + OVS_CT_ATTR_EVENTMASK, /* u32 mask of IPCT_* events. */ __OVS_CT_ATTR_MAX }; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 634c9c44ed6cb2173174efab81b0826f28c2dcd3..d56bb005100949516c0ab59e964bf1c135899bb2 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -114,7 +114,7 @@ #define PCI_SUBSYSTEM_ID 0x2e #define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ #define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) +#define PCI_ROM_ADDRESS_MASK (~0x7ffU) #define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ @@ -630,6 +630,7 @@ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ #define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */ +#define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */ #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */ #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */ #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */ diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h new file mode 100644 index 0000000000000000000000000000000000000000..a6aa10c45ad169684923f7b9efcc53345a45b98d --- /dev/null +++ b/include/uapi/linux/pcitest.h @@ -0,0 +1,19 @@ +/** + * pcitest.h - PCI test uapi defines + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + */ + +#ifndef __UAPI_LINUX_PCITEST_H +#define __UAPI_LINUX_PCITEST_H + +#define PCITEST_BAR _IO('P', 0x1) +#define PCITEST_LEGACY_IRQ _IO('P', 0x2) +#define PCITEST_MSI _IOW('P', 0x3, int) +#define PCITEST_WRITE _IOW('P', 0x4, unsigned long) +#define PCITEST_READ _IOW('P', 0x5, unsigned long) +#define PCITEST_COPY _IOW('P', 0x6, unsigned long) + +#endif /* __UAPI_LINUX_PCITEST_H */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485a24ac81e324ea53ea17b405a3c73b520a..b1c0b187acfe57da3ece7e91325536edcc9cff0c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -885,12 +915,14 @@ enum perf_callchain_context { */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) #define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ #define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { @@ -902,6 +934,21 @@ union perf_mem_data_src { mem_rsvd:31; }; }; +#elif defined(__BIG_ENDIAN_BITFIELD) +union perf_mem_data_src { + __u64 val; + struct { + __u64 mem_rsvd:31, + mem_dtlb:7, /* tlb access */ + mem_lock:2, /* lock instr */ + mem_snoop:5, /* snoop mode */ + mem_lvl:14, /* memory hierarchy level */ + mem_op:5; /* type of opcode */ + }; +}; +#else +#error "Unknown endianness" +#endif /* type of opcode (load/store/prefetch,code) */ #define PERF_MEM_OP_NA 0x01 /* not available */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7a69f2a4ca0c06a68487ff382c6b84f8acab323b..d613be3b3239e476d728ffa323347ef42e97eaec 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -37,7 +37,20 @@ enum { #define TC_ACT_QUEUED 5 #define TC_ACT_REPEAT 6 #define TC_ACT_REDIRECT 7 -#define TC_ACT_JUMP 0x10000000 + +/* There is a special kind of actions called "extended actions", + * which need a value parameter. These have a local opcode located in + * the highest nibble, starting from 1. The rest of the bits + * are used to carry the value. These two parts together make + * a combined opcode. + */ +#define __TC_ACT_EXT_SHIFT 28 +#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT) +#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1) +#define TC_ACT_EXT_CMP(combined, opcode) \ + (((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode) + +#define TC_ACT_JUMP __TC_ACT_EXT(1) /* Action type identifiers*/ enum { @@ -432,6 +445,11 @@ enum { TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index df7451d351311cd915fc96752fcaeb0a43b5d112..099bf5528fed30008bfbde3529315be35c0411f9 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -617,6 +617,14 @@ struct tc_drr_stats { #define TC_QOPT_BITMASK 15 #define TC_QOPT_MAX_QUEUE 16 +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; diff --git a/include/uapi/linux/pps.h b/include/uapi/linux/pps.h index a9bb1d93451aeb6c5b73ba4c1bd46db053478564..c1cb3825a8bc805e804f88c4e759e77716c6efef 100644 --- a/include/uapi/linux/pps.h +++ b/include/uapi/linux/pps.h @@ -55,6 +55,12 @@ struct pps_ktime { __s32 nsec; __u32 flags; }; + +struct pps_ktime_compat { + __s64 sec; + __s32 nsec; + __u32 flags; +} __attribute__((packed, aligned(4))); #define PPS_TIME_INVALID (1<<0) /* used to specify timeout==NULL */ struct pps_kinfo { @@ -65,6 +71,14 @@ struct pps_kinfo { int current_mode; /* current mode bits */ }; +struct pps_kinfo_compat { + __u32 assert_sequence; /* seq. num. of assert event */ + __u32 clear_sequence; /* seq. num. of clear event */ + struct pps_ktime_compat assert_tu; /* time of assert event */ + struct pps_ktime_compat clear_tu; /* time of clear event */ + int current_mode; /* current mode bits */ +}; + struct pps_kparams { int api_version; /* API version # */ int mode; /* mode bits */ @@ -114,6 +128,11 @@ struct pps_fdata { struct pps_ktime timeout; }; +struct pps_fdata_compat { + struct pps_kinfo_compat info; + struct pps_ktime_compat timeout; +}; + struct pps_bind_args { int tsformat; /* format of time stamps */ int edge; /* selected event type */ diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index 57d7c0f916b6f8307d318bb2f949da7723ae0cf8..645ef3cf3dd08a7d0494cfb509a30c42c3b053c2 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -1,6 +1,8 @@ #ifndef _UAPI_PR_H #define _UAPI_PR_H +#include + enum pr_type { PR_WRITE_EXCLUSIVE = 1, PR_EXCLUSIVE_ACCESS = 2, diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h index 66c0748d26e2b3e9412424bad66011b23ec0f876..9d76c566f66e83b67d4abdc295731031e2362377 100644 --- a/include/uapi/linux/qrtr.h +++ b/include/uapi/linux/qrtr.h @@ -2,6 +2,7 @@ #define _LINUX_QRTR_H #include +#include struct sockaddr_qrtr { __kernel_sa_family_t sq_family; diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild deleted file mode 100644 index e2c3d25405d7e20de2dc708088b0399350bd861f..0000000000000000000000000000000000000000 --- a/include/uapi/linux/raid/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# UAPI Header export list -header-y += md_p.h -header-y += md_u.h diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 9930f3e9040f76ebc09bfb9446f4399f9e96b98c..d500bd224979d923f15732bd19a5bcde11c8f91f 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -242,10 +242,18 @@ struct mdp_superblock_1 { __le32 chunksize; /* in 512byte sectors */ __le32 raid_disks; - __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts - * NOTE: signed, so bitmap can be before superblock - * only meaningful of feature_map[0] is set. - */ + union { + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. + */ + + /* only meaningful when feature_map[MD_FEATURE_PPL] is set */ + struct { + __le16 offset; /* sectors from start of superblock that ppl starts (signed) */ + __le16 size; /* ppl size in sectors */ + } ppl; + }; /* These are only valid with feature bit '4' */ __le32 new_level; /* new level we are reshaping to */ @@ -318,6 +326,7 @@ struct mdp_superblock_1 { */ #define MD_FEATURE_CLUSTERED 256 /* clustered MD */ #define MD_FEATURE_JOURNAL 512 /* support write cache */ +#define MD_FEATURE_PPL 1024 /* support PPL */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ @@ -328,6 +337,7 @@ struct mdp_superblock_1 { |MD_FEATURE_RECOVERY_BITMAP \ |MD_FEATURE_CLUSTERED \ |MD_FEATURE_JOURNAL \ + |MD_FEATURE_PPL \ ) struct r5l_payload_header { @@ -388,4 +398,31 @@ struct r5l_meta_block { #define R5LOG_VERSION 0x1 #define R5LOG_MAGIC 0x6433c509 + +struct ppl_header_entry { + __le64 data_sector; /* raid sector of the new data */ + __le32 pp_size; /* length of partial parity */ + __le32 data_size; /* length of data */ + __le32 parity_disk; /* member disk containing parity */ + __le32 checksum; /* checksum of partial parity data for this + * entry (~crc32c) */ +} __attribute__ ((__packed__)); + +#define PPL_HEADER_SIZE 4096 +#define PPL_HDR_RESERVED 512 +#define PPL_HDR_ENTRY_SPACE \ + (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 4 * sizeof(__le32) - sizeof(__le64)) +#define PPL_HDR_MAX_ENTRIES \ + (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry)) + +struct ppl_header { + __u8 reserved[PPL_HDR_RESERVED];/* reserved space, fill with 0xff */ + __le32 signature; /* signature (family number of volume) */ + __le32 padding; /* zero pad */ + __le64 generation; /* generation number of the header */ + __le32 entries_count; /* number of entries in entry array */ + __le32 checksum; /* checksum of the header (~crc32c) */ + struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES]; +} __attribute__ ((__packed__)); + #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 6546917d605a916bfd5a905e30eb05d68fd6ad6b..cce061382e4073d4fe8296a00f22eba42cf13818 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -122,6 +122,8 @@ enum { RTM_NEWNETCONF = 80, #define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF @@ -319,6 +321,7 @@ enum rtattr_type_t { RTA_EXPIRES, RTA_PAD, RTA_UID, + RTA_TTL_PROPAGATE, __RTA_MAX }; @@ -545,6 +548,7 @@ enum { TCA_STATS2, TCA_STAB, TCA_PAD, + TCA_DUMP_INVISIBLE, __TCA_MAX }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d3ae381fcf3327489c82e2f47eb39a363ec030c7..ced9d8b974268ed270661c3e2da77165e3a24784 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -115,6 +115,8 @@ typedef __s32 sctp_assoc_t; #define SCTP_PR_SUPPORTED 113 #define SCTP_DEFAULT_PRINFO 114 #define SCTP_PR_ASSOC_STATUS 115 +#define SCTP_PR_STREAM_STATUS 116 +#define SCTP_RECONFIG_SUPPORTED 117 #define SCTP_ENABLE_STREAM_RESET 118 #define SCTP_RESET_STREAMS 119 #define SCTP_RESET_ASSOC 120 @@ -502,6 +504,28 @@ struct sctp_stream_reset_event { __u16 strreset_stream_list[]; }; +#define SCTP_ASSOC_RESET_DENIED 0x0004 +#define SCTP_ASSOC_RESET_FAILED 0x0008 +struct sctp_assoc_reset_event { + __u16 assocreset_type; + __u16 assocreset_flags; + __u32 assocreset_length; + sctp_assoc_t assocreset_assoc_id; + __u32 assocreset_local_tsn; + __u32 assocreset_remote_tsn; +}; + +#define SCTP_ASSOC_CHANGE_DENIED 0x0004 +#define SCTP_ASSOC_CHANGE_FAILED 0x0008 +struct sctp_stream_change_event { + __u16 strchange_type; + __u16 strchange_flags; + __u32 strchange_length; + sctp_assoc_t strchange_assoc_id; + __u16 strchange_instrms; + __u16 strchange_outstrms; +}; + /* * Described in Section 7.3 * Ancillary Data and Notification Interest Options @@ -518,6 +542,8 @@ struct sctp_event_subscribe { __u8 sctp_authentication_event; __u8 sctp_sender_dry_event; __u8 sctp_stream_reset_event; + __u8 sctp_assoc_reset_event; + __u8 sctp_stream_change_event; }; /* @@ -543,6 +569,8 @@ union sctp_notification { struct sctp_authkey_event sn_authkey_event; struct sctp_sender_dry_event sn_sender_dry_event; struct sctp_stream_reset_event sn_strreset_event; + struct sctp_assoc_reset_event sn_assocreset_event; + struct sctp_stream_change_event sn_strchange_event; }; /* Section 5.3.1 @@ -572,6 +600,10 @@ enum sctp_sn_type { #define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT SCTP_STREAM_RESET_EVENT, #define SCTP_STREAM_RESET_EVENT SCTP_STREAM_RESET_EVENT + SCTP_ASSOC_RESET_EVENT, +#define SCTP_ASSOC_RESET_EVENT SCTP_ASSOC_RESET_EVENT + SCTP_STREAM_CHANGE_EVENT, +#define SCTP_STREAM_CHANGE_EVENT SCTP_STREAM_CHANGE_EVENT }; /* Notification error codes used to fill up the error fields in some diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h index ccd0ccd00f470b92090f1a6eff4a5a03940fa7f6..ac217c6f0151063ba759b7895472dcddb11b29c9 100644 --- a/include/uapi/linux/serio.h +++ b/include/uapi/linux/serio.h @@ -80,5 +80,6 @@ #define SERIO_WACOM_IV 0x3e #define SERIO_EGALAX 0x3f #define SERIO_PULSE8_CEC 0x40 +#define SERIO_RAINSHADOW_CEC 0x41 #endif /* _UAPI_SERIO_H */ diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index 0063919fea344664ee657913ffdb1a49c9158a25..87712bfaa9dd00d431cbc1953da0462a52d7e119 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -3,7 +3,7 @@ #include #include -#include +#include /* Request structure */ struct smc_diag_req { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 3b2bed7ca9a4d92c5671e614f2bc598668805f75..95cffcb21dfdba7c974706131d0f43e21435e82d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -177,7 +177,6 @@ enum LINUX_MIB_TIMEWAITED, /* TimeWaited */ LINUX_MIB_TIMEWAITRECYCLED, /* TimeWaitRecycled */ LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ - LINUX_MIB_PAWSPASSIVEREJECTED, /* PAWSPassiveRejected */ LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ @@ -260,6 +259,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ + LINUX_MIB_TCPFASTOPENBLACKHOLE, /* TCPFastOpenBlackholeDetect */ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */ LINUX_MIB_TCPAUTOCORKING, /* TCPAutoCorking */ diff --git a/include/uapi/linux/spi/Kbuild b/include/uapi/linux/spi/Kbuild deleted file mode 100644 index 0cc747eff165543ce28d45b23f676ca754bc9984..0000000000000000000000000000000000000000 --- a/include/uapi/linux/spi/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += spidev.h diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index d538897b8e08bb4358c56148c189fc7b8128a9da..17b10304c393355da9ed2da743107a5c59748290 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -48,17 +48,13 @@ * tv_sec holds the number of seconds before (negative) or after (positive) * 00:00:00 1st January 1970 UTC. * - * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is - * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. - * - * Note that if both tv_sec and tv_nsec are non-zero, then the two values must - * either be both positive or both negative. + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. * * __reserved is held in case we need a yet finer resolution. */ struct statx_timestamp { __s64 tv_sec; - __s32 tv_nsec; + __u32 tv_nsec; __s32 __reserved; }; diff --git a/include/uapi/linux/sunrpc/Kbuild b/include/uapi/linux/sunrpc/Kbuild deleted file mode 100644 index 8e02e47c20fb65cbc59ec3ae70bd23bb92af7386..0000000000000000000000000000000000000000 --- a/include/uapi/linux/sunrpc/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += debug.h diff --git a/include/uapi/linux/switchtec_ioctl.h b/include/uapi/linux/switchtec_ioctl.h new file mode 100644 index 0000000000000000000000000000000000000000..3e824e1a649524b545128cba92855e52522acbf8 --- /dev/null +++ b/include/uapi/linux/switchtec_ioctl.h @@ -0,0 +1,132 @@ +/* + * Microsemi Switchtec PCIe Driver + * Copyright (c) 2017, Microsemi Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _UAPI_LINUX_SWITCHTEC_IOCTL_H +#define _UAPI_LINUX_SWITCHTEC_IOCTL_H + +#include + +#define SWITCHTEC_IOCTL_PART_CFG0 0 +#define SWITCHTEC_IOCTL_PART_CFG1 1 +#define SWITCHTEC_IOCTL_PART_IMG0 2 +#define SWITCHTEC_IOCTL_PART_IMG1 3 +#define SWITCHTEC_IOCTL_PART_NVLOG 4 +#define SWITCHTEC_IOCTL_PART_VENDOR0 5 +#define SWITCHTEC_IOCTL_PART_VENDOR1 6 +#define SWITCHTEC_IOCTL_PART_VENDOR2 7 +#define SWITCHTEC_IOCTL_PART_VENDOR3 8 +#define SWITCHTEC_IOCTL_PART_VENDOR4 9 +#define SWITCHTEC_IOCTL_PART_VENDOR5 10 +#define SWITCHTEC_IOCTL_PART_VENDOR6 11 +#define SWITCHTEC_IOCTL_PART_VENDOR7 12 +#define SWITCHTEC_IOCTL_NUM_PARTITIONS 13 + +struct switchtec_ioctl_flash_info { + __u64 flash_length; + __u32 num_partitions; + __u32 padding; +}; + +struct switchtec_ioctl_flash_part_info { + __u32 flash_partition; + __u32 address; + __u32 length; + __u32 active; +}; + +struct switchtec_ioctl_event_summary { + __u64 global; + __u64 part_bitmap; + __u32 local_part; + __u32 padding; + __u32 part[48]; + __u32 pff[48]; +}; + +#define SWITCHTEC_IOCTL_EVENT_STACK_ERROR 0 +#define SWITCHTEC_IOCTL_EVENT_PPU_ERROR 1 +#define SWITCHTEC_IOCTL_EVENT_ISP_ERROR 2 +#define SWITCHTEC_IOCTL_EVENT_SYS_RESET 3 +#define SWITCHTEC_IOCTL_EVENT_FW_EXC 4 +#define SWITCHTEC_IOCTL_EVENT_FW_NMI 5 +#define SWITCHTEC_IOCTL_EVENT_FW_NON_FATAL 6 +#define SWITCHTEC_IOCTL_EVENT_FW_FATAL 7 +#define SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP 8 +#define SWITCHTEC_IOCTL_EVENT_TWI_MRPC_COMP_ASYNC 9 +#define SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP 10 +#define SWITCHTEC_IOCTL_EVENT_CLI_MRPC_COMP_ASYNC 11 +#define SWITCHTEC_IOCTL_EVENT_GPIO_INT 12 +#define SWITCHTEC_IOCTL_EVENT_PART_RESET 13 +#define SWITCHTEC_IOCTL_EVENT_MRPC_COMP 14 +#define SWITCHTEC_IOCTL_EVENT_MRPC_COMP_ASYNC 15 +#define SWITCHTEC_IOCTL_EVENT_DYN_PART_BIND_COMP 16 +#define SWITCHTEC_IOCTL_EVENT_AER_IN_P2P 17 +#define SWITCHTEC_IOCTL_EVENT_AER_IN_VEP 18 +#define SWITCHTEC_IOCTL_EVENT_DPC 19 +#define SWITCHTEC_IOCTL_EVENT_CTS 20 +#define SWITCHTEC_IOCTL_EVENT_HOTPLUG 21 +#define SWITCHTEC_IOCTL_EVENT_IER 22 +#define SWITCHTEC_IOCTL_EVENT_THRESH 23 +#define SWITCHTEC_IOCTL_EVENT_POWER_MGMT 24 +#define SWITCHTEC_IOCTL_EVENT_TLP_THROTTLING 25 +#define SWITCHTEC_IOCTL_EVENT_FORCE_SPEED 26 +#define SWITCHTEC_IOCTL_EVENT_CREDIT_TIMEOUT 27 +#define SWITCHTEC_IOCTL_EVENT_LINK_STATE 28 +#define SWITCHTEC_IOCTL_MAX_EVENTS 29 + +#define SWITCHTEC_IOCTL_EVENT_LOCAL_PART_IDX -1 +#define SWITCHTEC_IOCTL_EVENT_IDX_ALL -2 + +#define SWITCHTEC_IOCTL_EVENT_FLAG_CLEAR (1 << 0) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_POLL (1 << 1) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_LOG (1 << 2) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_CLI (1 << 3) +#define SWITCHTEC_IOCTL_EVENT_FLAG_EN_FATAL (1 << 4) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_POLL (1 << 5) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_LOG (1 << 6) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_CLI (1 << 7) +#define SWITCHTEC_IOCTL_EVENT_FLAG_DIS_FATAL (1 << 8) +#define SWITCHTEC_IOCTL_EVENT_FLAG_UNUSED (~0x1ff) + +struct switchtec_ioctl_event_ctl { + __u32 event_id; + __s32 index; + __u32 flags; + __u32 occurred; + __u32 count; + __u32 data[5]; +}; + +#define SWITCHTEC_IOCTL_PFF_VEP 100 +struct switchtec_ioctl_pff_port { + __u32 pff; + __u32 partition; + __u32 port; +}; + +#define SWITCHTEC_IOCTL_FLASH_INFO \ + _IOR('W', 0x40, struct switchtec_ioctl_flash_info) +#define SWITCHTEC_IOCTL_FLASH_PART_INFO \ + _IOWR('W', 0x41, struct switchtec_ioctl_flash_part_info) +#define SWITCHTEC_IOCTL_EVENT_SUMMARY \ + _IOR('W', 0x42, struct switchtec_ioctl_event_summary) +#define SWITCHTEC_IOCTL_EVENT_CTL \ + _IOWR('W', 0x43, struct switchtec_ioctl_event_ctl) +#define SWITCHTEC_IOCTL_PFF_TO_PORT \ + _IOWR('W', 0x44, struct switchtec_ioctl_pff_port) +#define SWITCHTEC_IOCTL_PORT_TO_PFF \ + _IOWR('W', 0x45, struct switchtec_ioctl_pff_port) + +#endif diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index d2b12152e358f14e791ef3e842cb6eac7cc8ceec..e13d48058b8d0e5cf36e458e68e257d73a9a1e8f 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -568,6 +568,7 @@ enum { NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, + NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, __NET_IPV6_MAX }; diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild deleted file mode 100644 index ba62ddf0e58ab3dba056ae17b5b5379e98b7627e..0000000000000000000000000000000000000000 --- a/include/uapi/linux/tc_act/Kbuild +++ /dev/null @@ -1,16 +0,0 @@ -# UAPI Header export list -header-y += tc_csum.h -header-y += tc_defact.h -header-y += tc_gact.h -header-y += tc_ipt.h -header-y += tc_mirred.h -header-y += tc_sample.h -header-y += tc_nat.h -header-y += tc_pedit.h -header-y += tc_skbedit.h -header-y += tc_vlan.h -header-y += tc_bpf.h -header-y += tc_connmark.h -header-y += tc_ife.h -header-y += tc_tunnel_key.h -header-y += tc_skbmod.h diff --git a/include/uapi/linux/tc_ematch/Kbuild b/include/uapi/linux/tc_ematch/Kbuild deleted file mode 100644 index 53fca39255351730cfd7384d4ca4ccefbb6414c1..0000000000000000000000000000000000000000 --- a/include/uapi/linux/tc_ematch/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -# UAPI Header export list -header-y += tc_em_cmp.h -header-y += tc_em_meta.h -header-y += tc_em_nbyte.h -header-y += tc_em_text.h diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h new file mode 100644 index 0000000000000000000000000000000000000000..370d8845ab2173a733ccbf7091f9ce560024bb0e --- /dev/null +++ b/include/uapi/linux/tee.h @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TEE_H +#define __TEE_H + +#include +#include + +/* + * This file describes the API provided by a TEE driver to user space. + * + * Each TEE driver defines a TEE specific protocol which is used for the + * data passed back and forth using TEE_IOC_CMD. + */ + +/* Helpers to make the ioctl defines */ +#define TEE_IOC_MAGIC 0xa4 +#define TEE_IOC_BASE 0 + +/* Flags relating to shared memory */ +#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */ +#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */ + +#define TEE_MAX_ARG_SIZE 1024 + +#define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ + +/* + * TEE Implementation ID + */ +#define TEE_IMPL_ID_OPTEE 1 + +/* + * OP-TEE specific capabilities + */ +#define TEE_OPTEE_CAP_TZ (1 << 0) + +/** + * struct tee_ioctl_version_data - TEE version + * @impl_id: [out] TEE implementation id + * @impl_caps: [out] Implementation specific capabilities + * @gen_caps: [out] Generic capabilities, defined by TEE_GEN_CAPS_* above + * + * Identifies the TEE implementation, @impl_id is one of TEE_IMPL_ID_* above. + * @impl_caps is implementation specific, for example TEE_OPTEE_CAP_* + * is valid when @impl_id == TEE_IMPL_ID_OPTEE. + */ +struct tee_ioctl_version_data { + __u32 impl_id; + __u32 impl_caps; + __u32 gen_caps; +}; + +/** + * TEE_IOC_VERSION - query version of TEE + * + * Takes a tee_ioctl_version_data struct and returns with the TEE version + * data filled in. + */ +#define TEE_IOC_VERSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 0, \ + struct tee_ioctl_version_data) + +/** + * struct tee_ioctl_shm_alloc_data - Shared memory allocate argument + * @size: [in/out] Size of shared memory to allocate + * @flags: [in/out] Flags to/from allocation. + * @id: [out] Identifier of the shared memory + * + * The flags field should currently be zero as input. Updated by the call + * with actual flags as defined by TEE_IOCTL_SHM_* above. + * This structure is used as argument for TEE_IOC_SHM_ALLOC below. + */ +struct tee_ioctl_shm_alloc_data { + __u64 size; + __u32 flags; + __s32 id; +}; + +/** + * TEE_IOC_SHM_ALLOC - allocate shared memory + * + * Allocates shared memory between the user space process and secure OS. + * + * Returns a file descriptor on success or < 0 on failure + * + * The returned file descriptor is used to map the shared memory into user + * space. The shared memory is freed when the descriptor is closed and the + * memory is unmapped. + */ +#define TEE_IOC_SHM_ALLOC _IOWR(TEE_IOC_MAGIC, TEE_IOC_BASE + 1, \ + struct tee_ioctl_shm_alloc_data) + +/** + * struct tee_ioctl_buf_data - Variable sized buffer + * @buf_ptr: [in] A __user pointer to a buffer + * @buf_len: [in] Length of the buffer above + * + * Used as argument for TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE, + * TEE_IOC_SUPPL_RECV, and TEE_IOC_SUPPL_SEND below. + */ +struct tee_ioctl_buf_data { + __u64 buf_ptr; + __u64 buf_len; +}; + +/* + * Attributes for struct tee_ioctl_param, selects field in the union + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_NONE 0 /* parameter not used */ + +/* + * These defines value parameters (struct tee_ioctl_param_value) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT 1 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT 2 +#define TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INOUT 3 /* input and output */ + +/* + * These defines shared memory reference parameters (struct + * tee_ioctl_param_memref) + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT 5 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT 6 +#define TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT 7 /* input and output */ + +/* + * Mask for the type part of the attribute, leaves room for more types + */ +#define TEE_IOCTL_PARAM_ATTR_TYPE_MASK 0xff + +/* + * Matches TEEC_LOGIN_* in GP TEE Client API + * Are only defined for GP compliant TEEs + */ +#define TEE_IOCTL_LOGIN_PUBLIC 0 +#define TEE_IOCTL_LOGIN_USER 1 +#define TEE_IOCTL_LOGIN_GROUP 2 +#define TEE_IOCTL_LOGIN_APPLICATION 4 +#define TEE_IOCTL_LOGIN_USER_APPLICATION 5 +#define TEE_IOCTL_LOGIN_GROUP_APPLICATION 6 + +/** + * struct tee_ioctl_param - parameter + * @attr: attributes + * @a: if a memref, offset into the shared memory object, else a value parameter + * @b: if a memref, size of the buffer, else a value parameter + * @c: if a memref, shared memory identifier, else a value parameter + * + * @attr & TEE_PARAM_ATTR_TYPE_MASK indicates if memref or value is used in + * the union. TEE_PARAM_ATTR_TYPE_VALUE_* indicates value and + * TEE_PARAM_ATTR_TYPE_MEMREF_* indicates memref. TEE_PARAM_ATTR_TYPE_NONE + * indicates that none of the members are used. + * + * Shared memory is allocated with TEE_IOC_SHM_ALLOC which returns an + * identifier representing the shared memory object. A memref can reference + * a part of a shared memory by specifying an offset (@a) and size (@b) of + * the object. To supply the entire shared memory object set the offset + * (@a) to 0 and size (@b) to the previously returned size of the object. + */ +struct tee_ioctl_param { + __u64 attr; + __u64 a; + __u64 b; + __u64 c; +}; + +#define TEE_IOCTL_UUID_LEN 16 + +/** + * struct tee_ioctl_open_session_arg - Open session argument + * @uuid: [in] UUID of the Trusted Application + * @clnt_uuid: [in] UUID of client + * @clnt_login: [in] Login class of client, TEE_IOCTL_LOGIN_* above + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [out] Session id + * @ret: [out] return value + * @ret_origin [out] origin of the return value + * @num_params [in] number of parameters following this struct + */ +struct tee_ioctl_open_session_arg { + __u8 uuid[TEE_IOCTL_UUID_LEN]; + __u8 clnt_uuid[TEE_IOCTL_UUID_LEN]; + __u32 clnt_login; + __u32 cancel_id; + __u32 session; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_OPEN_SESSION - opens a session to a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_ioctl_open_session_arg followed by any array of struct + * tee_ioctl_param + */ +#define TEE_IOC_OPEN_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 2, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_invoke_func_arg - Invokes a function in a Trusted + * Application + * @func: [in] Trusted Application function, specific to the TA + * @session: [in] Session id + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @ret: [out] return value + * @ret_origin [out] origin of the return value + * @num_params [in] number of parameters following this struct + */ +struct tee_ioctl_invoke_arg { + __u32 func; + __u32 session; + __u32 cancel_id; + __u32 ret; + __u32 ret_origin; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_INVOKE - Invokes a function in a Trusted Application + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_invoke_func_arg followed by any array of struct tee_param + */ +#define TEE_IOC_INVOKE _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 3, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_ioctl_cancel_arg - Cancels an open session or invoke ioctl + * @cancel_id: [in] Cancellation id, a unique value to identify this request + * @session: [in] Session id, if the session is opened, else set to 0 + */ +struct tee_ioctl_cancel_arg { + __u32 cancel_id; + __u32 session; +}; + +/** + * TEE_IOC_CANCEL - Cancels an open session or invoke + */ +#define TEE_IOC_CANCEL _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 4, \ + struct tee_ioctl_cancel_arg) + +/** + * struct tee_ioctl_close_session_arg - Closes an open session + * @session: [in] Session id + */ +struct tee_ioctl_close_session_arg { + __u32 session; +}; + +/** + * TEE_IOC_CLOSE_SESSION - Closes a session + */ +#define TEE_IOC_CLOSE_SESSION _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 5, \ + struct tee_ioctl_close_session_arg) + +/** + * struct tee_iocl_supp_recv_arg - Receive a request for a supplicant function + * @func: [in] supplicant function + * @num_params [in/out] number of parameters following this struct + * + * @num_params is the number of params that tee-supplicant has room to + * receive when input, @num_params is the number of actual params + * tee-supplicant receives when output. + */ +struct tee_iocl_supp_recv_arg { + __u32 func; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_RECV - Receive a request for a supplicant function + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_recv_arg followed by any array of struct tee_param + */ +#define TEE_IOC_SUPPL_RECV _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 6, \ + struct tee_ioctl_buf_data) + +/** + * struct tee_iocl_supp_send_arg - Send a response to a received request + * @ret: [out] return value + * @num_params [in] number of parameters following this struct + */ +struct tee_iocl_supp_send_arg { + __u32 ret; + __u32 num_params; + /* num_params tells the actual number of element in params */ + struct tee_ioctl_param params[]; +}; + +/** + * TEE_IOC_SUPPL_SEND - Receive a request for a supplicant function + * + * Takes a struct tee_ioctl_buf_data which contains a struct + * tee_iocl_supp_send_arg followed by any array of struct tee_param + */ +#define TEE_IOC_SUPPL_SEND _IOR(TEE_IOC_MAGIC, TEE_IOC_BASE + 7, \ + struct tee_ioctl_buf_data) + +/* + * Five syscalls are used when communicating with the TEE driver. + * open(): opens the device associated with the driver + * ioctl(): as described above operating on the file descriptor from open() + * close(): two cases + * - closes the device file descriptor + * - closes a file descriptor connected to allocated shared memory + * mmap(): maps shared memory into user space using information from struct + * tee_ioctl_shm_alloc_data + * munmap(): unmaps previously shared memory + */ + +#endif /*__TEE_H*/ diff --git a/include/uapi/linux/usb/Kbuild b/include/uapi/linux/usb/Kbuild deleted file mode 100644 index 4cc4d6e7e5238e89c3887490b923c30d363a1dca..0000000000000000000000000000000000000000 --- a/include/uapi/linux/usb/Kbuild +++ /dev/null @@ -1,12 +0,0 @@ -# UAPI Header export list -header-y += audio.h -header-y += cdc.h -header-y += cdc-wdm.h -header-y += ch11.h -header-y += ch9.h -header-y += functionfs.h -header-y += g_printer.h -header-y += gadgetfs.h -header-y += midi.h -header-y += tmc.h -header-y += video.h diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h index 361297e96f5826360bb24f80de9089b83a9881ba..576c704e3fb8b13f358ae67b3a415417f2c37cb5 100644 --- a/include/uapi/linux/usb/ch11.h +++ b/include/uapi/linux/usb/ch11.h @@ -22,6 +22,9 @@ */ #define USB_MAXCHILDREN 31 +/* See USB 3.1 spec Table 10-5 */ +#define USB_SS_MAXPORTS 15 + /* * Hub request types */ diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 2c5d7c4a69e3e7585a4d8e1eb60b82a8ffd723c5..ce1169af39d72c58663e08f4ff7e0c175f467507 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -224,7 +224,8 @@ struct usb_ctrlrequest { * through the Linux-USB APIs, they are not converted to cpu byte * order; it is the responsibility of the client code to do this. * The single exception is when device and configuration descriptors (but - * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD); + * not other descriptors) are read from character devices + * (i.e. /dev/bus/usb/BBB/DDD); * in this case the fields are converted to host endianness by the kernel. */ diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index b2a31a55a61237e65e4939a9d2722e136cc73ca0..062606f02309f814ffd7c9a4e5ef5a0f4f31abe5 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -158,7 +158,7 @@ struct usb_ext_prop_desc { * |-----+-----------------------+------+-------------------------------------| * | 0 | bFirstInterfaceNumber | U8 | index of the interface or of the 1st| * | | | | interface in an IAD group | - * | 1 | Reserved | U8 | 0 | + * | 1 | Reserved | U8 | 1 | * | 2 | CompatibleID | U8[8]| compatible ID string | * | 10 | SubCompatibleID | U8[8]| subcompatible ID string | * | 18 | Reserved | U8[6]| 0 | diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 519eff362c1caa14e932836816a020ed0f84ed4d..ae461050661afc4ea86413cf2c3e384281ad35f1 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -198,6 +198,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; @@ -212,6 +213,7 @@ struct vfio_device_info { #define VFIO_DEVICE_API_PCI_STRING "vfio-pci" #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, @@ -446,6 +448,22 @@ enum { VFIO_PCI_NUM_IRQS }; +/* + * The vfio-ccw bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_CCW_CONFIG_REGION_INDEX, + VFIO_CCW_NUM_REGIONS +}; + +enum { + VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_NUM_IRQS +}; + /** * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h new file mode 100644 index 0000000000000000000000000000000000000000..34a7f6f9e0658e7d3e33573df227eb2ba04a5194 --- /dev/null +++ b/include/uapi/linux/vfio_ccw.h @@ -0,0 +1,24 @@ +/* + * Interfaces for vfio-ccw + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi + */ + +#ifndef _VFIO_CCW_H_ +#define _VFIO_CCW_H_ + +#include + +struct ccw_io_region { +#define ORB_AREA_SIZE 12 + __u8 orb_area[ORB_AREA_SIZE]; +#define SCSW_AREA_SIZE 12 + __u8 scsw_area[SCSW_AREA_SIZE]; +#define IRB_AREA_SIZE 96 + __u8 irb_area[IRB_AREA_SIZE]; + __u32 ret_code; +} __packed; + +#endif diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 45184a2ef66c219c893769dd73bc5c3485a5c814..2b8feb86d09e902435902145f0707b58b57921f3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -143,6 +143,7 @@ enum v4l2_buf_type { V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE = 10, V4L2_BUF_TYPE_SDR_CAPTURE = 11, V4L2_BUF_TYPE_SDR_OUTPUT = 12, + V4L2_BUF_TYPE_META_CAPTURE = 13, /* Deprecated, do not use */ V4L2_BUF_TYPE_PRIVATE = 0x80, }; @@ -362,8 +363,7 @@ enum v4l2_quantization { /* * The default for R'G'B' quantization is always full range, except * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG, XV601 or XV709: those - * are full range. + * limited range, except for COLORSPACE_JPEG: this is full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -378,8 +378,7 @@ enum v4l2_quantization { #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \ (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ V4L2_QUANTIZATION_LIM_RANGE : \ - (((is_rgb_or_hsv) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \ - (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) enum v4l2_priority { @@ -453,6 +452,7 @@ struct v4l2_capability { #define V4L2_CAP_SDR_CAPTURE 0x00100000 /* Is a SDR capture device */ #define V4L2_CAP_EXT_PIX_FORMAT 0x00200000 /* Supports the extended pixel format */ #define V4L2_CAP_SDR_OUTPUT 0x00400000 /* Is a SDR output device */ +#define V4L2_CAP_META_CAPTURE 0x00800000 /* Is a metadata capture device */ #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ #define V4L2_CAP_ASYNCIO 0x02000000 /* async I/O */ @@ -661,6 +661,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y12I v4l2_fourcc('Y', '1', '2', 'I') /* Greyscale 12-bit L/R interleaved */ #define V4L2_PIX_FMT_Z16 v4l2_fourcc('Z', '1', '6', ' ') /* Depth data 16-bit */ #define V4L2_PIX_FMT_MT21C v4l2_fourcc('M', 'T', '2', '1') /* Mediatek compressed block mode */ +#define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ @@ -675,6 +676,10 @@ struct v4l2_pix_format { #define V4L2_TCH_FMT_TU16 v4l2_fourcc('T', 'U', '1', '6') /* 16-bit unsigned touch data */ #define V4L2_TCH_FMT_TU08 v4l2_fourcc('T', 'U', '0', '8') /* 8-bit unsigned touch data */ +/* Meta-data formats */ +#define V4L2_META_FMT_VSP1_HGO v4l2_fourcc('V', 'S', 'P', 'H') /* R-Car VSP1 1-D Histogram */ +#define V4L2_META_FMT_VSP1_HGT v4l2_fourcc('V', 'S', 'P', 'T') /* R-Car VSP1 2-D Histogram */ + /* priv field value to indicates that subsequent fields are valid. */ #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe @@ -1654,6 +1659,7 @@ struct v4l2_querymenu { #define V4L2_CTRL_FLAG_VOLATILE 0x0080 #define V4L2_CTRL_FLAG_HAS_PAYLOAD 0x0100 #define V4L2_CTRL_FLAG_EXECUTE_ON_WRITE 0x0200 +#define V4L2_CTRL_FLAG_MODIFY_LAYOUT 0x0400 /* Query flags, to be ORed with the control ID */ #define V4L2_CTRL_FLAG_NEXT_CTRL 0x80000000 @@ -2086,6 +2092,16 @@ struct v4l2_sdr_format { __u8 reserved[24]; } __attribute__ ((packed)); +/** + * struct v4l2_meta_format - metadata format definition + * @dataformat: little endian four character code (fourcc) + * @buffersize: maximum size in bytes required for data + */ +struct v4l2_meta_format { + __u32 dataformat; + __u32 buffersize; +} __attribute__ ((packed)); + /** * struct v4l2_format - stream data format * @type: enum v4l2_buf_type; type of the data stream @@ -2105,6 +2121,7 @@ struct v4l2_format { struct v4l2_vbi_format vbi; /* V4L2_BUF_TYPE_VBI_CAPTURE */ struct v4l2_sliced_vbi_format sliced; /* V4L2_BUF_TYPE_SLICED_VBI_CAPTURE */ struct v4l2_sdr_format sdr; /* V4L2_BUF_TYPE_SDR_CAPTURE */ + struct v4l2_meta_format meta; /* V4L2_BUF_TYPE_META_CAPTURE */ __u8 raw_data[200]; /* user-defined */ } fmt; }; diff --git a/include/uapi/linux/vsockmon.h b/include/uapi/linux/vsockmon.h new file mode 100644 index 0000000000000000000000000000000000000000..a08b522ef597894b092ecc0fbfe6f35a2580fc1c --- /dev/null +++ b/include/uapi/linux/vsockmon.h @@ -0,0 +1,60 @@ +#ifndef _UAPI_VSOCKMON_H +#define _UAPI_VSOCKMON_H + +#include + +/* + * vsockmon is the AF_VSOCK packet capture device. Packets captured have the + * following layout: + * + * +-----------------------------------+ + * | vsockmon header | + * | (struct af_vsockmon_hdr) | + * +-----------------------------------+ + * | transport header | + * | (af_vsockmon_hdr->len bytes long) | + * +-----------------------------------+ + * | payload | + * | (until end of packet) | + * +-----------------------------------+ + * + * The vsockmon header is a transport-independent description of the packet. + * It duplicates some of the information from the transport header so that + * no transport-specific knowledge is necessary to process packets. + * + * The transport header is useful for low-level transport-specific packet + * analysis. Transport type is given in af_vsockmon_hdr->transport and + * transport header length is given in af_vsockmon_hdr->len. + * + * If af_vsockmon_hdr->op is AF_VSOCK_OP_PAYLOAD then the payload follows the + * transport header. Other ops do not have a payload. + */ + +struct af_vsockmon_hdr { + __le64 src_cid; + __le64 dst_cid; + __le32 src_port; + __le32 dst_port; + __le16 op; /* enum af_vsockmon_op */ + __le16 transport; /* enum af_vsockmon_transport */ + __le16 len; /* Transport header length */ + __u8 reserved[2]; +}; + +enum af_vsockmon_op { + AF_VSOCK_OP_UNKNOWN = 0, + AF_VSOCK_OP_CONNECT = 1, + AF_VSOCK_OP_DISCONNECT = 2, + AF_VSOCK_OP_CONTROL = 3, + AF_VSOCK_OP_PAYLOAD = 4, +}; + +enum af_vsockmon_transport { + AF_VSOCK_TRANSPORT_UNKNOWN = 0, + AF_VSOCK_TRANSPORT_NO_INFO = 1, /* No transport information */ + + /* Transport header type: struct virtio_vsock_hdr */ + AF_VSOCK_TRANSPORT_VIRTIO = 2, +}; + +#endif diff --git a/include/uapi/linux/wimax/Kbuild b/include/uapi/linux/wimax/Kbuild deleted file mode 100644 index 1c97be49971ff377b152af80aca7e51e6c59a9a2..0000000000000000000000000000000000000000 --- a/include/uapi/linux/wimax/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# UAPI Header export list -header-y += i2400m.h diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 1fc62b239f1b7ea9f6da74740f3e35894bbc5a32..2b384ff09fa05411bdffdaa53472591554aa2be6 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -303,6 +303,7 @@ enum xfrm_attr_type_t { XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, + XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -494,6 +495,13 @@ struct xfrm_address_filter { __u8 dplen; }; +struct xfrm_user_offload { + int ifindex; + __u8 flags; +}; +#define XFRM_OFFLOAD_IPV6 1 +#define XFRM_OFFLOAD_INBOUND 2 + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 diff --git a/include/uapi/misc/Kbuild b/include/uapi/misc/Kbuild deleted file mode 100644 index e96cae7d58c9f758baf4846bfa5bc9cf3d6eee3f..0000000000000000000000000000000000000000 --- a/include/uapi/misc/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -# misc Header export list -header-y += cxl.h diff --git a/include/uapi/mtd/Kbuild b/include/uapi/mtd/Kbuild deleted file mode 100644 index 5a691e10cd0ecd35db2260c7417145dd5a588aad..0000000000000000000000000000000000000000 --- a/include/uapi/mtd/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -# UAPI Header export list -header-y += inftl-user.h -header-y += mtd-abi.h -header-y += mtd-user.h -header-y += nftl-user.h -header-y += ubi-user.h diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild deleted file mode 100644 index 1e0af1ff75c31a219fd866e52d75629ee3979c78..0000000000000000000000000000000000000000 --- a/include/uapi/rdma/Kbuild +++ /dev/null @@ -1,20 +0,0 @@ -# UAPI Header export list -header-y += ib_user_cm.h -header-y += rdma_user_ioctl.h -header-y += ib_user_mad.h -header-y += ib_user_sa.h -header-y += ib_user_verbs.h -header-y += rdma_netlink.h -header-y += rdma_user_cm.h -header-y += hfi/ -header-y += rdma_user_rxe.h -header-y += cxgb3-abi.h -header-y += cxgb4-abi.h -header-y += mlx4-abi.h -header-y += mlx5-abi.h -header-y += mthca-abi.h -header-y += nes-abi.h -header-y += ocrdma-abi.h -header-y += hns-abi.h -header-y += vmw_pvrdma-abi.h -header-y += qedr-abi.h diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index e2c8a3f0ccecb6f8cceb4a516f3927f4709d8ca0..74018bd18d7216fd0b34cc4d57d06c8138f18881 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -39,6 +39,8 @@ #ifndef __BNXT_RE_UVERBS_ABI_H__ #define __BNXT_RE_UVERBS_ABI_H__ +#include + #define BNXT_RE_ABI_VERSION 1 struct bnxt_re_uctx_resp { diff --git a/include/uapi/rdma/hfi/Kbuild b/include/uapi/rdma/hfi/Kbuild deleted file mode 100644 index b65b0b3a5f632ae27f7749dc20934e3bb61f4926..0000000000000000000000000000000000000000 --- a/include/uapi/rdma/hfi/Kbuild +++ /dev/null @@ -1,3 +0,0 @@ -# UAPI Header export list -header-y += hfi1_user.h -header-y += hfi1_ioctl.h diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 997f904c76923057a376e0f4f0772ac82dcacef7..270c350bedc6c4911ff0763275fc034e90b2a018 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -947,6 +947,17 @@ struct ib_uverbs_flow_spec_action_tag { __u32 reserved1; }; +struct ib_uverbs_flow_spec_action_drop { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; +}; + struct ib_uverbs_flow_tunnel_filter { __be32 tunnel_id; }; @@ -1124,4 +1135,6 @@ struct ib_uverbs_ex_destroy_rwq_ind_table { __u32 ind_tbl_handle; }; +#define IB_DEVICE_NAME_MAX 64 + #endif /* IB_USER_VERBS_H */ diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index 5016abc9ee9712abc1fb4e04993940b80460a2be..c8c1d2d6df4d52ad84315727ec69849b9daa32ee 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -222,7 +222,7 @@ struct pvrdma_sq_wqe_hdr { __u32 opcode; /* operation type */ __u32 send_flags; /* wr flags */ union { - __u32 imm_data; + __be32 imm_data; __u32 invalidate_rkey; } ex; __u32 reserved; @@ -273,7 +273,7 @@ struct pvrdma_cqe { __u32 opcode; __u32 status; __u32 byte_len; - __u32 imm_data; + __be32 imm_data; __u32 src_qp; __u32 wc_flags; __u32 vendor_err; diff --git a/include/uapi/scsi/Kbuild b/include/uapi/scsi/Kbuild deleted file mode 100644 index d791e0ad509d3fc525f60b220e6e0dcc37c97a08..0000000000000000000000000000000000000000 --- a/include/uapi/scsi/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -# UAPI Header export list -header-y += fc/ -header-y += scsi_bsg_fc.h -header-y += scsi_netlink.h -header-y += scsi_netlink_fc.h -header-y += cxlflash_ioctl.h diff --git a/include/uapi/scsi/fc/Kbuild b/include/uapi/scsi/fc/Kbuild deleted file mode 100644 index 5ead9fac265c31e26872e8bd991a2e2c81a7064d..0000000000000000000000000000000000000000 --- a/include/uapi/scsi/fc/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -# UAPI Header export list -header-y += fc_els.h -header-y += fc_fs.h -header-y += fc_gs.h -header-y += fc_ns.h diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild deleted file mode 100644 index 9578d8bdbf31fe5f1675704bc6fad8b090b96b31..0000000000000000000000000000000000000000 --- a/include/uapi/sound/Kbuild +++ /dev/null @@ -1,16 +0,0 @@ -# UAPI Header export list -header-y += asequencer.h -header-y += asoc.h -header-y += asound.h -header-y += asound_fm.h -header-y += compress_offload.h -header-y += compress_params.h -header-y += emu10k1.h -header-y += firewire.h -header-y += hdsp.h -header-y += hdspm.h -header-y += sb16_csp.h -header-y += sfnt_info.h -header-y += tlv.h -header-y += usb_stream.h -header-y += snd_sst_tokens.h diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index be353a78c3030d05a5a8d1ebd478171c1e3c3fa7..fd41697cb4d36a24630e42ebb23cef4b21468506 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -107,9 +107,11 @@ enum { SNDRV_HWDEP_IFACE_FW_DIGI00X, /* Digidesign Digi 002/003 family */ SNDRV_HWDEP_IFACE_FW_TASCAM, /* TASCAM FireWire series */ SNDRV_HWDEP_IFACE_LINE6, /* Line6 USB processors */ + SNDRV_HWDEP_IFACE_FW_MOTU, /* MOTU FireWire series */ + SNDRV_HWDEP_IFACE_FW_FIREFACE, /* RME Fireface series */ /* Don't forget to change the following: */ - SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_LINE6 + SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_FIREFACE }; struct snd_hwdep_info { diff --git a/include/uapi/sound/firewire.h b/include/uapi/sound/firewire.h index db79a12fcc78db70d234b89d4b8bf6f573598224..622900488bdcba396f94641d4b7e9a038fc8c462 100644 --- a/include/uapi/sound/firewire.h +++ b/include/uapi/sound/firewire.h @@ -10,6 +10,7 @@ #define SNDRV_FIREWIRE_EVENT_DICE_NOTIFICATION 0xd1ce004e #define SNDRV_FIREWIRE_EVENT_EFW_RESPONSE 0x4e617475 #define SNDRV_FIREWIRE_EVENT_DIGI00X_MESSAGE 0x746e736c +#define SNDRV_FIREWIRE_EVENT_MOTU_NOTIFICATION 0x64776479 struct snd_firewire_event_common { unsigned int type; /* SNDRV_FIREWIRE_EVENT_xxx */ @@ -46,12 +47,18 @@ struct snd_firewire_event_digi00x_message { __u32 message; /* Digi00x-specific message */ }; +struct snd_firewire_event_motu_notification { + unsigned int type; + __u32 message; /* MOTU-specific bits. */ +}; + union snd_firewire_event { struct snd_firewire_event_common common; struct snd_firewire_event_lock_status lock_status; struct snd_firewire_event_dice_notification dice_notification; struct snd_firewire_event_efw_response efw_response; struct snd_firewire_event_digi00x_message digi00x_message; + struct snd_firewire_event_motu_notification motu_notification; }; @@ -65,7 +72,8 @@ union snd_firewire_event { #define SNDRV_FIREWIRE_TYPE_OXFW 4 #define SNDRV_FIREWIRE_TYPE_DIGI00X 5 #define SNDRV_FIREWIRE_TYPE_TASCAM 6 -/* RME, MOTU, ... */ +#define SNDRV_FIREWIRE_TYPE_MOTU 7 +#define SNDRV_FIREWIRE_TYPE_FIREFACE 8 struct snd_firewire_get_info { unsigned int type; /* SNDRV_FIREWIRE_TYPE_xxx */ diff --git a/include/uapi/video/Kbuild b/include/uapi/video/Kbuild deleted file mode 100644 index ac7203bb32cc0eace2b5be57412d3e215e50b8ac..0000000000000000000000000000000000000000 --- a/include/uapi/video/Kbuild +++ /dev/null @@ -1,4 +0,0 @@ -# UAPI Header export list -header-y += edid.h -header-y += sisfb.h -header-y += uvesafb.h diff --git a/include/uapi/xen/Kbuild b/include/uapi/xen/Kbuild deleted file mode 100644 index 5c459628e8c7492c7eaea4bf6cc75e6976438e82..0000000000000000000000000000000000000000 --- a/include/uapi/xen/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -# UAPI Header export list -header-y += evtchn.h -header-y += gntalloc.h -header-y += gntdev.h -header-y += privcmd.h diff --git a/include/video/Kbuild b/include/video/Kbuild deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/include/video/udlfb.h b/include/video/udlfb.h index f9466fa54ba4bd626fe618aef5b95f3b30dfde9b..3ea90aea5617cb359423fefc6140e381d2e78e20 100644 --- a/include/video/udlfb.h +++ b/include/video/udlfb.h @@ -92,6 +92,6 @@ struct dlfb_data { /* remove these once align.h patch is taken into kernel */ #define DL_ALIGN_UP(x, a) ALIGN(x, a) -#define DL_ALIGN_DOWN(x, a) ALIGN(x-(a-1), a) +#define DL_ALIGN_DOWN(x, a) ALIGN_DOWN(x, a) #endif diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index 95ce6ac3a971fb1425d2dc841466defdc850bf9c..b1b4ecdf329a7b4fefc623f5276f10f526a8414c 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -2,8 +2,16 @@ #define _ASM_ARM_XEN_PAGE_COHERENT_H #include +#include #include +static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dev_dma_ops) + return dev->archdata.dev_dma_ops; + return get_arch_dma_ops(NULL); +} + void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -19,13 +27,13 @@ void __xen_dma_sync_single_for_device(struct device *hwdev, static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); + return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); } static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { - __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); + xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); } static inline void xen_dma_map_page(struct device *hwdev, struct page *page, @@ -49,7 +57,7 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, * specific function. */ if (local) - __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); + xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); else __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); } @@ -67,8 +75,8 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, * specific function. */ if (pfn_valid(pfn)) { - if (__generic_dma_ops(hwdev)->unmap_page) - __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); + if (xen_get_dma_ops(hwdev)->unmap_page) + xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); } else __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); } @@ -78,8 +86,8 @@ static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, { unsigned long pfn = PFN_DOWN(handle); if (pfn_valid(pfn)) { - if (__generic_dma_ops(hwdev)->sync_single_for_cpu) - __generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); + if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) + xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); } else __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); } @@ -89,8 +97,8 @@ static inline void xen_dma_sync_single_for_device(struct device *hwdev, { unsigned long pfn = PFN_DOWN(handle); if (pfn_valid(pfn)) { - if (__generic_dma_ops(hwdev)->sync_single_for_device) - __generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); + if (xen_get_dma_ops(hwdev)->sync_single_for_device) + xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); } else __xen_dma_sync_single_for_device(hwdev, handle, size, dir); } diff --git a/include/xen/interface/io/9pfs.h b/include/xen/interface/io/9pfs.h new file mode 100644 index 0000000000000000000000000000000000000000..5b6c19dae5e252f244b447a32e6bcd09b2c9344f --- /dev/null +++ b/include/xen/interface/io/9pfs.h @@ -0,0 +1,36 @@ +/* + * 9pfs.h -- Xen 9PFS transport + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2017 Stefano Stabellini + */ + +#ifndef __XEN_PUBLIC_IO_9PFS_H__ +#define __XEN_PUBLIC_IO_9PFS_H__ + +#include "xen/interface/io/ring.h" + +/* + * See docs/misc/9pfs.markdown in xen.git for the full specification: + * https://xenbits.xen.org/docs/unstable/misc/9pfs.html + */ +DEFINE_XEN_FLEX_RING_AND_INTF(xen_9pfs); + +#endif diff --git a/include/xen/interface/io/displif.h b/include/xen/interface/io/displif.h new file mode 100644 index 0000000000000000000000000000000000000000..596578d9be3e065a4c10d55dbf316bb7e672f84a --- /dev/null +++ b/include/xen/interface/io/displif.h @@ -0,0 +1,854 @@ +/****************************************************************************** + * displif.h + * + * Unified display device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2016-2017 EPAM Systems Inc. + * + * Authors: Oleksandr Andrushchenko + * Oleksandr Grytsov + */ + +#ifndef __XEN_PUBLIC_IO_DISPLIF_H__ +#define __XEN_PUBLIC_IO_DISPLIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + ****************************************************************************** + * Protocol version + ****************************************************************************** + */ +#define XENDISPL_PROTOCOL_VERSION "1" + +/* + ****************************************************************************** + * Main features provided by the protocol + ****************************************************************************** + * This protocol aims to provide a unified protocol which fits more + * sophisticated use-cases than a framebuffer device can handle. At the + * moment basic functionality is supported with the intention to be extended: + * o multiple dynamically allocated/destroyed framebuffers + * o buffers of arbitrary sizes + * o buffer allocation at either back or front end + * o better configuration options including multiple display support + * + * Note: existing fbif can be used together with displif running at the + * same time, e.g. on Linux one provides framebuffer and another DRM/KMS + * + * Note: display resolution (XenStore's "resolution" property) defines + * visible area of the virtual display. At the same time resolution of + * the display and frame buffers may differ: buffers can be smaller, equal + * or bigger than the visible area. This is to enable use-cases, where backend + * may do some post-processing of the display and frame buffers supplied, + * e.g. those buffers can be just a part of the final composition. + * + ****************************************************************************** + * Direction of improvements + ****************************************************************************** + * Future extensions to the existing protocol may include: + * o display/connector cloning + * o allocation of objects other than display buffers + * o plane/overlay support + * o scaling support + * o rotation support + * + ****************************************************************************** + * Feature and Parameter Negotiation + ****************************************************************************** + * + * Front->back notifications: when enqueuing a new request, sending a + * notification can be made conditional on xendispl_req (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * xendispl_req appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: when enqueuing a new response, sending a + * notification can be made conditional on xendispl_resp (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * xendispl_resp appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + * + * The two halves of a para-virtual display driver utilize nodes within + * XenStore to communicate capabilities and to negotiate operating parameters. + * This section enumerates these nodes which reside in the respective front and + * backend portions of XenStore, following the XenBus convention. + * + * All data in XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + ****************************************************************************** + * Example configuration + ****************************************************************************** + * + * Note: depending on the use-case backend can expose more display connectors + * than the underlying HW physically has by employing SW graphics compositors + * + * This is an example of backend and frontend configuration: + * + *--------------------------------- Backend ----------------------------------- + * + * /local/domain/0/backend/vdispl/1/0/frontend-id = "1" + * /local/domain/0/backend/vdispl/1/0/frontend = "/local/domain/1/device/vdispl/0" + * /local/domain/0/backend/vdispl/1/0/state = "4" + * /local/domain/0/backend/vdispl/1/0/versions = "1,2" + * + *--------------------------------- Frontend ---------------------------------- + * + * /local/domain/1/device/vdispl/0/backend-id = "0" + * /local/domain/1/device/vdispl/0/backend = "/local/domain/0/backend/vdispl/1/0" + * /local/domain/1/device/vdispl/0/state = "4" + * /local/domain/1/device/vdispl/0/version = "1" + * /local/domain/1/device/vdispl/0/be-alloc = "1" + * + *-------------------------- Connector 0 configuration ------------------------ + * + * /local/domain/1/device/vdispl/0/0/resolution = "1920x1080" + * /local/domain/1/device/vdispl/0/0/req-ring-ref = "2832" + * /local/domain/1/device/vdispl/0/0/req-event-channel = "15" + * /local/domain/1/device/vdispl/0/0/evt-ring-ref = "387" + * /local/domain/1/device/vdispl/0/0/evt-event-channel = "16" + * + *-------------------------- Connector 1 configuration ------------------------ + * + * /local/domain/1/device/vdispl/0/1/resolution = "800x600" + * /local/domain/1/device/vdispl/0/1/req-ring-ref = "2833" + * /local/domain/1/device/vdispl/0/1/req-event-channel = "17" + * /local/domain/1/device/vdispl/0/1/evt-ring-ref = "388" + * /local/domain/1/device/vdispl/0/1/evt-event-channel = "18" + * + ****************************************************************************** + * Backend XenBus Nodes + ****************************************************************************** + * + *----------------------------- Protocol version ------------------------------ + * + * versions + * Values: + * + * List of XENDISPL_LIST_SEPARATOR separated protocol versions supported + * by the backend. For example "1,2,3". + * + ****************************************************************************** + * Frontend XenBus Nodes + ****************************************************************************** + * + *-------------------------------- Addressing --------------------------------- + * + * dom-id + * Values: + * + * Domain identifier. + * + * dev-id + * Values: + * + * Device identifier. + * + * conn-idx + * Values: + * + * Zero based contigous index of the connector. + * /local/domain//device/vdispl///... + * + *----------------------------- Protocol version ------------------------------ + * + * version + * Values: + * + * Protocol version, chosen among the ones supported by the backend. + * + *------------------------- Backend buffer allocation ------------------------- + * + * be-alloc + * Values: "0", "1" + * + * If value is set to "1", then backend can be a buffer provider/allocator + * for this domain during XENDISPL_OP_DBUF_CREATE operation (see below + * for negotiation). + * If value is not "1" or omitted frontend must allocate buffers itself. + * + *----------------------------- Connector settings ---------------------------- + * + * resolution + * Values: x + * + * Width and height of the connector in pixels separated by + * XENDISPL_RESOLUTION_SEPARATOR. This defines visible area of the + * display. + * + *------------------ Connector Request Transport Parameters ------------------- + * + * This communication path is used to deliver requests from frontend to backend + * and get the corresponding responses from backend to frontend, + * set up per connector. + * + * req-event-channel + * Values: + * + * The identifier of the Xen connector's control event channel + * used to signal activity in the ring buffer. + * + * req-ring-ref + * Values: + * + * The Xen grant reference granting permission for the backend to map + * a sole page of connector's control ring buffer. + * + *------------------- Connector Event Transport Parameters -------------------- + * + * This communication path is used to deliver asynchronous events from backend + * to frontend, set up per connector. + * + * evt-event-channel + * Values: + * + * The identifier of the Xen connector's event channel + * used to signal activity in the ring buffer. + * + * evt-ring-ref + * Values: + * + * The Xen grant reference granting permission for the backend to map + * a sole page of connector's event ring buffer. + */ + +/* + ****************************************************************************** + * STATE DIAGRAMS + ****************************************************************************** + * + * Tool stack creates front and back state nodes with initial state + * XenbusStateInitialising. + * Tool stack creates and sets up frontend display configuration + * nodes per domain. + * + *-------------------------------- Normal flow -------------------------------- + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query backend device identification + * data. + * o Open and validate backend device. + * | + * | + * V + * XenbusStateInitWait + * + * o Query frontend configuration + * o Allocate and initialize + * event channels per configured + * connector. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the event channels. + * | + * | + * V + * XenbusStateConnected + * + * o Create and initialize OS + * virtual display connectors + * as per configuration. + * | + * | + * V + * XenbusStateConnected + * + * XenbusStateUnknown + * XenbusStateClosed + * XenbusStateClosing + * o Remove virtual display device + * o Remove event channels + * | + * | + * V + * XenbusStateClosed + * + *------------------------------- Recovery flow ------------------------------- + * + * In case of frontend unrecoverable errors backend handles that as + * if frontend goes into the XenbusStateClosed state. + * + * In case of backend unrecoverable errors frontend tries removing + * the virtualized device. If this is possible at the moment of error, + * then frontend goes into the XenbusStateInitialising state and is ready for + * new connection with backend. If the virtualized device is still in use and + * cannot be removed, then frontend goes into the XenbusStateReconfiguring state + * until either the virtualized device is removed or backend initiates a new + * connection. On the virtualized device removal frontend goes into the + * XenbusStateInitialising state. + * + * Note on XenbusStateReconfiguring state of the frontend: if backend has + * unrecoverable errors then frontend cannot send requests to the backend + * and thus cannot provide functionality of the virtualized device anymore. + * After backend is back to normal the virtualized device may still hold some + * state: configuration in use, allocated buffers, client application state etc. + * In most cases, this will require frontend to implement complex recovery + * reconnect logic. Instead, by going into XenbusStateReconfiguring state, + * frontend will make sure no new clients of the virtualized device are + * accepted, allow existing client(s) to exit gracefully by signaling error + * state etc. + * Once all the clients are gone frontend can reinitialize the virtualized + * device and get into XenbusStateInitialising state again signaling the + * backend that a new connection can be made. + * + * There are multiple conditions possible under which frontend will go from + * XenbusStateReconfiguring into XenbusStateInitialising, some of them are OS + * specific. For example: + * 1. The underlying OS framework may provide callbacks to signal that the last + * client of the virtualized device has gone and the device can be removed + * 2. Frontend can schedule a deferred work (timer/tasklet/workqueue) + * to periodically check if this is the right time to re-try removal of + * the virtualized device. + * 3. By any other means. + * + ****************************************************************************** + * REQUEST CODES + ****************************************************************************** + * Request codes [0; 15] are reserved and must not be used + */ + +#define XENDISPL_OP_DBUF_CREATE 0x10 +#define XENDISPL_OP_DBUF_DESTROY 0x11 +#define XENDISPL_OP_FB_ATTACH 0x12 +#define XENDISPL_OP_FB_DETACH 0x13 +#define XENDISPL_OP_SET_CONFIG 0x14 +#define XENDISPL_OP_PG_FLIP 0x15 + +/* + ****************************************************************************** + * EVENT CODES + ****************************************************************************** + */ +#define XENDISPL_EVT_PG_FLIP 0x00 + +/* + ****************************************************************************** + * XENSTORE FIELD AND PATH NAME STRINGS, HELPERS + ****************************************************************************** + */ +#define XENDISPL_DRIVER_NAME "vdispl" + +#define XENDISPL_LIST_SEPARATOR "," +#define XENDISPL_RESOLUTION_SEPARATOR "x" + +#define XENDISPL_FIELD_BE_VERSIONS "versions" +#define XENDISPL_FIELD_FE_VERSION "version" +#define XENDISPL_FIELD_REQ_RING_REF "req-ring-ref" +#define XENDISPL_FIELD_REQ_CHANNEL "req-event-channel" +#define XENDISPL_FIELD_EVT_RING_REF "evt-ring-ref" +#define XENDISPL_FIELD_EVT_CHANNEL "evt-event-channel" +#define XENDISPL_FIELD_RESOLUTION "resolution" +#define XENDISPL_FIELD_BE_ALLOC "be-alloc" + +/* + ****************************************************************************** + * STATUS RETURN CODES + ****************************************************************************** + * + * Status return code is zero on success and -XEN_EXX on failure. + * + ****************************************************************************** + * Assumptions + ****************************************************************************** + * o usage of grant reference 0 as invalid grant reference: + * grant reference 0 is valid, but never exposed to a PV driver, + * because of the fact it is already in use/reserved by the PV console. + * o all references in this document to page sizes must be treated + * as pages of size XEN_PAGE_SIZE unless otherwise noted. + * + ****************************************************************************** + * Description of the protocol between frontend and backend driver + ****************************************************************************** + * + * The two halves of a Para-virtual display driver communicate with + * each other using shared pages and event channels. + * Shared page contains a ring with request/response packets. + * + * All reserved fields in the structures below must be 0. + * Display buffers's cookie of value 0 is treated as invalid. + * Framebuffer's cookie of value 0 is treated as invalid. + * + * For all request/response/event packets that use cookies: + * dbuf_cookie - uint64_t, unique to guest domain value used by the backend + * to map remote display buffer to its local one + * fb_cookie - uint64_t, unique to guest domain value used by the backend + * to map remote framebuffer to its local one + * + *---------------------------------- Requests --------------------------------- + * + * All requests/responses, which are not connector specific, must be sent over + * control ring of the connector which has the index value of 0: + * /local/domain//device/vdispl//0/req-ring-ref + * + * All request packets have the same length (64 octets) + * All request packets have common header: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * id - uint16_t, private guest value, echoed in response + * operation - uint8_t, operation code, XENDISPL_OP_??? + * + * Request dbuf creation - request creation of a display buffer. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id |_OP_DBUF_CREATE | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | dbuf_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | dbuf_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | width | 20 + * +----------------+----------------+----------------+----------------+ + * | height | 24 + * +----------------+----------------+----------------+----------------+ + * | bpp | 28 + * +----------------+----------------+----------------+----------------+ + * | buffer_sz | 32 + * +----------------+----------------+----------------+----------------+ + * | flags | 36 + * +----------------+----------------+----------------+----------------+ + * | gref_directory | 40 + * +----------------+----------------+----------------+----------------+ + * | reserved | 44 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Must be sent over control ring of the connector which has the index + * value of 0: + * /local/domain//device/vdispl//0/req-ring-ref + * All unused bits in flags field must be set to 0. + * + * An attempt to create multiple display buffers with the same dbuf_cookie is + * an error. dbuf_cookie can be re-used after destroying the corresponding + * display buffer. + * + * Width and height of the display buffers can be smaller, equal or bigger + * than the connector's resolution. Depth/pixel format of the individual + * buffers can differ as well. + * + * width - uint32_t, width in pixels + * height - uint32_t, height in pixels + * bpp - uint32_t, bits per pixel + * buffer_sz - uint32_t, buffer size to be allocated, octets + * flags - uint32_t, flags of the operation + * o XENDISPL_DBUF_FLG_REQ_ALLOC - if set, then backend is requested + * to allocate the buffer with the parameters provided in this request. + * Page directory is handled as follows: + * Frontend on request: + * o allocates pages for the directory (gref_directory, + * gref_dir_next_page(s) + * o grants permissions for the pages of the directory to the backend + * o sets gref_dir_next_page fields + * Backend on response: + * o grants permissions for the pages of the buffer allocated to + * the frontend + * o fills in page directory with grant references + * (gref[] in struct xendispl_page_directory) + * gref_directory - grant_ref_t, a reference to the first shared page + * describing shared buffer references. At least one page exists. If shared + * buffer size (buffer_sz) exceeds what can be addressed by this single page, + * then reference to the next page must be supplied (see gref_dir_next_page + * below) + */ + +#define XENDISPL_DBUF_FLG_REQ_ALLOC (1 << 0) + +struct xendispl_dbuf_create_req { + uint64_t dbuf_cookie; + uint32_t width; + uint32_t height; + uint32_t bpp; + uint32_t buffer_sz; + uint32_t flags; + grant_ref_t gref_directory; +}; + +/* + * Shared page for XENDISPL_OP_DBUF_CREATE buffer descriptor (gref_directory in + * the request) employs a list of pages, describing all pages of the shared + * data buffer: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | gref_dir_next_page | 4 + * +----------------+----------------+----------------+----------------+ + * | gref[0] | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | gref[i] | i*4+8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | gref[N - 1] | N*4+8 + * +----------------+----------------+----------------+----------------+ + * + * gref_dir_next_page - grant_ref_t, reference to the next page describing + * page directory. Must be 0 if there are no more pages in the list. + * gref[i] - grant_ref_t, reference to a shared page of the buffer + * allocated at XENDISPL_OP_DBUF_CREATE + * + * Number of grant_ref_t entries in the whole page directory is not + * passed, but instead can be calculated as: + * num_grefs_total = (XENDISPL_OP_DBUF_CREATE.buffer_sz + XEN_PAGE_SIZE - 1) / + * XEN_PAGE_SIZE + */ + +struct xendispl_page_directory { + grant_ref_t gref_dir_next_page; + grant_ref_t gref[1]; /* Variable length */ +}; + +/* + * Request dbuf destruction - destroy a previously allocated display buffer: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id |_OP_DBUF_DESTROY| reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | dbuf_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | dbuf_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Must be sent over control ring of the connector which has the index + * value of 0: + * /local/domain//device/vdispl//0/req-ring-ref + */ + +struct xendispl_dbuf_destroy_req { + uint64_t dbuf_cookie; +}; + +/* + * Request framebuffer attachment - request attachment of a framebuffer to + * previously created display buffer. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_FB_ATTACH | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | dbuf_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | dbuf_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie low 32-bit | 20 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie high 32-bit | 24 + * +----------------+----------------+----------------+----------------+ + * | width | 28 + * +----------------+----------------+----------------+----------------+ + * | height | 32 + * +----------------+----------------+----------------+----------------+ + * | pixel_format | 36 + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Must be sent over control ring of the connector which has the index + * value of 0: + * /local/domain//device/vdispl//0/req-ring-ref + * Width and height can be smaller, equal or bigger than the connector's + * resolution. + * + * An attempt to create multiple frame buffers with the same fb_cookie is + * an error. fb_cookie can be re-used after destroying the corresponding + * frame buffer. + * + * width - uint32_t, width in pixels + * height - uint32_t, height in pixels + * pixel_format - uint32_t, pixel format of the framebuffer, FOURCC code + */ + +struct xendispl_fb_attach_req { + uint64_t dbuf_cookie; + uint64_t fb_cookie; + uint32_t width; + uint32_t height; + uint32_t pixel_format; +}; + +/* + * Request framebuffer detach - detach a previously + * attached framebuffer from the display buffer in request: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_FB_DETACH | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Must be sent over control ring of the connector which has the index + * value of 0: + * /local/domain//device/vdispl//0/req-ring-ref + */ + +struct xendispl_fb_detach_req { + uint64_t fb_cookie; +}; + +/* + * Request configuration set/reset - request to set or reset + * the configuration/mode of the display: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_SET_CONFIG | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | x | 20 + * +----------------+----------------+----------------+----------------+ + * | y | 24 + * +----------------+----------------+----------------+----------------+ + * | width | 28 + * +----------------+----------------+----------------+----------------+ + * | height | 32 + * +----------------+----------------+----------------+----------------+ + * | bpp | 40 + * +----------------+----------------+----------------+----------------+ + * | reserved | 44 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Pass all zeros to reset, otherwise command is treated as + * configuration set. + * Framebuffer's cookie defines which framebuffer/dbuf must be + * displayed while enabling display (applying configuration). + * x, y, width and height are bound by the connector's resolution and must not + * exceed it. + * + * x - uint32_t, starting position in pixels by X axis + * y - uint32_t, starting position in pixels by Y axis + * width - uint32_t, width in pixels + * height - uint32_t, height in pixels + * bpp - uint32_t, bits per pixel + */ + +struct xendispl_set_config_req { + uint64_t fb_cookie; + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; + uint32_t bpp; +}; + +/* + * Request page flip - request to flip a page identified by the framebuffer + * cookie: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_PG_FLIP | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + */ + +struct xendispl_page_flip_req { + uint64_t fb_cookie; +}; + +/* + *---------------------------------- Responses -------------------------------- + * + * All response packets have the same length (64 octets) + * + * All response packets have common header: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | status | 8 + * +----------------+----------------+----------------+----------------+ + * | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * id - uint16_t, private guest value, echoed from request + * status - int32_t, response status, zero on success and -XEN_EXX on failure + * + *----------------------------------- Events ---------------------------------- + * + * Events are sent via a shared page allocated by the front and propagated by + * evt-event-channel/evt-ring-ref XenStore entries + * All event packets have the same length (64 octets) + * All event packets have common header: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | type | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * + * id - uint16_t, event id, may be used by front + * type - uint8_t, type of the event + * + * + * Page flip complete event - event from back to front on page flip completed: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _EVT_PG_FLIP | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie low 32-bit | 12 + * +----------------+----------------+----------------+----------------+ + * | fb_cookie high 32-bit | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + */ + +struct xendispl_pg_flip_evt { + uint64_t fb_cookie; +}; + +struct xendispl_req { + uint16_t id; + uint8_t operation; + uint8_t reserved[5]; + union { + struct xendispl_dbuf_create_req dbuf_create; + struct xendispl_dbuf_destroy_req dbuf_destroy; + struct xendispl_fb_attach_req fb_attach; + struct xendispl_fb_detach_req fb_detach; + struct xendispl_set_config_req set_config; + struct xendispl_page_flip_req pg_flip; + uint8_t reserved[56]; + } op; +}; + +struct xendispl_resp { + uint16_t id; + uint8_t operation; + uint8_t reserved; + int32_t status; + uint8_t reserved1[56]; +}; + +struct xendispl_evt { + uint16_t id; + uint8_t type; + uint8_t reserved[5]; + union { + struct xendispl_pg_flip_evt pg_flip; + uint8_t reserved[56]; + } op; +}; + +DEFINE_RING_TYPES(xen_displif, struct xendispl_req, struct xendispl_resp); + +/* + ****************************************************************************** + * Back to front events delivery + ****************************************************************************** + * In order to deliver asynchronous events from back to front a shared page is + * allocated by front and its granted reference propagated to back via + * XenStore entries (evt-ring-ref/evt-event-channel). + * This page has a common header used by both front and back to synchronize + * access and control event's ring buffer, while back being a producer of the + * events and front being a consumer. The rest of the page after the header + * is used for event packets. + * + * Upon reception of an event(s) front may confirm its reception + * for either each event, group of events or none. + */ + +struct xendispl_event_page { + uint32_t in_cons; + uint32_t in_prod; + uint8_t reserved[56]; +}; + +#define XENDISPL_EVENT_PAGE_SIZE XEN_PAGE_SIZE +#define XENDISPL_IN_RING_OFFS (sizeof(struct xendispl_event_page)) +#define XENDISPL_IN_RING_SIZE (XENDISPL_EVENT_PAGE_SIZE - XENDISPL_IN_RING_OFFS) +#define XENDISPL_IN_RING_LEN (XENDISPL_IN_RING_SIZE / sizeof(struct xendispl_evt)) +#define XENDISPL_IN_RING(page) \ + ((struct xendispl_evt *)((char *)(page) + XENDISPL_IN_RING_OFFS)) +#define XENDISPL_IN_RING_REF(page, idx) \ + (XENDISPL_IN_RING((page))[(idx) % XENDISPL_IN_RING_LEN]) + +#endif /* __XEN_PUBLIC_IO_DISPLIF_H__ */ diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index 8066c7849fbee36b8d8b0544281fc33af802db96..2a9510ade70181a4c10b65bdfc834d831ef8b287 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -26,43 +26,432 @@ #ifndef __XEN_PUBLIC_IO_KBDIF_H__ #define __XEN_PUBLIC_IO_KBDIF_H__ -/* In events (backend -> frontend) */ +/* + ***************************************************************************** + * Feature and Parameter Negotiation + ***************************************************************************** + * + * The two halves of a para-virtual driver utilize nodes within + * XenStore to communicate capabilities and to negotiate operating parameters. + * This section enumerates these nodes which reside in the respective front and + * backend portions of XenStore, following XenBus convention. + * + * All data in XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *---------------------------- Features supported ---------------------------- + * + * Capable backend advertises supported features by publishing + * corresponding entries in XenStore and puts 1 as the value of the entry. + * If a feature is not supported then 0 must be set or feature entry omitted. + * + * feature-abs-pointer + * Values: + * + * Backends, which support reporting of absolute coordinates for pointer + * device should set this to 1. + * + * feature-multi-touch + * Values: + * + * Backends, which support reporting of multi-touch events + * should set this to 1. + * + *------------------------- Pointer Device Parameters ------------------------ + * + * width + * Values: + * + * Maximum X coordinate (width) to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * height + * Values: + * + * Maximum Y coordinate (height) to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *------------------------------ Feature request ----------------------------- + * + * Capable frontend requests features from backend via setting corresponding + * entries to 1 in XenStore. Requests for features not advertised as supported + * by the backend have no effect. + * + * request-abs-pointer + * Values: + * + * Request backend to report absolute pointer coordinates + * (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION). + * + * request-multi-touch + * Values: + * + * Request backend to report multi-touch events. + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * page-gref + * Values: + * + * The Xen grant reference granting permission for the backend to map + * a sole page in a single page sized event ring buffer. + * + * page-ref + * Values: + * + * OBSOLETE, not recommended for use. + * PFN of the shared page. + * + *----------------------- Multi-touch Device Parameters ----------------------- + * + * multi-touch-num-contacts + * Values: + * + * Number of simultaneous touches reported. + * + * multi-touch-width + * Values: + * + * Width of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * multi-touch-height + * Values: + * + * Height of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + */ /* - * Frontends should ignore unknown in events. + * EVENT CODES. */ -/* Pointer movement event */ -#define XENKBD_TYPE_MOTION 1 -/* Event type 2 currently not used */ -/* Key event (includes pointer buttons) */ -#define XENKBD_TYPE_KEY 3 +#define XENKBD_TYPE_MOTION 1 +#define XENKBD_TYPE_RESERVED 2 +#define XENKBD_TYPE_KEY 3 +#define XENKBD_TYPE_POS 4 +#define XENKBD_TYPE_MTOUCH 5 + +/* Multi-touch event sub-codes */ + +#define XENKBD_MT_EV_DOWN 0 +#define XENKBD_MT_EV_UP 1 +#define XENKBD_MT_EV_MOTION 2 +#define XENKBD_MT_EV_SYN 3 +#define XENKBD_MT_EV_SHAPE 4 +#define XENKBD_MT_EV_ORIENT 5 + +/* + * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS. + */ + +#define XENKBD_DRIVER_NAME "vkbd" + +#define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer" +#define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch" +#define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer" +#define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch" +#define XENKBD_FIELD_RING_GREF "page-gref" +#define XENKBD_FIELD_EVT_CHANNEL "event-channel" +#define XENKBD_FIELD_WIDTH "width" +#define XENKBD_FIELD_HEIGHT "height" +#define XENKBD_FIELD_MT_WIDTH "multi-touch-width" +#define XENKBD_FIELD_MT_HEIGHT "multi-touch-height" +#define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts" + +/* OBSOLETE, not recommended for use */ +#define XENKBD_FIELD_RING_REF "page-ref" + /* - * Pointer position event - * Capable backend sets feature-abs-pointer in xenstore. - * Frontend requests ot instead of XENKBD_TYPE_MOTION by setting - * request-abs-update in xenstore. + ***************************************************************************** + * Description of the protocol between frontend and backend driver. + ***************************************************************************** + * + * The two halves of a Para-virtual driver communicate with + * each other using a shared page and an event channel. + * Shared page contains a ring with event structures. + * + * All reserved fields in the structures below must be 0. + * + ***************************************************************************** + * Backend to frontend events + ***************************************************************************** + * + * Frontends should ignore unknown in events. + * All event packets have the same length (40 octets) + * All event packets have common header: + * + * 0 octet + * +-----------------+ + * | type | + * +-----------------+ + * type - uint8_t, event code, XENKBD_TYPE_??? + * + * + * Pointer relative movement event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MOTION | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | rel_x | 8 + * +----------------+----------------+----------------+----------------+ + * | rel_y | 12 + * +----------------+----------------+----------------+----------------+ + * | rel_z | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * rel_x - int32_t, relative X motion + * rel_y - int32_t, relative Y motion + * rel_z - int32_t, relative Z motion (wheel) */ -#define XENKBD_TYPE_POS 4 struct xenkbd_motion { - uint8_t type; /* XENKBD_TYPE_MOTION */ - int32_t rel_x; /* relative X motion */ - int32_t rel_y; /* relative Y motion */ - int32_t rel_z; /* relative Z motion (wheel) */ + uint8_t type; + int32_t rel_x; + int32_t rel_y; + int32_t rel_z; }; +/* + * Key event (includes pointer buttons) + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_KEY | pressed | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | keycode | 8 + * +----------------+----------------+----------------+----------------+ + * | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * pressed - uint8_t, 1 if pressed; 0 otherwise + * keycode - uint32_t, KEY_* from linux/input.h + */ + struct xenkbd_key { - uint8_t type; /* XENKBD_TYPE_KEY */ - uint8_t pressed; /* 1 if pressed; 0 otherwise */ - uint32_t keycode; /* KEY_* from linux/input.h */ + uint8_t type; + uint8_t pressed; + uint32_t keycode; }; +/* + * Pointer absolute position event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_POS | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 12 + * +----------------+----------------+----------------+----------------+ + * | rel_z | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position (in FB pixels) + * abs_y - int32_t, absolute Y position (in FB pixels) + * rel_z - int32_t, relative Z motion (wheel) + */ + struct xenkbd_position { - uint8_t type; /* XENKBD_TYPE_POS */ - int32_t abs_x; /* absolute X position (in FB pixels) */ - int32_t abs_y; /* absolute Y position (in FB pixels) */ - int32_t rel_z; /* relative Z motion (wheel) */ + uint8_t type; + int32_t abs_x; + int32_t abs_y; + int32_t rel_z; +}; + +/* + * Multi-touch event and its sub-types + * + * All multi-touch event packets have common header: + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | event_type | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * + * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_??? + * contact_id - unt8_t, ID of the contact + * + * Touch interactions can consist of one or more contacts. + * For each contact, a series of events is generated, starting + * with a down event, followed by zero or more motion events, + * and ending with an up event. Events relating to the same + * contact point can be identified by the ID of the sequence: contact ID. + * Contact ID may be reused after XENKBD_MT_EV_UP event and + * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range. + * + * For further information please refer to documentation on Wayland [1], + * Linux [2] and Windows [3] multi-touch support. + * + * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml + * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt + * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx + * + * + * Multi-touch down event - sent when a new touch is made: touch is assigned + * a unique contact ID, sent with this and consequent events related + * to this touch. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_DOWN | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 12 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position, in pixels + * abs_y - int32_t, absolute Y position, in pixels + * + * Multi-touch contact release event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_UP | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * Multi-touch motion event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_MOTION | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 12 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position, in pixels, + * abs_y - int32_t, absolute Y position, in pixels, + * + * Multi-touch input synchronization event - shows end of a set of events + * which logically belong together. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_SYN | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * Multi-touch shape event - touch point's shape has changed its shape. + * Shape is approximated by an ellipse through the major and minor axis + * lengths: major is the longer diameter of the ellipse and minor is the + * shorter one. Center of the ellipse is reported via + * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_SHAPE | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | major | 12 + * +----------------+----------------+----------------+----------------+ + * | minor | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * major - unt32_t, length of the major axis, pixels + * minor - unt32_t, length of the minor axis, pixels + * + * Multi-touch orientation event - touch point's shape has changed + * its orientation: calculated as a clockwise angle between the major axis + * of the ellipse and positive Y axis in degrees, [-180; +180]. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_ORIENT | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | orientation | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * orientation - int16_t, clockwise angle of the major axis + */ + +struct xenkbd_mtouch { + uint8_t type; /* XENKBD_TYPE_MTOUCH */ + uint8_t event_type; /* XENKBD_MT_EV_??? */ + uint8_t contact_id; + uint8_t reserved[5]; /* reserved for the future use */ + union { + struct { + int32_t abs_x; /* absolute X position, pixels */ + int32_t abs_y; /* absolute Y position, pixels */ + } pos; + struct { + uint32_t major; /* length of the major axis, pixels */ + uint32_t minor; /* length of the minor axis, pixels */ + } shape; + int16_t orientation; /* clockwise angle of the major axis */ + } u; }; #define XENKBD_IN_EVENT_SIZE 40 @@ -72,15 +461,26 @@ union xenkbd_in_event { struct xenkbd_motion motion; struct xenkbd_key key; struct xenkbd_position pos; + struct xenkbd_mtouch mtouch; char pad[XENKBD_IN_EVENT_SIZE]; }; -/* Out events (frontend -> backend) */ - /* + ***************************************************************************** + * Frontend to backend events + ***************************************************************************** + * * Out events may be sent only when requested by backend, and receipt * of an unknown out event is an error. * No out events currently defined. + + * All event packets have the same length (40 octets) + * All event packets have common header: + * 0 octet + * +-----------------+ + * | type | + * +-----------------+ + * type - uint8_t, event code */ #define XENKBD_OUT_EVENT_SIZE 40 @@ -90,7 +490,11 @@ union xenkbd_out_event { char pad[XENKBD_OUT_EVENT_SIZE]; }; -/* shared page */ +/* + ***************************************************************************** + * Shared page + ***************************************************************************** + */ #define XENKBD_IN_RING_SIZE 2048 #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) @@ -113,4 +517,4 @@ struct xenkbd_page { uint32_t out_cons, out_prod; }; -#endif +#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */ diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 21f4fbd55e48edf03061b92d687a23cbe34a8989..c794568555391518466d8d7d11a08a4b38ef3eaf 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -283,4 +283,147 @@ struct __name##_back_ring { \ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) + +/* + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and + * functions to check if there is data on the ring, and to read and + * write to them. + * + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but + * does not define the indexes page. As different protocols can have + * extensions to the basic format, this macro allow them to define their + * own struct. + * + * XEN_FLEX_RING_SIZE + * Convenience macro to calculate the size of one of the two rings + * from the overall order. + * + * $NAME_mask + * Function to apply the size mask to an index, to reduce the index + * within the range [0-size]. + * + * $NAME_read_packet + * Function to read data from the ring. The amount of data to read is + * specified by the "size" argument. + * + * $NAME_write_packet + * Function to write data to the ring. The amount of data to write is + * specified by the "size" argument. + * + * $NAME_get_ring_ptr + * Convenience function that returns a pointer to read/write to the + * ring at the right location. + * + * $NAME_data_intf + * Indexes page, shared between frontend and backend. It also + * contains the array of grant refs. + * + * $NAME_queued + * Function to calculate how many bytes are currently on the ring, + * ready to be read. It can also be used to calculate how much free + * space is currently on the ring (XEN_FLEX_RING_SIZE() - + * $NAME_queued()). + */ + +#ifndef XEN_PAGE_SHIFT +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always + * 4K, regardless of the architecture, and page granularity chosen by + * operating systems. + */ +#define XEN_PAGE_SHIFT 12 +#endif +#define XEN_FLEX_RING_SIZE(order) \ + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) + +#define DEFINE_XEN_FLEX_RING(name) \ +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \ +{ \ + return idx & (ring_size - 1); \ +} \ + \ +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \ + RING_IDX idx, \ + RING_IDX ring_size) \ +{ \ + return buf + name##_mask(idx, ring_size); \ +} \ + \ +static inline void name##_read_packet(void *opaque, \ + const unsigned char *buf, \ + size_t size, \ + RING_IDX masked_prod, \ + RING_IDX *masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_cons < masked_prod || \ + size <= ring_size - *masked_cons) { \ + memcpy(opaque, buf + *masked_cons, size); \ + } else { \ + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \ + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \ + size - (ring_size - *masked_cons)); \ + } \ + *masked_cons = name##_mask(*masked_cons + size, ring_size); \ +} \ + \ +static inline void name##_write_packet(unsigned char *buf, \ + const void *opaque, \ + size_t size, \ + RING_IDX *masked_prod, \ + RING_IDX masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_prod < masked_cons || \ + size <= ring_size - *masked_prod) { \ + memcpy(buf + *masked_prod, opaque, size); \ + } else { \ + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \ + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \ + size - (ring_size - *masked_prod)); \ + } \ + *masked_prod = name##_mask(*masked_prod + size, ring_size); \ +} \ + \ +static inline RING_IDX name##_queued(RING_IDX prod, \ + RING_IDX cons, \ + RING_IDX ring_size) \ +{ \ + RING_IDX size; \ + \ + if (prod == cons) \ + return 0; \ + \ + prod = name##_mask(prod, ring_size); \ + cons = name##_mask(cons, ring_size); \ + \ + if (prod == cons) \ + return ring_size; \ + \ + if (prod > cons) \ + size = prod - cons; \ + else \ + size = ring_size - (cons - prod); \ + return size; \ +} \ + \ +struct name##_data { \ + unsigned char *in; /* half of the allocation */ \ + unsigned char *out; /* half of the allocation */ \ +} + +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \ +struct name##_data_intf { \ + RING_IDX in_cons, in_prod; \ + \ + uint8_t pad1[56]; \ + \ + RING_IDX out_cons, out_prod; \ + \ + uint8_t pad2[56]; \ + \ + RING_IDX ring_order; \ + grant_ref_t ref[]; \ +}; \ +DEFINE_XEN_FLEX_RING(name) + #endif /* __XEN_PUBLIC_IO_RING_H__ */ diff --git a/include/xen/interface/io/sndif.h b/include/xen/interface/io/sndif.h new file mode 100644 index 0000000000000000000000000000000000000000..5c918276835eb0729c5c5e9ef488e94a5c8afc3e --- /dev/null +++ b/include/xen/interface/io/sndif.h @@ -0,0 +1,793 @@ +/****************************************************************************** + * sndif.h + * + * Unified sound-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2013-2015 GlobalLogic Inc. + * Copyright (C) 2016-2017 EPAM Systems Inc. + * + * Authors: Oleksandr Andrushchenko + * Oleksandr Grytsov + * Oleksandr Dmytryshyn + * Iurii Konovalenko + */ + +#ifndef __XEN_PUBLIC_IO_SNDIF_H__ +#define __XEN_PUBLIC_IO_SNDIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + ****************************************************************************** + * Feature and Parameter Negotiation + ****************************************************************************** + * + * Front->back notifications: when enqueuing a new request, sending a + * notification can be made conditional on xensnd_req (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * xensnd_req appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: when enqueuing a new response, sending a + * notification can be made conditional on xensnd_resp (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * xensnd_resp appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + * + * The two halves of a para-virtual sound card driver utilize nodes within + * XenStore to communicate capabilities and to negotiate operating parameters. + * This section enumerates these nodes which reside in the respective front and + * backend portions of XenStore, following the XenBus convention. + * + * All data in XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + ****************************************************************************** + * Example configuration + ****************************************************************************** + * + * Note: depending on the use-case backend can expose more sound cards and + * PCM devices/streams than the underlying HW physically has by employing + * SW mixers, configuring virtual sound streams, channels etc. + * + * This is an example of backend and frontend configuration: + * + *--------------------------------- Backend ----------------------------------- + * + * /local/domain/0/backend/vsnd/1/0/frontend-id = "1" + * /local/domain/0/backend/vsnd/1/0/frontend = "/local/domain/1/device/vsnd/0" + * /local/domain/0/backend/vsnd/1/0/state = "4" + * /local/domain/0/backend/vsnd/1/0/versions = "1,2" + * + *--------------------------------- Frontend ---------------------------------- + * + * /local/domain/1/device/vsnd/0/backend-id = "0" + * /local/domain/1/device/vsnd/0/backend = "/local/domain/0/backend/vsnd/1/0" + * /local/domain/1/device/vsnd/0/state = "4" + * /local/domain/1/device/vsnd/0/version = "1" + * + *----------------------------- Card configuration ---------------------------- + * + * /local/domain/1/device/vsnd/0/short-name = "Card short name" + * /local/domain/1/device/vsnd/0/long-name = "Card long name" + * /local/domain/1/device/vsnd/0/sample-rates = "8000,32000,44100,48000,96000" + * /local/domain/1/device/vsnd/0/sample-formats = "s8,u8,s16_le,s16_be" + * /local/domain/1/device/vsnd/0/buffer-size = "262144" + * + *------------------------------- PCM device 0 -------------------------------- + * + * /local/domain/1/device/vsnd/0/0/name = "General analog" + * /local/domain/1/device/vsnd/0/0/channels-max = "5" + * + *----------------------------- Stream 0, playback ---------------------------- + * + * /local/domain/1/device/vsnd/0/0/0/type = "p" + * /local/domain/1/device/vsnd/0/0/0/sample-formats = "s8,u8" + * /local/domain/1/device/vsnd/0/0/0/unique-id = "0" + * + * /local/domain/1/device/vsnd/0/0/0/ring-ref = "386" + * /local/domain/1/device/vsnd/0/0/0/event-channel = "15" + * + *------------------------------ Stream 1, capture ---------------------------- + * + * /local/domain/1/device/vsnd/0/0/1/type = "c" + * /local/domain/1/device/vsnd/0/0/1/channels-max = "2" + * /local/domain/1/device/vsnd/0/0/1/unique-id = "1" + * + * /local/domain/1/device/vsnd/0/0/1/ring-ref = "384" + * /local/domain/1/device/vsnd/0/0/1/event-channel = "13" + * + *------------------------------- PCM device 1 -------------------------------- + * + * /local/domain/1/device/vsnd/0/1/name = "HDMI-0" + * /local/domain/1/device/vsnd/0/1/sample-rates = "8000,32000,44100" + * + *------------------------------ Stream 0, capture ---------------------------- + * + * /local/domain/1/device/vsnd/0/1/0/type = "c" + * /local/domain/1/device/vsnd/0/1/0/unique-id = "2" + * + * /local/domain/1/device/vsnd/0/1/0/ring-ref = "387" + * /local/domain/1/device/vsnd/0/1/0/event-channel = "151" + * + *------------------------------- PCM device 2 -------------------------------- + * + * /local/domain/1/device/vsnd/0/2/name = "SPDIF" + * + *----------------------------- Stream 0, playback ---------------------------- + * + * /local/domain/1/device/vsnd/0/2/0/type = "p" + * /local/domain/1/device/vsnd/0/2/0/unique-id = "3" + * + * /local/domain/1/device/vsnd/0/2/0/ring-ref = "389" + * /local/domain/1/device/vsnd/0/2/0/event-channel = "152" + * + ****************************************************************************** + * Backend XenBus Nodes + ****************************************************************************** + * + *----------------------------- Protocol version ------------------------------ + * + * versions + * Values: + * + * List of XENSND_LIST_SEPARATOR separated protocol versions supported + * by the backend. For example "1,2,3". + * + ****************************************************************************** + * Frontend XenBus Nodes + ****************************************************************************** + * + *-------------------------------- Addressing --------------------------------- + * + * dom-id + * Values: + * + * Domain identifier. + * + * dev-id + * Values: + * + * Device identifier. + * + * pcm-dev-idx + * Values: + * + * Zero based contigous index of the PCM device. + * + * stream-idx + * Values: + * + * Zero based contigous index of the stream of the PCM device. + * + * The following pattern is used for addressing: + * /local/domain//device/vsnd////... + * + *----------------------------- Protocol version ------------------------------ + * + * version + * Values: + * + * Protocol version, chosen among the ones supported by the backend. + * + *------------------------------- PCM settings -------------------------------- + * + * Every virtualized sound frontend has a set of PCM devices and streams, each + * could be individually configured. Part of the PCM configuration can be + * defined at higher level of the hierarchy and be fully or partially re-used + * by the underlying layers. These configuration values are: + * o number of channels (min/max) + * o supported sample rates + * o supported sample formats. + * E.g. one can define these values for the whole card, device or stream. + * Every underlying layer in turn can re-define some or all of them to better + * fit its needs. For example, card may define number of channels to be + * in [1; 8] range, and some particular stream may be limited to [1; 2] only. + * The rule is that the underlying layer must be a subset of the upper layer + * range. + * + * channels-min + * Values: + * + * The minimum amount of channels that is supported, [1; channels-max]. + * Optional, if not set or omitted a value of 1 is used. + * + * channels-max + * Values: + * + * The maximum amount of channels that is supported. + * Must be at least . + * + * sample-rates + * Values: + * + * List of supported sample rates separated by XENSND_LIST_SEPARATOR. + * Sample rates are expressed as a list of decimal values w/o any + * ordering requirement. + * + * sample-formats + * Values: + * + * List of supported sample formats separated by XENSND_LIST_SEPARATOR. + * Items must not exceed XENSND_SAMPLE_FORMAT_MAX_LEN length. + * + * buffer-size + * Values: + * + * The maximum size in octets of the buffer to allocate per stream. + * + *----------------------- Virtual sound card settings ------------------------- + * short-name + * Values: + * + * Short name of the virtual sound card. Optional. + * + * long-name + * Values: + * + * Long name of the virtual sound card. Optional. + * + *----------------------------- Device settings ------------------------------- + * name + * Values: + * + * Name of the sound device within the virtual sound card. Optional. + * + *----------------------------- Stream settings ------------------------------- + * + * type + * Values: "p", "c" + * + * Stream type: "p" - playback stream, "c" - capture stream + * + * If both capture and playback are needed then two streams need to be + * defined under the same device. + * + * unique-id + * Values: + * + * After stream initialization it is assigned a unique ID (within the front + * driver), so every stream of the frontend can be identified by the + * backend by this ID. This is not equal to stream-idx as the later is + * zero based within the device, but this index is contigous within the + * driver. + * + *-------------------- Stream Request Transport Parameters -------------------- + * + * event-channel + * Values: + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: + * + * The Xen grant reference granting permission for the backend to map + * a sole page in a single page sized ring buffer. + * + ****************************************************************************** + * STATE DIAGRAMS + ****************************************************************************** + * + * Tool stack creates front and back state nodes with initial state + * XenbusStateInitialising. + * Tool stack creates and sets up frontend sound configuration nodes per domain. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query backend device identification + * data. + * o Open and validate backend device. + * | + * | + * V + * XenbusStateInitWait + * + * o Query frontend configuration + * o Allocate and initialize + * event channels per configured + * playback/capture stream. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the event channels. + * | + * | + * V + * XenbusStateConnected + * + * o Create and initialize OS + * virtual sound device instances + * as per configuration. + * | + * | + * V + * XenbusStateConnected + * + * XenbusStateUnknown + * XenbusStateClosed + * XenbusStateClosing + * o Remove virtual sound device + * o Remove event channels + * | + * | + * V + * XenbusStateClosed + * + *------------------------------- Recovery flow ------------------------------- + * + * In case of frontend unrecoverable errors backend handles that as + * if frontend goes into the XenbusStateClosed state. + * + * In case of backend unrecoverable errors frontend tries removing + * the virtualized device. If this is possible at the moment of error, + * then frontend goes into the XenbusStateInitialising state and is ready for + * new connection with backend. If the virtualized device is still in use and + * cannot be removed, then frontend goes into the XenbusStateReconfiguring state + * until either the virtualized device removed or backend initiates a new + * connection. On the virtualized device removal frontend goes into the + * XenbusStateInitialising state. + * + * Note on XenbusStateReconfiguring state of the frontend: if backend has + * unrecoverable errors then frontend cannot send requests to the backend + * and thus cannot provide functionality of the virtualized device anymore. + * After backend is back to normal the virtualized device may still hold some + * state: configuration in use, allocated buffers, client application state etc. + * So, in most cases, this will require frontend to implement complex recovery + * reconnect logic. Instead, by going into XenbusStateReconfiguring state, + * frontend will make sure no new clients of the virtualized device are + * accepted, allow existing client(s) to exit gracefully by signaling error + * state etc. + * Once all the clients are gone frontend can reinitialize the virtualized + * device and get into XenbusStateInitialising state again signaling the + * backend that a new connection can be made. + * + * There are multiple conditions possible under which frontend will go from + * XenbusStateReconfiguring into XenbusStateInitialising, some of them are OS + * specific. For example: + * 1. The underlying OS framework may provide callbacks to signal that the last + * client of the virtualized device has gone and the device can be removed + * 2. Frontend can schedule a deferred work (timer/tasklet/workqueue) + * to periodically check if this is the right time to re-try removal of + * the virtualized device. + * 3. By any other means. + * + ****************************************************************************** + * PCM FORMATS + ****************************************************************************** + * + * XENSND_PCM_FORMAT_[_] + * + * format: or + * S - signed, U - unsigned, F - float + * bits - 8, 16, 24, 32 + * name - MU_LAW, GSM, etc. + * + * endian: , may be absent + * LE - Little endian, BE - Big endian + */ +#define XENSND_PCM_FORMAT_S8 0 +#define XENSND_PCM_FORMAT_U8 1 +#define XENSND_PCM_FORMAT_S16_LE 2 +#define XENSND_PCM_FORMAT_S16_BE 3 +#define XENSND_PCM_FORMAT_U16_LE 4 +#define XENSND_PCM_FORMAT_U16_BE 5 +#define XENSND_PCM_FORMAT_S24_LE 6 +#define XENSND_PCM_FORMAT_S24_BE 7 +#define XENSND_PCM_FORMAT_U24_LE 8 +#define XENSND_PCM_FORMAT_U24_BE 9 +#define XENSND_PCM_FORMAT_S32_LE 10 +#define XENSND_PCM_FORMAT_S32_BE 11 +#define XENSND_PCM_FORMAT_U32_LE 12 +#define XENSND_PCM_FORMAT_U32_BE 13 +#define XENSND_PCM_FORMAT_F32_LE 14 /* 4-byte float, IEEE-754 32-bit, */ +#define XENSND_PCM_FORMAT_F32_BE 15 /* range -1.0 to 1.0 */ +#define XENSND_PCM_FORMAT_F64_LE 16 /* 8-byte float, IEEE-754 64-bit, */ +#define XENSND_PCM_FORMAT_F64_BE 17 /* range -1.0 to 1.0 */ +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE 18 +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE 19 +#define XENSND_PCM_FORMAT_MU_LAW 20 +#define XENSND_PCM_FORMAT_A_LAW 21 +#define XENSND_PCM_FORMAT_IMA_ADPCM 22 +#define XENSND_PCM_FORMAT_MPEG 23 +#define XENSND_PCM_FORMAT_GSM 24 + +/* + ****************************************************************************** + * REQUEST CODES + ****************************************************************************** + */ +#define XENSND_OP_OPEN 0 +#define XENSND_OP_CLOSE 1 +#define XENSND_OP_READ 2 +#define XENSND_OP_WRITE 3 +#define XENSND_OP_SET_VOLUME 4 +#define XENSND_OP_GET_VOLUME 5 +#define XENSND_OP_MUTE 6 +#define XENSND_OP_UNMUTE 7 + +/* + ****************************************************************************** + * XENSTORE FIELD AND PATH NAME STRINGS, HELPERS + ****************************************************************************** + */ +#define XENSND_DRIVER_NAME "vsnd" + +#define XENSND_LIST_SEPARATOR "," +/* Field names */ +#define XENSND_FIELD_BE_VERSIONS "versions" +#define XENSND_FIELD_FE_VERSION "version" +#define XENSND_FIELD_VCARD_SHORT_NAME "short-name" +#define XENSND_FIELD_VCARD_LONG_NAME "long-name" +#define XENSND_FIELD_RING_REF "ring-ref" +#define XENSND_FIELD_EVT_CHNL "event-channel" +#define XENSND_FIELD_DEVICE_NAME "name" +#define XENSND_FIELD_TYPE "type" +#define XENSND_FIELD_STREAM_UNIQUE_ID "unique-id" +#define XENSND_FIELD_CHANNELS_MIN "channels-min" +#define XENSND_FIELD_CHANNELS_MAX "channels-max" +#define XENSND_FIELD_SAMPLE_RATES "sample-rates" +#define XENSND_FIELD_SAMPLE_FORMATS "sample-formats" +#define XENSND_FIELD_BUFFER_SIZE "buffer-size" + +/* Stream type field values. */ +#define XENSND_STREAM_TYPE_PLAYBACK "p" +#define XENSND_STREAM_TYPE_CAPTURE "c" +/* Sample rate max string length */ +#define XENSND_SAMPLE_RATE_MAX_LEN 11 +/* Sample format field values */ +#define XENSND_SAMPLE_FORMAT_MAX_LEN 24 + +#define XENSND_PCM_FORMAT_S8_STR "s8" +#define XENSND_PCM_FORMAT_U8_STR "u8" +#define XENSND_PCM_FORMAT_S16_LE_STR "s16_le" +#define XENSND_PCM_FORMAT_S16_BE_STR "s16_be" +#define XENSND_PCM_FORMAT_U16_LE_STR "u16_le" +#define XENSND_PCM_FORMAT_U16_BE_STR "u16_be" +#define XENSND_PCM_FORMAT_S24_LE_STR "s24_le" +#define XENSND_PCM_FORMAT_S24_BE_STR "s24_be" +#define XENSND_PCM_FORMAT_U24_LE_STR "u24_le" +#define XENSND_PCM_FORMAT_U24_BE_STR "u24_be" +#define XENSND_PCM_FORMAT_S32_LE_STR "s32_le" +#define XENSND_PCM_FORMAT_S32_BE_STR "s32_be" +#define XENSND_PCM_FORMAT_U32_LE_STR "u32_le" +#define XENSND_PCM_FORMAT_U32_BE_STR "u32_be" +#define XENSND_PCM_FORMAT_F32_LE_STR "float_le" +#define XENSND_PCM_FORMAT_F32_BE_STR "float_be" +#define XENSND_PCM_FORMAT_F64_LE_STR "float64_le" +#define XENSND_PCM_FORMAT_F64_BE_STR "float64_be" +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE_STR "iec958_subframe_le" +#define XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE_STR "iec958_subframe_be" +#define XENSND_PCM_FORMAT_MU_LAW_STR "mu_law" +#define XENSND_PCM_FORMAT_A_LAW_STR "a_law" +#define XENSND_PCM_FORMAT_IMA_ADPCM_STR "ima_adpcm" +#define XENSND_PCM_FORMAT_MPEG_STR "mpeg" +#define XENSND_PCM_FORMAT_GSM_STR "gsm" + + +/* + ****************************************************************************** + * STATUS RETURN CODES + ****************************************************************************** + * + * Status return code is zero on success and -XEN_EXX on failure. + * + ****************************************************************************** + * Assumptions + ****************************************************************************** + * o usage of grant reference 0 as invalid grant reference: + * grant reference 0 is valid, but never exposed to a PV driver, + * because of the fact it is already in use/reserved by the PV console. + * o all references in this document to page sizes must be treated + * as pages of size XEN_PAGE_SIZE unless otherwise noted. + * + ****************************************************************************** + * Description of the protocol between frontend and backend driver + ****************************************************************************** + * + * The two halves of a Para-virtual sound driver communicate with + * each other using shared pages and event channels. + * Shared page contains a ring with request/response packets. + * + * Packets, used for input/output operations, e.g. read/write, set/get volume, + * etc., provide offset/length fields in order to allow asynchronous protocol + * operation with buffer space sharing: part of the buffer allocated at + * XENSND_OP_OPEN can be used for audio samples and part, for example, + * for volume control. + * + * All reserved fields in the structures below must be 0. + * + *---------------------------------- Requests --------------------------------- + * + * All request packets have the same length (32 octets) + * All request packets have common header: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * id - uint16_t, private guest value, echoed in response + * operation - uint8_t, operation code, XENSND_OP_??? + * + * For all packets which use offset and length: + * offset - uint32_t, read or write data offset within the shared buffer, + * passed with XENSND_OP_OPEN request, octets, + * [0; XENSND_OP_OPEN.buffer_sz - 1]. + * length - uint32_t, read or write data length, octets + * + * Request open - open a PCM stream for playback or capture: + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | XENSND_OP_OPEN | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | pcm_rate | 12 + * +----------------+----------------+----------------+----------------+ + * | pcm_format | pcm_channels | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * | buffer_sz | 20 + * +----------------+----------------+----------------+----------------+ + * | gref_directory | 24 + * +----------------+----------------+----------------+----------------+ + * | reserved | 28 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 + * +----------------+----------------+----------------+----------------+ + * + * pcm_rate - uint32_t, stream data rate, Hz + * pcm_format - uint8_t, XENSND_PCM_FORMAT_XXX value + * pcm_channels - uint8_t, number of channels of this stream, + * [channels-min; channels-max] + * buffer_sz - uint32_t, buffer size to be allocated, octets + * gref_directory - grant_ref_t, a reference to the first shared page + * describing shared buffer references. At least one page exists. If shared + * buffer size (buffer_sz) exceeds what can be addressed by this single page, + * then reference to the next page must be supplied (see gref_dir_next_page + * below) + */ + +struct xensnd_open_req { + uint32_t pcm_rate; + uint8_t pcm_format; + uint8_t pcm_channels; + uint16_t reserved; + uint32_t buffer_sz; + grant_ref_t gref_directory; +}; + +/* + * Shared page for XENSND_OP_OPEN buffer descriptor (gref_directory in the + * request) employs a list of pages, describing all pages of the shared data + * buffer: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | gref_dir_next_page | 4 + * +----------------+----------------+----------------+----------------+ + * | gref[0] | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | gref[i] | i*4+8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | gref[N - 1] | N*4+8 + * +----------------+----------------+----------------+----------------+ + * + * gref_dir_next_page - grant_ref_t, reference to the next page describing + * page directory. Must be 0 if there are no more pages in the list. + * gref[i] - grant_ref_t, reference to a shared page of the buffer + * allocated at XENSND_OP_OPEN + * + * Number of grant_ref_t entries in the whole page directory is not + * passed, but instead can be calculated as: + * num_grefs_total = (XENSND_OP_OPEN.buffer_sz + XEN_PAGE_SIZE - 1) / + * XEN_PAGE_SIZE + */ + +struct xensnd_page_directory { + grant_ref_t gref_dir_next_page; + grant_ref_t gref[1]; /* Variable length */ +}; + +/* + * Request close - close an opened pcm stream: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | XENSND_OP_CLOSE| reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 + * +----------------+----------------+----------------+----------------+ + * + * Request read/write - used for read (for capture) or write (for playback): + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | offset | 12 + * +----------------+----------------+----------------+----------------+ + * | length | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 + * +----------------+----------------+----------------+----------------+ + * + * operation - XENSND_OP_READ for read or XENSND_OP_WRITE for write + */ + +struct xensnd_rw_req { + uint32_t offset; + uint32_t length; +}; + +/* + * Request set/get volume - set/get channels' volume of the stream given: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | offset | 12 + * +----------------+----------------+----------------+----------------+ + * | length | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 + * +----------------+----------------+----------------+----------------+ + * + * operation - XENSND_OP_SET_VOLUME for volume set + * or XENSND_OP_GET_VOLUME for volume get + * Buffer passed with XENSND_OP_OPEN is used to exchange volume + * values: + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | channel[0] | 4 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | channel[i] | i*4 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | channel[N - 1] | (N-1)*4 + * +----------------+----------------+----------------+----------------+ + * + * N = XENSND_OP_OPEN.pcm_channels + * i - uint8_t, index of a channel + * channel[i] - sint32_t, volume of i-th channel + * Volume is expressed as a signed value in steps of 0.001 dB, + * while 0 being 0 dB. + * + * Request mute/unmute - mute/unmute stream: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | offset | 12 + * +----------------+----------------+----------------+----------------+ + * | length | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 + * +----------------+----------------+----------------+----------------+ + * + * operation - XENSND_OP_MUTE for mute or XENSND_OP_UNMUTE for unmute + * Buffer passed with XENSND_OP_OPEN is used to exchange mute/unmute + * values: + * + * 0 octet + * +----------------+----------------+----------------+----------------+ + * | channel[0] | 4 + * +----------------+----------------+----------------+----------------+ + * +/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | channel[i] | i*4 + * +----------------+----------------+----------------+----------------+ + * +/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | channel[N - 1] | (N-1)*4 + * +----------------+----------------+----------------+----------------+ + * + * N = XENSND_OP_OPEN.pcm_channels + * i - uint8_t, index of a channel + * channel[i] - uint8_t, non-zero if i-th channel needs to be muted/unmuted + * + *------------------------------------ N.B. ----------------------------------- + * + * The 'struct xensnd_rw_req' is also used for XENSND_OP_SET_VOLUME, + * XENSND_OP_GET_VOLUME, XENSND_OP_MUTE, XENSND_OP_UNMUTE. + */ + +/* + *---------------------------------- Responses -------------------------------- + * + * All response packets have the same length (32 octets) + * + * Response for all requests: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | status | 8 + * +----------------+----------------+----------------+----------------+ + * | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 32 + * +----------------+----------------+----------------+----------------+ + * + * id - uint16_t, copied from the request + * operation - uint8_t, XENSND_OP_* - copied from request + * status - int32_t, response status, zero on success and -XEN_EXX on failure + */ + +struct xensnd_req { + uint16_t id; + uint8_t operation; + uint8_t reserved[5]; + union { + struct xensnd_open_req open; + struct xensnd_rw_req rw; + uint8_t reserved[24]; + } op; +}; + +struct xensnd_resp { + uint16_t id; + uint8_t operation; + uint8_t reserved; + int32_t status; + uint8_t reserved1[24]; +}; + +DEFINE_RING_TYPES(xen_sndif, struct xensnd_req, struct xensnd_resp); + +#endif /* __XEN_PUBLIC_IO_SNDIF_H__ */ diff --git a/include/xen/page.h b/include/xen/page.h index 9dc46cb8a0fd79be7f4bdae7ae8f5f4dbdcdb7f4..064194f6453ebccc04ffc3cda9e1c1901b7c1728 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -38,7 +38,7 @@ struct xen_memory_region { unsigned long n_pfns; }; -#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */ +#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820_MAX_ENTRIES_ZEROPAGE */ extern __initdata struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS]; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index b5486e64860759aed11763ac983b3b4aa12988b3..c44a2ee8c8f8079a13095b3b67ec6f794a683ff8 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -22,6 +22,8 @@ void xen_timer_resume(void); void xen_arch_resume(void); void xen_arch_suspend(void); +void xen_reboot(int reason); + void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb); @@ -34,11 +36,25 @@ u64 xen_steal_clock(int cpu); int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; + +#ifdef CONFIG_XEN_PV int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle); void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); +#else +static inline int xen_create_contiguous_region(phys_addr_t pstart, + unsigned int order, + unsigned int address_bits, + dma_addr_t *dma_handle) +{ + return 0; +} + +static inline void xen_destroy_contiguous_region(phys_addr_t pstart, + unsigned int order) { } +#endif struct vm_area_struct; @@ -120,6 +136,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); +void xen_efi_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data); + #ifdef CONFIG_PREEMPT diff --git a/init/Kconfig b/init/Kconfig index a92f27da4a272ec873707bc4b7a68e46a4340b86..1d3475fc94967a58904627d80e8461a3721ddbb0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -521,11 +521,41 @@ config RCU_EXPERT config SRCU bool + default y help This option selects the sleepable version of RCU. This version permits arbitrary sleeping or blocking within RCU read-side critical sections. +config CLASSIC_SRCU + bool "Use v4.11 classic SRCU implementation" + default n + depends on RCU_EXPERT && SRCU + help + This option selects the traditional well-tested classic SRCU + implementation from v4.11, as might be desired for enterprise + Linux distributions. Without this option, the shiny new + Tiny SRCU and Tree SRCU implementations are used instead. + At some point, it is hoped that Tiny SRCU and Tree SRCU + will accumulate enough test time and confidence to allow + Classic SRCU to be dropped entirely. + + Say Y if you need a rock-solid SRCU. + + Say N if you would like help test Tree SRCU. + +config TINY_SRCU + bool + default y if SRCU && TINY_RCU && !CLASSIC_SRCU + help + This option selects the single-CPU non-preemptible version of SRCU. + +config TREE_SRCU + bool + default y if SRCU && !TINY_RCU && !CLASSIC_SRCU + help + This option selects the full-fledged version of SRCU. + config TASKS_RCU bool default n @@ -543,6 +573,9 @@ config RCU_STALL_COMMON the tiny variants to disable RCU CPU stall warnings, while making these warnings mandatory for the tree variants. +config RCU_NEED_SEGCBLIST + def_bool ( TREE_RCU || PREEMPT_RCU || TINY_SRCU || TREE_SRCU ) + config CONTEXT_TRACKING bool @@ -612,11 +645,17 @@ config RCU_FANOUT_LEAF initialization. These systems tend to run CPU-bound, and thus are not helped by synchronized interrupts, and thus tend to skew them, which reduces lock contention enough that large - leaf-level fanouts work well. + leaf-level fanouts work well. That said, setting leaf-level + fanout to a large number will likely cause problematic + lock contention on the leaf-level rcu_node structures unless + you boot with the skew_tick kernel parameter. Select a specific number if testing RCU itself. - Select the maximum permissible value for large systems. + Select the maximum permissible value for large systems, but + please understand that you may also need to set the skew_tick + kernel boot parameter to avoid contention on the rcu_node + structure's locks. Take the default if unsure. diff --git a/init/do_mounts.h b/init/do_mounts.h index 067af1d9e8b620bfac146dae57d0a0f690b33475..282d65bfd6741e51c7f341c9c4fda65a492936c6 100644 --- a/init/do_mounts.h +++ b/init/do_mounts.h @@ -19,29 +19,15 @@ static inline int create_dev(char *name, dev_t dev) return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); } -#if BITS_PER_LONG == 32 static inline u32 bstat(char *name) { - struct stat64 stat; - if (sys_stat64(name, &stat) != 0) + struct kstat stat; + if (vfs_stat(name, &stat) != 0) return 0; - if (!S_ISBLK(stat.st_mode)) + if (!S_ISBLK(stat.mode)) return 0; - if (stat.st_rdev != (u32)stat.st_rdev) - return 0; - return stat.st_rdev; -} -#else -static inline u32 bstat(char *name) -{ - struct stat stat; - if (sys_newstat(name, &stat) != 0) - return 0; - if (!S_ISBLK(stat.st_mode)) - return 0; - return stat.st_rdev; + return stat.rdev; } -#endif #ifdef CONFIG_BLK_DEV_RAM diff --git a/init/initramfs.c b/init/initramfs.c index 981f286c1d16ab57dff10cb10099f573b62f07b9..8a532050043f5f804f06ddbf980e54965c9055de 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -312,10 +312,10 @@ static int __init maybe_link(void) static void __init clean_path(char *path, umode_t fmode) { - struct stat st; + struct kstat st; - if (!sys_newlstat(path, &st) && (st.st_mode ^ fmode) & S_IFMT) { - if (S_ISDIR(st.st_mode)) + if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) { + if (S_ISDIR(st.mode)) sys_rmdir(path); else sys_unlink(path); @@ -581,13 +581,13 @@ static void __init clean_rootfs(void) num = sys_getdents64(fd, dirp, BUF_SIZE); while (num > 0) { while (num > 0) { - struct stat st; + struct kstat st; int ret; - ret = sys_newlstat(dirp->d_name, &st); + ret = vfs_lstat(dirp->d_name, &st); WARN_ON_ONCE(ret); if (!ret) { - if (S_ISDIR(st.st_mode)) + if (S_ISDIR(st.mode)) sys_rmdir(dirp->d_name); else sys_unlink(dirp->d_name); @@ -608,10 +608,12 @@ static void __init clean_rootfs(void) static int __init populate_rootfs(void) { + /* Load the built in initramfs */ char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) panic("%s", err); /* Failed to decompress INTERNAL initramfs */ - if (initrd_start) { + /* If available load the bootloader supplied initrd */ + if (initrd_start && !IS_ENABLED(CONFIG_INITRAMFS_FORCE)) { #ifdef CONFIG_BLK_DEV_RAM int fd; printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n"); @@ -640,6 +642,7 @@ static int __init populate_rootfs(void) free_initrd(); } done: + /* empty statement */; #else printk(KERN_INFO "Unpacking initramfs...\n"); err = unpack_to_rootfs((char *)initrd_start, @@ -648,13 +651,14 @@ static int __init populate_rootfs(void) printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err); free_initrd(); #endif - flush_delayed_fput(); - /* - * Try loading default modules from initramfs. This gives - * us a chance to load before device_initcalls. - */ - load_default_modules(); } + flush_delayed_fput(); + /* + * Try loading default modules from initramfs. This gives + * us a chance to load before device_initcalls. + */ + load_default_modules(); + return 0; } rootfs_initcall(populate_rootfs); diff --git a/init/main.c b/init/main.c index b0c11cbf5ddf8a55a3c832e4acbd72653d9c38f1..f866510472d7a263f03bc21b7a158e5b288185f9 100644 --- a/init/main.c +++ b/init/main.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -495,7 +495,7 @@ asmlinkage __visible void __init start_kernel(void) debug_objects_early_init(); /* - * Set up the the initial canary ASAP: + * Set up the initial canary ASAP: */ boot_init_stack_canary(); @@ -504,10 +504,10 @@ asmlinkage __visible void __init start_kernel(void) local_irq_disable(); early_boot_irqs_disabled = true; -/* - * Interrupts are still disabled. Do necessary setups, then - * enable them - */ + /* + * Interrupts are still disabled. Do necessary setups, then + * enable them. + */ boot_cpu_init(); page_address_init(); pr_notice("%s", linux_banner); @@ -545,6 +545,11 @@ asmlinkage __visible void __init start_kernel(void) trap_init(); mm_init(); + ftrace_init(); + + /* trace_printk can be enabled here */ + early_trace_init(); + /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() @@ -570,7 +575,7 @@ asmlinkage __visible void __init start_kernel(void) rcu_init(); - /* trace_printk() and trace points may be used after this */ + /* Trace events are available after this */ trace_init(); context_tracking_init(); @@ -670,8 +675,6 @@ asmlinkage __visible void __init start_kernel(void) efi_free_boot_services(); } - ftrace_init(); - /* Do the rest non-__init'ed, we're now alive */ rest_init(); } @@ -959,6 +962,7 @@ static int __ref kernel_init(void *unused) kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); + ftrace_free_init_mem(); free_initmem(); mark_readonly(); system_state = SYSTEM_RUNNING; diff --git a/ipc/shm.c b/ipc/shm.c index 481d2a9c298ab14916543a040d4f45b9b69764e0..34c4344e8d4b2f00206208f1d2c8d48e85dd0e48 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1095,11 +1095,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, unsigned long shmlba) { struct shmid_kernel *shp; - unsigned long addr; + unsigned long addr = (unsigned long)shmaddr; unsigned long size; struct file *file; int err; - unsigned long flags; + unsigned long flags = MAP_SHARED; unsigned long prot; int acc_mode; struct ipc_namespace *ns; @@ -1111,7 +1111,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, err = -EINVAL; if (shmid < 0) goto out; - else if ((addr = (ulong)shmaddr)) { + + if (addr) { if (addr & (shmlba - 1)) { /* * Round down to the nearest multiple of shmlba. @@ -1126,13 +1127,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, #endif goto out; } - flags = MAP_SHARED | MAP_FIXED; - } else { - if ((shmflg & SHM_REMAP)) - goto out; - flags = MAP_SHARED; - } + flags |= MAP_FIXED; + } else if ((shmflg & SHM_REMAP)) + goto out; if (shmflg & SHM_RDONLY) { prot = PROT_READ; diff --git a/ipc/util.c b/ipc/util.c index 798cad18dd878f2ee81f03e34431dbc001e2c2a9..caec7b1bfaa335f3d6df017eb9b0ad24058edd9c 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -403,12 +403,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) */ void *ipc_alloc(int size) { - void *out; - if (size > PAGE_SIZE) - out = vmalloc(size); - else - out = kmalloc(size, GFP_KERNEL); - return out; + return kvmalloc(size, GFP_KERNEL); } /** @@ -474,7 +469,7 @@ void ipc_rcu_free(struct rcu_head *head) * Check user, group, other permissions for access * to ipc resources. return 0 if allowed * - * @flag will most probably be 0 or S_...UGO from + * @flag will most probably be 0 or ``S_...UGO`` from */ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag) { @@ -672,10 +667,12 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) * * This function does some common audit and permissions check for some IPC_XXX * cmd and is called from semctl_down, shmctl_down and msgctl_down. - * It must be called without any lock held and - * - retrieves the ipc with the given id in the given table. - * - performs some audit and permission check, depending on the given cmd - * - returns a pointer to the ipc object or otherwise, the corresponding error. + * It must be called without any lock held and: + * + * - retrieves the ipc with the given id in the given table. + * - performs some audit and permission check, depending on the given cmd + * - returns a pointer to the ipc object or otherwise, the corresponding + * error. * * Call holding the both the rwsem and the rcu read lock. */ diff --git a/ipc/util.h b/ipc/util.h index 51f7ca58ac6742989c61c0e0fc8d73bc7eab6bf7..60ddccca464dd2a9f54ebe50401cdd009b3084ca 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -153,8 +153,6 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len); extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len); -extern void recompute_msgmni(struct ipc_namespace *); - static inline int ipc_buildid(int id, int seq) { return SEQ_MULTIPLIER * seq + id; diff --git a/kernel/Makefile b/kernel/Makefile index b302b4731d16547a88e4ddc6db21e130ae821113..72aa080f91f04bd9256f1df714186033294b19b9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SIG) += module_signing.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o +obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_KEXEC_CORE) += kexec_core.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o diff --git a/kernel/audit.c b/kernel/audit.c index a871bf80fde1adc79040936475b7045971e72d76..4b7d49868ce1e39d11603f6160d04e7e5d4457e2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -58,6 +58,8 @@ #include #include #include +#include +#include #include @@ -110,18 +112,19 @@ struct audit_net { * @pid: auditd PID * @portid: netlink portid * @net: the associated network namespace - * @lock: spinlock to protect write access + * @rcu: RCU head * * Description: * This struct is RCU protected; you must either hold the RCU lock for reading - * or the included spinlock for writing. + * or the associated spinlock for writing. */ static struct auditd_connection { - int pid; + struct pid *pid; u32 portid; struct net *net; - spinlock_t lock; -} auditd_conn; + struct rcu_head rcu; +} *auditd_conn = NULL; +static DEFINE_SPINLOCK(auditd_conn_lock); /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -151,12 +154,7 @@ static atomic_t audit_lost = ATOMIC_INIT(0); /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; -/* The audit_freelist is a list of pre-allocated audit buffers (if more - * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of - * being placed on the freelist). */ -static DEFINE_SPINLOCK(audit_freelist_lock); -static int audit_freelist_count; -static LIST_HEAD(audit_freelist); +static struct kmem_cache *audit_buffer_cache; /* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; @@ -191,17 +189,12 @@ DEFINE_MUTEX(audit_cmd_mutex); * should be at least that large. */ #define AUDIT_BUFSIZ 1024 -/* AUDIT_MAXFREE is the number of empty audit_buffers we keep on the - * audit_freelist. Doing so eliminates many kmalloc/kfree calls. */ -#define AUDIT_MAXFREE (2*NR_CPUS) - /* The audit_buffer is used when formatting an audit record. The caller * locks briefly to get the record off the freelist or to allocate the * buffer, and locks briefly to send the buffer to the netlink layer or * to place it on a transmit queue. Multiple audit_buffers can be in * use simultaneously. */ struct audit_buffer { - struct list_head list; struct sk_buff *skb; /* formatted skb ready to send */ struct audit_context *ctx; /* NULL or associated context */ gfp_t gfp_mask; @@ -220,17 +213,41 @@ struct audit_reply { * Description: * Return 1 if the task is a registered audit daemon, 0 otherwise. */ -int auditd_test_task(const struct task_struct *task) +int auditd_test_task(struct task_struct *task) { int rc; + struct auditd_connection *ac; rcu_read_lock(); - rc = (auditd_conn.pid && task->tgid == auditd_conn.pid ? 1 : 0); + ac = rcu_dereference(auditd_conn); + rc = (ac && ac->pid == task_tgid(task) ? 1 : 0); rcu_read_unlock(); return rc; } +/** + * auditd_pid_vnr - Return the auditd PID relative to the namespace + * + * Description: + * Returns the PID in relation to the namespace, 0 on failure. + */ +static pid_t auditd_pid_vnr(void) +{ + pid_t pid; + const struct auditd_connection *ac; + + rcu_read_lock(); + ac = rcu_dereference(auditd_conn); + if (!ac || !ac->pid) + pid = 0; + else + pid = pid_vnr(ac->pid); + rcu_read_unlock(); + + return pid; +} + /** * audit_get_sk - Return the audit socket for the given network namespace * @net: the destination network namespace @@ -250,14 +267,6 @@ static struct sock *audit_get_sk(const struct net *net) return aunet->sk; } -static void audit_set_portid(struct audit_buffer *ab, __u32 portid) -{ - if (ab) { - struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); - nlh->nlmsg_pid = portid; - } -} - void audit_panic(const char *message) { switch (audit_failure) { @@ -426,6 +435,24 @@ static int audit_set_failure(u32 state) return audit_do_config_change("audit_failure", &audit_failure, state); } +/** + * auditd_conn_free - RCU helper to release an auditd connection struct + * @rcu: RCU head + * + * Description: + * Drop any references inside the auditd connection tracking struct and free + * the memory. + */ + static void auditd_conn_free(struct rcu_head *rcu) + { + struct auditd_connection *ac; + + ac = container_of(rcu, struct auditd_connection, rcu); + put_pid(ac->pid); + put_net(ac->net); + kfree(ac); + } + /** * auditd_set - Set/Reset the auditd connection state * @pid: auditd PID @@ -434,22 +461,33 @@ static int audit_set_failure(u32 state) * * Description: * This function will obtain and drop network namespace references as - * necessary. + * necessary. Returns zero on success, negative values on failure. */ -static void auditd_set(int pid, u32 portid, struct net *net) +static int auditd_set(struct pid *pid, u32 portid, struct net *net) { unsigned long flags; + struct auditd_connection *ac_old, *ac_new; - spin_lock_irqsave(&auditd_conn.lock, flags); - auditd_conn.pid = pid; - auditd_conn.portid = portid; - if (auditd_conn.net) - put_net(auditd_conn.net); - if (net) - auditd_conn.net = get_net(net); - else - auditd_conn.net = NULL; - spin_unlock_irqrestore(&auditd_conn.lock, flags); + if (!pid || !net) + return -EINVAL; + + ac_new = kzalloc(sizeof(*ac_new), GFP_KERNEL); + if (!ac_new) + return -ENOMEM; + ac_new->pid = get_pid(pid); + ac_new->portid = portid; + ac_new->net = get_net(net); + + spin_lock_irqsave(&auditd_conn_lock, flags); + ac_old = rcu_dereference_protected(auditd_conn, + lockdep_is_held(&auditd_conn_lock)); + rcu_assign_pointer(auditd_conn, ac_new); + spin_unlock_irqrestore(&auditd_conn_lock, flags); + + if (ac_old) + call_rcu(&ac_old->rcu, auditd_conn_free); + + return 0; } /** @@ -544,13 +582,19 @@ static void kauditd_retry_skb(struct sk_buff *skb) */ static void auditd_reset(void) { + unsigned long flags; struct sk_buff *skb; + struct auditd_connection *ac_old; /* if it isn't already broken, break the connection */ - rcu_read_lock(); - if (auditd_conn.pid) - auditd_set(0, 0, NULL); - rcu_read_unlock(); + spin_lock_irqsave(&auditd_conn_lock, flags); + ac_old = rcu_dereference_protected(auditd_conn, + lockdep_is_held(&auditd_conn_lock)); + rcu_assign_pointer(auditd_conn, NULL); + spin_unlock_irqrestore(&auditd_conn_lock, flags); + + if (ac_old) + call_rcu(&ac_old->rcu, auditd_conn_free); /* flush all of the main and retry queues to the hold queue */ while ((skb = skb_dequeue(&audit_retry_queue))) @@ -576,6 +620,7 @@ static int auditd_send_unicast_skb(struct sk_buff *skb) u32 portid; struct net *net; struct sock *sk; + struct auditd_connection *ac; /* NOTE: we can't call netlink_unicast while in the RCU section so * take a reference to the network namespace and grab local @@ -585,15 +630,15 @@ static int auditd_send_unicast_skb(struct sk_buff *skb) * section netlink_unicast() should safely return an error */ rcu_read_lock(); - if (!auditd_conn.pid) { + ac = rcu_dereference(auditd_conn); + if (!ac) { rcu_read_unlock(); rc = -ECONNREFUSED; goto err; } - net = auditd_conn.net; - get_net(net); + net = get_net(ac->net); sk = audit_get_sk(net); - portid = auditd_conn.portid; + portid = ac->portid; rcu_read_unlock(); rc = netlink_unicast(sk, skb, portid, 0); @@ -728,6 +773,7 @@ static int kauditd_thread(void *dummy) u32 portid = 0; struct net *net = NULL; struct sock *sk = NULL; + struct auditd_connection *ac; #define UNICAST_RETRIES 5 @@ -735,14 +781,14 @@ static int kauditd_thread(void *dummy) while (!kthread_should_stop()) { /* NOTE: see the lock comments in auditd_send_unicast_skb() */ rcu_read_lock(); - if (!auditd_conn.pid) { + ac = rcu_dereference(auditd_conn); + if (!ac) { rcu_read_unlock(); goto main_queue; } - net = auditd_conn.net; - get_net(net); + net = get_net(ac->net); sk = audit_get_sk(net); - portid = auditd_conn.portid; + portid = ac->portid; rcu_read_unlock(); /* attempt to flush the hold queue */ @@ -816,7 +862,7 @@ int audit_send_list(void *_dest) return 0; } -struct sk_buff *audit_make_reply(__u32 portid, int seq, int type, int done, +struct sk_buff *audit_make_reply(int seq, int type, int done, int multi, const void *payload, int size) { struct sk_buff *skb; @@ -829,7 +875,7 @@ struct sk_buff *audit_make_reply(__u32 portid, int seq, int type, int done, if (!skb) return NULL; - nlh = nlmsg_put(skb, portid, seq, t, size, flags); + nlh = nlmsg_put(skb, 0, seq, t, size, flags); if (!nlh) goto out_kfree_skb; data = nlmsg_data(nlh); @@ -873,7 +919,6 @@ static int audit_send_reply_thread(void *arg) static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done, int multi, const void *payload, int size) { - u32 portid = NETLINK_CB(request_skb).portid; struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct sk_buff *skb; struct task_struct *tsk; @@ -883,12 +928,12 @@ static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int if (!reply) return; - skb = audit_make_reply(portid, seq, type, done, multi, payload, size); + skb = audit_make_reply(seq, type, done, multi, payload, size); if (!skb) goto out; reply->net = get_net(net); - reply->portid = portid; + reply->portid = NETLINK_CB(request_skb).portid; reply->skb = skb; tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); @@ -1068,11 +1113,13 @@ static int audit_set_feature(struct sk_buff *skb) return 0; } -static int audit_replace(pid_t pid) +static int audit_replace(struct pid *pid) { + pid_t pvnr; struct sk_buff *skb; - skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, &pid, sizeof(pid)); + pvnr = pid_vnr(pid); + skb = audit_make_reply(0, AUDIT_REPLACE, 0, 0, &pvnr, sizeof(pvnr)); if (!skb) return -ENOMEM; return auditd_send_unicast_skb(skb); @@ -1102,9 +1149,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); s.enabled = audit_enabled; s.failure = audit_failure; - rcu_read_lock(); - s.pid = auditd_conn.pid; - rcu_read_unlock(); + /* NOTE: use pid_vnr() so the PID is relative to the current + * namespace */ + s.pid = auditd_pid_vnr(); s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); @@ -1130,51 +1177,61 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } if (s.mask & AUDIT_STATUS_PID) { - /* NOTE: we are using task_tgid_vnr() below because - * the s.pid value is relative to the namespace - * of the caller; at present this doesn't matter - * much since you can really only run auditd - * from the initial pid namespace, but something - * to keep in mind if this changes */ - int new_pid = s.pid; + /* NOTE: we are using the vnr PID functions below + * because the s.pid value is relative to the + * namespace of the caller; at present this + * doesn't matter much since you can really only + * run auditd from the initial pid namespace, but + * something to keep in mind if this changes */ + pid_t new_pid = s.pid; pid_t auditd_pid; - pid_t requesting_pid = task_tgid_vnr(current); + struct pid *req_pid = task_tgid(current); + + /* sanity check - PID values must match */ + if (new_pid != pid_vnr(req_pid)) + return -EINVAL; /* test the auditd connection */ - audit_replace(requesting_pid); + audit_replace(req_pid); - rcu_read_lock(); - auditd_pid = auditd_conn.pid; + auditd_pid = auditd_pid_vnr(); /* only the current auditd can unregister itself */ - if ((!new_pid) && (requesting_pid != auditd_pid)) { - rcu_read_unlock(); + if ((!new_pid) && (new_pid != auditd_pid)) { audit_log_config_change("audit_pid", new_pid, auditd_pid, 0); return -EACCES; } /* replacing a healthy auditd is not allowed */ if (auditd_pid && new_pid) { - rcu_read_unlock(); audit_log_config_change("audit_pid", new_pid, auditd_pid, 0); return -EEXIST; } - rcu_read_unlock(); - - if (audit_enabled != AUDIT_OFF) - audit_log_config_change("audit_pid", new_pid, - auditd_pid, 1); if (new_pid) { /* register a new auditd connection */ - auditd_set(new_pid, - NETLINK_CB(skb).portid, - sock_net(NETLINK_CB(skb).sk)); + err = auditd_set(req_pid, + NETLINK_CB(skb).portid, + sock_net(NETLINK_CB(skb).sk)); + if (audit_enabled != AUDIT_OFF) + audit_log_config_change("audit_pid", + new_pid, + auditd_pid, + err ? 0 : 1); + if (err) + return err; + /* try to process any backlog */ wake_up_interruptible(&kauditd_wait); - } else + } else { + if (audit_enabled != AUDIT_OFF) + audit_log_config_change("audit_pid", + new_pid, + auditd_pid, 1); + /* unregister the auditd connection */ auditd_reset(); + } } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); @@ -1242,7 +1299,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) size--; audit_log_n_untrustedstring(ab, data, size); } - audit_set_portid(ab, NETLINK_CB(skb).portid); audit_log_end(ab); } break; @@ -1256,8 +1312,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, NETLINK_CB(skb).portid, - seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1378,11 +1433,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err < 0 ? err : 0; } -/* - * Get message from skb. Each message is processed by audit_receive_msg. - * Malformed skbs with wrong length are discarded silently. +/** + * audit_receive - receive messages from a netlink control socket + * @skb: the message buffer + * + * Parse the provided skb and deal with any messages that may be present, + * malformed skbs are discarded. */ -static void audit_receive_skb(struct sk_buff *skb) +static void audit_receive(struct sk_buff *skb) { struct nlmsghdr *nlh; /* @@ -1395,21 +1453,15 @@ static void audit_receive_skb(struct sk_buff *skb) nlh = nlmsg_hdr(skb); len = skb->len; + mutex_lock(&audit_cmd_mutex); while (nlmsg_ok(nlh, len)) { err = audit_receive_msg(skb, nlh); /* if err or if this message says it wants a response */ if (err || (nlh->nlmsg_flags & NLM_F_ACK)) - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, NULL); nlh = nlmsg_next(nlh, &len); } -} - -/* Receive messages from netlink socket. */ -static void audit_receive(struct sk_buff *skb) -{ - mutex_lock(&audit_cmd_mutex); - audit_receive_skb(skb); mutex_unlock(&audit_cmd_mutex); } @@ -1447,10 +1499,11 @@ static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); - rcu_read_lock(); - if (net == auditd_conn.net) - auditd_reset(); - rcu_read_unlock(); + /* NOTE: you would think that we would want to check the auditd + * connection and potentially reset it here if it lives in this + * namespace, but since the auditd connection tracking struct holds a + * reference to this namespace (see auditd_set()) we are only ever + * going to get here after that connection has been released */ netlink_kernel_release(aunet->sk); } @@ -1470,8 +1523,9 @@ static int __init audit_init(void) if (audit_initialized == AUDIT_DISABLED) return 0; - memset(&auditd_conn, 0, sizeof(auditd_conn)); - spin_lock_init(&auditd_conn.lock); + audit_buffer_cache = kmem_cache_create("audit_buffer", + sizeof(struct audit_buffer), + 0, SLAB_PANIC, NULL); skb_queue_head_init(&audit_queue); skb_queue_head_init(&audit_retry_queue); @@ -1538,60 +1592,33 @@ __setup("audit_backlog_limit=", audit_backlog_limit_set); static void audit_buffer_free(struct audit_buffer *ab) { - unsigned long flags; - if (!ab) return; kfree_skb(ab->skb); - spin_lock_irqsave(&audit_freelist_lock, flags); - if (audit_freelist_count > AUDIT_MAXFREE) - kfree(ab); - else { - audit_freelist_count++; - list_add(&ab->list, &audit_freelist); - } - spin_unlock_irqrestore(&audit_freelist_lock, flags); + kmem_cache_free(audit_buffer_cache, ab); } -static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, - gfp_t gfp_mask, int type) +static struct audit_buffer *audit_buffer_alloc(struct audit_context *ctx, + gfp_t gfp_mask, int type) { - unsigned long flags; - struct audit_buffer *ab = NULL; - struct nlmsghdr *nlh; - - spin_lock_irqsave(&audit_freelist_lock, flags); - if (!list_empty(&audit_freelist)) { - ab = list_entry(audit_freelist.next, - struct audit_buffer, list); - list_del(&ab->list); - --audit_freelist_count; - } - spin_unlock_irqrestore(&audit_freelist_lock, flags); - - if (!ab) { - ab = kmalloc(sizeof(*ab), gfp_mask); - if (!ab) - goto err; - } + struct audit_buffer *ab; - ab->ctx = ctx; - ab->gfp_mask = gfp_mask; + ab = kmem_cache_alloc(audit_buffer_cache, gfp_mask); + if (!ab) + return NULL; ab->skb = nlmsg_new(AUDIT_BUFSIZ, gfp_mask); if (!ab->skb) goto err; + if (!nlmsg_put(ab->skb, 0, 0, type, 0, 0)) + goto err; - nlh = nlmsg_put(ab->skb, 0, 0, type, 0, 0); - if (!nlh) - goto out_kfree_skb; + ab->ctx = ctx; + ab->gfp_mask = gfp_mask; return ab; -out_kfree_skb: - kfree_skb(ab->skb); - ab->skb = NULL; err: audit_buffer_free(ab); return NULL; @@ -1622,10 +1649,10 @@ unsigned int audit_serial(void) } static inline void audit_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { - *t = CURRENT_TIME; + ktime_get_real_ts64(t); *serial = audit_serial(); } } @@ -1649,7 +1676,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { struct audit_buffer *ab; - struct timespec t; + struct timespec64 t; unsigned int uninitialized_var(serial); if (audit_initialized != AUDIT_INITIALIZED) @@ -1702,8 +1729,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } audit_get_stamp(ab->ctx, &t, &serial); - audit_log_format(ab, "audit(%lu.%03lu:%u): ", - t.tv_sec, t.tv_nsec/1000000, serial); + audit_log_format(ab, "audit(%llu.%03lu:%u): ", + (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial); return ab; } diff --git a/kernel/audit.h b/kernel/audit.h index 0d87f8ab8778579dde21754e4ed647286f34e3ef..ddfce2ea4891221a49e9fc3e7f744920c1e67f8b 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -112,7 +112,7 @@ struct audit_context { enum audit_state state, current_state; unsigned int serial; /* serial number for record */ int major; /* syscall number */ - struct timespec ctime; /* time of syscall entry */ + struct timespec64 ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ long return_code;/* syscall return code */ u64 prio; @@ -218,7 +218,7 @@ extern void audit_log_name(struct audit_context *context, struct audit_names *n, const struct path *path, int record_num, int *call_panic); -extern int auditd_test_task(const struct task_struct *task); +extern int auditd_test_task(struct task_struct *task); #define AUDIT_INODE_BUCKETS 32 extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -237,8 +237,7 @@ extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); extern int parent_len(const char *path); extern int audit_compare_dname_path(const char *dname, const char *path, int plen); -extern struct sk_buff *audit_make_reply(__u32 portid, int seq, int type, - int done, int multi, +extern struct sk_buff *audit_make_reply(int seq, int type, int done, int multi, const void *payload, int size); extern void audit_panic(const char *message); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index 7ea57e516029d6b82a9e72adfea70d45f538cb2b..52f368b6561e984e5b93b8e71bc56cb80f3683aa 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -103,15 +103,15 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa goto out; } - fsnotify_init_mark(&audit_mark->mark, audit_fsnotify_free_mark); + fsnotify_init_mark(&audit_mark->mark, audit_fsnotify_group); audit_mark->mark.mask = AUDIT_FS_EVENTS; audit_mark->path = pathname; audit_update_mark(audit_mark, dentry->d_inode); audit_mark->rule = krule; - ret = fsnotify_add_mark(&audit_mark->mark, audit_fsnotify_group, inode, NULL, true); + ret = fsnotify_add_mark(&audit_mark->mark, inode, NULL, true); if (ret < 0) { - audit_fsnotify_mark_free(audit_mark); + fsnotify_put_mark(&audit_mark->mark); audit_mark = ERR_PTR(ret); } out: @@ -168,7 +168,8 @@ static int audit_mark_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *dname, u32 cookie) + const unsigned char *dname, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct audit_fsnotify_mark *audit_mark; const struct inode *inode = NULL; @@ -187,7 +188,7 @@ static int audit_mark_handle_event(struct fsnotify_group *group, default: BUG(); return 0; - }; + } if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { if (audit_compare_dname_path(dname, audit_mark->path, AUDIT_NAME_FULL)) @@ -201,6 +202,7 @@ static int audit_mark_handle_event(struct fsnotify_group *group, static const struct fsnotify_ops audit_mark_fsnotify_ops = { .handle_event = audit_mark_handle_event, + .free_mark = audit_fsnotify_free_mark, }; static int __init audit_fsnotify_init(void) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 7b44195da81bb25a080b165402b74cf8c7f59bcc..011d46e5f73fb448634c603fc474f479fe3e790b 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -3,13 +3,14 @@ #include #include #include +#include #include struct audit_tree; struct audit_chunk; struct audit_tree { - atomic_t count; + refcount_t count; int goner; struct audit_chunk *root; struct list_head chunks; @@ -77,7 +78,7 @@ static struct audit_tree *alloc_tree(const char *s) tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL); if (tree) { - atomic_set(&tree->count, 1); + refcount_set(&tree->count, 1); tree->goner = 0; INIT_LIST_HEAD(&tree->chunks); INIT_LIST_HEAD(&tree->rules); @@ -91,12 +92,12 @@ static struct audit_tree *alloc_tree(const char *s) static inline void get_tree(struct audit_tree *tree) { - atomic_inc(&tree->count); + refcount_inc(&tree->count); } static inline void put_tree(struct audit_tree *tree) { - if (atomic_dec_and_test(&tree->count)) + if (refcount_dec_and_test(&tree->count)) kfree_rcu(tree, head); } @@ -154,7 +155,7 @@ static struct audit_chunk *alloc_chunk(int count) INIT_LIST_HEAD(&chunk->owners[i].list); chunk->owners[i].index = i; } - fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); + fsnotify_init_mark(&chunk->mark, audit_tree_group); chunk->mark.mask = FS_IN_IGNORED; return chunk; } @@ -163,33 +164,54 @@ enum {HASH_SIZE = 128}; static struct list_head chunk_hash_heads[HASH_SIZE]; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock); -static inline struct list_head *chunk_hash(const struct inode *inode) +/* Function to return search key in our hash from inode. */ +static unsigned long inode_to_key(const struct inode *inode) { - unsigned long n = (unsigned long)inode / L1_CACHE_BYTES; + return (unsigned long)inode; +} + +/* + * Function to return search key in our hash from chunk. Key 0 is special and + * should never be present in the hash. + */ +static unsigned long chunk_to_key(struct audit_chunk *chunk) +{ + /* + * We have a reference to the mark so it should be attached to a + * connector. + */ + if (WARN_ON_ONCE(!chunk->mark.connector)) + return 0; + return (unsigned long)chunk->mark.connector->inode; +} + +static inline struct list_head *chunk_hash(unsigned long key) +{ + unsigned long n = key / L1_CACHE_BYTES; return chunk_hash_heads + n % HASH_SIZE; } /* hash_lock & entry->lock is held by caller */ static void insert_hash(struct audit_chunk *chunk) { - struct fsnotify_mark *entry = &chunk->mark; + unsigned long key = chunk_to_key(chunk); struct list_head *list; - if (!entry->inode) + if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED)) return; - list = chunk_hash(entry->inode); + list = chunk_hash(key); list_add_rcu(&chunk->hash, list); } /* called under rcu_read_lock */ struct audit_chunk *audit_tree_lookup(const struct inode *inode) { - struct list_head *list = chunk_hash(inode); + unsigned long key = inode_to_key(inode); + struct list_head *list = chunk_hash(key); struct audit_chunk *p; list_for_each_entry_rcu(p, list, hash) { - /* mark.inode may have gone NULL, but who cares? */ - if (p->mark.inode == inode) { + if (chunk_to_key(p) == key) { atomic_long_inc(&p->refs); return p; } @@ -233,11 +255,15 @@ static void untag_chunk(struct node *p) mutex_lock(&entry->group->mark_mutex); spin_lock(&entry->lock); - if (chunk->dead || !entry->inode) { + /* + * mark_mutex protects mark from getting detached and thus also from + * mark->connector->inode getting NULL. + */ + if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&entry->lock); mutex_unlock(&entry->group->mark_mutex); if (new) - free_chunk(new); + fsnotify_put_mark(&new->mark); goto out; } @@ -261,7 +287,7 @@ static void untag_chunk(struct node *p) if (!new) goto Fallback; - if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode, + if (fsnotify_add_mark_locked(&new->mark, entry->connector->inode, NULL, 1)) { fsnotify_put_mark(&new->mark); goto Fallback; @@ -327,7 +353,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree) return -ENOMEM; entry = &chunk->mark; - if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) { + if (fsnotify_add_mark(entry, inode, NULL, 0)) { fsnotify_put_mark(entry); return -ENOSPC; } @@ -366,7 +392,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) struct node *p; int n; - old_entry = fsnotify_find_inode_mark(audit_tree_group, inode); + old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks, + audit_tree_group); if (!old_entry) return create_chunk(inode, tree); @@ -393,17 +420,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) mutex_lock(&old_entry->group->mark_mutex); spin_lock(&old_entry->lock); - if (!old_entry->inode) { + /* + * mark_mutex protects mark from getting detached and thus also from + * mark->connector->inode getting NULL. + */ + if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { /* old_entry is being shot, lets just lie */ spin_unlock(&old_entry->lock); mutex_unlock(&old_entry->group->mark_mutex); fsnotify_put_mark(old_entry); - free_chunk(chunk); + fsnotify_put_mark(&chunk->mark); return -ENOENT; } - if (fsnotify_add_mark_locked(chunk_entry, old_entry->group, - old_entry->inode, NULL, 1)) { + if (fsnotify_add_mark_locked(chunk_entry, + old_entry->connector->inode, NULL, 1)) { spin_unlock(&old_entry->lock); mutex_unlock(&old_entry->group->mark_mutex); fsnotify_put_mark(chunk_entry); @@ -588,7 +619,8 @@ int audit_remove_tree_rule(struct audit_krule *rule) static int compare_root(struct vfsmount *mnt, void *arg) { - return d_backing_inode(mnt->mnt_root) == arg; + return inode_to_key(d_backing_inode(mnt->mnt_root)) == + (unsigned long)arg; } void audit_trim_trees(void) @@ -623,9 +655,10 @@ void audit_trim_trees(void) list_for_each_entry(node, &tree->chunks, list) { struct audit_chunk *chunk = find_chunk(node); /* this could be NULL if the watch is dying else where... */ - struct inode *inode = chunk->mark.inode; node->index |= 1U<<31; - if (iterate_mounts(compare_root, inode, root_mnt)) + if (iterate_mounts(compare_root, + (void *)chunk_to_key(chunk), + root_mnt)) node->index &= ~(1U<<31); } spin_unlock(&hash_lock); @@ -958,7 +991,8 @@ static int audit_tree_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { return 0; } @@ -979,6 +1013,7 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify static const struct fsnotify_ops audit_tree_ops = { .handle_event = audit_tree_handle_event, .freeing_mark = audit_tree_freeing_mark, + .free_mark = audit_tree_destroy_watch, }; static int __init audit_tree_init(void) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index f79e4658433d45e9d32a299ebf3f427b508ada2c..62d686d965813aeb4b8ef34877e886e08bd71912 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ */ struct audit_watch { - atomic_t count; /* reference count */ + refcount_t count; /* reference count */ dev_t dev; /* associated superblock device */ char *path; /* insertion path */ unsigned long ino; /* associated inode number */ @@ -102,7 +103,7 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode) struct audit_parent *parent = NULL; struct fsnotify_mark *entry; - entry = fsnotify_find_inode_mark(audit_watch_group, inode); + entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_watch_group); if (entry) parent = container_of(entry, struct audit_parent, mark); @@ -111,12 +112,12 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode) void audit_get_watch(struct audit_watch *watch) { - atomic_inc(&watch->count); + refcount_inc(&watch->count); } void audit_put_watch(struct audit_watch *watch) { - if (atomic_dec_and_test(&watch->count)) { + if (refcount_dec_and_test(&watch->count)) { WARN_ON(watch->parent); WARN_ON(!list_empty(&watch->rules)); kfree(watch->path); @@ -157,9 +158,9 @@ static struct audit_parent *audit_init_parent(struct path *path) INIT_LIST_HEAD(&parent->watches); - fsnotify_init_mark(&parent->mark, audit_watch_free_mark); + fsnotify_init_mark(&parent->mark, audit_watch_group); parent->mark.mask = AUDIT_FS_WATCH; - ret = fsnotify_add_mark(&parent->mark, audit_watch_group, inode, NULL, 0); + ret = fsnotify_add_mark(&parent->mark, inode, NULL, 0); if (ret < 0) { audit_free_parent(parent); return ERR_PTR(ret); @@ -178,7 +179,7 @@ static struct audit_watch *audit_init_watch(char *path) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&watch->rules); - atomic_set(&watch->count, 1); + refcount_set(&watch->count, 1); watch->path = path; watch->dev = AUDIT_DEV_UNSET; watch->ino = AUDIT_INO_UNSET; @@ -472,7 +473,8 @@ static int audit_watch_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *dname, u32 cookie) + const unsigned char *dname, u32 cookie, + struct fsnotify_iter_info *iter_info) { const struct inode *inode; struct audit_parent *parent; @@ -492,7 +494,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, BUG(); inode = NULL; break; - }; + } if (mask & (FS_CREATE|FS_MOVED_TO) && inode) audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); @@ -506,6 +508,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, static const struct fsnotify_ops audit_watch_fsnotify_ops = { .handle_event = audit_watch_handle_event, + .free_mark = audit_watch_free_mark, }; static int __init audit_watch_init(void) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 880519d6cf2ad00fbb15a23e545a629f580ae6f8..0b0aa5854dac1ed41959f9383af95d4097ad4646 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -338,7 +338,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) entry->rule.listnr != AUDIT_FILTER_USER) return -EINVAL; break; - }; + } switch(f->type) { default: @@ -412,7 +412,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) if (entry->rule.listnr != AUDIT_FILTER_EXIT) return -EINVAL; break; - }; + } return 0; } @@ -1033,7 +1033,7 @@ int audit_del_rule(struct audit_entry *entry) } /* List rules using struct audit_rule_data. */ -static void audit_list_rules(__u32 portid, int seq, struct sk_buff_head *q) +static void audit_list_rules(int seq, struct sk_buff_head *q) { struct sk_buff *skb; struct audit_krule *r; @@ -1048,15 +1048,15 @@ static void audit_list_rules(__u32 portid, int seq, struct sk_buff_head *q) data = audit_krule_to_data(r); if (unlikely(!data)) break; - skb = audit_make_reply(portid, seq, AUDIT_LIST_RULES, - 0, 1, data, + skb = audit_make_reply(seq, AUDIT_LIST_RULES, 0, 1, + data, sizeof(*data) + data->buflen); if (skb) skb_queue_tail(q, skb); kfree(data); } } - skb = audit_make_reply(portid, seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); + skb = audit_make_reply(seq, AUDIT_LIST_RULES, 1, 1, NULL, 0); if (skb) skb_queue_tail(q, skb); } @@ -1085,13 +1085,11 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re /** * audit_rule_change - apply all rules to the specified message type * @type: audit message type - * @portid: target port id for netlink audit messages * @seq: netlink audit message sequence (serial) number * @data: payload data * @datasz: size of payload data */ -int audit_rule_change(int type, __u32 portid, int seq, void *data, - size_t datasz) +int audit_rule_change(int type, int seq, void *data, size_t datasz) { int err = 0; struct audit_entry *entry; @@ -1150,7 +1148,7 @@ int audit_list_rules_send(struct sk_buff *request_skb, int seq) skb_queue_head_init(&dest->q); mutex_lock(&audit_filter_mutex); - audit_list_rules(portid, seq, &dest->q); + audit_list_rules(seq, &dest->q); mutex_unlock(&audit_filter_mutex); tsk = kthread_run(audit_send_list, dest, "audit_send_list"); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1c2333155893fac54138a8b3f7311c2ca520992c..bb724baa7ac9443b9b0f56b8d046b5ab3de17a65 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include "audit.h" @@ -1532,7 +1533,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, return; context->serial = 0; - context->ctime = CURRENT_TIME; + ktime_get_real_ts64(&context->ctime); context->in_syscall = 1; context->current_state = state; context->ppid = 0; @@ -1596,7 +1597,7 @@ static inline void handle_one(const struct inode *inode) struct audit_tree_refs *p; struct audit_chunk *chunk; int count; - if (likely(hlist_empty(&inode->i_fsnotify_marks))) + if (likely(!inode->i_fsnotify_marks)) return; context = current->audit_context; p = context->trees; @@ -1639,7 +1640,7 @@ static void handle_path(const struct dentry *dentry) seq = read_seqbegin(&rename_lock); for(;;) { struct inode *inode = d_backing_inode(d); - if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) { + if (inode && unlikely(inode->i_fsnotify_marks)) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); if (chunk) { @@ -1941,13 +1942,13 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values * @ctx: audit_context for the task - * @t: timespec to store time recorded in the audit_context + * @t: timespec64 to store time recorded in the audit_context * @serial: serial value that is recorded in the audit_context * * Also sets the context as auditable. */ int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { if (!ctx->in_syscall) return 0; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e1ce4f4fd7fd47fda2c18776573c0f65479c6728..e1e5e658f2dbf887be70fbfc9492d4365aef5064 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,7 +1,7 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 6b6f41f0b21164a3cc2c26bb71be2b89098bbdbd..172dc8ee0e3bda95c39f7e037e7a33baf8012214 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1,4 +1,5 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016,2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -16,6 +17,8 @@ #include #include +#include "map_in_map.h" + static void bpf_array_free_percpu(struct bpf_array *array) { int i; @@ -83,6 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; + array->map.map_flags = attr->map_flags; array->elem_size = elem_size; if (!percpu) @@ -113,6 +117,30 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) return array->value + array->elem_size * index; } +/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ +static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + u32 elem_size = round_up(map->value_size, 8); + const int ret = BPF_REG_0; + const int map_ptr = BPF_REG_1; + const int index = BPF_REG_2; + + *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); + *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); + + if (is_power_of_2(elem_size)) { + *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); + } else { + *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); + } + *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *insn++ = BPF_MOV64_IMM(ret, 0); + return insn - insn_buf; +} + /* Called from eBPF program */ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) { @@ -155,7 +183,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - u32 index = *(u32 *)key; + u32 index = key ? *(u32 *)key : U32_MAX; u32 *next = (u32 *)next_key; if (index >= array->map.max_entries) { @@ -260,21 +288,17 @@ static void array_map_free(struct bpf_map *map) bpf_map_area_free(array); } -static const struct bpf_map_ops array_ops = { +const struct bpf_map_ops array_map_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_gen_lookup = array_map_gen_lookup, }; -static struct bpf_map_type_list array_type __ro_after_init = { - .ops = &array_ops, - .type = BPF_MAP_TYPE_ARRAY, -}; - -static const struct bpf_map_ops percpu_array_ops = { +const struct bpf_map_ops percpu_array_map_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, @@ -283,19 +307,6 @@ static const struct bpf_map_ops percpu_array_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map_type_list percpu_array_type __ro_after_init = { - .ops = &percpu_array_ops, - .type = BPF_MAP_TYPE_PERCPU_ARRAY, -}; - -static int __init register_array_map(void) -{ - bpf_register_map_type(&array_type); - bpf_register_map_type(&percpu_array_type); - return 0; -} -late_initcall(register_array_map); - static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) { /* only file descriptors can be stored in this type of map */ @@ -399,7 +410,7 @@ void bpf_fd_array_map_clear(struct bpf_map *map) fd_array_map_delete_elem(map, &i); } -static const struct bpf_map_ops prog_array_ops = { +const struct bpf_map_ops prog_array_map_ops = { .map_alloc = fd_array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, @@ -409,18 +420,6 @@ static const struct bpf_map_ops prog_array_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, }; -static struct bpf_map_type_list prog_array_type __ro_after_init = { - .ops = &prog_array_ops, - .type = BPF_MAP_TYPE_PROG_ARRAY, -}; - -static int __init register_prog_array_map(void) -{ - bpf_register_map_type(&prog_array_type); - return 0; -} -late_initcall(register_prog_array_map); - static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, struct file *map_file) { @@ -511,7 +510,7 @@ static void perf_event_fd_array_release(struct bpf_map *map, rcu_read_unlock(); } -static const struct bpf_map_ops perf_event_array_ops = { +const struct bpf_map_ops perf_event_array_map_ops = { .map_alloc = fd_array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, @@ -522,18 +521,6 @@ static const struct bpf_map_ops perf_event_array_ops = { .map_release = perf_event_fd_array_release, }; -static struct bpf_map_type_list perf_event_array_type __ro_after_init = { - .ops = &perf_event_array_ops, - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, -}; - -static int __init register_perf_event_array_map(void) -{ - bpf_register_map_type(&perf_event_array_type); - return 0; -} -late_initcall(register_perf_event_array_map); - #ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, @@ -554,7 +541,7 @@ static void cgroup_fd_array_free(struct bpf_map *map) fd_array_map_free(map); } -static const struct bpf_map_ops cgroup_array_ops = { +const struct bpf_map_ops cgroup_array_map_ops = { .map_alloc = fd_array_map_alloc, .map_free = cgroup_fd_array_free, .map_get_next_key = array_map_get_next_key, @@ -563,16 +550,53 @@ static const struct bpf_map_ops cgroup_array_ops = { .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, }; +#endif -static struct bpf_map_type_list cgroup_array_type __ro_after_init = { - .ops = &cgroup_array_ops, - .type = BPF_MAP_TYPE_CGROUP_ARRAY, -}; +static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) +{ + struct bpf_map *map, *inner_map_meta; + + inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); + if (IS_ERR(inner_map_meta)) + return inner_map_meta; -static int __init register_cgroup_array_map(void) + map = fd_array_map_alloc(attr); + if (IS_ERR(map)) { + bpf_map_meta_free(inner_map_meta); + return map; + } + + map->inner_map_meta = inner_map_meta; + + return map; +} + +static void array_of_map_free(struct bpf_map *map) { - bpf_register_map_type(&cgroup_array_type); - return 0; + /* map->inner_map_meta is only accessed by syscall which + * is protected by fdget/fdput. + */ + bpf_map_meta_free(map->inner_map_meta); + bpf_fd_array_map_clear(map); + fd_array_map_free(map); } -late_initcall(register_cgroup_array_map); -#endif + +static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_map **inner_map = array_map_lookup_elem(map, key); + + if (!inner_map) + return NULL; + + return READ_ONCE(*inner_map); +} + +const struct bpf_map_ops array_of_maps_map_ops = { + .map_alloc = array_of_map_alloc, + .map_free = array_of_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = array_of_map_lookup_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = bpf_map_fd_get_ptr, + .map_fd_put_ptr = bpf_map_fd_put_ptr, +}; diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index f62d1d56f41d0c5f3e3ef2c12b7dea3eb6578a10..e6ef4401a13804b6a6604bf22671e0c9dc4d39b2 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -13,7 +13,7 @@ #define LOCAL_FREE_TARGET (128) #define LOCAL_NR_SCANS LOCAL_FREE_TARGET -#define PERCPU_FREE_TARGET (16) +#define PERCPU_FREE_TARGET (4) #define PERCPU_NR_SCANS PERCPU_FREE_TARGET /* Helpers to get the local list index */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index da0f53690295610ff5ae447ed98fb6e70c99c4f1..ea6033cba94721fd8ca080354c825771d93620fb 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -154,7 +154,7 @@ int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, /** * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering - * @sk: The socken sending or receiving traffic + * @sk: The socket sending or receiving traffic * @skb: The skb that is being sent or received * @type: The type of program to be exectuted * @@ -189,10 +189,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, prog = rcu_dereference(cgrp->bpf.effective[type]); if (prog) { unsigned int offset = skb->data - skb_network_header(skb); + struct sock *save_sk = skb->sk; + skb->sk = sk; __skb_push(skb, offset); ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; __skb_pull(skb, offset); + skb->sk = save_sk; } rcu_read_unlock(); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b4f1cb0c5ac7104c3f12f9ed5c1e3fe159c57824..dedf367f59bba529ded5603d61782bccb80782cb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -76,8 +76,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog_aux *aux; struct bpf_prog *fp; @@ -107,8 +106,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_alloc); struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; u32 pages, delta; int ret; @@ -394,27 +392,23 @@ static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) void bpf_prog_kallsyms_add(struct bpf_prog *fp) { - unsigned long flags; - if (!bpf_prog_kallsyms_candidate(fp) || !capable(CAP_SYS_ADMIN)) return; - spin_lock_irqsave(&bpf_lock, flags); + spin_lock_bh(&bpf_lock); bpf_prog_ksym_node_add(fp->aux); - spin_unlock_irqrestore(&bpf_lock, flags); + spin_unlock_bh(&bpf_lock); } void bpf_prog_kallsyms_del(struct bpf_prog *fp) { - unsigned long flags; - if (!bpf_prog_kallsyms_candidate(fp)) return; - spin_lock_irqsave(&bpf_lock, flags); + spin_lock_bh(&bpf_lock); bpf_prog_ksym_node_del(fp->aux); - spin_unlock_irqrestore(&bpf_lock, flags); + spin_unlock_bh(&bpf_lock); } static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) @@ -659,8 +653,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | - gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 361a69dfe5434d6afb554477b899e91db90fb29f..004334ea13ba3f56f10b33e9a1a50a97906000e4 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -16,6 +16,7 @@ #include #include "percpu_freelist.h" #include "bpf_lru_list.h" +#include "map_in_map.h" struct bucket { struct hlist_nulls_head head; @@ -86,6 +87,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size return *(void __percpu **)(l->key + key_size); } +static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) +{ + return *(void **)(l->key + roundup(map->key_size, 8)); +} + static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) { return (struct htab_elem *) (htab->elems + i * htab->elem_size); @@ -426,7 +432,11 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, return NULL; } -/* Called from syscall or from eBPF program */ +/* Called from syscall or from eBPF program directly, so + * arguments have to match bpf_map_lookup_elem() exactly. + * The return value is adjusted by BPF instructions + * in htab_map_gen_lookup(). + */ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); @@ -458,6 +468,30 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +/* inline bpf_map_lookup_elem() call. + * Instead of: + * bpf_prog + * bpf_map_lookup_elem + * map->ops->map_lookup_elem + * htab_map_lookup_elem + * __htab_map_lookup_elem + * do: + * bpf_prog + * __htab_map_lookup_elem + */ +static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + const int ret = BPF_REG_0; + + *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem); + *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); + *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, + offsetof(struct htab_elem, key) + + round_up(map->key_size, 8)); + return insn - insn_buf; +} + static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); @@ -506,12 +540,15 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) struct hlist_nulls_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; - int i; + int i = 0; WARN_ON_ONCE(!rcu_read_lock_held()); key_size = map->key_size; + if (!key) + goto find_first_elem; + hash = htab_map_hash(key, key_size); head = select_bucket(htab, hash); @@ -519,10 +556,8 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) /* lookup the key */ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); - if (!l) { - i = 0; + if (!l) goto find_first_elem; - } /* key was found, get next key in the same bucket */ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), @@ -582,6 +617,14 @@ static void htab_elem_free_rcu(struct rcu_head *head) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { + struct bpf_map *map = &htab->map; + + if (map->ops->map_fd_put_ptr) { + void *ptr = fd_htab_map_get_ptr(map, l); + + map->ops->map_fd_put_ptr(ptr); + } + if (htab_is_prealloc(htab)) { pcpu_freelist_push(&htab->freelist, &l->fnode); } else { @@ -1027,6 +1070,7 @@ static void delete_all_elements(struct bpf_htab *htab) } } } + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ static void htab_map_free(struct bpf_map *map) { @@ -1053,21 +1097,17 @@ static void htab_map_free(struct bpf_map *map) kfree(htab); } -static const struct bpf_map_ops htab_ops = { +const struct bpf_map_ops htab_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_map_lookup_elem, .map_update_elem = htab_map_update_elem, .map_delete_elem = htab_map_delete_elem, + .map_gen_lookup = htab_map_gen_lookup, }; -static struct bpf_map_type_list htab_type __ro_after_init = { - .ops = &htab_ops, - .type = BPF_MAP_TYPE_HASH, -}; - -static const struct bpf_map_ops htab_lru_ops = { +const struct bpf_map_ops htab_lru_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, @@ -1076,11 +1116,6 @@ static const struct bpf_map_ops htab_lru_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_type __ro_after_init = { - .ops = &htab_lru_ops, - .type = BPF_MAP_TYPE_LRU_HASH, -}; - /* Called from eBPF program */ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) { @@ -1154,7 +1189,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, return ret; } -static const struct bpf_map_ops htab_percpu_ops = { +const struct bpf_map_ops htab_percpu_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, @@ -1163,12 +1198,7 @@ static const struct bpf_map_ops htab_percpu_ops = { .map_delete_elem = htab_map_delete_elem, }; -static struct bpf_map_type_list htab_percpu_type __ro_after_init = { - .ops = &htab_percpu_ops, - .type = BPF_MAP_TYPE_PERCPU_HASH, -}; - -static const struct bpf_map_ops htab_lru_percpu_ops = { +const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, @@ -1177,17 +1207,102 @@ static const struct bpf_map_ops htab_lru_percpu_ops = { .map_delete_elem = htab_lru_map_delete_elem, }; -static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = { - .ops = &htab_lru_percpu_ops, - .type = BPF_MAP_TYPE_LRU_PERCPU_HASH, -}; +static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr) +{ + struct bpf_map *map; + + if (attr->value_size != sizeof(u32)) + return ERR_PTR(-EINVAL); + + /* pointer is stored internally */ + attr->value_size = sizeof(void *); + map = htab_map_alloc(attr); + attr->value_size = sizeof(u32); -static int __init register_htab_map(void) + return map; +} + +static void fd_htab_map_free(struct bpf_map *map) { - bpf_register_map_type(&htab_type); - bpf_register_map_type(&htab_percpu_type); - bpf_register_map_type(&htab_lru_type); - bpf_register_map_type(&htab_lru_percpu_type); - return 0; + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct hlist_nulls_node *n; + struct hlist_nulls_head *head; + struct htab_elem *l; + int i; + + for (i = 0; i < htab->n_buckets; i++) { + head = select_bucket(htab, i); + + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { + void *ptr = fd_htab_map_get_ptr(map, l); + + map->ops->map_fd_put_ptr(ptr); + } + } + + htab_map_free(map); +} + +/* only called from syscall */ +int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags) +{ + void *ptr; + int ret; + u32 ufd = *(u32 *)value; + + ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + ret = htab_map_update_elem(map, key, &ptr, map_flags); + if (ret) + map->ops->map_fd_put_ptr(ptr); + + return ret; +} + +static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) +{ + struct bpf_map *map, *inner_map_meta; + + inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); + if (IS_ERR(inner_map_meta)) + return inner_map_meta; + + map = fd_htab_map_alloc(attr); + if (IS_ERR(map)) { + bpf_map_meta_free(inner_map_meta); + return map; + } + + map->inner_map_meta = inner_map_meta; + + return map; } -late_initcall(register_htab_map); + +static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_map **inner_map = htab_map_lookup_elem(map, key); + + if (!inner_map) + return NULL; + + return READ_ONCE(*inner_map); +} + +static void htab_of_map_free(struct bpf_map *map) +{ + bpf_map_meta_free(map->inner_map_meta); + fd_htab_map_free(map); +} + +const struct bpf_map_ops htab_of_maps_map_ops = { + .map_alloc = htab_of_map_alloc, + .map_free = htab_of_map_free, + .map_get_next_key = htab_map_get_next_key, + .map_lookup_elem = htab_of_map_lookup_elem, + .map_delete_elem = htab_map_delete_elem, + .map_fd_get_ptr = bpf_map_fd_get_ptr, + .map_fd_put_ptr = bpf_map_fd_put_ptr, +}; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fddcae801724be149b73794ab42f9f177ecf4f56..9bbd33497d3d0846de8ebd7b74828e5566045e68 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -429,7 +429,7 @@ static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) static int bpf_fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr bpf_rfiles[] = { { "" } }; + static const struct tree_descr bpf_rfiles[] = { { "" } }; struct bpf_mount_opts opts; struct inode *inode; int ret; diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index b37bd9ab7f574242722c1d9b0503b896019488b2..b09185f0f17d3428ac445de2ed52756004ca5368 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -432,6 +432,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) trie->map.key_size = attr->key_size; trie->map.value_size = attr->value_size; trie->map.max_entries = attr->max_entries; + trie->map.map_flags = attr->map_flags; trie->data_size = attr->key_size - offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; @@ -505,7 +506,7 @@ static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) return -ENOTSUPP; } -static const struct bpf_map_ops trie_ops = { +const struct bpf_map_ops trie_map_ops = { .map_alloc = trie_alloc, .map_free = trie_free, .map_get_next_key = trie_get_next_key, @@ -513,15 +514,3 @@ static const struct bpf_map_ops trie_ops = { .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, }; - -static struct bpf_map_type_list trie_type __ro_after_init = { - .ops = &trie_ops, - .type = BPF_MAP_TYPE_LPM_TRIE, -}; - -static int __init register_trie_map(void) -{ - bpf_register_map_type(&trie_type); - return 0; -} -late_initcall(register_trie_map); diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c new file mode 100644 index 0000000000000000000000000000000000000000..59bcdf821ae47477f2a058af3e6918f0ce222325 --- /dev/null +++ b/kernel/bpf/map_in_map.c @@ -0,0 +1,97 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include "map_in_map.h" + +struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) +{ + struct bpf_map *inner_map, *inner_map_meta; + struct fd f; + + f = fdget(inner_map_ufd); + inner_map = __bpf_map_get(f); + if (IS_ERR(inner_map)) + return inner_map; + + /* prog_array->owner_prog_type and owner_jited + * is a runtime binding. Doing static check alone + * in the verifier is not enough. + */ + if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + fdput(f); + return ERR_PTR(-ENOTSUPP); + } + + /* Does not support >1 level map-in-map */ + if (inner_map->inner_map_meta) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + if (!inner_map_meta) { + fdput(f); + return ERR_PTR(-ENOMEM); + } + + inner_map_meta->map_type = inner_map->map_type; + inner_map_meta->key_size = inner_map->key_size; + inner_map_meta->value_size = inner_map->value_size; + inner_map_meta->map_flags = inner_map->map_flags; + inner_map_meta->ops = inner_map->ops; + inner_map_meta->max_entries = inner_map->max_entries; + + fdput(f); + return inner_map_meta; +} + +void bpf_map_meta_free(struct bpf_map *map_meta) +{ + kfree(map_meta); +} + +bool bpf_map_meta_equal(const struct bpf_map *meta0, + const struct bpf_map *meta1) +{ + /* No need to compare ops because it is covered by map_type */ + return meta0->map_type == meta1->map_type && + meta0->key_size == meta1->key_size && + meta0->value_size == meta1->value_size && + meta0->map_flags == meta1->map_flags && + meta0->max_entries == meta1->max_entries; +} + +void *bpf_map_fd_get_ptr(struct bpf_map *map, + struct file *map_file /* not used */, + int ufd) +{ + struct bpf_map *inner_map; + struct fd f; + + f = fdget(ufd); + inner_map = __bpf_map_get(f); + if (IS_ERR(inner_map)) + return inner_map; + + if (bpf_map_meta_equal(map->inner_map_meta, inner_map)) + inner_map = bpf_map_inc(inner_map, false); + else + inner_map = ERR_PTR(-EINVAL); + + fdput(f); + return inner_map; +} + +void bpf_map_fd_put_ptr(void *ptr) +{ + /* ptr->ops->map_free() has to go through one + * rcu grace period by itself. + */ + bpf_map_put(ptr); +} diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h new file mode 100644 index 0000000000000000000000000000000000000000..177fadb689dca9075ff9c43055d392821908ba6d --- /dev/null +++ b/kernel/bpf/map_in_map.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef __MAP_IN_MAP_H__ +#define __MAP_IN_MAP_H__ + +#include + +struct file; +struct bpf_map; + +struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd); +void bpf_map_meta_free(struct bpf_map *map_meta); +bool bpf_map_meta_equal(const struct bpf_map *meta0, + const struct bpf_map *meta1); +void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, + int ufd); +void bpf_map_fd_put_ptr(void *ptr); + +#endif diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 22aa45cd0324e320b9cd8b0f89bf9befdaae74b6..31147d730abf532cc8f10efc9a871b5dd498928d 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -88,6 +88,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) smap->map.key_size = attr->key_size; smap->map.value_size = value_size; smap->map.max_entries = attr->max_entries; + smap->map.map_flags = attr->map_flags; smap->n_buckets = n_buckets; smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; @@ -264,7 +265,7 @@ static void stack_map_free(struct bpf_map *map) put_callchain_buffers(); } -static const struct bpf_map_ops stack_map_ops = { +const struct bpf_map_ops stack_map_ops = { .map_alloc = stack_map_alloc, .map_free = stack_map_free, .map_get_next_key = stack_map_get_next_key, @@ -272,15 +273,3 @@ static const struct bpf_map_ops stack_map_ops = { .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, }; - -static struct bpf_map_type_list stack_map_type __ro_after_init = { - .ops = &stack_map_ops, - .type = BPF_MAP_TYPE_STACK_TRACE, -}; - -static int __init register_stack_map(void) -{ - bpf_register_map_type(&stack_map_type); - return 0; -} -late_initcall(register_stack_map); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7af0dcc5d7555679cea6c08395ab54710e7066e6..265a0d854e3358c5c714af9cc3ab2bae8a4754b4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -27,30 +27,29 @@ DEFINE_PER_CPU(int, bpf_prog_active); int sysctl_unprivileged_bpf_disabled __read_mostly; -static LIST_HEAD(bpf_map_types); +static const struct bpf_map_ops * const bpf_map_types[] = { +#define BPF_PROG_TYPE(_id, _ops) +#define BPF_MAP_TYPE(_id, _ops) \ + [_id] = &_ops, +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +}; static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) { - struct bpf_map_type_list *tl; struct bpf_map *map; - list_for_each_entry(tl, &bpf_map_types, list_node) { - if (tl->type == attr->map_type) { - map = tl->ops->map_alloc(attr); - if (IS_ERR(map)) - return map; - map->ops = tl->ops; - map->map_type = attr->map_type; - return map; - } - } - return ERR_PTR(-EINVAL); -} + if (attr->map_type >= ARRAY_SIZE(bpf_map_types) || + !bpf_map_types[attr->map_type]) + return ERR_PTR(-EINVAL); -/* boot time registration of different map implementations */ -void bpf_register_map_type(struct bpf_map_type_list *tl) -{ - list_add(&tl->list_node, &bpf_map_types); + map = bpf_map_types[attr->map_type]->map_alloc(attr); + if (IS_ERR(map)) + return map; + map->ops = bpf_map_types[attr->map_type]; + map->map_type = attr->map_type; + return map; } void *bpf_map_area_alloc(size_t size) @@ -68,8 +67,7 @@ void *bpf_map_area_alloc(size_t size) return area; } - return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags, - PAGE_KERNEL); + return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL); } void bpf_map_area_free(void *area) @@ -215,7 +213,7 @@ int bpf_map_new_fd(struct bpf_map *map) offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ sizeof(attr->CMD##_LAST_FIELD)) != NULL -#define BPF_MAP_CREATE_LAST_FIELD map_flags +#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -352,6 +350,9 @@ static int map_lookup_elem(union bpf_attr *attr) err = bpf_percpu_array_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { err = bpf_stackmap_copy(map, key, value); + } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + err = -ENOTSUPP; } else { rcu_read_lock(); ptr = map->ops->map_lookup_elem(map, key); @@ -438,11 +439,17 @@ static int map_update_elem(union bpf_attr *attr) err = bpf_percpu_array_update(map, key, value, attr->flags); } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || map->map_type == BPF_MAP_TYPE_PROG_ARRAY || - map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) { + map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { rcu_read_lock(); err = bpf_fd_array_map_update_elem(map, f.file, key, value, attr->flags); rcu_read_unlock(); + } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + rcu_read_lock(); + err = bpf_fd_htab_map_update_elem(map, f.file, key, value, + attr->flags); + rcu_read_unlock(); } else { rcu_read_lock(); err = map->ops->map_update_elem(map, key, value, attr->flags); @@ -528,14 +535,18 @@ static int map_get_next_key(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); - err = -ENOMEM; - key = kmalloc(map->key_size, GFP_USER); - if (!key) - goto err_put; + if (ukey) { + err = -ENOMEM; + key = kmalloc(map->key_size, GFP_USER); + if (!key) + goto err_put; - err = -EFAULT; - if (copy_from_user(key, ukey, map->key_size) != 0) - goto free_key; + err = -EFAULT; + if (copy_from_user(key, ukey, map->key_size) != 0) + goto free_key; + } else { + key = NULL; + } err = -ENOMEM; next_key = kmalloc(map->key_size, GFP_USER); @@ -564,79 +575,23 @@ static int map_get_next_key(union bpf_attr *attr) return err; } -static LIST_HEAD(bpf_prog_types); +static const struct bpf_verifier_ops * const bpf_prog_types[] = { +#define BPF_PROG_TYPE(_id, _ops) \ + [_id] = &_ops, +#define BPF_MAP_TYPE(_id, _ops) +#include +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +}; static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) { - struct bpf_prog_type_list *tl; - - list_for_each_entry(tl, &bpf_prog_types, list_node) { - if (tl->type == type) { - prog->aux->ops = tl->ops; - prog->type = type; - return 0; - } - } - - return -EINVAL; -} - -void bpf_register_prog_type(struct bpf_prog_type_list *tl) -{ - list_add(&tl->list_node, &bpf_prog_types); -} - -/* fixup insn->imm field of bpf_call instructions: - * if (insn->imm == BPF_FUNC_map_lookup_elem) - * insn->imm = bpf_map_lookup_elem - __bpf_call_base; - * else if (insn->imm == BPF_FUNC_map_update_elem) - * insn->imm = bpf_map_update_elem - __bpf_call_base; - * else ... - * - * this function is called after eBPF program passed verification - */ -static void fixup_bpf_calls(struct bpf_prog *prog) -{ - const struct bpf_func_proto *fn; - int i; + if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) + return -EINVAL; - for (i = 0; i < prog->len; i++) { - struct bpf_insn *insn = &prog->insnsi[i]; - - if (insn->code == (BPF_JMP | BPF_CALL)) { - /* we reach here when program has bpf_call instructions - * and it passed bpf_check(), means that - * ops->get_func_proto must have been supplied, check it - */ - BUG_ON(!prog->aux->ops->get_func_proto); - - if (insn->imm == BPF_FUNC_get_route_realm) - prog->dst_needed = 1; - if (insn->imm == BPF_FUNC_get_prandom_u32) - bpf_user_rnd_init_once(); - if (insn->imm == BPF_FUNC_xdp_adjust_head) - prog->xdp_adjust_head = 1; - if (insn->imm == BPF_FUNC_tail_call) { - /* mark bpf_tail_call as different opcode - * to avoid conditional branch in - * interpeter for every normal call - * and to prevent accidental JITing by - * JIT compiler that doesn't support - * bpf_tail_call yet - */ - insn->imm = 0; - insn->code |= BPF_X; - continue; - } - - fn = prog->aux->ops->get_func_proto(insn->imm); - /* all functions that have prototype and verifier allowed - * programs to call them, must be real in-kernel functions - */ - BUG_ON(!fn->func); - insn->imm = fn->func - __bpf_call_base; - } - } + prog->aux->ops = bpf_prog_types[type]; + prog->type = type; + return 0; } /* drop refcnt on maps used by eBPF program and free auxilary data */ @@ -828,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD kern_version +#define BPF_PROG_LOAD_LAST_FIELD prog_flags static int bpf_prog_load(union bpf_attr *attr) { @@ -841,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; + if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) + return -EINVAL; + /* copy eBPF program license from user space */ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) @@ -892,9 +850,6 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* fixup BPF_CALL->imm field */ - fixup_bpf_calls(prog); - /* eBPF program is ready to be JITed */ prog = bpf_prog_select_runtime(prog, &err); if (err < 0) @@ -1020,6 +975,28 @@ static int bpf_prog_detach(const union bpf_attr *attr) } #endif /* CONFIG_CGROUP_BPF */ +#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration + +static int bpf_prog_test_run(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct bpf_prog *prog; + int ret = -ENOTSUPP; + + if (CHECK_ATTR(BPF_PROG_TEST_RUN)) + return -EINVAL; + + prog = bpf_prog_get(attr->test.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (prog->aux->ops->test_run) + ret = prog->aux->ops->test_run(prog, attr, uattr); + + bpf_prog_put(prog); + return ret; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -1086,7 +1063,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_OBJ_GET: err = bpf_obj_get(&attr); break; - #ifdef CONFIG_CGROUP_BPF case BPF_PROG_ATTACH: err = bpf_prog_attach(&attr); @@ -1095,7 +1071,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_prog_detach(&attr); break; #endif - + case BPF_PROG_TEST_RUN: + err = bpf_prog_test_run(&attr, uattr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a834068a400e279f963097489ed165c6ad1301b2..339c8a1371de0201df0f7ac799280168ea4d22e3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -140,9 +140,11 @@ struct bpf_verifier_stack_elem { struct bpf_verifier_stack_elem *next; }; -#define BPF_COMPLEXITY_LIMIT_INSNS 65536 +#define BPF_COMPLEXITY_LIMIT_INSNS 98304 #define BPF_COMPLEXITY_LIMIT_STACK 1024 +#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA) + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -239,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state) if (reg->max_value != BPF_REGISTER_MAX_RANGE) verbose(",max_value=%llu", (unsigned long long)reg->max_value); + if (reg->min_align) + verbose(",min_align=%u", reg->min_align); + if (reg->aux_off) + verbose(",aux_off=%u", reg->aux_off); + if (reg->aux_off_align) + verbose(",aux_off_align=%u", reg->aux_off_align); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -296,7 +304,8 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; -static void print_bpf_insn(struct bpf_insn *insn) +static void print_bpf_insn(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) { u8 class = BPF_CLASS(insn->code); @@ -360,9 +369,19 @@ static void print_bpf_insn(struct bpf_insn *insn) insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->src_reg, insn->imm); - } else if (BPF_MODE(insn->code) == BPF_IMM) { - verbose("(%02x) r%d = 0x%x\n", - insn->code, insn->dst_reg, insn->imm); + } else if (BPF_MODE(insn->code) == BPF_IMM && + BPF_SIZE(insn->code) == BPF_DW) { + /* At this point, we already made sure that the second + * part of the ldimm64 insn is accessible. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + + if (map_ptr && !env->allow_ptr_leaks) + imm = 0; + + verbose("(%02x) r%d = 0x%llx\n", insn->code, + insn->dst_reg, (unsigned long long)imm); } else { verbose("BUG_ld_%02x\n", insn->code); return; @@ -444,16 +463,22 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; +static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +{ + BUG_ON(regno >= MAX_BPF_REG); + + memset(®s[regno], 0, sizeof(regs[regno])); + regs[regno].type = NOT_INIT; + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + static void init_reg_state(struct bpf_reg_state *regs) { int i; - for (i = 0; i < MAX_BPF_REG; i++) { - regs[i].type = NOT_INIT; - regs[i].imm = 0; - regs[i].min_value = BPF_REGISTER_MIN_RANGE; - regs[i].max_value = BPF_REGISTER_MAX_RANGE; - } + for (i = 0; i < MAX_BPF_REG; i++) + mark_reg_not_init(regs, i); /* frame pointer */ regs[BPF_REG_FP].type = FRAME_PTR; @@ -479,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; + regs[regno].min_align = 0; } static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs, @@ -766,17 +792,37 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, - int off, int size) + int off, int size, bool strict) { - if (reg->id && size != 1) { - verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n"); - return -EACCES; + int ip_align; + int reg_off; + + /* Byte size accesses are always allowed. */ + if (!strict || size == 1) + return 0; + + reg_off = reg->off; + if (reg->id) { + if (reg->aux_off_align % size) { + verbose("Packet access is only %u byte aligned, %d byte access not allowed\n", + reg->aux_off_align, size); + return -EACCES; + } + reg_off += reg->aux_off; } - /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + /* For platforms that do not have a Kconfig enabling + * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of + * NET_IP_ALIGN is universally set to '2'. And on platforms + * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get + * to this code only in strict mode where we want to emulate + * the NET_IP_ALIGN==2 checking. Therefore use an + * unconditional IP align value of '2'. + */ + ip_align = 2; + if ((ip_align + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", - NET_IP_ALIGN, reg->off, off, size); + ip_align, reg_off, off, size); return -EACCES; } @@ -784,9 +830,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, } static int check_val_ptr_alignment(const struct bpf_reg_state *reg, - int size) + int size, bool strict) { - if (size != 1) { + if (strict && size != 1) { verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); return -EACCES; } @@ -794,16 +840,17 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg, return 0; } -static int check_ptr_alignment(const struct bpf_reg_state *reg, +static int check_ptr_alignment(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int off, int size) { + bool strict = env->strict_alignment; + switch (reg->type) { case PTR_TO_PACKET: - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : - check_pkt_ptr_alignment(reg, off, size); + return check_pkt_ptr_alignment(reg, off, size, strict); case PTR_TO_MAP_VALUE_ADJ: - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : - check_val_ptr_alignment(reg, size); + return check_val_ptr_alignment(reg, size, strict); default: if (off % size != 0) { verbose("misaligned access off %d size %d\n", @@ -836,7 +883,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (size < 0) return size; - err = check_ptr_alignment(reg, off, size); + err = check_ptr_alignment(env, reg, off, size); if (err) return err; @@ -870,6 +917,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, value_regno); /* note that reg.[id|off|range] == 0 */ state->regs[value_regno].type = reg_type; + state->regs[value_regno].aux_off = 0; + state->regs[value_regno].aux_off_align = 0; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { @@ -1215,6 +1264,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + if (func_id != BPF_FUNC_map_lookup_elem) + goto error; default: break; } @@ -1291,12 +1344,11 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } } -static int check_call(struct bpf_verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs = state->regs; - struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1363,11 +1415,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id) } /* reset caller saved regs */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* update return register */ if (fn->ret_type == RET_INTEGER) { @@ -1375,6 +1424,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id) } else if (fn->ret_type == RET_VOID) { regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { + struct bpf_insn_aux_data *insn_aux; + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0; /* remember map_ptr, so that check_map_access() @@ -1387,6 +1438,11 @@ static int check_call(struct bpf_verifier_env *env, int func_id) } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].id = ++env->id_gen; + insn_aux = &env->insn_aux_data[insn_idx]; + if (!insn_aux->map_ptr) + insn_aux->map_ptr = meta.map_ptr; + else if (insn_aux->map_ptr != meta.map_ptr) + insn_aux->map_ptr = BPF_MAP_PTR_POISON; } else { verbose("unknown return type %d of func %s#%d\n", fn->ret_type, func_id_name(func_id), func_id); @@ -1431,6 +1487,8 @@ static int check_packet_ptr_add(struct bpf_verifier_env *env, */ dst_reg->off += imm; } else { + bool had_id; + if (src_reg->type == PTR_TO_PACKET) { /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ tmp_reg = *dst_reg; /* save r7 state */ @@ -1464,14 +1522,23 @@ static int check_packet_ptr_add(struct bpf_verifier_env *env, src_reg->imm); return -EACCES; } + + had_id = (dst_reg->id != 0); + /* dst_reg stays as pkt_ptr type and since some positive * integer value was added to the pointer, increment its 'id' */ dst_reg->id = ++env->id_gen; - /* something was added to pkt_ptr, set range and off to zero */ + /* something was added to pkt_ptr, set range to zero */ + dst_reg->aux_off += dst_reg->off; dst_reg->off = 0; dst_reg->range = 0; + if (had_id) + dst_reg->aux_off_align = min(dst_reg->aux_off_align, + src_reg->min_align); + else + dst_reg->aux_off_align = src_reg->min_align; } return 0; } @@ -1645,6 +1712,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg) reg->min_value = BPF_REGISTER_MIN_RANGE; } +static u32 calc_align(u32 imm) +{ + if (!imm) + return 1U << 31; + return imm - ((imm - 1) & imm); +} + static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1652,8 +1726,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, s64 min_val = BPF_REGISTER_MIN_RANGE; u64 max_val = BPF_REGISTER_MAX_RANGE; u8 opcode = BPF_OP(insn->code); + u32 dst_align, src_align; dst_reg = ®s[insn->dst_reg]; + src_align = 0; if (BPF_SRC(insn->code) == BPF_X) { check_reg_overflow(®s[insn->src_reg]); min_val = regs[insn->src_reg].min_value; @@ -1669,12 +1745,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, regs[insn->src_reg].type != UNKNOWN_VALUE) { min_val = BPF_REGISTER_MIN_RANGE; max_val = BPF_REGISTER_MAX_RANGE; + src_align = 0; + } else { + src_align = regs[insn->src_reg].min_align; } } else if (insn->imm < BPF_REGISTER_MAX_RANGE && (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { min_val = max_val = insn->imm; + src_align = calc_align(insn->imm); } + dst_align = dst_reg->min_align; + /* We don't know anything about what was done to this register, mark it * as unknown. */ @@ -1699,18 +1781,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_reg->min_value += min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value += max_val; + dst_reg->min_align = min(src_align, dst_align); break; case BPF_SUB: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value -= min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value -= max_val; + dst_reg->min_align = min(src_align, dst_align); break; case BPF_MUL: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value *= min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value *= max_val; + dst_reg->min_align = max(src_align, dst_align); break; case BPF_AND: /* Disallow AND'ing of negative numbers, ain't nobody got time @@ -1722,17 +1807,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, else dst_reg->min_value = 0; dst_reg->max_value = max_val; + dst_reg->min_align = max(src_align, dst_align); break; case BPF_LSH: /* Gotta have special overflow logic here, if we're shifting * more than MAX_RANGE then just assume we have an invalid * range. */ - if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) + if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) { dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) - dst_reg->min_value <<= min_val; - + dst_reg->min_align = 1; + } else { + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value <<= min_val; + if (!dst_reg->min_align) + dst_reg->min_align = 1; + dst_reg->min_align <<= min_val; + } if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->max_value = BPF_REGISTER_MAX_RANGE; else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) @@ -1742,11 +1833,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* RSH by a negative number is undefined, and the BPF_RSH is an * unsigned shift, so make the appropriate casts. */ - if (min_val < 0 || dst_reg->min_value < 0) + if (min_val < 0 || dst_reg->min_value < 0) { dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else + } else { dst_reg->min_value = (u64)(dst_reg->min_value) >> min_val; + } + if (min_val < 0) { + dst_reg->min_align = 1; + } else { + dst_reg->min_align >>= (u64) min_val; + if (!dst_reg->min_align) + dst_reg->min_align = 1; + } if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value >>= max_val; break; @@ -1848,6 +1947,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].imm = insn->imm; regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; + regs[insn->dst_reg].min_align = calc_align(insn->imm); } } else if (opcode > BPF_END) { @@ -1909,6 +2009,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg->type = PTR_TO_STACK; dst_reg->imm = insn->imm; return 0; + } else if (opcode == BPF_ADD && + BPF_CLASS(insn->code) == BPF_ALU64 && + dst_reg->type == PTR_TO_STACK && + ((BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == CONST_IMM) || + BPF_SRC(insn->code) == BPF_K)) { + if (BPF_SRC(insn->code) == BPF_X) + dst_reg->imm += regs[insn->src_reg].imm; + else + dst_reg->imm += insn->imm; + return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && (dst_reg->type == PTR_TO_PACKET || @@ -2112,14 +2223,19 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, struct bpf_reg_state *reg = ®s[regno]; if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { - reg->type = type; + if (type == UNKNOWN_VALUE) { + __mark_reg_unknown_value(regs, regno); + } else if (reg->map_ptr->inner_map_meta) { + reg->type = CONST_PTR_TO_MAP; + reg->map_ptr = reg->map_ptr->inner_map_meta; + } else { + reg->type = type; + } /* We don't need id from this point onwards anymore, thus we * should better reset it, so that state pruning has chances * to take effect. */ reg->id = 0; - if (type == UNKNOWN_VALUE) - __mark_reg_unknown_value(regs, regno); } } @@ -2328,7 +2444,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -2361,11 +2476,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* reset caller saved regs to unreadable */ - for (i = 0; i < CALLER_SAVED_REGS; i++) { - reg = regs + caller_saved[i]; - reg->type = NOT_INIT; - reg->imm = 0; - } + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(regs, caller_saved[i]); /* mark destination R0 register as readable, since it contains * the value fetched from the packet @@ -2524,6 +2636,7 @@ static int check_cfg(struct bpf_verifier_env *env) env->explored_states[t + 1] = STATE_LIST_MARK; } else { /* conditional jump with two edges */ + env->explored_states[t] = STATE_LIST_MARK; ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret == 1) goto peek_stack; @@ -2575,7 +2688,8 @@ static int check_cfg(struct bpf_verifier_env *env) /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct bpf_reg_state *old, +static bool compare_ptrs_to_packet(struct bpf_verifier_env *env, + struct bpf_reg_state *old, struct bpf_reg_state *cur) { if (old->id != cur->id) @@ -2618,7 +2732,7 @@ static bool compare_ptrs_to_packet(struct bpf_reg_state *old, * 'if (R4 > data_end)' and all further insn were already good with r=20, * so they will be good with r=30 and we can prune the search. */ - if (old->off <= cur->off && + if (!env->strict_alignment && old->off <= cur->off && old->off >= old->range && cur->off >= cur->range) return true; @@ -2682,8 +2796,14 @@ static bool states_equal(struct bpf_verifier_env *env, rcur->type != NOT_INIT)) continue; + /* Don't care about the reg->id in this case. */ + if (rold->type == PTR_TO_MAP_VALUE_OR_NULL && + rcur->type == PTR_TO_MAP_VALUE_OR_NULL && + rold->map_ptr == rcur->map_ptr) + continue; + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && - compare_ptrs_to_packet(rold, rcur)) + compare_ptrs_to_packet(env, rold, rcur)) continue; return false; @@ -2816,15 +2936,22 @@ static int do_check(struct bpf_verifier_env *env) goto process_bpf_exit; } - if (log_level && do_print_state) { - verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); + if (need_resched()) + cond_resched(); + + if (log_level > 1 || (log_level && do_print_state)) { + if (log_level > 1) + verbose("%d:", insn_idx); + else + verbose("\nfrom %d to %d:", + prev_insn_idx, insn_idx); print_verifier_state(&env->cur_state); do_print_state = false; } if (log_level) { verbose("%d: ", insn_idx); - print_bpf_insn(insn); + print_bpf_insn(env, insn); } err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); @@ -2960,7 +3087,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - err = check_call(env, insn->imm); + err = check_call(env, insn->imm, insn_idx); if (err) return err; @@ -3044,16 +3171,33 @@ static int do_check(struct bpf_verifier_env *env) return 0; } +static int check_map_prealloc(struct bpf_map *map) +{ + return (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || + !(map->map_flags & BPF_F_NO_PREALLOC); +} + static int check_map_prog_compatibility(struct bpf_map *map, struct bpf_prog *prog) { - if (prog->type == BPF_PROG_TYPE_PERF_EVENT && - (map->map_type == BPF_MAP_TYPE_HASH || - map->map_type == BPF_MAP_TYPE_PERCPU_HASH) && - (map->map_flags & BPF_F_NO_PREALLOC)) { - verbose("perf_event programs can only use preallocated hash map\n"); - return -EINVAL; + /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use + * preallocated hash maps, since doing memory allocation + * in overflow_handler can crash depending on where nmi got + * triggered. + */ + if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { + if (!check_map_prealloc(map)) { + verbose("perf_event programs can only use preallocated hash map\n"); + return -EINVAL; + } + if (map->inner_map_meta && + !check_map_prealloc(map->inner_map_meta)) { + verbose("perf_event programs can only use preallocated inner hash map\n"); + return -EINVAL; + } } return 0; } @@ -3182,6 +3326,41 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) insn->src_reg = 0; } +/* single env->prog->insni[off] instruction was replaced with the range + * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying + * [0, off) and [off, end) to new locations, so the patched range stays zero + */ +static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, + u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + + if (cnt == 1) + return 0; + new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len); + if (!new_data) + return -ENOMEM; + memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); + memcpy(new_data + off + cnt - 1, old_data + off, + sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); + env->insn_aux_data = new_data; + vfree(old_data); + return 0; +} + +static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len) +{ + struct bpf_prog *new_prog; + + new_prog = bpf_patch_insn_single(env->prog, off, patch, len); + if (!new_prog) + return NULL; + if (adjust_insn_aux_data(env, new_prog->len, off, len)) + return NULL; + return new_prog; +} + /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -3201,10 +3380,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) verbose("bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { - new_prog = bpf_patch_insn_single(env->prog, 0, - insn_buf, cnt); + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); if (!new_prog) return -ENOMEM; + env->prog = new_prog; delta += cnt - 1; } @@ -3229,7 +3408,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) else continue; - if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) + if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); @@ -3238,8 +3417,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, - cnt); + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@ -3253,6 +3431,89 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return 0; } +/* fixup insn->imm field of bpf_call instructions + * and inline eligible helpers as explicit sequence of BPF instructions + * + * this function is called after eBPF program passed verification + */ +static int fixup_bpf_calls(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog = env->prog; + struct bpf_insn *insn = prog->insnsi; + const struct bpf_func_proto *fn; + const int insn_cnt = prog->len; + struct bpf_insn insn_buf[16]; + struct bpf_prog *new_prog; + struct bpf_map *map_ptr; + int i, cnt, delta = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; + + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_tail_call) { + /* If we tail call into other programs, we + * cannot make any assumptions since they can + * be replaced dynamically during runtime in + * the program array. + */ + prog->cb_access = 1; + + /* mark bpf_tail_call as different opcode to avoid + * conditional branch in the interpeter for every normal + * call and to prevent accidental JITing by JIT compiler + * that doesn't support bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + + if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) { + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (map_ptr == BPF_MAP_PTR_POISON || + !map_ptr->ops->map_gen_lookup) + goto patch_call_imm; + + cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, + cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + + /* keep walking new program and skip insns we just inserted */ + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + +patch_call_imm: + fn = prog->aux->ops->get_func_proto(insn->imm); + /* all functions that have prototype and verifier allowed + * programs to call them, must be real in-kernel functions + */ + if (!fn->func) { + verbose("kernel subsystem misconfigured func %s#%d\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } + insn->imm = fn->func - __bpf_call_base; + } + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -3321,6 +3582,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) log_level = 0; } + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + env->strict_alignment = true; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; @@ -3348,6 +3613,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); + if (ret == 0) + ret = fixup_bpf_calls(env); + if (log_level && log_len >= log_size - 1) { BUG_ON(log_len >= log_size); /* verifier log exceeded user supplied buffer */ @@ -3423,6 +3691,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, log_level = 0; + env->strict_alignment = false; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + env->strict_alignment = true; + env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), GFP_KERNEL); diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 9203bfb0560399af9611bd40516758ab01076cba..00f4d6bf048fab1d25068842859075a3e3b952dd 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -5,6 +5,7 @@ #include #include #include +#include /* * A cgroup can be associated with multiple css_sets as different tasks may @@ -134,7 +135,7 @@ static inline void put_css_set(struct css_set *cset) * can see it. Similar to atomic_dec_and_lock(), but for an * rwlock */ - if (atomic_add_unless(&cset->refcount, -1, 1)) + if (refcount_dec_not_one(&cset->refcount)) return; spin_lock_irqsave(&css_set_lock, flags); @@ -147,7 +148,7 @@ static inline void put_css_set(struct css_set *cset) */ static inline void get_css_set(struct css_set *cset) { - atomic_inc(&cset->refcount); + refcount_inc(&cset->refcount); } bool cgroup_ssid_enabled(int ssid); @@ -163,7 +164,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct cgroup_root *root, unsigned long magic, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 1dc22f6b49f5e06c4af22222dfb1b32c885ce16a..85d75152402dd8c601c65c988d11efa1f75945ad 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -346,7 +346,7 @@ static int cgroup_task_count(const struct cgroup *cgrp) spin_lock_irq(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) - count += atomic_read(&link->cset->refcount); + count += refcount_read(&link->cset->refcount); spin_unlock_irq(&css_set_lock); return count; } @@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, struct cgroup_subsys *ss; struct dentry *dentry; int i, ret; + bool new_root = false; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); @@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ret = -ENOMEM; goto out_unlock; } + new_root = true; init_cgroup_root(root, &opts); - ret = cgroup_setup_root(root, opts.subsys_mask); + ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD); if (ret) cgroup_free_root(root); @@ -1200,6 +1202,18 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, CGROUP_SUPER_MAGIC, ns); + /* + * There's a race window after we release cgroup_mutex and before + * allocating a superblock. Make sure a concurrent process won't + * be able to re-use the root during this window by delaying the + * initialization of root refcnt. + */ + if (new_root) { + mutex_lock(&cgroup_mutex); + percpu_ref_reinit(&root->cgrp.self.refcnt); + mutex_unlock(&cgroup_mutex); + } + /* * If @pinned_sb, we're reusing an existing root and holding an * extra ref on its sb. Mount is complete. Put the extra ref. @@ -1286,7 +1300,7 @@ static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css, u64 count; rcu_read_lock(); - count = atomic_read(&task_css_set(current)->refcount); + count = refcount_read(&task_css_set(current)->refcount); rcu_read_unlock(); return count; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 687f5e0194efccbadce199c0570cd08b8c96181a..8d4e85eae42c08481899e415075ee42c6d12f90f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -189,7 +189,7 @@ static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { - .count = { .counter = 2, }, + .count = REFCOUNT_INIT(2), .user_ns = &init_user_ns, .ns.ops = &cgroupns_operations, .ns.inum = PROC_CGROUP_INIT_INO, @@ -436,7 +436,12 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp, return css; } -static void cgroup_get(struct cgroup *cgrp) +static void __maybe_unused cgroup_get(struct cgroup *cgrp) +{ + css_get(&cgrp->self); +} + +static void cgroup_get_live(struct cgroup *cgrp) { WARN_ON_ONCE(cgroup_is_dead(cgrp)); css_get(&cgrp->self); @@ -554,7 +559,7 @@ EXPORT_SYMBOL_GPL(of_css); * haven't been created. */ struct css_set init_css_set = { - .refcount = ATOMIC_INIT(1), + .refcount = REFCOUNT_INIT(1), .tasks = LIST_HEAD_INIT(init_css_set.tasks), .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), @@ -724,7 +729,7 @@ void put_css_set_locked(struct css_set *cset) lockdep_assert_held(&css_set_lock); - if (!atomic_dec_and_test(&cset->refcount)) + if (!refcount_dec_and_test(&cset->refcount)) return; /* This css_set is dead. unlink it and release cgroup and css refs */ @@ -932,7 +937,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, list_add_tail(&link->cgrp_link, &cset->cgrp_links); if (cgroup_parent(cgrp)) - cgroup_get(cgrp); + cgroup_get_live(cgrp); } /** @@ -977,7 +982,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, return NULL; } - atomic_set(&cset->refcount, 1); + refcount_set(&cset->refcount, 1); INIT_LIST_HEAD(&cset->tasks); INIT_LIST_HEAD(&cset->mg_tasks); INIT_LIST_HEAD(&cset->task_iters); @@ -1640,7 +1645,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -1656,8 +1661,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) root_cgrp->id = ret; root_cgrp->ancestor_ids[0] = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, - GFP_KERNEL); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, + ref_flags, GFP_KERNEL); if (ret) goto out; @@ -1802,7 +1807,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, return ERR_PTR(-EINVAL); } cgrp_dfl_visible = true; - cgroup_get(&cgrp_dfl_root.cgrp); + cgroup_get_live(&cgrp_dfl_root.cgrp); dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, CGROUP2_SUPER_MAGIC, ns); @@ -2576,7 +2581,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp) if (!css || !percpu_ref_is_dying(&css->refcnt)) continue; - cgroup_get(dsct); + cgroup_get_live(dsct); prepare_to_wait(&dsct->offline_waitq, &wait, TASK_UNINTERRUPTIBLE); @@ -3947,7 +3952,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, { lockdep_assert_held(&cgroup_mutex); - cgroup_get(cgrp); + cgroup_get_live(cgrp); memset(css, 0, sizeof(*css)); css->cgroup = cgrp; @@ -4123,7 +4128,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) /* allocation complete, commit to creation */ list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); atomic_inc(&root->nr_cgrps); - cgroup_get(parent); + cgroup_get_live(parent); /* * @cgrp is now fully operational. If something fails after this @@ -4260,6 +4265,11 @@ static void kill_css(struct cgroup_subsys_state *css) { lockdep_assert_held(&cgroup_mutex); + if (css->flags & CSS_DYING) + return; + + css->flags |= CSS_DYING; + /* * This must happen before css is disassociated with its cgroup. * See seq_css() for details. @@ -4513,7 +4523,7 @@ int __init cgroup_init(void) hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); - BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); + BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); mutex_unlock(&cgroup_mutex); @@ -4947,7 +4957,7 @@ struct cgroup *cgroup_get_from_path(const char *path) if (kn) { if (kernfs_type(kn) == KERNFS_DIR) { cgrp = kn->priv; - cgroup_get(cgrp); + cgroup_get_live(cgrp); } else { cgrp = ERR_PTR(-ENOTDIR); } @@ -5027,6 +5037,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) /* Socket clone path */ if (skcd->val) { + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ cgroup_get(sock_cgroup_ptr(skcd)); return; } diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 0f41292be0fb7dea2800acae3550c0dea3903f45..ae643412948added94f05d7efb120631f7798b44 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -176,9 +176,9 @@ typedef enum { } cpuset_flagbits_t; /* convenient tests for these bits */ -static inline bool is_cpuset_online(const struct cpuset *cs) +static inline bool is_cpuset_online(struct cpuset *cs) { - return test_bit(CS_ONLINE, &cs->flags); + return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) @@ -2121,10 +2121,8 @@ int __init cpuset_init(void) { int err = 0; - if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) - BUG(); - if (!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)) - BUG(); + BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); @@ -2139,8 +2137,7 @@ int __init cpuset_init(void) if (err < 0) return err; - if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) - BUG(); + BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)); return 0; } @@ -2354,7 +2351,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) rebuild_sched_domains(); } -void cpuset_update_active_cpus(bool cpu_online) +void cpuset_update_active_cpus(void) { /* * We're inside cpu hotplug critical region which usually nests diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index 96d38dab6fb2f7fdad9e64236572562c9c43ff36..66129eb4371d124f97c2a3233549be3673bd93ca 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -31,7 +31,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) kfree(new_ns); return ERR_PTR(ret); } - atomic_set(&new_ns->count, 1); + refcount_set(&new_ns->count, 1); new_ns->ns.ops = &cgroupns_operations; return new_ns; } diff --git a/kernel/compat.c b/kernel/compat.c index 19aec5d981081d26914c14f5db2e76646c399888..933bcb31ae10d4c1dcaf2c9b5f48861080d9b3df 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -108,8 +108,8 @@ COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv, COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv, struct timezone __user *, tz) { + struct timespec64 new_ts; struct timeval user_tv; - struct timespec new_ts; struct timezone new_tz; if (tv) { @@ -123,7 +123,7 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv, return -EFAULT; } - return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); + return do_sys_settimeofday64(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } static int __compat_get_timeval(struct timeval *tv, const struct compat_timeval __user *ctv) @@ -240,18 +240,20 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, struct compat_timespec __user *, rmtp) { struct timespec tu, rmt; + struct timespec64 tu64; mm_segment_t oldfs; long ret; if (compat_get_timespec(&tu, rqtp)) return -EFAULT; - if (!timespec_valid(&tu)) + tu64 = timespec_to_timespec64(tu); + if (!timespec64_valid(&tu64)) return -EINVAL; oldfs = get_fs(); set_fs(KERNEL_DS); - ret = hrtimer_nanosleep(&tu, + ret = hrtimer_nanosleep(&tu64, rmtp ? (struct timespec __user *)&rmt : NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC); set_fs(oldfs); diff --git a/kernel/cpu.c b/kernel/cpu.c index 37b223e4fc05b74fc50aa51df0c307d65da026c3..cb5103413bd8df5056b3eda682a8d93b08656dd8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1125,6 +1125,8 @@ core_initcall(cpu_hotplug_pm_sync_init); #endif /* CONFIG_PM_SLEEP_SMP */ +int __boot_cpu_id; + #endif /* CONFIG_SMP */ /* Boot processor state steps */ @@ -1656,13 +1658,13 @@ static ssize_t write_cpuhp_target(struct device *dev, ret = !sp->name || sp->cant_stop ? -EINVAL : 0; mutex_unlock(&cpuhp_state_mutex); if (ret) - return ret; + goto out; if (st->state < target) ret = do_cpu_up(dev->id, target); else ret = do_cpu_down(dev->id, target); - +out: unlock_device_hotplug(); return ret ? ret : count; } @@ -1815,6 +1817,10 @@ void __init boot_cpu_init(void) set_cpu_active(cpu, true); set_cpu_present(cpu, true); set_cpu_possible(cpu, true); + +#ifdef CONFIG_SMP + __boot_cpu_id = cpu; +#endif } /* diff --git a/kernel/crash_core.c b/kernel/crash_core.c new file mode 100644 index 0000000000000000000000000000000000000000..fcbd568f1e95069e6f42a46cad5f5c739f273def --- /dev/null +++ b/kernel/crash_core.c @@ -0,0 +1,439 @@ +/* + * crash.c - kernel crash support code. + * Copyright (C) 2002-2004 Eric Biederman + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include +#include +#include + +#include +#include + +/* vmcoreinfo stuff */ +static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; +size_t vmcoreinfo_size; +size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); + +/* + * parsing the "crashkernel" commandline + * + * this code is intended to be called from architecture specific code + */ + + +/* + * This function parses command lines in the format + * + * crashkernel=ramsize-range:size[,...][@offset] + * + * The function returns 0 on success and -EINVAL on failure. + */ +static int __init parse_crashkernel_mem(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + char *cur = cmdline, *tmp; + + /* for each entry of the comma-separated list */ + do { + unsigned long long start, end = ULLONG_MAX, size; + + /* get the start of the range */ + start = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("crashkernel: Memory value expected\n"); + return -EINVAL; + } + cur = tmp; + if (*cur != '-') { + pr_warn("crashkernel: '-' expected\n"); + return -EINVAL; + } + cur++; + + /* if no ':' is here, than we read the end */ + if (*cur != ':') { + end = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("crashkernel: Memory value expected\n"); + return -EINVAL; + } + cur = tmp; + if (end <= start) { + pr_warn("crashkernel: end <= start\n"); + return -EINVAL; + } + } + + if (*cur != ':') { + pr_warn("crashkernel: ':' expected\n"); + return -EINVAL; + } + cur++; + + size = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("Memory value expected\n"); + return -EINVAL; + } + cur = tmp; + if (size >= system_ram) { + pr_warn("crashkernel: invalid size\n"); + return -EINVAL; + } + + /* match ? */ + if (system_ram >= start && system_ram < end) { + *crash_size = size; + break; + } + } while (*cur++ == ','); + + if (*crash_size > 0) { + while (*cur && *cur != ' ' && *cur != '@') + cur++; + if (*cur == '@') { + cur++; + *crash_base = memparse(cur, &tmp); + if (cur == tmp) { + pr_warn("Memory value expected after '@'\n"); + return -EINVAL; + } + } + } + + return 0; +} + +/* + * That function parses "simple" (old) crashkernel command lines like + * + * crashkernel=size[@offset] + * + * It returns 0 on success and -EINVAL on failure. + */ +static int __init parse_crashkernel_simple(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + if (*cur == '@') + *crash_base = memparse(cur+1, &cur); + else if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char: %c\n", *cur); + return -EINVAL; + } + + return 0; +} + +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + +/* + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. + */ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char: %c\n", *cur); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char: %c\n", *cur); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + +static int __init __parse_crashkernel(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *name, + const char *suffix) +{ + char *first_colon, *first_space; + char *ck_cmdline; + + BUG_ON(!crash_size || !crash_base); + *crash_size = 0; + *crash_base = 0; + + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); + + if (!ck_cmdline) + return -EINVAL; + + ck_cmdline += strlen(name); + + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + suffix); + /* + * if the commandline contains a ':', then that's the extended + * syntax -- if not, it must be the classic syntax + */ + first_colon = strchr(ck_cmdline, ':'); + first_space = strchr(ck_cmdline, ' '); + if (first_colon && (!first_space || first_colon < first_space)) + return parse_crashkernel_mem(ck_cmdline, system_ram, + crash_size, crash_base); + + return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); +} + +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ +int __init parse_crashkernel(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", NULL); +} + +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); +} + +int __init parse_crashkernel_low(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel=", suffix_tbl[SUFFIX_LOW]); +} + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len) +{ + struct elf_note *note = (struct elf_note *)buf; + + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); + memcpy(buf, name, note->n_namesz); + buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); + memcpy(buf, data, data_len); + buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); + + return buf; +} + +void final_note(Elf_Word *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +static void update_vmcoreinfo_note(void) +{ + u32 *buf = vmcoreinfo_note; + + if (!vmcoreinfo_size) + return; + buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, + vmcoreinfo_size); + final_note(buf); +} + +void crash_save_vmcoreinfo(void) +{ + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); + update_vmcoreinfo_note(); +} + +void vmcoreinfo_append_str(const char *fmt, ...) +{ + va_list args; + char buf[0x50]; + size_t r; + + va_start(args, fmt); + r = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); + + memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); + + vmcoreinfo_size += r; +} + +/* + * provide an empty default implementation here -- architecture + * code may override this + */ +void __weak arch_crash_save_vmcoreinfo(void) +{} + +phys_addr_t __weak paddr_vmcoreinfo_note(void) +{ + return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note); +} + +static int __init crash_save_vmcoreinfo_init(void) +{ + VMCOREINFO_OSRELEASE(init_uts_ns.name.release); + VMCOREINFO_PAGESIZE(PAGE_SIZE); + + VMCOREINFO_SYMBOL(init_uts_ns); + VMCOREINFO_SYMBOL(node_online_map); +#ifdef CONFIG_MMU + VMCOREINFO_SYMBOL(swapper_pg_dir); +#endif + VMCOREINFO_SYMBOL(_stext); + VMCOREINFO_SYMBOL(vmap_area_list); + +#ifndef CONFIG_NEED_MULTIPLE_NODES + VMCOREINFO_SYMBOL(mem_map); + VMCOREINFO_SYMBOL(contig_page_data); +#endif +#ifdef CONFIG_SPARSEMEM + VMCOREINFO_SYMBOL(mem_section); + VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); + VMCOREINFO_STRUCT_SIZE(mem_section); + VMCOREINFO_OFFSET(mem_section, section_mem_map); +#endif + VMCOREINFO_STRUCT_SIZE(page); + VMCOREINFO_STRUCT_SIZE(pglist_data); + VMCOREINFO_STRUCT_SIZE(zone); + VMCOREINFO_STRUCT_SIZE(free_area); + VMCOREINFO_STRUCT_SIZE(list_head); + VMCOREINFO_SIZE(nodemask_t); + VMCOREINFO_OFFSET(page, flags); + VMCOREINFO_OFFSET(page, _refcount); + VMCOREINFO_OFFSET(page, mapping); + VMCOREINFO_OFFSET(page, lru); + VMCOREINFO_OFFSET(page, _mapcount); + VMCOREINFO_OFFSET(page, private); + VMCOREINFO_OFFSET(page, compound_dtor); + VMCOREINFO_OFFSET(page, compound_order); + VMCOREINFO_OFFSET(page, compound_head); + VMCOREINFO_OFFSET(pglist_data, node_zones); + VMCOREINFO_OFFSET(pglist_data, nr_zones); +#ifdef CONFIG_FLAT_NODE_MEM_MAP + VMCOREINFO_OFFSET(pglist_data, node_mem_map); +#endif + VMCOREINFO_OFFSET(pglist_data, node_start_pfn); + VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); + VMCOREINFO_OFFSET(pglist_data, node_id); + VMCOREINFO_OFFSET(zone, free_area); + VMCOREINFO_OFFSET(zone, vm_stat); + VMCOREINFO_OFFSET(zone, spanned_pages); + VMCOREINFO_OFFSET(free_area, free_list); + VMCOREINFO_OFFSET(list_head, next); + VMCOREINFO_OFFSET(list_head, prev); + VMCOREINFO_OFFSET(vmap_area, va_start); + VMCOREINFO_OFFSET(vmap_area, list); + VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); + log_buf_vmcoreinfo_setup(); + VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); + VMCOREINFO_NUMBER(NR_FREE_PAGES); + VMCOREINFO_NUMBER(PG_lru); + VMCOREINFO_NUMBER(PG_private); + VMCOREINFO_NUMBER(PG_swapcache); + VMCOREINFO_NUMBER(PG_slab); +#ifdef CONFIG_MEMORY_FAILURE + VMCOREINFO_NUMBER(PG_hwpoison); +#endif + VMCOREINFO_NUMBER(PG_head_mask); + VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); +#ifdef CONFIG_HUGETLB_PAGE + VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); +#endif + + arch_crash_save_vmcoreinfo(); + update_vmcoreinfo_note(); + + return 0; +} + +subsys_initcall(crash_save_vmcoreinfo_init); diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index c04917cad1bfdc50fe4f00b08eaa58c9e7e7692e..1b2be63c85282fdb6f910e145853f97c23d8efb4 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -229,12 +229,18 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, } if (regs) { + mm_segment_t fs; + if (crosstask) goto exit_put; if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); + + fs = get_fs(); + set_fs(USER_DS); perf_callchain_user(&ctx, regs); + set_fs(fs); } } diff --git a/kernel/events/core.c b/kernel/events/core.c index ff01cba86f430fd29916ab73c755698bf81feff0..6c4e523dc1e2e6b53d44bf6a540bc168bcde1eca 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec) perf_event_comm_event(&comm_event); } +/* + * namespaces tracking + */ + +struct perf_namespaces_event { + struct task_struct *task; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[NR_NAMESPACES]; + } event_id; +}; + +static int perf_event_namespaces_match(struct perf_event *event) +{ + return event->attr.namespaces; +} + +static void perf_event_namespaces_output(struct perf_event *event, + void *data) +{ + struct perf_namespaces_event *namespaces_event = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_namespaces_match(event)) + return; + + perf_event_header__init_id(&namespaces_event->event_id.header, + &sample, event); + ret = perf_output_begin(&handle, event, + namespaces_event->event_id.header.size); + if (ret) + return; + + namespaces_event->event_id.pid = perf_event_pid(event, + namespaces_event->task); + namespaces_event->event_id.tid = perf_event_tid(event, + namespaces_event->task); + + perf_output_put(&handle, namespaces_event->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, + struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct path ns_path; + struct inode *ns_inode; + void *error; + + error = ns_get_path(&ns_path, task, ns_ops); + if (!error) { + ns_inode = ns_path.dentry->d_inode; + ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev); + ns_link_info->ino = ns_inode->i_ino; + } +} + +void perf_event_namespaces(struct task_struct *task) +{ + struct perf_namespaces_event namespaces_event; + struct perf_ns_link_info *ns_link_info; + + if (!atomic_read(&nr_namespaces_events)) + return; + + namespaces_event = (struct perf_namespaces_event){ + .task = task, + .event_id = { + .header = { + .type = PERF_RECORD_NAMESPACES, + .misc = 0, + .size = sizeof(namespaces_event.event_id), + }, + /* .pid */ + /* .tid */ + .nr_namespaces = NR_NAMESPACES, + /* .link_info[NR_NAMESPACES] */ + }, + }; + + ns_link_info = namespaces_event.event_id.link_info; + + perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX], + task, &mntns_operations); + +#ifdef CONFIG_USER_NS + perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX], + task, &userns_operations); +#endif +#ifdef CONFIG_NET_NS + perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX], + task, &netns_operations); +#endif +#ifdef CONFIG_UTS_NS + perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX], + task, &utsns_operations); +#endif +#ifdef CONFIG_IPC_NS + perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX], + task, &ipcns_operations); +#endif +#ifdef CONFIG_PID_NS + perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX], + task, &pidns_operations); +#endif +#ifdef CONFIG_CGROUPS + perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX], + task, &cgroupns_operations); +#endif + + perf_iterate_sb(perf_event_namespaces_output, + &namespaces_event, + NULL); +} + /* * mmap tracking */ @@ -7184,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + * To avoid leaking information to userspace, we must always + * reject kernel samples when exclude_kernel is set. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling. */ @@ -7204,6 +7351,12 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); + /* + * For security, drop the skid kernel samples if necessary. + */ + if (!sample_is_allowed(event, regs)) + return ret; + /* * XXX event_limit might not quite work as expected on inherited * events @@ -9146,6 +9299,8 @@ static void account_event(struct perf_event *event) atomic_inc(&nr_mmap_events); if (event->attr.comm) atomic_inc(&nr_comm_events); + if (event->attr.namespaces) + atomic_inc(&nr_namespaces_events); if (event->attr.task) atomic_inc(&nr_task_events); if (event->attr.freq) @@ -9691,6 +9846,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EACCES; } + if (attr.namespaces) { + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } + if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 257fa460b846032744e2da500d489d0995678571..2831480c63a28b8e9b8cee1c0b30968860b3fcd0 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->paused = 1; } +void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) +{ + /* + * OVERWRITE is determined by perf_aux_output_end() and can't + * be passed in directly. + */ + if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE)) + return; + + handle->aux_flags |= flags; +} +EXPORT_SYMBOL_GPL(perf_aux_output_flag); + /* * This is called before hardware starts writing to the AUX area to * obtain an output handle and make sure there's room in the buffer. @@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, handle->event = event; handle->head = aux_head; handle->size = 0; + handle->aux_flags = 0; /* * In overwrite mode, AUX data stores do not depend on aux_tail, @@ -408,34 +422,32 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * of the AUX buffer management code is that after pmu::stop(), the AUX * transaction must be stopped and therefore drop the AUX reference count. */ -void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) +void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) { + bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED); struct ring_buffer *rb = handle->rb; - bool wakeup = truncated; unsigned long aux_head; - u64 flags = 0; - - if (truncated) - flags |= PERF_AUX_FLAG_TRUNCATED; /* in overwrite mode, driver provides aux_head via handle */ if (rb->aux_overwrite) { - flags |= PERF_AUX_FLAG_OVERWRITE; + handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; local_set(&rb->aux_head, aux_head); } else { + handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; + aux_head = local_read(&rb->aux_head); local_add(size, &rb->aux_head); } - if (size || flags) { + if (size || handle->aux_flags) { /* * Only send RECORD_AUX if we have something useful to communicate */ - perf_event_aux_event(handle->event, aux_head, size, flags); + perf_event_aux_event(handle->event, aux_head, size, + handle->aux_flags); } aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); @@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, } if (wakeup) { - if (truncated) + if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) handle->event->pending_disable = 1; perf_output_wakeup(handle); } diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93de8c3be3180f3cbd8694b955a1ac3..e53770d2bf956bf5bf6ec1a42c75121552713da6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include @@ -178,6 +179,24 @@ void __weak arch_release_thread_stack(unsigned long *stack) */ #define NR_CACHED_STACKS 2 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); + +static int free_vm_stack_cache(unsigned int cpu) +{ + struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); + int i; + + for (i = 0; i < NR_CACHED_STACKS; i++) { + struct vm_struct *vm_stack = cached_vm_stacks[i]; + + if (!vm_stack) + continue; + + vfree(vm_stack->addr); + cached_vm_stacks[i] = NULL; + } + + return 0; +} #endif static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) @@ -202,7 +221,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, VMALLOC_START, VMALLOC_END, - THREADINFO_GFP | __GFP_HIGHMEM, + THREADINFO_GFP, PAGE_KERNEL, 0, node, __builtin_return_address(0)); @@ -466,6 +485,11 @@ void __init fork_init(void) for (i = 0; i < UCOUNT_COUNTS; i++) { init_user_ns.ucount_max[i] = max_threads/2; } + +#ifdef CONFIG_VMAP_STACK + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", + NULL, free_vm_stack_cache); +#endif } int __weak arch_dup_task_struct(struct task_struct *dst, @@ -536,7 +560,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) set_task_stack_end_magic(tsk); #ifdef CONFIG_CC_STACKPROTECTOR - tsk->stack_canary = get_random_int(); + tsk->stack_canary = get_random_long(); #endif /* @@ -1313,7 +1337,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) if (atomic_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* - * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it + * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it * without an RCU grace period, see __lock_task_sighand(). */ kmem_cache_free(sighand_cachep, sighand); @@ -1438,6 +1462,7 @@ static void rt_mutex_init_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES p->pi_waiters = RB_ROOT; p->pi_waiters_leftmost = NULL; + p->pi_top_task = NULL; p->pi_blocked_on = NULL; #endif } @@ -1552,6 +1577,18 @@ static __latent_entropy struct task_struct *copy_process( if (!p) goto fork_out; + /* + * This _must_ happen before we call free_task(), i.e. before we jump + * to any of the bad_fork_* labels. This is to avoid freeing + * p->set_child_tid which is (ab)used as a kthread's data pointer for + * kernel threads (PF_KTHREAD). + */ + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; + /* + * Clear TID on mm_release()? + */ + p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; + ftrace_graph_init_task(p); rt_mutex_init_task(p); @@ -1679,9 +1716,12 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cleanup_perf; /* copy all the process information */ shm_init_task(p); - retval = copy_semundo(clone_flags, p); + retval = security_task_alloc(p, clone_flags); if (retval) goto bad_fork_cleanup_audit; + retval = copy_semundo(clone_flags, p); + if (retval) + goto bad_fork_cleanup_security; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; @@ -1715,11 +1755,6 @@ static __latent_entropy struct task_struct *copy_process( } } - p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; - /* - * Clear TID on mm_release()? - */ - p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif @@ -1797,6 +1832,8 @@ static __latent_entropy struct task_struct *copy_process( p->parent_exec_id = current->self_exec_id; } + klp_copy_process(p); + spin_lock(¤t->sighand->siglock); /* @@ -1815,11 +1852,13 @@ static __latent_entropy struct task_struct *copy_process( */ recalc_sigpending(); if (signal_pending(current)) { - spin_unlock(¤t->sighand->siglock); - write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cancel_cgroup; } + if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) { + retval = -ENOMEM; + goto bad_fork_cancel_cgroup; + } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -1877,6 +1916,8 @@ static __latent_entropy struct task_struct *copy_process( return p; bad_fork_cancel_cgroup: + spin_unlock(¤t->sighand->siglock); + write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p); bad_fork_free_pid: cgroup_threadgroup_change_end(current); @@ -1903,6 +1944,8 @@ static __latent_entropy struct task_struct *copy_process( exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); +bad_fork_cleanup_security: + security_task_free(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_perf: @@ -2144,7 +2187,7 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, @@ -2352,6 +2395,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) } } + perf_event_namespaces(current); + bad_unshare_cleanup_cred: if (new_cred) put_cred(new_cred); diff --git a/kernel/futex.c b/kernel/futex.c index 45858ec739411f5741667e560552757697441e6b..357348a6cf6b4d71dbc24c9ad89ad2d0e63b20d2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -802,7 +802,7 @@ static int refill_pi_state_cache(void) return 0; } -static struct futex_pi_state * alloc_pi_state(void) +static struct futex_pi_state *alloc_pi_state(void) { struct futex_pi_state *pi_state = current->pi_state_cache; @@ -812,6 +812,11 @@ static struct futex_pi_state * alloc_pi_state(void) return pi_state; } +static void get_pi_state(struct futex_pi_state *pi_state) +{ + WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount)); +} + /* * Drops a reference to the pi_state object and frees or caches it * when the last reference is gone. @@ -856,7 +861,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) * Look up the task based on what TID userspace gave us. * We dont trust it. */ -static struct task_struct * futex_find_get_task(pid_t pid) +static struct task_struct *futex_find_get_task(pid_t pid) { struct task_struct *p; @@ -916,10 +921,12 @@ void exit_pi_state_list(struct task_struct *curr) pi_state->owner = NULL; raw_spin_unlock_irq(&curr->pi_lock); - rt_mutex_unlock(&pi_state->pi_mutex); - + get_pi_state(pi_state); spin_unlock(&hb->lock); + rt_mutex_futex_unlock(&pi_state->pi_mutex); + put_pi_state(pi_state); + raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); @@ -973,6 +980,39 @@ void exit_pi_state_list(struct task_struct *curr) * * [10] There is no transient state which leaves owner and user space * TID out of sync. + * + * + * Serialization and lifetime rules: + * + * hb->lock: + * + * hb -> futex_q, relation + * futex_q -> pi_state, relation + * + * (cannot be raw because hb can contain arbitrary amount + * of futex_q's) + * + * pi_mutex->wait_lock: + * + * {uval, pi_state} + * + * (and pi_mutex 'obviously') + * + * p->pi_lock: + * + * p->pi_state_list -> pi_state->list, relation + * + * pi_state->refcount: + * + * pi_state lifetime + * + * + * Lock order: + * + * hb->lock + * pi_mutex->wait_lock + * p->pi_lock + * */ /* @@ -980,10 +1020,13 @@ void exit_pi_state_list(struct task_struct *curr) * the pi_state against the user space value. If correct, attach to * it. */ -static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, +static int attach_to_pi_state(u32 __user *uaddr, u32 uval, + struct futex_pi_state *pi_state, struct futex_pi_state **ps) { pid_t pid = uval & FUTEX_TID_MASK; + u32 uval2; + int ret; /* * Userspace might have messed up non-PI and PI futexes [3] @@ -991,8 +1034,38 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, if (unlikely(!pi_state)) return -EINVAL; + /* + * We get here with hb->lock held, and having found a + * futex_top_waiter(). This means that futex_lock_pi() of said futex_q + * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), + * which in turn means that futex_lock_pi() still has a reference on + * our pi_state. + * + * The waiter holding a reference on @pi_state also protects against + * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() + * and futex_wait_requeue_pi() as it cannot go to 0 and consequently + * free pi_state before we can take a reference ourselves. + */ WARN_ON(!atomic_read(&pi_state->refcount)); + /* + * Now that we have a pi_state, we can acquire wait_lock + * and do the state validation. + */ + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + + /* + * Since {uval, pi_state} is serialized by wait_lock, and our current + * uval was read without holding it, it can have changed. Verify it + * still is what we expect it to be, otherwise retry the entire + * operation. + */ + if (get_futex_value_locked(&uval2, uaddr)) + goto out_efault; + + if (uval != uval2) + goto out_eagain; + /* * Handle the owner died case: */ @@ -1008,11 +1081,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * is not 0. Inconsistent state. [5] */ if (pid) - return -EINVAL; + goto out_einval; /* * Take a ref on the state and return success. [4] */ - goto out_state; + goto out_attach; } /* @@ -1024,14 +1097,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * Take a ref on the state and return success. [6] */ if (!pid) - goto out_state; + goto out_attach; } else { /* * If the owner died bit is not set, then the pi_state * must have an owner. [7] */ if (!pi_state->owner) - return -EINVAL; + goto out_einval; } /* @@ -1040,11 +1113,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, * user space TID. [9/10] */ if (pid != task_pid_vnr(pi_state->owner)) - return -EINVAL; -out_state: - atomic_inc(&pi_state->refcount); + goto out_einval; + +out_attach: + get_pi_state(pi_state); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); *ps = pi_state; return 0; + +out_einval: + ret = -EINVAL; + goto out_error; + +out_eagain: + ret = -EAGAIN; + goto out_error; + +out_efault: + ret = -EFAULT; + goto out_error; + +out_error: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + return ret; } /* @@ -1095,6 +1186,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, /* * No existing pi state. First waiter. [2] + * + * This creates pi_state, we have hb->lock held, this means nothing can + * observe this state, wait_lock is irrelevant. */ pi_state = alloc_pi_state(); @@ -1119,17 +1213,18 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, return 0; } -static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, +static int lookup_pi_state(u32 __user *uaddr, u32 uval, + struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps) { - struct futex_q *match = futex_top_waiter(hb, key); + struct futex_q *top_waiter = futex_top_waiter(hb, key); /* * If there is a waiter on that futex, validate it and * attach to the pi_state when the validation succeeds. */ - if (match) - return attach_to_pi_state(uval, match->pi_state, ps); + if (top_waiter) + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); /* * We are the first waiter - try to look up the owner based on @@ -1148,7 +1243,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) return -EFAULT; - /*If user space value changed, let the caller retry */ + /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; } @@ -1176,7 +1271,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, struct task_struct *task, int set_waiters) { u32 uval, newval, vpid = task_pid_vnr(task); - struct futex_q *match; + struct futex_q *top_waiter; int ret; /* @@ -1202,9 +1297,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * Lookup existing state first. If it exists, try to attach to * its pi_state. */ - match = futex_top_waiter(hb, key); - if (match) - return attach_to_pi_state(uval, match->pi_state, ps); + top_waiter = futex_top_waiter(hb, key); + if (top_waiter) + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); /* * No waiter and user TID is 0. We are here because the @@ -1285,50 +1380,44 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) wake_q_add(wake_q, p); __unqueue_futex(q); /* - * The waiting task can free the futex_q as soon as - * q->lock_ptr = NULL is written, without taking any locks. A - * memory barrier is required here to prevent the following - * store to lock_ptr from getting ahead of the plist_del. + * The waiting task can free the futex_q as soon as q->lock_ptr = NULL + * is written, without taking any locks. This is possible in the event + * of a spurious wakeup, for example. A memory barrier is required here + * to prevent the following store to lock_ptr from getting ahead of the + * plist_del in __unqueue_futex(). */ - smp_wmb(); - q->lock_ptr = NULL; + smp_store_release(&q->lock_ptr, NULL); } -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, - struct futex_hash_bucket *hb) +/* + * Caller must hold a reference on @pi_state. + */ +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) { - struct task_struct *new_owner; - struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; + struct task_struct *new_owner; + bool postunlock = false; DEFINE_WAKE_Q(wake_q); - bool deboost; int ret = 0; - if (!pi_state) - return -EINVAL; - - /* - * If current does not own the pi_state then the futex is - * inconsistent and user space fiddled with the futex value. - */ - if (pi_state->owner != current) - return -EINVAL; - - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); + if (WARN_ON_ONCE(!new_owner)) { + /* + * As per the comment in futex_unlock_pi() this should not happen. + * + * When this happens, give up our locks and try again, giving + * the futex_lock_pi() instance time to complete, either by + * waiting on the rtmutex or removing itself from the futex + * queue. + */ + ret = -EAGAIN; + goto out_unlock; + } /* - * It is possible that the next waiter (the one that brought - * this owner to the kernel) timed out and is no longer - * waiting on the lock. - */ - if (!new_owner) - new_owner = this->task; - - /* - * We pass it to the next owner. The WAITERS bit is always - * kept enabled while there is PI state around. We cleanup the - * owner died bit, because we are the owner. + * We pass it to the next owner. The WAITERS bit is always kept + * enabled while there is PI state around. We cleanup the owner + * died bit, because we are the owner. */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); @@ -1337,6 +1426,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; + } else if (curval != uval) { /* * If a unconditional UNLOCK_PI operation (user space did not @@ -1349,10 +1439,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, else ret = -EINVAL; } - if (ret) { - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - return ret; - } + + if (ret) + goto out_unlock; + + /* + * This is a point of no return; once we modify the uval there is no + * going back and subsequent operations must not fail. + */ raw_spin_lock(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); @@ -1365,22 +1459,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); +out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - /* - * First unlock HB so the waiter does not spin on it once he got woken - * up. Second wake up the waiter before the priority is adjusted. If we - * deboost first (and lose our higher priority), then the task might get - * scheduled away before the wake up can take place. - */ - spin_unlock(&hb->lock); - wake_up_q(&wake_q); - if (deboost) - rt_mutex_adjust_prio(current); + if (postunlock) + rt_mutex_postunlock(&wake_q); - return 0; + return ret; } /* @@ -1826,7 +1913,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * If that call succeeds then we have pi_state and an * initial refcount on it. */ - ret = lookup_pi_state(ret, hb2, &key2, &pi_state); + ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); } switch (ret) { @@ -1909,7 +1996,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * refcount on the pi_state and store the pointer in * the futex_q object of the waiter. */ - atomic_inc(&pi_state->refcount); + get_pi_state(pi_state); this->pi_state = pi_state; ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, this->rt_waiter, @@ -2009,20 +2096,7 @@ queue_unlock(struct futex_hash_bucket *hb) hb_waiters_dec(hb); } -/** - * queue_me() - Enqueue the futex_q on the futex_hash_bucket - * @q: The futex_q to enqueue - * @hb: The destination hash bucket - * - * The hb->lock must be held by the caller, and is released here. A call to - * queue_me() is typically paired with exactly one call to unqueue_me(). The - * exceptions involve the PI related operations, which may use unqueue_me_pi() - * or nothing if the unqueue is done as part of the wake process and the unqueue - * state is implicit in the state of woken task (see futex_wait_requeue_pi() for - * an example). - */ -static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) - __releases(&hb->lock) +static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) { int prio; @@ -2039,6 +2113,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) plist_node_init(&q->list, prio); plist_add(&q->list, &hb->chain); q->task = current; +} + +/** + * queue_me() - Enqueue the futex_q on the futex_hash_bucket + * @q: The futex_q to enqueue + * @hb: The destination hash bucket + * + * The hb->lock must be held by the caller, and is released here. A call to + * queue_me() is typically paired with exactly one call to unqueue_me(). The + * exceptions involve the PI related operations, which may use unqueue_me_pi() + * or nothing if the unqueue is done as part of the wake process and the unqueue + * state is implicit in the state of woken task (see futex_wait_requeue_pi() for + * an example). + */ +static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) + __releases(&hb->lock) +{ + __queue_me(q, hb); spin_unlock(&hb->lock); } @@ -2125,10 +2217,13 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; - struct task_struct *oldowner = pi_state->owner; u32 uval, uninitialized_var(curval), newval; + struct task_struct *oldowner; int ret; + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + + oldowner = pi_state->owner; /* Owner died? */ if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; @@ -2136,7 +2231,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, /* * We are here either because we stole the rtmutex from the * previous highest priority waiter or we are the highest priority - * waiter but failed to get the rtmutex the first time. + * waiter but have failed to get the rtmutex the first time. + * * We have to replace the newowner TID in the user space variable. * This must be atomic as we have to preserve the owner died bit here. * @@ -2144,17 +2240,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * because we can fault here. Imagine swapped out pages or a fork * that marked all the anonymous memory readonly for cow. * - * Modifying pi_state _before_ the user space value would - * leave the pi_state in an inconsistent state when we fault - * here, because we need to drop the hash bucket lock to - * handle the fault. This might be observed in the PID check - * in lookup_pi_state. + * Modifying pi_state _before_ the user space value would leave the + * pi_state in an inconsistent state when we fault here, because we + * need to drop the locks to handle the fault. This might be observed + * in the PID check in lookup_pi_state. */ retry: if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; - while (1) { + for (;;) { newval = (uval & FUTEX_OWNER_DIED) | newtid; if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) @@ -2169,47 +2264,60 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * itself. */ if (pi_state->owner != NULL) { - raw_spin_lock_irq(&pi_state->owner->pi_lock); + raw_spin_lock(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); - raw_spin_unlock_irq(&pi_state->owner->pi_lock); + raw_spin_unlock(&pi_state->owner->pi_lock); } pi_state->owner = newowner; - raw_spin_lock_irq(&newowner->pi_lock); + raw_spin_lock(&newowner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &newowner->pi_state_list); - raw_spin_unlock_irq(&newowner->pi_lock); + raw_spin_unlock(&newowner->pi_lock); + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + return 0; /* - * To handle the page fault we need to drop the hash bucket - * lock here. That gives the other task (either the highest priority - * waiter itself or the task which stole the rtmutex) the - * chance to try the fixup of the pi_state. So once we are - * back from handling the fault we need to check the pi_state - * after reacquiring the hash bucket lock and before trying to - * do another fixup. When the fixup has been done already we - * simply return. + * To handle the page fault we need to drop the locks here. That gives + * the other task (either the highest priority waiter itself or the + * task which stole the rtmutex) the chance to try the fixup of the + * pi_state. So once we are back from handling the fault we need to + * check the pi_state after reacquiring the locks and before trying to + * do another fixup. When the fixup has been done already we simply + * return. + * + * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely + * drop hb->lock since the caller owns the hb -> futex_q relation. + * Dropping the pi_mutex->wait_lock requires the state revalidate. */ handle_fault: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr); ret = fault_in_user_writeable(uaddr); spin_lock(q->lock_ptr); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); /* * Check if someone else fixed it for us: */ - if (pi_state->owner != oldowner) - return 0; + if (pi_state->owner != oldowner) { + ret = 0; + goto out_unlock; + } if (ret) - return ret; + goto out_unlock; goto retry; + +out_unlock: + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + return ret; } static long futex_wait_restart(struct restart_block *restart); @@ -2231,57 +2339,32 @@ static long futex_wait_restart(struct restart_block *restart); */ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { - struct task_struct *owner; int ret = 0; if (locked) { /* * Got the lock. We might not be the anticipated owner if we * did a lock-steal - fix up the PI-state in that case: + * + * We can safely read pi_state->owner without holding wait_lock + * because we now own the rt_mutex, only the owner will attempt + * to change it. */ if (q->pi_state->owner != current) ret = fixup_pi_state_owner(uaddr, q, current); goto out; } - /* - * Catch the rare case, where the lock was released when we were on the - * way back before we locked the hash bucket. - */ - if (q->pi_state->owner == current) { - /* - * Try to get the rt_mutex now. This might fail as some other - * task acquired the rt_mutex after we removed ourself from the - * rt_mutex waiters list. - */ - if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { - locked = 1; - goto out; - } - - /* - * pi_state is incorrect, some other task did a lock steal and - * we returned due to timeout or signal without taking the - * rt_mutex. Too late. - */ - raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); - owner = rt_mutex_owner(&q->pi_state->pi_mutex); - if (!owner) - owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); - raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); - ret = fixup_pi_state_owner(uaddr, q, owner); - goto out; - } - /* * Paranoia check. If we did not take the lock, then we should not be * the owner of the rt_mutex. */ - if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) + if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " "pi-state %p\n", ret, q->pi_state->pi_mutex.owner, q->pi_state->owner); + } out: return ret ? ret : locked; @@ -2505,6 +2588,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; + struct futex_pi_state *pi_state = NULL; + struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; int res, ret; @@ -2557,24 +2642,67 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, } } + WARN_ON(!q.pi_state); + /* * Only actually queue now that the atomic ops are done: */ - queue_me(&q, hb); + __queue_me(&q, hb); - WARN_ON(!q.pi_state); - /* - * Block on the PI mutex: - */ - if (!trylock) { - ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); - } else { - ret = rt_mutex_trylock(&q.pi_state->pi_mutex); + if (trylock) { + ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); /* Fixup the trylock return value: */ ret = ret ? 0 : -EWOULDBLOCK; + goto no_block; } + rt_mutex_init_waiter(&rt_waiter); + + /* + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not + * hold it while doing rt_mutex_start_proxy(), because then it will + * include hb->lock in the blocking chain, even through we'll not in + * fact hold it while blocking. This will lead it to report -EDEADLK + * and BUG when futex_unlock_pi() interleaves with this. + * + * Therefore acquire wait_lock while holding hb->lock, but drop the + * latter before calling rt_mutex_start_proxy_lock(). This still fully + * serializes against futex_unlock_pi() as that does the exact same + * lock handoff sequence. + */ + raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); + spin_unlock(q.lock_ptr); + ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); + raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); + + if (ret) { + if (ret == 1) + ret = 0; + + spin_lock(q.lock_ptr); + goto no_block; + } + + + if (unlikely(to)) + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); + + ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); + spin_lock(q.lock_ptr); + /* + * If we failed to acquire the lock (signal/timeout), we must + * first acquire the hb->lock before removing the lock from the + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex + * wait lists consistent. + * + * In particular; it is important that futex_unlock_pi() can not + * observe this inconsistency. + */ + if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) + ret = 0; + +no_block: /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. @@ -2591,12 +2719,19 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, * If fixup_owner() faulted and was unable to handle the fault, unlock * it and return the fault to userspace. */ - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) - rt_mutex_unlock(&q.pi_state->pi_mutex); + if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { + pi_state = q.pi_state; + get_pi_state(pi_state); + } /* Unqueue and drop the lock */ unqueue_me_pi(&q); + if (pi_state) { + rt_mutex_futex_unlock(&pi_state->pi_mutex); + put_pi_state(pi_state); + } + goto out_put_key; out_unlock_put_key: @@ -2605,8 +2740,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, out_put_key: put_futex_key(&q.key); out: - if (to) + if (to) { + hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); + } return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: @@ -2633,7 +2770,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current); union futex_key key = FUTEX_KEY_INIT; struct futex_hash_bucket *hb; - struct futex_q *match; + struct futex_q *top_waiter; int ret; retry: @@ -2657,12 +2794,37 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * all and we at least want to know if user space fiddled * with the futex value instead of blindly unlocking. */ - match = futex_top_waiter(hb, &key); - if (match) { - ret = wake_futex_pi(uaddr, uval, match, hb); + top_waiter = futex_top_waiter(hb, &key); + if (top_waiter) { + struct futex_pi_state *pi_state = top_waiter->pi_state; + + ret = -EINVAL; + if (!pi_state) + goto out_unlock; + /* - * In case of success wake_futex_pi dropped the hash - * bucket lock. + * If current does not own the pi_state then the futex is + * inconsistent and user space fiddled with the futex value. + */ + if (pi_state->owner != current) + goto out_unlock; + + get_pi_state(pi_state); + /* + * By taking wait_lock while still holding hb->lock, we ensure + * there is no point where we hold neither; and therefore + * wake_futex_pi() must observe a state consistent with what we + * observed. + */ + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + spin_unlock(&hb->lock); + + ret = wake_futex_pi(uaddr, uval, pi_state); + + put_pi_state(pi_state); + + /* + * Success, we're done! No tricky corner cases. */ if (!ret) goto out_putkey; @@ -2677,7 +2839,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * setting the FUTEX_WAITERS bit. Try again. */ if (ret == -EAGAIN) { - spin_unlock(&hb->lock); put_futex_key(&key); goto retry; } @@ -2685,7 +2846,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * wake_futex_pi has detected invalid state. Tell user * space. */ - goto out_unlock; + goto out_putkey; } /* @@ -2695,8 +2856,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * preserve the WAITERS bit not the OWNER_DIED one. We are the * owner. */ - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { + spin_unlock(&hb->lock); goto pi_faulted; + } /* * If uval has changed, let user space handle it. @@ -2710,7 +2873,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) return ret; pi_faulted: - spin_unlock(&hb->lock); put_futex_key(&key); ret = fault_in_user_writeable(uaddr); @@ -2814,6 +2976,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 __user *uaddr2) { struct hrtimer_sleeper timeout, *to = NULL; + struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; @@ -2840,10 +3003,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ - debug_rt_mutex_init_waiter(&rt_waiter); - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); - RB_CLEAR_NODE(&rt_waiter.tree_entry); - rt_waiter.task = NULL; + rt_mutex_init_waiter(&rt_waiter); ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) @@ -2898,8 +3058,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) - rt_mutex_unlock(&q.pi_state->pi_mutex); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { + pi_state = q.pi_state; + get_pi_state(pi_state); + } /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. @@ -2917,10 +3079,13 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); - debug_rt_mutex_free_waiter(&rt_waiter); + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); spin_lock(q.lock_ptr); + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) + ret = 0; + + debug_rt_mutex_free_waiter(&rt_waiter); /* * Fixup the pi_state owner and possibly acquire the lock if we * haven't already. @@ -2938,13 +3103,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * the fault, unlock the rt_mutex and return the fault to * userspace. */ - if (ret && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { + pi_state = q.pi_state; + get_pi_state(pi_state); + } /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } + if (pi_state) { + rt_mutex_futex_unlock(&pi_state->pi_mutex); + put_pi_state(pi_state); + } + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 2f9df37940a0f5a489efc4eaa6afc1ccaa35ad68..c51a49c9be7064d30638d7ae184a4fcbc2136ec7 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters) } EXPORT_SYMBOL(__gcov_merge_icall_topn); +void __gcov_exit(void) +{ + /* Unused. */ +} +EXPORT_SYMBOL(__gcov_exit); + /** * gcov_enable_events - enable event reporting through gcov_event() * diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 6a5c239c7669c5ad8e01565be5a6fd38ae7d4452..46a18e72bce614c804fbe958f0a6b4d00d11027a 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,9 @@ #include #include "gcov.h" -#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) +#if (__GNUC__ >= 7) +#define GCOV_COUNTERS 9 +#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) #define GCOV_COUNTERS 10 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9 #define GCOV_COUNTERS 9 diff --git a/kernel/groups.c b/kernel/groups.c index 8dd7a61b7115e8808408cd2cb911d224a9528297..d09727692a2af1801fb82982d4a08daee5c86d62 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -18,7 +18,7 @@ struct group_info *groups_alloc(int gidsetsize) len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize; gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY); if (!gi) - gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL); + gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL); if (!gi) return NULL; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index f0f8e2a8496feafbb0f9589a6d8159779b9effe0..751593ed7c0b0b9cc9bf74735fb9bd5d7e100be2 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -43,6 +43,7 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_ int __read_mostly sysctl_hung_task_warnings = 10; static int __read_mostly did_panic; +static bool hung_task_show_lock; static struct task_struct *watchdog_task; @@ -120,12 +121,14 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); sched_show_task(t); - debug_show_all_locks(); + hung_task_show_lock = true; } touch_nmi_watchdog(); if (sysctl_hung_task_panic) { + if (hung_task_show_lock) + debug_show_all_locks(); trigger_all_cpu_backtrace(); panic("hung_task: blocked tasks"); } @@ -172,6 +175,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) if (test_taint(TAINT_DIE) || did_panic) return; + hung_task_show_lock = false; rcu_read_lock(); for_each_process_thread(g, t) { if (!max_count--) @@ -187,6 +191,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } unlock: rcu_read_unlock(); + if (hung_task_show_lock) + debug_show_all_locks(); } static long hung_timeout_jiffies(unsigned long last_checked, diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index d052947fe78507ac80bbe58a09cc612bca7bab73..e2d356dd75812df8e42dfac3feb132edd5612e33 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -98,7 +98,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int ncpus, v, vecs_to_assign, vecs_per_node; /* Spread the vectors per node */ - vecs_per_node = (affv - curvec) / nodes; + vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes; /* Get the cpus on this node which are in the mask */ cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n)); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index be3c34e4f2ac1bbf679e92de5d82398a22aba7c9..c94da688ee9b30ff9e746372b20a0c30f4c8cf5f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -348,7 +348,10 @@ void handle_nested_irq(unsigned int irq) irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock_irq(&desc->lock); - action_ret = action->thread_fn(action->irq, action->dev_id); + action_ret = IRQ_NONE; + for_each_action_of_desc(desc, action) + action_ret |= action->thread_fn(action->irq, action->dev_id); + if (!noirqdebug) note_interrupt(desc, action_ret); @@ -877,8 +880,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, if (!desc) return; - __irq_do_set_handler(desc, handle, 1, NULL); desc->irq_common_data.handler_data = data; + __irq_do_set_handler(desc, handle, 1, NULL); irq_put_desc_busunlock(desc, flags); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a4afe5cc5af1828a49f1005825eadc880a727088..425170d4439be5926a63ef9be50c71c1dd5878a9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -852,7 +852,7 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) * This code is triggered unconditionally. Check the affinity * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. */ - if (desc->irq_common_data.affinity) + if (cpumask_available(desc->irq_common_data.affinity)) cpumask_copy(mask, desc->irq_common_data.affinity); else valid = false; @@ -1212,8 +1212,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * set the trigger type must match. Also all must * agree on ONESHOT. */ + unsigned int oldtype = irqd_get_trigger_type(&desc->irq_data); + if (!((old->flags & new->flags) & IRQF_SHARED) || - ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) || + (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || ((old->flags ^ new->flags) & IRQF_ONESHOT)) goto mismatch; @@ -1310,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) ret = __irq_set_trigger(desc, new->flags & IRQF_TRIGGER_MASK); - if (ret) + if (ret) { + irq_release_resources(desc); goto out_mask; + } } desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ @@ -1557,7 +1561,7 @@ void remove_irq(unsigned int irq, struct irqaction *act) struct irq_desc *desc = irq_to_desc(irq); if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) - __free_irq(irq, act->dev_id); + __free_irq(irq, act->dev_id); } EXPORT_SYMBOL_GPL(remove_irq); @@ -1574,20 +1578,27 @@ EXPORT_SYMBOL_GPL(remove_irq); * have completed. * * This function must not be called from interrupt context. + * + * Returns the devname argument passed to request_irq. */ -void free_irq(unsigned int irq, void *dev_id) +const void *free_irq(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + const char *devname; if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) - return; + return NULL; #ifdef CONFIG_SMP if (WARN_ON(desc->affinity_notify)) desc->affinity_notify = NULL; #endif - kfree(__free_irq(irq, dev_id)); + action = __free_irq(irq, dev_id); + devname = action->name; + kfree(action); + return devname; } EXPORT_SYMBOL(free_irq); diff --git a/kernel/kcov.c b/kernel/kcov.c index 85e5546cd791cc31261cd6deb8aea933bb41c008..cd771993f96f4a7ff23592f1abaea22c71444fda 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -60,15 +60,8 @@ void notrace __sanitizer_cov_trace_pc(void) /* * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. - * The checks for whether we are in an interrupt are open-coded, because - * 1. We can't use in_interrupt() here, since it also returns true - * when we are inside local_bh_disable() section. - * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()), - * since that leads to slower generated code (three separate tests, - * one for each of the flags). */ - if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET - | NMI_MASK))) + if (!t || !in_task()) return; mode = READ_ONCE(t->kcov_mode); if (mode == KCOV_MODE_TRACE) { diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index bfe62d5b38722251681f7257788cd14533fe4362..ae1a3ba24df56958bb48f30ddde85ae6ae833fdf 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -51,12 +51,6 @@ DEFINE_MUTEX(kexec_mutex); /* Per cpu memory for storing cpu states in case of system crash. */ note_buf_t __percpu *crash_notes; -/* vmcoreinfo stuff */ -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; -size_t vmcoreinfo_size; -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); - /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; @@ -996,34 +990,6 @@ int crash_shrink_memory(unsigned long new_size) return ret; } -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, - size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) + 3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - void crash_save_cpu(struct pt_regs *regs, int cpu) { struct elf_prstatus prstatus; @@ -1084,403 +1050,6 @@ static int __init crash_notes_memory_init(void) subsys_initcall(crash_notes_memory_init); -/* - * parsing the "crashkernel" commandline - * - * this code is intended to be called from architecture specific code - */ - - -/* - * This function parses command lines in the format - * - * crashkernel=ramsize-range:size[,...][@offset] - * - * The function returns 0 on success and -EINVAL on failure. - */ -static int __init parse_crashkernel_mem(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - char *cur = cmdline, *tmp; - - /* for each entry of the comma-separated list */ - do { - unsigned long long start, end = ULLONG_MAX, size; - - /* get the start of the range */ - start = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("crashkernel: Memory value expected\n"); - return -EINVAL; - } - cur = tmp; - if (*cur != '-') { - pr_warn("crashkernel: '-' expected\n"); - return -EINVAL; - } - cur++; - - /* if no ':' is here, than we read the end */ - if (*cur != ':') { - end = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("crashkernel: Memory value expected\n"); - return -EINVAL; - } - cur = tmp; - if (end <= start) { - pr_warn("crashkernel: end <= start\n"); - return -EINVAL; - } - } - - if (*cur != ':') { - pr_warn("crashkernel: ':' expected\n"); - return -EINVAL; - } - cur++; - - size = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("Memory value expected\n"); - return -EINVAL; - } - cur = tmp; - if (size >= system_ram) { - pr_warn("crashkernel: invalid size\n"); - return -EINVAL; - } - - /* match ? */ - if (system_ram >= start && system_ram < end) { - *crash_size = size; - break; - } - } while (*cur++ == ','); - - if (*crash_size > 0) { - while (*cur && *cur != ' ' && *cur != '@') - cur++; - if (*cur == '@') { - cur++; - *crash_base = memparse(cur, &tmp); - if (cur == tmp) { - pr_warn("Memory value expected after '@'\n"); - return -EINVAL; - } - } - } - - return 0; -} - -/* - * That function parses "simple" (old) crashkernel command lines like - * - * crashkernel=size[@offset] - * - * It returns 0 on success and -EINVAL on failure. - */ -static int __init parse_crashkernel_simple(char *cmdline, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - char *cur = cmdline; - - *crash_size = memparse(cmdline, &cur); - if (cmdline == cur) { - pr_warn("crashkernel: memory value expected\n"); - return -EINVAL; - } - - if (*cur == '@') - *crash_base = memparse(cur+1, &cur); - else if (*cur != ' ' && *cur != '\0') { - pr_warn("crashkernel: unrecognized char: %c\n", *cur); - return -EINVAL; - } - - return 0; -} - -#define SUFFIX_HIGH 0 -#define SUFFIX_LOW 1 -#define SUFFIX_NULL 2 -static __initdata char *suffix_tbl[] = { - [SUFFIX_HIGH] = ",high", - [SUFFIX_LOW] = ",low", - [SUFFIX_NULL] = NULL, -}; - -/* - * That function parses "suffix" crashkernel command lines like - * - * crashkernel=size,[high|low] - * - * It returns 0 on success and -EINVAL on failure. - */ -static int __init parse_crashkernel_suffix(char *cmdline, - unsigned long long *crash_size, - const char *suffix) -{ - char *cur = cmdline; - - *crash_size = memparse(cmdline, &cur); - if (cmdline == cur) { - pr_warn("crashkernel: memory value expected\n"); - return -EINVAL; - } - - /* check with suffix */ - if (strncmp(cur, suffix, strlen(suffix))) { - pr_warn("crashkernel: unrecognized char: %c\n", *cur); - return -EINVAL; - } - cur += strlen(suffix); - if (*cur != ' ' && *cur != '\0') { - pr_warn("crashkernel: unrecognized char: %c\n", *cur); - return -EINVAL; - } - - return 0; -} - -static __init char *get_last_crashkernel(char *cmdline, - const char *name, - const char *suffix) -{ - char *p = cmdline, *ck_cmdline = NULL; - - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - char *end_p = strchr(p, ' '); - char *q; - - if (!end_p) - end_p = p + strlen(p); - - if (!suffix) { - int i; - - /* skip the one with any known suffix */ - for (i = 0; suffix_tbl[i]; i++) { - q = end_p - strlen(suffix_tbl[i]); - if (!strncmp(q, suffix_tbl[i], - strlen(suffix_tbl[i]))) - goto next; - } - ck_cmdline = p; - } else { - q = end_p - strlen(suffix); - if (!strncmp(q, suffix, strlen(suffix))) - ck_cmdline = p; - } -next: - p = strstr(p+1, name); - } - - if (!ck_cmdline) - return NULL; - - return ck_cmdline; -} - -static int __init __parse_crashkernel(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base, - const char *name, - const char *suffix) -{ - char *first_colon, *first_space; - char *ck_cmdline; - - BUG_ON(!crash_size || !crash_base); - *crash_size = 0; - *crash_base = 0; - - ck_cmdline = get_last_crashkernel(cmdline, name, suffix); - - if (!ck_cmdline) - return -EINVAL; - - ck_cmdline += strlen(name); - - if (suffix) - return parse_crashkernel_suffix(ck_cmdline, crash_size, - suffix); - /* - * if the commandline contains a ':', then that's the extended - * syntax -- if not, it must be the classic syntax - */ - first_colon = strchr(ck_cmdline, ':'); - first_space = strchr(ck_cmdline, ' '); - if (first_colon && (!first_space || first_colon < first_space)) - return parse_crashkernel_mem(ck_cmdline, system_ram, - crash_size, crash_base); - - return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); -} - -/* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. - */ -int __init parse_crashkernel(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel=", NULL); -} - -int __init parse_crashkernel_high(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel=", suffix_tbl[SUFFIX_HIGH]); -} - -int __init parse_crashkernel_low(char *cmdline, - unsigned long long system_ram, - unsigned long long *crash_size, - unsigned long long *crash_base) -{ - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel=", suffix_tbl[SUFFIX_LOW]); -} - -static void update_vmcoreinfo_note(void) -{ - u32 *buf = vmcoreinfo_note; - - if (!vmcoreinfo_size) - return; - buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, - vmcoreinfo_size); - final_note(buf); -} - -void crash_save_vmcoreinfo(void) -{ - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); - update_vmcoreinfo_note(); -} - -void vmcoreinfo_append_str(const char *fmt, ...) -{ - va_list args; - char buf[0x50]; - size_t r; - - va_start(args, fmt); - r = vscnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - - r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); - - memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); - - vmcoreinfo_size += r; -} - -/* - * provide an empty default implementation here -- architecture - * code may override this - */ -void __weak arch_crash_save_vmcoreinfo(void) -{} - -phys_addr_t __weak paddr_vmcoreinfo_note(void) -{ - return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note); -} - -static int __init crash_save_vmcoreinfo_init(void) -{ - VMCOREINFO_OSRELEASE(init_uts_ns.name.release); - VMCOREINFO_PAGESIZE(PAGE_SIZE); - - VMCOREINFO_SYMBOL(init_uts_ns); - VMCOREINFO_SYMBOL(node_online_map); -#ifdef CONFIG_MMU - VMCOREINFO_SYMBOL(swapper_pg_dir); -#endif - VMCOREINFO_SYMBOL(_stext); - VMCOREINFO_SYMBOL(vmap_area_list); - -#ifndef CONFIG_NEED_MULTIPLE_NODES - VMCOREINFO_SYMBOL(mem_map); - VMCOREINFO_SYMBOL(contig_page_data); -#endif -#ifdef CONFIG_SPARSEMEM - VMCOREINFO_SYMBOL(mem_section); - VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); - VMCOREINFO_STRUCT_SIZE(mem_section); - VMCOREINFO_OFFSET(mem_section, section_mem_map); -#endif - VMCOREINFO_STRUCT_SIZE(page); - VMCOREINFO_STRUCT_SIZE(pglist_data); - VMCOREINFO_STRUCT_SIZE(zone); - VMCOREINFO_STRUCT_SIZE(free_area); - VMCOREINFO_STRUCT_SIZE(list_head); - VMCOREINFO_SIZE(nodemask_t); - VMCOREINFO_OFFSET(page, flags); - VMCOREINFO_OFFSET(page, _refcount); - VMCOREINFO_OFFSET(page, mapping); - VMCOREINFO_OFFSET(page, lru); - VMCOREINFO_OFFSET(page, _mapcount); - VMCOREINFO_OFFSET(page, private); - VMCOREINFO_OFFSET(page, compound_dtor); - VMCOREINFO_OFFSET(page, compound_order); - VMCOREINFO_OFFSET(page, compound_head); - VMCOREINFO_OFFSET(pglist_data, node_zones); - VMCOREINFO_OFFSET(pglist_data, nr_zones); -#ifdef CONFIG_FLAT_NODE_MEM_MAP - VMCOREINFO_OFFSET(pglist_data, node_mem_map); -#endif - VMCOREINFO_OFFSET(pglist_data, node_start_pfn); - VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); - VMCOREINFO_OFFSET(pglist_data, node_id); - VMCOREINFO_OFFSET(zone, free_area); - VMCOREINFO_OFFSET(zone, vm_stat); - VMCOREINFO_OFFSET(zone, spanned_pages); - VMCOREINFO_OFFSET(free_area, free_list); - VMCOREINFO_OFFSET(list_head, next); - VMCOREINFO_OFFSET(list_head, prev); - VMCOREINFO_OFFSET(vmap_area, va_start); - VMCOREINFO_OFFSET(vmap_area, list); - VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); - log_buf_kexec_setup(); - VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); - VMCOREINFO_NUMBER(NR_FREE_PAGES); - VMCOREINFO_NUMBER(PG_lru); - VMCOREINFO_NUMBER(PG_private); - VMCOREINFO_NUMBER(PG_swapcache); - VMCOREINFO_NUMBER(PG_slab); -#ifdef CONFIG_MEMORY_FAILURE - VMCOREINFO_NUMBER(PG_hwpoison); -#endif - VMCOREINFO_NUMBER(PG_head_mask); - VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR); -#endif - - arch_crash_save_vmcoreinfo(); - update_vmcoreinfo_note(); - - return 0; -} - -subsys_initcall(crash_save_vmcoreinfo_init); - /* * Move into place and start executing a preloaded standalone * executable. If nothing was preloaded return an error. diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 699c5bc51a9219d664578e4d7978dee94d291c82..adfe3b4cfe05a101bcee0da8e72db6fe6a10cf72 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -58,15 +58,6 @@ #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) -/* - * Some oddball architectures like 64bit powerpc have function descriptors - * so this must be overridable. - */ -#ifndef kprobe_lookup_name -#define kprobe_lookup_name(name, addr) \ - addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) -#endif - static int kprobes_initialized; static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; @@ -81,6 +72,12 @@ static struct { raw_spinlock_t lock ____cacheline_aligned_in_smp; } kretprobe_table_locks[KPROBE_TABLE_SIZE]; +kprobe_opcode_t * __weak kprobe_lookup_name(const char *name, + unsigned int __unused) +{ + return ((kprobe_opcode_t *)(kallsyms_lookup_name(name))); +} + static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) { return &(kretprobe_table_locks[hash].lock); @@ -125,7 +122,7 @@ static void *alloc_insn_page(void) return module_alloc(PAGE_SIZE); } -static void free_insn_page(void *page) +void __weak free_insn_page(void *page) { module_memfree(page); } @@ -598,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work) } /* Wait for completing optimization and unoptimization */ -static void wait_for_kprobe_optimizer(void) +void wait_for_kprobe_optimizer(void) { mutex_lock(&kprobe_mutex); @@ -746,13 +743,20 @@ static void kill_optimized_kprobe(struct kprobe *p) arch_remove_optimized_kprobe(op); } +static inline +void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) +{ + if (!kprobe_ftrace(p)) + arch_prepare_optimized_kprobe(op, p); +} + /* Try to prepare optimized instructions */ static void prepare_optimized_kprobe(struct kprobe *p) { struct optimized_kprobe *op; op = container_of(p, struct optimized_kprobe, kp); - arch_prepare_optimized_kprobe(op, p); + __prepare_optimized_kprobe(op, p); } /* Allocate new optimized_kprobe and try to prepare optimized instructions */ @@ -766,7 +770,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) INIT_LIST_HEAD(&op->list); op->kp.addr = p->addr; - arch_prepare_optimized_kprobe(op, p); + __prepare_optimized_kprobe(op, p); return &op->kp; } @@ -1391,21 +1395,19 @@ bool within_kprobe_blacklist(unsigned long addr) * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, + const char *symbol_name, unsigned int offset) { - kprobe_opcode_t *addr = p->addr; - - if ((p->symbol_name && p->addr) || - (!p->symbol_name && !p->addr)) + if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; - if (p->symbol_name) { - kprobe_lookup_name(p->symbol_name, addr); + if (symbol_name) { + addr = kprobe_lookup_name(symbol_name, offset); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + p->offset); + addr = (kprobe_opcode_t *)(((char *)addr) + offset); if (addr) return addr; @@ -1413,6 +1415,11 @@ static kprobe_opcode_t *kprobe_addr(struct kprobe *p) return ERR_PTR(-EINVAL); } +static kprobe_opcode_t *kprobe_addr(struct kprobe *p) +{ + return _kprobe_addr(p->addr, p->symbol_name, p->offset); +} + /* Check passed kprobe is valid and return kprobe in kprobe_table. */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { @@ -1740,11 +1747,12 @@ void unregister_kprobes(struct kprobe **kps, int num) } EXPORT_SYMBOL_GPL(unregister_kprobes); -int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +int __weak kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { return NOTIFY_DONE; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); static struct notifier_block kprobe_exceptions_nb = { .notifier_call = kprobe_exceptions_notify, @@ -1875,6 +1883,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(pre_handler_kretprobe); +bool __weak arch_function_offset_within_entry(unsigned long offset) +{ + return !offset; +} + +bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +{ + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + + if (IS_ERR(kp_addr)) + return false; + + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || + !arch_function_offset_within_entry(offset)) + return false; + + return true; +} + int register_kretprobe(struct kretprobe *rp) { int ret = 0; @@ -1882,6 +1909,9 @@ int register_kretprobe(struct kretprobe *rp) int i; void *addr; + if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) + return -EINVAL; + if (kretprobe_blacklist_size) { addr = kprobe_addr(&rp->kp); if (IS_ERR(addr)) @@ -2153,6 +2183,12 @@ static int kprobes_module_callback(struct notifier_block *nb, * The vaddr this probe is installed will soon * be vfreed buy not synced to disk. Hence, * disarming the breakpoint isn't needed. + * + * Note, this will also move any optimized probes + * that are pending to be removed from their + * corresponding lists to the freeing_list and + * will not be touched by the delayed + * kprobe_optimizer work handler. */ kill_kprobe(p); } @@ -2192,8 +2228,8 @@ static int __init init_kprobes(void) if (kretprobe_blacklist_size) { /* lookup the function address from its name */ for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { - kprobe_lookup_name(kretprobe_blacklist[i].name, - kretprobe_blacklist[i].addr); + kretprobe_blacklist[i].addr = + kprobe_lookup_name(kretprobe_blacklist[i].name, 0); if (!kretprobe_blacklist[i].addr) printk("kretprobe: lookup failed: %s\n", kretprobe_blacklist[i].name); diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 0999679d6f26706350cca88f42b970a1306f7037..23cd70651238383ac0b112acf4827ebd1b1ad399 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -125,6 +125,10 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj, } KERNEL_ATTR_RW(kexec_crash_size); +#endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_CRASH_CORE + static ssize_t vmcoreinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -134,7 +138,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj, } KERNEL_ATTR_RO(vmcoreinfo); -#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_CRASH_CORE */ /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, @@ -219,6 +223,8 @@ static struct attribute * kernel_attrs[] = { &kexec_loaded_attr.attr, &kexec_crash_loaded_attr.attr, &kexec_crash_size_attr.attr, +#endif +#ifdef CONFIG_CRASH_CORE &vmcoreinfo_attr.attr, #endif #ifndef CONFIG_TINY_RCU diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 0450225579367eee10de6695679fffa7b963f013..ec4565122e6553f490dfb2434b6c14102b152bb2 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -10,6 +10,7 @@ config LIVEPATCH depends on SYSFS depends on KALLSYMS_ALL depends on HAVE_LIVEPATCH + depends on !TRIM_UNUSED_KSYMS help Say Y here if you want to support kernel live patching. This option has no runtime impact until a kernel "patch" diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile index e8780c0901d9c56da9e0daa6abff776a3db7111a..2b8bdb1925dad406031a89b6dfea02ee066d534c 100644 --- a/kernel/livepatch/Makefile +++ b/kernel/livepatch/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o -livepatch-objs := core.o +livepatch-objs := core.o patch.o transition.o diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index af4643873e7179c0e750d3d69a665844d06f4725..b9628e43c78f60711f8a8c21693aef5c7041f2bb 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -24,61 +24,31 @@ #include #include #include -#include #include #include #include #include #include +#include #include - -/** - * struct klp_ops - structure for tracking registered ftrace ops structs - * - * A single ftrace_ops is shared between all enabled replacement functions - * (klp_func structs) which have the same old_addr. This allows the switch - * between function versions to happen instantaneously by updating the klp_ops - * struct's func_stack list. The winner is the klp_func at the top of the - * func_stack (front of the list). - * - * @node: node for the global klp_ops list - * @func_stack: list head for the stack of klp_func's (active func is on top) - * @fops: registered ftrace ops struct - */ -struct klp_ops { - struct list_head node; - struct list_head func_stack; - struct ftrace_ops fops; -}; +#include "core.h" +#include "patch.h" +#include "transition.h" /* - * The klp_mutex protects the global lists and state transitions of any - * structure reachable from them. References to any structure must be obtained - * under mutex protection (except in klp_ftrace_handler(), which uses RCU to - * ensure it gets consistent data). + * klp_mutex is a coarse lock which serializes access to klp data. All + * accesses to klp-related variables and structures must have mutex protection, + * except within the following functions which carefully avoid the need for it: + * + * - klp_ftrace_handler() + * - klp_update_patch_state() */ -static DEFINE_MUTEX(klp_mutex); +DEFINE_MUTEX(klp_mutex); static LIST_HEAD(klp_patches); -static LIST_HEAD(klp_ops); static struct kobject *klp_root_kobj; -static struct klp_ops *klp_find_ops(unsigned long old_addr) -{ - struct klp_ops *ops; - struct klp_func *func; - - list_for_each_entry(ops, &klp_ops, node) { - func = list_first_entry(&ops->func_stack, struct klp_func, - stack_node); - if (func->old_addr == old_addr) - return ops; - } - - return NULL; -} - static bool klp_is_module(struct klp_object *obj) { return obj->name; @@ -117,7 +87,6 @@ static void klp_find_object_module(struct klp_object *obj) mutex_unlock(&module_mutex); } -/* klp_mutex must be held by caller */ static bool klp_is_patch_registered(struct klp_patch *patch) { struct klp_patch *mypatch; @@ -182,7 +151,10 @@ static int klp_find_object_symbol(const char *objname, const char *name, }; mutex_lock(&module_mutex); - kallsyms_on_each_symbol(klp_find_callback, &args); + if (objname) + module_kallsyms_on_each_symbol(klp_find_callback, &args); + else + kallsyms_on_each_symbol(klp_find_callback, &args); mutex_unlock(&module_mutex); /* @@ -233,7 +205,7 @@ static int klp_resolve_symbols(Elf_Shdr *relasec, struct module *pmod) for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { sym = pmod->core_kallsyms.symtab + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { - pr_err("symbol %s is not marked as a livepatch symbol", + pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); return -EINVAL; } @@ -243,7 +215,7 @@ static int klp_resolve_symbols(Elf_Shdr *relasec, struct module *pmod) ".klp.sym.%55[^.].%127[^,],%lu", objname, symname, &sympos); if (cnt != 3) { - pr_err("symbol %s has an incorrectly formatted name", + pr_err("symbol %s has an incorrectly formatted name\n", strtab + sym->st_name); return -EINVAL; } @@ -288,7 +260,7 @@ static int klp_write_object_relocations(struct module *pmod, */ cnt = sscanf(secname, ".klp.rela.%55[^.]", sec_objname); if (cnt != 1) { - pr_err("section %s has an incorrectly formatted name", + pr_err("section %s has an incorrectly formatted name\n", secname); ret = -EINVAL; break; @@ -311,191 +283,30 @@ static int klp_write_object_relocations(struct module *pmod, return ret; } -static void notrace klp_ftrace_handler(unsigned long ip, - unsigned long parent_ip, - struct ftrace_ops *fops, - struct pt_regs *regs) -{ - struct klp_ops *ops; - struct klp_func *func; - - ops = container_of(fops, struct klp_ops, fops); - - rcu_read_lock(); - func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, - stack_node); - if (WARN_ON_ONCE(!func)) - goto unlock; - - klp_arch_set_pc(regs, (unsigned long)func->new_func); -unlock: - rcu_read_unlock(); -} - -/* - * Convert a function address into the appropriate ftrace location. - * - * Usually this is just the address of the function, but on some architectures - * it's more complicated so allow them to provide a custom behaviour. - */ -#ifndef klp_get_ftrace_location -static unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - return faddr; -} -#endif - -static void klp_disable_func(struct klp_func *func) -{ - struct klp_ops *ops; - - if (WARN_ON(func->state != KLP_ENABLED)) - return; - if (WARN_ON(!func->old_addr)) - return; - - ops = klp_find_ops(func->old_addr); - if (WARN_ON(!ops)) - return; - - if (list_is_singular(&ops->func_stack)) { - unsigned long ftrace_loc; - - ftrace_loc = klp_get_ftrace_location(func->old_addr); - if (WARN_ON(!ftrace_loc)) - return; - - WARN_ON(unregister_ftrace_function(&ops->fops)); - WARN_ON(ftrace_set_filter_ip(&ops->fops, ftrace_loc, 1, 0)); - - list_del_rcu(&func->stack_node); - list_del(&ops->node); - kfree(ops); - } else { - list_del_rcu(&func->stack_node); - } - - func->state = KLP_DISABLED; -} - -static int klp_enable_func(struct klp_func *func) -{ - struct klp_ops *ops; - int ret; - - if (WARN_ON(!func->old_addr)) - return -EINVAL; - - if (WARN_ON(func->state != KLP_DISABLED)) - return -EINVAL; - - ops = klp_find_ops(func->old_addr); - if (!ops) { - unsigned long ftrace_loc; - - ftrace_loc = klp_get_ftrace_location(func->old_addr); - if (!ftrace_loc) { - pr_err("failed to find location for function '%s'\n", - func->old_name); - return -EINVAL; - } - - ops = kzalloc(sizeof(*ops), GFP_KERNEL); - if (!ops) - return -ENOMEM; - - ops->fops.func = klp_ftrace_handler; - ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | - FTRACE_OPS_FL_DYNAMIC | - FTRACE_OPS_FL_IPMODIFY; - - list_add(&ops->node, &klp_ops); - - INIT_LIST_HEAD(&ops->func_stack); - list_add_rcu(&func->stack_node, &ops->func_stack); - - ret = ftrace_set_filter_ip(&ops->fops, ftrace_loc, 0, 0); - if (ret) { - pr_err("failed to set ftrace filter for function '%s' (%d)\n", - func->old_name, ret); - goto err; - } - - ret = register_ftrace_function(&ops->fops); - if (ret) { - pr_err("failed to register ftrace handler for function '%s' (%d)\n", - func->old_name, ret); - ftrace_set_filter_ip(&ops->fops, ftrace_loc, 1, 0); - goto err; - } - - - } else { - list_add_rcu(&func->stack_node, &ops->func_stack); - } - - func->state = KLP_ENABLED; - - return 0; - -err: - list_del_rcu(&func->stack_node); - list_del(&ops->node); - kfree(ops); - return ret; -} - -static void klp_disable_object(struct klp_object *obj) -{ - struct klp_func *func; - - klp_for_each_func(obj, func) - if (func->state == KLP_ENABLED) - klp_disable_func(func); - - obj->state = KLP_DISABLED; -} - -static int klp_enable_object(struct klp_object *obj) -{ - struct klp_func *func; - int ret; - - if (WARN_ON(obj->state != KLP_DISABLED)) - return -EINVAL; - - if (WARN_ON(!klp_is_object_loaded(obj))) - return -EINVAL; - - klp_for_each_func(obj, func) { - ret = klp_enable_func(func); - if (ret) { - klp_disable_object(obj); - return ret; - } - } - obj->state = KLP_ENABLED; - - return 0; -} - static int __klp_disable_patch(struct klp_patch *patch) { - struct klp_object *obj; + if (klp_transition_patch) + return -EBUSY; /* enforce stacking: only the last enabled patch can be disabled */ if (!list_is_last(&patch->list, &klp_patches) && - list_next_entry(patch, list)->state == KLP_ENABLED) + list_next_entry(patch, list)->enabled) return -EBUSY; - pr_notice("disabling patch '%s'\n", patch->mod->name); + klp_init_transition(patch, KLP_UNPATCHED); - klp_for_each_object(patch, obj) { - if (obj->state == KLP_ENABLED) - klp_disable_object(obj); - } + /* + * Enforce the order of the func->transition writes in + * klp_init_transition() and the TIF_PATCH_PENDING writes in + * klp_start_transition(). In the rare case where klp_ftrace_handler() + * is called shortly after klp_update_patch_state() switches the task, + * this ensures the handler sees that func->transition is set. + */ + smp_wmb(); - patch->state = KLP_DISABLED; + klp_start_transition(); + klp_try_complete_transition(); + patch->enabled = false; return 0; } @@ -519,7 +330,7 @@ int klp_disable_patch(struct klp_patch *patch) goto err; } - if (patch->state == KLP_DISABLED) { + if (!patch->enabled) { ret = -EINVAL; goto err; } @@ -537,32 +348,61 @@ static int __klp_enable_patch(struct klp_patch *patch) struct klp_object *obj; int ret; - if (WARN_ON(patch->state != KLP_DISABLED)) + if (klp_transition_patch) + return -EBUSY; + + if (WARN_ON(patch->enabled)) return -EINVAL; /* enforce stacking: only the first disabled patch can be enabled */ if (patch->list.prev != &klp_patches && - list_prev_entry(patch, list)->state == KLP_DISABLED) + !list_prev_entry(patch, list)->enabled) return -EBUSY; + /* + * A reference is taken on the patch module to prevent it from being + * unloaded. + * + * Note: For immediate (no consistency model) patches we don't allow + * patch modules to unload since there is no safe/sane method to + * determine if a thread is still running in the patched code contained + * in the patch module once the ftrace registration is successful. + */ + if (!try_module_get(patch->mod)) + return -ENODEV; + pr_notice("enabling patch '%s'\n", patch->mod->name); + klp_init_transition(patch, KLP_PATCHED); + + /* + * Enforce the order of the func->transition writes in + * klp_init_transition() and the ops->func_stack writes in + * klp_patch_object(), so that klp_ftrace_handler() will see the + * func->transition updates before the handler is registered and the + * new funcs become visible to the handler. + */ + smp_wmb(); + klp_for_each_object(patch, obj) { if (!klp_is_object_loaded(obj)) continue; - ret = klp_enable_object(obj); - if (ret) - goto unregister; + ret = klp_patch_object(obj); + if (ret) { + pr_warn("failed to enable patch '%s'\n", + patch->mod->name); + + klp_cancel_transition(); + return ret; + } } - patch->state = KLP_ENABLED; + klp_start_transition(); + klp_try_complete_transition(); + patch->enabled = true; return 0; - -unregister: - WARN_ON(__klp_disable_patch(patch)); - return ret; } /** @@ -599,6 +439,7 @@ EXPORT_SYMBOL_GPL(klp_enable_patch); * /sys/kernel/livepatch * /sys/kernel/livepatch/ * /sys/kernel/livepatch//enabled + * /sys/kernel/livepatch//transition * /sys/kernel/livepatch// * /sys/kernel/livepatch/// */ @@ -608,26 +449,34 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, { struct klp_patch *patch; int ret; - unsigned long val; + bool enabled; - ret = kstrtoul(buf, 10, &val); + ret = kstrtobool(buf, &enabled); if (ret) - return -EINVAL; - - if (val != KLP_DISABLED && val != KLP_ENABLED) - return -EINVAL; + return ret; patch = container_of(kobj, struct klp_patch, kobj); mutex_lock(&klp_mutex); - if (val == patch->state) { + if (!klp_is_patch_registered(patch)) { + /* + * Module with the patch could either disappear meanwhile or is + * not properly initialized yet. + */ + ret = -EINVAL; + goto err; + } + + if (patch->enabled == enabled) { /* already in requested state */ ret = -EINVAL; goto err; } - if (val == KLP_ENABLED) { + if (patch == klp_transition_patch) { + klp_reverse_transition(); + } else if (enabled) { ret = __klp_enable_patch(patch); if (ret) goto err; @@ -652,21 +501,33 @@ static ssize_t enabled_show(struct kobject *kobj, struct klp_patch *patch; patch = container_of(kobj, struct klp_patch, kobj); - return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->state); + return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->enabled); +} + +static ssize_t transition_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct klp_patch *patch; + + patch = container_of(kobj, struct klp_patch, kobj); + return snprintf(buf, PAGE_SIZE-1, "%d\n", + patch == klp_transition_patch); } static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled); +static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition); static struct attribute *klp_patch_attrs[] = { &enabled_kobj_attr.attr, + &transition_kobj_attr.attr, NULL }; static void klp_kobj_release_patch(struct kobject *kobj) { - /* - * Once we have a consistency model we'll need to module_put() the - * patch module here. See klp_register_patch() for more details. - */ + struct klp_patch *patch; + + patch = container_of(kobj, struct klp_patch, kobj); + complete(&patch->finish); } static struct kobj_type klp_ktype_patch = { @@ -737,7 +598,6 @@ static void klp_free_patch(struct klp_patch *patch) klp_free_objects_limited(patch, NULL); if (!list_empty(&patch->list)) list_del(&patch->list); - kobject_put(&patch->kobj); } static int klp_init_func(struct klp_object *obj, struct klp_func *func) @@ -746,7 +606,8 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) return -EINVAL; INIT_LIST_HEAD(&func->stack_node); - func->state = KLP_DISABLED; + func->patched = false; + func->transition = false; /* The format for the sysfs directory is where sympos * is the nth occurrence of this symbol in kallsyms for the patched @@ -787,6 +648,22 @@ static int klp_init_object_loaded(struct klp_patch *patch, &func->old_addr); if (ret) return ret; + + ret = kallsyms_lookup_size_offset(func->old_addr, + &func->old_size, NULL); + if (!ret) { + pr_err("kallsyms size lookup failed for '%s'\n", + func->old_name); + return -ENOENT; + } + + ret = kallsyms_lookup_size_offset((unsigned long)func->new_func, + &func->new_size, NULL); + if (!ret) { + pr_err("kallsyms size lookup failed for '%s' replacement\n", + func->old_name); + return -ENOENT; + } } return 0; @@ -801,7 +678,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) if (!obj->funcs) return -EINVAL; - obj->state = KLP_DISABLED; + obj->patched = false; obj->mod = NULL; klp_find_object_module(obj); @@ -842,12 +719,15 @@ static int klp_init_patch(struct klp_patch *patch) mutex_lock(&klp_mutex); - patch->state = KLP_DISABLED; + patch->enabled = false; + init_completion(&patch->finish); ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, klp_root_kobj, "%s", patch->mod->name); - if (ret) - goto unlock; + if (ret) { + mutex_unlock(&klp_mutex); + return ret; + } klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); @@ -863,9 +743,12 @@ static int klp_init_patch(struct klp_patch *patch) free: klp_free_objects_limited(patch, obj); - kobject_put(&patch->kobj); -unlock: + mutex_unlock(&klp_mutex); + + kobject_put(&patch->kobj); + wait_for_completion(&patch->finish); + return ret; } @@ -879,23 +762,29 @@ static int klp_init_patch(struct klp_patch *patch) */ int klp_unregister_patch(struct klp_patch *patch) { - int ret = 0; + int ret; mutex_lock(&klp_mutex); if (!klp_is_patch_registered(patch)) { ret = -EINVAL; - goto out; + goto err; } - if (patch->state == KLP_ENABLED) { + if (patch->enabled) { ret = -EBUSY; - goto out; + goto err; } klp_free_patch(patch); -out: + mutex_unlock(&klp_mutex); + + kobject_put(&patch->kobj); + wait_for_completion(&patch->finish); + + return 0; +err: mutex_unlock(&klp_mutex); return ret; } @@ -908,17 +797,18 @@ EXPORT_SYMBOL_GPL(klp_unregister_patch); * Initializes the data structure associated with the patch and * creates the sysfs interface. * + * There is no need to take the reference on the patch module here. It is done + * later when the patch is enabled. + * * Return: 0 on success, otherwise error */ int klp_register_patch(struct klp_patch *patch) { - int ret; - if (!patch || !patch->mod) return -EINVAL; if (!is_livepatch_module(patch->mod)) { - pr_err("module %s is not marked as a livepatch module", + pr_err("module %s is not marked as a livepatch module\n", patch->mod->name); return -EINVAL; } @@ -927,20 +817,16 @@ int klp_register_patch(struct klp_patch *patch) return -ENODEV; /* - * A reference is taken on the patch module to prevent it from being - * unloaded. Right now, we don't allow patch modules to unload since - * there is currently no method to determine if a thread is still - * running in the patched code contained in the patch module once - * the ftrace registration is successful. + * Architectures without reliable stack traces have to set + * patch->immediate because there's currently no way to patch kthreads + * with the consistency model. */ - if (!try_module_get(patch->mod)) - return -ENODEV; - - ret = klp_init_patch(patch); - if (ret) - module_put(patch->mod); + if (!klp_have_reliable_stack() && !patch->immediate) { + pr_err("This architecture doesn't have support for the livepatch consistency model.\n"); + return -ENOSYS; + } - return ret; + return klp_init_patch(patch); } EXPORT_SYMBOL_GPL(klp_register_patch); @@ -975,13 +861,17 @@ int klp_module_coming(struct module *mod) goto err; } - if (patch->state == KLP_DISABLED) + /* + * Only patch the module if the patch is enabled or is + * in transition. + */ + if (!patch->enabled && patch != klp_transition_patch) break; pr_notice("applying patch '%s' to loading module '%s'\n", patch->mod->name, obj->mod->name); - ret = klp_enable_object(obj); + ret = klp_patch_object(obj); if (ret) { pr_warn("failed to apply patch '%s' to module '%s' (%d)\n", patch->mod->name, obj->mod->name, ret); @@ -1032,10 +922,14 @@ void klp_module_going(struct module *mod) if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) continue; - if (patch->state != KLP_DISABLED) { + /* + * Only unpatch the module if the patch is enabled or + * is in transition. + */ + if (patch->enabled || patch == klp_transition_patch) { pr_notice("reverting patch '%s' on unloading module '%s'\n", patch->mod->name, obj->mod->name); - klp_disable_object(obj); + klp_unpatch_object(obj); } klp_free_object_loaded(obj); diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h new file mode 100644 index 0000000000000000000000000000000000000000..c74f24c478379fdd6a22f312eb970f8c2ba42a74 --- /dev/null +++ b/kernel/livepatch/core.h @@ -0,0 +1,6 @@ +#ifndef _LIVEPATCH_CORE_H +#define _LIVEPATCH_CORE_H + +extern struct mutex klp_mutex; + +#endif /* _LIVEPATCH_CORE_H */ diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c new file mode 100644 index 0000000000000000000000000000000000000000..f8269036bf0b84f89faa2db3b5dc5fe4c259a019 --- /dev/null +++ b/kernel/livepatch/patch.c @@ -0,0 +1,272 @@ +/* + * patch.c - livepatch patching functions + * + * Copyright (C) 2014 Seth Jennings + * Copyright (C) 2014 SUSE + * Copyright (C) 2015 Josh Poimboeuf + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include "patch.h" +#include "transition.h" + +static LIST_HEAD(klp_ops); + +struct klp_ops *klp_find_ops(unsigned long old_addr) +{ + struct klp_ops *ops; + struct klp_func *func; + + list_for_each_entry(ops, &klp_ops, node) { + func = list_first_entry(&ops->func_stack, struct klp_func, + stack_node); + if (func->old_addr == old_addr) + return ops; + } + + return NULL; +} + +static void notrace klp_ftrace_handler(unsigned long ip, + unsigned long parent_ip, + struct ftrace_ops *fops, + struct pt_regs *regs) +{ + struct klp_ops *ops; + struct klp_func *func; + int patch_state; + + ops = container_of(fops, struct klp_ops, fops); + + rcu_read_lock(); + + func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, + stack_node); + + /* + * func should never be NULL because preemption should be disabled here + * and unregister_ftrace_function() does the equivalent of a + * synchronize_sched() before the func_stack removal. + */ + if (WARN_ON_ONCE(!func)) + goto unlock; + + /* + * In the enable path, enforce the order of the ops->func_stack and + * func->transition reads. The corresponding write barrier is in + * __klp_enable_patch(). + * + * (Note that this barrier technically isn't needed in the disable + * path. In the rare case where klp_update_patch_state() runs before + * this handler, its TIF_PATCH_PENDING read and this func->transition + * read need to be ordered. But klp_update_patch_state() already + * enforces that.) + */ + smp_rmb(); + + if (unlikely(func->transition)) { + + /* + * Enforce the order of the func->transition and + * current->patch_state reads. Otherwise we could read an + * out-of-date task state and pick the wrong function. The + * corresponding write barrier is in klp_init_transition(). + */ + smp_rmb(); + + patch_state = current->patch_state; + + WARN_ON_ONCE(patch_state == KLP_UNDEFINED); + + if (patch_state == KLP_UNPATCHED) { + /* + * Use the previously patched version of the function. + * If no previous patches exist, continue with the + * original function. + */ + func = list_entry_rcu(func->stack_node.next, + struct klp_func, stack_node); + + if (&func->stack_node == &ops->func_stack) + goto unlock; + } + } + + klp_arch_set_pc(regs, (unsigned long)func->new_func); +unlock: + rcu_read_unlock(); +} + +/* + * Convert a function address into the appropriate ftrace location. + * + * Usually this is just the address of the function, but on some architectures + * it's more complicated so allow them to provide a custom behaviour. + */ +#ifndef klp_get_ftrace_location +static unsigned long klp_get_ftrace_location(unsigned long faddr) +{ + return faddr; +} +#endif + +static void klp_unpatch_func(struct klp_func *func) +{ + struct klp_ops *ops; + + if (WARN_ON(!func->patched)) + return; + if (WARN_ON(!func->old_addr)) + return; + + ops = klp_find_ops(func->old_addr); + if (WARN_ON(!ops)) + return; + + if (list_is_singular(&ops->func_stack)) { + unsigned long ftrace_loc; + + ftrace_loc = klp_get_ftrace_location(func->old_addr); + if (WARN_ON(!ftrace_loc)) + return; + + WARN_ON(unregister_ftrace_function(&ops->fops)); + WARN_ON(ftrace_set_filter_ip(&ops->fops, ftrace_loc, 1, 0)); + + list_del_rcu(&func->stack_node); + list_del(&ops->node); + kfree(ops); + } else { + list_del_rcu(&func->stack_node); + } + + func->patched = false; +} + +static int klp_patch_func(struct klp_func *func) +{ + struct klp_ops *ops; + int ret; + + if (WARN_ON(!func->old_addr)) + return -EINVAL; + + if (WARN_ON(func->patched)) + return -EINVAL; + + ops = klp_find_ops(func->old_addr); + if (!ops) { + unsigned long ftrace_loc; + + ftrace_loc = klp_get_ftrace_location(func->old_addr); + if (!ftrace_loc) { + pr_err("failed to find location for function '%s'\n", + func->old_name); + return -EINVAL; + } + + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + ops->fops.func = klp_ftrace_handler; + ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | + FTRACE_OPS_FL_DYNAMIC | + FTRACE_OPS_FL_IPMODIFY; + + list_add(&ops->node, &klp_ops); + + INIT_LIST_HEAD(&ops->func_stack); + list_add_rcu(&func->stack_node, &ops->func_stack); + + ret = ftrace_set_filter_ip(&ops->fops, ftrace_loc, 0, 0); + if (ret) { + pr_err("failed to set ftrace filter for function '%s' (%d)\n", + func->old_name, ret); + goto err; + } + + ret = register_ftrace_function(&ops->fops); + if (ret) { + pr_err("failed to register ftrace handler for function '%s' (%d)\n", + func->old_name, ret); + ftrace_set_filter_ip(&ops->fops, ftrace_loc, 1, 0); + goto err; + } + + + } else { + list_add_rcu(&func->stack_node, &ops->func_stack); + } + + func->patched = true; + + return 0; + +err: + list_del_rcu(&func->stack_node); + list_del(&ops->node); + kfree(ops); + return ret; +} + +void klp_unpatch_object(struct klp_object *obj) +{ + struct klp_func *func; + + klp_for_each_func(obj, func) + if (func->patched) + klp_unpatch_func(func); + + obj->patched = false; +} + +int klp_patch_object(struct klp_object *obj) +{ + struct klp_func *func; + int ret; + + if (WARN_ON(obj->patched)) + return -EINVAL; + + klp_for_each_func(obj, func) { + ret = klp_patch_func(func); + if (ret) { + klp_unpatch_object(obj); + return ret; + } + } + obj->patched = true; + + return 0; +} + +void klp_unpatch_objects(struct klp_patch *patch) +{ + struct klp_object *obj; + + klp_for_each_object(patch, obj) + if (obj->patched) + klp_unpatch_object(obj); +} diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h new file mode 100644 index 0000000000000000000000000000000000000000..0db227170c36a10de1c2774d06dd03aeadeeb07b --- /dev/null +++ b/kernel/livepatch/patch.h @@ -0,0 +1,33 @@ +#ifndef _LIVEPATCH_PATCH_H +#define _LIVEPATCH_PATCH_H + +#include +#include +#include + +/** + * struct klp_ops - structure for tracking registered ftrace ops structs + * + * A single ftrace_ops is shared between all enabled replacement functions + * (klp_func structs) which have the same old_addr. This allows the switch + * between function versions to happen instantaneously by updating the klp_ops + * struct's func_stack list. The winner is the klp_func at the top of the + * func_stack (front of the list). + * + * @node: node for the global klp_ops list + * @func_stack: list head for the stack of klp_func's (active func is on top) + * @fops: registered ftrace ops struct + */ +struct klp_ops { + struct list_head node; + struct list_head func_stack; + struct ftrace_ops fops; +}; + +struct klp_ops *klp_find_ops(unsigned long old_addr); + +int klp_patch_object(struct klp_object *obj); +void klp_unpatch_object(struct klp_object *obj); +void klp_unpatch_objects(struct klp_patch *patch); + +#endif /* _LIVEPATCH_PATCH_H */ diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c new file mode 100644 index 0000000000000000000000000000000000000000..adc0cc64aa4b6ae199e5f4a5b2af0ad59c382175 --- /dev/null +++ b/kernel/livepatch/transition.c @@ -0,0 +1,553 @@ +/* + * transition.c - Kernel Live Patching transition functions + * + * Copyright (C) 2015-2016 Josh Poimboeuf + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include "core.h" +#include "patch.h" +#include "transition.h" +#include "../sched/sched.h" + +#define MAX_STACK_ENTRIES 100 +#define STACK_ERR_BUF_SIZE 128 + +struct klp_patch *klp_transition_patch; + +static int klp_target_state = KLP_UNDEFINED; + +/* + * This work can be performed periodically to finish patching or unpatching any + * "straggler" tasks which failed to transition in the first attempt. + */ +static void klp_transition_work_fn(struct work_struct *work) +{ + mutex_lock(&klp_mutex); + + if (klp_transition_patch) + klp_try_complete_transition(); + + mutex_unlock(&klp_mutex); +} +static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn); + +/* + * The transition to the target patch state is complete. Clean up the data + * structures. + */ +static void klp_complete_transition(void) +{ + struct klp_object *obj; + struct klp_func *func; + struct task_struct *g, *task; + unsigned int cpu; + bool immediate_func = false; + + if (klp_target_state == KLP_UNPATCHED) { + /* + * All tasks have transitioned to KLP_UNPATCHED so we can now + * remove the new functions from the func_stack. + */ + klp_unpatch_objects(klp_transition_patch); + + /* + * Make sure klp_ftrace_handler() can no longer see functions + * from this patch on the ops->func_stack. Otherwise, after + * func->transition gets cleared, the handler may choose a + * removed function. + */ + synchronize_rcu(); + } + + if (klp_transition_patch->immediate) + goto done; + + klp_for_each_object(klp_transition_patch, obj) { + klp_for_each_func(obj, func) { + func->transition = false; + if (func->immediate) + immediate_func = true; + } + } + + if (klp_target_state == KLP_UNPATCHED && !immediate_func) + module_put(klp_transition_patch->mod); + + /* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */ + if (klp_target_state == KLP_PATCHED) + synchronize_rcu(); + + read_lock(&tasklist_lock); + for_each_process_thread(g, task) { + WARN_ON_ONCE(test_tsk_thread_flag(task, TIF_PATCH_PENDING)); + task->patch_state = KLP_UNDEFINED; + } + read_unlock(&tasklist_lock); + + for_each_possible_cpu(cpu) { + task = idle_task(cpu); + WARN_ON_ONCE(test_tsk_thread_flag(task, TIF_PATCH_PENDING)); + task->patch_state = KLP_UNDEFINED; + } + +done: + klp_target_state = KLP_UNDEFINED; + klp_transition_patch = NULL; +} + +/* + * This is called in the error path, to cancel a transition before it has + * started, i.e. klp_init_transition() has been called but + * klp_start_transition() hasn't. If the transition *has* been started, + * klp_reverse_transition() should be used instead. + */ +void klp_cancel_transition(void) +{ + if (WARN_ON_ONCE(klp_target_state != KLP_PATCHED)) + return; + + klp_target_state = KLP_UNPATCHED; + klp_complete_transition(); +} + +/* + * Switch the patched state of the task to the set of functions in the target + * patch state. + * + * NOTE: If task is not 'current', the caller must ensure the task is inactive. + * Otherwise klp_ftrace_handler() might read the wrong 'patch_state' value. + */ +void klp_update_patch_state(struct task_struct *task) +{ + rcu_read_lock(); + + /* + * This test_and_clear_tsk_thread_flag() call also serves as a read + * barrier (smp_rmb) for two cases: + * + * 1) Enforce the order of the TIF_PATCH_PENDING read and the + * klp_target_state read. The corresponding write barrier is in + * klp_init_transition(). + * + * 2) Enforce the order of the TIF_PATCH_PENDING read and a future read + * of func->transition, if klp_ftrace_handler() is called later on + * the same CPU. See __klp_disable_patch(). + */ + if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING)) + task->patch_state = READ_ONCE(klp_target_state); + + rcu_read_unlock(); +} + +/* + * Determine whether the given stack trace includes any references to a + * to-be-patched or to-be-unpatched function. + */ +static int klp_check_stack_func(struct klp_func *func, + struct stack_trace *trace) +{ + unsigned long func_addr, func_size, address; + struct klp_ops *ops; + int i; + + if (func->immediate) + return 0; + + for (i = 0; i < trace->nr_entries; i++) { + address = trace->entries[i]; + + if (klp_target_state == KLP_UNPATCHED) { + /* + * Check for the to-be-unpatched function + * (the func itself). + */ + func_addr = (unsigned long)func->new_func; + func_size = func->new_size; + } else { + /* + * Check for the to-be-patched function + * (the previous func). + */ + ops = klp_find_ops(func->old_addr); + + if (list_is_singular(&ops->func_stack)) { + /* original function */ + func_addr = func->old_addr; + func_size = func->old_size; + } else { + /* previously patched function */ + struct klp_func *prev; + + prev = list_next_entry(func, stack_node); + func_addr = (unsigned long)prev->new_func; + func_size = prev->new_size; + } + } + + if (address >= func_addr && address < func_addr + func_size) + return -EAGAIN; + } + + return 0; +} + +/* + * Determine whether it's safe to transition the task to the target patch state + * by looking for any to-be-patched or to-be-unpatched functions on its stack. + */ +static int klp_check_stack(struct task_struct *task, char *err_buf) +{ + static unsigned long entries[MAX_STACK_ENTRIES]; + struct stack_trace trace; + struct klp_object *obj; + struct klp_func *func; + int ret; + + trace.skip = 0; + trace.nr_entries = 0; + trace.max_entries = MAX_STACK_ENTRIES; + trace.entries = entries; + ret = save_stack_trace_tsk_reliable(task, &trace); + WARN_ON_ONCE(ret == -ENOSYS); + if (ret) { + snprintf(err_buf, STACK_ERR_BUF_SIZE, + "%s: %s:%d has an unreliable stack\n", + __func__, task->comm, task->pid); + return ret; + } + + klp_for_each_object(klp_transition_patch, obj) { + if (!obj->patched) + continue; + klp_for_each_func(obj, func) { + ret = klp_check_stack_func(func, &trace); + if (ret) { + snprintf(err_buf, STACK_ERR_BUF_SIZE, + "%s: %s:%d is sleeping on function %s\n", + __func__, task->comm, task->pid, + func->old_name); + return ret; + } + } + } + + return 0; +} + +/* + * Try to safely switch a task to the target patch state. If it's currently + * running, or it's sleeping on a to-be-patched or to-be-unpatched function, or + * if the stack is unreliable, return false. + */ +static bool klp_try_switch_task(struct task_struct *task) +{ + struct rq *rq; + struct rq_flags flags; + int ret; + bool success = false; + char err_buf[STACK_ERR_BUF_SIZE]; + + err_buf[0] = '\0'; + + /* check if this task has already switched over */ + if (task->patch_state == klp_target_state) + return true; + + /* + * For arches which don't have reliable stack traces, we have to rely + * on other methods (e.g., switching tasks at kernel exit). + */ + if (!klp_have_reliable_stack()) + return false; + + /* + * Now try to check the stack for any to-be-patched or to-be-unpatched + * functions. If all goes well, switch the task to the target patch + * state. + */ + rq = task_rq_lock(task, &flags); + + if (task_running(rq, task) && task != current) { + snprintf(err_buf, STACK_ERR_BUF_SIZE, + "%s: %s:%d is running\n", __func__, task->comm, + task->pid); + goto done; + } + + ret = klp_check_stack(task, err_buf); + if (ret) + goto done; + + success = true; + + clear_tsk_thread_flag(task, TIF_PATCH_PENDING); + task->patch_state = klp_target_state; + +done: + task_rq_unlock(rq, task, &flags); + + /* + * Due to console deadlock issues, pr_debug() can't be used while + * holding the task rq lock. Instead we have to use a temporary buffer + * and print the debug message after releasing the lock. + */ + if (err_buf[0] != '\0') + pr_debug("%s", err_buf); + + return success; + +} + +/* + * Try to switch all remaining tasks to the target patch state by walking the + * stacks of sleeping tasks and looking for any to-be-patched or + * to-be-unpatched functions. If such functions are found, the task can't be + * switched yet. + * + * If any tasks are still stuck in the initial patch state, schedule a retry. + */ +void klp_try_complete_transition(void) +{ + unsigned int cpu; + struct task_struct *g, *task; + bool complete = true; + + WARN_ON_ONCE(klp_target_state == KLP_UNDEFINED); + + /* + * If the patch can be applied or reverted immediately, skip the + * per-task transitions. + */ + if (klp_transition_patch->immediate) + goto success; + + /* + * Try to switch the tasks to the target patch state by walking their + * stacks and looking for any to-be-patched or to-be-unpatched + * functions. If such functions are found on a stack, or if the stack + * is deemed unreliable, the task can't be switched yet. + * + * Usually this will transition most (or all) of the tasks on a system + * unless the patch includes changes to a very common function. + */ + read_lock(&tasklist_lock); + for_each_process_thread(g, task) + if (!klp_try_switch_task(task)) + complete = false; + read_unlock(&tasklist_lock); + + /* + * Ditto for the idle "swapper" tasks. + */ + get_online_cpus(); + for_each_possible_cpu(cpu) { + task = idle_task(cpu); + if (cpu_online(cpu)) { + if (!klp_try_switch_task(task)) + complete = false; + } else if (task->patch_state != klp_target_state) { + /* offline idle tasks can be switched immediately */ + clear_tsk_thread_flag(task, TIF_PATCH_PENDING); + task->patch_state = klp_target_state; + } + } + put_online_cpus(); + + if (!complete) { + /* + * Some tasks weren't able to be switched over. Try again + * later and/or wait for other methods like kernel exit + * switching. + */ + schedule_delayed_work(&klp_transition_work, + round_jiffies_relative(HZ)); + return; + } + +success: + pr_notice("'%s': %s complete\n", klp_transition_patch->mod->name, + klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); + + /* we're done, now cleanup the data structures */ + klp_complete_transition(); +} + +/* + * Start the transition to the specified target patch state so tasks can begin + * switching to it. + */ +void klp_start_transition(void) +{ + struct task_struct *g, *task; + unsigned int cpu; + + WARN_ON_ONCE(klp_target_state == KLP_UNDEFINED); + + pr_notice("'%s': %s...\n", klp_transition_patch->mod->name, + klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); + + /* + * If the patch can be applied or reverted immediately, skip the + * per-task transitions. + */ + if (klp_transition_patch->immediate) + return; + + /* + * Mark all normal tasks as needing a patch state update. They'll + * switch either in klp_try_complete_transition() or as they exit the + * kernel. + */ + read_lock(&tasklist_lock); + for_each_process_thread(g, task) + if (task->patch_state != klp_target_state) + set_tsk_thread_flag(task, TIF_PATCH_PENDING); + read_unlock(&tasklist_lock); + + /* + * Mark all idle tasks as needing a patch state update. They'll switch + * either in klp_try_complete_transition() or at the idle loop switch + * point. + */ + for_each_possible_cpu(cpu) { + task = idle_task(cpu); + if (task->patch_state != klp_target_state) + set_tsk_thread_flag(task, TIF_PATCH_PENDING); + } +} + +/* + * Initialize the global target patch state and all tasks to the initial patch + * state, and initialize all function transition states to true in preparation + * for patching or unpatching. + */ +void klp_init_transition(struct klp_patch *patch, int state) +{ + struct task_struct *g, *task; + unsigned int cpu; + struct klp_object *obj; + struct klp_func *func; + int initial_state = !state; + + WARN_ON_ONCE(klp_target_state != KLP_UNDEFINED); + + klp_transition_patch = patch; + + /* + * Set the global target patch state which tasks will switch to. This + * has no effect until the TIF_PATCH_PENDING flags get set later. + */ + klp_target_state = state; + + /* + * If the patch can be applied or reverted immediately, skip the + * per-task transitions. + */ + if (patch->immediate) + return; + + /* + * Initialize all tasks to the initial patch state to prepare them for + * switching to the target state. + */ + read_lock(&tasklist_lock); + for_each_process_thread(g, task) { + WARN_ON_ONCE(task->patch_state != KLP_UNDEFINED); + task->patch_state = initial_state; + } + read_unlock(&tasklist_lock); + + /* + * Ditto for the idle "swapper" tasks. + */ + for_each_possible_cpu(cpu) { + task = idle_task(cpu); + WARN_ON_ONCE(task->patch_state != KLP_UNDEFINED); + task->patch_state = initial_state; + } + + /* + * Enforce the order of the task->patch_state initializations and the + * func->transition updates to ensure that klp_ftrace_handler() doesn't + * see a func in transition with a task->patch_state of KLP_UNDEFINED. + * + * Also enforce the order of the klp_target_state write and future + * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() doesn't + * set a task->patch_state to KLP_UNDEFINED. + */ + smp_wmb(); + + /* + * Set the func transition states so klp_ftrace_handler() will know to + * switch to the transition logic. + * + * When patching, the funcs aren't yet in the func_stack and will be + * made visible to the ftrace handler shortly by the calls to + * klp_patch_object(). + * + * When unpatching, the funcs are already in the func_stack and so are + * already visible to the ftrace handler. + */ + klp_for_each_object(patch, obj) + klp_for_each_func(obj, func) + func->transition = true; +} + +/* + * This function can be called in the middle of an existing transition to + * reverse the direction of the target patch state. This can be done to + * effectively cancel an existing enable or disable operation if there are any + * tasks which are stuck in the initial patch state. + */ +void klp_reverse_transition(void) +{ + unsigned int cpu; + struct task_struct *g, *task; + + klp_transition_patch->enabled = !klp_transition_patch->enabled; + + klp_target_state = !klp_target_state; + + /* + * Clear all TIF_PATCH_PENDING flags to prevent races caused by + * klp_update_patch_state() running in parallel with + * klp_start_transition(). + */ + read_lock(&tasklist_lock); + for_each_process_thread(g, task) + clear_tsk_thread_flag(task, TIF_PATCH_PENDING); + read_unlock(&tasklist_lock); + + for_each_possible_cpu(cpu) + clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING); + + /* Let any remaining calls to klp_update_patch_state() complete */ + synchronize_rcu(); + + klp_start_transition(); +} + +/* Called from copy_process() during fork */ +void klp_copy_process(struct task_struct *child) +{ + child->patch_state = current->patch_state; + + /* TIF_PATCH_PENDING gets copied in setup_thread_stack() */ +} diff --git a/kernel/livepatch/transition.h b/kernel/livepatch/transition.h new file mode 100644 index 0000000000000000000000000000000000000000..ce09b326546cd3bf71f6e0a300bfd865eb1677ca --- /dev/null +++ b/kernel/livepatch/transition.h @@ -0,0 +1,14 @@ +#ifndef _LIVEPATCH_TRANSITION_H +#define _LIVEPATCH_TRANSITION_H + +#include + +extern struct klp_patch *klp_transition_patch; + +void klp_init_transition(struct klp_patch *patch, int state); +void klp_cancel_transition(void); +void klp_start_transition(void); +void klp_try_complete_transition(void); +void klp_reverse_transition(void); + +#endif /* _LIVEPATCH_TRANSITION_H */ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 34e97d970761671a804a9ba2dede77809f42c155..c0e31bfee25c2ab0cfefef14b9e24e2ce8a5c523 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -660,6 +661,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) struct lockdep_subclass_key *key; struct hlist_head *hash_head; struct lock_class *class; + bool is_static = false; if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { debug_locks_off(); @@ -673,10 +675,23 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) /* * Static locks do not have their class-keys yet - for them the key - * is the lock object itself: + * is the lock object itself. If the lock is in the per cpu area, + * the canonical address of the lock (per cpu offset removed) is + * used. */ - if (unlikely(!lock->key)) - lock->key = (void *)lock; + if (unlikely(!lock->key)) { + unsigned long can_addr, addr = (unsigned long)lock; + + if (__is_kernel_percpu_address(addr, &can_addr)) + lock->key = (void *)can_addr; + else if (__is_module_percpu_address(addr, &can_addr)) + lock->key = (void *)can_addr; + else if (static_obj(lock)) + lock->key = (void *)lock; + else + return ERR_PTR(-EINVAL); + is_static = true; + } /* * NOTE: the class-key must be unique. For dynamic locks, a static @@ -708,7 +723,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) } } - return NULL; + return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); } /* @@ -726,19 +741,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) DEBUG_LOCKS_WARN_ON(!irqs_disabled()); class = look_up_lock_class(lock, subclass); - if (likely(class)) + if (likely(!IS_ERR_OR_NULL(class))) goto out_set_class_cache; /* * Debug-check: all keys must be persistent! - */ - if (!static_obj(lock->key)) { + */ + if (IS_ERR(class)) { debug_locks_off(); printk("INFO: trying to register non-static key.\n"); printk("the code is fine but needs lockdep annotation.\n"); printk("turning off the locking correctness validator.\n"); dump_stack(); - return NULL; } @@ -1144,10 +1158,10 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, return 0; printk("\n"); - printk("======================================================\n"); - printk("[ INFO: possible circular locking dependency detected ]\n"); + pr_warn("======================================================\n"); + pr_warn("WARNING: possible circular locking dependency detected\n"); print_kernel_ident(); - printk("-------------------------------------------------------\n"); + pr_warn("------------------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(check_src); @@ -1482,11 +1496,11 @@ print_bad_irq_dependency(struct task_struct *curr, return 0; printk("\n"); - printk("======================================================\n"); - printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + pr_warn("=====================================================\n"); + pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", irqclass, irqclass); print_kernel_ident(); - printk("------------------------------------------------------\n"); + pr_warn("-----------------------------------------------------\n"); printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, @@ -1711,10 +1725,10 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, return 0; printk("\n"); - printk("=============================================\n"); - printk("[ INFO: possible recursive locking detected ]\n"); + pr_warn("============================================\n"); + pr_warn("WARNING: possible recursive locking detected\n"); print_kernel_ident(); - printk("---------------------------------------------\n"); + pr_warn("--------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(next); @@ -2061,10 +2075,10 @@ static void print_collision(struct task_struct *curr, struct lock_chain *chain) { printk("\n"); - printk("======================\n"); - printk("[chain_key collision ]\n"); + pr_warn("============================\n"); + pr_warn("WARNING: chain_key collision\n"); print_kernel_ident(); - printk("----------------------\n"); + pr_warn("----------------------------\n"); printk("%s/%d: ", current->comm, task_pid_nr(current)); printk("Hash chain already cached but the contents don't match!\n"); @@ -2360,10 +2374,10 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, return 0; printk("\n"); - printk("=================================\n"); - printk("[ INFO: inconsistent lock state ]\n"); + pr_warn("================================\n"); + pr_warn("WARNING: inconsistent lock state\n"); print_kernel_ident(); - printk("---------------------------------\n"); + pr_warn("--------------------------------\n"); printk("inconsistent {%s} -> {%s} usage.\n", usage_str[prev_bit], usage_str[new_bit]); @@ -2425,10 +2439,10 @@ print_irq_inversion_bug(struct task_struct *curr, return 0; printk("\n"); - printk("=========================================================\n"); - printk("[ INFO: possible irq lock inversion dependency detected ]\n"); + pr_warn("========================================================\n"); + pr_warn("WARNING: possible irq lock inversion dependency detected\n"); print_kernel_ident(); - printk("---------------------------------------------------------\n"); + pr_warn("--------------------------------------------------------\n"); printk("%s/%d just changed the state of lock:\n", curr->comm, task_pid_nr(curr)); print_lock(this); @@ -2863,6 +2877,8 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) if (unlikely(!debug_locks)) return; + gfp_mask = current_gfp_context(gfp_mask); + /* no reclaim without waiting on it */ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) return; @@ -2872,7 +2888,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) return; /* We're only interested __GFP_FS allocations for now */ - if (!(gfp_mask & __GFP_FS)) + if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS)) return; /* @@ -2881,6 +2897,10 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) return; + /* Disable lockdep if explicitly requested */ + if (gfp_mask & __GFP_NOLOCKDEP) + return; + mark_held_locks(curr, RECLAIM_FS); } @@ -3170,10 +3190,10 @@ print_lock_nested_lock_not_held(struct task_struct *curr, return 0; printk("\n"); - printk("==================================\n"); - printk("[ BUG: Nested lock was not taken ]\n"); + pr_warn("==================================\n"); + pr_warn("WARNING: Nested lock was not taken\n"); print_kernel_ident(); - printk("----------------------------------\n"); + pr_warn("----------------------------------\n"); printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); print_lock(hlock); @@ -3383,10 +3403,10 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, return 0; printk("\n"); - printk("=====================================\n"); - printk("[ BUG: bad unlock balance detected! ]\n"); + pr_warn("=====================================\n"); + pr_warn("WARNING: bad unlock balance detected!\n"); print_kernel_ident(); - printk("-------------------------------------\n"); + pr_warn("-------------------------------------\n"); printk("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3419,7 +3439,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) * Clearly if the lock hasn't been acquired _ever_, we're not * holding it either, so report failure. */ - if (!class) + if (IS_ERR_OR_NULL(class)) return 0; /* @@ -3437,13 +3457,67 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) return 0; } +/* @depth must not be zero */ +static struct held_lock *find_held_lock(struct task_struct *curr, + struct lockdep_map *lock, + unsigned int depth, int *idx) +{ + struct held_lock *ret, *hlock, *prev_hlock; + int i; + + i = depth - 1; + hlock = curr->held_locks + i; + ret = hlock; + if (match_held_lock(hlock, lock)) + goto out; + + ret = NULL; + for (i--, prev_hlock = hlock--; + i >= 0; + i--, prev_hlock = hlock--) { + /* + * We must not cross into another context: + */ + if (prev_hlock->irq_context != hlock->irq_context) { + ret = NULL; + break; + } + if (match_held_lock(hlock, lock)) { + ret = hlock; + break; + } + } + +out: + *idx = i; + return ret; +} + +static int reacquire_held_locks(struct task_struct *curr, unsigned int depth, + int idx) +{ + struct held_lock *hlock; + + for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) { + if (!__lock_acquire(hlock->instance, + hlock_class(hlock)->subclass, + hlock->trylock, + hlock->read, hlock->check, + hlock->hardirqs_off, + hlock->nest_lock, hlock->acquire_ip, + hlock->references, hlock->pin_count)) + return 1; + } + return 0; +} + static int __lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip) { struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; + struct held_lock *hlock; struct lock_class *class; unsigned int depth; int i; @@ -3456,21 +3530,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name, if (DEBUG_LOCKS_WARN_ON(!depth)) return 0; - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; - } - return print_unlock_imbalance_bug(curr, lock, ip); + hlock = find_held_lock(curr, lock, depth, &i); + if (!hlock) + return print_unlock_imbalance_bug(curr, lock, ip); -found_it: lockdep_init_map(lock, name, key, 0); class = register_lock_class(lock, subclass, 0); hlock->class_idx = class - lock_classes + 1; @@ -3478,15 +3541,46 @@ __lock_set_class(struct lockdep_map *lock, const char *name, curr->lockdep_depth = i; curr->curr_chain_key = hlock->prev_chain_key; - for (; i < depth; i++) { - hlock = curr->held_locks + i; - if (!__lock_acquire(hlock->instance, - hlock_class(hlock)->subclass, hlock->trylock, - hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip, - hlock->references, hlock->pin_count)) - return 0; - } + if (reacquire_held_locks(curr, depth, i)) + return 0; + + /* + * I took it apart and put it back together again, except now I have + * these 'spare' parts.. where shall I put them. + */ + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth)) + return 0; + return 1; +} + +static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock; + unsigned int depth; + int i; + + depth = curr->lockdep_depth; + /* + * This function is about (re)setting the class of a held lock, + * yet we're not actually holding any locks. Naughty user! + */ + if (DEBUG_LOCKS_WARN_ON(!depth)) + return 0; + + hlock = find_held_lock(curr, lock, depth, &i); + if (!hlock) + return print_unlock_imbalance_bug(curr, lock, ip); + + curr->lockdep_depth = i; + curr->curr_chain_key = hlock->prev_chain_key; + + WARN(hlock->read, "downgrading a read lock"); + hlock->read = 1; + hlock->acquire_ip = ip; + + if (reacquire_held_locks(curr, depth, i)) + return 0; /* * I took it apart and put it back together again, except now I have @@ -3508,7 +3602,7 @@ static int __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; + struct held_lock *hlock; unsigned int depth; int i; @@ -3527,21 +3621,10 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) * Check whether the lock exists in the current stack * of held locks: */ - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; - } - return print_unlock_imbalance_bug(curr, lock, ip); + hlock = find_held_lock(curr, lock, depth, &i); + if (!hlock) + return print_unlock_imbalance_bug(curr, lock, ip); -found_it: if (hlock->instance == lock) lock_release_holdtime(hlock); @@ -3568,15 +3651,8 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) curr->lockdep_depth = i; curr->curr_chain_key = hlock->prev_chain_key; - for (i++; i < depth; i++) { - hlock = curr->held_locks + i; - if (!__lock_acquire(hlock->instance, - hlock_class(hlock)->subclass, hlock->trylock, - hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip, - hlock->references, hlock->pin_count)) - return 0; - } + if (reacquire_held_locks(curr, depth, i + 1)) + return 0; /* * We had N bottles of beer on the wall, we drank one, but now @@ -3741,6 +3817,23 @@ void lock_set_class(struct lockdep_map *lock, const char *name, } EXPORT_SYMBOL_GPL(lock_set_class); +void lock_downgrade(struct lockdep_map *lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + current->lockdep_recursion = 1; + check_flags(flags); + if (__lock_downgrade(lock, ip)) + check_chain_key(current); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_downgrade); + /* * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: @@ -3861,7 +3954,7 @@ EXPORT_SYMBOL_GPL(lock_unpin_lock); void lockdep_set_current_reclaim_state(gfp_t gfp_mask) { - current->lockdep_reclaim_gfp = gfp_mask; + current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask); } EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state); @@ -3882,10 +3975,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, return 0; printk("\n"); - printk("=================================\n"); - printk("[ BUG: bad contention detected! ]\n"); + pr_warn("=================================\n"); + pr_warn("WARNING: bad contention detected!\n"); print_kernel_ident(); - printk("---------------------------------\n"); + pr_warn("---------------------------------\n"); printk("%s/%d is trying to contend lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3905,7 +3998,7 @@ static void __lock_contended(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; + struct held_lock *hlock; struct lock_class_stats *stats; unsigned int depth; int i, contention_point, contending_point; @@ -3918,22 +4011,12 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; + hlock = find_held_lock(curr, lock, depth, &i); + if (!hlock) { + print_lock_contention_bug(curr, lock, ip); + return; } - print_lock_contention_bug(curr, lock, ip); - return; -found_it: if (hlock->instance != lock) return; @@ -3957,7 +4040,7 @@ static void __lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; - struct held_lock *hlock, *prev_hlock; + struct held_lock *hlock; struct lock_class_stats *stats; unsigned int depth; u64 now, waittime = 0; @@ -3971,22 +4054,12 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; - prev_hlock = NULL; - for (i = depth-1; i >= 0; i--) { - hlock = curr->held_locks + i; - /* - * We must not cross into another context: - */ - if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) - break; - if (match_held_lock(hlock, lock)) - goto found_it; - prev_hlock = hlock; + hlock = find_held_lock(curr, lock, depth, &i); + if (!hlock) { + print_lock_contention_bug(curr, lock, _RET_IP_); + return; } - print_lock_contention_bug(curr, lock, _RET_IP_); - return; -found_it: if (hlock->instance != lock) return; @@ -4174,7 +4247,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) * If the class exists we look it up and zap it: */ class = look_up_lock_class(lock, j); - if (class) + if (!IS_ERR_OR_NULL(class)) zap_class(class); } /* @@ -4246,10 +4319,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, return; printk("\n"); - printk("=========================\n"); - printk("[ BUG: held lock freed! ]\n"); + pr_warn("=========================\n"); + pr_warn("WARNING: held lock freed!\n"); print_kernel_ident(); - printk("-------------------------\n"); + pr_warn("-------------------------\n"); printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", curr->comm, task_pid_nr(curr), mem_from, mem_to-1); print_lock(hlock); @@ -4304,11 +4377,11 @@ static void print_held_locks_bug(void) return; printk("\n"); - printk("=====================================\n"); - printk("[ BUG: %s/%d still has locks held! ]\n", + pr_warn("====================================\n"); + pr_warn("WARNING: %s/%d still has locks held!\n", current->comm, task_pid_nr(current)); print_kernel_ident(); - printk("-------------------------------------\n"); + pr_warn("------------------------------------\n"); lockdep_print_held_locks(current); printk("\nstack backtrace:\n"); dump_stack(); @@ -4373,7 +4446,7 @@ void debug_show_all_locks(void) } while_each_thread(g, p); printk("\n"); - printk("=============================================\n\n"); + pr_warn("=============================================\n\n"); if (unlock) read_unlock(&tasklist_lock); @@ -4403,10 +4476,10 @@ asmlinkage __visible void lockdep_sys_exit(void) if (!debug_locks_off()) return; printk("\n"); - printk("================================================\n"); - printk("[ BUG: lock held when returning to user space! ]\n"); + pr_warn("================================================\n"); + pr_warn("WARNING: lock held when returning to user space!\n"); print_kernel_ident(); - printk("------------------------------------------------\n"); + pr_warn("------------------------------------------------\n"); printk("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); @@ -4423,13 +4496,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ printk("\n"); - pr_err("===============================\n"); - pr_err("[ ERR: suspicious RCU usage. ]\n"); + pr_warn("=============================\n"); + pr_warn("WARNING: suspicious RCU usage\n"); print_kernel_ident(); - pr_err("-------------------------------\n"); - pr_err("%s:%d %s!\n", file, line, s); - pr_err("\nother info that might help us debug this:\n\n"); - pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n", + pr_warn("-----------------------------\n"); + printk("%s:%d %s!\n", file, line, s); + printk("\nother info that might help us debug this:\n\n"); + printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" : !rcu_is_watching() diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index c2b88490d857583026a35090b62f7891446b7ba2..c08fbd2f5ba9fa2a806f326a3a85f5d021d74027 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -46,13 +46,13 @@ enum { (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) /* - * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text, * .data and .bss to fit in required 32MB limit for the kernel. With - * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * CONFIG_LOCKDEP we could go over this limit and cause system boot-up problems. * So, reduce the static allocations for lockdeps related structures so that * everything fits in current required size limit. */ -#ifdef CONFIG_PROVE_LOCKING_SMALL +#ifdef CONFIG_LOCKDEP_SMALL /* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 97ee9df32e0f0395dc79dc0236557bd1103047c5..58e366ad36f4e9bf87bf487966a91130feb4f7d7 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -102,10 +102,11 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) return; } - printk("\n============================================\n"); - printk( "[ BUG: circular locking deadlock detected! ]\n"); - printk("%s\n", print_tainted()); - printk( "--------------------------------------------\n"); + pr_warn("\n"); + pr_warn("============================================\n"); + pr_warn("WARNING: circular locking deadlock detected!\n"); + pr_warn("%s\n", print_tainted()); + pr_warn("--------------------------------------------\n"); printk("%s/%d is deadlocking current task %s/%d\n\n", task->comm, task_pid_nr(task), current->comm, task_pid_nr(current)); @@ -174,12 +175,3 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) lock->name = name; } -void -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) -{ -} - -void rt_mutex_deadlock_account_unlock(struct task_struct *task) -{ -} - diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h index d0519c3432b678b8c5de116b939cde12fa6db152..b585af9a1b508ea28007a05f4804606c336f14bd 100644 --- a/kernel/locking/rtmutex-debug.h +++ b/kernel/locking/rtmutex-debug.h @@ -9,9 +9,6 @@ * This file contains macros used solely by rtmutex.c. Debug version. */ -extern void -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); -extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6edc32ecd9c54446e0bfbe692c407afa07433e6c..28cd09e635ed669fe6c93b28eb1d59e46bbf4615 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -224,6 +224,12 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, } #endif +/* + * Only use with rt_mutex_waiter_{less,equal}() + */ +#define task_to_waiter(p) \ + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } + static inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, struct rt_mutex_waiter *right) @@ -238,12 +244,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, * then right waiter has a dl_prio() too. */ if (dl_prio(left->prio)) - return dl_time_before(left->task->dl.deadline, - right->task->dl.deadline); + return dl_time_before(left->deadline, right->deadline); return 0; } +static inline int +rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + struct rt_mutex_waiter *right) +{ + if (left->prio != right->prio) + return 0; + + /* + * If both waiters have dl_prio(), we check the deadlines of the + * associated tasks. + * If left waiter has a dl_prio(), and we didn't return 0 above, + * then right waiter has a dl_prio() too. + */ + if (dl_prio(left->prio)) + return left->deadline == right->deadline; + + return 1; +} + static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { @@ -322,72 +346,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) RB_CLEAR_NODE(&waiter->pi_tree_entry); } -/* - * Calculate task priority from the waiter tree priority - * - * Return task->normal_prio when the waiter tree is empty or when - * the waiter is not allowed to do priority boosting - */ -int rt_mutex_getprio(struct task_struct *task) +static void rt_mutex_adjust_prio(struct task_struct *p) { - if (likely(!task_has_pi_waiters(task))) - return task->normal_prio; + struct task_struct *pi_task = NULL; - return min(task_top_pi_waiter(task)->prio, - task->normal_prio); -} + lockdep_assert_held(&p->pi_lock); -struct task_struct *rt_mutex_get_top_task(struct task_struct *task) -{ - if (likely(!task_has_pi_waiters(task))) - return NULL; + if (task_has_pi_waiters(p)) + pi_task = task_top_pi_waiter(p)->task; - return task_top_pi_waiter(task)->task; -} - -/* - * Called by sched_setscheduler() to get the priority which will be - * effective after the change. - */ -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) -{ - if (!task_has_pi_waiters(task)) - return newprio; - - if (task_top_pi_waiter(task)->task->prio <= newprio) - return task_top_pi_waiter(task)->task->prio; - return newprio; -} - -/* - * Adjust the priority of a task, after its pi_waiters got modified. - * - * This can be both boosting and unboosting. task->pi_lock must be held. - */ -static void __rt_mutex_adjust_prio(struct task_struct *task) -{ - int prio = rt_mutex_getprio(task); - - if (task->prio != prio || dl_prio(prio)) - rt_mutex_setprio(task, prio); -} - -/* - * Adjust task priority (undo boosting). Called from the exit path of - * rt_mutex_slowunlock() and rt_mutex_slowlock(). - * - * (Note: We do this outside of the protection of lock->wait_lock to - * allow the lock to be taken while or before we readjust the priority - * of task. We do not use the spin_xx_mutex() variants here as we are - * outside of the debug path.) - */ -void rt_mutex_adjust_prio(struct task_struct *task) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - __rt_mutex_adjust_prio(task); - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + rt_mutex_setprio(p, pi_task); } /* @@ -610,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * enabled we continue, but stop the requeueing in the chain * walk. */ - if (waiter->prio == task->prio) { + if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { if (!detect_deadlock) goto out_unlock_pi; else @@ -706,7 +674,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, /* [7] Requeue the waiter in the lock waiter tree. */ rt_mutex_dequeue(lock, waiter); + + /* + * Update the waiter prio fields now that we're dequeued. + * + * These values can have changed through either: + * + * sys_sched_set_scheduler() / sys_sched_setattr() + * + * or + * + * DL CBS enforcement advancing the effective deadline. + * + * Even though pi_waiters also uses these fields, and that tree is only + * updated in [11], we can do this here, since we hold [L], which + * serializes all pi_waiters access and rb_erase() does not care about + * the values of the node being removed. + */ waiter->prio = task->prio; + waiter->deadline = task->dl.deadline; + rt_mutex_enqueue(lock, waiter); /* [8] Release the task */ @@ -747,7 +734,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, */ rt_mutex_dequeue_pi(task, prerequeue_top_waiter); rt_mutex_enqueue_pi(task, waiter); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); } else if (prerequeue_top_waiter == waiter) { /* @@ -763,7 +750,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, rt_mutex_dequeue_pi(task, waiter); waiter = rt_mutex_top_waiter(lock); rt_mutex_enqueue_pi(task, waiter); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); } else { /* * Nothing changed. No need to do any priority @@ -833,6 +820,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter) { + lockdep_assert_held(&lock->wait_lock); + /* * Before testing whether we can acquire @lock, we set the * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all @@ -892,7 +881,8 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, * the top waiter priority (kernel view), * @task lost. */ - if (task->prio >= rt_mutex_top_waiter(lock)->prio) + if (!rt_mutex_waiter_less(task_to_waiter(task), + rt_mutex_top_waiter(lock))) return 0; /* @@ -938,8 +928,6 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, */ rt_mutex_set_owner(lock, task); - rt_mutex_deadlock_account_lock(lock, task); - return 1; } @@ -960,6 +948,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex *next_lock; int chain_walk = 0, res; + lockdep_assert_held(&lock->wait_lock); + /* * Early deadlock detection. We really don't want the task to * enqueue on itself just to untangle the mess later. It's not @@ -973,10 +963,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, return -EDEADLK; raw_spin_lock(&task->pi_lock); - __rt_mutex_adjust_prio(task); + rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; waiter->prio = task->prio; + waiter->deadline = task->dl.deadline; /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) @@ -995,7 +986,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, rt_mutex_dequeue_pi(owner, top_waiter); rt_mutex_enqueue_pi(owner, waiter); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); if (owner->pi_blocked_on) chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { @@ -1047,12 +1038,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, waiter = rt_mutex_top_waiter(lock); /* - * Remove it from current->pi_waiters. We do not adjust a - * possible priority boost right now. We execute wakeup in the - * boosted mode and go back to normal after releasing - * lock->wait_lock. + * Remove it from current->pi_waiters and deboost. + * + * We must in fact deboost here in order to ensure we call + * rt_mutex_setprio() to update p->pi_top_task before the + * task unblocks. */ rt_mutex_dequeue_pi(current, waiter); + rt_mutex_adjust_prio(current); /* * As we are waking up the top waiter, and the waiter stays @@ -1064,9 +1057,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, */ lock->owner = (void *) RT_MUTEX_HAS_WAITERS; - raw_spin_unlock(¤t->pi_lock); - + /* + * We deboosted before waking the top waiter task such that we don't + * run two tasks with the 'same' priority (and ensure the + * p->pi_top_task pointer points to a blocked task). This however can + * lead to priority inversion if we would get preempted after the + * deboost but before waking our donor task, hence the preempt_disable() + * before unlock. + * + * Pairs with preempt_enable() in rt_mutex_postunlock(); + */ + preempt_disable(); wake_q_add(wake_q, waiter->task); + raw_spin_unlock(¤t->pi_lock); } /* @@ -1082,6 +1085,8 @@ static void remove_waiter(struct rt_mutex *lock, struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex *next_lock; + lockdep_assert_held(&lock->wait_lock); + raw_spin_lock(¤t->pi_lock); rt_mutex_dequeue(lock, waiter); current->pi_blocked_on = NULL; @@ -1101,7 +1106,7 @@ static void remove_waiter(struct rt_mutex *lock, if (rt_mutex_has_waiters(lock)) rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); - __rt_mutex_adjust_prio(owner); + rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ next_lock = task_blocked_on_lock(owner); @@ -1140,8 +1145,7 @@ void rt_mutex_adjust_pi(struct task_struct *task) raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; - if (!waiter || (waiter->prio == task->prio && - !dl_prio(task->prio))) { + if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { raw_spin_unlock_irqrestore(&task->pi_lock, flags); return; } @@ -1155,6 +1159,14 @@ void rt_mutex_adjust_pi(struct task_struct *task) next_lock, NULL, task); } +void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) +{ + debug_rt_mutex_init_waiter(waiter); + RB_CLEAR_NODE(&waiter->pi_tree_entry); + RB_CLEAR_NODE(&waiter->tree_entry); + waiter->task = NULL; +} + /** * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop * @lock: the rt_mutex to take @@ -1237,9 +1249,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, unsigned long flags; int ret = 0; - debug_rt_mutex_init_waiter(&waiter); - RB_CLEAR_NODE(&waiter.pi_tree_entry); - RB_CLEAR_NODE(&waiter.tree_entry); + rt_mutex_init_waiter(&waiter); /* * Technically we could use raw_spin_[un]lock_irq() here, but this can @@ -1330,7 +1340,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) /* * Slow path to release a rt-mutex. - * Return whether the current task needs to undo a potential priority boosting. + * + * Return whether the current task needs to call rt_mutex_postunlock(). */ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, struct wake_q_head *wake_q) @@ -1342,8 +1353,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, debug_rt_mutex_unlock(lock); - rt_mutex_deadlock_account_unlock(current); - /* * We must be careful here if the fast path is enabled. If we * have no waiters queued we cannot set owner to NULL here @@ -1390,11 +1399,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, * Queue the next waiter for wakeup once we release the wait_lock. */ mark_wakeup_next_waiter(wake_q, lock); - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); - /* check PI boosting */ - return true; + return true; /* call rt_mutex_postunlock() */ } /* @@ -1409,11 +1416,10 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk)) { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { - rt_mutex_deadlock_account_lock(lock, current); + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 0; - } else - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); + + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); } static inline int @@ -1425,24 +1431,33 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, enum rtmutex_chainwalk chwalk)) { if (chwalk == RT_MUTEX_MIN_CHAINWALK && - likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { - rt_mutex_deadlock_account_lock(lock, current); + likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 0; - } else - return slowfn(lock, state, timeout, chwalk); + + return slowfn(lock, state, timeout, chwalk); } static inline int rt_mutex_fasttrylock(struct rt_mutex *lock, int (*slowfn)(struct rt_mutex *lock)) { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { - rt_mutex_deadlock_account_lock(lock, current); + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) return 1; - } + return slowfn(lock); } +/* + * Performs the wakeup of the the top-waiter and re-enables preemption. + */ +void rt_mutex_postunlock(struct wake_q_head *wake_q) +{ + wake_up_q(wake_q); + + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ + preempt_enable(); +} + static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -1450,18 +1465,11 @@ rt_mutex_fastunlock(struct rt_mutex *lock, { DEFINE_WAKE_Q(wake_q); - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { - rt_mutex_deadlock_account_unlock(current); - - } else { - bool deboost = slowfn(lock, &wake_q); - - wake_up_q(&wake_q); + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) + return; - /* Undo pi boosting if necessary: */ - if (deboost) - rt_mutex_adjust_prio(current); - } + if (slowfn(lock, &wake_q)) + rt_mutex_postunlock(&wake_q); } /** @@ -1495,16 +1503,11 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); /* - * Futex variant with full deadlock detection. + * Futex variant, must not use fastpath. */ -int rt_mutex_timed_futex_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout) +int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) { - might_sleep(); - - return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - RT_MUTEX_FULL_CHAINWALK, - rt_mutex_slowlock); + return rt_mutex_slowtrylock(lock); } /** @@ -1563,20 +1566,43 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) EXPORT_SYMBOL_GPL(rt_mutex_unlock); /** - * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock - * @lock: the rt_mutex to be unlocked - * - * Returns: true/false indicating whether priority adjustment is - * required or not. + * Futex variant, that since futex variants do not use the fast-path, can be + * simple and will not need to retry. */ -bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wqh) +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wake_q) { - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { - rt_mutex_deadlock_account_unlock(current); - return false; + lockdep_assert_held(&lock->wait_lock); + + debug_rt_mutex_unlock(lock); + + if (!rt_mutex_has_waiters(lock)) { + lock->owner = NULL; + return false; /* done */ } - return rt_mutex_slowunlock(lock, wqh); + + /* + * We've already deboosted, mark_wakeup_next_waiter() will + * retain preempt_disabled when we drop the wait_lock, to + * avoid inversion prior to the wakeup. preempt_disable() + * therein pairs with rt_mutex_postunlock(). + */ + mark_wakeup_next_waiter(wake_q, lock); + + return true; /* call postunlock() */ +} + +void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) +{ + DEFINE_WAKE_Q(wake_q); + bool postunlock; + + raw_spin_lock_irq(&lock->wait_lock); + postunlock = __rt_mutex_futex_unlock(lock, &wake_q); + raw_spin_unlock_irq(&lock->wait_lock); + + if (postunlock) + rt_mutex_postunlock(&wake_q); } /** @@ -1637,7 +1663,6 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, __rt_mutex_init(lock, NULL); debug_rt_mutex_proxy_lock(lock, proxy_owner); rt_mutex_set_owner(lock, proxy_owner); - rt_mutex_deadlock_account_lock(lock, proxy_owner); } /** @@ -1657,34 +1682,16 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, { debug_rt_mutex_proxy_unlock(lock); rt_mutex_set_owner(lock, NULL); - rt_mutex_deadlock_account_unlock(proxy_owner); } -/** - * rt_mutex_start_proxy_lock() - Start lock acquisition for another task - * @lock: the rt_mutex to take - * @waiter: the pre-initialized rt_mutex_waiter - * @task: the task to prepare - * - * Returns: - * 0 - task blocked on lock - * 1 - acquired the lock for task, caller should wake it up - * <0 - error - * - * Special API call for FUTEX_REQUEUE_PI support. - */ -int rt_mutex_start_proxy_lock(struct rt_mutex *lock, +int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task) { int ret; - raw_spin_lock_irq(&lock->wait_lock); - - if (try_to_take_rt_mutex(lock, task, NULL)) { - raw_spin_unlock_irq(&lock->wait_lock); + if (try_to_take_rt_mutex(lock, task, NULL)) return 1; - } /* We enforce deadlock detection for futexes */ ret = task_blocks_on_rt_mutex(lock, waiter, task, @@ -1703,13 +1710,37 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, if (unlikely(ret)) remove_waiter(lock, waiter); - raw_spin_unlock_irq(&lock->wait_lock); - debug_rt_mutex_print_deadlock(waiter); return ret; } +/** + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for FUTEX_REQUEUE_PI support. + */ +int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task) +{ + int ret; + + raw_spin_lock_irq(&lock->wait_lock); + ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + raw_spin_unlock_irq(&lock->wait_lock); + + return ret; +} + /** * rt_mutex_next_owner - return the next owner of the lock * @@ -1731,36 +1762,87 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) } /** - * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * rt_mutex_wait_proxy_lock() - Wait for lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have * been started. * @waiter: the pre-initialized rt_mutex_waiter * - * Complete the lock acquisition started our behalf by another thread. + * Wait for the the lock acquisition started on our behalf by + * rt_mutex_start_proxy_lock(). Upon failure, the caller must call + * rt_mutex_cleanup_proxy_lock(). * * Returns: * 0 - success * <0 - error, one of -EINTR, -ETIMEDOUT * - * Special API call for PI-futex requeue support + * Special API call for PI-futex support */ -int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter) { int ret; raw_spin_lock_irq(&lock->wait_lock); - - set_current_state(TASK_INTERRUPTIBLE); - /* sleep on the mutex */ + set_current_state(TASK_INTERRUPTIBLE); ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); + raw_spin_unlock_irq(&lock->wait_lock); - if (unlikely(ret)) - remove_waiter(lock, waiter); + return ret; +} + +/** + * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition + * @lock: the rt_mutex we were woken on + * @waiter: the pre-initialized rt_mutex_waiter + * + * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). + * + * Unless we acquired the lock; we're still enqueued on the wait-list and can + * in fact still be granted ownership until we're removed. Therefore we can + * find we are in fact the owner and must disregard the + * rt_mutex_wait_proxy_lock() failure. + * + * Returns: + * true - did the cleanup, we done. + * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, + * caller should disregards its return value. + * + * Special API call for PI-futex support + */ +bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter) +{ + bool cleanup = false; + raw_spin_lock_irq(&lock->wait_lock); + /* + * Do an unconditional try-lock, this deals with the lock stealing + * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter() + * sets a NULL owner. + * + * We're not interested in the return value, because the subsequent + * test on rt_mutex_owner() will infer that. If the trylock succeeded, + * we will own the lock and it will have removed the waiter. If we + * failed the trylock, we're still not owner and we need to remove + * ourselves. + */ + try_to_take_rt_mutex(lock, current, waiter); + /* + * Unless we're the owner; we're still enqueued on the wait_list. + * So check if we became owner, if not, take us off the wait_list. + */ + if (rt_mutex_owner(lock) != current) { + remove_waiter(lock, waiter); + cleanup = true; + } /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. @@ -1769,5 +1851,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, raw_spin_unlock_irq(&lock->wait_lock); - return ret; + return cleanup; } diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h index c4060584c40769a3174aebcf7cc7ff245d7363c3..6607802efa8bd3e5fb93b72b88f613b24c158dc1 100644 --- a/kernel/locking/rtmutex.h +++ b/kernel/locking/rtmutex.h @@ -11,8 +11,6 @@ */ #define rt_mutex_deadlock_check(l) (0) -#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) -#define rt_mutex_deadlock_account_unlock(l) do { } while (0) #define debug_rt_mutex_init_waiter(w) do { } while (0) #define debug_rt_mutex_free_waiter(w) do { } while (0) #define debug_rt_mutex_lock(l) do { } while (0) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 856dfff5c33ab5a24fd0464032daf1bf8b81e351..72ad45a9a79439c53f73afee4ab87330957a0ff2 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -34,6 +34,7 @@ struct rt_mutex_waiter { struct rt_mutex *deadlock_lock; #endif int prio; + u64 deadline; }; /* @@ -103,16 +104,26 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); +extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); +extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task); extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter); -extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); -extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wqh); -extern void rt_mutex_adjust_prio(struct task_struct *task); +extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter); +extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter); + +extern int rt_mutex_futex_trylock(struct rt_mutex *l); + +extern void rt_mutex_futex_unlock(struct rt_mutex *lock); +extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, + struct wake_q_head *wqh); + +extern void rt_mutex_postunlock(struct wake_q_head *wake_q); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 90a74ccd85a4b979295828bddee80b6dbddb62a9..4d48b1c4870dda0b33c7f93e1aed01fe8b6ceb61 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -124,10 +124,8 @@ EXPORT_SYMBOL(up_write); */ void downgrade_write(struct rw_semaphore *sem) { - /* - * lockdep: a downgraded write will live on as a write - * dependency. - */ + lock_downgrade(&sem->dep_map, _RET_IP_); + rwsem_set_reader_owned(sem); __downgrade_write(sem); } diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 6b7abb334ca6027dd2b189a211671fa98a33be82..39f56c870051b505611746ff0033cc6a7f6e1a10 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -353,8 +353,8 @@ static int test_cycle(unsigned int ncpus) struct stress { struct work_struct work; struct ww_mutex *locks; + unsigned long timeout; int nlocks; - int nloops; }; static int *get_random_order(int count) @@ -398,12 +398,11 @@ static void stress_inorder_work(struct work_struct *work) if (!order) return; - ww_acquire_init(&ctx, &ww_class); - do { int contended = -1; int n, err; + ww_acquire_init(&ctx, &ww_class); retry: err = 0; for (n = 0; n < nlocks; n++) { @@ -433,9 +432,9 @@ static void stress_inorder_work(struct work_struct *work) __func__, err); break; } - } while (--stress->nloops); - ww_acquire_fini(&ctx); + ww_acquire_fini(&ctx); + } while (!time_after(jiffies, stress->timeout)); kfree(order); kfree(stress); @@ -470,9 +469,9 @@ static void stress_reorder_work(struct work_struct *work) kfree(order); order = NULL; - ww_acquire_init(&ctx, &ww_class); - do { + ww_acquire_init(&ctx, &ww_class); + list_for_each_entry(ll, &locks, link) { err = ww_mutex_lock(ll->lock, &ctx); if (!err) @@ -495,9 +494,9 @@ static void stress_reorder_work(struct work_struct *work) dummy_load(stress); list_for_each_entry(ll, &locks, link) ww_mutex_unlock(ll->lock); - } while (--stress->nloops); - ww_acquire_fini(&ctx); + ww_acquire_fini(&ctx); + } while (!time_after(jiffies, stress->timeout)); out: list_for_each_entry_safe(ll, ln, &locks, link) @@ -523,7 +522,7 @@ static void stress_one_work(struct work_struct *work) __func__, err); break; } - } while (--stress->nloops); + } while (!time_after(jiffies, stress->timeout)); kfree(stress); } @@ -533,7 +532,7 @@ static void stress_one_work(struct work_struct *work) #define STRESS_ONE BIT(2) #define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE) -static int stress(int nlocks, int nthreads, int nloops, unsigned int flags) +static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; int n; @@ -575,7 +574,7 @@ static int stress(int nlocks, int nthreads, int nloops, unsigned int flags) INIT_WORK(&stress->work, fn); stress->locks = locks; stress->nlocks = nlocks; - stress->nloops = nloops; + stress->timeout = jiffies + 2*HZ; queue_work(wq, &stress->work); nthreads--; @@ -619,15 +618,15 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(16, 2*ncpus, 1<<10, STRESS_INORDER); + ret = stress(16, 2*ncpus, STRESS_INORDER); if (ret) return ret; - ret = stress(16, 2*ncpus, 1<<10, STRESS_REORDER); + ret = stress(16, 2*ncpus, STRESS_REORDER); if (ret) return ret; - ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); if (ret) return ret; diff --git a/kernel/memremap.c b/kernel/memremap.c index 07e85e5229da849d33391f97234c1e1fff2c5ce1..23a6483c3666e35fec7f927d2f6cd4a2e38dbe81 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -182,18 +182,6 @@ struct page_map { struct vmem_altmap altmap; }; -void get_zone_device_page(struct page *page) -{ - percpu_ref_get(page->pgmap->ref); -} -EXPORT_SYMBOL(get_zone_device_page); - -void put_zone_device_page(struct page *page) -{ - put_dev_pagemap(page->pgmap); -} -EXPORT_SYMBOL(put_zone_device_page); - static void pgmap_radix_release(struct resource *res) { resource_size_t key, align_start, align_size, align_end; @@ -237,6 +225,10 @@ static void devm_memremap_pages_release(struct device *dev, void *data) struct resource *res = &page_map->res; resource_size_t align_start, align_size; struct dev_pagemap *pgmap = &page_map->pgmap; + unsigned long pfn; + + for_each_device_pfn(pfn, page_map) + put_page(pfn_to_page(pfn)); if (percpu_ref_tryget_live(pgmap->ref)) { dev_WARN(dev, "%s: page mapping is still live!\n", __func__); @@ -277,7 +269,10 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys) * * Notes: * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time - * (or devm release event). + * (or devm release event). The expected order of events is that @ref has + * been through percpu_ref_kill() before devm_memremap_pages_release(). The + * wait for the completion of all references being dropped and + * percpu_ref_exit() must occur after devm_memremap_pages_release(). * * 2/ @res is expected to be a host memory range that could feasibly be * treated as a "System RAM" range, i.e. not a device mmio range, but @@ -379,6 +374,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, */ list_del(&page->lru); page->pgmap = pgmap; + percpu_ref_get(ref); } devres_add(dev, page_map); return __va(res->start); diff --git a/kernel/module.c b/kernel/module.c index 7eba6dea4f417dbf363731cf2a3cc374ce1d703b..4a3665f8f8372aab68b658c24c30678806823eee 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -49,6 +49,9 @@ #include #include #include +#ifdef CONFIG_STRICT_MODULE_RWX +#include +#endif #include #include #include @@ -665,16 +668,7 @@ static void percpu_modcopy(struct module *mod, memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } -/** - * is_module_percpu_address - test whether address is from module static percpu - * @addr: address to test - * - * Test whether @addr belongs to module static percpu area. - * - * RETURNS: - * %true if @addr is from module static percpu area - */ -bool is_module_percpu_address(unsigned long addr) +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) { struct module *mod; unsigned int cpu; @@ -688,9 +682,15 @@ bool is_module_percpu_address(unsigned long addr) continue; for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(mod->percpu, cpu); - - if ((void *)addr >= start && - (void *)addr < start + mod->percpu_size) { + void *va = (void *)addr; + + if (va >= start && va < start + mod->percpu_size) { + if (can_addr) { + *can_addr = (unsigned long) (va - start); + *can_addr += (unsigned long) + per_cpu_ptr(mod->percpu, + get_boot_cpu_id()); + } preempt_enable(); return true; } @@ -701,6 +701,20 @@ bool is_module_percpu_address(unsigned long addr) return false; } +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + return __is_module_percpu_address(addr, NULL); +} + #else /* ... !CONFIG_SMP */ static inline void __percpu *mod_percpu(struct module *mod) @@ -732,6 +746,11 @@ bool is_module_percpu_address(unsigned long addr) return false; } +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) +{ + return false; +} + #endif /* CONFIG_SMP */ #define MODINFO_ATTR(field) \ @@ -947,6 +966,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; + audit_log_kern_module(name); + if (mutex_lock_interruptible(&module_mutex) != 0) return -EINTR; @@ -2846,7 +2867,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, /* Suck in entire file: we'll want most of it. */ info->hdr = __vmalloc(info->len, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL); + GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL); if (!info->hdr) return -ENOMEM; @@ -4017,7 +4038,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) /* Don't lock: we're in enough trouble already. */ preempt_disable(); - if ((colon = strchr(name, ':')) != NULL) { + if ((colon = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) { if ((mod = find_module_all(name, colon - name, false)) != NULL) ret = mod_find_symname(mod, colon+1); } else { diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 782102e59eed5b4b5379126b865fb9c795892cdd..f6c5d330059ac7996a1aeb7a4c4fdfad128d847d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct kmem_cache *nsproxy_cachep; @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) goto out; } switch_task_namespaces(tsk, new_nsproxy); + + perf_event_namespaces(tsk); out: fput(file); return err; diff --git a/kernel/padata.c b/kernel/padata.c index 3202aa17492c808af5331044de710a2f34e277a3..ac8f1e524836b37a0d305e412b19d6aeeb50c133 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -154,8 +154,6 @@ EXPORT_SYMBOL(padata_do_parallel); * A pointer to the control struct of the next object that needs * serialization, if present in one of the percpu reorder queues. * - * NULL, if all percpu reorder queues are empty. - * * -EINPROGRESS, if the next object that needs serialization will * be parallel processed by another cpu and is not yet present in * the cpu's reorder queue. @@ -182,8 +180,6 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) cpu = padata_index_to_cpu(pd, next_index); next_queue = per_cpu_ptr(pd->pqueue, cpu); - padata = NULL; - reorder = &next_queue->reorder; spin_lock(&reorder->lock); @@ -235,12 +231,11 @@ static void padata_reorder(struct parallel_data *pd) padata = padata_get_next(pd); /* - * All reorder queues are empty, or the next object that needs - * serialization is parallel processed by another cpu and is - * still on it's way to the cpu's reorder queue, nothing to - * do for now. + * If the next object that needs serialization is parallel + * processed by another cpu and is still on it's way to the + * cpu's reorder queue, nothing to do for now. */ - if (!padata || PTR_ERR(padata) == -EINPROGRESS) + if (PTR_ERR(padata) == -EINPROGRESS) break; /* @@ -354,7 +349,7 @@ static int padata_setup_cpumasks(struct parallel_data *pd, cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { - free_cpumask_var(pd->cpumask.cbcpu); + free_cpumask_var(pd->cpumask.pcpu); return -ENOMEM; } diff --git a/kernel/params.c b/kernel/params.c index a6d6149c0fe60df1ca38d9a66acef281b78ee79d..60b2d8101355fedcfd8dfee99f8a2fa467ea99d9 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -160,58 +160,6 @@ static int parse_one(char *param, return -ENOENT; } -/* You can use " around spaces, but can't escape ". */ -/* Hyphens and underscores equivalent in parameter names. */ -static char *next_arg(char *args, char **param, char **val) -{ - unsigned int i, equals = 0; - int in_quote = 0, quoted = 0; - char *next; - - if (*args == '"') { - args++; - in_quote = 1; - quoted = 1; - } - - for (i = 0; args[i]; i++) { - if (isspace(args[i]) && !in_quote) - break; - if (equals == 0) { - if (args[i] == '=') - equals = i; - } - if (args[i] == '"') - in_quote = !in_quote; - } - - *param = args; - if (!equals) - *val = NULL; - else { - args[equals] = '\0'; - *val = args + equals + 1; - - /* Don't include quotes in value. */ - if (**val == '"') { - (*val)++; - if (args[i-1] == '"') - args[i-1] = '\0'; - } - } - if (quoted && args[i-1] == '"') - args[i-1] = '\0'; - - if (args[i]) { - args[i] = '\0'; - next = args + i + 1; - } else - next = args + i; - - /* Chew up trailing spaces. */ - return skip_spaces(next); -} - /* Args looks like "foo=bar,bar2 baz=fuz wiz". */ char *parse_args(const char *doing, char *args, diff --git a/kernel/pid.c b/kernel/pid.c index 0143ac0ddceb9c79a2c686e6b10b7265b7b5d925..fd1cde1e45760df67a6cc54fa97959061660ab3f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -321,8 +321,10 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) + if (pid_ns_prepare_proc(ns)) { + disable_pid_allocation(ns); goto out_free; + } } get_pid_ns(ns); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index de461aa0bf9acc933254d34ae2ced925f45eefbc..74a5a7255b4d9cb473cc7708d851c64402cca942 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -277,7 +277,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) * if reparented. */ for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); if (pid_ns->nr_hashed == init_pids) break; schedule(); @@ -374,6 +374,29 @@ static struct ns_common *pidns_get(struct task_struct *task) return ns ? &ns->ns : NULL; } +static struct ns_common *pidns_for_children_get(struct task_struct *task) +{ + struct pid_namespace *ns = NULL; + + task_lock(task); + if (task->nsproxy) { + ns = task->nsproxy->pid_ns_for_children; + get_pid_ns(ns); + } + task_unlock(task); + + if (ns) { + read_lock(&tasklist_lock); + if (!ns->child_reaper) { + put_pid_ns(ns); + ns = NULL; + } + read_unlock(&tasklist_lock); + } + + return ns ? &ns->ns : NULL; +} + static void pidns_put(struct ns_common *ns) { put_pid_ns(to_pid_ns(ns)); @@ -443,6 +466,17 @@ const struct proc_ns_operations pidns_operations = { .get_parent = pidns_get_parent, }; +const struct proc_ns_operations pidns_for_children_operations = { + .name = "pid_for_children", + .real_ns_name = "pid", + .type = CLONE_NEWPID, + .get = pidns_for_children_get, + .put = pidns_put, + .install = pidns_install, + .owner = pidns_owner, + .get_parent = pidns_get_parent, +}; + static __init int pid_namespaces_init(void) { pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d79a38de425a0d642834adb5e8ac68ba237db93d..fa46606f33565613d2ef13a1e948f2bae0dfaa8b 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -36,6 +36,9 @@ #include #include #include +#ifdef CONFIG_STRICT_KERNEL_RWX +#include +#endif #include "power.h" @@ -1422,7 +1425,7 @@ static unsigned int nr_meta_pages; * Numbers of normal and highmem page frames allocated for hibernation image * before suspending devices. */ -unsigned int alloc_normal, alloc_highmem; +static unsigned int alloc_normal, alloc_highmem; /* * Memory bitmap used for marking saveable pages (during hibernation) or * hibernation image pages (during restore) diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c index d5760c42f042f4accfb65de23784bcaa877d9b00..61d41ca418441a15945b376964f6f3ed07ac84b8 100644 --- a/kernel/printk/braille.c +++ b/kernel/printk/braille.c @@ -2,12 +2,13 @@ #include #include +#include #include #include "console_cmdline.h" #include "braille.h" -char *_braille_console_setup(char **str, char **brl_options) +int _braille_console_setup(char **str, char **brl_options) { if (!strncmp(*str, "brl,", 4)) { *brl_options = ""; @@ -15,14 +16,14 @@ char *_braille_console_setup(char **str, char **brl_options) } else if (!strncmp(*str, "brl=", 4)) { *brl_options = *str + 4; *str = strchr(*brl_options, ','); - if (!*str) + if (!*str) { pr_err("need port name after brl=\n"); - else - *((*str)++) = 0; - } else - return NULL; + return -EINVAL; + } + *((*str)++) = 0; + } - return *str; + return 0; } int diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h index 769d771145c881cd78f43c035ea0866adb96b9b9..749a6756843a08e2a5f2a9ae4fa984658b43a65b 100644 --- a/kernel/printk/braille.h +++ b/kernel/printk/braille.h @@ -9,7 +9,14 @@ braille_set_options(struct console_cmdline *c, char *brl_options) c->brl_options = brl_options; } -char * +/* + * Setup console according to braille options. + * Return -EINVAL on syntax error, 0 on success (or no braille option was + * actually given). + * Modifies str to point to the serial options + * Sets brl_options to the parsed braille options. + */ +int _braille_console_setup(char **str, char **brl_options); int @@ -25,10 +32,10 @@ braille_set_options(struct console_cmdline *c, char *brl_options) { } -static inline char * +static inline int _braille_console_setup(char **str, char **brl_options) { - return NULL; + return 0; } static inline int diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 2984fb0f0257420de4a7aa2595a52c34f0d33a0e..a1db38abac5b750e8ce228b441b27900360665ec 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -270,7 +270,6 @@ static struct console *exclusive_console; static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; -static int selected_console = -1; static int preferred_console = -1; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); @@ -1002,7 +1001,7 @@ const struct file_operations kmsg_fops = { .release = devkmsg_release, }; -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_CORE /* * This appends the listed symbols to /proc/vmcore * @@ -1011,7 +1010,7 @@ const struct file_operations kmsg_fops = { * symbols are specifically used so that utilities can access and extract the * dmesg log from a vmcore file after a crash. */ -void log_buf_kexec_setup(void) +void log_buf_vmcoreinfo_setup(void) { VMCOREINFO_SYMBOL(log_buf); VMCOREINFO_SYMBOL(log_buf_len); @@ -1911,14 +1910,14 @@ static int __add_preferred_console(char *name, int idx, char *options, i++, c++) { if (strcmp(c->name, name) == 0 && c->index == idx) { if (!brl_options) - selected_console = i; + preferred_console = i; return 0; } } if (i == MAX_CMDLINECONSOLES) return -E2BIG; if (!brl_options) - selected_console = i; + preferred_console = i; strlcpy(c->name, name, sizeof(c->name)); c->options = options; braille_set_options(c, brl_options); @@ -2031,15 +2030,16 @@ void resume_console(void) * @cpu: unused * * If printk() is called from a CPU that is not online yet, the messages - * will be spooled but will not show up on the console. This function is - * called when a new CPU comes online (or fails to come up), and ensures - * that any such output gets printed. + * will be printed on the console only if there are CON_ANYTIME consoles. + * This function is called when a new CPU comes online (or fails to come + * up) or goes offline. */ static int console_cpu_notify(unsigned int cpu) { if (!cpuhp_tasks_frozen) { - console_lock(); - console_unlock(); + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); } return 0; } @@ -2161,7 +2161,7 @@ void console_unlock(void) } /* - * Console drivers are called under logbuf_lock, so + * Console drivers are called with interrupts disabled, so * @console_may_schedule should be cleared before; however, we may * end up dumping a lot of lines, for example, if called from * console registration path, and should invoke cond_resched() @@ -2169,11 +2169,15 @@ void console_unlock(void) * scheduling stall on a slow console leading to RCU stall and * softlockup warnings which exacerbate the issue with more * messages practically incapacitating the system. + * + * console_trylock() is not able to detect the preemptive + * context reliably. Therefore the value must be stored before + * and cleared after the the "again" goto label. */ do_cond_resched = console_may_schedule; +again: console_may_schedule = 0; -again: /* * We released the console_sem lock, so we need to recheck if * cpu is online and (if not) is there at least one CON_ANYTIME @@ -2409,6 +2413,7 @@ void register_console(struct console *newcon) unsigned long flags; struct console *bcon = NULL; struct console_cmdline *c; + static bool has_preferred; if (console_drivers) for_each_console(bcon) @@ -2435,15 +2440,15 @@ void register_console(struct console *newcon) if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; - if (preferred_console < 0 || bcon || !console_drivers) - preferred_console = selected_console; + if (!has_preferred || bcon || !console_drivers) + has_preferred = preferred_console >= 0; /* * See if we want to use this console driver. If we * didn't select a console we take the first one * that registers here. */ - if (preferred_console < 0) { + if (!has_preferred) { if (newcon->index < 0) newcon->index = 0; if (newcon->setup == NULL || @@ -2451,7 +2456,7 @@ void register_console(struct console *newcon) newcon->flags |= CON_ENABLED; if (newcon->device) { newcon->flags |= CON_CONSDEV; - preferred_console = 0; + has_preferred = true; } } } @@ -2484,9 +2489,9 @@ void register_console(struct console *newcon) } newcon->flags |= CON_ENABLED; - if (i == selected_console) { + if (i == preferred_console) { newcon->flags |= CON_CONSDEV; - preferred_console = selected_console; + has_preferred = true; } break; } @@ -2610,6 +2615,30 @@ int unregister_console(struct console *console) } EXPORT_SYMBOL(unregister_console); +/* + * Initialize the console device. This is called *early*, so + * we can't necessarily depend on lots of kernel help here. + * Just do some early initializations, and do the complex setup + * later. + */ +void __init console_init(void) +{ + initcall_t *call; + + /* Setup the default TTY line discipline. */ + n_tty_init(); + + /* + * set up the console device so that later boot sequences can + * inform about problems etc.. + */ + call = __con_initcall_start; + while (call < __con_initcall_end) { + (*call)(); + call++; + } +} + /* * Some boot consoles access data that is in the init section and which will * be discarded after the initcalls have been run. To make sure that no code diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 266ddcc1d8bbbc6af7bceda3657618beef2a9c59..60f356d91060c8974268cc5bc02d57c75d359afc 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -60,19 +60,25 @@ int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, } +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, + const struct cred *ptracer_cred) +{ + BUG_ON(!list_empty(&child->ptrace_entry)); + list_add(&child->ptrace_entry, &new_parent->ptraced); + child->parent = new_parent; + child->ptracer_cred = get_cred(ptracer_cred); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * * Must be called with the tasklist lock write-held. */ -void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) +static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - BUG_ON(!list_empty(&child->ptrace_entry)); - list_add(&child->ptrace_entry, &new_parent->ptraced); - child->parent = new_parent; rcu_read_lock(); - child->ptracer_cred = get_cred(__task_cred(new_parent)); + __ptrace_link(child, new_parent, __task_cred(new_parent)); rcu_read_unlock(); } @@ -386,7 +392,7 @@ static int ptrace_attach(struct task_struct *task, long request, flags |= PT_SEIZED; task->ptrace = flags; - __ptrace_link(task, current); + ptrace_link(task, current); /* SEIZE doesn't trap tracee on attach */ if (!seize) @@ -459,7 +465,7 @@ static int ptrace_traceme(void) */ if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace = PT_PTRACED; - __ptrace_link(current, current->real_parent); + ptrace_link(current, current->real_parent); } } write_unlock_irq(&tasklist_lock); diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 18dfc485225c3954ccf29a8743bc636f10db7d41..23803c7d51804debb62b91934cdba42f305c70d3 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -3,10 +3,13 @@ KCOV_INSTRUMENT := n obj-y += update.o sync.o -obj-$(CONFIG_SRCU) += srcu.o +obj-$(CONFIG_CLASSIC_SRCU) += srcu.o +obj-$(CONFIG_TREE_SRCU) += srcutree.o +obj-$(CONFIG_TINY_SRCU) += srcutiny.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o obj-$(CONFIG_TREE_RCU) += tree.o obj-$(CONFIG_PREEMPT_RCU) += tree.o obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o obj-$(CONFIG_TINY_RCU) += tiny.o +obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 0d6ff3e471be6c1597e0e78fb90d07eb0ce9c546..73e16ec4054b83d163f58d6786df4e5fb6a00ae4 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -56,6 +56,83 @@ #define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ DYNTICK_TASK_FLAG) + +/* + * Grace-period counter management. + */ + +#define RCU_SEQ_CTR_SHIFT 2 +#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) + +/* + * Return the counter portion of a sequence number previously returned + * by rcu_seq_snap() or rcu_seq_current(). + */ +static inline unsigned long rcu_seq_ctr(unsigned long s) +{ + return s >> RCU_SEQ_CTR_SHIFT; +} + +/* + * Return the state portion of a sequence number previously returned + * by rcu_seq_snap() or rcu_seq_current(). + */ +static inline int rcu_seq_state(unsigned long s) +{ + return s & RCU_SEQ_STATE_MASK; +} + +/* + * Set the state portion of the pointed-to sequence number. + * The caller is responsible for preventing conflicting updates. + */ +static inline void rcu_seq_set_state(unsigned long *sp, int newstate) +{ + WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK); + WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate); +} + +/* Adjust sequence number for start of update-side operation. */ +static inline void rcu_seq_start(unsigned long *sp) +{ + WRITE_ONCE(*sp, *sp + 1); + smp_mb(); /* Ensure update-side operation after counter increment. */ + WARN_ON_ONCE(rcu_seq_state(*sp) != 1); +} + +/* Adjust sequence number for end of update-side operation. */ +static inline void rcu_seq_end(unsigned long *sp) +{ + smp_mb(); /* Ensure update-side operation before counter increment. */ + WARN_ON_ONCE(!rcu_seq_state(*sp)); + WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1); +} + +/* Take a snapshot of the update side's sequence number. */ +static inline unsigned long rcu_seq_snap(unsigned long *sp) +{ + unsigned long s; + + s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK; + smp_mb(); /* Above access must not bleed into critical section. */ + return s; +} + +/* Return the current value the update side's sequence number, no ordering. */ +static inline unsigned long rcu_seq_current(unsigned long *sp) +{ + return READ_ONCE(*sp); +} + +/* + * Given a snapshot from rcu_seq_snap(), determine whether or not a + * full update-side operation has occurred. + */ +static inline bool rcu_seq_done(unsigned long *sp, unsigned long s) +{ + return ULONG_CMP_GE(READ_ONCE(*sp), s); +} + /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the @@ -109,12 +186,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) rcu_lock_acquire(&rcu_callback_map); if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); + RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);) kfree((void *)head - offset); rcu_lock_release(&rcu_callback_map); return true; } else { - RCU_TRACE(trace_rcu_invoke_callback(rn, head)); + RCU_TRACE(trace_rcu_invoke_callback(rn, head);) head->func(head); rcu_lock_release(&rcu_callback_map); return false; @@ -144,4 +221,76 @@ void rcu_test_sync_prims(void); */ extern void resched_cpu(int cpu); +#if defined(SRCU) || !defined(TINY_RCU) + +#include + +extern int rcu_num_lvls; +extern int num_rcu_lvl[]; +extern int rcu_num_nodes; +static bool rcu_fanout_exact; +static int rcu_fanout_leaf; + +/* + * Compute the per-level fanout, either using the exact fanout specified + * or balancing the tree, depending on the rcu_fanout_exact boot parameter. + */ +static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt) +{ + int i; + + if (rcu_fanout_exact) { + levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; + for (i = rcu_num_lvls - 2; i >= 0; i--) + levelspread[i] = RCU_FANOUT; + } else { + int ccur; + int cprv; + + cprv = nr_cpu_ids; + for (i = rcu_num_lvls - 1; i >= 0; i--) { + ccur = levelcnt[i]; + levelspread[i] = (cprv + ccur - 1) / ccur; + cprv = ccur; + } + } +} + +/* + * Do a full breadth-first scan of the rcu_node structures for the + * specified rcu_state structure. + */ +#define rcu_for_each_node_breadth_first(rsp, rnp) \ + for ((rnp) = &(rsp)->node[0]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) + +/* + * Do a breadth-first scan of the non-leaf rcu_node structures for the + * specified rcu_state structure. Note that if there is a singleton + * rcu_node tree with but one rcu_node structure, this loop is a no-op. + */ +#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ + for ((rnp) = &(rsp)->node[0]; \ + (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) + +/* + * Scan the leaves of the rcu_node hierarchy for the specified rcu_state + * structure. Note that if there is a singleton rcu_node tree with but + * one rcu_node structure, this loop -will- visit the rcu_node structure. + * It is still a leaf node, even if it is also the root node. + */ +#define rcu_for_each_leaf_node(rsp, rnp) \ + for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) + +/* + * Iterate over all possible CPUs in a leaf RCU node. + */ +#define for_each_leaf_node_possible_cpu(rnp, cpu) \ + for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \ + cpu <= rnp->grphi; \ + cpu = cpumask_next((cpu), cpu_possible_mask)) + +#endif /* #if defined(SRCU) || !defined(TINY_RCU) */ + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c new file mode 100644 index 0000000000000000000000000000000000000000..2b62a38b080fa5e04af36635f2d6dcedffe8c21e --- /dev/null +++ b/kernel/rcu/rcu_segcblist.c @@ -0,0 +1,505 @@ +/* + * RCU segmented callback lists, function definitions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Authors: Paul E. McKenney + */ + +#include +#include +#include + +#include "rcu_segcblist.h" + +/* Initialize simple callback list. */ +void rcu_cblist_init(struct rcu_cblist *rclp) +{ + rclp->head = NULL; + rclp->tail = &rclp->head; + rclp->len = 0; + rclp->len_lazy = 0; +} + +/* + * Debug function to actually count the number of callbacks. + * If the number exceeds the limit specified, return -1. + */ +long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) +{ + int cnt = 0; + struct rcu_head **rhpp = &rclp->head; + + for (;;) { + if (!*rhpp) + return cnt; + if (++cnt > lim) + return -1; + rhpp = &(*rhpp)->next; + } +} + +/* + * Dequeue the oldest rcu_head structure from the specified callback + * list. This function assumes that the callback is non-lazy, but + * the caller can later invoke rcu_cblist_dequeued_lazy() if it + * finds otherwise (and if it cares about laziness). This allows + * different users to have different ways of determining laziness. + */ +struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) +{ + struct rcu_head *rhp; + + rhp = rclp->head; + if (!rhp) + return NULL; + rclp->len--; + rclp->head = rhp->next; + if (!rclp->head) + rclp->tail = &rclp->head; + return rhp; +} + +/* + * Initialize an rcu_segcblist structure. + */ +void rcu_segcblist_init(struct rcu_segcblist *rsclp) +{ + int i; + + BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); + BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); + rsclp->head = NULL; + for (i = 0; i < RCU_CBLIST_NSEGS; i++) + rsclp->tails[i] = &rsclp->head; + rsclp->len = 0; + rsclp->len_lazy = 0; +} + +/* + * Disable the specified rcu_segcblist structure, so that callbacks can + * no longer be posted to it. This structure must be empty. + */ +void rcu_segcblist_disable(struct rcu_segcblist *rsclp) +{ + WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); + WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); + WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); + rsclp->tails[RCU_NEXT_TAIL] = NULL; +} + +/* + * Is the specified segment of the specified rcu_segcblist structure + * empty of callbacks? + */ +bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) +{ + if (seg == RCU_DONE_TAIL) + return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; + return rsclp->tails[seg - 1] == rsclp->tails[seg]; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * are ready to be invoked? + */ +bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * are still pending, that is, not yet ready to be invoked? + */ +bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); +} + +/* + * Dequeue and return the first ready-to-invoke callback. If there + * are no ready-to-invoke callbacks, return NULL. Disables interrupts + * to avoid interference. Does not protect from interference from other + * CPUs or tasks. + */ +struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) +{ + unsigned long flags; + int i; + struct rcu_head *rhp; + + local_irq_save(flags); + if (!rcu_segcblist_ready_cbs(rsclp)) { + local_irq_restore(flags); + return NULL; + } + rhp = rsclp->head; + BUG_ON(!rhp); + rsclp->head = rhp->next; + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { + if (rsclp->tails[i] != &rhp->next) + break; + rsclp->tails[i] = &rsclp->head; + } + smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ + WRITE_ONCE(rsclp->len, rsclp->len - 1); + local_irq_restore(flags); + return rhp; +} + +/* + * Account for the fact that a previously dequeued callback turned out + * to be marked as lazy. + */ +void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) +{ + unsigned long flags; + + local_irq_save(flags); + rsclp->len_lazy--; + local_irq_restore(flags); +} + +/* + * Return a pointer to the first callback in the specified rcu_segcblist + * structure. This is useful for diagnostics. + */ +struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) +{ + if (rcu_segcblist_is_enabled(rsclp)) + return rsclp->head; + return NULL; +} + +/* + * Return a pointer to the first pending callback in the specified + * rcu_segcblist structure. This is useful just after posting a given + * callback -- if that callback is the first pending callback, then + * you cannot rely on someone else having already started up the required + * grace period. + */ +struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) +{ + if (rcu_segcblist_is_enabled(rsclp)) + return *rsclp->tails[RCU_DONE_TAIL]; + return NULL; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * have not yet been processed beyond having been posted, that is, + * does it contain callbacks in its last segment? + */ +bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); +} + +/* + * Enqueue the specified callback onto the specified rcu_segcblist + * structure, updating accounting as needed. Note that the ->len + * field may be accessed locklessly, hence the WRITE_ONCE(). + * The ->len field is used by rcu_barrier() and friends to determine + * if it must post a callback on this structure, and it is OK + * for rcu_barrier() to sometimes post callbacks needlessly, but + * absolutely not OK for it to ever miss posting a callback. + */ +void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy) +{ + WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ + if (lazy) + rsclp->len_lazy++; + smp_mb(); /* Ensure counts are updated before callback is enqueued. */ + rhp->next = NULL; + *rsclp->tails[RCU_NEXT_TAIL] = rhp; + rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; +} + +/* + * Entrain the specified callback onto the specified rcu_segcblist at + * the end of the last non-empty segment. If the entire rcu_segcblist + * is empty, make no change, but return false. + * + * This is intended for use by rcu_barrier()-like primitives, -not- + * for normal grace-period use. IMPORTANT: The callback you enqueue + * will wait for all prior callbacks, NOT necessarily for a grace + * period. You have been warned. + */ +bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy) +{ + int i; + + if (rcu_segcblist_n_cbs(rsclp) == 0) + return false; + WRITE_ONCE(rsclp->len, rsclp->len + 1); + if (lazy) + rsclp->len_lazy++; + smp_mb(); /* Ensure counts are updated before callback is entrained. */ + rhp->next = NULL; + for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) + if (rsclp->tails[i] != rsclp->tails[i - 1]) + break; + *rsclp->tails[i] = rhp; + for (; i <= RCU_NEXT_TAIL; i++) + rsclp->tails[i] = &rhp->next; + return true; +} + +/* + * Extract only the counts from the specified rcu_segcblist structure, + * and place them in the specified rcu_cblist structure. This function + * supports both callback orphaning and invocation, hence the separation + * of counts and callbacks. (Callbacks ready for invocation must be + * orphaned and adopted separately from pending callbacks, but counts + * apply to all callbacks. Locking must be used to make sure that + * both orphaned-callbacks lists are consistent.) + */ +void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rclp->len_lazy += rsclp->len_lazy; + rclp->len += rsclp->len; + rsclp->len_lazy = 0; + WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ +} + +/* + * Extract only those callbacks ready to be invoked from the specified + * rcu_segcblist structure and place them in the specified rcu_cblist + * structure. + */ +void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rcu_segcblist_ready_cbs(rsclp)) + return; /* Nothing to do. */ + *rclp->tail = rsclp->head; + rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; + *rsclp->tails[RCU_DONE_TAIL] = NULL; + rclp->tail = rsclp->tails[RCU_DONE_TAIL]; + for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) + if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) + rsclp->tails[i] = &rsclp->head; +} + +/* + * Extract only those callbacks still pending (not yet ready to be + * invoked) from the specified rcu_segcblist structure and place them in + * the specified rcu_cblist structure. Note that this loses information + * about any callbacks that might have been partway done waiting for + * their grace period. Too bad! They will have to start over. + */ +void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rcu_segcblist_pend_cbs(rsclp)) + return; /* Nothing to do. */ + *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; + rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; + *rsclp->tails[RCU_DONE_TAIL] = NULL; + for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) + rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; +} + +/* + * Insert counts from the specified rcu_cblist structure in the + * specified rcu_segcblist structure. + */ +void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rsclp->len_lazy += rclp->len_lazy; + /* ->len sampled locklessly. */ + WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); + rclp->len_lazy = 0; + rclp->len = 0; +} + +/* + * Move callbacks from the specified rcu_cblist to the beginning of the + * done-callbacks segment of the specified rcu_segcblist. + */ +void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rclp->head) + return; /* No callbacks to move. */ + *rclp->tail = rsclp->head; + rsclp->head = rclp->head; + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) + if (&rsclp->head == rsclp->tails[i]) + rsclp->tails[i] = rclp->tail; + else + break; + rclp->head = NULL; + rclp->tail = &rclp->head; +} + +/* + * Move callbacks from the specified rcu_cblist to the end of the + * new-callbacks segment of the specified rcu_segcblist. + */ +void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + if (!rclp->head) + return; /* Nothing to do. */ + *rsclp->tails[RCU_NEXT_TAIL] = rclp->head; + rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; + rclp->head = NULL; + rclp->tail = &rclp->head; +} + +/* + * Advance the callbacks in the specified rcu_segcblist structure based + * on the current value passed in for the grace-period counter. + */ +void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq) +{ + int i, j; + + WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); + if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) + return; + + /* + * Find all callbacks whose ->gp_seq numbers indicate that they + * are ready to invoke, and put them into the RCU_DONE_TAIL segment. + */ + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { + if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) + break; + rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; + } + + /* If no callbacks moved, nothing more need be done. */ + if (i == RCU_WAIT_TAIL) + return; + + /* Clean up tail pointers that might have been misordered above. */ + for (j = RCU_WAIT_TAIL; j < i; j++) + rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; + + /* + * Callbacks moved, so clean up the misordered ->tails[] pointers + * that now point into the middle of the list of ready-to-invoke + * callbacks. The overall effect is to copy down the later pointers + * into the gap that was created by the now-ready segments. + */ + for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { + if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) + break; /* No more callbacks. */ + rsclp->tails[j] = rsclp->tails[i]; + rsclp->gp_seq[j] = rsclp->gp_seq[i]; + } +} + +/* + * "Accelerate" callbacks based on more-accurate grace-period information. + * The reason for this is that RCU does not synchronize the beginnings and + * ends of grace periods, and that callbacks are posted locally. This in + * turn means that the callbacks must be labelled conservatively early + * on, as getting exact information would degrade both performance and + * scalability. When more accurate grace-period information becomes + * available, previously posted callbacks can be "accelerated", marking + * them to complete at the end of the earlier grace period. + * + * This function operates on an rcu_segcblist structure, and also the + * grace-period sequence number seq at which new callbacks would become + * ready to invoke. Returns true if there are callbacks that won't be + * ready to invoke until seq, false otherwise. + */ +bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) +{ + int i; + + WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); + if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) + return false; + + /* + * Find the segment preceding the oldest segment of callbacks + * whose ->gp_seq[] completion is at or after that passed in via + * "seq", skipping any empty segments. This oldest segment, along + * with any later segments, can be merged in with any newly arrived + * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" + * as their ->gp_seq[] grace-period completion sequence number. + */ + for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) + if (rsclp->tails[i] != rsclp->tails[i - 1] && + ULONG_CMP_LT(rsclp->gp_seq[i], seq)) + break; + + /* + * If all the segments contain callbacks that correspond to + * earlier grace-period sequence numbers than "seq", leave. + * Assuming that the rcu_segcblist structure has enough + * segments in its arrays, this can only happen if some of + * the non-done segments contain callbacks that really are + * ready to invoke. This situation will get straightened + * out by the next call to rcu_segcblist_advance(). + * + * Also advance to the oldest segment of callbacks whose + * ->gp_seq[] completion is at or after that passed in via "seq", + * skipping any empty segments. + */ + if (++i >= RCU_NEXT_TAIL) + return false; + + /* + * Merge all later callbacks, including newly arrived callbacks, + * into the segment located by the for-loop above. Assign "seq" + * as the ->gp_seq[] value in order to correctly handle the case + * where there were no pending callbacks in the rcu_segcblist + * structure other than in the RCU_NEXT_TAIL segment. + */ + for (; i < RCU_NEXT_TAIL; i++) { + rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; + rsclp->gp_seq[i] = seq; + } + return true; +} + +/* + * Scan the specified rcu_segcblist structure for callbacks that need + * a grace period later than the one specified by "seq". We don't look + * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't + * have a grace-period sequence number. + */ +bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i; + + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) + if (rsclp->tails[i - 1] != rsclp->tails[i] && + ULONG_CMP_LT(seq, rsclp->gp_seq[i])) + return true; + return false; +} diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h new file mode 100644 index 0000000000000000000000000000000000000000..6e36e36478cd3f57338260904eb9335d3e0b85a5 --- /dev/null +++ b/kernel/rcu/rcu_segcblist.h @@ -0,0 +1,164 @@ +/* + * RCU segmented callback lists, internal-to-rcu header file + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Authors: Paul E. McKenney + */ + +#include + +/* + * Account for the fact that a previously dequeued callback turned out + * to be marked as lazy. + */ +static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) +{ + rclp->len_lazy--; +} + +/* + * Interim function to return rcu_cblist head pointer. Longer term, the + * rcu_cblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) +{ + return rclp->head; +} + +/* + * Interim function to return rcu_cblist head pointer. Longer term, the + * rcu_cblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) +{ + WARN_ON_ONCE(!rclp->head); + return rclp->tail; +} + +void rcu_cblist_init(struct rcu_cblist *rclp); +long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim); +struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp); + +/* + * Is the specified rcu_segcblist structure empty? + * + * But careful! The fact that the ->head field is NULL does not + * necessarily imply that there are no callbacks associated with + * this structure. When callbacks are being invoked, they are + * removed as a group. If callback invocation must be preempted, + * the remaining callbacks will be added back to the list. Either + * way, the counts are updated later. + * + * So it is often the case that rcu_segcblist_n_cbs() should be used + * instead. + */ +static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) +{ + return !rsclp->head; +} + +/* Return number of callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) +{ + return READ_ONCE(rsclp->len); +} + +/* Return number of lazy callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) +{ + return rsclp->len_lazy; +} + +/* Return number of lazy callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) +{ + return rsclp->len - rsclp->len_lazy; +} + +/* + * Is the specified rcu_segcblist enabled, for example, not corresponding + * to an offline or callback-offloaded CPU? + */ +static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) +{ + return !!rsclp->tails[RCU_NEXT_TAIL]; +} + +/* + * Are all segments following the specified segment of the specified + * rcu_segcblist structure empty of callbacks? (The specified + * segment might well contain callbacks.) + */ +static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) +{ + return !*rsclp->tails[seg]; +} + +/* + * Interim function to return rcu_segcblist head pointer. Longer term, the + * rcu_segcblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) +{ + return rsclp->head; +} + +/* + * Interim function to return rcu_segcblist head pointer. Longer term, the + * rcu_segcblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) +{ + WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); + return rsclp->tails[RCU_NEXT_TAIL]; +} + +void rcu_segcblist_init(struct rcu_segcblist *rsclp); +void rcu_segcblist_disable(struct rcu_segcblist *rsclp); +bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg); +bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); +bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp); +struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp); +void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp); +struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); +struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp); +bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp); +void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy); +bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy); +void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp); +void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp); +void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp); +void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp); +void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp); +void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp); +void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq); +bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); +bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, + unsigned long seq); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index cccc417a813502f8bc0f6d0af930b25d3d1b4f52..ae6e574d4cf5c3fbdd8ab9a56009981cec9935a1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -559,19 +559,34 @@ static void srcu_torture_barrier(void) static void srcu_torture_stats(void) { - int cpu; - int idx = srcu_ctlp->completed & 0x1; + int __maybe_unused cpu; + int idx; - pr_alert("%s%s per-CPU(idx=%d):", +#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU) +#ifdef CONFIG_TREE_SRCU + idx = srcu_ctlp->srcu_idx & 0x1; +#else /* #ifdef CONFIG_TREE_SRCU */ + idx = srcu_ctlp->completed & 0x1; +#endif /* #else #ifdef CONFIG_TREE_SRCU */ + pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; - struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); +#ifdef CONFIG_TREE_SRCU + struct srcu_data *counts; + counts = per_cpu_ptr(srcu_ctlp->sda, cpu); + u0 = counts->srcu_unlock_count[!idx]; + u1 = counts->srcu_unlock_count[idx]; +#else /* #ifdef CONFIG_TREE_SRCU */ + struct srcu_array *counts; + + counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); u0 = counts->unlock_count[!idx]; u1 = counts->unlock_count[idx]; +#endif /* #else #ifdef CONFIG_TREE_SRCU */ /* * Make sure that a lock is always counted if the corresponding @@ -579,14 +594,26 @@ static void srcu_torture_stats(void) */ smp_rmb(); +#ifdef CONFIG_TREE_SRCU + l0 = counts->srcu_lock_count[!idx]; + l1 = counts->srcu_lock_count[idx]; +#else /* #ifdef CONFIG_TREE_SRCU */ l0 = counts->lock_count[!idx]; l1 = counts->lock_count[idx]; +#endif /* #else #ifdef CONFIG_TREE_SRCU */ c0 = l0 - u0; c1 = l1 - u1; pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } pr_cont("\n"); +#elif defined(CONFIG_TINY_SRCU) + idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1; + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n", + torture_type, TORTURE_FLAG, idx, + READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]), + READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx])); +#endif } static void srcu_torture_synchronize_expedited(void) @@ -1333,12 +1360,14 @@ rcu_torture_stats_print(void) cur_ops->stats(); if (rtcv_snap == rcu_torture_current_version && rcu_torture_current != NULL) { - int __maybe_unused flags; - unsigned long __maybe_unused gpnum; - unsigned long __maybe_unused completed; + int __maybe_unused flags = 0; + unsigned long __maybe_unused gpnum = 0; + unsigned long __maybe_unused completed = 0; rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); + srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, + &flags, &gpnum, &completed); wtp = READ_ONCE(writer_task); pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n", rcu_torture_writer_state_getname(), diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index ef3bcfb15b39ec4815275077d3e79825db740a27..dea03614263fdc185fbdb3a7e3ec9ec8d1618888 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -22,7 +22,7 @@ * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt + * Documentation/RCU/ *.txt * */ @@ -243,8 +243,14 @@ static bool srcu_readers_active(struct srcu_struct *sp) * cleanup_srcu_struct - deconstruct a sleep-RCU structure * @sp: structure to clean up. * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. + * Must invoke this only after you are finished using a given srcu_struct + * that was initialized via init_srcu_struct(). This code does some + * probabalistic checking, spotting late uses of srcu_read_lock(), + * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). + * If any such late uses are detected, the per-CPU memory associated with + * the srcu_struct is simply leaked and WARN_ON() is invoked. If the + * caller frees the srcu_struct itself, a use-after-free crash will likely + * ensue, but at least there will be a warning printed. */ void cleanup_srcu_struct(struct srcu_struct *sp) { @@ -257,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. + * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) @@ -265,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->completed) & 0x1; - __this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); + this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -275,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). - * Must be called from process context. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c new file mode 100644 index 0000000000000000000000000000000000000000..32798eb14853d47b9b155bb4bd3c4007dd13736e --- /dev/null +++ b/kernel/rcu/srcutiny.c @@ -0,0 +1,217 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * tiny version for non-preemptible single-CPU use. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "rcu_segcblist.h" +#include "rcu.h" + +static int init_srcu_struct_fields(struct srcu_struct *sp) +{ + sp->srcu_lock_nesting[0] = 0; + sp->srcu_lock_nesting[1] = 0; + init_swait_queue_head(&sp->srcu_wq); + sp->srcu_gp_seq = 0; + rcu_segcblist_init(&sp->srcu_cblist); + sp->srcu_gp_running = false; + sp->srcu_gp_waiting = false; + sp->srcu_idx = 0; + INIT_WORK(&sp->srcu_work, srcu_drive_gp); + return 0; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int __init_srcu_struct(struct srcu_struct *sp, const char *name, + struct lock_class_key *key) +{ + /* Don't re-initialize a lock while it is held. */ + debug_check_no_locks_freed((void *)sp, sizeof(*sp)); + lockdep_init_map(&sp->dep_map, name, key, 0); + return init_srcu_struct_fields(sp); +} +EXPORT_SYMBOL_GPL(__init_srcu_struct); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/* + * init_srcu_struct - initialize a sleep-RCU structure + * @sp: structure to initialize. + * + * Must invoke this on a given srcu_struct before passing that srcu_struct + * to any other function. Each srcu_struct represents a separate domain + * of SRCU protection. + */ +int init_srcu_struct(struct srcu_struct *sp) +{ + return init_srcu_struct_fields(sp); +} +EXPORT_SYMBOL_GPL(init_srcu_struct); + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/* + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void cleanup_srcu_struct(struct srcu_struct *sp) +{ + WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); + flush_work(&sp->srcu_work); + WARN_ON(rcu_seq_state(sp->srcu_gp_seq)); + WARN_ON(sp->srcu_gp_running); + WARN_ON(sp->srcu_gp_waiting); + WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)); +} +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Can be invoked from irq/bh handlers, but the matching + * __srcu_read_unlock() must be in the same handler instance. Returns an + * index that must be passed to the matching srcu_read_unlock(). + */ +int __srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + idx = READ_ONCE(sp->srcu_idx); + WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + return idx; +} +EXPORT_SYMBOL_GPL(__srcu_read_lock); + +/* + * Removes the count for the old reader from the appropriate element of + * the srcu_struct. + */ +void __srcu_read_unlock(struct srcu_struct *sp, int idx) +{ + int newval = sp->srcu_lock_nesting[idx] - 1; + + WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); + if (!newval && READ_ONCE(sp->srcu_gp_waiting)) + swake_up(&sp->srcu_wq); +} +EXPORT_SYMBOL_GPL(__srcu_read_unlock); + +/* + * Workqueue handler to drive one grace period and invoke any callbacks + * that become ready as a result. Single-CPU and !PREEMPT operation + * means that we get away with murder on synchronization. ;-) + */ +void srcu_drive_gp(struct work_struct *wp) +{ + int idx; + struct rcu_cblist ready_cbs; + struct srcu_struct *sp; + struct rcu_head *rhp; + + sp = container_of(wp, struct srcu_struct, srcu_work); + if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist)) + return; /* Already running or nothing to do. */ + + /* Tag recently arrived callbacks and wait for readers. */ + WRITE_ONCE(sp->srcu_gp_running, true); + rcu_segcblist_accelerate(&sp->srcu_cblist, + rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_start(&sp->srcu_gp_seq); + idx = sp->srcu_idx; + WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); + WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ + swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); + WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + rcu_seq_end(&sp->srcu_gp_seq); + + /* Update callback list based on GP, and invoke ready callbacks. */ + rcu_segcblist_advance(&sp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) { + rcu_cblist_init(&ready_cbs); + local_irq_disable(); + rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs); + local_irq_enable(); + rhp = rcu_cblist_dequeue(&ready_cbs); + for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { + local_bh_disable(); + rhp->func(rhp); + local_bh_enable(); + } + local_irq_disable(); + rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs); + local_irq_enable(); + } + WRITE_ONCE(sp->srcu_gp_running, false); + + /* + * If more callbacks, reschedule ourselves. This can race with + * a call_srcu() at interrupt level, but the ->srcu_gp_running + * checks will straighten that out. + */ + if (!rcu_segcblist_empty(&sp->srcu_cblist)) + schedule_work(&sp->srcu_work); +} +EXPORT_SYMBOL_GPL(srcu_drive_gp); + +/* + * Enqueue an SRCU callback on the specified srcu_struct structure, + * initiating grace-period processing if it is not already running. + */ +void call_srcu(struct srcu_struct *sp, struct rcu_head *head, + rcu_callback_t func) +{ + unsigned long flags; + + head->func = func; + local_irq_save(flags); + rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); + local_irq_restore(flags); + if (!READ_ONCE(sp->srcu_gp_running)) + schedule_work(&sp->srcu_work); +} +EXPORT_SYMBOL_GPL(call_srcu); + +/* + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + struct rcu_synchronize rs; + + init_rcu_head_on_stack(&rs.head); + init_completion(&rs.completion); + call_srcu(sp, &rs.head, wakeme_after_rcu); + wait_for_completion(&rs.completion); + destroy_rcu_head_on_stack(&rs.head); +} +EXPORT_SYMBOL_GPL(synchronize_srcu); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c new file mode 100644 index 0000000000000000000000000000000000000000..157654fa436a2f2f0c9284aedf51315747d017d9 --- /dev/null +++ b/kernel/rcu/srcutree.c @@ -0,0 +1,1154 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2006 + * Copyright (C) Fujitsu, 2012 + * + * Author: Paul McKenney + * Lai Jiangshan + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU/ *.txt + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rcu.h" +#include "rcu_segcblist.h" + +ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */ +module_param(exp_holdoff, ulong, 0444); + +static void srcu_invoke_callbacks(struct work_struct *work); +static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); + +/* + * Initialize SRCU combining tree. Note that statically allocated + * srcu_struct structures might already have srcu_read_lock() and + * srcu_read_unlock() running against them. So if the is_static parameter + * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. + */ +static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) +{ + int cpu; + int i; + int level = 0; + int levelspread[RCU_NUM_LVLS]; + struct srcu_data *sdp; + struct srcu_node *snp; + struct srcu_node *snp_first; + + /* Work out the overall tree geometry. */ + sp->level[0] = &sp->node[0]; + for (i = 1; i < rcu_num_lvls; i++) + sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; + rcu_init_levelspread(levelspread, num_rcu_lvl); + + /* Each pass through this loop initializes one srcu_node structure. */ + rcu_for_each_node_breadth_first(sp, snp) { + spin_lock_init(&snp->lock); + WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != + ARRAY_SIZE(snp->srcu_data_have_cbs)); + for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { + snp->srcu_have_cbs[i] = 0; + snp->srcu_data_have_cbs[i] = 0; + } + snp->srcu_gp_seq_needed_exp = 0; + snp->grplo = -1; + snp->grphi = -1; + if (snp == &sp->node[0]) { + /* Root node, special case. */ + snp->srcu_parent = NULL; + continue; + } + + /* Non-root node. */ + if (snp == sp->level[level + 1]) + level++; + snp->srcu_parent = sp->level[level - 1] + + (snp - sp->level[level]) / + levelspread[level - 1]; + } + + /* + * Initialize the per-CPU srcu_data array, which feeds into the + * leaves of the srcu_node tree. + */ + WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != + ARRAY_SIZE(sdp->srcu_unlock_count)); + level = rcu_num_lvls - 1; + snp_first = sp->level[level]; + for_each_possible_cpu(cpu) { + sdp = per_cpu_ptr(sp->sda, cpu); + spin_lock_init(&sdp->lock); + rcu_segcblist_init(&sdp->srcu_cblist); + sdp->srcu_cblist_invoking = false; + sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq; + sdp->mynode = &snp_first[cpu / levelspread[level]]; + for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { + if (snp->grplo < 0) + snp->grplo = cpu; + snp->grphi = cpu; + } + sdp->cpu = cpu; + INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); + sdp->sp = sp; + sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); + if (is_static) + continue; + + /* Dynamically allocated, better be no srcu_read_locks()! */ + for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) { + sdp->srcu_lock_count[i] = 0; + sdp->srcu_unlock_count[i] = 0; + } + } +} + +/* + * Initialize non-compile-time initialized fields, including the + * associated srcu_node and srcu_data structures. The is_static + * parameter is passed through to init_srcu_struct_nodes(), and + * also tells us that ->sda has already been wired up to srcu_data. + */ +static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) +{ + mutex_init(&sp->srcu_cb_mutex); + mutex_init(&sp->srcu_gp_mutex); + sp->srcu_idx = 0; + sp->srcu_gp_seq = 0; + sp->srcu_barrier_seq = 0; + mutex_init(&sp->srcu_barrier_mutex); + atomic_set(&sp->srcu_barrier_cpu_cnt, 0); + INIT_DELAYED_WORK(&sp->work, process_srcu); + if (!is_static) + sp->sda = alloc_percpu(struct srcu_data); + init_srcu_struct_nodes(sp, is_static); + sp->srcu_gp_seq_needed_exp = 0; + sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ + return sp->sda ? 0 : -ENOMEM; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int __init_srcu_struct(struct srcu_struct *sp, const char *name, + struct lock_class_key *key) +{ + /* Don't re-initialize a lock while it is held. */ + debug_check_no_locks_freed((void *)sp, sizeof(*sp)); + lockdep_init_map(&sp->dep_map, name, key, 0); + spin_lock_init(&sp->gp_lock); + return init_srcu_struct_fields(sp, false); +} +EXPORT_SYMBOL_GPL(__init_srcu_struct); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/** + * init_srcu_struct - initialize a sleep-RCU structure + * @sp: structure to initialize. + * + * Must invoke this on a given srcu_struct before passing that srcu_struct + * to any other function. Each srcu_struct represents a separate domain + * of SRCU protection. + */ +int init_srcu_struct(struct srcu_struct *sp) +{ + spin_lock_init(&sp->gp_lock); + return init_srcu_struct_fields(sp, false); +} +EXPORT_SYMBOL_GPL(init_srcu_struct); + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/* + * First-use initialization of statically allocated srcu_struct + * structure. Wiring up the combining tree is more than can be + * done with compile-time initialization, so this check is added + * to each update-side SRCU primitive. Use ->gp_lock, which -is- + * compile-time initialized, to resolve races involving multiple + * CPUs trying to garner first-use privileges. + */ +static void check_init_srcu_struct(struct srcu_struct *sp) +{ + unsigned long flags; + + WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT); + /* The smp_load_acquire() pairs with the smp_store_release(). */ + if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ + return; /* Already initialized. */ + spin_lock_irqsave(&sp->gp_lock, flags); + if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { + spin_unlock_irqrestore(&sp->gp_lock, flags); + return; + } + init_srcu_struct_fields(sp, true); + spin_unlock_irqrestore(&sp->gp_lock, flags); +} + +/* + * Returns approximate total of the readers' ->srcu_lock_count[] values + * for the rank of per-CPU counters specified by idx. + */ +static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) +{ + int cpu; + unsigned long sum = 0; + + for_each_possible_cpu(cpu) { + struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + + sum += READ_ONCE(cpuc->srcu_lock_count[idx]); + } + return sum; +} + +/* + * Returns approximate total of the readers' ->srcu_unlock_count[] values + * for the rank of per-CPU counters specified by idx. + */ +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) +{ + int cpu; + unsigned long sum = 0; + + for_each_possible_cpu(cpu) { + struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + + sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); + } + return sum; +} + +/* + * Return true if the number of pre-existing readers is determined to + * be zero. + */ +static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) +{ + unsigned long unlocks; + + unlocks = srcu_readers_unlock_idx(sp, idx); + + /* + * Make sure that a lock is always counted if the corresponding + * unlock is counted. Needs to be a smp_mb() as the read side may + * contain a read from a variable that is written to before the + * synchronize_srcu() in the write side. In this case smp_mb()s + * A and B act like the store buffering pattern. + * + * This smp_mb() also pairs with smp_mb() C to prevent accesses + * after the synchronize_srcu() from being executed before the + * grace period ends. + */ + smp_mb(); /* A */ + + /* + * If the locks are the same as the unlocks, then there must have + * been no readers on this index at some time in between. This does + * not mean that there are no more readers, as one could have read + * the current index but not have incremented the lock counter yet. + * + * Possible bug: There is no guarantee that there haven't been + * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were + * counted, meaning that this could return true even if there are + * still active readers. Since there are no memory barriers around + * srcu_flip(), the CPU is not required to increment ->srcu_idx + * before running srcu_readers_unlock_idx(), which means that there + * could be an arbitrarily large number of critical sections that + * execute after srcu_readers_unlock_idx() but use the old value + * of ->srcu_idx. + */ + return srcu_readers_lock_idx(sp, idx) == unlocks; +} + +/** + * srcu_readers_active - returns true if there are readers. and false + * otherwise + * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * + * Note that this is not an atomic primitive, and can therefore suffer + * severe errors when invoked on an active srcu_struct. That said, it + * can be useful as an error check at cleanup time. + */ +static bool srcu_readers_active(struct srcu_struct *sp) +{ + int cpu; + unsigned long sum = 0; + + for_each_possible_cpu(cpu) { + struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + + sum += READ_ONCE(cpuc->srcu_lock_count[0]); + sum += READ_ONCE(cpuc->srcu_lock_count[1]); + sum -= READ_ONCE(cpuc->srcu_unlock_count[0]); + sum -= READ_ONCE(cpuc->srcu_unlock_count[1]); + } + return sum; +} + +#define SRCU_INTERVAL 1 + +/* + * Return grace-period delay, zero if there are expedited grace + * periods pending, SRCU_INTERVAL otherwise. + */ +static unsigned long srcu_get_delay(struct srcu_struct *sp) +{ + if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq), + READ_ONCE(sp->srcu_gp_seq_needed_exp))) + return 0; + return SRCU_INTERVAL; +} + +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void cleanup_srcu_struct(struct srcu_struct *sp) +{ + int cpu; + + if (WARN_ON(!srcu_get_delay(sp))) + return; /* Leakage unless caller handles error. */ + if (WARN_ON(srcu_readers_active(sp))) + return; /* Leakage unless caller handles error. */ + flush_delayed_work(&sp->work); + for_each_possible_cpu(cpu) + flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(srcu_readers_active(sp))) { + pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + return; /* Caller forgot to stop doing call_srcu()? */ + } + free_percpu(sp->sda); + sp->sda = NULL; +} +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. + * Returns an index that must be passed to the matching srcu_read_unlock(). + */ +int __srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + idx = READ_ONCE(sp->srcu_idx) & 0x1; + this_cpu_inc(sp->sda->srcu_lock_count[idx]); + smp_mb(); /* B */ /* Avoid leaking the critical section. */ + return idx; +} +EXPORT_SYMBOL_GPL(__srcu_read_lock); + +/* + * Removes the count for the old reader from the appropriate per-CPU + * element of the srcu_struct. Note that this may well be a different + * CPU than that which was incremented by the corresponding srcu_read_lock(). + */ +void __srcu_read_unlock(struct srcu_struct *sp, int idx) +{ + smp_mb(); /* C */ /* Avoid leaking the critical section. */ + this_cpu_inc(sp->sda->srcu_unlock_count[idx]); +} +EXPORT_SYMBOL_GPL(__srcu_read_unlock); + +/* + * We use an adaptive strategy for synchronize_srcu() and especially for + * synchronize_srcu_expedited(). We spin for a fixed time period + * (defined below) to allow SRCU readers to exit their read-side critical + * sections. If there are still some readers after a few microseconds, + * we repeatedly block for 1-millisecond time periods. + */ +#define SRCU_RETRY_CHECK_DELAY 5 + +/* + * Start an SRCU grace period. + */ +static void srcu_gp_start(struct srcu_struct *sp) +{ + struct srcu_data *sdp = this_cpu_ptr(sp->sda); + int state; + + RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock), + "Invoked srcu_gp_start() without ->gp_lock!"); + WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, + rcu_seq_snap(&sp->srcu_gp_seq)); + smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ + rcu_seq_start(&sp->srcu_gp_seq); + state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + WARN_ON_ONCE(state != SRCU_STATE_SCAN1); +} + +/* + * Track online CPUs to guide callback workqueue placement. + */ +DEFINE_PER_CPU(bool, srcu_online); + +void srcu_online_cpu(unsigned int cpu) +{ + WRITE_ONCE(per_cpu(srcu_online, cpu), true); +} + +void srcu_offline_cpu(unsigned int cpu) +{ + WRITE_ONCE(per_cpu(srcu_online, cpu), false); +} + +/* + * Place the workqueue handler on the specified CPU if online, otherwise + * just run it whereever. This is useful for placing workqueue handlers + * that are to invoke the specified CPU's callbacks. + */ +static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, + struct delayed_work *dwork, + unsigned long delay) +{ + bool ret; + + preempt_disable(); + if (READ_ONCE(per_cpu(srcu_online, cpu))) + ret = queue_delayed_work_on(cpu, wq, dwork, delay); + else + ret = queue_delayed_work(wq, dwork, delay); + preempt_enable(); + return ret; +} + +/* + * Schedule callback invocation for the specified srcu_data structure, + * if possible, on the corresponding CPU. + */ +static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) +{ + srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq, + &sdp->work, delay); +} + +/* + * Schedule callback invocation for all srcu_data structures associated + * with the specified srcu_node structure that have callbacks for the + * just-completed grace period, the one corresponding to idx. If possible, + * schedule this invocation on the corresponding CPUs. + */ +static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, + unsigned long mask, unsigned long delay) +{ + int cpu; + + for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { + if (!(mask & (1 << (cpu - snp->grplo)))) + continue; + srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay); + } +} + +/* + * Note the end of an SRCU grace period. Initiates callback invocation + * and starts a new grace period if needed. + * + * The ->srcu_cb_mutex acquisition does not protect any data, but + * instead prevents more than one grace period from starting while we + * are initiating callback invocation. This allows the ->srcu_have_cbs[] + * array to have a finite number of elements. + */ +static void srcu_gp_end(struct srcu_struct *sp) +{ + unsigned long cbdelay; + bool cbs; + unsigned long gpseq; + int idx; + int idxnext; + unsigned long mask; + struct srcu_node *snp; + + /* Prevent more than one additional grace period. */ + mutex_lock(&sp->srcu_cb_mutex); + + /* End the current grace period. */ + spin_lock_irq(&sp->gp_lock); + idx = rcu_seq_state(sp->srcu_gp_seq); + WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); + cbdelay = srcu_get_delay(sp); + sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + rcu_seq_end(&sp->srcu_gp_seq); + gpseq = rcu_seq_current(&sp->srcu_gp_seq); + if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) + sp->srcu_gp_seq_needed_exp = gpseq; + spin_unlock_irq(&sp->gp_lock); + mutex_unlock(&sp->srcu_gp_mutex); + /* A new grace period can start at this point. But only one. */ + + /* Initiate callback invocation as needed. */ + idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); + idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); + rcu_for_each_node_breadth_first(sp, snp) { + spin_lock_irq(&snp->lock); + cbs = false; + if (snp >= sp->level[rcu_num_lvls - 1]) + cbs = snp->srcu_have_cbs[idx] == gpseq; + snp->srcu_have_cbs[idx] = gpseq; + rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); + if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq)) + snp->srcu_gp_seq_needed_exp = gpseq; + mask = snp->srcu_data_have_cbs[idx]; + snp->srcu_data_have_cbs[idx] = 0; + spin_unlock_irq(&snp->lock); + if (cbs) { + smp_mb(); /* GP end before CB invocation. */ + srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); + } + } + + /* Callback initiation done, allow grace periods after next. */ + mutex_unlock(&sp->srcu_cb_mutex); + + /* Start a new grace period if needed. */ + spin_lock_irq(&sp->gp_lock); + gpseq = rcu_seq_current(&sp->srcu_gp_seq); + if (!rcu_seq_state(gpseq) && + ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { + srcu_gp_start(sp); + spin_unlock_irq(&sp->gp_lock); + /* Throttle expedited grace periods: Should be rare! */ + srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff + ? 0 : SRCU_INTERVAL); + } else { + spin_unlock_irq(&sp->gp_lock); + } +} + +/* + * Funnel-locking scheme to scalably mediate many concurrent expedited + * grace-period requests. This function is invoked for the first known + * expedited request for a grace period that has already been requested, + * but without expediting. To start a completely new grace period, + * whether expedited or not, use srcu_funnel_gp_start() instead. + */ +static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, + unsigned long s) +{ + unsigned long flags; + + for (; snp != NULL; snp = snp->srcu_parent) { + if (rcu_seq_done(&sp->srcu_gp_seq, s) || + ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) + return; + spin_lock_irqsave(&snp->lock, flags); + if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { + spin_unlock_irqrestore(&snp->lock, flags); + return; + } + WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); + spin_unlock_irqrestore(&snp->lock, flags); + } + spin_lock_irqsave(&sp->gp_lock, flags); + if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) + sp->srcu_gp_seq_needed_exp = s; + spin_unlock_irqrestore(&sp->gp_lock, flags); +} + +/* + * Funnel-locking scheme to scalably mediate many concurrent grace-period + * requests. The winner has to do the work of actually starting grace + * period s. Losers must either ensure that their desired grace-period + * number is recorded on at least their leaf srcu_node structure, or they + * must take steps to invoke their own callbacks. + */ +static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, + unsigned long s, bool do_norm) +{ + unsigned long flags; + int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs); + struct srcu_node *snp = sdp->mynode; + unsigned long snp_seq; + + /* Each pass through the loop does one level of the srcu_node tree. */ + for (; snp != NULL; snp = snp->srcu_parent) { + if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) + return; /* GP already done and CBs recorded. */ + spin_lock_irqsave(&snp->lock, flags); + if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { + snp_seq = snp->srcu_have_cbs[idx]; + if (snp == sdp->mynode && snp_seq == s) + snp->srcu_data_have_cbs[idx] |= sdp->grpmask; + spin_unlock_irqrestore(&snp->lock, flags); + if (snp == sdp->mynode && snp_seq != s) { + smp_mb(); /* CBs after GP! */ + srcu_schedule_cbs_sdp(sdp, do_norm + ? SRCU_INTERVAL + : 0); + return; + } + if (!do_norm) + srcu_funnel_exp_start(sp, snp, s); + return; + } + snp->srcu_have_cbs[idx] = s; + if (snp == sdp->mynode) + snp->srcu_data_have_cbs[idx] |= sdp->grpmask; + if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) + snp->srcu_gp_seq_needed_exp = s; + spin_unlock_irqrestore(&snp->lock, flags); + } + + /* Top of tree, must ensure the grace period will be started. */ + spin_lock_irqsave(&sp->gp_lock, flags); + if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { + /* + * Record need for grace period s. Pair with load + * acquire setting up for initialization. + */ + smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ + } + if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) + sp->srcu_gp_seq_needed_exp = s; + + /* If grace period not already done and none in progress, start it. */ + if (!rcu_seq_done(&sp->srcu_gp_seq, s) && + rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { + WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); + srcu_gp_start(sp); + queue_delayed_work(system_power_efficient_wq, &sp->work, + srcu_get_delay(sp)); + } + spin_unlock_irqrestore(&sp->gp_lock, flags); +} + +/* + * Wait until all readers counted by array index idx complete, but + * loop an additional time if there is an expedited grace period pending. + * The caller must ensure that ->srcu_idx is not changed while checking. + */ +static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) +{ + for (;;) { + if (srcu_readers_active_idx_check(sp, idx)) + return true; + if (--trycount + !srcu_get_delay(sp) <= 0) + return false; + udelay(SRCU_RETRY_CHECK_DELAY); + } +} + +/* + * Increment the ->srcu_idx counter so that future SRCU readers will + * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows + * us to wait for pre-existing readers in a starvation-free manner. + */ +static void srcu_flip(struct srcu_struct *sp) +{ + WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); + + /* + * Ensure that if the updater misses an __srcu_read_unlock() + * increment, that task's next __srcu_read_lock() will see the + * above counter update. Note that both this memory barrier + * and the one in srcu_readers_active_idx_check() provide the + * guarantee for __srcu_read_lock(). + */ + smp_mb(); /* D */ /* Pairs with C. */ +} + +/* + * If SRCU is likely idle, return true, otherwise return false. + * + * Note that it is OK for several current from-idle requests for a new + * grace period from idle to specify expediting because they will all end + * up requesting the same grace period anyhow. So no loss. + * + * Note also that if any CPU (including the current one) is still invoking + * callbacks, this function will nevertheless say "idle". This is not + * ideal, but the overhead of checking all CPUs' callback lists is even + * less ideal, especially on large systems. Furthermore, the wakeup + * can happen before the callback is fully removed, so we have no choice + * but to accept this type of error. + * + * This function is also subject to counter-wrap errors, but let's face + * it, if this function was preempted for enough time for the counters + * to wrap, it really doesn't matter whether or not we expedite the grace + * period. The extra overhead of a needlessly expedited grace period is + * negligible when amoritized over that time period, and the extra latency + * of a needlessly non-expedited grace period is similarly negligible. + */ +static bool srcu_might_be_idle(struct srcu_struct *sp) +{ + unsigned long curseq; + unsigned long flags; + struct srcu_data *sdp; + unsigned long t; + + /* If the local srcu_data structure has callbacks, not idle. */ + local_irq_save(flags); + sdp = this_cpu_ptr(sp->sda); + if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { + local_irq_restore(flags); + return false; /* Callbacks already present, so not idle. */ + } + local_irq_restore(flags); + + /* + * No local callbacks, so probabalistically probe global state. + * Exact information would require acquiring locks, which would + * kill scalability, hence the probabalistic nature of the probe. + */ + + /* First, see if enough time has passed since the last GP. */ + t = ktime_get_mono_fast_ns(); + if (exp_holdoff == 0 || + time_in_range_open(t, sp->srcu_last_gp_end, + sp->srcu_last_gp_end + exp_holdoff)) + return false; /* Too soon after last GP. */ + + /* Next, check for probable idleness. */ + curseq = rcu_seq_current(&sp->srcu_gp_seq); + smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ + if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) + return false; /* Grace period in progress, so not idle. */ + smp_mb(); /* Order ->srcu_gp_seq with prior access. */ + if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) + return false; /* GP # changed, so not idle. */ + return true; /* With reasonable probability, idle! */ +} + +/* + * Enqueue an SRCU callback on the srcu_data structure associated with + * the current CPU and the specified srcu_struct structure, initiating + * grace-period processing if it is not already running. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing SRCU read-side critical section. On systems with + * more than one CPU, this means that when "func()" is invoked, each CPU + * is guaranteed to have executed a full memory barrier since the end of + * its last corresponding SRCU read-side critical section whose beginning + * preceded the call to call_rcu(). It also means that each CPU executing + * an SRCU read-side critical section that continues beyond the start of + * "func()" must have executed a memory barrier after the call_rcu() + * but before the beginning of that SRCU read-side critical section. + * Note that these guarantees include CPUs that are offline, idle, or + * executing in user mode, as well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting SRCU callback function "func()", then both CPU A and CPU + * B are guaranteed to execute a full memory barrier during the time + * interval between the call to call_rcu() and the invocation of "func()". + * This guarantee applies even if CPU A and CPU B are the same CPU (but + * again only if the system has more than one CPU). + * + * Of course, these guarantees apply only for invocations of call_srcu(), + * srcu_read_lock(), and srcu_read_unlock() that are all passed the same + * srcu_struct structure. + */ +void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, + rcu_callback_t func, bool do_norm) +{ + unsigned long flags; + bool needexp = false; + bool needgp = false; + unsigned long s; + struct srcu_data *sdp; + + check_init_srcu_struct(sp); + rhp->func = func; + local_irq_save(flags); + sdp = this_cpu_ptr(sp->sda); + spin_lock(&sdp->lock); + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + s = rcu_seq_snap(&sp->srcu_gp_seq); + (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); + if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { + sdp->srcu_gp_seq_needed = s; + needgp = true; + } + if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { + sdp->srcu_gp_seq_needed_exp = s; + needexp = true; + } + spin_unlock_irqrestore(&sdp->lock, flags); + if (needgp) + srcu_funnel_gp_start(sp, sdp, s, do_norm); + else if (needexp) + srcu_funnel_exp_start(sp, sdp->mynode, s); +} + +void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, + rcu_callback_t func) +{ + __call_srcu(sp, rhp, func, true); +} +EXPORT_SYMBOL_GPL(call_srcu); + +/* + * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). + */ +static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) +{ + struct rcu_synchronize rcu; + + RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); + + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) + return; + might_sleep(); + check_init_srcu_struct(sp); + init_completion(&rcu.completion); + init_rcu_head_on_stack(&rcu.head); + __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); + wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); +} + +/** + * synchronize_srcu_expedited - Brute-force SRCU grace period + * @sp: srcu_struct with which to synchronize. + * + * Wait for an SRCU grace period to elapse, but be more aggressive about + * spinning rather than blocking when waiting. + * + * Note that synchronize_srcu_expedited() has the same deadlock and + * memory-ordering properties as does synchronize_srcu(). + */ +void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, rcu_gp_is_normal()); +} +EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); + +/** + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + * @sp: srcu_struct with which to synchronize. + * + * Wait for the count to drain to zero of both indexes. To avoid the + * possible starvation of synchronize_srcu(), it waits for the count of + * the index=((->srcu_idx & 1) ^ 1) to drain to zero at first, + * and then flip the srcu_idx and wait for the count of the other index. + * + * Can block; must be called from process context. + * + * Note that it is illegal to call synchronize_srcu() from the corresponding + * SRCU read-side critical section; doing so will result in deadlock. + * However, it is perfectly legal to call synchronize_srcu() on one + * srcu_struct from some other srcu_struct's read-side critical section, + * as long as the resulting graph of srcu_structs is acyclic. + * + * There are memory-ordering constraints implied by synchronize_srcu(). + * On systems with more than one CPU, when synchronize_srcu() returns, + * each CPU is guaranteed to have executed a full memory barrier since + * the end of its last corresponding SRCU-sched read-side critical section + * whose beginning preceded the call to synchronize_srcu(). In addition, + * each CPU having an SRCU read-side critical section that extends beyond + * the return from synchronize_srcu() is guaranteed to have executed a + * full memory barrier after the beginning of synchronize_srcu() and before + * the beginning of that SRCU read-side critical section. Note that these + * guarantees include CPUs that are offline, idle, or executing in user mode, + * as well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked synchronize_srcu(), which returned + * to its caller on CPU B, then both CPU A and CPU B are guaranteed + * to have executed a full memory barrier during the execution of + * synchronize_srcu(). This guarantee applies even if CPU A and CPU B + * are the same CPU, but again only if the system has more than one CPU. + * + * Of course, these memory-ordering guarantees apply only when + * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are + * passed the same srcu_struct structure. + * + * If SRCU is likely idle, expedite the first request. This semantic + * was provided by Classic SRCU, and is relied upon by its users, so TREE + * SRCU must also provide it. Note that detecting idleness is heuristic + * and subject to both false positives and negatives. + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) + synchronize_srcu_expedited(sp); + else + __synchronize_srcu(sp, true); +} +EXPORT_SYMBOL_GPL(synchronize_srcu); + +/* + * Callback function for srcu_barrier() use. + */ +static void srcu_barrier_cb(struct rcu_head *rhp) +{ + struct srcu_data *sdp; + struct srcu_struct *sp; + + sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); + sp = sdp->sp; + if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) + complete(&sp->srcu_barrier_completion); +} + +/** + * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. + * @sp: srcu_struct on which to wait for in-flight callbacks. + */ +void srcu_barrier(struct srcu_struct *sp) +{ + int cpu; + struct srcu_data *sdp; + unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); + + check_init_srcu_struct(sp); + mutex_lock(&sp->srcu_barrier_mutex); + if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { + smp_mb(); /* Force ordering following return. */ + mutex_unlock(&sp->srcu_barrier_mutex); + return; /* Someone else did our work for us. */ + } + rcu_seq_start(&sp->srcu_barrier_seq); + init_completion(&sp->srcu_barrier_completion); + + /* Initial count prevents reaching zero until all CBs are posted. */ + atomic_set(&sp->srcu_barrier_cpu_cnt, 1); + + /* + * Each pass through this loop enqueues a callback, but only + * on CPUs already having callbacks enqueued. Note that if + * a CPU already has callbacks enqueue, it must have already + * registered the need for a future grace period, so all we + * need do is enqueue a callback that will use the same + * grace period as the last callback already in the queue. + */ + for_each_possible_cpu(cpu) { + sdp = per_cpu_ptr(sp->sda, cpu); + spin_lock_irq(&sdp->lock); + atomic_inc(&sp->srcu_barrier_cpu_cnt); + sdp->srcu_barrier_head.func = srcu_barrier_cb; + if (!rcu_segcblist_entrain(&sdp->srcu_cblist, + &sdp->srcu_barrier_head, 0)) + atomic_dec(&sp->srcu_barrier_cpu_cnt); + spin_unlock_irq(&sdp->lock); + } + + /* Remove the initial count, at which point reaching zero can happen. */ + if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) + complete(&sp->srcu_barrier_completion); + wait_for_completion(&sp->srcu_barrier_completion); + + rcu_seq_end(&sp->srcu_barrier_seq); + mutex_unlock(&sp->srcu_barrier_mutex); +} +EXPORT_SYMBOL_GPL(srcu_barrier); + +/** + * srcu_batches_completed - return batches completed. + * @sp: srcu_struct on which to report batch completion. + * + * Report the number of batches, correlated with, but not necessarily + * precisely the same as, the number of grace periods that have elapsed. + */ +unsigned long srcu_batches_completed(struct srcu_struct *sp) +{ + return sp->srcu_idx; +} +EXPORT_SYMBOL_GPL(srcu_batches_completed); + +/* + * Core SRCU state machine. Push state bits of ->srcu_gp_seq + * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has + * completed in that state. + */ +static void srcu_advance_state(struct srcu_struct *sp) +{ + int idx; + + mutex_lock(&sp->srcu_gp_mutex); + + /* + * Because readers might be delayed for an extended period after + * fetching ->srcu_idx for their index, at any point in time there + * might well be readers using both idx=0 and idx=1. We therefore + * need to wait for readers to clear from both index values before + * invoking a callback. + * + * The load-acquire ensures that we see the accesses performed + * by the prior grace period. + */ + idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + if (idx == SRCU_STATE_IDLE) { + spin_lock_irq(&sp->gp_lock); + if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); + spin_unlock_irq(&sp->gp_lock); + mutex_unlock(&sp->srcu_gp_mutex); + return; + } + idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + if (idx == SRCU_STATE_IDLE) + srcu_gp_start(sp); + spin_unlock_irq(&sp->gp_lock); + if (idx != SRCU_STATE_IDLE) { + mutex_unlock(&sp->srcu_gp_mutex); + return; /* Someone else started the grace period. */ + } + } + + if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { + idx = 1 ^ (sp->srcu_idx & 1); + if (!try_check_zero(sp, idx, 1)) { + mutex_unlock(&sp->srcu_gp_mutex); + return; /* readers present, retry later. */ + } + srcu_flip(sp); + rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); + } + + if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { + + /* + * SRCU read-side critical sections are normally short, + * so check at least twice in quick succession after a flip. + */ + idx = 1 ^ (sp->srcu_idx & 1); + if (!try_check_zero(sp, idx, 2)) { + mutex_unlock(&sp->srcu_gp_mutex); + return; /* readers present, retry later. */ + } + srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */ + } +} + +/* + * Invoke a limited number of SRCU callbacks that have passed through + * their grace period. If there are more to do, SRCU will reschedule + * the workqueue. Note that needed memory barriers have been executed + * in this task's context by srcu_readers_active_idx_check(). + */ +static void srcu_invoke_callbacks(struct work_struct *work) +{ + bool more; + struct rcu_cblist ready_cbs; + struct rcu_head *rhp; + struct srcu_data *sdp; + struct srcu_struct *sp; + + sdp = container_of(work, struct srcu_data, work.work); + sp = sdp->sp; + rcu_cblist_init(&ready_cbs); + spin_lock_irq(&sdp->lock); + smp_mb(); /* Old grace periods before callback invocation! */ + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + if (sdp->srcu_cblist_invoking || + !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { + spin_unlock_irq(&sdp->lock); + return; /* Someone else on the job or nothing to do. */ + } + + /* We are on the job! Extract and invoke ready callbacks. */ + sdp->srcu_cblist_invoking = true; + rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); + spin_unlock_irq(&sdp->lock); + rhp = rcu_cblist_dequeue(&ready_cbs); + for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { + local_bh_disable(); + rhp->func(rhp); + local_bh_enable(); + } + + /* + * Update counts, accelerate new callbacks, and if needed, + * schedule another round of callback invocation. + */ + spin_lock_irq(&sdp->lock); + rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); + (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, + rcu_seq_snap(&sp->srcu_gp_seq)); + sdp->srcu_cblist_invoking = false; + more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); + spin_unlock_irq(&sdp->lock); + if (more) + srcu_schedule_cbs_sdp(sdp, 0); +} + +/* + * Finished one round of SRCU grace period. Start another if there are + * more SRCU callbacks queued, otherwise put SRCU into not-running state. + */ +static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) +{ + bool pushgp = true; + + spin_lock_irq(&sp->gp_lock); + if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { + /* All requests fulfilled, time to go idle. */ + pushgp = false; + } + } else if (!rcu_seq_state(sp->srcu_gp_seq)) { + /* Outstanding request and no GP. Start one. */ + srcu_gp_start(sp); + } + spin_unlock_irq(&sp->gp_lock); + + if (pushgp) + queue_delayed_work(system_power_efficient_wq, &sp->work, delay); +} + +/* + * This is the work-queue function that handles SRCU grace periods. + */ +void process_srcu(struct work_struct *work) +{ + struct srcu_struct *sp; + + sp = container_of(work, struct srcu_struct, work.work); + + srcu_advance_state(sp); + srcu_reschedule(sp, srcu_get_delay(sp)); +} +EXPORT_SYMBOL_GPL(process_srcu); + +void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = rcu_seq_ctr(sp->srcu_gp_seq); + *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed); +} +EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 6ad330dbbae2ec3fa4f74fdc1cc1a2ff1154bc9d..e5385731e39109d9a27a47d123ac44908402fcce 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -79,7 +79,7 @@ EXPORT_SYMBOL(__rcu_is_watching); */ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { - RCU_TRACE(reset_cpu_stall_ticks(rcp)); + RCU_TRACE(reset_cpu_stall_ticks(rcp);) if (rcp->donetail != rcp->curtail) { rcp->donetail = rcp->curtail; return 1; @@ -125,7 +125,7 @@ void rcu_bh_qs(void) */ void rcu_check_callbacks(int user) { - RCU_TRACE(check_cpu_stalls()); + RCU_TRACE(check_cpu_stalls();) if (user) rcu_sched_qs(); else if (!in_softirq()) @@ -143,7 +143,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) const char *rn = NULL; struct rcu_head *next, *list; unsigned long flags; - RCU_TRACE(int cb_count = 0); + RCU_TRACE(int cb_count = 0;) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); @@ -152,7 +152,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) local_irq_restore(flags); return; } - RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1);) list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; @@ -162,7 +162,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) local_irq_restore(flags); /* Invoke the callbacks on the local list. */ - RCU_TRACE(rn = rcp->name); + RCU_TRACE(rn = rcp->name;) while (list) { next = list->next; prefetch(next); @@ -171,9 +171,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) __rcu_reclaim(rn, list); local_bh_enable(); list = next; - RCU_TRACE(cb_count++); + RCU_TRACE(cb_count++;) } - RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); + RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);) RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), is_idle_task(current), @@ -221,7 +221,7 @@ static void __call_rcu(struct rcu_head *head, local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; - RCU_TRACE(rcp->qlen++); + RCU_TRACE(rcp->qlen++;) local_irq_restore(flags); if (unlikely(is_idle_task(current))) { @@ -254,8 +254,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); - RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk)); - RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk)); + RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk);) + RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk);) rcu_early_boot_tests(); } diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index c64b827ecbca19656395e873ca06da0c92a6298e..371034e77f87c8c6a7942495284ec3c1331dad18 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -52,7 +52,7 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { RCU_TRACE(.name = "rcu_bh") }; -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) #include int rcu_scheduler_active __read_mostly; @@ -65,15 +65,16 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. * The reason for this is that Tiny RCU does not need kthreads, so does * not have to care about the fact that the scheduler is half-initialized - * at a certain phase of the boot process. + * at a certain phase of the boot process. Unless SRCU is in the mix. */ void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU) + ? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING; } -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ #ifdef CONFIG_RCU_TRACE @@ -162,8 +163,8 @@ static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) static void check_cpu_stalls(void) { - RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk)); - RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk)); + RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk);) + RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk);) } #endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 50fee7689e7125787cb3a02ffaa6e9837b1b05d2..e354e475e645ff4f0fab186913a274e931da0db3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "tree.h" #include "rcu.h" @@ -97,8 +98,8 @@ struct rcu_state sname##_state = { \ .gpnum = 0UL - 300UL, \ .completed = 0UL - 300UL, \ .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \ - .orphan_nxttail = &sname##_state.orphan_nxtlist, \ - .orphan_donetail = &sname##_state.orphan_donelist, \ + .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \ + .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .name = RCU_STATE_NAME(sname), \ .abbr = sabbr, \ @@ -123,7 +124,7 @@ static int rcu_fanout_leaf = RCU_FANOUT_LEAF; module_param(rcu_fanout_leaf, int, 0444); int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; /* Number of rcu_nodes at specified level. */ -static int num_rcu_lvl[] = NUM_RCU_LVL_INIT; +int num_rcu_lvl[] = NUM_RCU_LVL_INIT; int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ /* panic() on RCU Stall sysctl. */ int sysctl_panic_on_rcu_stall __read_mostly; @@ -199,7 +200,7 @@ static const int gp_cleanup_delay; /* * Number of grace periods between delays, normalized by the duration of - * the delay. The longer the the delay, the more the grace periods between + * the delay. The longer the delay, the more the grace periods between * each delay. The reason for this normalization is that it means that, * for non-zero delays, the overall slowdown of grace periods is constant * regardless of the duration of the delay. This arrangement balances @@ -272,17 +273,39 @@ void rcu_bh_qs(void) } } -static DEFINE_PER_CPU(int, rcu_sched_qs_mask); +/* + * Steal a bit from the bottom of ->dynticks for idle entry/exit + * control. Initially this is for TLB flushing. + */ +#define RCU_DYNTICK_CTRL_MASK 0x1 +#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) +#ifndef rcu_eqs_special_exit +#define rcu_eqs_special_exit() do { } while (0) +#endif static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, - .dynticks = ATOMIC_INIT(1), + .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), #ifdef CONFIG_NO_HZ_FULL_SYSIDLE .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, .dynticks_idle = ATOMIC_INIT(1), #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; +/* + * There's a few places, currently just in the tracing infrastructure, + * that uses rcu_irq_enter() to make sure RCU is watching. But there's + * a small location where that will not even work. In those cases + * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter() + * can be called. + */ +static DEFINE_PER_CPU(bool, disable_rcu_irq_enter); + +bool rcu_irq_enter_disabled(void) +{ + return this_cpu_read(disable_rcu_irq_enter); +} + /* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. @@ -290,15 +313,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { static void rcu_dynticks_eqs_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int special; + int seq; /* - * CPUs seeing atomic_inc_return() must see prior RCU read-side + * CPUs seeing atomic_add_return() must see prior RCU read-side * critical sections, and we also must force ordering with the * next idle sojourn. */ - special = atomic_inc_return(&rdtp->dynticks); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1); + seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); + /* Better be in an extended quiescent state! */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + (seq & RCU_DYNTICK_CTRL_CTR)); + /* Better not have special action (TLB flush) pending! */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + (seq & RCU_DYNTICK_CTRL_MASK)); } /* @@ -308,15 +336,22 @@ static void rcu_dynticks_eqs_enter(void) static void rcu_dynticks_eqs_exit(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int special; + int seq; /* - * CPUs seeing atomic_inc_return() must see prior idle sojourns, + * CPUs seeing atomic_add_return() must see prior idle sojourns, * and we also must force ordering with the next RCU read-side * critical section. */ - special = atomic_inc_return(&rdtp->dynticks); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1)); + seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !(seq & RCU_DYNTICK_CTRL_CTR)); + if (seq & RCU_DYNTICK_CTRL_MASK) { + atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks); + smp_mb__after_atomic(); /* _exit after clearing mask. */ + /* Prefer duplicate flushes to losing a flush. */ + rcu_eqs_special_exit(); + } } /* @@ -333,9 +368,9 @@ static void rcu_dynticks_eqs_online(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - if (atomic_read(&rdtp->dynticks) & 0x1) + if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR) return; - atomic_add(0x1, &rdtp->dynticks); + atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); } /* @@ -347,7 +382,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - return !(atomic_read(&rdtp->dynticks) & 0x1); + return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR); } /* @@ -358,7 +393,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp) { int snap = atomic_add_return(0, &rdtp->dynticks); - return snap; + return snap & ~RCU_DYNTICK_CTRL_MASK; } /* @@ -367,7 +402,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp) */ static bool rcu_dynticks_in_eqs(int snap) { - return !(snap & 0x1); + return !(snap & RCU_DYNTICK_CTRL_CTR); } /* @@ -387,14 +422,34 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap) static void rcu_dynticks_momentary_idle(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int special = atomic_add_return(2, &rdtp->dynticks); + int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, + &rdtp->dynticks); /* It is illegal to call this from idle state. */ - WARN_ON_ONCE(!(special & 0x1)); + WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); } -DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); -EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); +/* + * Set the special (bottom) bit of the specified CPU so that it + * will take special action (such as flushing its TLB) on the + * next exit from an extended quiescent state. Returns true if + * the bit was successfully set, or false if the CPU was not in + * an extended quiescent state. + */ +bool rcu_eqs_special_set(int cpu) +{ + int old; + int new; + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + do { + old = atomic_read(&rdtp->dynticks); + if (old & RCU_DYNTICK_CTRL_CTR) + return false; + new = old | RCU_DYNTICK_CTRL_MASK; + } while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old); + return true; +} /* * Let the RCU core know that this CPU has gone through the scheduler, @@ -403,44 +458,14 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); * memory barriers to let the RCU core know about it, regardless of what * this CPU might (or might not) do in the near future. * - * We inform the RCU core by emulating a zero-duration dyntick-idle - * period, which we in turn do by incrementing the ->dynticks counter - * by two. + * We inform the RCU core by emulating a zero-duration dyntick-idle period. * * The caller must have disabled interrupts. */ static void rcu_momentary_dyntick_idle(void) { - struct rcu_data *rdp; - int resched_mask; - struct rcu_state *rsp; - - /* - * Yes, we can lose flag-setting operations. This is OK, because - * the flag will be set again after some delay. - */ - resched_mask = raw_cpu_read(rcu_sched_qs_mask); - raw_cpu_write(rcu_sched_qs_mask, 0); - - /* Find the flavor that needs a quiescent state. */ - for_each_rcu_flavor(rsp) { - rdp = raw_cpu_ptr(rsp->rda); - if (!(resched_mask & rsp->flavor_mask)) - continue; - smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ - if (READ_ONCE(rdp->mynode->completed) != - READ_ONCE(rdp->cond_resched_completed)) - continue; - - /* - * Pretend to be momentarily idle for the quiescent state. - * This allows the grace-period kthread to record the - * quiescent state, with no need for this CPU to do anything - * further. - */ - rcu_dynticks_momentary_idle(); - break; - } + raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); + rcu_dynticks_momentary_idle(); } /* @@ -448,14 +473,22 @@ static void rcu_momentary_dyntick_idle(void) * and requires special handling for preemptible RCU. * The caller must have disabled interrupts. */ -void rcu_note_context_switch(void) +void rcu_note_context_switch(bool preempt) { barrier(); /* Avoid RCU read-side critical sections leaking down. */ trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(); rcu_preempt_note_context_switch(); - if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + /* Load rcu_urgent_qs before other flags. */ + if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) + goto out; + this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); + if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) rcu_momentary_dyntick_idle(); + this_cpu_inc(rcu_dynticks.rcu_qs_ctr); + if (!preempt) + rcu_note_voluntary_context_switch_lite(current); +out: trace_rcu_utilization(TPS("End context switch")); barrier(); /* Avoid RCU read-side critical sections leaking up. */ } @@ -478,29 +511,26 @@ void rcu_all_qs(void) { unsigned long flags; + if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs)) + return; + preempt_disable(); + /* Load rcu_urgent_qs before other flags. */ + if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) { + preempt_enable(); + return; + } + this_cpu_write(rcu_dynticks.rcu_urgent_qs, false); barrier(); /* Avoid RCU read-side critical sections leaking down. */ - if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) { + if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) { local_irq_save(flags); rcu_momentary_dyntick_idle(); local_irq_restore(flags); } - if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) { - /* - * Yes, we just checked a per-CPU variable with preemption - * enabled, so we might be migrated to some other CPU at - * this point. That is OK because in that case, the - * migration will supply the needed quiescent state. - * We might end up needlessly disabling preemption and - * invoking rcu_sched_qs() on the destination CPU, but - * the probability and cost are both quite low, so this - * should not be a problem in practice. - */ - preempt_disable(); + if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) rcu_sched_qs(); - preempt_enable(); - } - this_cpu_inc(rcu_qs_ctr); + this_cpu_inc(rcu_dynticks.rcu_qs_ctr); barrier(); /* Avoid RCU read-side critical sections leaking up. */ + preempt_enable(); } EXPORT_SYMBOL_GPL(rcu_all_qs); @@ -689,15 +719,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, default: break; } - if (rsp != NULL) { - *flags = READ_ONCE(rsp->gp_flags); - *gpnum = READ_ONCE(rsp->gpnum); - *completed = READ_ONCE(rsp->completed); + if (rsp == NULL) return; - } - *flags = 0; - *gpnum = 0; - *completed = 0; + *flags = READ_ONCE(rsp->gp_flags); + *gpnum = READ_ONCE(rsp->gpnum); + *completed = READ_ONCE(rsp->completed); } EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); @@ -712,16 +738,6 @@ void rcutorture_record_progress(unsigned long vernum) } EXPORT_SYMBOL_GPL(rcutorture_record_progress); -/* - * Does the CPU have callbacks ready to be invoked? - */ -static int -cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) -{ - return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] && - rdp->nxttail[RCU_NEXT_TAIL] != NULL; -} - /* * Return the root node of the specified rcu_state structure. */ @@ -752,44 +768,39 @@ static int rcu_future_needs_gp(struct rcu_state *rsp) static bool cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) { - int i; - if (rcu_gp_in_progress(rsp)) return false; /* No, a grace period is already in progress. */ if (rcu_future_needs_gp(rsp)) return true; /* Yes, a no-CBs CPU needs one. */ - if (!rdp->nxttail[RCU_NEXT_TAIL]) + if (!rcu_segcblist_is_enabled(&rdp->cblist)) return false; /* No, this is a no-CBs (or offline) CPU. */ - if (*rdp->nxttail[RCU_NEXT_READY_TAIL]) + if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) return true; /* Yes, CPU has newly registered callbacks. */ - for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) - if (rdp->nxttail[i - 1] != rdp->nxttail[i] && - ULONG_CMP_LT(READ_ONCE(rsp->completed), - rdp->nxtcompleted[i])) - return true; /* Yes, CBs for future grace period. */ + if (rcu_segcblist_future_gp_needed(&rdp->cblist, + READ_ONCE(rsp->completed))) + return true; /* Yes, CBs for future grace period. */ return false; /* No grace period needed. */ } /* - * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state + * rcu_eqs_enter_common - current CPU is entering an extended quiescent state * - * If the new value of the ->dynticks_nesting counter now is zero, - * we really have entered idle, and must do the appropriate accounting. - * The caller must have disabled interrupts. + * Enter idle, doing appropriate accounting. The caller must have + * disabled interrupts. */ -static void rcu_eqs_enter_common(long long oldval, bool user) +static void rcu_eqs_enter_common(bool user) { struct rcu_state *rsp; struct rcu_data *rdp; - RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0); + trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -800,7 +811,10 @@ static void rcu_eqs_enter_common(long long oldval, bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - rcu_dynticks_eqs_enter(); + __this_cpu_inc(disable_rcu_irq_enter); + rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */ + rcu_dynticks_eqs_enter(); /* After this, tracing works again. */ + __this_cpu_dec(disable_rcu_irq_enter); rcu_dynticks_task_enter(); /* @@ -821,19 +835,15 @@ static void rcu_eqs_enter_common(long long oldval, bool user) */ static void rcu_eqs_enter(bool user) { - long long oldval; struct rcu_dynticks *rdtp; rdtp = this_cpu_ptr(&rcu_dynticks); - oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (oldval & DYNTICK_TASK_NEST_MASK) == 0); - if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { - rdtp->dynticks_nesting = 0; - rcu_eqs_enter_common(oldval, user); - } else { + (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); + if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rcu_eqs_enter_common(user); + else rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; - } } /** @@ -892,19 +902,18 @@ void rcu_user_enter(void) */ void rcu_irq_exit(void) { - long long oldval; struct rcu_dynticks *rdtp; RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); rdtp = this_cpu_ptr(&rcu_dynticks); - oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting--; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting < 0); - if (rdtp->dynticks_nesting) - trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); - else - rcu_eqs_enter_common(oldval, true); + rdtp->dynticks_nesting < 1); + if (rdtp->dynticks_nesting <= 1) { + rcu_eqs_enter_common(true); + } else { + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); + rdtp->dynticks_nesting--; + } rcu_sysidle_enter(1); } @@ -1150,6 +1159,24 @@ bool notrace rcu_is_watching(void) } EXPORT_SYMBOL_GPL(rcu_is_watching); +/* + * If a holdout task is actually running, request an urgent quiescent + * state from its CPU. This is unsynchronized, so migrations can cause + * the request to go to the wrong CPU. Which is OK, all that will happen + * is that the CPU's next context switch will be a bit slower and next + * time around this task will generate another request. + */ +void rcu_request_urgent_qs_task(struct task_struct *t) +{ + int cpu; + + barrier(); + cpu = task_cpu(t); + if (!task_curr(t)) + return; /* This task is not running on that CPU. */ + smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true); +} + #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* @@ -1235,7 +1262,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { unsigned long jtsq; - int *rcrmp; + bool *rnhqp; + bool *ruqp; unsigned long rjtsc; struct rcu_node *rnp; @@ -1271,11 +1299,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * might not be the case for nohz_full CPUs looping in the kernel. */ rnp = rdp->mynode; + ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && - READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) && + READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) && READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); return 1; + } else { + /* Load rcu_qs_ctr before store to rcu_urgent_qs. */ + smp_store_release(ruqp, true); } /* Check for the CPU being offline. */ @@ -1292,7 +1324,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * in-kernel CPU-bound tasks cannot advance grace periods. * So if the grace period is old enough, make the CPU pay attention. * Note that the unsynchronized assignments to the per-CPU - * rcu_sched_qs_mask variable are safe. Yes, setting of + * rcu_need_heavy_qs variable are safe. Yes, setting of * bits can be lost, but they will be set again on the next * force-quiescent-state pass. So lost bit sets do not result * in incorrect behavior, merely in a grace period lasting @@ -1306,16 +1338,13 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * is set too high, we override with half of the RCU CPU stall * warning delay. */ - rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); - if (time_after(jiffies, rdp->rsp->gp_start + jtsq) || - time_after(jiffies, rdp->rsp->jiffies_resched)) { - if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { - WRITE_ONCE(rdp->cond_resched_completed, - READ_ONCE(rdp->mynode->completed)); - smp_mb(); /* ->cond_resched_completed before *rcrmp. */ - WRITE_ONCE(*rcrmp, - READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask); - } + rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); + if (!READ_ONCE(*rnhqp) && + (time_after(jiffies, rdp->rsp->gp_start + jtsq) || + time_after(jiffies, rdp->rsp->jiffies_resched))) { + WRITE_ONCE(*rnhqp, true); + /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ + smp_store_release(ruqp, true); rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ } @@ -1475,7 +1504,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) print_cpu_stall_info_end(); for_each_possible_cpu(cpu) - totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; + totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, + cpu)->cblist); pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), (long)rsp->gpnum, (long)rsp->completed, totqlen); @@ -1529,7 +1559,8 @@ static void print_cpu_stall(struct rcu_state *rsp) print_cpu_stall_info(rsp, smp_processor_id()); print_cpu_stall_info_end(); for_each_possible_cpu(cpu) - totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; + totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, + cpu)->cblist); pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", jiffies - rsp->gp_start, (long)rsp->gpnum, (long)rsp->completed, totqlen); @@ -1631,30 +1662,6 @@ void rcu_cpu_stall_reset(void) WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2); } -/* - * Initialize the specified rcu_data structure's default callback list - * to empty. The default callback list is the one that is not used by - * no-callbacks CPUs. - */ -static void init_default_callback_list(struct rcu_data *rdp) -{ - int i; - - rdp->nxtlist = NULL; - for (i = 0; i < RCU_NEXT_SIZE; i++) - rdp->nxttail[i] = &rdp->nxtlist; -} - -/* - * Initialize the specified rcu_data structure's callback list to empty. - */ -static void init_callback_list(struct rcu_data *rdp) -{ - if (init_nocb_callback_list(rdp)) - return; - init_default_callback_list(rdp); -} - /* * Determine the value that ->completed will have at the end of the * next subsequent grace period. This is used to tag callbacks so that @@ -1709,7 +1716,6 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, unsigned long *c_out) { unsigned long c; - int i; bool ret = false; struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); @@ -1755,13 +1761,11 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, /* * Get a new grace-period number. If there really is no grace * period in progress, it will be smaller than the one we obtained - * earlier. Adjust callbacks as needed. Note that even no-CBs - * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed. + * earlier. Adjust callbacks as needed. */ c = rcu_cbs_completed(rdp->rsp, rnp_root); - for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++) - if (ULONG_CMP_LT(c, rdp->nxtcompleted[i])) - rdp->nxtcompleted[i] = c; + if (!rcu_is_nocb_cpu(rdp->cpu)) + (void)rcu_segcblist_accelerate(&rdp->cblist, c); /* * If the needed for the required grace period is already @@ -1793,9 +1797,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, /* * Clean up any old requests for the just-ended grace period. Also return - * whether any additional grace periods have been requested. Also invoke - * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads - * waiting for this grace period to complete. + * whether any additional grace periods have been requested. */ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { @@ -1841,57 +1843,27 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp) static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { - unsigned long c; - int i; - bool ret; - - /* If the CPU has no callbacks, nothing to do. */ - if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) - return false; - - /* - * Starting from the sublist containing the callbacks most - * recently assigned a ->completed number and working down, find the - * first sublist that is not assignable to an upcoming grace period. - * Such a sublist has something in it (first two tests) and has - * a ->completed number assigned that will complete sooner than - * the ->completed number for newly arrived callbacks (last test). - * - * The key point is that any later sublist can be assigned the - * same ->completed number as the newly arrived callbacks, which - * means that the callbacks in any of these later sublist can be - * grouped into a single sublist, whether or not they have already - * been assigned a ->completed number. - */ - c = rcu_cbs_completed(rsp, rnp); - for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--) - if (rdp->nxttail[i] != rdp->nxttail[i - 1] && - !ULONG_CMP_GE(rdp->nxtcompleted[i], c)) - break; + bool ret = false; - /* - * If there are no sublist for unassigned callbacks, leave. - * At the same time, advance "i" one sublist, so that "i" will - * index into the sublist where all the remaining callbacks should - * be grouped into. - */ - if (++i >= RCU_NEXT_TAIL) + /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ + if (!rcu_segcblist_pend_cbs(&rdp->cblist)) return false; /* - * Assign all subsequent callbacks' ->completed number to the next - * full grace period and group them all in the sublist initially - * indexed by "i". + * Callbacks are often registered with incomplete grace-period + * information. Something about the fact that getting exact + * information requires acquiring a global lock... RCU therefore + * makes a conservative estimate of the grace period number at which + * a given callback will become ready to invoke. The following + * code checks this estimate and improves it when possible, thus + * accelerating callback invocation to an earlier grace-period + * number. */ - for (; i <= RCU_NEXT_TAIL; i++) { - rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxtcompleted[i] = c; - } - /* Record any needed additional grace periods. */ - ret = rcu_start_future_gp(rnp, rdp, NULL); + if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp))) + ret = rcu_start_future_gp(rnp, rdp, NULL); /* Trace depending on how much we were able to accelerate. */ - if (!*rdp->nxttail[RCU_WAIT_TAIL]) + if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); else trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); @@ -1911,32 +1883,15 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { - int i, j; - - /* If the CPU has no callbacks, nothing to do. */ - if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) + /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ + if (!rcu_segcblist_pend_cbs(&rdp->cblist)) return false; /* * Find all callbacks whose ->completed numbers indicate that they * are ready to invoke, and put them into the RCU_DONE_TAIL sublist. */ - for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { - if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i])) - break; - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i]; - } - /* Clean up any sublist tail pointers that were misordered above. */ - for (j = RCU_WAIT_TAIL; j < i; j++) - rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL]; - - /* Copy down callbacks to fill in empty sublists. */ - for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { - if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL]) - break; - rdp->nxttail[j] = rdp->nxttail[i]; - rdp->nxtcompleted[j] = rdp->nxtcompleted[i]; - } + rcu_segcblist_advance(&rdp->cblist, rnp->completed); /* Classify any remaining callbacks. */ return rcu_accelerate_cbs(rsp, rnp, rdp); @@ -1981,7 +1936,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); need_gp = !!(rnp->qsmask & rdp->grpmask); rdp->cpu_no_qs.b.norm = need_gp; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); rdp->core_needs_qs = need_gp; zero_cpu_stall_ticks(rdp); WRITE_ONCE(rdp->gpwrap, false); @@ -2579,7 +2534,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * within the current grace period. */ rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } @@ -2653,13 +2608,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, * because _rcu_barrier() excludes CPU-hotplug operations, so it * cannot be running now. Thus no memory barrier is required. */ - if (rdp->nxtlist != NULL) { - rsp->qlen_lazy += rdp->qlen_lazy; - rsp->qlen += rdp->qlen; - rdp->n_cbs_orphaned += rdp->qlen; - rdp->qlen_lazy = 0; - WRITE_ONCE(rdp->qlen, 0); - } + rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist); + rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done); /* * Next, move those callbacks still needing a grace period to @@ -2667,31 +2617,18 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, * Some of the callbacks might have gone partway through a grace * period, but that is too bad. They get to start over because we * cannot assume that grace periods are synchronized across CPUs. - * We don't bother updating the ->nxttail[] array yet, instead - * we just reset the whole thing later on. */ - if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { - *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL]; - rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL]; - *rdp->nxttail[RCU_DONE_TAIL] = NULL; - } + rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend); /* * Then move the ready-to-invoke callbacks to the orphanage, * where some other CPU will pick them up. These will not be * required to pass though another grace period: They are done. */ - if (rdp->nxtlist != NULL) { - *rsp->orphan_donetail = rdp->nxtlist; - rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; - } + rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done); - /* - * Finally, initialize the rcu_data structure's list to empty and - * disallow further callbacks on this CPU. - */ - init_callback_list(rdp); - rdp->nxttail[RCU_NEXT_TAIL] = NULL; + /* Finally, disallow further callbacks on this CPU. */ + rcu_segcblist_disable(&rdp->cblist); } /* @@ -2700,7 +2637,6 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, */ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) { - int i; struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ @@ -2709,13 +2645,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) return; /* Do the accounting first. */ - rdp->qlen_lazy += rsp->qlen_lazy; - rdp->qlen += rsp->qlen; - rdp->n_cbs_adopted += rsp->qlen; - if (rsp->qlen_lazy != rsp->qlen) + rdp->n_cbs_adopted += rsp->orphan_done.len; + if (rsp->orphan_done.len_lazy != rsp->orphan_done.len) rcu_idle_count_callbacks_posted(); - rsp->qlen_lazy = 0; - rsp->qlen = 0; + rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done); /* * We do not need a memory barrier here because the only way we @@ -2723,24 +2656,13 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) * we are the task doing the rcu_barrier(). */ - /* First adopt the ready-to-invoke callbacks. */ - if (rsp->orphan_donelist != NULL) { - *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; - *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; - for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) - if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) - rdp->nxttail[i] = rsp->orphan_donetail; - rsp->orphan_donelist = NULL; - rsp->orphan_donetail = &rsp->orphan_donelist; - } - - /* And then adopt the callbacks that still need a grace period. */ - if (rsp->orphan_nxtlist != NULL) { - *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; - rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; - rsp->orphan_nxtlist = NULL; - rsp->orphan_nxttail = &rsp->orphan_nxtlist; - } + /* First adopt the ready-to-invoke callbacks, then the done ones. */ + rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done); + WARN_ON_ONCE(rsp->orphan_done.head); + rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend); + WARN_ON_ONCE(rsp->orphan_pend.head); + WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != + !rcu_segcblist_n_cbs(&rdp->cblist)); } /* @@ -2748,14 +2670,14 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) */ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { - RCU_TRACE(unsigned long mask); - RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); - RCU_TRACE(struct rcu_node *rnp = rdp->mynode); + RCU_TRACE(unsigned long mask;) + RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);) + RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) return; - RCU_TRACE(mask = rdp->grpmask); + RCU_TRACE(mask = rdp->grpmask;) trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), TPS("cpuofl")); @@ -2828,9 +2750,11 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) rcu_adopt_orphan_cbs(rsp, flags); raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); - WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, - "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", - cpu, rdp->qlen, rdp->nxtlist); + WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || + !rcu_segcblist_empty(&rdp->cblist), + "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", + cpu, rcu_segcblist_n_cbs(&rdp->cblist), + rcu_segcblist_first_cb(&rdp->cblist)); } /* @@ -2840,14 +2764,17 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; - struct rcu_head *next, *list, **tail; - long bl, count, count_lazy; - int i; + struct rcu_head *rhp; + struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); + long bl, count; /* If no callbacks are ready, just return. */ - if (!cpu_has_callbacks_ready_to_invoke(rdp)) { - trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); - trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist), + if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { + trace_rcu_batch_start(rsp->name, + rcu_segcblist_n_lazy_cbs(&rdp->cblist), + rcu_segcblist_n_cbs(&rdp->cblist), 0); + trace_rcu_batch_end(rsp->name, 0, + !rcu_segcblist_empty(&rdp->cblist), need_resched(), is_idle_task(current), rcu_is_callbacks_kthread()); return; @@ -2855,73 +2782,61 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* * Extract the list of ready callbacks, disabling to prevent - * races with call_rcu() from interrupt handlers. + * races with call_rcu() from interrupt handlers. Leave the + * callback counts, as rcu_barrier() needs to be conservative. */ local_irq_save(flags); WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); bl = rdp->blimit; - trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); - list = rdp->nxtlist; - rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; - *rdp->nxttail[RCU_DONE_TAIL] = NULL; - tail = rdp->nxttail[RCU_DONE_TAIL]; - for (i = RCU_NEXT_SIZE - 1; i >= 0; i--) - if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) - rdp->nxttail[i] = &rdp->nxtlist; + trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist), + rcu_segcblist_n_cbs(&rdp->cblist), bl); + rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); local_irq_restore(flags); /* Invoke callbacks. */ - count = count_lazy = 0; - while (list) { - next = list->next; - prefetch(next); - debug_rcu_head_unqueue(list); - if (__rcu_reclaim(rsp->name, list)) - count_lazy++; - list = next; - /* Stop only if limit reached and CPU has something to do. */ - if (++count >= bl && + rhp = rcu_cblist_dequeue(&rcl); + for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) { + debug_rcu_head_unqueue(rhp); + if (__rcu_reclaim(rsp->name, rhp)) + rcu_cblist_dequeued_lazy(&rcl); + /* + * Stop only if limit reached and CPU has something to do. + * Note: The rcl structure counts down from zero. + */ + if (-rcl.len >= bl && (need_resched() || (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) break; } local_irq_save(flags); - trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), - is_idle_task(current), - rcu_is_callbacks_kthread()); - - /* Update count, and requeue any remaining callbacks. */ - if (list != NULL) { - *tail = rdp->nxtlist; - rdp->nxtlist = list; - for (i = 0; i < RCU_NEXT_SIZE; i++) - if (&rdp->nxtlist == rdp->nxttail[i]) - rdp->nxttail[i] = tail; - else - break; - } + count = -rcl.len; + trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(), + is_idle_task(current), rcu_is_callbacks_kthread()); + + /* Update counts and requeue any remaining callbacks. */ + rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl); smp_mb(); /* List handling before counting for rcu_barrier(). */ - rdp->qlen_lazy -= count_lazy; - WRITE_ONCE(rdp->qlen, rdp->qlen - count); rdp->n_cbs_invoked += count; + rcu_segcblist_insert_count(&rdp->cblist, &rcl); /* Reinstate batch limit if we have worked down the excess. */ - if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) + count = rcu_segcblist_n_cbs(&rdp->cblist); + if (rdp->blimit == LONG_MAX && count <= qlowmark) rdp->blimit = blimit; /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ - if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { + if (count == 0 && rdp->qlen_last_fqs_check != 0) { rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; - } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) - rdp->qlen_last_fqs_check = rdp->qlen; - WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); + } else if (count < rdp->qlen_last_fqs_check - qhimark) + rdp->qlen_last_fqs_check = count; + WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); local_irq_restore(flags); /* Re-invoke RCU core processing if there are callbacks remaining. */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (rcu_segcblist_ready_cbs(&rdp->cblist)) invoke_rcu_core(); } @@ -3087,7 +3002,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) bool needwake; struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); - WARN_ON_ONCE(rdp->beenonline == 0); + WARN_ON_ONCE(!rdp->beenonline); /* Update RCU state based on any recent quiescent states. */ rcu_check_quiescent_state(rsp, rdp); @@ -3105,7 +3020,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) } /* If there are callbacks ready, invoke them. */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (rcu_segcblist_ready_cbs(&rdp->cblist)) invoke_rcu_callbacks(rsp, rdp); /* Do any needed deferred wakeups of rcuo kthreads. */ @@ -3177,7 +3092,8 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, * invoking force_quiescent_state() if the newly enqueued callback * is the only one waiting for a grace period to complete. */ - if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { + if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) > + rdp->qlen_last_fqs_check + qhimark)) { /* Are we ignoring a completed grace period? */ note_gp_changes(rsp, rdp); @@ -3195,10 +3111,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, /* Give the grace period a kick. */ rdp->blimit = LONG_MAX; if (rsp->n_force_qs == rdp->n_force_qs_snap && - *rdp->nxttail[RCU_DONE_TAIL] != head) + rcu_segcblist_first_pend_cb(&rdp->cblist) != head) force_quiescent_state(rsp); rdp->n_force_qs_snap = rsp->n_force_qs; - rdp->qlen_last_fqs_check = rdp->qlen; + rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); } } } @@ -3238,7 +3154,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ - if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) { + if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) { int offline; if (cpu != -1) @@ -3257,23 +3173,21 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, */ BUG_ON(cpu != -1); WARN_ON_ONCE(!rcu_is_watching()); - if (!likely(rdp->nxtlist)) - init_default_callback_list(rdp); + if (rcu_segcblist_empty(&rdp->cblist)) + rcu_segcblist_init(&rdp->cblist); } - WRITE_ONCE(rdp->qlen, rdp->qlen + 1); - if (lazy) - rdp->qlen_lazy++; - else + rcu_segcblist_enqueue(&rdp->cblist, head, lazy); + if (!lazy) rcu_idle_count_callbacks_posted(); - smp_mb(); /* Count before adding callback for rcu_barrier(). */ - *rdp->nxttail[RCU_NEXT_TAIL] = head; - rdp->nxttail[RCU_NEXT_TAIL] = &head->next; if (__is_kfree_rcu_offset((unsigned long)func)) trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, - rdp->qlen_lazy, rdp->qlen); + rcu_segcblist_n_lazy_cbs(&rdp->cblist), + rcu_segcblist_n_cbs(&rdp->cblist)); else - trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); + trace_rcu_callback(rsp->name, head, + rcu_segcblist_n_lazy_cbs(&rdp->cblist), + rcu_segcblist_n_cbs(&rdp->cblist)); /* Go handle any RCU core processing required. */ __call_rcu_core(rsp, rdp, head, flags); @@ -3519,41 +3433,6 @@ void cond_synchronize_sched(unsigned long oldstate) } EXPORT_SYMBOL_GPL(cond_synchronize_sched); -/* Adjust sequence number for start of update-side operation. */ -static void rcu_seq_start(unsigned long *sp) -{ - WRITE_ONCE(*sp, *sp + 1); - smp_mb(); /* Ensure update-side operation after counter increment. */ - WARN_ON_ONCE(!(*sp & 0x1)); -} - -/* Adjust sequence number for end of update-side operation. */ -static void rcu_seq_end(unsigned long *sp) -{ - smp_mb(); /* Ensure update-side operation before counter increment. */ - WRITE_ONCE(*sp, *sp + 1); - WARN_ON_ONCE(*sp & 0x1); -} - -/* Take a snapshot of the update side's sequence number. */ -static unsigned long rcu_seq_snap(unsigned long *sp) -{ - unsigned long s; - - s = (READ_ONCE(*sp) + 3) & ~0x1; - smp_mb(); /* Above access must not bleed into critical section. */ - return s; -} - -/* - * Given a snapshot from rcu_seq_snap(), determine whether or not a - * full update-side operation has occurred. - */ -static bool rcu_seq_done(unsigned long *sp, unsigned long s) -{ - return ULONG_CMP_GE(READ_ONCE(*sp), s); -} - /* * Check to see if there is any immediate RCU-related work to be done * by the current CPU, for the specified type of RCU, returning 1 if so. @@ -3577,7 +3456,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && rdp->core_needs_qs && rdp->cpu_no_qs.b.norm && - rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { + rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) { rdp->n_rp_core_needs_qs++; } else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) { rdp->n_rp_report_qs++; @@ -3585,7 +3464,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) } /* Does this CPU have callbacks ready to invoke? */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) { + if (rcu_segcblist_ready_cbs(&rdp->cblist)) { rdp->n_rp_cb_ready++; return 1; } @@ -3649,10 +3528,10 @@ static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); - if (!rdp->nxtlist) + if (rcu_segcblist_empty(&rdp->cblist)) continue; hc = true; - if (rdp->qlen != rdp->qlen_lazy || !all_lazy) { + if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) { al = false; break; } @@ -3761,7 +3640,7 @@ static void _rcu_barrier(struct rcu_state *rsp) __call_rcu(&rdp->barrier_head, rcu_barrier_callback, rsp, cpu, 0); } - } else if (READ_ONCE(rdp->qlen)) { + } else if (rcu_segcblist_n_cbs(&rdp->cblist)) { _rcu_barrier_trace(rsp, "OnlineQ", cpu, rsp->barrier_sequence); smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); @@ -3870,8 +3749,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - if (!rdp->nxtlist) - init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ + if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ + !init_nocb_callback_list(rdp)) + rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; rcu_sysidle_init_percpu_data(rdp->dynticks); rcu_dynticks_eqs_online(); @@ -3890,12 +3770,16 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; rdp->cpu_no_qs.b.norm = true; - rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); + rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu); rdp->core_needs_qs = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } +/* + * Invoked early in the CPU-online process, when pretty much all + * services are available. The incoming CPU is not present. + */ int rcutree_prepare_cpu(unsigned int cpu) { struct rcu_state *rsp; @@ -3909,6 +3793,9 @@ int rcutree_prepare_cpu(unsigned int cpu) return 0; } +/* + * Update RCU priority boot kthread affinity for CPU-hotplug changes. + */ static void rcutree_affinity_setting(unsigned int cpu, int outgoing) { struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); @@ -3916,20 +3803,34 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoing) rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); } +/* + * Near the end of the CPU-online process. Pretty much all services + * enabled, and the CPU is now very much alive. + */ int rcutree_online_cpu(unsigned int cpu) { sync_sched_exp_online_cleanup(cpu); rcutree_affinity_setting(cpu, -1); + if (IS_ENABLED(CONFIG_TREE_SRCU)) + srcu_online_cpu(cpu); return 0; } +/* + * Near the beginning of the process. The CPU is still very much alive + * with pretty much all services enabled. + */ int rcutree_offline_cpu(unsigned int cpu) { rcutree_affinity_setting(cpu, cpu); + if (IS_ENABLED(CONFIG_TREE_SRCU)) + srcu_offline_cpu(cpu); return 0; } - +/* + * Near the end of the offline process. We do only tracing here. + */ int rcutree_dying_cpu(unsigned int cpu) { struct rcu_state *rsp; @@ -3939,6 +3840,9 @@ int rcutree_dying_cpu(unsigned int cpu) return 0; } +/* + * The outgoing CPU is gone and we are running elsewhere. + */ int rcutree_dead_cpu(unsigned int cpu) { struct rcu_state *rsp; @@ -3956,6 +3860,10 @@ int rcutree_dead_cpu(unsigned int cpu) * incoming CPUs are not allowed to use RCU read-side critical sections * until this function is called. Failing to observe this restriction * will result in lockdep splats. + * + * Note that this function is special in that it is invoked directly + * from the incoming CPU rather than from the cpuhp_step mechanism. + * This is because this function must be invoked at a precise location. */ void rcu_cpu_starting(unsigned int cpu) { @@ -3978,9 +3886,6 @@ void rcu_cpu_starting(unsigned int cpu) #ifdef CONFIG_HOTPLUG_CPU /* - * The CPU is exiting the idle loop into the arch_cpu_idle_dead() - * function. We now remove it from the rcu_node tree's ->qsmaskinit - * bit masks. * The CPU is exiting the idle loop into the arch_cpu_idle_dead() * function. We now remove it from the rcu_node tree's ->qsmaskinit * bit masks. @@ -3999,6 +3904,14 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } +/* + * The outgoing function has no further need of RCU, so remove it from + * the list of CPUs that RCU must track. + * + * Note that this function is special in that it is invoked directly + * from the outgoing CPU rather than from the cpuhp_step mechanism. + * This is because this function must be invoked at a precise location. + */ void rcu_report_dead(unsigned int cpu) { struct rcu_state *rsp; @@ -4013,6 +3926,10 @@ void rcu_report_dead(unsigned int cpu) } #endif +/* + * On non-huge systems, use expedited RCU grace periods to make suspend + * and hibernation run faster. + */ static int rcu_pm_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -4083,7 +4000,7 @@ early_initcall(rcu_spawn_gp_kthread); * task is booting the system, and such primitives are no-ops). After this * function is called, any synchronous grace-period primitives are run as * expedited, with the requesting task driving the grace period forward. - * A later core_initcall() rcu_exp_runtime_mode() will switch to full + * A later core_initcall() rcu_set_runtime_mode() will switch to full * runtime RCU functionality. */ void rcu_scheduler_starting(void) @@ -4095,31 +4012,6 @@ void rcu_scheduler_starting(void) rcu_test_sync_prims(); } -/* - * Compute the per-level fanout, either using the exact fanout specified - * or balancing the tree, depending on the rcu_fanout_exact boot parameter. - */ -static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt) -{ - int i; - - if (rcu_fanout_exact) { - levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; - for (i = rcu_num_lvls - 2; i >= 0; i--) - levelspread[i] = RCU_FANOUT; - } else { - int ccur; - int cprv; - - cprv = nr_cpu_ids; - for (i = rcu_num_lvls - 1; i >= 0; i--) { - ccur = levelcnt[i]; - levelspread[i] = (cprv + ccur - 1) / ccur; - cprv = ccur; - } - } -} - /* * Helper function for rcu_init() that initializes one rcu_state structure. */ @@ -4129,9 +4021,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) static const char * const fqs[] = RCU_FQS_NAME_INIT; static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; - static u8 fl_mask = 0x1; - int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */ int levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ int cpustride = 1; int i; @@ -4146,20 +4036,16 @@ static void __init rcu_init_one(struct rcu_state *rsp) /* Initialize the level-tracking arrays. */ - for (i = 0; i < rcu_num_lvls; i++) - levelcnt[i] = num_rcu_lvl[i]; for (i = 1; i < rcu_num_lvls; i++) - rsp->level[i] = rsp->level[i - 1] + levelcnt[i - 1]; - rcu_init_levelspread(levelspread, levelcnt); - rsp->flavor_mask = fl_mask; - fl_mask <<= 1; + rsp->level[i] = rsp->level[i - 1] + num_rcu_lvl[i - 1]; + rcu_init_levelspread(levelspread, num_rcu_lvl); /* Initialize the elements themselves, starting from the leaves. */ for (i = rcu_num_lvls - 1; i >= 0; i--) { cpustride *= levelspread[i]; rnp = rsp->level[i]; - for (j = 0; j < levelcnt[i]; j++, rnp++) { + for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) { raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), &rcu_node_class[i], buf[i]); @@ -4332,6 +4218,8 @@ void __init rcu_init(void) for_each_online_cpu(cpu) { rcutree_prepare_cpu(cpu); rcu_cpu_starting(cpu); + if (IS_ENABLED(CONFIG_TREE_SRCU)) + srcu_online_cpu(cpu); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ec62a05bfdb3c81b9a82593de2aeefffb96bf300..ba38262c3554495c59b0aacba52ba245ba108d53 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -30,80 +30,9 @@ #include #include #include +#include -/* - * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and - * CONFIG_RCU_FANOUT_LEAF. - * In theory, it should be possible to add more levels straightforwardly. - * In practice, this did work well going from three levels to four. - * Of course, your mileage may vary. - */ - -#ifdef CONFIG_RCU_FANOUT -#define RCU_FANOUT CONFIG_RCU_FANOUT -#else /* #ifdef CONFIG_RCU_FANOUT */ -# ifdef CONFIG_64BIT -# define RCU_FANOUT 64 -# else -# define RCU_FANOUT 32 -# endif -#endif /* #else #ifdef CONFIG_RCU_FANOUT */ - -#ifdef CONFIG_RCU_FANOUT_LEAF -#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF -#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ -# ifdef CONFIG_64BIT -# define RCU_FANOUT_LEAF 64 -# else -# define RCU_FANOUT_LEAF 32 -# endif -#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ - -#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) -#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) -#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) -#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) - -#if NR_CPUS <= RCU_FANOUT_1 -# define RCU_NUM_LVLS 1 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_NODES NUM_RCU_LVL_0 -# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 } -# define RCU_NODE_NAME_INIT { "rcu_node_0" } -# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" } -#elif NR_CPUS <= RCU_FANOUT_2 -# define RCU_NUM_LVLS 2 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) -# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1) -# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 } -# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" } -# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" } -#elif NR_CPUS <= RCU_FANOUT_3 -# define RCU_NUM_LVLS 3 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) -# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) -# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2) -# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 } -# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" } -# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } -#elif NR_CPUS <= RCU_FANOUT_4 -# define RCU_NUM_LVLS 4 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) -# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) -# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) -# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) -# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 } -# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } -# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } -#else -# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" -#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ - -extern int rcu_num_lvls; -extern int rcu_num_nodes; +#include "rcu_segcblist.h" /* * Dynticks per-CPU state. @@ -113,6 +42,9 @@ struct rcu_dynticks { /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ + bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ + unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ + bool rcu_urgent_qs; /* GP old need light quiescent state. */ #ifdef CONFIG_NO_HZ_FULL_SYSIDLE long long dynticks_idle_nesting; /* irq/process nesting level from idle. */ @@ -261,41 +193,6 @@ struct rcu_node { */ #define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo)) -/* - * Do a full breadth-first scan of the rcu_node structures for the - * specified rcu_state structure. - */ -#define rcu_for_each_node_breadth_first(rsp, rnp) \ - for ((rnp) = &(rsp)->node[0]; \ - (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) - -/* - * Do a breadth-first scan of the non-leaf rcu_node structures for the - * specified rcu_state structure. Note that if there is a singleton - * rcu_node tree with but one rcu_node structure, this loop is a no-op. - */ -#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ - for ((rnp) = &(rsp)->node[0]; \ - (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) - -/* - * Scan the leaves of the rcu_node hierarchy for the specified rcu_state - * structure. Note that if there is a singleton rcu_node tree with but - * one rcu_node structure, this loop -will- visit the rcu_node structure. - * It is still a leaf node, even if it is also the root node. - */ -#define rcu_for_each_leaf_node(rsp, rnp) \ - for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ - (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) - -/* - * Iterate over all possible CPUs in a leaf RCU node. - */ -#define for_each_leaf_node_possible_cpu(rnp, cpu) \ - for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \ - cpu <= rnp->grphi; \ - cpu = cpumask_next((cpu), cpu_possible_mask)) - /* * Union to allow "aggregate OR" operation on the need for a quiescent * state by the normal and expedited grace periods. @@ -336,34 +233,9 @@ struct rcu_data { /* period it is aware of. */ /* 2) batch handling */ - /* - * If nxtlist is not NULL, it is partitioned as follows. - * Any of the partitions might be empty, in which case the - * pointer to that partition will be equal to the pointer for - * the following partition. When the list is empty, all of - * the nxttail elements point to the ->nxtlist pointer itself, - * which in that case is NULL. - * - * [nxtlist, *nxttail[RCU_DONE_TAIL]): - * Entries that batch # <= ->completed - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): - * Entries that batch # <= ->completed - 1: waiting for current GP - * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): - * Entries known to have arrived before current GP ended - * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): - * Entries that might have arrived after current GP ended - * Note that the value of *nxttail[RCU_NEXT_TAIL] will - * always be NULL, as this is the end of the list. - */ - struct rcu_head *nxtlist; - struct rcu_head **nxttail[RCU_NEXT_SIZE]; - unsigned long nxtcompleted[RCU_NEXT_SIZE]; - /* grace periods for sublists. */ - long qlen_lazy; /* # of lazy queued callbacks */ - long qlen; /* # of queued callbacks, incl lazy */ + struct rcu_segcblist cblist; /* Segmented callback list, with */ + /* different callbacks waiting for */ + /* different grace periods. */ long qlen_last_fqs_check; /* qlen at last check for QS forcing */ unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ @@ -482,7 +354,6 @@ struct rcu_state { struct rcu_node *level[RCU_NUM_LVLS + 1]; /* Hierarchy levels (+1 to */ /* shut bogus gcc warning) */ - u8 flavor_mask; /* bit in flavor mask. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ call_rcu_func_t call; /* call_rcu() flavor. */ int ncpus; /* # CPUs seen so far. */ @@ -502,14 +373,11 @@ struct rcu_state { raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp; /* Protect following fields. */ - struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */ + struct rcu_cblist orphan_pend; /* Orphaned callbacks that */ /* need a grace period. */ - struct rcu_head **orphan_nxttail; /* Tail of above. */ - struct rcu_head *orphan_donelist; /* Orphaned callbacks that */ + struct rcu_cblist orphan_done; /* Orphaned callbacks that */ /* are ready to invoke. */ - struct rcu_head **orphan_donetail; /* Tail of above. */ - long qlen_lazy; /* Number of lazy callbacks. */ - long qlen; /* Total number of callbacks. */ + /* (Contains counts.) */ /* End of fields guarded by orphan_lock. */ struct mutex barrier_mutex; /* Guards barrier fields. */ @@ -596,6 +464,7 @@ extern struct rcu_state rcu_preempt_state; #endif /* #ifdef CONFIG_PREEMPT_RCU */ int rcu_dynticks_snap(struct rcu_dynticks *rdtp); +bool rcu_eqs_special_set(int cpu); #ifdef CONFIG_RCU_BOOST DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); @@ -673,6 +542,14 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp); static void rcu_dynticks_task_enter(void); static void rcu_dynticks_task_exit(void); +#ifdef CONFIG_SRCU +void srcu_online_cpu(unsigned int cpu); +void srcu_offline_cpu(unsigned int cpu); +#else /* #ifdef CONFIG_SRCU */ +void srcu_online_cpu(unsigned int cpu) { } +void srcu_offline_cpu(unsigned int cpu) { } +#endif /* #else #ifdef CONFIG_SRCU */ + #endif /* #ifndef RCU_TREE_NONCORE */ #ifdef CONFIG_RCU_TRACE diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index a7b639ccd46e0ade81946639ef1d50bdc2b68d21..e513b4ab119769488419eba9bff7601f1d556182 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -292,7 +292,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, rnp->grphi, TPS("wait")); - wait_event(rnp->exp_wq[(s >> 1) & 0x3], + wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], sync_exp_work_done(rsp, &rdp->exp_workdone2, s)); return true; @@ -331,6 +331,8 @@ static void sync_sched_exp_handler(void *data) return; } __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true); + /* Store .exp before .rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); resched_cpu(smp_processor_id()); } @@ -531,7 +533,8 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) rnp->exp_seq_rq = s; spin_unlock(&rnp->exp_lock); } - wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]); + smp_mb(); /* All above changes before wakeup. */ + wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]); } trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); mutex_unlock(&rsp->exp_wake_mutex); @@ -609,9 +612,9 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, /* Wait for expedited grace period to complete. */ rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); rnp = rcu_get_root(rsp); - wait_event(rnp->exp_wq[(s >> 1) & 0x3], - sync_exp_work_done(rsp, - &rdp->exp_workdone0, s)); + wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], + sync_exp_work_done(rsp, &rdp->exp_workdone0, s)); + smp_mb(); /* Workqueue actions happen before return. */ /* Let the next expedited grace period start. */ mutex_unlock(&rsp->exp_mutex); @@ -735,15 +738,3 @@ void synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ - -/* - * Switch to run-time mode once Tree RCU has fully initialized. - */ -static int __init rcu_exp_runtime_mode(void) -{ - rcu_test_sync_prims(); - rcu_scheduler_active = RCU_SCHEDULER_RUNNING; - rcu_test_sync_prims(); - return 0; -} -core_initcall(rcu_exp_runtime_mode); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0a62a8f1caacfab8a2a39c729f70e445f8048cbf..c9a48657512ae35a833d6cc1c56ff9687a899b17 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1350,10 +1350,10 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) */ if ((rdp->completed != rnp->completed || unlikely(READ_ONCE(rdp->gpwrap))) && - rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) + rcu_segcblist_pend_cbs(&rdp->cblist)) note_gp_changes(rsp, rdp); - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (rcu_segcblist_ready_cbs(&rdp->cblist)) cbs_ready = true; } return cbs_ready; @@ -1461,7 +1461,7 @@ static void rcu_prepare_for_idle(void) rdtp->last_accelerate = jiffies; for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); - if (!*rdp->nxttail[RCU_DONE_TAIL]) + if (rcu_segcblist_pend_cbs(&rdp->cblist)) continue; rnp = rdp->mynode; raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ @@ -1529,7 +1529,7 @@ static void rcu_oom_notify_cpu(void *unused) for_each_rcu_flavor(rsp) { rdp = raw_cpu_ptr(rsp->rda); - if (rdp->qlen_lazy != 0) { + if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) { atomic_inc(&oom_callback_count); rsp->call(&rdp->oom_head, rcu_oom_callback); } @@ -1709,7 +1709,7 @@ __setup("rcu_nocbs=", rcu_nocb_setup); static int __init parse_rcu_nocb_poll(char *arg) { - rcu_nocb_poll = 1; + rcu_nocb_poll = true; return 0; } early_param("rcu_nocb_poll", parse_rcu_nocb_poll); @@ -1860,7 +1860,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else { - rdp->nocb_defer_wakeup = RCU_NOGP_WAKE; + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE); + /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmptyIsDeferred")); } @@ -1872,7 +1874,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); } else { - rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE; + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE); + /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ + smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvfIsDeferred")); } @@ -1930,30 +1934,26 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp, unsigned long flags) { - long ql = rsp->qlen; - long qll = rsp->qlen_lazy; + long ql = rsp->orphan_done.len; + long qll = rsp->orphan_done.len_lazy; /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ if (!rcu_is_nocb_cpu(smp_processor_id())) return false; - rsp->qlen = 0; - rsp->qlen_lazy = 0; /* First, enqueue the donelist, if any. This preserves CB ordering. */ - if (rsp->orphan_donelist != NULL) { - __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist, - rsp->orphan_donetail, ql, qll, flags); - ql = qll = 0; - rsp->orphan_donelist = NULL; - rsp->orphan_donetail = &rsp->orphan_donelist; + if (rsp->orphan_done.head) { + __call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done), + rcu_cblist_tail(&rsp->orphan_done), + ql, qll, flags); } - if (rsp->orphan_nxtlist != NULL) { - __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist, - rsp->orphan_nxttail, ql, qll, flags); - ql = qll = 0; - rsp->orphan_nxtlist = NULL; - rsp->orphan_nxttail = &rsp->orphan_nxtlist; + if (rsp->orphan_pend.head) { + __call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend), + rcu_cblist_tail(&rsp->orphan_pend), + ql, qll, flags); } + rcu_cblist_init(&rsp->orphan_done); + rcu_cblist_init(&rsp->orphan_pend); return true; } @@ -2395,16 +2395,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) return false; /* If there are early-boot callbacks, move them to nocb lists. */ - if (rdp->nxtlist) { - rdp->nocb_head = rdp->nxtlist; - rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL]; - atomic_long_set(&rdp->nocb_q_count, rdp->qlen); - atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy); - rdp->nxtlist = NULL; - rdp->qlen = 0; - rdp->qlen_lazy = 0; + if (!rcu_segcblist_empty(&rdp->cblist)) { + rdp->nocb_head = rcu_segcblist_head(&rdp->cblist); + rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist); + atomic_long_set(&rdp->nocb_q_count, + rcu_segcblist_n_cbs(&rdp->cblist)); + atomic_long_set(&rdp->nocb_q_count_lazy, + rcu_segcblist_n_lazy_cbs(&rdp->cblist)); + rcu_segcblist_init(&rdp->cblist); } - rdp->nxttail[RCU_NEXT_TAIL] = NULL; + rcu_segcblist_disable(&rdp->cblist); return true; } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 8751a748499a3d3a93419fe5273bbf4876aa2149..6cea17a1ea301f23180126504acf9d84d27e16f6 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -41,11 +41,11 @@ #include #include #include +#include #define RCU_TREE_NONCORE #include "tree.h" - -DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); +#include "rcu.h" static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) @@ -121,7 +121,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) cpu_is_offline(rdp->cpu) ? '!' : ' ', ulong2long(rdp->completed), ulong2long(rdp->gpnum), rdp->cpu_no_qs.b.norm, - rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), + rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu), rdp->core_needs_qs); seq_printf(m, " dt=%d/%llx/%d df=%lu", rcu_dynticks_snap(rdp->dynticks), @@ -130,17 +130,15 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); seq_printf(m, " of=%lu", rdp->offline_fqs); rcu_nocb_q_lengths(rdp, &ql, &qll); - qll += rdp->qlen_lazy; - ql += rdp->qlen; + qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist); + ql += rcu_segcblist_n_cbs(&rdp->cblist); seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c", qll, ql, - ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != - rdp->nxttail[RCU_NEXT_TAIL]], - ".R"[rdp->nxttail[RCU_WAIT_TAIL] != - rdp->nxttail[RCU_NEXT_READY_TAIL]], - ".W"[rdp->nxttail[RCU_DONE_TAIL] != - rdp->nxttail[RCU_WAIT_TAIL]], - ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); + ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)], + ".R"[!rcu_segcblist_segempty(&rdp->cblist, + RCU_NEXT_READY_TAIL)], + ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)], + ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]); #ifdef CONFIG_RCU_BOOST seq_printf(m, " kt=%d/%c ktl=%x", per_cpu(rcu_cpu_has_work, rdp->cpu), @@ -278,7 +276,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, - READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); + READ_ONCE(rsp->n_force_qs_lh), + rsp->orphan_done.len_lazy, + rsp->orphan_done.len); for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 55c8530316c7ce6077df9814cc4c7997f7282486..273e869ca21d546b5457987bd676f70c08bba5f3 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); * non-expedited counterparts? Intended for use within RCU. Note * that if the user specifies both rcu_expedited and rcu_normal, then * rcu_normal wins. (Except during the time period during boot from - * when the first task is spawned until the rcu_exp_runtime_mode() + * when the first task is spawned until the rcu_set_runtime_mode() * core_initcall() is invoked, at which point everything is expedited.) */ bool rcu_gp_is_normal(void) @@ -190,6 +190,39 @@ void rcu_end_inkernel_boot(void) #endif /* #ifndef CONFIG_TINY_RCU */ +/* + * Test each non-SRCU synchronous grace-period wait API. This is + * useful just after a change in mode for these primitives, and + * during early boot. + */ +void rcu_test_sync_prims(void) +{ + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + synchronize_rcu(); + synchronize_rcu_bh(); + synchronize_sched(); + synchronize_rcu_expedited(); + synchronize_rcu_bh_expedited(); + synchronize_sched_expedited(); +} + +#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) + +/* + * Switch to run-time mode once RCU has fully initialized. + */ +static int __init rcu_set_runtime_mode(void) +{ + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_test_sync_prims(); + return 0; +} +core_initcall(rcu_set_runtime_mode); + +#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ + #ifdef CONFIG_PREEMPT_RCU /* @@ -632,6 +665,7 @@ static void check_holdout_task(struct task_struct *t, put_task_struct(t); return; } + rcu_request_urgent_qs_task(t); if (!needreport) return; if (*firstreport) { @@ -817,23 +851,6 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ -/* - * Test each non-SRCU synchronous grace-period wait API. This is - * useful just after a change in mode for these primitives, and - * during early boot. - */ -void rcu_test_sync_prims(void) -{ - if (!IS_ENABLED(CONFIG_PROVE_RCU)) - return; - synchronize_rcu(); - synchronize_rcu_bh(); - synchronize_sched(); - synchronize_rcu_expedited(); - synchronize_rcu_bh_expedited(); - synchronize_sched_expedited(); -} - #ifdef CONFIG_PROVE_RCU /* diff --git a/kernel/relay.c b/kernel/relay.c index 0e413d9eec8af484517300e1c142325356865771..39a9dfc69486b57a85d115dcefee75d35907a74d 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1212,7 +1212,6 @@ static ssize_t subbuf_splice_actor(struct file *in, .nr_pages = 0, .nr_pages_max = PIPE_DEF_BUFFERS, .partial = partial, - .flags = flags, .ops = &relay_pipe_buf_ops, .spd_release = relay_page_release, }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3b31fc05a0f1e45be5985b860a5fde95ee969832..326d4f88e2b1dbda470c7730c6ae1a67cc095f99 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -85,21 +85,6 @@ int sysctl_sched_rt_runtime = 950000; /* CPUs with isolated domains */ cpumask_var_t cpu_isolated_map; -/* - * this_rq_lock - lock this runqueue and disable interrupts. - */ -static struct rq *this_rq_lock(void) - __acquires(rq->lock) -{ - struct rq *rq; - - local_irq_disable(); - rq = this_rq(); - raw_spin_lock(&rq->lock); - - return rq; -} - /* * __task_rq_lock - lock the rq @p resides on. */ @@ -233,8 +218,11 @@ void update_rq_clock(struct rq *rq) return; #ifdef CONFIG_SCHED_DEBUG + if (sched_feat(WARN_DOUBLE_CLOCK)) + SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED); rq->clock_update_flags |= RQCF_UPDATED; #endif + delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; if (delta < 0) return; @@ -261,13 +249,14 @@ static void hrtick_clear(struct rq *rq) static enum hrtimer_restart hrtick(struct hrtimer *timer) { struct rq *rq = container_of(timer, struct rq, hrtick_timer); + struct rq_flags rf; WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); - raw_spin_lock(&rq->lock); + rq_lock(rq, &rf); update_rq_clock(rq); rq->curr->sched_class->task_tick(rq, rq->curr, 1); - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); return HRTIMER_NORESTART; } @@ -287,11 +276,12 @@ static void __hrtick_restart(struct rq *rq) static void __hrtick_start(void *arg) { struct rq *rq = arg; + struct rq_flags rf; - raw_spin_lock(&rq->lock); + rq_lock(rq, &rf); __hrtick_restart(rq); rq->hrtick_csd_pending = 0; - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); } /* @@ -762,17 +752,23 @@ static void set_load_weight(struct task_struct *p) static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) { - update_rq_clock(rq); + if (!(flags & ENQUEUE_NOCLOCK)) + update_rq_clock(rq); + if (!(flags & ENQUEUE_RESTORE)) sched_info_queued(rq, p); + p->sched_class->enqueue_task(rq, p, flags); } static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) { - update_rq_clock(rq); + if (!(flags & DEQUEUE_NOCLOCK)) + update_rq_clock(rq); + if (!(flags & DEQUEUE_SAVE)) sched_info_dequeued(rq, p); + p->sched_class->dequeue_task(rq, p, flags); } @@ -946,18 +942,19 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * * Returns (locked) new rq. Old rq's lock is released. */ -static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new_cpu) +static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, + struct task_struct *p, int new_cpu) { lockdep_assert_held(&rq->lock); p->on_rq = TASK_ON_RQ_MIGRATING; - dequeue_task(rq, p, 0); + dequeue_task(rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, new_cpu); - raw_spin_unlock(&rq->lock); + rq_unlock(rq, rf); rq = cpu_rq(new_cpu); - raw_spin_lock(&rq->lock); + rq_lock(rq, rf); BUG_ON(task_cpu(p) != new_cpu); enqueue_task(rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; @@ -980,7 +977,8 @@ struct migration_arg { * So we race with normal scheduler movements, but that's OK, as long * as the task is no longer on this CPU. */ -static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_cpu) +static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, + struct task_struct *p, int dest_cpu) { if (unlikely(!cpu_active(dest_cpu))) return rq; @@ -989,7 +987,8 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return rq; - rq = move_queued_task(rq, p, dest_cpu); + update_rq_clock(rq); + rq = move_queued_task(rq, rf, p, dest_cpu); return rq; } @@ -1004,6 +1003,7 @@ static int migration_cpu_stop(void *data) struct migration_arg *arg = data; struct task_struct *p = arg->task; struct rq *rq = this_rq(); + struct rq_flags rf; /* * The original target CPU might have gone down and we might @@ -1018,7 +1018,7 @@ static int migration_cpu_stop(void *data) sched_ttwu_pending(); raw_spin_lock(&p->pi_lock); - raw_spin_lock(&rq->lock); + rq_lock(rq, &rf); /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -1026,11 +1026,11 @@ static int migration_cpu_stop(void *data) */ if (task_rq(p) == rq) { if (task_on_rq_queued(p)) - rq = __migrate_task(rq, p, arg->dest_cpu); + rq = __migrate_task(rq, &rf, p, arg->dest_cpu); else p->wake_cpu = arg->dest_cpu; } - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); raw_spin_unlock(&p->pi_lock); local_irq_enable(); @@ -1063,7 +1063,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) * holding rq->lock. */ lockdep_assert_held(&rq->lock); - dequeue_task(rq, p, DEQUEUE_SAVE); + dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); } if (running) put_prev_task(rq, p); @@ -1071,7 +1071,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) p->sched_class->set_cpus_allowed(p, new_mask); if (queued) - enqueue_task(rq, p, ENQUEUE_RESTORE); + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); if (running) set_curr_task(rq, p); } @@ -1150,9 +1150,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, * OK, since we're going to drop the lock immediately * afterwards anyway. */ - rq_unpin_lock(rq, &rf); - rq = move_queued_task(rq, p, dest_cpu); - rq_repin_lock(rq, &rf); + rq = move_queued_task(rq, &rf, p, dest_cpu); } out: task_rq_unlock(rq, p, &rf); @@ -1217,16 +1215,24 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) { if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; + struct rq_flags srf, drf; src_rq = task_rq(p); dst_rq = cpu_rq(cpu); + rq_pin_lock(src_rq, &srf); + rq_pin_lock(dst_rq, &drf); + p->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(src_rq, p, 0); set_task_cpu(p, cpu); activate_task(dst_rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(dst_rq, p, 0); + + rq_unpin_lock(dst_rq, &drf); + rq_unpin_lock(src_rq, &srf); + } else { /* * Task isn't running anymore; make it appear like we migrated @@ -1680,7 +1686,7 @@ static void ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, struct rq_flags *rf) { - int en_flags = ENQUEUE_WAKEUP; + int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; lockdep_assert_held(&rq->lock); @@ -1726,14 +1732,13 @@ void sched_ttwu_pending(void) struct rq *rq = this_rq(); struct llist_node *llist = llist_del_all(&rq->wake_list); struct task_struct *p; - unsigned long flags; struct rq_flags rf; if (!llist) return; - raw_spin_lock_irqsave(&rq->lock, flags); - rq_pin_lock(rq, &rf); + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); while (llist) { int wake_flags = 0; @@ -1747,8 +1752,7 @@ void sched_ttwu_pending(void) ttwu_do_activate(rq, p, wake_flags, &rf); } - rq_unpin_lock(rq, &rf); - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); } void scheduler_ipi(void) @@ -1806,7 +1810,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags) void wake_up_if_idle(int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long flags; + struct rq_flags rf; rcu_read_lock(); @@ -1816,11 +1820,11 @@ void wake_up_if_idle(int cpu) if (set_nr_if_polling(rq->idle)) { trace_sched_wake_idle_without_ipi(cpu); } else { - raw_spin_lock_irqsave(&rq->lock, flags); + rq_lock_irqsave(rq, &rf); if (is_idle_task(rq->curr)) smp_send_reschedule(cpu); /* Else CPU is not idle, do nothing here: */ - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); } out: @@ -1846,11 +1850,10 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) } #endif - raw_spin_lock(&rq->lock); - rq_pin_lock(rq, &rf); + rq_lock(rq, &rf); + update_rq_clock(rq); ttwu_do_activate(rq, p, wake_flags, &rf); - rq_unpin_lock(rq, &rf); - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); } /* @@ -2097,11 +2100,9 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) * disabled avoiding further scheduler activity on it and we've * not yet picked a replacement task. */ - rq_unpin_lock(rq, rf); - raw_spin_unlock(&rq->lock); + rq_unlock(rq, rf); raw_spin_lock(&p->pi_lock); - raw_spin_lock(&rq->lock); - rq_repin_lock(rq, rf); + rq_relock(rq, rf); } if (!(p->state & TASK_NORMAL)) @@ -2114,7 +2115,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) delayacct_blkio_end(); atomic_dec(&rq->nr_iowait); } - ttwu_activate(rq, p, ENQUEUE_WAKEUP); + ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); } ttwu_do_wakeup(rq, p, 0, rf); @@ -2555,7 +2556,7 @@ void wake_up_new_task(struct task_struct *p) update_rq_clock(rq); post_init_entity_util_avg(&p->se); - activate_task(rq, p, 0); + activate_task(rq, p, ENQUEUE_NOCLOCK); p->on_rq = TASK_ON_RQ_QUEUED; trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); @@ -3093,15 +3094,18 @@ void scheduler_tick(void) int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; + struct rq_flags rf; sched_clock_tick(); - raw_spin_lock(&rq->lock); + rq_lock(rq, &rf); + update_rq_clock(rq); curr->sched_class->task_tick(rq, curr, 0); cpu_load_update_active(rq); calc_global_load_tick(rq); - raw_spin_unlock(&rq->lock); + + rq_unlock(rq, &rf); perf_event_task_tick(); @@ -3378,7 +3382,7 @@ static void __sched notrace __schedule(bool preempt) hrtick_clear(rq); local_irq_disable(); - rcu_note_context_switch(); + rcu_note_context_switch(preempt); /* * Make sure that signal_pending_state()->signal_pending() below @@ -3386,18 +3390,18 @@ static void __sched notrace __schedule(bool preempt) * done by the caller to avoid the race with signal_wake_up(). */ smp_mb__before_spinlock(); - raw_spin_lock(&rq->lock); - rq_pin_lock(rq, &rf); + rq_lock(rq, &rf); /* Promote REQ to ACT */ rq->clock_update_flags <<= 1; + update_rq_clock(rq); switch_count = &prev->nivcsw; if (!preempt && prev->state) { if (unlikely(signal_pending_state(prev->state, prev))) { prev->state = TASK_RUNNING; } else { - deactivate_task(rq, prev, DEQUEUE_SLEEP); + deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); prev->on_rq = 0; if (prev->in_iowait) { @@ -3421,9 +3425,6 @@ static void __sched notrace __schedule(bool preempt) switch_count = &prev->nvcsw; } - if (task_on_rq_queued(prev)) - update_rq_clock(rq); - next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); clear_preempt_need_resched(); @@ -3439,8 +3440,7 @@ static void __sched notrace __schedule(bool preempt) rq = context_switch(rq, prev, next, &rf); } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); - rq_unpin_lock(rq, &rf); - raw_spin_unlock_irq(&rq->lock); + rq_unlock_irq(rq, &rf); } balance_callback(rq); @@ -3502,6 +3502,31 @@ asmlinkage __visible void __sched schedule(void) } EXPORT_SYMBOL(schedule); +/* + * synchronize_rcu_tasks() makes sure that no task is stuck in preempted + * state (have scheduled out non-voluntarily) by making sure that all + * tasks have either left the run queue or have gone into user space. + * As idle tasks do not do either, they must not ever be preempted + * (schedule out non-voluntarily). + * + * schedule_idle() is similar to schedule_preempt_disable() except that it + * never enables preemption because it does not call sched_submit_work(). + */ +void __sched schedule_idle(void) +{ + /* + * As this skips calling sched_submit_work(), which the idle task does + * regardless because that function is a nop when the task is in a + * TASK_RUNNING state, make sure this isn't used someplace that the + * current task can be in any other state. Note, idle is always in the + * TASK_RUNNING state. + */ + WARN_ON_ONCE(current->state); + do { + __schedule(false); + } while (need_resched()); +} + #ifdef CONFIG_CONTEXT_TRACKING asmlinkage __visible void __sched schedule_user(void) { @@ -3671,10 +3696,25 @@ EXPORT_SYMBOL(default_wake_function); #ifdef CONFIG_RT_MUTEXES +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) +{ + if (pi_task) + prio = min(prio, pi_task->prio); + + return prio; +} + +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + return __rt_effective_prio(pi_task, prio); +} + /* * rt_mutex_setprio - set the current priority of a task - * @p: task - * @prio: prio value (kernel-internal form) + * @p: task to boost + * @pi_task: donor task * * This function changes the 'effective' priority of a task. It does * not touch ->normal_prio like __setscheduler(). @@ -3682,17 +3722,42 @@ EXPORT_SYMBOL(default_wake_function); * Used by the rt_mutex code to implement priority inheritance * logic. Call site only calls if the priority of the task changed. */ -void rt_mutex_setprio(struct task_struct *p, int prio) +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) { - int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; + int prio, oldprio, queued, running, queue_flag = + DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; const struct sched_class *prev_class; struct rq_flags rf; struct rq *rq; - BUG_ON(prio > MAX_PRIO); + /* XXX used to be waiter->prio, not waiter->task->prio */ + prio = __rt_effective_prio(pi_task, p->normal_prio); + + /* + * If nothing changed; bail early. + */ + if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) + return; rq = __task_rq_lock(p, &rf); update_rq_clock(rq); + /* + * Set under pi_lock && rq->lock, such that the value can be used under + * either lock. + * + * Note that there is loads of tricky to make this pointer cache work + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to + * ensure a task is de-boosted (pi_task is set to NULL) before the + * task is allowed to run again (and can exit). This ensures the pointer + * points to a blocked task -- which guaratees the task is present. + */ + p->pi_top_task = pi_task; + + /* + * For FIFO/RR we only need to set prio, if that matches we're done. + */ + if (prio == p->prio && !dl_prio(prio)) + goto out_unlock; /* * Idle task boosting is a nono in general. There is one @@ -3712,7 +3777,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) goto out_unlock; } - trace_sched_pi_setprio(p, prio); + trace_sched_pi_setprio(p, pi_task); oldprio = p->prio; if (oldprio == prio) @@ -3736,7 +3801,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) * running task */ if (dl_prio(prio)) { - struct task_struct *pi_task = rt_mutex_get_top_task(p); if (!dl_prio(p->normal_prio) || (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; @@ -3774,6 +3838,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio) balance_callback(rq); preempt_enable(); } +#else +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + return prio; +} #endif void set_user_nice(struct task_struct *p, long nice) @@ -3805,7 +3874,7 @@ void set_user_nice(struct task_struct *p, long nice) queued = task_on_rq_queued(p); running = task_current(rq, p); if (queued) - dequeue_task(rq, p, DEQUEUE_SAVE); + dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); if (running) put_prev_task(rq, p); @@ -3816,7 +3885,7 @@ void set_user_nice(struct task_struct *p, long nice) delta = p->prio - old_prio; if (queued) { - enqueue_task(rq, p, ENQUEUE_RESTORE); + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -4020,10 +4089,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, * Keep a potential priority boosting if called from * sched_setscheduler(). */ + p->prio = normal_prio(p); if (keep_boost) - p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); - else - p->prio = normal_prio(p); + p->prio = rt_effective_prio(p, p->prio); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -4126,7 +4194,7 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_class *prev_class; struct rq_flags rf; int reset_on_fork; - int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE; + int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq *rq; /* May grab non-irq protected spin_locks: */ @@ -4310,7 +4378,7 @@ static int __sched_setscheduler(struct task_struct *p, * the runqueue. This will be done when the task deboost * itself. */ - new_effective_prio = rt_mutex_get_effective_prio(p, newprio); + new_effective_prio = rt_effective_prio(p, newprio); if (new_effective_prio == oldprio) queue_flags &= ~DEQUEUE_MOVE; } @@ -4923,7 +4991,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, */ SYSCALL_DEFINE0(sched_yield) { - struct rq *rq = this_rq_lock(); + struct rq_flags rf; + struct rq *rq; + + local_irq_disable(); + rq = this_rq(); + rq_lock(rq, &rf); schedstat_inc(rq->yld_count); current->sched_class->yield_task(rq); @@ -4932,9 +5005,8 @@ SYSCALL_DEFINE0(sched_yield) * Since we are going to call schedule() anyway, there's * no need to preempt or enable interrupts: */ - __release(rq->lock); - spin_release(&rq->lock.dep_map, 1, _THIS_IP_); - do_raw_spin_unlock(&rq->lock); + preempt_disable(); + rq_unlock(rq, &rf); sched_preempt_enable_no_resched(); schedule(); @@ -5514,7 +5586,7 @@ void sched_setnuma(struct task_struct *p, int nid) p->numa_preferred_nid = nid; if (queued) - enqueue_task(rq, p, ENQUEUE_RESTORE); + enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); if (running) set_curr_task(rq, p); task_rq_unlock(rq, p, &rf); @@ -5533,7 +5605,7 @@ void idle_task_exit(void) BUG_ON(cpu_online(smp_processor_id())); if (mm != &init_mm) { - switch_mm_irqs_off(mm, &init_mm, current); + switch_mm(mm, &init_mm, current); finish_arch_post_lock_switch(); } mmdrop(mm); @@ -5579,11 +5651,11 @@ static struct task_struct fake_task = { * there's no concurrency possible, we hold the required locks anyway * because of lock validation efforts. */ -static void migrate_tasks(struct rq *dead_rq) +static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) { struct rq *rq = dead_rq; struct task_struct *next, *stop = rq->stop; - struct rq_flags rf; + struct rq_flags orf = *rf; int dest_cpu; /* @@ -5602,9 +5674,7 @@ static void migrate_tasks(struct rq *dead_rq) * class method both need to have an up-to-date * value of rq->clock[_task] */ - rq_pin_lock(rq, &rf); update_rq_clock(rq); - rq_unpin_lock(rq, &rf); for (;;) { /* @@ -5617,8 +5687,7 @@ static void migrate_tasks(struct rq *dead_rq) /* * pick_next_task() assumes pinned rq->lock: */ - rq_repin_lock(rq, &rf); - next = pick_next_task(rq, &fake_task, &rf); + next = pick_next_task(rq, &fake_task, rf); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); @@ -5631,10 +5700,9 @@ static void migrate_tasks(struct rq *dead_rq) * because !cpu_active at this point, which means load-balance * will not interfere. Also, stop-machine. */ - rq_unpin_lock(rq, &rf); - raw_spin_unlock(&rq->lock); + rq_unlock(rq, rf); raw_spin_lock(&next->pi_lock); - raw_spin_lock(&rq->lock); + rq_relock(rq, rf); /* * Since we're inside stop-machine, _nothing_ should have @@ -5648,12 +5716,12 @@ static void migrate_tasks(struct rq *dead_rq) /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); - - rq = __migrate_task(rq, next, dest_cpu); + rq = __migrate_task(rq, rf, next, dest_cpu); if (rq != dead_rq) { - raw_spin_unlock(&rq->lock); + rq_unlock(rq, rf); rq = dead_rq; - raw_spin_lock(&rq->lock); + *rf = orf; + rq_relock(rq, rf); } raw_spin_unlock(&next->pi_lock); } @@ -5732,7 +5800,7 @@ static void cpuset_cpu_active(void) * cpuset configurations. */ } - cpuset_update_active_cpus(true); + cpuset_update_active_cpus(); } static int cpuset_cpu_inactive(unsigned int cpu) @@ -5755,7 +5823,7 @@ static int cpuset_cpu_inactive(unsigned int cpu) if (overflow) return -EBUSY; - cpuset_update_active_cpus(false); + cpuset_update_active_cpus(); } else { num_cpus_frozen++; partition_sched_domains(1, NULL, NULL); @@ -5766,7 +5834,7 @@ static int cpuset_cpu_inactive(unsigned int cpu) int sched_cpu_activate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long flags; + struct rq_flags rf; set_cpu_active(cpu, true); @@ -5784,12 +5852,12 @@ int sched_cpu_activate(unsigned int cpu) * 2) At runtime, if cpuset_cpu_active() fails to rebuild the * domains. */ - raw_spin_lock_irqsave(&rq->lock, flags); + rq_lock_irqsave(rq, &rf); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_online(rq); } - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); update_max_interval(); @@ -5847,18 +5915,20 @@ int sched_cpu_starting(unsigned int cpu) int sched_cpu_dying(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long flags; + struct rq_flags rf; /* Handle pending wakeups and then migrate everything off */ sched_ttwu_pending(); - raw_spin_lock_irqsave(&rq->lock, flags); + + rq_lock_irqsave(rq, &rf); if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } - migrate_tasks(rq); + migrate_tasks(rq, &rf); BUG_ON(rq->nr_running != 1); - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); + calc_load_migrate(rq); update_max_interval(); nohz_balance_exit_idle(cpu); @@ -6412,7 +6482,8 @@ static void sched_change_group(struct task_struct *tsk, int type) */ void sched_move_task(struct task_struct *tsk) { - int queued, running; + int queued, running, queue_flags = + DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq_flags rf; struct rq *rq; @@ -6423,14 +6494,14 @@ void sched_move_task(struct task_struct *tsk) queued = task_on_rq_queued(tsk); if (queued) - dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); + dequeue_task(rq, tsk, queue_flags); if (running) put_prev_task(rq, tsk); sched_change_group(tsk, TASK_MOVE_GROUP); if (queued) - enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE); + enqueue_task(rq, tsk, queue_flags); if (running) set_curr_task(rq, tsk); @@ -7008,14 +7079,15 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) for_each_online_cpu(i) { struct cfs_rq *cfs_rq = tg->cfs_rq[i]; struct rq *rq = cfs_rq->rq; + struct rq_flags rf; - raw_spin_lock_irq(&rq->lock); + rq_lock_irq(rq, &rf); cfs_rq->runtime_enabled = runtime_enabled; cfs_rq->runtime_remaining = 0; if (cfs_rq->throttled) unthrottle_cfs_rq(cfs_rq); - raw_spin_unlock_irq(&rq->lock); + rq_unlock_irq(rq, &rf); } if (runtime_was_enabled && !runtime_enabled) cfs_bandwidth_usage_dec(); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 54c577578da6899160cf4a611e87a386a2fd7db2..076a2e31951ccbb538e8004a85cf1f247818d57e 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -61,6 +61,11 @@ struct sugov_cpu { unsigned long util; unsigned long max; unsigned int flags; + + /* The field below is for single-CPU policies only. */ +#ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +#endif }; static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); @@ -93,22 +98,20 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, { struct cpufreq_policy *policy = sg_policy->policy; + if (sg_policy->next_freq == next_freq) + return; + + sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; if (policy->fast_switch_enabled) { - if (sg_policy->next_freq == next_freq) { - trace_cpu_frequency(policy->cur, smp_processor_id()); - return; - } - sg_policy->next_freq = next_freq; next_freq = cpufreq_driver_fast_switch(policy, next_freq); if (next_freq == CPUFREQ_ENTRY_INVALID) return; policy->cur = next_freq; trace_cpu_frequency(next_freq, smp_processor_id()); - } else if (sg_policy->next_freq != next_freq) { - sg_policy->next_freq = next_freq; + } else { sg_policy->work_in_progress = true; irq_work_queue(&sg_policy->irq_work); } @@ -192,6 +195,19 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, sg_cpu->iowait_boost >>= 1; } +#ifdef CONFIG_NO_HZ_COMMON +static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) +{ + unsigned long idle_calls = tick_nohz_get_idle_calls(); + bool ret = idle_calls == sg_cpu->saved_idle_calls; + + sg_cpu->saved_idle_calls = idle_calls; + return ret; +} +#else +static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } +#endif /* CONFIG_NO_HZ_COMMON */ + static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int flags) { @@ -200,6 +216,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, struct cpufreq_policy *policy = sg_policy->policy; unsigned long util, max; unsigned int next_f; + bool busy; sugov_set_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; @@ -207,40 +224,36 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; + busy = sugov_cpu_is_busy(sg_cpu); + if (flags & SCHED_CPUFREQ_RT_DL) { next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_policy, util, max); + /* + * Do not reduce the frequency if the CPU has not been idle + * recently, as the reduction is likely to be premature then. + */ + if (busy && next_f < sg_policy->next_freq) + next_f = sg_policy->next_freq; } sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, - unsigned long util, unsigned long max, - unsigned int flags) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned int max_f = policy->cpuinfo.max_freq; - u64 last_freq_update_time = sg_policy->last_freq_update_time; + unsigned long util = 0, max = 1; unsigned int j; - if (flags & SCHED_CPUFREQ_RT_DL) - return max_f; - - sugov_iowait_boost(sg_cpu, &util, &max); - for_each_cpu(j, policy->cpus) { - struct sugov_cpu *j_sg_cpu; + struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); unsigned long j_util, j_max; s64 delta_ns; - if (j == smp_processor_id()) - continue; - - j_sg_cpu = &per_cpu(sugov_cpu, j); /* * If the CPU utilization was last updated before the previous * frequency update and the time elapsed between the last update @@ -248,13 +261,13 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, * enough, don't take the CPU into account as it probably is * idle now (and clear iowait_boost for it). */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; + delta_ns = time - j_sg_cpu->last_update; if (delta_ns > TICK_NSEC) { j_sg_cpu->iowait_boost = 0; continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) - return max_f; + return policy->cpuinfo.max_freq; j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; @@ -289,7 +302,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_cpu, util, max, flags); + if (flags & SCHED_CPUFREQ_RT_DL) + next_f = sg_policy->policy->cpuinfo.max_freq; + else + next_f = sugov_next_freq_shared(sg_cpu, time); + sugov_update_commit(sg_policy, time, next_f); } @@ -473,7 +490,6 @@ static int sugov_init(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy; struct sugov_tunables *tunables; - unsigned int lat; int ret = 0; /* State should be equivalent to EXIT */ @@ -512,10 +528,16 @@ static int sugov_init(struct cpufreq_policy *policy) goto stop_kthread; } - tunables->rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) - tunables->rate_limit_us *= lat; + if (policy->transition_delay_us) { + tunables->rate_limit_us = policy->transition_delay_us; + } else { + unsigned int lat; + + tunables->rate_limit_us = LATENCY_MULTIPLIER; + lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (lat) + tunables->rate_limit_us *= lat; + } policy->governor_data = sg_policy; sg_policy->tunables = tunables; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index f3778e2b46c8dc00c90d9165f6ae9efbf1f16dc7..aea3135c5d90f434ee72980c30f0db1129ef752b 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void) sched_clock_irqtime = 0; } +static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, + enum cpu_usage_stat idx) +{ + u64 *cpustat = kcpustat_this_cpu->cpustat; + + u64_stats_update_begin(&irqtime->sync); + cpustat[idx] += delta; + irqtime->total += delta; + irqtime->tick_delta += delta; + u64_stats_update_end(&irqtime->sync); +} + /* * Called before incrementing preempt_count on {soft,}irq_enter * and before decrementing preempt_count on {soft,}irq_exit. @@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void) void irqtime_account_irq(struct task_struct *curr) { struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); - u64 *cpustat = kcpustat_this_cpu->cpustat; s64 delta; int cpu; @@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr) delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; - u64_stats_update_begin(&irqtime->sync); /* * We do not account for softirq time from ksoftirqd here. * We want to continue accounting softirq time to ksoftirqd thread * in that case, so as not to confuse scheduler with a special task * that do not consume any time, but still wants to run. */ - if (hardirq_count()) { - cpustat[CPUTIME_IRQ] += delta; - irqtime->tick_delta += delta; - } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) { - cpustat[CPUTIME_SOFTIRQ] += delta; - irqtime->tick_delta += delta; - } - - u64_stats_update_end(&irqtime->sync); + if (hardirq_count()) + irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); + else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) + irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); } EXPORT_SYMBOL_GPL(irqtime_account_irq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dea138964b9107b3e22542a8b80f5cf1d43c1dee..c77e4b1d51c09d1fc948d0bb105cd97b50fc6a42 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -717,18 +717,12 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) } #ifdef CONFIG_SMP + +#include "sched-pelt.h" + static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); static unsigned long task_h_load(struct task_struct *p); -/* - * We choose a half-life close to 1 scheduling period. - * Note: The tables runnable_avg_yN_inv and runnable_avg_yN_sum are - * dependent on this value. - */ -#define LOAD_AVG_PERIOD 32 -#define LOAD_AVG_MAX 47742 /* maximum possible load avg */ -#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */ - /* Give new sched_entity start runnable values to heavy its load in infant time */ void init_entity_runnable_average(struct sched_entity *se) { @@ -2733,47 +2727,15 @@ static inline void update_cfs_shares(struct sched_entity *se) #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_SMP -/* Precomputed fixed inverse multiplies for multiplication by y^n */ -static const u32 runnable_avg_yN_inv[] = { - 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, - 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, - 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, - 0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9, - 0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80, - 0x85aac367, 0x82cd8698, -}; - -/* - * Precomputed \Sum y^k { 1<=k<=n }. These are floor(true_value) to prevent - * over-estimates when re-combining. - */ -static const u32 runnable_avg_yN_sum[] = { - 0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103, - 9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082, - 17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371, -}; - -/* - * Precomputed \Sum y^k { 1<=k<=n, where n%32=0). Values are rolled down to - * lower integers. See Documentation/scheduler/sched-avg.txt how these - * were generated: - */ -static const u32 __accumulated_sum_N32[] = { - 0, 23371, 35056, 40899, 43820, 45281, - 46011, 46376, 46559, 46650, 46696, 46719, -}; - /* * Approximate: * val * y^n, where y^32 ~= 0.5 (~1 scheduling period) */ -static __always_inline u64 decay_load(u64 val, u64 n) +static u64 decay_load(u64 val, u64 n) { unsigned int local_n; - if (!n) - return val; - else if (unlikely(n > LOAD_AVG_PERIOD * 63)) + if (unlikely(n > LOAD_AVG_PERIOD * 63)) return 0; /* after bounds checking we can collapse to 32-bit */ @@ -2795,30 +2757,97 @@ static __always_inline u64 decay_load(u64 val, u64 n) return val; } +static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3) +{ + u32 c1, c2, c3 = d3; /* y^0 == 1 */ + + /* + * c1 = d1 y^p + */ + c1 = decay_load((u64)d1, periods); + + /* + * p-1 + * c2 = 1024 \Sum y^n + * n=1 + * + * inf inf + * = 1024 ( \Sum y^n - \Sum y^n - y^0 ) + * n=0 n=p + */ + c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024; + + return c1 + c2 + c3; +} + +#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) + /* - * For updates fully spanning n periods, the contribution to runnable - * average will be: \Sum 1024*y^n + * Accumulate the three separate parts of the sum; d1 the remainder + * of the last (incomplete) period, d2 the span of full periods and d3 + * the remainder of the (incomplete) current period. + * + * d1 d2 d3 + * ^ ^ ^ + * | | | + * |<->|<----------------->|<--->| + * ... |---x---|------| ... |------|-----x (now) + * + * p-1 + * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0 + * n=1 * - * We can compute this reasonably efficiently by combining: - * y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for n delta < 1024 */ + u64 periods; - if (likely(n <= LOAD_AVG_PERIOD)) - return runnable_avg_yN_sum[n]; - else if (unlikely(n >= LOAD_AVG_MAX_N)) - return LOAD_AVG_MAX; + scale_freq = arch_scale_freq_capacity(NULL, cpu); + scale_cpu = arch_scale_cpu_capacity(NULL, cpu); - /* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */ - contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD]; - n %= LOAD_AVG_PERIOD; - contrib = decay_load(contrib, n); - return contrib + runnable_avg_yN_sum[n]; -} + delta += sa->period_contrib; + periods = delta / 1024; /* A period is 1024us (~1ms) */ -#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) + /* + * Step 1: decay old *_sum if we crossed period boundaries. + */ + if (periods) { + sa->load_sum = decay_load(sa->load_sum, periods); + if (cfs_rq) { + cfs_rq->runnable_load_sum = + decay_load(cfs_rq->runnable_load_sum, periods); + } + sa->util_sum = decay_load((u64)(sa->util_sum), periods); + + /* + * Step 2 + */ + delta %= 1024; + contrib = __accumulate_pelt_segments(periods, + 1024 - sa->period_contrib, delta); + } + sa->period_contrib = delta; + + contrib = cap_scale(contrib, scale_freq); + if (weight) { + sa->load_sum += weight * contrib; + if (cfs_rq) + cfs_rq->runnable_load_sum += weight * contrib; + } + if (running) + sa->util_sum += contrib * scale_cpu; + + return periods; +} /* * We can represent the historical contribution to runnable average as the @@ -2849,13 +2878,10 @@ static u32 __compute_runnable_contrib(u64 n) * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}] */ static __always_inline int -__update_load_avg(u64 now, int cpu, struct sched_avg *sa, +___update_load_avg(u64 now, int cpu, struct sched_avg *sa, unsigned long weight, int running, struct cfs_rq *cfs_rq) { - u64 delta, scaled_delta, periods; - u32 contrib; - unsigned int delta_w, scaled_delta_w, decayed = 0; - unsigned long scale_freq, scale_cpu; + u64 delta; delta = now - sa->last_update_time; /* @@ -2874,83 +2900,52 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, delta >>= 10; if (!delta) return 0; - sa->last_update_time = now; - - scale_freq = arch_scale_freq_capacity(NULL, cpu); - scale_cpu = arch_scale_cpu_capacity(NULL, cpu); - - /* delta_w is the amount already accumulated against our next period */ - delta_w = sa->period_contrib; - if (delta + delta_w >= 1024) { - decayed = 1; - /* how much left for next period will start over, we don't know yet */ - sa->period_contrib = 0; + sa->last_update_time += delta << 10; - /* - * Now that we know we're crossing a period boundary, figure - * out how much from delta we need to complete the current - * period and accrue it. - */ - delta_w = 1024 - delta_w; - scaled_delta_w = cap_scale(delta_w, scale_freq); - if (weight) { - sa->load_sum += weight * scaled_delta_w; - if (cfs_rq) { - cfs_rq->runnable_load_sum += - weight * scaled_delta_w; - } - } - if (running) - sa->util_sum += scaled_delta_w * scale_cpu; - - delta -= delta_w; - - /* Figure out how many additional periods this update spans */ - periods = delta / 1024; - delta %= 1024; + /* + * Now we know we crossed measurement unit boundaries. The *_avg + * accrues by two steps: + * + * Step 1: accumulate *_sum since last_update_time. If we haven't + * crossed period boundaries, finish. + */ + if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq)) + return 0; - sa->load_sum = decay_load(sa->load_sum, periods + 1); - if (cfs_rq) { - cfs_rq->runnable_load_sum = - decay_load(cfs_rq->runnable_load_sum, periods + 1); - } - sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1); - - /* Efficiently calculate \sum (1..n_period) 1024*y^i */ - contrib = __compute_runnable_contrib(periods); - contrib = cap_scale(contrib, scale_freq); - if (weight) { - sa->load_sum += weight * contrib; - if (cfs_rq) - cfs_rq->runnable_load_sum += weight * contrib; - } - if (running) - sa->util_sum += contrib * scale_cpu; + /* + * Step 2: update *_avg. + */ + sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX); + if (cfs_rq) { + cfs_rq->runnable_load_avg = + div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX); } + sa->util_avg = sa->util_sum / LOAD_AVG_MAX; - /* Remainder of delta accrued against u_0` */ - scaled_delta = cap_scale(delta, scale_freq); - if (weight) { - sa->load_sum += weight * scaled_delta; - if (cfs_rq) - cfs_rq->runnable_load_sum += weight * scaled_delta; - } - if (running) - sa->util_sum += scaled_delta * scale_cpu; + return 1; +} - sa->period_contrib += delta; +static int +__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se) +{ + return ___update_load_avg(now, cpu, &se->avg, 0, 0, NULL); +} - if (decayed) { - sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX); - if (cfs_rq) { - cfs_rq->runnable_load_avg = - div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX); - } - sa->util_avg = sa->util_sum / LOAD_AVG_MAX; - } +static int +__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + return ___update_load_avg(now, cpu, &se->avg, + se->on_rq * scale_load_down(se->load.weight), + cfs_rq->curr == se, NULL); +} - return decayed; +static int +__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq) +{ + return ___update_load_avg(now, cpu, &cfs_rq->avg, + scale_load_down(cfs_rq->load.weight), + cfs_rq->curr != NULL, cfs_rq); } /* @@ -3014,6 +3009,9 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) void set_task_rq_fair(struct sched_entity *se, struct cfs_rq *prev, struct cfs_rq *next) { + u64 p_last_update_time; + u64 n_last_update_time; + if (!sched_feat(ATTACH_AGE_LOAD)) return; @@ -3024,11 +3022,11 @@ void set_task_rq_fair(struct sched_entity *se, * time. This will result in the wakee task is less decayed, but giving * the wakee more load sounds not bad. */ - if (se->avg.last_update_time && prev) { - u64 p_last_update_time; - u64 n_last_update_time; + if (!(se->avg.last_update_time && prev)) + return; #ifndef CONFIG_64BIT + { u64 p_last_update_time_copy; u64 n_last_update_time_copy; @@ -3043,14 +3041,13 @@ void set_task_rq_fair(struct sched_entity *se, } while (p_last_update_time != p_last_update_time_copy || n_last_update_time != n_last_update_time_copy); + } #else - p_last_update_time = prev->avg.last_update_time; - n_last_update_time = next->avg.last_update_time; + p_last_update_time = prev->avg.last_update_time; + n_last_update_time = next->avg.last_update_time; #endif - __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)), - &se->avg, 0, 0, NULL); - se->avg.last_update_time = n_last_update_time; - } + __update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se); + se->avg.last_update_time = n_last_update_time; } /* Take into account change of utilization of a child task group */ @@ -3173,6 +3170,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se) return 1; } +/* + * Check if we need to update the load and the utilization of a blocked + * group_entity: + */ +static inline bool skip_blocked_update(struct sched_entity *se) +{ + struct cfs_rq *gcfs_rq = group_cfs_rq(se); + + /* + * If sched_entity still have not zero load or utilization, we have to + * decay it: + */ + if (se->avg.load_avg || se->avg.util_avg) + return false; + + /* + * If there is a pending propagation, we have to update the load and + * the utilization of the sched_entity: + */ + if (gcfs_rq->propagate_avg) + return false; + + /* + * Otherwise, the load and the utilization of the sched_entity is + * already zero and there is no pending propagation, so it will be a + * waste of time to try to decay it: + */ + return true; +} + #else /* CONFIG_FAIR_GROUP_SCHED */ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} @@ -3265,8 +3292,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) set_tg_cfs_propagate(cfs_rq); } - decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, - scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq); + decayed = __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq); #ifndef CONFIG_64BIT smp_wmb(); @@ -3298,11 +3324,8 @@ static inline void update_load_avg(struct sched_entity *se, int flags) * Track task load average for carrying it to new CPU after migrated, and * track group sched_entity load average for task_h_load calc in migration */ - if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) { - __update_load_avg(now, cpu, &se->avg, - se->on_rq * scale_load_down(se->load.weight), - cfs_rq->curr == se, NULL); - } + if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) + __update_load_avg_se(now, cpu, cfs_rq, se); decayed = update_cfs_rq_load_avg(now, cfs_rq, true); decayed |= propagate_entity_load_avg(se); @@ -3407,7 +3430,7 @@ void sync_entity_load_avg(struct sched_entity *se) u64 last_update_time; last_update_time = cfs_rq_last_update_time(cfs_rq); - __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); + __update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se); } /* @@ -3540,7 +3563,7 @@ static inline void check_schedstat_required(void) trace_sched_stat_runtime_enabled()) { printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " "stat_blocked and stat_runtime require the " - "kernel parameter schedstats=enabled or " + "kernel parameter schedstats=enable or " "kernel.sched_schedstats=1\n"); } #endif @@ -4271,8 +4294,9 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, throttled_list) { struct rq *rq = rq_of(cfs_rq); + struct rq_flags rf; - raw_spin_lock(&rq->lock); + rq_lock(rq, &rf); if (!cfs_rq_throttled(cfs_rq)) goto next; @@ -4289,7 +4313,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, unthrottle_cfs_rq(cfs_rq); next: - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); if (!remaining) break; @@ -5097,15 +5121,16 @@ void cpu_load_update_nohz_stop(void) unsigned long curr_jiffies = READ_ONCE(jiffies); struct rq *this_rq = this_rq(); unsigned long load; + struct rq_flags rf; if (curr_jiffies == this_rq->last_load_update_tick) return; load = weighted_cpuload(cpu_of(this_rq)); - raw_spin_lock(&this_rq->lock); + rq_lock(this_rq, &rf); update_rq_clock(this_rq); cpu_load_update_nohz(this_rq, curr_jiffies, load); - raw_spin_unlock(&this_rq->lock); + rq_unlock(this_rq, &rf); } #else /* !CONFIG_NO_HZ_COMMON */ static inline void cpu_load_update_nohz(struct rq *this_rq, @@ -6769,7 +6794,7 @@ static void detach_task(struct task_struct *p, struct lb_env *env) lockdep_assert_held(&env->src_rq->lock); p->on_rq = TASK_ON_RQ_MIGRATING; - deactivate_task(env->src_rq, p, 0); + deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, env->dst_cpu); } @@ -6902,7 +6927,7 @@ static void attach_task(struct rq *rq, struct task_struct *p) lockdep_assert_held(&rq->lock); BUG_ON(task_rq(p) != rq); - activate_task(rq, p, 0); + activate_task(rq, p, ENQUEUE_NOCLOCK); p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(rq, p, 0); } @@ -6913,9 +6938,12 @@ static void attach_task(struct rq *rq, struct task_struct *p) */ static void attach_one_task(struct rq *rq, struct task_struct *p) { - raw_spin_lock(&rq->lock); + struct rq_flags rf; + + rq_lock(rq, &rf); + update_rq_clock(rq); attach_task(rq, p); - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); } /* @@ -6926,8 +6954,10 @@ static void attach_tasks(struct lb_env *env) { struct list_head *tasks = &env->tasks; struct task_struct *p; + struct rq_flags rf; - raw_spin_lock(&env->dst_rq->lock); + rq_lock(env->dst_rq, &rf); + update_rq_clock(env->dst_rq); while (!list_empty(tasks)) { p = list_first_entry(tasks, struct task_struct, se.group_node); @@ -6936,7 +6966,7 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } - raw_spin_unlock(&env->dst_rq->lock); + rq_unlock(env->dst_rq, &rf); } #ifdef CONFIG_FAIR_GROUP_SCHED @@ -6944,9 +6974,9 @@ static void update_blocked_averages(int cpu) { struct rq *rq = cpu_rq(cpu); struct cfs_rq *cfs_rq; - unsigned long flags; + struct rq_flags rf; - raw_spin_lock_irqsave(&rq->lock, flags); + rq_lock_irqsave(rq, &rf); update_rq_clock(rq); /* @@ -6954,6 +6984,8 @@ static void update_blocked_averages(int cpu) * list_add_leaf_cfs_rq() for details. */ for_each_leaf_cfs_rq(rq, cfs_rq) { + struct sched_entity *se; + /* throttled entities do not contribute to load */ if (throttled_hierarchy(cfs_rq)) continue; @@ -6961,11 +6993,12 @@ static void update_blocked_averages(int cpu) if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true)) update_tg_load_avg(cfs_rq, 0); - /* Propagate pending load changes to the parent */ - if (cfs_rq->tg->se[cpu]) - update_load_avg(cfs_rq->tg->se[cpu], 0); + /* Propagate pending load changes to the parent, if any: */ + se = cfs_rq->tg->se[cpu]; + if (se && !skip_blocked_update(se)) + update_load_avg(se, 0); } - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); } /* @@ -7019,12 +7052,12 @@ static inline void update_blocked_averages(int cpu) { struct rq *rq = cpu_rq(cpu); struct cfs_rq *cfs_rq = &rq->cfs; - unsigned long flags; + struct rq_flags rf; - raw_spin_lock_irqsave(&rq->lock, flags); + rq_lock_irqsave(rq, &rf); update_rq_clock(rq); update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true); - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); } static unsigned long task_h_load(struct task_struct *p) @@ -7525,6 +7558,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd { struct sched_domain *child = env->sd->child; struct sched_group *sg = env->sd->groups; + struct sg_lb_stats *local = &sds->local_stat; struct sg_lb_stats tmp_sgs; int load_idx, prefer_sibling = 0; bool overload = false; @@ -7541,7 +7575,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg)); if (local_group) { sds->local = sg; - sgs = &sds->local_stat; + sgs = local; if (env->idle != CPU_NEWLY_IDLE || time_after_eq(jiffies, sg->sgc->next_update)) @@ -7565,8 +7599,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd * the tasks on the system). */ if (prefer_sibling && sds->local && - group_has_capacity(env, &sds->local_stat) && - (sgs->sum_nr_running > 1)) { + group_has_capacity(env, local) && + (sgs->sum_nr_running > local->sum_nr_running + 1)) { sgs->group_no_capacity = 1; sgs->group_type = group_classify(sg, sgs); } @@ -7597,7 +7631,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /** * check_asym_packing - Check to see if the group is packed into the - * sched doman. + * sched domain. * * This is primarily intended to used at the sibling level. Some * cores like POWER7 prefer to use lower numbered SMT threads. In the @@ -8042,7 +8076,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd_parent = sd->parent; struct sched_group *group; struct rq *busiest; - unsigned long flags; + struct rq_flags rf; struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { @@ -8105,7 +8139,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running); more_balance: - raw_spin_lock_irqsave(&busiest->lock, flags); + rq_lock_irqsave(busiest, &rf); update_rq_clock(busiest); /* @@ -8122,14 +8156,14 @@ static int load_balance(int this_cpu, struct rq *this_rq, * See task_rq_lock() family for the details. */ - raw_spin_unlock(&busiest->lock); + rq_unlock(busiest, &rf); if (cur_ld_moved) { attach_tasks(&env); ld_moved += cur_ld_moved; } - local_irq_restore(flags); + local_irq_restore(rf.flags); if (env.flags & LBF_NEED_BREAK) { env.flags &= ~LBF_NEED_BREAK; @@ -8207,6 +8241,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, sd->nr_balance_failed++; if (need_active_balance(&env)) { + unsigned long flags; + raw_spin_lock_irqsave(&busiest->lock, flags); /* don't kick the active_load_balance_cpu_stop, @@ -8444,8 +8480,9 @@ static int active_load_balance_cpu_stop(void *data) struct rq *target_rq = cpu_rq(target_cpu); struct sched_domain *sd; struct task_struct *p = NULL; + struct rq_flags rf; - raw_spin_lock_irq(&busiest_rq->lock); + rq_lock_irq(busiest_rq, &rf); /* make sure the requested cpu hasn't gone down in the meantime */ if (unlikely(busiest_cpu != smp_processor_id() || @@ -8496,7 +8533,7 @@ static int active_load_balance_cpu_stop(void *data) rcu_read_unlock(); out_unlock: busiest_rq->active_balance = 0; - raw_spin_unlock(&busiest_rq->lock); + rq_unlock(busiest_rq, &rf); if (p) attach_one_task(target_rq, p); @@ -8794,10 +8831,13 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) * do the balance. */ if (time_after_eq(jiffies, rq->next_balance)) { - raw_spin_lock_irq(&rq->lock); + struct rq_flags rf; + + rq_lock_irq(rq, &rf); update_rq_clock(rq); cpu_load_update_idle(rq); - raw_spin_unlock_irq(&rq->lock); + rq_unlock_irq(rq, &rf); + rebalance_domains(rq, CPU_IDLE); } @@ -8988,8 +9028,9 @@ static void task_fork_fair(struct task_struct *p) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se, *curr; struct rq *rq = this_rq(); + struct rq_flags rf; - raw_spin_lock(&rq->lock); + rq_lock(rq, &rf); update_rq_clock(rq); cfs_rq = task_cfs_rq(current); @@ -9010,7 +9051,7 @@ static void task_fork_fair(struct task_struct *p) } se->vruntime -= cfs_rq->min_vruntime; - raw_spin_unlock(&rq->lock); + rq_unlock(rq, &rf); } /* @@ -9372,7 +9413,6 @@ static DEFINE_MUTEX(shares_mutex); int sched_group_set_shares(struct task_group *tg, unsigned long shares) { int i; - unsigned long flags; /* * We can't change the weight of the root cgroup. @@ -9389,19 +9429,17 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) tg->shares = shares; for_each_possible_cpu(i) { struct rq *rq = cpu_rq(i); - struct sched_entity *se; + struct sched_entity *se = tg->se[i]; + struct rq_flags rf; - se = tg->se[i]; /* Propagate contribution to hierarchy */ - raw_spin_lock_irqsave(&rq->lock, flags); - - /* Possible calls to update_curr() need rq clock */ + rq_lock_irqsave(rq, &rf); update_rq_clock(rq); for_each_sched_entity(se) { update_load_avg(se, UPDATE_TG); update_cfs_shares(se); } - raw_spin_unlock_irqrestore(&rq->lock, flags); + rq_unlock_irqrestore(rq, &rf); } done: diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 1b3c8189b28656d2644a714ff60ceab7d015d97b..11192e0cb122c72066a2f46417246464d909ea7c 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -56,6 +56,13 @@ SCHED_FEAT(TTWU_QUEUE, true) */ SCHED_FEAT(SIS_AVG_CPU, false) +/* + * Issue a WARN when we do multiple update_rq_clock() calls + * in a single rq->lock section. Default disabled because the + * annotations are not complete. + */ +SCHED_FEAT(WARN_DOUBLE_CLOCK, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index ac6d5176463dca591b40263d9524d728c363ec09..ef63adce0c9cf98eecd9a6896e16d804c6bc1759 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -264,7 +265,10 @@ static void do_idle(void) smp_mb__after_atomic(); sched_ttwu_pending(); - schedule_preempt_disabled(); + schedule_idle(); + + if (unlikely(klp_patch_pending(current))) + klp_update_patch_state(current); } bool cpu_in_idle(unsigned long pc) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 9f3e40226dec875c7b318b4a9e6a2c01a89604ac..979b7341008afc94f839f38b3fd84a12b7a3fb16 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1927,6 +1927,87 @@ static int find_next_push_cpu(struct rq *rq) #define RT_PUSH_IPI_EXECUTING 1 #define RT_PUSH_IPI_RESTART 2 +/* + * When a high priority task schedules out from a CPU and a lower priority + * task is scheduled in, a check is made to see if there's any RT tasks + * on other CPUs that are waiting to run because a higher priority RT task + * is currently running on its CPU. In this case, the CPU with multiple RT + * tasks queued on it (overloaded) needs to be notified that a CPU has opened + * up that may be able to run one of its non-running queued RT tasks. + * + * On large CPU boxes, there's the case that several CPUs could schedule + * a lower priority task at the same time, in which case it will look for + * any overloaded CPUs that it could pull a task from. To do this, the runqueue + * lock must be taken from that overloaded CPU. Having 10s of CPUs all fighting + * for a single overloaded CPU's runqueue lock can produce a large latency. + * (This has actually been observed on large boxes running cyclictest). + * Instead of taking the runqueue lock of the overloaded CPU, each of the + * CPUs that scheduled a lower priority task simply sends an IPI to the + * overloaded CPU. An IPI is much cheaper than taking an runqueue lock with + * lots of contention. The overloaded CPU will look to push its non-running + * RT task off, and if it does, it can then ignore the other IPIs coming + * in, and just pass those IPIs off to any other overloaded CPU. + * + * When a CPU schedules a lower priority task, it only sends an IPI to + * the "next" CPU that has overloaded RT tasks. This prevents IPI storms, + * as having 10 CPUs scheduling lower priority tasks and 10 CPUs with + * RT overloaded tasks, would cause 100 IPIs to go out at once. + * + * The overloaded RT CPU, when receiving an IPI, will try to push off its + * overloaded RT tasks and then send an IPI to the next CPU that has + * overloaded RT tasks. This stops when all CPUs with overloaded RT tasks + * have completed. Just because a CPU may have pushed off its own overloaded + * RT task does not mean it should stop sending the IPI around to other + * overloaded CPUs. There may be another RT task waiting to run on one of + * those CPUs that are of higher priority than the one that was just + * pushed. + * + * An optimization that could possibly be made is to make a CPU array similar + * to the cpupri array mask of all running RT tasks, but for the overloaded + * case, then the IPI could be sent to only the CPU with the highest priority + * RT task waiting, and that CPU could send off further IPIs to the CPU with + * the next highest waiting task. Since the overloaded case is much less likely + * to happen, the complexity of this implementation may not be worth it. + * Instead, just send an IPI around to all overloaded CPUs. + * + * The rq->rt.push_flags holds the status of the IPI that is going around. + * A run queue can only send out a single IPI at a time. The possible flags + * for rq->rt.push_flags are: + * + * (None or zero): No IPI is going around for the current rq + * RT_PUSH_IPI_EXECUTING: An IPI for the rq is being passed around + * RT_PUSH_IPI_RESTART: The priority of the running task for the rq + * has changed, and the IPI should restart + * circulating the overloaded CPUs again. + * + * rq->rt.push_cpu contains the CPU that is being sent the IPI. It is updated + * before sending to the next CPU. + * + * Instead of having all CPUs that schedule a lower priority task send + * an IPI to the same "first" CPU in the RT overload mask, they send it + * to the next overloaded CPU after their own CPU. This helps distribute + * the work when there's more than one overloaded CPU and multiple CPUs + * scheduling in lower priority tasks. + * + * When a rq schedules a lower priority task than what was currently + * running, the next CPU with overloaded RT tasks is examined first. + * That is, if CPU 1 and 5 are overloaded, and CPU 3 schedules a lower + * priority task, it will send an IPI first to CPU 5, then CPU 5 will + * send to CPU 1 if it is still overloaded. CPU 1 will clear the + * rq->rt.push_flags if RT_PUSH_IPI_RESTART is not set. + * + * The first CPU to notice IPI_RESTART is set, will clear that flag and then + * send an IPI to the next overloaded CPU after the rq->cpu and not the next + * CPU after push_cpu. That is, if CPU 1, 4 and 5 are overloaded when CPU 3 + * schedules a lower priority task, and the IPI_RESTART gets set while the + * handling is being done on CPU 5, it will clear the flag and send it back to + * CPU 4 instead of CPU 1. + * + * Note, the above logic can be disabled by turning off the sched_feature + * RT_PUSH_IPI. Then the rq lock of the overloaded CPU will simply be + * taken by the CPU requesting a pull and the waiting RT task will be pulled + * by that CPU. This may be fine for machines with few CPUs. + */ static void tell_cpu_to_push(struct rq *rq) { int cpu; diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h new file mode 100644 index 0000000000000000000000000000000000000000..cd200d16529efe9ca8cbc56afa5896fde001e654 --- /dev/null +++ b/kernel/sched/sched-pelt.h @@ -0,0 +1,13 @@ +/* Generated by Documentation/scheduler/sched-pelt; do not modify. */ + +static const u32 runnable_avg_yN_inv[] = { + 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, + 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, + 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, + 0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9, + 0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80, + 0x85aac367, 0x82cd8698, +}; + +#define LOAD_AVG_PERIOD 32 +#define LOAD_AVG_MAX 47742 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5cbf92214ad89287d111ab8300e5b55923d83ffe..6dda2aab731e04c5e1f88272f180f44df22e907a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1331,15 +1331,17 @@ extern const u32 sched_prio_to_wmult[40]; #define DEQUEUE_SLEEP 0x01 #define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ #define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */ +#define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */ #define ENQUEUE_WAKEUP 0x01 #define ENQUEUE_RESTORE 0x02 #define ENQUEUE_MOVE 0x04 +#define ENQUEUE_NOCLOCK 0x08 -#define ENQUEUE_HEAD 0x08 -#define ENQUEUE_REPLENISH 0x10 +#define ENQUEUE_HEAD 0x10 +#define ENQUEUE_REPLENISH 0x20 #ifdef CONFIG_SMP -#define ENQUEUE_MIGRATED 0x20 +#define ENQUEUE_MIGRATED 0x40 #else #define ENQUEUE_MIGRATED 0x00 #endif @@ -1465,6 +1467,8 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq) } #endif +extern void schedule_idle(void); + extern void sysrq_sched_debug_show(void); extern void sched_init_granularity(void); extern void update_max_interval(void); @@ -1624,6 +1628,7 @@ static inline void sched_avg_update(struct rq *rq) { } struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) __acquires(rq->lock); + struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) __acquires(p->pi_lock) __acquires(rq->lock); @@ -1645,6 +1650,62 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } +static inline void +rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) + __acquires(rq->lock) +{ + raw_spin_lock_irqsave(&rq->lock, rf->flags); + rq_pin_lock(rq, rf); +} + +static inline void +rq_lock_irq(struct rq *rq, struct rq_flags *rf) + __acquires(rq->lock) +{ + raw_spin_lock_irq(&rq->lock); + rq_pin_lock(rq, rf); +} + +static inline void +rq_lock(struct rq *rq, struct rq_flags *rf) + __acquires(rq->lock) +{ + raw_spin_lock(&rq->lock); + rq_pin_lock(rq, rf); +} + +static inline void +rq_relock(struct rq *rq, struct rq_flags *rf) + __acquires(rq->lock) +{ + raw_spin_lock(&rq->lock); + rq_repin_lock(rq, rf); +} + +static inline void +rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) + __releases(rq->lock) +{ + rq_unpin_lock(rq, rf); + raw_spin_unlock_irqrestore(&rq->lock, rf->flags); +} + +static inline void +rq_unlock_irq(struct rq *rq, struct rq_flags *rf) + __releases(rq->lock) +{ + rq_unpin_lock(rq, rf); + raw_spin_unlock_irq(&rq->lock); +} + +static inline void +rq_unlock(struct rq *rq, struct rq_flags *rf) + __releases(rq->lock) +{ + rq_unpin_lock(rq, rf); + raw_spin_unlock(&rq->lock); +} + #ifdef CONFIG_SMP #ifdef CONFIG_PREEMPT @@ -1869,6 +1930,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { } #ifdef CONFIG_IRQ_TIME_ACCOUNTING struct irqtime { + u64 total; u64 tick_delta; u64 irq_start_time; struct u64_stats_sync sync; @@ -1876,16 +1938,20 @@ struct irqtime { DECLARE_PER_CPU(struct irqtime, cpu_irqtime); +/* + * Returns the irqtime minus the softirq time computed by ksoftirqd. + * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime + * and never move forward. + */ static inline u64 irq_time_read(int cpu) { struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); - u64 *cpustat = kcpustat_cpu(cpu).cpustat; unsigned int seq; u64 total; do { seq = __u64_stats_fetch_begin(&irqtime->sync); - total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ]; + total = irqtime->total; } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); return total; diff --git a/kernel/signal.c b/kernel/signal.c index 7e59ebc2c25e669ef60f54d6e3ae6839c34d2f2a..ca92bcfeb322f3f836031ec8b3ab21867f39adf5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1237,7 +1237,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, } /* * This sighand can be already freed and even reused, but - * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which + * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which * initializes ->siglock: this slab can't go away, it has * the same object type, ->siglock can't be reinitialized. * @@ -1318,7 +1318,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) } } -int kill_proc_info(int sig, struct siginfo *info, pid_t pid) +static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; rcu_read_lock(); diff --git a/kernel/softirq.c b/kernel/softirq.c index 744fa611cae06b26d89a04e915f0d7fadf37035b..4e09821f9d9e8b5815037bd469110e2618abb7f6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -309,7 +309,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); WARN_ON_ONCE(in_interrupt()); - tsk_restore_flags(current, old_flags, PF_MEMALLOC); + current_restore_flags(old_flags, PF_MEMALLOC); } asmlinkage __visible void do_softirq(void) diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 9c15a9124e83b50661414fa425393cdb51866313..f8edee9c792de527cfb968664cc1ae91ea062d1f 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -54,8 +54,8 @@ int snprint_stack_trace(char *buf, size_t size, EXPORT_SYMBOL_GPL(snprint_stack_trace); /* - * Architectures that do not implement save_stack_trace_tsk or - * save_stack_trace_regs get this weak alias and a once-per-bootup warning + * Architectures that do not implement save_stack_trace_*() + * get these weak aliases and once-per-bootup warnings * (whenever this facility is utilized - for example by procfs): */ __weak void @@ -69,3 +69,11 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) { WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n"); } + +__weak int +save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + WARN_ONCE(1, KERN_INFO "save_stack_tsk_reliable() not implemented yet.\n"); + return -ENOSYS; +} diff --git a/kernel/sys.c b/kernel/sys.c index 7ff6d1b10cecac8e66583f3bc233fd6fcb445b5c..8a94b4eabcaa291e6315858deba48a0269363fb9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1396,8 +1396,7 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, !capable(CAP_SYS_RESOURCE)) retval = -EPERM; if (!retval) - retval = security_task_setrlimit(tsk->group_leader, - resource, new_rlim); + retval = security_task_setrlimit(tsk, resource, new_rlim); if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { /* * The caller is asking for an immediate RLIMIT_CPU @@ -1432,25 +1431,26 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, } /* rcu lock must be held */ -static int check_prlimit_permission(struct task_struct *task) +static int check_prlimit_permission(struct task_struct *task, + unsigned int flags) { const struct cred *cred = current_cred(), *tcred; + bool id_match; if (current == task) return 0; tcred = __task_cred(task); - if (uid_eq(cred->uid, tcred->euid) && - uid_eq(cred->uid, tcred->suid) && - uid_eq(cred->uid, tcred->uid) && - gid_eq(cred->gid, tcred->egid) && - gid_eq(cred->gid, tcred->sgid) && - gid_eq(cred->gid, tcred->gid)) - return 0; - if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) - return 0; + id_match = (uid_eq(cred->uid, tcred->euid) && + uid_eq(cred->uid, tcred->suid) && + uid_eq(cred->uid, tcred->uid) && + gid_eq(cred->gid, tcred->egid) && + gid_eq(cred->gid, tcred->sgid) && + gid_eq(cred->gid, tcred->gid)); + if (!id_match && !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) + return -EPERM; - return -EPERM; + return security_task_prlimit(cred, tcred, flags); } SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, @@ -1460,12 +1460,17 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, struct rlimit64 old64, new64; struct rlimit old, new; struct task_struct *tsk; + unsigned int checkflags = 0; int ret; + if (old_rlim) + checkflags |= LSM_PRLIMIT_READ; + if (new_rlim) { if (copy_from_user(&new64, new_rlim, sizeof(new64))) return -EFAULT; rlim64_to_rlim(&new64, &new); + checkflags |= LSM_PRLIMIT_WRITE; } rcu_read_lock(); @@ -1474,7 +1479,7 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, rcu_read_unlock(); return -ESRCH; } - ret = check_prlimit_permission(tsk); + ret = check_prlimit_permission(tsk, checkflags); if (ret) { rcu_read_unlock(); return ret; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8c8714fcb53c35a390becf14f2fa8e1bfc20142c..4dfba1a76cc360f649cbe6f35b57cf9413f2f5e1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1176,6 +1176,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = timer_migration_handler, + .extra1 = &zero, + .extra2 = &one, }, #endif #ifdef CONFIG_BPF_SYSCALL @@ -2574,7 +2576,7 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, int write, void *data) { if (write) { - if (*lvalp > LONG_MAX / HZ) + if (*lvalp > INT_MAX / HZ) return 1; *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); } else { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 8a5e44236f78d3ba069e0c81ef3304f4be5ba7fd..4559e914452b4b8a47d1cf2efc0320be253e5025 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Maximum length of a cpumask that can be specified in @@ -210,6 +211,8 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; + u64 delta, utime, stime; + u64 start_time; /* * Add additional stats from live tasks except zombie thread group @@ -227,6 +230,7 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) memset(stats, 0, sizeof(*stats)); tsk = first; + start_time = ktime_get_ns(); do { if (tsk->exit_state) continue; @@ -238,6 +242,16 @@ static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) */ delayacct_add_tsk(stats, tsk); + /* calculate task elapsed time in nsec */ + delta = start_time - tsk->start_time; + /* Convert to micro seconds */ + do_div(delta, NSEC_PER_USEC); + stats->ac_etime += delta; + + task_cputime(tsk, &utime, &stime); + stats->ac_utime += div_u64(utime, NSEC_PER_USEC); + stats->ac_stime += div_u64(stime, NSEC_PER_USEC); + stats->nvcsw += tsk->nvcsw; stats->nivcsw += tsk->nivcsw; } while_each_thread(first, tsk); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ce3a31e8eb3687e8633848d645ba4c4dc741a8f3..ee2f4202d82aa2acec5e438218a2405a4315f166 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; - start = ktime_add(start, base->gettime()); + start = ktime_add_safe(start, base->gettime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); @@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) overrun++; } - alarm->node.expires = ktime_add(alarm->node.expires, interval); + alarm->node.expires = ktime_add_safe(alarm->node.expires, interval); return overrun; } EXPORT_SYMBOL_GPL(alarm_forward); @@ -541,7 +541,7 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, * * Returns the granularity of underlying alarm base clock */ -static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { if (!alarmtimer_get_rtcdev()) return -EINVAL; @@ -558,14 +558,14 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) * * Provides the underlying alarm base time. */ -static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +static int alarm_clock_get(clockid_t which_clock, struct timespec64 *tp) { struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; if (!alarmtimer_get_rtcdev()) return -EINVAL; - *tp = ktime_to_timespec(base->gettime()); + *tp = ktime_to_timespec64(base->gettime()); return 0; } @@ -598,19 +598,19 @@ static int alarm_timer_create(struct k_itimer *new_timer) * Copies out the current itimerspec data */ static void alarm_timer_get(struct k_itimer *timr, - struct itimerspec *cur_setting) + struct itimerspec64 *cur_setting) { ktime_t relative_expiry_time = alarm_expires_remaining(&(timr->it.alarm.alarmtimer)); if (ktime_to_ns(relative_expiry_time) > 0) { - cur_setting->it_value = ktime_to_timespec(relative_expiry_time); + cur_setting->it_value = ktime_to_timespec64(relative_expiry_time); } else { cur_setting->it_value.tv_sec = 0; cur_setting->it_value.tv_nsec = 0; } - cur_setting->it_interval = ktime_to_timespec(timr->it.alarm.interval); + cur_setting->it_interval = ktime_to_timespec64(timr->it.alarm.interval); } /** @@ -640,8 +640,8 @@ static int alarm_timer_del(struct k_itimer *timr) * Sets the timer to new_setting, and starts the timer. */ static int alarm_timer_set(struct k_itimer *timr, int flags, - struct itimerspec *new_setting, - struct itimerspec *old_setting) + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting) { ktime_t exp; @@ -659,14 +659,22 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, return TIMER_RETRY; /* start the timer */ - timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); - exp = timespec_to_ktime(new_setting->it_value); + timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval); + + /* + * Rate limit to the tick as a hot fix to prevent DOS. Will be + * mopped up later. + */ + if (timr->it.alarm.interval < TICK_NSEC) + timr->it.alarm.interval = TICK_NSEC; + + exp = timespec64_to_ktime(new_setting->it_value); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now; now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); - exp = ktime_add(now, exp); + exp = ktime_add_safe(now, exp); } alarm_start(&timr->it.alarm.alarmtimer, exp); @@ -790,13 +798,14 @@ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) * Handles clock_nanosleep calls against _ALARM clockids */ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, - struct timespec *tsreq, struct timespec __user *rmtp) + struct timespec64 *tsreq, + struct timespec __user *rmtp) { enum alarmtimer_type type = clock2alarm(which_clock); + struct restart_block *restart; struct alarm alarm; ktime_t exp; int ret = 0; - struct restart_block *restart; if (!alarmtimer_get_rtcdev()) return -ENOTSUPP; @@ -809,7 +818,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); - exp = timespec_to_ktime(*tsreq); + exp = timespec64_to_ktime(*tsreq); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now = alarm_bases[type].gettime(); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 97ac0951f164a386ca7199195607a1af90ed14ea..4237e0744e26bd276de92ca083a44676c9ede240 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -468,7 +468,7 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); -void clockevents_config(struct clock_event_device *dev, u32 freq) +static void clockevents_config(struct clock_event_device *dev, u32 freq) { u64 sec; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index ec08f527d7ee9101399fd150ad788b8a40f2f859..ac053bb5296e8ef84be3ce4c57084ac9d05c2812 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -987,7 +987,7 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); * Returns: * 0 when the timer was not active * 1 when the timer was active - * -1 when the timer is currently excuting the callback function and + * -1 when the timer is currently executing the callback function and * cannot be stopped */ int hrtimer_try_to_cancel(struct hrtimer *timer) @@ -1368,10 +1368,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) ktime_to_ns(delta)); } -/* - * local version of hrtimer_peek_ahead_timers() called with interrupts - * disabled. - */ +/* called with interrupts disabled */ static inline void __hrtimer_peek_ahead_timers(void) { struct tick_device *td; @@ -1506,7 +1503,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) return ret; } -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, +long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid) { struct restart_block *restart; @@ -1519,7 +1516,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, slack = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); + hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); if (do_nanosleep(&t, mode)) goto out; @@ -1550,15 +1547,17 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, struct timespec __user *, rmtp) { + struct timespec64 tu64; struct timespec tu; if (copy_from_user(&tu, rqtp, sizeof(tu))) return -EFAULT; - if (!timespec_valid(&tu)) + tu64 = timespec_to_timespec64(tu); + if (!timespec64_valid(&tu64)) return -EINVAL; - return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + return hrtimer_nanosleep(&tu64, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); } /* diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 9cff0ab82b635d33d2936697d909724d07767e69..31d588d37a173ae6fc65ab7d956736982e381991 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -297,7 +297,7 @@ static int pc_clock_adjtime(clockid_t id, struct timex *tx) return err; } -static int pc_clock_gettime(clockid_t id, struct timespec *ts) +static int pc_clock_gettime(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -316,7 +316,7 @@ static int pc_clock_gettime(clockid_t id, struct timespec *ts) return err; } -static int pc_clock_getres(clockid_t id, struct timespec *ts) +static int pc_clock_getres(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -335,7 +335,7 @@ static int pc_clock_getres(clockid_t id, struct timespec *ts) return err; } -static int pc_clock_settime(clockid_t id, const struct timespec *ts) +static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -399,7 +399,7 @@ static int pc_timer_delete(struct k_itimer *kit) return err; } -static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts) +static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec64 *ts) { clockid_t id = kit->it_clock; struct posix_clock_desc cd; @@ -414,7 +414,7 @@ static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts) } static int pc_timer_settime(struct k_itimer *kit, int flags, - struct itimerspec *ts, struct itimerspec *old) + struct itimerspec64 *ts, struct itimerspec64 *old) { clockid_t id = kit->it_clock; struct posix_clock_desc cd; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 4513ad16a253f6d2b0ccf6a3aa178b1e058c5c52..d2a1e6dd02913f4beb58b1d79be12d72761deec3 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -116,7 +116,7 @@ static inline u64 virt_ticks(struct task_struct *p) } static int -posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) +posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { int error = check_clock(which_clock); if (!error) { @@ -135,7 +135,7 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) } static int -posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) +posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -261,7 +261,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock, static int posix_cpu_clock_get_task(struct task_struct *tsk, const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { int err = -EINVAL; u64 rtn; @@ -275,13 +275,13 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, } if (!err) - *tp = ns_to_timespec(rtn); + *tp = ns_to_timespec64(rtn); return err; } -static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) +static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int err = -EINVAL; @@ -562,7 +562,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, * and try again. (This happens when the timer is in the middle of firing.) */ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, - struct itimerspec *new, struct itimerspec *old) + struct itimerspec64 *new, struct itimerspec64 *old) { unsigned long flags; struct sighand_struct *sighand; @@ -572,7 +572,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, WARN_ON_ONCE(p == NULL); - new_expires = timespec_to_ns(&new->it_value); + new_expires = timespec64_to_ns(&new->it_value); /* * Protect against sighand release/switch in exit/exec and p->cpu_timers @@ -633,7 +633,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, bump_cpu_timer(timer, val); if (val < timer->it.cpu.expires) { old_expires = timer->it.cpu.expires - val; - old->it_value = ns_to_timespec(old_expires); + old->it_value = ns_to_timespec64(old_expires); } else { old->it_value.tv_nsec = 1; old->it_value.tv_sec = 0; @@ -671,7 +671,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * Install the new reload setting, and * set up the signal and overrun bookkeeping. */ - timer->it.cpu.incr = timespec_to_ns(&new->it_interval); + timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); /* * This acts as a modification timestamp for the timer, @@ -695,12 +695,12 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, ret = 0; out: if (old) - old->it_interval = ns_to_timespec(old_incr); + old->it_interval = ns_to_timespec64(old_incr); return ret; } -static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) +static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) { u64 now; struct task_struct *p = timer->it.cpu.task; @@ -710,7 +710,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) /* * Easy part: convert the reload time. */ - itp->it_interval = ns_to_timespec(timer->it.cpu.incr); + itp->it_interval = ns_to_timespec64(timer->it.cpu.incr); if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; @@ -739,7 +739,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) * Call the timer disarmed, nothing else to do. */ timer->it.cpu.expires = 0; - itp->it_value = ns_to_timespec(timer->it.cpu.expires); + itp->it_value = ns_to_timespec64(timer->it.cpu.expires); return; } else { cpu_timer_sample_group(timer->it_clock, p, &now); @@ -748,7 +748,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) } if (now < timer->it.cpu.expires) { - itp->it_value = ns_to_timespec(timer->it.cpu.expires - now); + itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now); } else { /* * The timer should have expired already, but the firing @@ -825,6 +825,10 @@ static void check_thread_timers(struct task_struct *tsk, * At the hard limit, we just die. * No need to calculate anything else now. */ + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -836,9 +840,10 @@ static void check_thread_timers(struct task_struct *tsk, soft += USEC_PER_SEC; sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; } - printk(KERN_INFO - "RT Watchdog Timeout: %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } @@ -935,6 +940,10 @@ static void check_process_timers(struct task_struct *tsk, * At the hard limit, we just die. * No need to calculate anything else now. */ + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -942,6 +951,10 @@ static void check_process_timers(struct task_struct *tsk, /* * At the soft limit, send a SIGXCPU every second. */ + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); if (soft < hard) { soft++; @@ -1214,7 +1227,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct itimerspec *it) + struct timespec64 *rqtp, struct itimerspec64 *it) { struct k_itimer timer; int error; @@ -1229,7 +1242,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { - static struct itimerspec zero_it; + static struct itimerspec64 zero_it; memset(it, 0, sizeof *it); it->it_value = *rqtp; @@ -1264,7 +1277,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, /* * We were interrupted by a signal. */ - *rqtp = ns_to_timespec(timer.it.cpu.expires); + *rqtp = ns_to_timespec64(timer.it.cpu.expires); error = posix_cpu_timer_set(&timer, 0, &zero_it, it); if (!error) { /* @@ -1301,10 +1314,11 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block); static int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct timespec __user *rmtp) + struct timespec64 *rqtp, struct timespec __user *rmtp) { struct restart_block *restart_block = ¤t->restart_block; - struct itimerspec it; + struct itimerspec64 it; + struct timespec ts; int error; /* @@ -1312,7 +1326,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, */ if (CPUCLOCK_PERTHREAD(which_clock) && (CPUCLOCK_PID(which_clock) == 0 || - CPUCLOCK_PID(which_clock) == current->pid)) + CPUCLOCK_PID(which_clock) == task_pid_vnr(current))) return -EINVAL; error = do_cpu_nanosleep(which_clock, flags, rqtp, &it); @@ -1324,13 +1338,14 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, /* * Report back to the user the time still remaining. */ - if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + ts = timespec64_to_timespec(it.it_value); + if (rmtp && copy_to_user(rmtp, &ts, sizeof(*rmtp))) return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; restart_block->nanosleep.clockid = which_clock; restart_block->nanosleep.rmtp = rmtp; - restart_block->nanosleep.expires = timespec_to_ns(rqtp); + restart_block->nanosleep.expires = timespec64_to_ns(rqtp); } return error; } @@ -1338,11 +1353,12 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->nanosleep.clockid; - struct timespec t; - struct itimerspec it; + struct itimerspec64 it; + struct timespec64 t; + struct timespec tmp; int error; - t = ns_to_timespec(restart_block->nanosleep.expires); + t = ns_to_timespec64(restart_block->nanosleep.expires); error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); @@ -1351,10 +1367,11 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block) /* * Report back to the user the time still remaining. */ - if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) + tmp = timespec64_to_timespec(it.it_value); + if (rmtp && copy_to_user(rmtp, &tmp, sizeof(*rmtp))) return -EFAULT; - restart_block->nanosleep.expires = timespec_to_ns(&t); + restart_block->nanosleep.expires = timespec64_to_ns(&t); } return error; @@ -1364,12 +1381,12 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block) #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) static int process_cpu_clock_getres(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } static int process_cpu_clock_get(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } @@ -1379,7 +1396,7 @@ static int process_cpu_timer_create(struct k_itimer *timer) return posix_cpu_timer_create(timer); } static int process_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, + struct timespec64 *rqtp, struct timespec __user *rmtp) { return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); @@ -1389,12 +1406,12 @@ static long process_cpu_nsleep_restart(struct restart_block *restart_block) return -EINVAL; } static int thread_cpu_clock_getres(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } static int thread_cpu_clock_get(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index cd6716e115e8d81a89afdc95286590ca9f7e500e..c0cd53eb018a249f99a3d120ffde9fbcc2abbdef 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -49,26 +49,32 @@ SYS_NI(alarm); SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { + struct timespec64 new_tp64; struct timespec new_tp; if (which_clock != CLOCK_REALTIME) return -EINVAL; if (copy_from_user(&new_tp, tp, sizeof (*tp))) return -EFAULT; - return do_sys_settimeofday(&new_tp, NULL); + + new_tp64 = timespec_to_timespec64(new_tp); + return do_sys_settimeofday64(&new_tp64, NULL); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct timespec __user *,tp) { + struct timespec64 kernel_tp64; struct timespec kernel_tp; switch (which_clock) { - case CLOCK_REALTIME: ktime_get_real_ts(&kernel_tp); break; - case CLOCK_MONOTONIC: ktime_get_ts(&kernel_tp); break; - case CLOCK_BOOTTIME: get_monotonic_boottime(&kernel_tp); break; + case CLOCK_REALTIME: ktime_get_real_ts64(&kernel_tp64); break; + case CLOCK_MONOTONIC: ktime_get_ts64(&kernel_tp64); break; + case CLOCK_BOOTTIME: get_monotonic_boottime64(&kernel_tp64); break; default: return -EINVAL; } + + kernel_tp = timespec64_to_timespec(kernel_tp64); if (copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) return -EFAULT; return 0; @@ -97,6 +103,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct timespec __user *, rqtp, struct timespec __user *, rmtp) { + struct timespec64 t64; struct timespec t; switch (which_clock) { @@ -105,9 +112,10 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, case CLOCK_BOOTTIME: if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; - if (!timespec_valid(&t)) + t64 = timespec_to_timespec64(t); + if (!timespec64_valid(&t64)) return -EINVAL; - return hrtimer_nanosleep(&t, rmtp, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(&t64, rmtp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); default: diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 50a6a47020dea9e055b7267926ee9b0e39399d3c..4d7b2ce09c27cb6b77b1b2d18e379c4efed48d71 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -130,12 +130,12 @@ static struct k_clock posix_clocks[MAX_CLOCKS]; /* * These ones are defined below. */ -static int common_nsleep(const clockid_t, int flags, struct timespec *t, +static int common_nsleep(const clockid_t, int flags, struct timespec64 *t, struct timespec __user *rmtp); static int common_timer_create(struct k_itimer *new_timer); -static void common_timer_get(struct k_itimer *, struct itimerspec *); +static void common_timer_get(struct k_itimer *, struct itimerspec64 *); static int common_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); + struct itimerspec64 *, struct itimerspec64 *); static int common_timer_del(struct k_itimer *timer); static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); @@ -204,17 +204,17 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) } /* Get clock_realtime */ -static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp) +static int posix_clock_realtime_get(clockid_t which_clock, struct timespec64 *tp) { - ktime_get_real_ts(tp); + ktime_get_real_ts64(tp); return 0; } /* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, - const struct timespec *tp) + const struct timespec64 *tp) { - return do_sys_settimeofday(tp, NULL); + return do_sys_settimeofday64(tp, NULL); } static int posix_clock_realtime_adj(const clockid_t which_clock, @@ -226,54 +226,54 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, /* * Get monotonic time for posix timers */ -static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp) { - ktime_get_ts(tp); + ktime_get_ts64(tp); return 0; } /* * Get monotonic-raw time for posix timers */ -static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) +static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { - getrawmonotonic(tp); + getrawmonotonic64(tp); return 0; } -static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { - *tp = current_kernel_time(); + *tp = current_kernel_time64(); return 0; } static int posix_get_monotonic_coarse(clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { - *tp = get_monotonic_coarse(); + *tp = get_monotonic_coarse64(); return 0; } -static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp) { - *tp = ktime_to_timespec(KTIME_LOW_RES); + *tp = ktime_to_timespec64(KTIME_LOW_RES); return 0; } -static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp) +static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) { - get_monotonic_boottime(tp); + get_monotonic_boottime64(tp); return 0; } -static int posix_get_tai(clockid_t which_clock, struct timespec *tp) +static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) { - timekeeping_clocktai(tp); + timekeeping_clocktai64(tp); return 0; } -static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec *tp) +static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = hrtimer_resolution; @@ -734,18 +734,18 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) * report. */ static void -common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) +common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { ktime_t now, remaining, iv; struct hrtimer *timer = &timr->it.real.timer; - memset(cur_setting, 0, sizeof(struct itimerspec)); + memset(cur_setting, 0, sizeof(*cur_setting)); iv = timr->it.real.interval; /* interval timer ? */ if (iv) - cur_setting->it_interval = ktime_to_timespec(iv); + cur_setting->it_interval = ktime_to_timespec64(iv); else if (!hrtimer_active(timer) && (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) return; @@ -771,13 +771,14 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) cur_setting->it_value.tv_nsec = 1; } else - cur_setting->it_value = ktime_to_timespec(remaining); + cur_setting->it_value = ktime_to_timespec64(remaining); } /* Get the time remaining on a POSIX.1b interval timer. */ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct itimerspec __user *, setting) { + struct itimerspec64 cur_setting64; struct itimerspec cur_setting; struct k_itimer *timr; struct k_clock *kc; @@ -792,10 +793,11 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, if (WARN_ON_ONCE(!kc || !kc->timer_get)) ret = -EINVAL; else - kc->timer_get(timr, &cur_setting); + kc->timer_get(timr, &cur_setting64); unlock_timer(timr, flags); + cur_setting = itimerspec64_to_itimerspec(&cur_setting64); if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting))) return -EFAULT; @@ -831,7 +833,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) /* timr->it_lock is taken. */ static int common_timer_set(struct k_itimer *timr, int flags, - struct itimerspec *new_setting, struct itimerspec *old_setting) + struct itimerspec64 *new_setting, struct itimerspec64 *old_setting) { struct hrtimer *timer = &timr->it.real.timer; enum hrtimer_mode mode; @@ -860,10 +862,10 @@ common_timer_set(struct k_itimer *timr, int flags, hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); timr->it.real.timer.function = posix_timer_fn; - hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value)); + hrtimer_set_expires(timer, timespec64_to_ktime(new_setting->it_value)); /* Convert interval */ - timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); + timr->it.real.interval = timespec64_to_ktime(new_setting->it_interval); /* SIGEV_NONE timers are not queued ! See common_timer_get */ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { @@ -883,21 +885,23 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct itimerspec __user *, new_setting, struct itimerspec __user *, old_setting) { - struct k_itimer *timr; + struct itimerspec64 new_spec64, old_spec64; + struct itimerspec64 *rtn = old_setting ? &old_spec64 : NULL; struct itimerspec new_spec, old_spec; - int error = 0; + struct k_itimer *timr; unsigned long flag; - struct itimerspec *rtn = old_setting ? &old_spec : NULL; struct k_clock *kc; + int error = 0; if (!new_setting) return -EINVAL; if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) return -EFAULT; + new_spec64 = itimerspec_to_itimerspec64(&new_spec); - if (!timespec_valid(&new_spec.it_interval) || - !timespec_valid(&new_spec.it_value)) + if (!timespec64_valid(&new_spec64.it_interval) || + !timespec64_valid(&new_spec64.it_value)) return -EINVAL; retry: timr = lock_timer(timer_id, &flag); @@ -908,7 +912,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; else - error = kc->timer_set(timr, flags, &new_spec, rtn); + error = kc->timer_set(timr, flags, &new_spec64, rtn); unlock_timer(timr, flag); if (error == TIMER_RETRY) { @@ -916,6 +920,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, goto retry; } + old_spec = itimerspec64_to_itimerspec(&old_spec64); if (old_setting && !error && copy_to_user(old_setting, &old_spec, sizeof (old_spec))) error = -EFAULT; @@ -1014,6 +1019,7 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 new_tp64; struct timespec new_tp; if (!kc || !kc->clock_set) @@ -1021,21 +1027,24 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, if (copy_from_user(&new_tp, tp, sizeof (*tp))) return -EFAULT; + new_tp64 = timespec_to_timespec64(new_tp); - return kc->clock_set(which_clock, &new_tp); + return kc->clock_set(which_clock, &new_tp64); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct timespec __user *,tp) { struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 kernel_tp64; struct timespec kernel_tp; int error; if (!kc) return -EINVAL; - error = kc->clock_get(which_clock, &kernel_tp); + error = kc->clock_get(which_clock, &kernel_tp64); + kernel_tp = timespec64_to_timespec(kernel_tp64); if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) error = -EFAULT; @@ -1070,13 +1079,15 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 rtn_tp64; struct timespec rtn_tp; int error; if (!kc) return -EINVAL; - error = kc->clock_getres(which_clock, &rtn_tp); + error = kc->clock_getres(which_clock, &rtn_tp64); + rtn_tp = timespec64_to_timespec(rtn_tp64); if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) error = -EFAULT; @@ -1088,7 +1099,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, * nanosleep for monotonic and realtime clocks */ static int common_nsleep(const clockid_t which_clock, int flags, - struct timespec *tsave, struct timespec __user *rmtp) + struct timespec64 *tsave, struct timespec __user *rmtp) { return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, @@ -1100,6 +1111,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, struct timespec __user *, rmtp) { struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 t64; struct timespec t; if (!kc) @@ -1110,10 +1122,11 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; - if (!timespec_valid(&t)) + t64 = timespec_to_timespec64(t); + if (!timespec64_valid(&t64)) return -EINVAL; - return kc->nsleep(which_clock, flags, &t, rmtp); + return kc->nsleep(which_clock, flags, &t64, rmtp); } /* diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index ea6b610c4c57c3f4d600cfc5a42597a34bedc72b..2d8f05aad442a5fcafc37b98a177f84bde2047a5 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -206,6 +206,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) update_clock_read_data(&rd); + if (sched_clock_timer.function != NULL) { + /* update timeout for clock wrap */ + hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + } + r = rate; if (r >= 4000000) { r /= 1000000; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 987e496bb51a9cc84c92bedc62cf8d69e8e85668..b398c2ea69b290cdaec1769b7d11cbc501646652 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -37,9 +37,11 @@ static int tick_broadcast_forced; static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); #ifdef CONFIG_TICK_ONESHOT +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif @@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, /** * tick_broadcast_setup_oneshot - setup the broadcast device */ -void tick_broadcast_setup_oneshot(struct clock_event_device *bc) +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { int cpu = smp_processor_id(); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f738251000fe6b07de4f3d54ee78d8f78408c5d7..be0ac01f2e1225b6d4bb814029854dfdba3cd5cb 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) -extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); @@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7fe53be860778b07210fb7a333bf49695bded575..64c97fc130c4db2f1241185d9075af7a0f5175f5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -993,6 +993,18 @@ ktime_t tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls - return the current idle calls counter value + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls(void) +{ + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); + + return ts->idle_calls; +} + static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE diff --git a/kernel/time/time.c b/kernel/time/time.c index 25bdd250457192df0a2dfc13e6c9078bb92be0d6..49c73c6ed648900d873190f9ebaefe9cb44f4f98 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -193,8 +193,8 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, struct timezone __user *, tz) { + struct timespec64 new_ts; struct timeval user_tv; - struct timespec new_ts; struct timezone new_tz; if (tv) { @@ -212,7 +212,7 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, return -EFAULT; } - return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); + return do_sys_settimeofday64(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) @@ -230,20 +230,6 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; } -/** - * current_fs_time - Return FS time - * @sb: Superblock. - * - * Return the current time truncated to the time granularity supported by - * the fs. - */ -struct timespec current_fs_time(struct super_block *sb) -{ - struct timespec now = current_kernel_time(); - return timespec_trunc(now, sb->s_time_gran); -} -EXPORT_SYMBOL(current_fs_time); - /* * Convert jiffies to milliseconds and back. * diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5b63a2102c2907bc42a870fe2545bafdb10cdf28..9652bc57fd09811fa4e3ffbabc81b9139e75f125 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -996,8 +996,7 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, return 0; /* Interpolate shortest distance from beginning or end of history */ - interp_forward = partial_history_cycles > total_history_cycles/2 ? - true : false; + interp_forward = partial_history_cycles > total_history_cycles / 2; partial_history_cycles = interp_forward ? total_history_cycles - partial_history_cycles : partial_history_cycles; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 1dc0256bfb6e1f00bfafb1c6e085feda9f04cfa3..152a706ef8b87ca8f86a195d2fedfbc728c592d7 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -241,7 +241,7 @@ int timer_migration_handler(struct ctl_table *table, int write, int ret; mutex_lock(&mutex); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) timers_update_migration(false); mutex_unlock(&mutex); @@ -1120,7 +1120,7 @@ void add_timer_on(struct timer_list *timer, int cpu) EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - deactive a timer. + * del_timer - deactivate a timer. * @timer: the timer to be deactivated * * del_timer() deactivates a timer - this works on both active and inactive diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ff8d5c13d04bd0911c62584b6f2764b7a27cb89a..0e7f5428a1484ed85215a72e115759eace1392fb 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -86,6 +87,9 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, next_one: i = 0; + + touch_nmi_watchdog(); + raw_spin_lock_irqsave(&base->cpu_base->lock, flags); curr = timerqueue_getnext(&base->active); @@ -197,6 +201,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) { struct clock_event_device *dev = td->evtdev; + touch_nmi_watchdog(); + SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); if (cpu < 0) SEQ_printf(m, "Broadcast device\n"); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d4a06e714645df56f75db97ba6bb052534a4bb41..7e06f04e98fe2c5a0194ce05c88a270fcf0bfc4d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -134,7 +134,8 @@ config FUNCTION_TRACER select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER - select GLOB + select GLOB + select TASKS_RCU if PREEMPT help Enable the kernel to trace every kernel function. This is done by using a compiler feature to insert a small, 5-byte No-Operation @@ -455,7 +456,7 @@ config UPROBE_EVENTS select UPROBES select PROBE_EVENTS select TRACING - default n + default y help This allows the user to add tracing events on top of userspace dynamic events (similar to tracepoints) on the fly via the trace diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b2058a7f94bd8797f5629158aeed8bf045c20e4f..193c5f5e3f7988e8d27eed5c4292e2345e3c5bf7 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -690,8 +690,8 @@ void blk_trace_shutdown(struct request_queue *q) /** * blk_add_trace_rq - Add a trace for a request oriented action - * @q: queue the io is for * @rq: the source request + * @error: return status to log * @nr_bytes: number of completed bytes * @what: the action * @@ -699,10 +699,10 @@ void blk_trace_shutdown(struct request_queue *q) * Records an action against a request. Will log the bio offset + size. * **/ -static void blk_add_trace_rq(struct request_queue *q, struct request *rq, +static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt = rq->q->blk_trace; if (likely(!bt)) return; @@ -713,40 +713,32 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), - rq->cmd_flags, what, rq->errors, 0, NULL); -} - -static void blk_add_trace_rq_abort(void *ignore, - struct request_queue *q, struct request *rq) -{ - blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT); + rq->cmd_flags, what, error, 0, NULL); } static void blk_add_trace_rq_insert(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT); + blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT); } static void blk_add_trace_rq_issue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE); + blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE); } static void blk_add_trace_rq_requeue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE); + blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE); } -static void blk_add_trace_rq_complete(void *ignore, - struct request_queue *q, - struct request *rq, - unsigned int nr_bytes) +static void blk_add_trace_rq_complete(void *ignore, struct request *rq, + int error, unsigned int nr_bytes) { - blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE); + blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE); } /** @@ -941,7 +933,7 @@ static void blk_add_trace_rq_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), - rq_data_dir(rq), 0, BLK_TA_REMAP, !!rq->errors, + rq_data_dir(rq), 0, BLK_TA_REMAP, 0, sizeof(r), &r); } @@ -966,7 +958,7 @@ void blk_add_driver_data(struct request_queue *q, return; __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, - BLK_TA_DRV_DATA, rq->errors, len, data); + BLK_TA_DRV_DATA, 0, len, data); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -974,8 +966,6 @@ static void blk_register_tracepoints(void) { int ret; - ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); - WARN_ON(ret); ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); WARN_ON(ret); ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); @@ -1028,7 +1018,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); - unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL); tracepoint_synchronize_unregister(); } @@ -1673,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; if (attr == &dev_attr_act_mask) { - if (sscanf(buf, "%llx", &value) != 1) { + if (kstrtoull(buf, 0, &value)) { /* Assume it is a list of trace category names */ ret = blk_trace_str2mask(buf); if (ret < 0) goto out; value = ret; } - } else if (sscanf(buf, "%llu", &value) != 1) + } else if (kstrtoull(buf, 0, &value)) goto out; ret = -ENXIO; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cee9802cf3e00f0f5ef1625df14fa9d6892b3581..460a031c77e592411b83a4071d976706db975687 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -96,7 +96,7 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, if (unlikely(in_interrupt() || current->flags & (PF_KTHREAD | PF_EXITING))) return -EPERM; - if (unlikely(segment_eq(get_fs(), KERNEL_DS))) + if (unlikely(uaccess_kernel())) return -EPERM; if (!access_ok(VERIFY_WRITE, unsafe_ptr, size)) return -EPERM; @@ -501,16 +501,11 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type return true; } -static const struct bpf_verifier_ops kprobe_prog_ops = { +const struct bpf_verifier_ops kprobe_prog_ops = { .get_func_proto = kprobe_prog_func_proto, .is_valid_access = kprobe_prog_is_valid_access, }; -static struct bpf_prog_type_list kprobe_tl __ro_after_init = { - .ops = &kprobe_prog_ops, - .type = BPF_PROG_TYPE_KPROBE, -}; - BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, u64, flags, void *, data, u64, size) { @@ -584,16 +579,11 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type return true; } -static const struct bpf_verifier_ops tracepoint_prog_ops = { +const struct bpf_verifier_ops tracepoint_prog_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = tp_prog_is_valid_access, }; -static struct bpf_prog_type_list tracepoint_tl __ro_after_init = { - .ops = &tracepoint_prog_ops, - .type = BPF_PROG_TYPE_TRACEPOINT, -}; - static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type) { @@ -642,22 +632,8 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -static const struct bpf_verifier_ops perf_event_prog_ops = { +const struct bpf_verifier_ops perf_event_prog_ops = { .get_func_proto = tp_prog_func_proto, .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; - -static struct bpf_prog_type_list perf_event_tl __ro_after_init = { - .ops = &perf_event_prog_ops, - .type = BPF_PROG_TYPE_PERF_EVENT, -}; - -static int __init register_kprobe_prog_ops(void) -{ - bpf_register_prog_type(&kprobe_tl); - bpf_register_prog_type(&tracepoint_tl); - bpf_register_prog_type(&perf_event_tl); - return 0; -} -late_initcall(register_kprobe_prog_ops); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 27bb2e61276e1c6b3a581ce0f3a1f750d5975227..9e5841dc14b5fa62e0c1470df704c3dc9b99f8ad 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -36,6 +36,7 @@ #include +#include #include #include "trace_output.h" @@ -1095,22 +1096,20 @@ static bool update_all_ops; # error Dynamic ftrace depends on MCOUNT_RECORD #endif -static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; - -struct ftrace_func_probe { - struct hlist_node node; - struct ftrace_probe_ops *ops; - unsigned long flags; - unsigned long ip; - void *data; - struct list_head free_list; -}; - struct ftrace_func_entry { struct hlist_node hlist; unsigned long ip; }; +struct ftrace_func_probe { + struct ftrace_probe_ops *probe_ops; + struct ftrace_ops ops; + struct trace_array *tr; + struct list_head list; + void *data; + int ref; +}; + /* * We make these constant because no one should touch them, * but they are used as the default "empty hash", to avoid allocating @@ -1271,7 +1270,7 @@ static void remove_hash_entry(struct ftrace_hash *hash, struct ftrace_func_entry *entry) { - hlist_del(&entry->hlist); + hlist_del_rcu(&entry->hlist); hash->count--; } @@ -2807,18 +2806,28 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * callers are done before leaving this function. * The same goes for freeing the per_cpu data of the per_cpu * ops. - * - * Again, normal synchronize_sched() is not good enough. - * We need to do a hard force of sched synchronization. - * This is because we use preempt_disable() to do RCU, but - * the function tracers can be called where RCU is not watching - * (like before user_exit()). We can not rely on the RCU - * infrastructure to do the synchronization, thus we must do it - * ourselves. */ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ schedule_on_each_cpu(ftrace_sync); + /* + * When the kernel is preeptive, tasks can be preempted + * while on a ftrace trampoline. Just scheduling a task on + * a CPU is not good enough to flush them. Calling + * synchornize_rcu_tasks() will wait for those tasks to + * execute and either schedule voluntarily or enter user space. + */ + if (IS_ENABLED(CONFIG_PREEMPT)) + synchronize_rcu_tasks(); + arch_ftrace_trampoline_free(ops); if (ops->flags & FTRACE_OPS_FL_PER_CPU) @@ -3055,34 +3064,63 @@ struct ftrace_iterator { struct ftrace_page *pg; struct dyn_ftrace *func; struct ftrace_func_probe *probe; + struct ftrace_func_entry *probe_entry; struct trace_parser parser; struct ftrace_hash *hash; struct ftrace_ops *ops; - int hidx; + int pidx; int idx; unsigned flags; }; static void * -t_hash_next(struct seq_file *m, loff_t *pos) +t_probe_next(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; + struct trace_array *tr = iter->ops->private; + struct list_head *func_probes; + struct ftrace_hash *hash; + struct list_head *next; struct hlist_node *hnd = NULL; struct hlist_head *hhd; + int size; (*pos)++; iter->pos = *pos; - if (iter->probe) - hnd = &iter->probe->node; - retry: - if (iter->hidx >= FTRACE_FUNC_HASHSIZE) + if (!tr) return NULL; - hhd = &ftrace_func_hash[iter->hidx]; + func_probes = &tr->func_probes; + if (list_empty(func_probes)) + return NULL; + + if (!iter->probe) { + next = func_probes->next; + iter->probe = list_entry(next, struct ftrace_func_probe, list); + } + + if (iter->probe_entry) + hnd = &iter->probe_entry->hlist; + + hash = iter->probe->ops.func_hash->filter_hash; + size = 1 << hash->size_bits; + + retry: + if (iter->pidx >= size) { + if (iter->probe->list.next == func_probes) + return NULL; + next = iter->probe->list.next; + iter->probe = list_entry(next, struct ftrace_func_probe, list); + hash = iter->probe->ops.func_hash->filter_hash; + size = 1 << hash->size_bits; + iter->pidx = 0; + } + + hhd = &hash->buckets[iter->pidx]; if (hlist_empty(hhd)) { - iter->hidx++; + iter->pidx++; hnd = NULL; goto retry; } @@ -3092,7 +3130,7 @@ t_hash_next(struct seq_file *m, loff_t *pos) else { hnd = hnd->next; if (!hnd) { - iter->hidx++; + iter->pidx++; goto retry; } } @@ -3100,26 +3138,28 @@ t_hash_next(struct seq_file *m, loff_t *pos) if (WARN_ON_ONCE(!hnd)) return NULL; - iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node); + iter->probe_entry = hlist_entry(hnd, struct ftrace_func_entry, hlist); return iter; } -static void *t_hash_start(struct seq_file *m, loff_t *pos) +static void *t_probe_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; void *p = NULL; loff_t l; - if (!(iter->flags & FTRACE_ITER_DO_HASH)) + if (!(iter->flags & FTRACE_ITER_DO_PROBES)) return NULL; if (iter->func_pos > *pos) return NULL; - iter->hidx = 0; + iter->probe = NULL; + iter->probe_entry = NULL; + iter->pidx = 0; for (l = 0; l <= (*pos - iter->func_pos); ) { - p = t_hash_next(m, &l); + p = t_probe_next(m, &l); if (!p) break; } @@ -3127,50 +3167,42 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) return NULL; /* Only set this if we have an item */ - iter->flags |= FTRACE_ITER_HASH; + iter->flags |= FTRACE_ITER_PROBE; return iter; } static int -t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) +t_probe_show(struct seq_file *m, struct ftrace_iterator *iter) { - struct ftrace_func_probe *rec; + struct ftrace_func_entry *probe_entry; + struct ftrace_probe_ops *probe_ops; + struct ftrace_func_probe *probe; - rec = iter->probe; - if (WARN_ON_ONCE(!rec)) + probe = iter->probe; + probe_entry = iter->probe_entry; + + if (WARN_ON_ONCE(!probe || !probe_entry)) return -EIO; - if (rec->ops->print) - return rec->ops->print(m, rec->ip, rec->ops, rec->data); + probe_ops = probe->probe_ops; - seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func); + if (probe_ops->print) + return probe_ops->print(m, probe_entry->ip, probe_ops, probe->data); - if (rec->data) - seq_printf(m, ":%p", rec->data); - seq_putc(m, '\n'); + seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip, + (void *)probe_ops->func); return 0; } static void * -t_next(struct seq_file *m, void *v, loff_t *pos) +t_func_next(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct ftrace_ops *ops = iter->ops; struct dyn_ftrace *rec = NULL; - if (unlikely(ftrace_disabled)) - return NULL; - - if (iter->flags & FTRACE_ITER_HASH) - return t_hash_next(m, pos); - (*pos)++; - iter->pos = iter->func_pos = *pos; - - if (iter->flags & FTRACE_ITER_PRINTALL) - return t_hash_start(m, pos); retry: if (iter->idx >= iter->pg->index) { @@ -3181,11 +3213,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } } else { rec = &iter->pg->records[iter->idx++]; - if (((iter->flags & FTRACE_ITER_FILTER) && - !(ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))) || - - ((iter->flags & FTRACE_ITER_NOTRACE) && - !ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) || + if (((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && + !ftrace_lookup_ip(iter->hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && !(rec->flags & FTRACE_FL_ENABLED))) { @@ -3196,24 +3225,51 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } if (!rec) - return t_hash_start(m, pos); + return NULL; + iter->pos = iter->func_pos = *pos; iter->func = rec; return iter; } +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + loff_t l = *pos; /* t_hash_start() must use original pos */ + void *ret; + + if (unlikely(ftrace_disabled)) + return NULL; + + if (iter->flags & FTRACE_ITER_PROBE) + return t_probe_next(m, pos); + + if (iter->flags & FTRACE_ITER_PRINTALL) { + /* next must increment pos, and t_probe_start does not */ + (*pos)++; + return t_probe_start(m, &l); + } + + ret = t_func_next(m, pos); + + if (!ret) + return t_probe_start(m, &l); + + return ret; +} + static void reset_iter_read(struct ftrace_iterator *iter) { iter->pos = 0; iter->func_pos = 0; - iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_HASH); + iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE); } static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct ftrace_ops *ops = iter->ops; void *p = NULL; loff_t l; @@ -3233,20 +3289,19 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. */ - if ((iter->flags & FTRACE_ITER_FILTER && - ftrace_hash_empty(ops->func_hash->filter_hash)) || - (iter->flags & FTRACE_ITER_NOTRACE && - ftrace_hash_empty(ops->func_hash->notrace_hash))) { + if ((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && + ftrace_hash_empty(iter->hash)) { + iter->func_pos = 1; /* Account for the message */ if (*pos > 0) - return t_hash_start(m, pos); + return t_probe_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; /* reset in case of seek/pread */ - iter->flags &= ~FTRACE_ITER_HASH; + iter->flags &= ~FTRACE_ITER_PROBE; return iter; } - if (iter->flags & FTRACE_ITER_HASH) - return t_hash_start(m, pos); + if (iter->flags & FTRACE_ITER_PROBE) + return t_probe_start(m, pos); /* * Unfortunately, we need to restart at ftrace_pages_start @@ -3256,13 +3311,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) iter->pg = ftrace_pages_start; iter->idx = 0; for (l = 0; l <= *pos; ) { - p = t_next(m, p, &l); + p = t_func_next(m, &l); if (!p) break; } if (!p) - return t_hash_start(m, pos); + return t_probe_start(m, pos); return iter; } @@ -3293,8 +3348,8 @@ static int t_show(struct seq_file *m, void *v) struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec; - if (iter->flags & FTRACE_ITER_HASH) - return t_hash_show(m, iter); + if (iter->flags & FTRACE_ITER_PROBE) + return t_probe_show(m, iter); if (iter->flags & FTRACE_ITER_PRINTALL) { if (iter->flags & FTRACE_ITER_NOTRACE) @@ -3355,12 +3410,13 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENODEV; iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); - if (iter) { - iter->pg = ftrace_pages_start; - iter->ops = &global_ops; - } + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->ops = &global_ops; - return iter ? 0 : -ENOMEM; + return 0; } static int @@ -3369,13 +3425,14 @@ ftrace_enabled_open(struct inode *inode, struct file *file) struct ftrace_iterator *iter; iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); - if (iter) { - iter->pg = ftrace_pages_start; - iter->flags = FTRACE_ITER_ENABLED; - iter->ops = &global_ops; - } + if (!iter) + return -ENOMEM; - return iter ? 0 : -ENOMEM; + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_ENABLED; + iter->ops = &global_ops; + + return 0; } /** @@ -3440,7 +3497,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, ret = -ENOMEM; goto out_unlock; } - } + } else + iter->hash = hash; if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; @@ -3470,7 +3528,7 @@ ftrace_filter_open(struct inode *inode, struct file *file) struct ftrace_ops *ops = inode->i_private; return ftrace_regex_open(ops, - FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, + FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES, inode, file); } @@ -3573,22 +3631,20 @@ ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g, /* blank module name to match all modules */ if (!mod_g->len) { /* blank module globbing: modname xor exclude_mod */ - if ((!exclude_mod) != (!modname)) + if (!exclude_mod != !modname) goto func_match; return 0; } - /* not matching the module */ - if (!modname || !mod_matches) { - if (exclude_mod) - goto func_match; - else - return 0; - } - - if (mod_matches && exclude_mod) + /* + * exclude_mod is set to trace everything but the given + * module. If it is set and the module matches, then + * return 0. If it is not set, and the module doesn't match + * also return 0. Otherwise, check the function to see if + * that matches. + */ + if (!mod_matches == !exclude_mod) return 0; - func_match: /* blank search means to match all funcs in the mod */ if (!func_g->len) @@ -3654,6 +3710,56 @@ ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) return match_records(hash, buff, len, NULL); } +static void ftrace_ops_update_code(struct ftrace_ops *ops, + struct ftrace_ops_hash *old_hash) +{ + struct ftrace_ops *op; + + if (!ftrace_enabled) + return; + + if (ops->flags & FTRACE_OPS_FL_ENABLED) { + ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); + return; + } + + /* + * If this is the shared global_ops filter, then we need to + * check if there is another ops that shares it, is enabled. + * If so, we still need to run the modify code. + */ + if (ops->func_hash != &global_ops.local_hash) + return; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->func_hash == &global_ops.local_hash && + op->flags & FTRACE_OPS_FL_ENABLED) { + ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); + /* Only need to do this once */ + return; + } + } while_for_each_ftrace_op(op); +} + +static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, + struct ftrace_hash **orig_hash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops_hash old_hash_ops; + struct ftrace_hash *old_hash; + int ret; + + old_hash = *orig_hash; + old_hash_ops.filter_hash = ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; + ret = ftrace_hash_move(ops, enable, orig_hash, hash); + if (!ret) { + ftrace_ops_update_code(ops, &old_hash_ops); + free_ftrace_hash_rcu(old_hash); + } + return ret; +} /* * We register the module command as a template to show others how @@ -3661,7 +3767,7 @@ ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) */ static int -ftrace_mod_callback(struct ftrace_hash *hash, +ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, char *func, char *cmd, char *module, int enable) { int ret; @@ -3695,16 +3801,11 @@ core_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { - struct ftrace_func_probe *entry; - struct hlist_head *hhd; - unsigned long key; - - key = hash_long(ip, FTRACE_HASH_BITS); - - hhd = &ftrace_func_hash[key]; + struct ftrace_probe_ops *probe_ops; + struct ftrace_func_probe *probe; - if (hlist_empty(hhd)) - return; + probe = container_of(op, struct ftrace_func_probe, ops); + probe_ops = probe->probe_ops; /* * Disable preemption for these calls to prevent a RCU grace @@ -3712,213 +3813,340 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, * on the hash. rcu_read_lock is too dangerous here. */ preempt_disable_notrace(); - hlist_for_each_entry_rcu_notrace(entry, hhd, node) { - if (entry->ip == ip) - entry->ops->func(ip, parent_ip, &entry->data); - } + probe_ops->func(ip, parent_ip, probe->tr, probe_ops, probe->data); preempt_enable_notrace(); } -static struct ftrace_ops trace_probe_ops __read_mostly = -{ - .func = function_trace_probe_call, - .flags = FTRACE_OPS_FL_INITIALIZED, - INIT_OPS_HASH(trace_probe_ops) +struct ftrace_func_map { + struct ftrace_func_entry entry; + void *data; }; -static int ftrace_probe_registered; +struct ftrace_func_mapper { + struct ftrace_hash hash; +}; -static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash) +/** + * allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper + * + * Returns a ftrace_func_mapper descriptor that can be used to map ips to data. + */ +struct ftrace_func_mapper *allocate_ftrace_func_mapper(void) { - int ret; - int i; + struct ftrace_hash *hash; - if (ftrace_probe_registered) { - /* still need to update the function call sites */ - if (ftrace_enabled) - ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, - old_hash); - return; - } + /* + * The mapper is simply a ftrace_hash, but since the entries + * in the hash are not ftrace_func_entry type, we define it + * as a separate structure. + */ + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + return (struct ftrace_func_mapper *)hash; +} - for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { - struct hlist_head *hhd = &ftrace_func_hash[i]; - if (hhd->first) - break; - } - /* Nothing registered? */ - if (i == FTRACE_FUNC_HASHSIZE) - return; +/** + * ftrace_func_mapper_find_ip - Find some data mapped to an ip + * @mapper: The mapper that has the ip maps + * @ip: the instruction pointer to find the data for + * + * Returns the data mapped to @ip if found otherwise NULL. The return + * is actually the address of the mapper data pointer. The address is + * returned for use cases where the data is no bigger than a long, and + * the user can use the data pointer as its data instead of having to + * allocate more memory for the reference. + */ +void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, + unsigned long ip) +{ + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; - ret = ftrace_startup(&trace_probe_ops, 0); + entry = ftrace_lookup_ip(&mapper->hash, ip); + if (!entry) + return NULL; - ftrace_probe_registered = 1; + map = (struct ftrace_func_map *)entry; + return &map->data; } -static bool __disable_ftrace_function_probe(void) +/** + * ftrace_func_mapper_add_ip - Map some data to an ip + * @mapper: The mapper that has the ip maps + * @ip: The instruction pointer address to map @data to + * @data: The data to map to @ip + * + * Returns 0 on succes otherwise an error. + */ +int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, + unsigned long ip, void *data) { - int i; + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; - if (!ftrace_probe_registered) - return false; + entry = ftrace_lookup_ip(&mapper->hash, ip); + if (entry) + return -EBUSY; - for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { - struct hlist_head *hhd = &ftrace_func_hash[i]; - if (hhd->first) - return false; - } + map = kmalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; - /* no more funcs left */ - ftrace_shutdown(&trace_probe_ops, 0); + map->entry.ip = ip; + map->data = data; - ftrace_probe_registered = 0; - return true; -} + __add_hash_entry(&mapper->hash, &map->entry); + return 0; +} -static void ftrace_free_entry(struct ftrace_func_probe *entry) +/** + * ftrace_func_mapper_remove_ip - Remove an ip from the mapping + * @mapper: The mapper that has the ip maps + * @ip: The instruction pointer address to remove the data from + * + * Returns the data if it is found, otherwise NULL. + * Note, if the data pointer is used as the data itself, (see + * ftrace_func_mapper_find_ip(), then the return value may be meaningless, + * if the data pointer was set to zero. + */ +void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, + unsigned long ip) { - if (entry->ops->free) - entry->ops->free(entry->ops, entry->ip, &entry->data); + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; + void *data; + + entry = ftrace_lookup_ip(&mapper->hash, ip); + if (!entry) + return NULL; + + map = (struct ftrace_func_map *)entry; + data = map->data; + + remove_hash_entry(&mapper->hash, entry); kfree(entry); + + return data; +} + +/** + * free_ftrace_func_mapper - free a mapping of ips and data + * @mapper: The mapper that has the ip maps + * @free_func: A function to be called on each data item. + * + * This is used to free the function mapper. The @free_func is optional + * and can be used if the data needs to be freed as well. + */ +void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, + ftrace_mapper_func free_func) +{ + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; + struct hlist_head *hhd; + int size = 1 << mapper->hash.size_bits; + int i; + + if (free_func && mapper->hash.count) { + for (i = 0; i < size; i++) { + hhd = &mapper->hash.buckets[i]; + hlist_for_each_entry(entry, hhd, hlist) { + map = (struct ftrace_func_map *)entry; + free_func(map); + } + } + } + free_ftrace_hash(&mapper->hash); +} + +static void release_probe(struct ftrace_func_probe *probe) +{ + struct ftrace_probe_ops *probe_ops; + + mutex_lock(&ftrace_lock); + + WARN_ON(probe->ref <= 0); + + /* Subtract the ref that was used to protect this instance */ + probe->ref--; + + if (!probe->ref) { + probe_ops = probe->probe_ops; + /* + * Sending zero as ip tells probe_ops to free + * the probe->data itself + */ + if (probe_ops->free) + probe_ops->free(probe_ops, probe->tr, 0, probe->data); + list_del(&probe->list); + kfree(probe); + } + mutex_unlock(&ftrace_lock); +} + +static void acquire_probe_locked(struct ftrace_func_probe *probe) +{ + /* + * Add one ref to keep it from being freed when releasing the + * ftrace_lock mutex. + */ + probe->ref++; } int -register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data) +register_ftrace_function_probe(char *glob, struct trace_array *tr, + struct ftrace_probe_ops *probe_ops, + void *data) { - struct ftrace_ops_hash old_hash_ops; - struct ftrace_func_probe *entry; - struct ftrace_glob func_g; - struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; - struct ftrace_hash *old_hash = *orig_hash; + struct ftrace_func_entry *entry; + struct ftrace_func_probe *probe; + struct ftrace_hash **orig_hash; + struct ftrace_hash *old_hash; struct ftrace_hash *hash; - struct ftrace_page *pg; - struct dyn_ftrace *rec; - int not; - unsigned long key; int count = 0; + int size; int ret; + int i; - func_g.type = filter_parse_regex(glob, strlen(glob), - &func_g.search, ¬); - func_g.len = strlen(func_g.search); - - /* we do not support '!' for function probes */ - if (WARN_ON(not)) + if (WARN_ON(!tr)) return -EINVAL; - mutex_lock(&trace_probe_ops.func_hash->regex_lock); + /* We do not support '!' for function probes */ + if (WARN_ON(glob[0] == '!')) + return -EINVAL; - old_hash_ops.filter_hash = old_hash; - /* Probes only have filters */ - old_hash_ops.notrace_hash = NULL; - hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); - if (!hash) { - count = -ENOMEM; - goto out; + mutex_lock(&ftrace_lock); + /* Check if the probe_ops is already registered */ + list_for_each_entry(probe, &tr->func_probes, list) { + if (probe->probe_ops == probe_ops) + break; } - - if (unlikely(ftrace_disabled)) { - count = -ENODEV; - goto out; + if (&probe->list == &tr->func_probes) { + probe = kzalloc(sizeof(*probe), GFP_KERNEL); + if (!probe) { + mutex_unlock(&ftrace_lock); + return -ENOMEM; + } + probe->probe_ops = probe_ops; + probe->ops.func = function_trace_probe_call; + probe->tr = tr; + ftrace_ops_init(&probe->ops); + list_add(&probe->list, &tr->func_probes); } - mutex_lock(&ftrace_lock); + acquire_probe_locked(probe); - do_for_each_ftrace_rec(pg, rec) { + mutex_unlock(&ftrace_lock); - if (rec->flags & FTRACE_FL_DISABLED) - continue; + mutex_lock(&probe->ops.func_hash->regex_lock); - if (!ftrace_match_record(rec, &func_g, NULL, 0)) - continue; + orig_hash = &probe->ops.func_hash->filter_hash; + old_hash = *orig_hash; + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) { - /* If we did not process any, then return error */ - if (!count) - count = -ENOMEM; - goto out_unlock; - } + ret = ftrace_match_records(hash, glob, strlen(glob)); - count++; + /* Nothing found? */ + if (!ret) + ret = -EINVAL; - entry->data = data; + if (ret < 0) + goto out; - /* - * The caller might want to do something special - * for each function we find. We call the callback - * to give the caller an opportunity to do so. - */ - if (ops->init) { - if (ops->init(ops, rec->ip, &entry->data) < 0) { - /* caller does not like this func */ - kfree(entry); + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + if (ftrace_lookup_ip(old_hash, entry->ip)) continue; + /* + * The caller might want to do something special + * for each function we find. We call the callback + * to give the caller an opportunity to do so. + */ + if (probe_ops->init) { + ret = probe_ops->init(probe_ops, tr, + entry->ip, data, + &probe->data); + if (ret < 0) { + if (probe_ops->free && count) + probe_ops->free(probe_ops, tr, + 0, probe->data); + probe->data = NULL; + goto out; + } } + count++; } + } - ret = enter_record(hash, rec, 0); - if (ret < 0) { - kfree(entry); - count = ret; - goto out_unlock; - } - - entry->ops = ops; - entry->ip = rec->ip; + mutex_lock(&ftrace_lock); - key = hash_long(entry->ip, FTRACE_HASH_BITS); - hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]); + if (!count) { + /* Nothing was added? */ + ret = -EINVAL; + goto out_unlock; + } - } while_for_each_ftrace_rec(); + ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash, + hash, 1); + if (ret < 0) + goto err_unlock; - ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); + /* One ref for each new function traced */ + probe->ref += count; - __enable_ftrace_function_probe(&old_hash_ops); - - if (!ret) - free_ftrace_hash_rcu(old_hash); - else - count = ret; + if (!(probe->ops.flags & FTRACE_OPS_FL_ENABLED)) + ret = ftrace_startup(&probe->ops, 0); out_unlock: mutex_unlock(&ftrace_lock); + + if (!ret) + ret = count; out: - mutex_unlock(&trace_probe_ops.func_hash->regex_lock); + mutex_unlock(&probe->ops.func_hash->regex_lock); free_ftrace_hash(hash); - return count; -} + release_probe(probe); -enum { - PROBE_TEST_FUNC = 1, - PROBE_TEST_DATA = 2 -}; + return ret; -static void -__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data, int flags) + err_unlock: + if (!probe_ops->free || !count) + goto out_unlock; + + /* Failed to do the move, need to call the free functions */ + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + if (ftrace_lookup_ip(old_hash, entry->ip)) + continue; + probe_ops->free(probe_ops, tr, entry->ip, probe->data); + } + } + goto out_unlock; +} + +int +unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, + struct ftrace_probe_ops *probe_ops) { struct ftrace_ops_hash old_hash_ops; - struct ftrace_func_entry *rec_entry; - struct ftrace_func_probe *entry; - struct ftrace_func_probe *p; + struct ftrace_func_entry *entry; + struct ftrace_func_probe *probe; struct ftrace_glob func_g; - struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; - struct ftrace_hash *old_hash = *orig_hash; - struct list_head free_list; - struct ftrace_hash *hash; + struct ftrace_hash **orig_hash; + struct ftrace_hash *old_hash; + struct ftrace_hash *hash = NULL; struct hlist_node *tmp; + struct hlist_head hhd; char str[KSYM_SYMBOL_LEN]; - int i, ret; - bool disabled; + int count = 0; + int i, ret = -ENODEV; + int size; - if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) + if (!glob || !strlen(glob) || !strcmp(glob, "*")) func_g.search = NULL; - else if (glob) { + else { int not; func_g.type = filter_parse_regex(glob, strlen(glob), @@ -3928,95 +4156,112 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, /* we do not support '!' for function probes */ if (WARN_ON(not)) - return; + return -EINVAL; + } + + mutex_lock(&ftrace_lock); + /* Check if the probe_ops is already registered */ + list_for_each_entry(probe, &tr->func_probes, list) { + if (probe->probe_ops == probe_ops) + break; } + if (&probe->list == &tr->func_probes) + goto err_unlock_ftrace; - mutex_lock(&trace_probe_ops.func_hash->regex_lock); + ret = -EINVAL; + if (!(probe->ops.flags & FTRACE_OPS_FL_INITIALIZED)) + goto err_unlock_ftrace; + + acquire_probe_locked(probe); + + mutex_unlock(&ftrace_lock); + + mutex_lock(&probe->ops.func_hash->regex_lock); + + orig_hash = &probe->ops.func_hash->filter_hash; + old_hash = *orig_hash; + + if (ftrace_hash_empty(old_hash)) + goto out_unlock; old_hash_ops.filter_hash = old_hash; /* Probes only have filters */ old_hash_ops.notrace_hash = NULL; - hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); + ret = -ENOMEM; + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); if (!hash) - /* Hmm, should report this somehow */ goto out_unlock; - INIT_LIST_HEAD(&free_list); - - for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { - struct hlist_head *hhd = &ftrace_func_hash[i]; - - hlist_for_each_entry_safe(entry, tmp, hhd, node) { - - /* break up if statements for readability */ - if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) - continue; + INIT_HLIST_HEAD(&hhd); - if ((flags & PROBE_TEST_DATA) && entry->data != data) - continue; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { - /* do this last, since it is the most expensive */ if (func_g.search) { kallsyms_lookup(entry->ip, NULL, NULL, NULL, str); if (!ftrace_match(str, &func_g)) continue; } - - rec_entry = ftrace_lookup_ip(hash, entry->ip); - /* It is possible more than one entry had this ip */ - if (rec_entry) - free_hash_entry(hash, rec_entry); - - hlist_del_rcu(&entry->node); - list_add(&entry->free_list, &free_list); + count++; + remove_hash_entry(hash, entry); + hlist_add_head(&entry->hlist, &hhd); } } + + /* Nothing found? */ + if (!count) { + ret = -EINVAL; + goto out_unlock; + } + mutex_lock(&ftrace_lock); - disabled = __disable_ftrace_function_probe(); - /* - * Remove after the disable is called. Otherwise, if the last - * probe is removed, a null hash means *all enabled*. - */ - ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); + + WARN_ON(probe->ref < count); + + probe->ref -= count; + + if (ftrace_hash_empty(hash)) + ftrace_shutdown(&probe->ops, 0); + + ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash, + hash, 1); /* still need to update the function call sites */ - if (ftrace_enabled && !disabled) - ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, + if (ftrace_enabled && !ftrace_hash_empty(hash)) + ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); synchronize_sched(); - if (!ret) - free_ftrace_hash_rcu(old_hash); - list_for_each_entry_safe(entry, p, &free_list, free_list) { - list_del(&entry->free_list); - ftrace_free_entry(entry); + hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { + hlist_del(&entry->hlist); + if (probe_ops->free) + probe_ops->free(probe_ops, tr, entry->ip, probe->data); + kfree(entry); } mutex_unlock(&ftrace_lock); out_unlock: - mutex_unlock(&trace_probe_ops.func_hash->regex_lock); + mutex_unlock(&probe->ops.func_hash->regex_lock); free_ftrace_hash(hash); -} -void -unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data) -{ - __unregister_ftrace_function_probe(glob, ops, data, - PROBE_TEST_FUNC | PROBE_TEST_DATA); -} + release_probe(probe); -void -unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) -{ - __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC); + return ret; + + err_unlock_ftrace: + mutex_unlock(&ftrace_lock); + return ret; } -void unregister_ftrace_function_probe_all(char *glob) +void clear_ftrace_function_probes(struct trace_array *tr) { - __unregister_ftrace_function_probe(glob, NULL, NULL, 0); + struct ftrace_func_probe *probe, *n; + + list_for_each_entry_safe(probe, n, &tr->func_probes, list) + unregister_ftrace_function_probe_func(NULL, tr, probe->probe_ops); } static LIST_HEAD(ftrace_commands); @@ -4068,9 +4313,11 @@ __init int unregister_ftrace_command(struct ftrace_func_command *cmd) return ret; } -static int ftrace_process_regex(struct ftrace_hash *hash, +static int ftrace_process_regex(struct ftrace_iterator *iter, char *buff, int len, int enable) { + struct ftrace_hash *hash = iter->hash; + struct trace_array *tr = iter->ops->private; char *func, *command, *next = buff; struct ftrace_func_command *p; int ret = -EINVAL; @@ -4090,10 +4337,13 @@ static int ftrace_process_regex(struct ftrace_hash *hash, command = strsep(&next, ":"); + if (WARN_ON_ONCE(!tr)) + return -EINVAL; + mutex_lock(&ftrace_cmd_mutex); list_for_each_entry(p, &ftrace_commands, list) { if (strcmp(p->name, command) == 0) { - ret = p->func(hash, func, command, next, enable); + ret = p->func(tr, hash, func, command, next, enable); goto out_unlock; } } @@ -4130,7 +4380,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, if (read >= 0 && trace_parser_loaded(parser) && !trace_parser_cont(parser)) { - ret = ftrace_process_regex(iter->hash, parser->buffer, + ret = ftrace_process_regex(iter, parser->buffer, parser->idx, enable); trace_parser_clear(parser); if (ret < 0) @@ -4175,44 +4425,11 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return add_hash_entry(hash, ip); } -static void ftrace_ops_update_code(struct ftrace_ops *ops, - struct ftrace_ops_hash *old_hash) -{ - struct ftrace_ops *op; - - if (!ftrace_enabled) - return; - - if (ops->flags & FTRACE_OPS_FL_ENABLED) { - ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); - return; - } - - /* - * If this is the shared global_ops filter, then we need to - * check if there is another ops that shares it, is enabled. - * If so, we still need to run the modify code. - */ - if (ops->func_hash != &global_ops.local_hash) - return; - - do_for_each_ftrace_op(op, ftrace_ops_list) { - if (op->func_hash == &global_ops.local_hash && - op->flags & FTRACE_OPS_FL_ENABLED) { - ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); - /* Only need to do this once */ - return; - } - } while_for_each_ftrace_op(op); -} - static int ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, unsigned long ip, int remove, int reset, int enable) { struct ftrace_hash **orig_hash; - struct ftrace_ops_hash old_hash_ops; - struct ftrace_hash *old_hash; struct ftrace_hash *hash; int ret; @@ -4247,14 +4464,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, } mutex_lock(&ftrace_lock); - old_hash = *orig_hash; - old_hash_ops.filter_hash = ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; - ret = ftrace_hash_move(ops, enable, orig_hash, hash); - if (!ret) { - ftrace_ops_update_code(ops, &old_hash_ops); - free_ftrace_hash_rcu(old_hash); - } + ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); mutex_unlock(&ftrace_lock); out_regex_unlock: @@ -4493,10 +4703,8 @@ static void __init set_ftrace_early_filters(void) int ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; - struct ftrace_ops_hash old_hash_ops; struct ftrace_iterator *iter; struct ftrace_hash **orig_hash; - struct ftrace_hash *old_hash; struct trace_parser *parser; int filter_hash; int ret; @@ -4526,16 +4734,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file) orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); - old_hash = *orig_hash; - old_hash_ops.filter_hash = iter->ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = iter->ops->func_hash->notrace_hash; - ret = ftrace_hash_move(iter->ops, filter_hash, - orig_hash, iter->hash); - if (!ret) { - ftrace_ops_update_code(iter->ops, &old_hash_ops); - free_ftrace_hash_rcu(old_hash); - } + ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, + iter->hash, filter_hash); mutex_unlock(&ftrace_lock); + } else { + /* For read only, the hash is the ops hash */ + iter->hash = NULL; } mutex_unlock(&iter->ops->func_hash->regex_lock); @@ -4859,7 +5063,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) } out: - kfree(fgd->new_hash); + free_ftrace_hash(fgd->new_hash); kfree(fgd); return ret; @@ -5274,6 +5478,50 @@ void ftrace_module_init(struct module *mod) } #endif /* CONFIG_MODULES */ +void __init ftrace_free_init_mem(void) +{ + unsigned long start = (unsigned long)(&__init_begin); + unsigned long end = (unsigned long)(&__init_end); + struct ftrace_page **last_pg = &ftrace_pages_start; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + struct dyn_ftrace key; + int order; + + key.ip = start; + key.flags = end; /* overload flags, as it is unsigned long */ + + mutex_lock(&ftrace_lock); + + for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { + if (end < pg->records[0].ip || + start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) + continue; + again: + rec = bsearch(&key, pg->records, pg->index, + sizeof(struct dyn_ftrace), + ftrace_cmp_recs); + if (!rec) + continue; + pg->index--; + if (!pg->index) { + *last_pg = pg->next; + order = get_count_order(pg->size / ENTRIES_PER_PAGE); + free_pages((unsigned long)pg->records, order); + kfree(pg); + pg = container_of(last_pg, struct ftrace_page, next); + if (!(*last_pg)) + ftrace_pages = pg; + continue; + } + memmove(rec, rec + 1, + (pg->index - (rec - pg->records)) * sizeof(*rec)); + /* More than one function may be in this block */ + goto again; + } + mutex_unlock(&ftrace_lock); +} + void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; @@ -5316,25 +5564,13 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) static void ftrace_update_trampoline(struct ftrace_ops *ops) { - -/* - * Currently there's no safe way to free a trampoline when the kernel - * is configured with PREEMPT. That is because a task could be preempted - * when it jumped to the trampoline, it may be preempted for a long time - * depending on the system load, and currently there's no way to know - * when it will be off the trampoline. If the trampoline is freed - * too early, when the task runs again, it will be executing on freed - * memory and crash. - */ -#ifdef CONFIG_PREEMPT - /* Currently, only non dynamic ops can have a trampoline */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - return; -#endif - arch_ftrace_update_trampoline(ops); } +void ftrace_init_trace_array(struct trace_array *tr) +{ + INIT_LIST_HEAD(&tr->func_probes); +} #else static struct ftrace_ops global_ops = { @@ -5389,6 +5625,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr) { tr->ops = &global_ops; tr->ops->private = tr; + ftrace_init_trace_array(tr); } void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) @@ -5543,6 +5780,43 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, trace_ignore_this_task(pid_list, next)); } +static void +ftrace_pid_follow_sched_process_fork(void *data, + struct task_struct *self, + struct task_struct *task) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = data; + + pid_list = rcu_dereference_sched(tr->function_pids); + trace_filter_add_remove_task(pid_list, self, task); +} + +static void +ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = data; + + pid_list = rcu_dereference_sched(tr->function_pids); + trace_filter_add_remove_task(pid_list, NULL, task); +} + +void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) +{ + if (enable) { + register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, + tr); + register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + tr); + } else { + unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, + tr); + unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + tr); + } +} + static void clear_ftrace_pids(struct trace_array *tr) { struct trace_pid_list *pid_list; @@ -5566,6 +5840,15 @@ static void clear_ftrace_pids(struct trace_array *tr) trace_free_pid_list(pid_list); } +void ftrace_clear_pids(struct trace_array *tr) +{ + mutex_lock(&ftrace_lock); + + clear_ftrace_pids(tr); + + mutex_unlock(&ftrace_lock); +} + static void ftrace_pid_reset(struct trace_array *tr) { mutex_lock(&ftrace_lock); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 54e7a90db848df3d1bca17e1ca966e57d2ce57d2..4ae268e687fe1683d9253235d539d840d8bf4f14 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -438,6 +438,7 @@ struct ring_buffer_per_cpu { raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; + struct buffer_data_page *free_page; unsigned long nr_pages; unsigned int current_context; struct list_head *pages; @@ -3405,11 +3406,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); int ring_buffer_iter_empty(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; + struct buffer_page *reader; + struct buffer_page *head_page; + struct buffer_page *commit_page; + unsigned commit; cpu_buffer = iter->cpu_buffer; - return iter->head_page == cpu_buffer->commit_page && - iter->head == rb_commit_index(cpu_buffer); + /* Remember, trace recording is off when iterator is in use */ + reader = cpu_buffer->reader_page; + head_page = cpu_buffer->head_page; + commit_page = cpu_buffer->commit_page; + commit = rb_page_commit(commit_page); + + return ((iter->head_page == commit_page && iter->head == commit) || + (iter->head_page == reader && commit_page == head_page && + head_page->read == commit && + iter->head == rb_page_commit(cpu_buffer->reader_page))); } EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); @@ -4377,9 +4390,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); */ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) { - struct buffer_data_page *bpage; + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct buffer_data_page *bpage = NULL; + unsigned long flags; struct page *page; + local_irq_save(flags); + arch_spin_lock(&cpu_buffer->lock); + + if (cpu_buffer->free_page) { + bpage = cpu_buffer->free_page; + cpu_buffer->free_page = NULL; + } + + arch_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); + + if (bpage) + goto out; + page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); if (!page) @@ -4387,6 +4416,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu) bpage = page_address(page); + out: rb_init_page(bpage); return bpage; @@ -4396,13 +4426,29 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); /** * ring_buffer_free_read_page - free an allocated read page * @buffer: the buffer the page was allocate for + * @cpu: the cpu buffer the page came from * @data: the page to free * * Free a page allocated from ring_buffer_alloc_read_page. */ -void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) +void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) { - free_page((unsigned long)data); + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct buffer_data_page *bpage = data; + unsigned long flags; + + local_irq_save(flags); + arch_spin_lock(&cpu_buffer->lock); + + if (!cpu_buffer->free_page) { + cpu_buffer->free_page = bpage; + bpage = NULL; + } + + arch_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); + + free_page((unsigned long)bpage); } EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index c190a4d5013c5ecd637c78cac7c1273dcf0b7fe1..9fbcaf56788626335bfd5412dffaf820a9b308bc 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -171,7 +171,7 @@ static enum event_status read_page(int cpu) } } } - ring_buffer_free_read_page(buffer, bpage); + ring_buffer_free_read_page(buffer, cpu, bpage); if (ret < 0) return EVENT_DROPPED; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f35109514a015c38de8b2e1da99399fd5f399692..1122f151466f64425089b9b9ecbd4b1a7584ba8a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -257,7 +257,7 @@ unsigned long long ns2usecs(u64 nsec) /* trace_flags that are default zero for instances */ #define ZEROED_TRACE_FLAGS \ - TRACE_ITER_EVENT_FORK + (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK) /* * The global_trace is the descriptor that holds the top-level tracing @@ -757,7 +757,7 @@ __trace_buffer_lock_reserve(struct ring_buffer *buffer, return event; } -static void tracer_tracing_on(struct trace_array *tr) +void tracer_tracing_on(struct trace_array *tr) { if (tr->trace_buffer.buffer) ring_buffer_record_on(tr->trace_buffer.buffer); @@ -894,23 +894,8 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -/** - * trace_snapshot - take a snapshot of the current buffer. - * - * This causes a swap between the snapshot buffer and the current live - * tracing buffer. You can use this to take snapshots of the live - * trace when some condition is triggered, but continue to trace. - * - * Note, make sure to allocate the snapshot with either - * a tracing_snapshot_alloc(), or by doing it manually - * with: echo 1 > /sys/kernel/debug/tracing/snapshot - * - * If the snapshot buffer is not allocated, it will stop tracing. - * Basically making a permanent snapshot. - */ -void tracing_snapshot(void) +static void tracing_snapshot_instance(struct trace_array *tr) { - struct trace_array *tr = &global_trace; struct tracer *tracer = tr->current_trace; unsigned long flags; @@ -938,6 +923,27 @@ void tracing_snapshot(void) update_max_tr(tr, current, smp_processor_id()); local_irq_restore(flags); } + +/** + * trace_snapshot - take a snapshot of the current buffer. + * + * This causes a swap between the snapshot buffer and the current live + * tracing buffer. You can use this to take snapshots of the live + * trace when some condition is triggered, but continue to trace. + * + * Note, make sure to allocate the snapshot with either + * a tracing_snapshot_alloc(), or by doing it manually + * with: echo 1 > /sys/kernel/debug/tracing/snapshot + * + * If the snapshot buffer is not allocated, it will stop tracing. + * Basically making a permanent snapshot. + */ +void tracing_snapshot(void) +{ + struct trace_array *tr = &global_trace; + + tracing_snapshot_instance(tr); +} EXPORT_SYMBOL_GPL(tracing_snapshot); static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, @@ -1039,7 +1045,7 @@ void tracing_snapshot_alloc(void) EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); #endif /* CONFIG_TRACER_SNAPSHOT */ -static void tracer_tracing_off(struct trace_array *tr) +void tracer_tracing_off(struct trace_array *tr) { if (tr->trace_buffer.buffer) ring_buffer_record_off(tr->trace_buffer.buffer); @@ -1424,6 +1430,28 @@ static int wait_on_pipe(struct trace_iterator *iter, bool full) } #ifdef CONFIG_FTRACE_STARTUP_TEST +static bool selftests_can_run; + +struct trace_selftests { + struct list_head list; + struct tracer *type; +}; + +static LIST_HEAD(postponed_selftests); + +static int save_selftest(struct tracer *type) +{ + struct trace_selftests *selftest; + + selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); + if (!selftest) + return -ENOMEM; + + selftest->type = type; + list_add(&selftest->list, &postponed_selftests); + return 0; +} + static int run_tracer_selftest(struct tracer *type) { struct trace_array *tr = &global_trace; @@ -1433,6 +1461,14 @@ static int run_tracer_selftest(struct tracer *type) if (!type->selftest || tracing_selftest_disabled) return 0; + /* + * If a tracer registers early in boot up (before scheduling is + * initialized and such), then do not run its selftests yet. + * Instead, run it a little later in the boot process. + */ + if (!selftests_can_run) + return save_selftest(type); + /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current @@ -1482,6 +1518,47 @@ static int run_tracer_selftest(struct tracer *type) printk(KERN_CONT "PASSED\n"); return 0; } + +static __init int init_trace_selftests(void) +{ + struct trace_selftests *p, *n; + struct tracer *t, **last; + int ret; + + selftests_can_run = true; + + mutex_lock(&trace_types_lock); + + if (list_empty(&postponed_selftests)) + goto out; + + pr_info("Running postponed tracer tests:\n"); + + list_for_each_entry_safe(p, n, &postponed_selftests, list) { + ret = run_tracer_selftest(p->type); + /* If the test fails, then warn and remove from available_tracers */ + if (ret < 0) { + WARN(1, "tracer: %s failed selftest, disabling\n", + p->type->name); + last = &trace_types; + for (t = trace_types; t; t = t->next) { + if (t == p->type) { + *last = t->next; + break; + } + last = &t->next; + } + } + list_del(&p->list); + kfree(p); + } + + out: + mutex_unlock(&trace_types_lock); + + return 0; +} +core_initcall(init_trace_selftests); #else static inline int run_tracer_selftest(struct tracer *type) { @@ -1899,7 +1976,7 @@ static void __trace_find_cmdline(int pid, char comm[]) map = savedcmd->map_pid_to_cmdline[pid]; if (map != NO_CMDLINE_MAP) - strcpy(comm, get_saved_cmdlines(map)); + strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); else strcpy(comm, "<...>"); } @@ -1927,6 +2004,18 @@ void tracing_record_cmdline(struct task_struct *tsk) __this_cpu_write(trace_cmdline_save, false); } +/* + * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq + * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function + * simplifies those functions and keeps them in sync. + */ +enum print_line_t trace_handle_return(struct trace_seq *s) +{ + return trace_seq_has_overflowed(s) ? + TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; +} +EXPORT_SYMBOL_GPL(trace_handle_return); + void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc) @@ -2479,7 +2568,36 @@ static inline void ftrace_trace_stack(struct trace_array *tr, void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL); + struct ring_buffer *buffer = tr->trace_buffer.buffer; + + if (rcu_is_watching()) { + __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + return; + } + + /* + * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), + * but if the above rcu_is_watching() failed, then the NMI + * triggered someplace critical, and rcu_irq_enter() should + * not be called from NMI. + */ + if (unlikely(in_nmi())) + return; + + /* + * It is possible that a function is being traced in a + * location that RCU is not watching. A call to + * rcu_irq_enter() will make sure that it is, but there's + * a few internal rcu functions that could be traced + * where that wont work either. In those cases, we just + * do nothing. + */ + if (unlikely(rcu_irq_enter_disabled())) + return; + + rcu_irq_enter_irqson(); + __ftrace_trace_stack(buffer, flags, skip, pc, NULL); + rcu_irq_exit_irqson(); } /** @@ -3222,13 +3340,14 @@ static void test_cpu_buff_start(struct trace_iterator *iter) if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) return; - if (iter->started && cpumask_test_cpu(iter->cpu, iter->started)) + if (cpumask_available(iter->started) && + cpumask_test_cpu(iter->cpu, iter->started)) return; if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries) return; - if (iter->started) + if (cpumask_available(iter->started)) cpumask_set_cpu(iter->cpu, iter->started); /* Don't print started cpu buffer for the first entry of the trace */ @@ -4122,6 +4241,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) if (mask == TRACE_ITER_EVENT_FORK) trace_event_follow_fork(tr, enabled); + if (mask == TRACE_ITER_FUNC_FORK) + ftrace_pid_follow_fork(tr, enabled); + if (mask == TRACE_ITER_OVERWRITE) { ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE @@ -4355,6 +4477,7 @@ static const char readme_msg[] = "\t -:[/]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" + "place (kretprobe): [:][+]|\n" #endif #ifdef CONFIG_UPROBE_EVENTS "\t place: :\n" @@ -5529,7 +5652,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .partial = partial_def, .nr_pages = 0, /* This gets updated below. */ .nr_pages_max = PIPE_DEF_BUFFERS, - .flags = flags, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; @@ -5962,6 +6084,7 @@ static int tracing_clock_open(struct inode *inode, struct file *file) struct ftrace_buffer_info { struct trace_iterator iter; void *spare; + unsigned int spare_cpu; unsigned int read; }; @@ -6291,9 +6414,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, return -EBUSY; #endif - if (!info->spare) + if (!info->spare) { info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, iter->cpu_file); + info->spare_cpu = iter->cpu_file; + } if (!info->spare) return -ENOMEM; @@ -6353,7 +6478,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); if (info->spare) - ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare); + ring_buffer_free_read_page(iter->trace_buffer->buffer, + info->spare_cpu, info->spare); kfree(info); mutex_unlock(&trace_types_lock); @@ -6364,6 +6490,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) struct buffer_ref { struct ring_buffer *buffer; void *page; + int cpu; int ref; }; @@ -6375,7 +6502,7 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, if (--ref->ref) return; - ring_buffer_free_read_page(ref->buffer, ref->page); + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); kfree(ref); buf->private = 0; } @@ -6409,7 +6536,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) if (--ref->ref) return; - ring_buffer_free_read_page(ref->buffer, ref->page); + ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); kfree(ref); spd->partial[i].private = 0; } @@ -6427,7 +6554,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .pages = pages_def, .partial = partial_def, .nr_pages_max = PIPE_DEF_BUFFERS, - .flags = flags, .ops = &buffer_pipe_buf_ops, .spd_release = buffer_spd_release, }; @@ -6474,11 +6600,13 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, kfree(ref); break; } + ref->cpu = iter->cpu_file; r = ring_buffer_read_page(ref->buffer, &ref->page, len, iter->cpu_file, 1); if (r < 0) { - ring_buffer_free_read_page(ref->buffer, ref->page); + ring_buffer_free_read_page(ref->buffer, ref->cpu, + ref->page); kfree(ref); break; } @@ -6649,43 +6777,89 @@ static const struct file_operations tracing_dyn_info_fops = { #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) static void -ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_snapshot(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - tracing_snapshot(); + tracing_snapshot_instance(tr); } static void -ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - unsigned long *count = (long *)data; + struct ftrace_func_mapper *mapper = data; + long *count = NULL; - if (!*count) - return; + if (mapper) + count = (long *)ftrace_func_mapper_find_ip(mapper, ip); + + if (count) { + + if (*count <= 0) + return; - if (*count != -1) (*count)--; + } - tracing_snapshot(); + tracing_snapshot_instance(tr); } static int ftrace_snapshot_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { - long count = (long)data; + struct ftrace_func_mapper *mapper = data; + long *count = NULL; seq_printf(m, "%ps:", (void *)ip); seq_puts(m, "snapshot"); - if (count == -1) - seq_puts(m, ":unlimited\n"); + if (mapper) + count = (long *)ftrace_func_mapper_find_ip(mapper, ip); + + if (count) + seq_printf(m, ":count=%ld\n", *count); else - seq_printf(m, ":count=%ld\n", count); + seq_puts(m, ":unlimited\n"); return 0; } +static int +ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, + unsigned long ip, void *init_data, void **data) +{ + struct ftrace_func_mapper *mapper = *data; + + if (!mapper) { + mapper = allocate_ftrace_func_mapper(); + if (!mapper) + return -ENOMEM; + *data = mapper; + } + + return ftrace_func_mapper_add_ip(mapper, ip, init_data); +} + +static void +ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, + unsigned long ip, void *data) +{ + struct ftrace_func_mapper *mapper = data; + + if (!ip) { + if (!mapper) + return; + free_ftrace_func_mapper(mapper, NULL); + return; + } + + ftrace_func_mapper_remove_ip(mapper, ip); +} + static struct ftrace_probe_ops snapshot_probe_ops = { .func = ftrace_snapshot, .print = ftrace_snapshot_print, @@ -6694,10 +6868,12 @@ static struct ftrace_probe_ops snapshot_probe_ops = { static struct ftrace_probe_ops snapshot_count_probe_ops = { .func = ftrace_count_snapshot, .print = ftrace_snapshot_print, + .init = ftrace_snapshot_init, + .free = ftrace_snapshot_free, }; static int -ftrace_trace_snapshot_callback(struct ftrace_hash *hash, +ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { struct ftrace_probe_ops *ops; @@ -6711,10 +6887,8 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash, ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; - if (glob[0] == '!') { - unregister_ftrace_function_probe_func(glob+1, ops); - return 0; - } + if (glob[0] == '!') + return unregister_ftrace_function_probe_func(glob+1, tr, ops); if (!param) goto out_reg; @@ -6733,11 +6907,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash, return ret; out_reg: - ret = register_ftrace_function_probe(glob, ops, count); + ret = alloc_snapshot(tr); + if (ret < 0) + goto out; - if (ret >= 0) - alloc_snapshot(&global_trace); + ret = register_ftrace_function_probe(glob, tr, ops, count); + out: return ret < 0 ? ret : 0; } @@ -7346,6 +7522,8 @@ static int instance_mkdir(const char *name) goto out_free_tr; } + ftrace_init_trace_array(tr); + init_tracer_tracefs(tr, tr->dir); init_trace_flags_index(tr); __update_tracer_options(tr); @@ -7401,7 +7579,9 @@ static int instance_rmdir(const char *name) } tracing_set_nop(tr); + clear_ftrace_function_probes(tr); event_trace_del_tracer(tr); + ftrace_clear_pids(tr); ftrace_destroy_function_files(tr); tracefs_remove_recursive(tr->dir); free_trace_buffers(tr); @@ -7965,6 +8145,9 @@ __init static int tracer_alloc_buffers(void) register_tracer(&nop_trace); + /* Function tracing may start here (via kernel command line) */ + init_function_trace(); + /* All seems OK, enable tracing */ tracing_disabled = 0; @@ -7999,7 +8182,7 @@ __init static int tracer_alloc_buffers(void) return ret; } -void __init trace_init(void) +void __init early_trace_init(void) { if (tracepoint_printk) { tracepoint_print_iter = @@ -8010,6 +8193,10 @@ void __init trace_init(void) static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); +} + +void __init trace_init(void) +{ trace_event_init(); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ae1cce91fead25a065899109e426a6cc1e597d28..39fd77330aabb12a68d2104d9c20e81ea739f20e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -262,6 +262,9 @@ struct trace_array { #ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; +#ifdef CONFIG_DYNAMIC_FTRACE + struct list_head func_probes; +#endif /* function tracing enabled */ int function_enabled; #endif @@ -579,6 +582,8 @@ void tracing_reset_all_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); int tracer_tracing_is_on(struct trace_array *tr); +void tracer_tracing_on(struct trace_array *tr); +void tracer_tracing_off(struct trace_array *tr); struct dentry *trace_create_file(const char *name, umode_t mode, struct dentry *parent, @@ -696,6 +701,9 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; +void ftrace_init_trace_array(struct trace_array *tr); +#else +static inline void ftrace_init_trace_array(struct trace_array *tr) { } #endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); @@ -880,6 +888,14 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER +struct ftrace_func_command { + struct list_head list; + char *name; + int (*func)(struct trace_array *tr, + struct ftrace_hash *hash, + char *func, char *cmd, + char *params, int enable); +}; extern bool ftrace_filter_param __initdata; static inline int ftrace_trace_task(struct trace_array *tr) { @@ -896,6 +912,9 @@ int using_ftrace_ops_list_func(void); void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer); void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d_tracer); +void ftrace_clear_pids(struct trace_array *tr); +int init_function_trace(void); +void ftrace_pid_follow_fork(struct trace_array *tr, bool enable); #else static inline int ftrace_trace_task(struct trace_array *tr) { @@ -914,15 +933,76 @@ ftrace_init_global_array_ops(struct trace_array *tr) { } static inline void ftrace_reset_array_ops(struct trace_array *tr) { } static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { } static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { } +static inline void ftrace_clear_pids(struct trace_array *tr) { } +static inline int init_function_trace(void) { return 0; } +static inline void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) { } /* ftace_func_t type is not defined, use macro instead of static inline */ #define ftrace_init_array_ops(tr, func) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER */ #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) + +struct ftrace_probe_ops { + void (*func)(unsigned long ip, + unsigned long parent_ip, + struct trace_array *tr, + struct ftrace_probe_ops *ops, + void *data); + int (*init)(struct ftrace_probe_ops *ops, + struct trace_array *tr, + unsigned long ip, void *init_data, + void **data); + void (*free)(struct ftrace_probe_ops *ops, + struct trace_array *tr, + unsigned long ip, void *data); + int (*print)(struct seq_file *m, + unsigned long ip, + struct ftrace_probe_ops *ops, + void *data); +}; + +struct ftrace_func_mapper; +typedef int (*ftrace_mapper_func)(void *data); + +struct ftrace_func_mapper *allocate_ftrace_func_mapper(void); +void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, + unsigned long ip); +int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, + unsigned long ip, void *data); +void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, + unsigned long ip); +void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, + ftrace_mapper_func free_func); + +extern int +register_ftrace_function_probe(char *glob, struct trace_array *tr, + struct ftrace_probe_ops *ops, void *data); +extern int +unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, + struct ftrace_probe_ops *ops); +extern void clear_ftrace_function_probes(struct trace_array *tr); + +int register_ftrace_command(struct ftrace_func_command *cmd); +int unregister_ftrace_command(struct ftrace_func_command *cmd); + void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent); void ftrace_destroy_filter_files(struct ftrace_ops *ops); #else +struct ftrace_func_command; + +static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) +{ + return -EINVAL; +} +static inline __init int unregister_ftrace_command(char *cmd_name) +{ + return -EINVAL; +} +static inline void clear_ftrace_function_probes(struct trace_array *tr) +{ +} + /* * The ops parameter passed in is usually undefined. * This must be a macro. @@ -987,11 +1067,13 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, #ifdef CONFIG_FUNCTION_TRACER # define FUNCTION_FLAGS \ - C(FUNCTION, "function-trace"), + C(FUNCTION, "function-trace"), \ + C(FUNC_FORK, "function-fork"), # define FUNCTION_DEFAULT_FLAGS TRACE_ITER_FUNCTION #else # define FUNCTION_FLAGS # define FUNCTION_DEFAULT_FLAGS 0UL +# define TRACE_ITER_FUNC_FORK 0UL #endif #ifdef CONFIG_STACKTRACE diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c index e49fbe901cfc64c60734d52a0a7c8db320606b00..16a8cf02eee96ae4fcac5d0641673b6c38f003fd 100644 --- a/kernel/trace/trace_benchmark.c +++ b/kernel/trace/trace_benchmark.c @@ -153,10 +153,18 @@ static int benchmark_event_kthread(void *arg) trace_do_benchmark(); /* - * We don't go to sleep, but let others - * run as well. + * We don't go to sleep, but let others run as well. + * This is bascially a "yield()" to let any task that + * wants to run, schedule in, but if the CPU is idle, + * we'll keep burning cycles. + * + * Note the _rcu_qs() version of cond_resched() will + * notify synchronize_rcu_tasks() that this thread has + * passed a quiescent state for rcu_tasks. Otherwise + * this thread will never voluntarily schedule which would + * block synchronize_rcu_tasks() indefinitely. */ - cond_resched(); + cond_resched_rcu_qs(); } return 0; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c203ac4df791f28e074d387ce573cec74bc40cf6..adcdbbeae0108bad436d0af31297dc555ad7f545 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -348,14 +348,14 @@ FTRACE_ENTRY(hwlat, hwlat_entry, __field( u64, duration ) __field( u64, outer_duration ) __field( u64, nmi_total_ts ) - __field_struct( struct timespec, timestamp ) - __field_desc( long, timestamp, tv_sec ) + __field_struct( struct timespec64, timestamp ) + __field_desc( s64, timestamp, tv_sec ) __field_desc( long, timestamp, tv_nsec ) __field( unsigned int, nmi_count ) __field( unsigned int, seqnum ) ), - F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", + F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", __entry->seqnum, __entry->tv_sec, __entry->tv_nsec, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 93116549a284366a7670bdd82316b049ec72412e..e7973e10398c83cc9210b9ab6a70c23a07950297 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2460,15 +2460,8 @@ struct event_probe_data { bool enable; }; -static void -event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) +static void update_event_probe(struct event_probe_data *data) { - struct event_probe_data **pdata = (struct event_probe_data **)_data; - struct event_probe_data *data = *pdata; - - if (!data) - return; - if (data->enable) clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); else @@ -2476,77 +2469,141 @@ event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) } static void -event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data) +event_enable_probe(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - struct event_probe_data **pdata = (struct event_probe_data **)_data; - struct event_probe_data *data = *pdata; + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; + void **pdata; - if (!data) + pdata = ftrace_func_mapper_find_ip(mapper, ip); + if (!pdata || !*pdata) + return; + + edata = *pdata; + update_event_probe(edata); +} + +static void +event_enable_count_probe(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) +{ + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; + void **pdata; + + pdata = ftrace_func_mapper_find_ip(mapper, ip); + if (!pdata || !*pdata) return; - if (!data->count) + edata = *pdata; + + if (!edata->count) return; /* Skip if the event is in a state we want to switch to */ - if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) + if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; - if (data->count != -1) - (data->count)--; + if (edata->count != -1) + (edata->count)--; - event_enable_probe(ip, parent_ip, _data); + update_event_probe(edata); } static int event_enable_print(struct seq_file *m, unsigned long ip, - struct ftrace_probe_ops *ops, void *_data) + struct ftrace_probe_ops *ops, void *data) { - struct event_probe_data *data = _data; + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; + void **pdata; + + pdata = ftrace_func_mapper_find_ip(mapper, ip); + + if (WARN_ON_ONCE(!pdata || !*pdata)) + return 0; + + edata = *pdata; seq_printf(m, "%ps:", (void *)ip); seq_printf(m, "%s:%s:%s", - data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, - data->file->event_call->class->system, - trace_event_name(data->file->event_call)); + edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, + edata->file->event_call->class->system, + trace_event_name(edata->file->event_call)); - if (data->count == -1) + if (edata->count == -1) seq_puts(m, ":unlimited\n"); else - seq_printf(m, ":count=%ld\n", data->count); + seq_printf(m, ":count=%ld\n", edata->count); return 0; } static int -event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip, - void **_data) +event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr, + unsigned long ip, void *init_data, void **data) { - struct event_probe_data **pdata = (struct event_probe_data **)_data; - struct event_probe_data *data = *pdata; + struct ftrace_func_mapper *mapper = *data; + struct event_probe_data *edata = init_data; + int ret; + + if (!mapper) { + mapper = allocate_ftrace_func_mapper(); + if (!mapper) + return -ENODEV; + *data = mapper; + } + + ret = ftrace_func_mapper_add_ip(mapper, ip, edata); + if (ret < 0) + return ret; + + edata->ref++; - data->ref++; + return 0; +} + +static int free_probe_data(void *data) +{ + struct event_probe_data *edata = data; + + edata->ref--; + if (!edata->ref) { + /* Remove the SOFT_MODE flag */ + __ftrace_event_enable_disable(edata->file, 0, 1); + module_put(edata->file->event_call->mod); + kfree(edata); + } return 0; } static void -event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip, - void **_data) +event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr, + unsigned long ip, void *data) { - struct event_probe_data **pdata = (struct event_probe_data **)_data; - struct event_probe_data *data = *pdata; + struct ftrace_func_mapper *mapper = data; + struct event_probe_data *edata; - if (WARN_ON_ONCE(data->ref <= 0)) + if (!ip) { + if (!mapper) + return; + free_ftrace_func_mapper(mapper, free_probe_data); return; - - data->ref--; - if (!data->ref) { - /* Remove the SOFT_MODE flag */ - __ftrace_event_enable_disable(data->file, 0, 1); - module_put(data->file->event_call->mod); - kfree(data); } - *pdata = NULL; + + edata = ftrace_func_mapper_remove_ip(mapper, ip); + + if (WARN_ON_ONCE(!edata)) + return; + + if (WARN_ON_ONCE(edata->ref <= 0)) + return; + + free_probe_data(edata); } static struct ftrace_probe_ops event_enable_probe_ops = { @@ -2578,10 +2635,9 @@ static struct ftrace_probe_ops event_disable_count_probe_ops = { }; static int -event_enable_func(struct ftrace_hash *hash, +event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enabled) { - struct trace_array *tr = top_trace_array(); struct trace_event_file *file; struct ftrace_probe_ops *ops; struct event_probe_data *data; @@ -2619,12 +2675,12 @@ event_enable_func(struct ftrace_hash *hash, ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; if (glob[0] == '!') { - unregister_ftrace_function_probe_func(glob+1, ops); - ret = 0; + ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); goto out; } ret = -ENOMEM; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) goto out; @@ -2661,7 +2717,8 @@ event_enable_func(struct ftrace_hash *hash, ret = __ftrace_event_enable_disable(file, 1, 1); if (ret < 0) goto out_put; - ret = register_ftrace_function_probe(glob, ops, data); + + ret = register_ftrace_function_probe(glob, tr, ops, data); /* * The above returns on success the # of functions enabled, * but if it didn't find any functions it returns zero. diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 0efa00d80623db3de438a5233a8ee6f84fae65ed..a3bddbfd0874a26bcdbc84b3b4713cb206a0998b 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -267,10 +267,14 @@ static struct tracer function_trace __tracer_data = }; #ifdef CONFIG_DYNAMIC_FTRACE -static void update_traceon_count(void **data, bool on) +static void update_traceon_count(struct ftrace_probe_ops *ops, + unsigned long ip, + struct trace_array *tr, bool on, + void *data) { - long *count = (long *)data; - long old_count = *count; + struct ftrace_func_mapper *mapper = data; + long *count; + long old_count; /* * Tracing gets disabled (or enabled) once per count. @@ -301,23 +305,22 @@ static void update_traceon_count(void **data, bool on) * setting the tracing_on file. But we currently don't care * about that. */ - if (!old_count) + count = (long *)ftrace_func_mapper_find_ip(mapper, ip); + old_count = *count; + + if (old_count <= 0) return; /* Make sure we see count before checking tracing state */ smp_rmb(); - if (on == !!tracing_is_on()) + if (on == !!tracer_tracing_is_on(tr)) return; if (on) - tracing_on(); + tracer_tracing_on(tr); else - tracing_off(); - - /* unlimited? */ - if (old_count == -1) - return; + tracer_tracing_off(tr); /* Make sure tracing state is visible before updating count */ smp_wmb(); @@ -326,33 +329,41 @@ static void update_traceon_count(void **data, bool on) } static void -ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - update_traceon_count(data, 1); + update_traceon_count(ops, ip, tr, 1, data); } static void -ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - update_traceon_count(data, 0); + update_traceon_count(ops, ip, tr, 0, data); } static void -ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_traceon(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - if (tracing_is_on()) + if (tracer_tracing_is_on(tr)) return; - tracing_on(); + tracer_tracing_on(tr); } static void -ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_traceoff(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - if (!tracing_is_on()) + if (!tracer_tracing_is_on(tr)) return; - tracing_off(); + tracer_tracing_off(tr); } /* @@ -364,144 +375,218 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data) */ #define STACK_SKIP 4 +static __always_inline void trace_stack(struct trace_array *tr) +{ + unsigned long flags; + int pc; + + local_save_flags(flags); + pc = preempt_count(); + + __trace_stack(tr, flags, STACK_SKIP, pc); +} + static void -ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - trace_dump_stack(STACK_SKIP); + trace_stack(tr); } static void -ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - long *count = (long *)data; + struct ftrace_func_mapper *mapper = data; + long *count; long old_count; long new_count; + if (!tracing_is_on()) + return; + + /* unlimited? */ + if (!mapper) { + trace_stack(tr); + return; + } + + count = (long *)ftrace_func_mapper_find_ip(mapper, ip); + /* * Stack traces should only execute the number of times the * user specified in the counter. */ do { - - if (!tracing_is_on()) - return; - old_count = *count; if (!old_count) return; - /* unlimited? */ - if (old_count == -1) { - trace_dump_stack(STACK_SKIP); - return; - } - new_count = old_count - 1; new_count = cmpxchg(count, old_count, new_count); if (new_count == old_count) - trace_dump_stack(STACK_SKIP); + trace_stack(tr); + + if (!tracing_is_on()) + return; } while (new_count != old_count); } -static int update_count(void **data) +static int update_count(struct ftrace_probe_ops *ops, unsigned long ip, + void *data) { - unsigned long *count = (long *)data; + struct ftrace_func_mapper *mapper = data; + long *count = NULL; - if (!*count) - return 0; + if (mapper) + count = (long *)ftrace_func_mapper_find_ip(mapper, ip); - if (*count != -1) + if (count) { + if (*count <= 0) + return 0; (*count)--; + } return 1; } static void -ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - if (update_count(data)) + if (update_count(ops, ip, data)) ftrace_dump(DUMP_ALL); } /* Only dump the current CPU buffer. */ static void -ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data) +ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr, struct ftrace_probe_ops *ops, + void *data) { - if (update_count(data)) + if (update_count(ops, ip, data)) ftrace_dump(DUMP_ORIG); } static int ftrace_probe_print(const char *name, struct seq_file *m, - unsigned long ip, void *data) + unsigned long ip, struct ftrace_probe_ops *ops, + void *data) { - long count = (long)data; + struct ftrace_func_mapper *mapper = data; + long *count = NULL; seq_printf(m, "%ps:%s", (void *)ip, name); - if (count == -1) - seq_puts(m, ":unlimited\n"); + if (mapper) + count = (long *)ftrace_func_mapper_find_ip(mapper, ip); + + if (count) + seq_printf(m, ":count=%ld\n", *count); else - seq_printf(m, ":count=%ld\n", count); + seq_puts(m, ":unlimited\n"); return 0; } static int ftrace_traceon_print(struct seq_file *m, unsigned long ip, - struct ftrace_probe_ops *ops, void *data) + struct ftrace_probe_ops *ops, + void *data) { - return ftrace_probe_print("traceon", m, ip, data); + return ftrace_probe_print("traceon", m, ip, ops, data); } static int ftrace_traceoff_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { - return ftrace_probe_print("traceoff", m, ip, data); + return ftrace_probe_print("traceoff", m, ip, ops, data); } static int ftrace_stacktrace_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { - return ftrace_probe_print("stacktrace", m, ip, data); + return ftrace_probe_print("stacktrace", m, ip, ops, data); } static int ftrace_dump_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { - return ftrace_probe_print("dump", m, ip, data); + return ftrace_probe_print("dump", m, ip, ops, data); } static int ftrace_cpudump_print(struct seq_file *m, unsigned long ip, struct ftrace_probe_ops *ops, void *data) { - return ftrace_probe_print("cpudump", m, ip, data); + return ftrace_probe_print("cpudump", m, ip, ops, data); +} + + +static int +ftrace_count_init(struct ftrace_probe_ops *ops, struct trace_array *tr, + unsigned long ip, void *init_data, void **data) +{ + struct ftrace_func_mapper *mapper = *data; + + if (!mapper) { + mapper = allocate_ftrace_func_mapper(); + if (!mapper) + return -ENOMEM; + *data = mapper; + } + + return ftrace_func_mapper_add_ip(mapper, ip, init_data); +} + +static void +ftrace_count_free(struct ftrace_probe_ops *ops, struct trace_array *tr, + unsigned long ip, void *data) +{ + struct ftrace_func_mapper *mapper = data; + + if (!ip) { + free_ftrace_func_mapper(mapper, NULL); + return; + } + + ftrace_func_mapper_remove_ip(mapper, ip); } static struct ftrace_probe_ops traceon_count_probe_ops = { .func = ftrace_traceon_count, .print = ftrace_traceon_print, + .init = ftrace_count_init, + .free = ftrace_count_free, }; static struct ftrace_probe_ops traceoff_count_probe_ops = { .func = ftrace_traceoff_count, .print = ftrace_traceoff_print, + .init = ftrace_count_init, + .free = ftrace_count_free, }; static struct ftrace_probe_ops stacktrace_count_probe_ops = { .func = ftrace_stacktrace_count, .print = ftrace_stacktrace_print, + .init = ftrace_count_init, + .free = ftrace_count_free, }; static struct ftrace_probe_ops dump_probe_ops = { .func = ftrace_dump_probe, .print = ftrace_dump_print, + .init = ftrace_count_init, + .free = ftrace_count_free, }; static struct ftrace_probe_ops cpudump_probe_ops = { @@ -525,7 +610,8 @@ static struct ftrace_probe_ops stacktrace_probe_ops = { }; static int -ftrace_trace_probe_callback(struct ftrace_probe_ops *ops, +ftrace_trace_probe_callback(struct trace_array *tr, + struct ftrace_probe_ops *ops, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { @@ -537,10 +623,8 @@ ftrace_trace_probe_callback(struct ftrace_probe_ops *ops, if (!enable) return -EINVAL; - if (glob[0] == '!') { - unregister_ftrace_function_probe_func(glob+1, ops); - return 0; - } + if (glob[0] == '!') + return unregister_ftrace_function_probe_func(glob+1, tr, ops); if (!param) goto out_reg; @@ -559,13 +643,13 @@ ftrace_trace_probe_callback(struct ftrace_probe_ops *ops, return ret; out_reg: - ret = register_ftrace_function_probe(glob, ops, count); + ret = register_ftrace_function_probe(glob, tr, ops, count); return ret < 0 ? ret : 0; } static int -ftrace_trace_onoff_callback(struct ftrace_hash *hash, +ftrace_trace_onoff_callback(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { struct ftrace_probe_ops *ops; @@ -576,24 +660,24 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash, else ops = param ? &traceoff_count_probe_ops : &traceoff_probe_ops; - return ftrace_trace_probe_callback(ops, hash, glob, cmd, + return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd, param, enable); } static int -ftrace_stacktrace_callback(struct ftrace_hash *hash, +ftrace_stacktrace_callback(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { struct ftrace_probe_ops *ops; ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops; - return ftrace_trace_probe_callback(ops, hash, glob, cmd, + return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd, param, enable); } static int -ftrace_dump_callback(struct ftrace_hash *hash, +ftrace_dump_callback(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { struct ftrace_probe_ops *ops; @@ -601,12 +685,12 @@ ftrace_dump_callback(struct ftrace_hash *hash, ops = &dump_probe_ops; /* Only dump once. */ - return ftrace_trace_probe_callback(ops, hash, glob, cmd, + return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd, "1", enable); } static int -ftrace_cpudump_callback(struct ftrace_hash *hash, +ftrace_cpudump_callback(struct trace_array *tr, struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enable) { struct ftrace_probe_ops *ops; @@ -614,7 +698,7 @@ ftrace_cpudump_callback(struct ftrace_hash *hash, ops = &cpudump_probe_ops; /* Only dump once. */ - return ftrace_trace_probe_callback(ops, hash, glob, cmd, + return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd, "1", enable); } @@ -687,9 +771,8 @@ static inline int init_func_cmd_traceon(void) } #endif /* CONFIG_DYNAMIC_FTRACE */ -static __init int init_function_trace(void) +__init int init_function_trace(void) { init_func_cmd_traceon(); return register_tracer(&function_trace); } -core_initcall(init_function_trace); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 21ea6ae77d93fd45f50ed71059fbe4954b9ef2c0..d7c8e4ec3d9d6cd92b6d78a2c66c984cbbc6fb6c 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -79,12 +79,12 @@ static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC; /* Individual latency samples are stored here when detected. */ struct hwlat_sample { - u64 seqnum; /* unique sequence */ - u64 duration; /* delta */ - u64 outer_duration; /* delta (outer loop) */ - u64 nmi_total_ts; /* Total time spent in NMIs */ - struct timespec timestamp; /* wall time */ - int nmi_count; /* # NMIs during this sample */ + u64 seqnum; /* unique sequence */ + u64 duration; /* delta */ + u64 outer_duration; /* delta (outer loop) */ + u64 nmi_total_ts; /* Total time spent in NMIs */ + struct timespec64 timestamp; /* wall time */ + int nmi_count; /* # NMIs during this sample */ }; /* keep the global state somewhere. */ @@ -250,7 +250,7 @@ static int get_sample(void) s.seqnum = hwlat_data.count; s.duration = sample; s.outer_duration = outer_sample; - s.timestamp = CURRENT_TIME; + ktime_get_real_ts64(&s.timestamp); s.nmi_total_ts = nmi_total_ts; s.nmi_count = nmi_count; trace_hwlat_sample(&s); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5f688cc724f00abcd9d09686adc75fb96e4b5b02..c129fca6ec993a85aeb30c7e3aa69254e3f5ac16 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -25,6 +25,7 @@ #include "trace_probe.h" #define KPROBE_EVENT_SYSTEM "kprobes" +#define KRETPROBE_MAXACTIVE_MAX 4096 /** * Kprobe event core functions @@ -282,6 +283,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, void *addr, const char *symbol, unsigned long offs, + int maxactive, int nargs, bool is_return) { struct trace_kprobe *tk; @@ -309,6 +311,8 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, else tk->rp.kp.pre_handler = kprobe_dispatcher; + tk->rp.maxactive = maxactive; + if (!event || !is_good_name(event)) { ret = -EINVAL; goto error; @@ -598,8 +602,10 @@ static int create_trace_kprobe(int argc, char **argv) { /* * Argument syntax: - * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] - * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] + * - Add kprobe: + * p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] + * - Add kretprobe: + * r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] * Fetch args: * $retval : fetch return value * $stack : fetch stack address @@ -619,6 +625,7 @@ static int create_trace_kprobe(int argc, char **argv) int i, ret = 0; bool is_return = false, is_delete = false; char *symbol = NULL, *event = NULL, *group = NULL; + int maxactive = 0; char *arg; unsigned long offset = 0; void *addr = NULL; @@ -637,8 +644,28 @@ static int create_trace_kprobe(int argc, char **argv) return -EINVAL; } - if (argv[0][1] == ':') { - event = &argv[0][2]; + event = strchr(&argv[0][1], ':'); + if (event) { + event[0] = '\0'; + event++; + } + if (is_return && isdigit(argv[0][1])) { + ret = kstrtouint(&argv[0][1], 0, &maxactive); + if (ret) { + pr_info("Failed to parse maxactive.\n"); + return ret; + } + /* kretprobes instances are iterated over via a list. The + * maximum should stay reasonable. + */ + if (maxactive > KRETPROBE_MAXACTIVE_MAX) { + pr_info("Maxactive is too big (%d > %d).\n", + maxactive, KRETPROBE_MAXACTIVE_MAX); + return -E2BIG; + } + } + + if (event) { if (strchr(event, '/')) { group = event; event = strchr(group, '/') + 1; @@ -681,10 +708,6 @@ static int create_trace_kprobe(int argc, char **argv) return -EINVAL; } if (isdigit(argv[1][0])) { - if (is_return) { - pr_info("Return probe point must be a symbol.\n"); - return -EINVAL; - } /* an address specified */ ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { @@ -700,8 +723,9 @@ static int create_trace_kprobe(int argc, char **argv) pr_info("Failed to parse symbol.\n"); return ret; } - if (offset && is_return) { - pr_info("Return probe must be used without offset.\n"); + if (offset && is_return && + !function_offset_within_entry(NULL, symbol, offset)) { + pr_info("Given offset is not valid for return probe.\n"); return -EINVAL; } } @@ -718,8 +742,8 @@ static int create_trace_kprobe(int argc, char **argv) is_return ? 'r' : 'p', addr); event = buf; } - tk = alloc_trace_kprobe(group, event, addr, symbol, offset, argc, - is_return); + tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, + argc, is_return); if (IS_ERR(tk)) { pr_info("Failed to allocate trace_probe.(%d)\n", (int)PTR_ERR(tk)); @@ -1511,6 +1535,11 @@ static __init int kprobe_trace_self_tests_init(void) end: release_all_trace_kprobes(); + /* + * Wait for the optimizer work to finish. Otherwise it might fiddle + * with probes in already freed __init text. + */ + wait_for_kprobe_optimizer(); if (warn) pr_cont("NG: Some tests are failed. Please check them.\n"); else diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 02a4aeb22c4785cc808a7906c970dfa420796cbe..08f9bab8089e74c2acd63e0eaa7ef53f922b88c4 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -4,7 +4,6 @@ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt * */ - #include #include #include @@ -1161,11 +1160,11 @@ trace_hwlat_print(struct trace_iterator *iter, int flags, trace_assign_type(field, entry); - trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld", + trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%lld.%09ld", field->seqnum, field->duration, field->outer_duration, - field->timestamp.tv_sec, + (long long)field->timestamp.tv_sec, field->timestamp.tv_nsec); if (field->nmi_count) { @@ -1195,10 +1194,10 @@ trace_hwlat_raw(struct trace_iterator *iter, int flags, trace_assign_type(field, iter->ent); - trace_seq_printf(s, "%llu %lld %ld %09ld %u\n", + trace_seq_printf(s, "%llu %lld %lld %09ld %u\n", field->duration, field->outer_duration, - field->timestamp.tv_sec, + (long long)field->timestamp.tv_sec, field->timestamp.tv_nsec, field->seqnum); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5fb1f2c87e6b846b7f9d32823ef3aede4b28db9e..76aa04d4c9257482181c9b2885530fb4dc417edd 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -35,7 +35,7 @@ unsigned long stack_trace_max_size; arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -static DEFINE_PER_CPU(int, trace_active); +DEFINE_PER_CPU(int, disable_stack_tracer); static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; @@ -96,6 +96,14 @@ check_stack(unsigned long ip, unsigned long *stack) if (in_nmi()) return; + /* + * There's a slight chance that we are tracing inside the + * RCU infrastructure, and rcu_irq_enter() will not work + * as expected. + */ + if (unlikely(rcu_irq_enter_disabled())) + return; + local_irq_save(flags); arch_spin_lock(&stack_trace_max_lock); @@ -207,13 +215,12 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { unsigned long stack; - int cpu; preempt_disable_notrace(); - cpu = raw_smp_processor_id(); /* no atomic needed, we only modify this variable by this cpu */ - if (per_cpu(trace_active, cpu)++ != 0) + __this_cpu_inc(disable_stack_tracer); + if (__this_cpu_read(disable_stack_tracer) != 1) goto out; ip += MCOUNT_INSN_SIZE; @@ -221,7 +228,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, check_stack(ip, &stack); out: - per_cpu(trace_active, cpu)--; + __this_cpu_dec(disable_stack_tracer); /* prevent recursion in schedule */ preempt_enable_notrace(); } @@ -253,7 +260,6 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, long *ptr = filp->private_data; unsigned long val, flags; int ret; - int cpu; ret = kstrtoul_from_user(ubuf, count, 10, &val); if (ret) @@ -264,16 +270,15 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, /* * In case we trace inside arch_spin_lock() or after (NMI), * we will cause circular lock, so we also need to increase - * the percpu trace_active here. + * the percpu disable_stack_tracer here. */ - cpu = smp_processor_id(); - per_cpu(trace_active, cpu)++; + __this_cpu_inc(disable_stack_tracer); arch_spin_lock(&stack_trace_max_lock); *ptr = val; arch_spin_unlock(&stack_trace_max_lock); - per_cpu(trace_active, cpu)--; + __this_cpu_dec(disable_stack_tracer); local_irq_restore(flags); return count; @@ -307,12 +312,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - int cpu; - local_irq_disable(); - cpu = smp_processor_id(); - per_cpu(trace_active, cpu)++; + __this_cpu_inc(disable_stack_tracer); arch_spin_lock(&stack_trace_max_lock); @@ -324,12 +326,9 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { - int cpu; - arch_spin_unlock(&stack_trace_max_lock); - cpu = smp_processor_id(); - per_cpu(trace_active, cpu)--; + __this_cpu_dec(disable_stack_tracer); local_irq_enable(); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c0168b7da1eaf22c216147ca5ebd03ef7311dca8..c74bf39ef7643fdc4ecc219aa25b58feabcd91f3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3209,9 +3209,8 @@ static int init_worker_pool(struct worker_pool *pool) INIT_LIST_HEAD(&pool->idle_list); hash_init(pool->busy_hash); - init_timer_deferrable(&pool->idle_timer); - pool->idle_timer.function = idle_worker_timeout; - pool->idle_timer.data = (unsigned long)pool; + setup_deferrable_timer(&pool->idle_timer, idle_worker_timeout, + (unsigned long)pool); setup_timer(&pool->mayday_timer, pool_mayday_timeout, (unsigned long)pool); @@ -4735,6 +4734,29 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg) return wfc.ret; } EXPORT_SYMBOL_GPL(work_on_cpu); + +/** + * work_on_cpu_safe - run a function in thread context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * @arg: the function argument + * + * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold + * any locks which would prevent @fn from completing. + * + * Return: The value @fn returns. + */ +long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +{ + long ret = -ENODEV; + + get_online_cpus(); + if (cpu_online(cpu)) + ret = work_on_cpu(cpu, fn, arg); + put_online_cpus(); + return ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu_safe); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 97d62c2da6c25dd5721f8c1c75264c83201f7247..e4587ebe52c7ec3c7923c661dea9b8f15b77bd8e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -130,7 +130,8 @@ config DYNAMIC_DEBUG nullarbor:~ # echo -n 'func svc_process -p' > /dynamic_debug/control - See Documentation/dynamic-debug-howto.txt for additional information. + See Documentation/admin-guide/dynamic-debug-howto.rst for additional + information. endmenu # "printk and dmesg options" @@ -356,7 +357,7 @@ config FRAME_POINTER bool "Compile the kernel with frame pointers" depends on DEBUG_KERNEL && \ (CRIS || M68K || FRV || UML || \ - AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \ + SUPERH || BLACKFIN || MN10300 || METAG) || \ ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help @@ -404,8 +405,8 @@ config MAGIC_SYSRQ by pressing various keys while holding SysRq (Alt+PrintScreen). It also works on a serial console (on PC hardware at least), if you send a BREAK and then within 5 seconds a command keypress. The - keys are documented in . Don't say Y - unless you really know what this hack does. + keys are documented in . + Don't say Y unless you really know what this hack does. config MAGIC_SYSRQ_DEFAULT_ENABLE hex "Enable magic SysRq key functions by default" @@ -414,7 +415,7 @@ config MAGIC_SYSRQ_DEFAULT_ENABLE help Specifies which SysRq key functions are enabled by default. This may be set to 1 or 0 to enable or disable them all, or - to a bitmask as described in Documentation/sysrq.txt. + to a bitmask as described in Documentation/admin-guide/sysrq.rst. config MAGIC_SYSRQ_SERIAL bool "Enable magic SysRq key over serial" @@ -1103,9 +1104,6 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. -config PROVE_LOCKING_SMALL - bool - config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -1114,6 +1112,9 @@ config LOCKDEP select KALLSYMS select KALLSYMS_ALL +config LOCKDEP_SMALL + bool + config LOCK_STAT bool "Lock usage statistics" depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -1718,19 +1719,21 @@ config LKDTM Documentation/fault-injection/provoke-crashes.txt config TEST_LIST_SORT - bool "Linked list sorting test" - depends on DEBUG_KERNEL + tristate "Linked list sorting test" + depends on DEBUG_KERNEL || m help Enable this to turn on 'list_sort()' function test. This test is - executed only once during system boot, so affects only boot time. + executed only once during system boot (so affects only boot time), + or at module load time. If unsure, say N. config TEST_SORT - bool "Array-based sort test" - depends on DEBUG_KERNEL + tristate "Array-based sort test" + depends on DEBUG_KERNEL || m help - This option enables the self-test function of 'sort()' at boot. + This option enables the self-test function of 'sort()' at boot, + or at module load time. If unsure, say N. diff --git a/lib/Makefile b/lib/Makefile index 320ac46a8725b6f9dac0726767a4976a2bd8907c..0166fbc0fa811f8462bc2f3dafcae9408ae922c4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -19,7 +19,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ idr.o int_sqrt.o extable.o \ - sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ + sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o \ @@ -41,7 +41,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o + once.o refcount.o usercopy.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o @@ -52,6 +52,7 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o diff --git a/lib/bitmap.c b/lib/bitmap.c index 0b66f0e5eb6bb172ae240fe59dd1eec0d9065493..08c6ef3a2b6fc5134968fd05bfbc8cd0b2ee91aa 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -502,11 +502,11 @@ EXPORT_SYMBOL(bitmap_print_to_pagebuf); * Syntax: range:used_size/group_size * Example: 0-1023:2/256 ==> 0,1,256,257,512,513,768,769 * - * Returns 0 on success, -errno on invalid input strings. - * Error values: - * %-EINVAL: second number in range smaller than first - * %-EINVAL: invalid character in string - * %-ERANGE: bit number specified too large for mask + * Returns: 0 on success, -errno on invalid input strings. Error values: + * + * - ``-EINVAL``: second number in range smaller than first + * - ``-EINVAL``: invalid character in string + * - ``-ERANGE``: bit number specified too large for mask */ static int __bitmap_parselist(const char *buf, unsigned int buflen, int is_user, unsigned long *maskp, @@ -864,14 +864,16 @@ EXPORT_SYMBOL(bitmap_bitremap); * 11 was set in @orig had no affect on @dst. * * Example [2] for bitmap_fold() + bitmap_onto(): - * Let's say @relmap has these ten bits set: + * Let's say @relmap has these ten bits set:: + * * 40 41 42 43 45 48 53 61 74 95 + * * (for the curious, that's 40 plus the first ten terms of the * Fibonacci sequence.) * * Further lets say we use the following code, invoking * bitmap_fold() then bitmap_onto, as suggested above to - * avoid the possibility of an empty @dst result: + * avoid the possibility of an empty @dst result:: * * unsigned long *tmp; // a temporary bitmap's bits * @@ -882,22 +884,26 @@ EXPORT_SYMBOL(bitmap_bitremap); * various @orig's. I list the zero-based positions of each set bit. * The tmp column shows the intermediate result, as computed by * using bitmap_fold() to fold the @orig bitmap modulo ten - * (the weight of @relmap). + * (the weight of @relmap): * + * =============== ============== ================= * @orig tmp @dst * 0 0 40 * 1 1 41 * 9 9 95 - * 10 0 40 (*) + * 10 0 40 [#f1]_ * 1 3 5 7 1 3 5 7 41 43 48 61 * 0 1 2 3 4 0 1 2 3 4 40 41 42 43 45 * 0 9 18 27 0 9 8 7 40 61 74 95 * 0 10 20 30 0 40 * 0 11 22 33 0 1 2 3 40 41 42 43 * 0 12 24 36 0 2 4 6 40 42 45 53 - * 78 102 211 1 2 8 41 42 74 (*) + * 78 102 211 1 2 8 41 42 74 [#f1]_ + * =============== ============== ================= + * + * .. [#f1] * - * (*) For these marked lines, if we hadn't first done bitmap_fold() + * For these marked lines, if we hadn't first done bitmap_fold() * into tmp, then the @dst result would have been empty. * * If either of @orig or @relmap is empty (no set bits), then @dst diff --git a/lib/bug.c b/lib/bug.c index 06edbbef062322f12662f95d726b2d83856ac62c..a6a1137d06db75f94d005a9e646e061847be1c93 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -47,7 +47,7 @@ #include #include -extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; static inline unsigned long bug_addr(const struct bug_entry *bug) { @@ -62,10 +62,10 @@ static inline unsigned long bug_addr(const struct bug_entry *bug) /* Updates are protected by module mutex */ static LIST_HEAD(module_bug_list); -static const struct bug_entry *module_find_bug(unsigned long bugaddr) +static struct bug_entry *module_find_bug(unsigned long bugaddr) { struct module *mod; - const struct bug_entry *bug = NULL; + struct bug_entry *bug = NULL; rcu_read_lock_sched(); list_for_each_entry_rcu(mod, &module_bug_list, bug_list) { @@ -122,15 +122,15 @@ void module_bug_cleanup(struct module *mod) #else -static inline const struct bug_entry *module_find_bug(unsigned long bugaddr) +static inline struct bug_entry *module_find_bug(unsigned long bugaddr) { return NULL; } #endif -const struct bug_entry *find_bug(unsigned long bugaddr) +struct bug_entry *find_bug(unsigned long bugaddr) { - const struct bug_entry *bug; + struct bug_entry *bug; for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) if (bugaddr == bug_addr(bug)) @@ -141,9 +141,9 @@ const struct bug_entry *find_bug(unsigned long bugaddr) enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) { - const struct bug_entry *bug; + struct bug_entry *bug; const char *file; - unsigned line, warning; + unsigned line, warning, once, done; if (!is_valid_bugaddr(bugaddr)) return BUG_TRAP_TYPE_NONE; @@ -164,6 +164,18 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) line = bug->line; #endif warning = (bug->flags & BUGFLAG_WARNING) != 0; + once = (bug->flags & BUGFLAG_ONCE) != 0; + done = (bug->flags & BUGFLAG_DONE) != 0; + + if (warning && once) { + if (done) + return BUG_TRAP_TYPE_WARN; + + /* + * Since this is the only store, concurrency is not an issue. + */ + bug->flags |= BUGFLAG_DONE; + } } if (warning) { diff --git a/lib/cmdline.c b/lib/cmdline.c index 8f13cf73c2ecf916c203ef7e963d250a113c3dee..3c6432df7e63466a24d41dead807c7ef14c0ab86 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * If a hyphen was found in get_option, this will handle the @@ -189,3 +190,59 @@ bool parse_option_str(const char *str, const char *option) return false; } + +/* + * Parse a string to get a param value pair. + * You can use " around spaces, but can't escape ". + * Hyphens and underscores equivalent in parameter names. + */ +char *next_arg(char *args, char **param, char **val) +{ + unsigned int i, equals = 0; + int in_quote = 0, quoted = 0; + char *next; + + if (*args == '"') { + args++; + in_quote = 1; + quoted = 1; + } + + for (i = 0; args[i]; i++) { + if (isspace(args[i]) && !in_quote) + break; + if (equals == 0) { + if (args[i] == '=') + equals = i; + } + if (args[i] == '"') + in_quote = !in_quote; + } + + *param = args; + if (!equals) + *val = NULL; + else { + args[equals] = '\0'; + *val = args + equals + 1; + + /* Don't include quotes in value. */ + if (**val == '"') { + (*val)++; + if (args[i-1] == '"') + args[i-1] = '\0'; + } + } + if (quoted && args[i-1] == '"') + args[i-1] = '\0'; + + if (args[i]) { + args[i] = '\0'; + next = args + i + 1; + } else + next = args + i; + + /* Chew up trailing spaces. */ + return skip_spaces(next); + //return next; +} diff --git a/lib/devres.c b/lib/devres.c index cb1464c411a2b4fa88778343bf33208ccd8dae76..78eca713b1d9ad99caa14deb3b688559aaf0fd63 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -17,7 +17,7 @@ static int devm_ioremap_match(struct device *dev, void *res, void *match_data) /** * devm_ioremap - Managed ioremap() * @dev: Generic device to remap IO address for - * @offset: BUS offset to map + * @offset: Resource address to map * @size: Size of map * * Managed ioremap(). Map is automatically unmapped on driver detach. @@ -45,7 +45,7 @@ EXPORT_SYMBOL(devm_ioremap); /** * devm_ioremap_nocache - Managed ioremap_nocache() * @dev: Generic device to remap IO address for - * @offset: BUS offset to map + * @offset: Resource address to map * @size: Size of map * * Managed ioremap_nocache(). Map is automatically unmapped on driver @@ -74,7 +74,7 @@ EXPORT_SYMBOL(devm_ioremap_nocache); /** * devm_ioremap_wc - Managed ioremap_wc() * @dev: Generic device to remap IO address for - * @offset: BUS offset to map + * @offset: Resource address to map * @size: Size of map * * Managed ioremap_wc(). Map is automatically unmapped on driver detach. diff --git a/lib/dma-debug.c b/lib/dma-debug.c index b157b46cc9a69ca830a2cdbc8abcbc9baab4a63e..ea4cc3dde4f1bac9f3b8337fdac95408cb3e8b35 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -942,21 +942,17 @@ static int device_dma_allocations(struct device *dev, struct dma_debug_entry **o unsigned long flags; int count = 0, i; - local_irq_save(flags); - for (i = 0; i < HASH_SIZE; ++i) { - spin_lock(&dma_entry_hash[i].lock); + spin_lock_irqsave(&dma_entry_hash[i].lock, flags); list_for_each_entry(entry, &dma_entry_hash[i].list, list) { if (entry->dev == dev) { count += 1; *out_entry = entry; } } - spin_unlock(&dma_entry_hash[i].lock); + spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); } - local_irq_restore(flags); - return count; } @@ -1502,7 +1498,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; entry->pfn = page_to_pfn(virt_to_page(virt)); - entry->offset = (size_t) virt & ~PAGE_MASK; + entry->offset = offset_in_page(virt); entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; @@ -1518,7 +1514,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, .type = dma_debug_coherent, .dev = dev, .pfn = page_to_pfn(virt_to_page(virt)), - .offset = (size_t) virt & ~PAGE_MASK, + .offset = offset_in_page(virt), .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 6a823a53e357bc83e84476d93cc2c155f0adfee5..4ff157159a0d4d9d746fad5d228e878bdecd5be2 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -56,7 +56,7 @@ static void fail_dump(struct fault_attr *attr) static bool fail_task(struct fault_attr *attr, struct task_struct *task) { - return !in_interrupt() && task->make_it_fail; + return in_task() && task->make_it_fail; } #define MAX_STACK_TRACE_DEPTH 32 diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 60abc44385b795de7449b3810e9e294b7d2c48fc..f835964c9485f147699609bf2c2dc1ffecb8833b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -413,7 +413,7 @@ void iov_iter_init(struct iov_iter *i, int direction, size_t count) { /* It will get better. Eventually... */ - if (segment_eq(get_fs(), KERNEL_DS)) { + if (uaccess_kernel()) { direction |= ITER_KVEC; i->type = direction; i->kvec = (struct kvec *)iov; @@ -604,7 +604,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) return 0; } iterate_and_advance(i, bytes, v, - __copy_from_user_nocache((to += v.iov_len) - v.iov_len, + __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), @@ -625,7 +625,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) if (unlikely(i->count < bytes)) return false; iterate_all_kinds(i, bytes, v, ({ - if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len, + if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)) return false; 0;}), @@ -790,6 +790,8 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) { if (!unroll) return; + if (WARN_ON(unroll > MAX_RW_COUNT)) + return; i->count += unroll; if (unlikely(i->type & ITER_PIPE)) { struct pipe_inode_info *pipe = i->pipe; @@ -798,7 +800,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) while (1) { size_t n = off - pipe->bufs[idx].offset; if (unroll < n) { - off -= (n - unroll); + off -= unroll; break; } unroll -= n; @@ -1028,10 +1030,7 @@ EXPORT_SYMBOL(iov_iter_get_pages); static struct page **get_pages_array(size_t n) { - struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); - if (!p) - p = vmalloc(n * sizeof(struct page *)); - return p; + return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); } static ssize_t pipe_get_pages_alloc(struct iov_iter *i, diff --git a/lib/kobject.c b/lib/kobject.c index 445dcaeb0f56de7882a1b1278a90ef7931f4099c..763d70a189410cb7307b87474801a662f631f806 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -601,12 +601,15 @@ struct kobject *kobject_get(struct kobject *kobj) } EXPORT_SYMBOL(kobject_get); -static struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) +struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) { + if (!kobj) + return NULL; if (!kref_get_unless_zero(&kobj->kref)) kobj = NULL; return kobj; } +EXPORT_SYMBOL(kobject_get_unless_zero); /* * kobject_cleanup - free kobject resources. diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 74a54b7f25626e8c6d224af2b7384f7dcbf2e72f..9f79547d1b9782237a563f6a2e838655f6adf6dc 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -43,7 +43,7 @@ static struct crypto_shash *tfm; u32 crc32c(u32 crc, const void *address, unsigned int length) { SHASH_DESC_ON_STACK(shash, tfm); - u32 *ctx = (u32 *)shash_desc_ctx(shash); + u32 ret, *ctx = (u32 *)shash_desc_ctx(shash); int err; shash->tfm = tfm; @@ -53,7 +53,9 @@ u32 crc32c(u32 crc, const void *address, unsigned int length) err = crypto_shash_update(shash, address, length); BUG_ON(err); - return *ctx; + ret = *ctx; + barrier_data(ctx); + return ret; } EXPORT_SYMBOL(crc32c); diff --git a/lib/list_sort.c b/lib/list_sort.c index 3fe401067e20ba81c762fb00ae00730de6b9502f..9e9acc37652f2814542438f65d20ea9b3aab6539 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -1,6 +1,3 @@ - -#define pr_fmt(fmt) "list_sort_test: " fmt - #include #include #include @@ -145,149 +142,3 @@ void list_sort(void *priv, struct list_head *head, merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); } EXPORT_SYMBOL(list_sort); - -#ifdef CONFIG_TEST_LIST_SORT - -#include -#include - -/* - * The pattern of set bits in the list length determines which cases - * are hit in list_sort(). - */ -#define TEST_LIST_LEN (512+128+2) /* not including head */ - -#define TEST_POISON1 0xDEADBEEF -#define TEST_POISON2 0xA324354C - -struct debug_el { - unsigned int poison1; - struct list_head list; - unsigned int poison2; - int value; - unsigned serial; -}; - -/* Array, containing pointers to all elements in the test list */ -static struct debug_el **elts __initdata; - -static int __init check(struct debug_el *ela, struct debug_el *elb) -{ - if (ela->serial >= TEST_LIST_LEN) { - pr_err("error: incorrect serial %d\n", ela->serial); - return -EINVAL; - } - if (elb->serial >= TEST_LIST_LEN) { - pr_err("error: incorrect serial %d\n", elb->serial); - return -EINVAL; - } - if (elts[ela->serial] != ela || elts[elb->serial] != elb) { - pr_err("error: phantom element\n"); - return -EINVAL; - } - if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { - pr_err("error: bad poison: %#x/%#x\n", - ela->poison1, ela->poison2); - return -EINVAL; - } - if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { - pr_err("error: bad poison: %#x/%#x\n", - elb->poison1, elb->poison2); - return -EINVAL; - } - return 0; -} - -static int __init cmp(void *priv, struct list_head *a, struct list_head *b) -{ - struct debug_el *ela, *elb; - - ela = container_of(a, struct debug_el, list); - elb = container_of(b, struct debug_el, list); - - check(ela, elb); - return ela->value - elb->value; -} - -static int __init list_sort_test(void) -{ - int i, count = 1, err = -ENOMEM; - struct debug_el *el; - struct list_head *cur; - LIST_HEAD(head); - - pr_debug("start testing list_sort()\n"); - - elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); - if (!elts) { - pr_err("error: cannot allocate memory\n"); - return err; - } - - for (i = 0; i < TEST_LIST_LEN; i++) { - el = kmalloc(sizeof(*el), GFP_KERNEL); - if (!el) { - pr_err("error: cannot allocate memory\n"); - goto exit; - } - /* force some equivalencies */ - el->value = prandom_u32() % (TEST_LIST_LEN / 3); - el->serial = i; - el->poison1 = TEST_POISON1; - el->poison2 = TEST_POISON2; - elts[i] = el; - list_add_tail(&el->list, &head); - } - - list_sort(NULL, &head, cmp); - - err = -EINVAL; - for (cur = head.next; cur->next != &head; cur = cur->next) { - struct debug_el *el1; - int cmp_result; - - if (cur->next->prev != cur) { - pr_err("error: list is corrupted\n"); - goto exit; - } - - cmp_result = cmp(NULL, cur, cur->next); - if (cmp_result > 0) { - pr_err("error: list is not sorted\n"); - goto exit; - } - - el = container_of(cur, struct debug_el, list); - el1 = container_of(cur->next, struct debug_el, list); - if (cmp_result == 0 && el->serial >= el1->serial) { - pr_err("error: order of equivalent elements not " - "preserved\n"); - goto exit; - } - - if (check(el, el1)) { - pr_err("error: element check failed\n"); - goto exit; - } - count++; - } - if (head.prev != cur) { - pr_err("error: list is corrupted\n"); - goto exit; - } - - - if (count != TEST_LIST_LEN) { - pr_err("error: bad list length %d", count); - goto exit; - } - - err = 0; -exit: - for (i = 0; i < TEST_LIST_LEN; i++) - kfree(elts[i]); - kfree(elts); - return err; -} -late_initcall(list_sort_test); -#endif /* CONFIG_TEST_LIST_SORT */ diff --git a/lib/md5.c b/lib/md5.c deleted file mode 100644 index bb0cd01d356d4263670a06909a2717eb913915a7..0000000000000000000000000000000000000000 --- a/lib/md5.c +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include -#include - -#define F1(x, y, z) (z ^ (x & (y ^ z))) -#define F2(x, y, z) F1(z, x, y) -#define F3(x, y, z) (x ^ y ^ z) -#define F4(x, y, z) (y ^ (x | ~z)) - -#define MD5STEP(f, w, x, y, z, in, s) \ - (w += f(x, y, z) + in, w = (w<>(32-s)) + x) - -void md5_transform(__u32 *hash, __u32 const *in) -{ - u32 a, b, c, d; - - a = hash[0]; - b = hash[1]; - c = hash[2]; - d = hash[3]; - - MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); - MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); - MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); - MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); - MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); - MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); - MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); - MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); - - MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); - MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); - MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); - MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); - - MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); - MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); - MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); - - MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); - MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); - MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); - MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); - MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); - - hash[0] += a; - hash[1] += b; - hash[2] += c; - hash[3] += d; -} -EXPORT_SYMBOL(md5_transform); diff --git a/lib/nlattr.c b/lib/nlattr.c index b42b8577fc2346045c4d88ede93960bf1cd8f3e7..a7e0b16078dff5035d1494d15f62622fdfe54b66 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -112,6 +112,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be @@ -120,20 +121,23 @@ static int validate_nla(const struct nlattr *nla, int maxtype, * Returns 0 on success or a negative error code. */ int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy) + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { const struct nlattr *nla; - int rem, err; + int rem; nla_for_each_attr(nla, head, len, rem) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; + int err = validate_nla(nla, maxtype, policy); + + if (err < 0) { + if (extack) + extack->bad_attr = nla; + return err; + } } - err = 0; -errout: - return err; + return 0; } EXPORT_SYMBOL(nla_validate); @@ -180,7 +184,8 @@ EXPORT_SYMBOL(nla_policy_len); * Returns 0 on success or a negative error code. */ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy) + int len, const struct nla_policy *policy, + struct netlink_ext_ack *extack) { const struct nlattr *nla; int rem, err; @@ -193,8 +198,11 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, if (type > 0 && type <= maxtype) { if (policy) { err = validate_nla(nla, maxtype, policy); - if (err < 0) + if (err < 0) { + if (extack) + extack->bad_attr = nla; goto errout; + } } tb[type] = (struct nlattr *)nla; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9ac959ef4cae972374540f34895465507e656d7b..fe03c6d527611937e196395f83f7348da86f8f06 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -260,6 +260,22 @@ void percpu_ref_switch_to_atomic(struct percpu_ref *ref, spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); } +EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic); + +/** + * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode + * @ref: percpu_ref to switch to atomic mode + * + * Schedule switching the ref to atomic mode, and wait for the + * switch to complete. Caller must ensure that no other thread + * will switch back to percpu mode. + */ +void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref) +{ + percpu_ref_switch_to_atomic(ref, NULL); + wait_event(percpu_ref_switch_waitq, !ref->confirm_switch); +} +EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync); /** * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode @@ -290,6 +306,7 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref) spin_unlock_irqrestore(&percpu_ref_switch_lock, flags); } +EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu); /** * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 691a9ad48497b02e3b09304d6565165ef2317b16..898e8799841759ff20f1dc3eb25f277ebb3a8119 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2284,6 +2284,8 @@ static int radix_tree_cpu_dead(unsigned int cpu) void __init radix_tree_init(void) { int ret; + + BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, diff --git a/lib/refcount.c b/lib/refcount.c index aa09ad3c30b0dc37a920c46f0da711f366d29423..9f906783987e2a22813d6fd2a9f395ecccca1aef 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -37,11 +37,29 @@ #include #include +/** + * refcount_add_not_zero - add a value to a refcount unless it is 0 + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ bool refcount_add_not_zero(unsigned int i, refcount_t *r) { - unsigned int old, new, val = atomic_read(&r->refs); + unsigned int new, val = atomic_read(&r->refs); - for (;;) { + do { if (!val) return false; @@ -51,37 +69,54 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) new = val + i; if (new < val) new = UINT_MAX; - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - val = old; - } + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } -EXPORT_SYMBOL_GPL(refcount_add_not_zero); +EXPORT_SYMBOL(refcount_add_not_zero); +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ void refcount_add(unsigned int i, refcount_t *r) { WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } -EXPORT_SYMBOL_GPL(refcount_add); +EXPORT_SYMBOL(refcount_add); -/* - * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. +/** + * refcount_inc_not_zero - increment a refcount unless it is 0 + * @r: the refcount to increment + * + * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency * and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise */ bool refcount_inc_not_zero(refcount_t *r) { - unsigned int old, new, val = atomic_read(&r->refs); + unsigned int new, val = atomic_read(&r->refs); - for (;;) { + do { new = val + 1; if (!val) @@ -90,36 +125,57 @@ bool refcount_inc_not_zero(refcount_t *r) if (unlikely(!new)) return true; - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - - val = old; - } + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } -EXPORT_SYMBOL_GPL(refcount_inc_not_zero); +EXPORT_SYMBOL(refcount_inc_not_zero); -/* - * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. +/** + * refcount_inc - increment a refcount + * @r: the refcount to increment + * + * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. * * Provides no memory ordering, it is assumed the caller already has a - * reference on the object, will WARN when this is not so. + * reference on the object. + * + * Will WARN if the refcount is 0, as this represents a possible use-after-free + * condition. */ void refcount_inc(refcount_t *r) { WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } -EXPORT_SYMBOL_GPL(refcount_inc); +EXPORT_SYMBOL(refcount_inc); +/** + * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * @i: amount to subtract from the refcount + * @r: the refcount + * + * Similar to atomic_dec_and_test(), but it will WARN, return false and + * ultimately leak on underflow and will fail to decrement when saturated + * at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_dec(), or one of its variants, should instead be used to + * decrement a reference count. + * + * Return: true if the resulting refcount is 0, false otherwise + */ bool refcount_sub_and_test(unsigned int i, refcount_t *r) { - unsigned int old, new, val = atomic_read(&r->refs); + unsigned int new, val = atomic_read(&r->refs); - for (;;) { + do { if (unlikely(val == UINT_MAX)) return false; @@ -129,46 +185,51 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) return false; } - old = atomic_cmpxchg_release(&r->refs, val, new); - if (old == val) - break; - - val = old; - } + } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); return !new; } -EXPORT_SYMBOL_GPL(refcount_sub_and_test); +EXPORT_SYMBOL(refcount_sub_and_test); -/* +/** + * refcount_dec_and_test - decrement a refcount and test if it is 0 + * @r: the refcount + * * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to * decrement when saturated at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides a control dependency such that free() must come after. * See the comment on top. + * + * Return: true if the resulting refcount is 0, false otherwise */ bool refcount_dec_and_test(refcount_t *r) { return refcount_sub_and_test(1, r); } -EXPORT_SYMBOL_GPL(refcount_dec_and_test); +EXPORT_SYMBOL(refcount_dec_and_test); -/* +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * * Similar to atomic_dec(), it will WARN on underflow and fail to decrement * when saturated at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done * before. */ - void refcount_dec(refcount_t *r) { WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } -EXPORT_SYMBOL_GPL(refcount_dec); +EXPORT_SYMBOL(refcount_dec); -/* +/** + * refcount_dec_if_one - decrement a refcount if it is 1 + * @r: the refcount + * * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the * success thereof. * @@ -178,24 +239,33 @@ EXPORT_SYMBOL_GPL(refcount_dec); * It can be used like a try-delete operator; this explicit case is provided * and not cmpxchg in generic, because that would allow implementing unsafe * operations. + * + * Return: true if the resulting refcount is 0, false otherwise */ bool refcount_dec_if_one(refcount_t *r) { - return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; + int val = 1; + + return atomic_try_cmpxchg_release(&r->refs, &val, 0); } -EXPORT_SYMBOL_GPL(refcount_dec_if_one); +EXPORT_SYMBOL(refcount_dec_if_one); -/* +/** + * refcount_dec_not_one - decrement a refcount if it is not 1 + * @r: the refcount + * * No atomic_t counterpart, it decrements unless the value is 1, in which case * it will return false. * * Was often done like: atomic_add_unless(&var, -1, 1) + * + * Return: true if the decrement operation was successful, false otherwise */ bool refcount_dec_not_one(refcount_t *r) { - unsigned int old, new, val = atomic_read(&r->refs); + unsigned int new, val = atomic_read(&r->refs); - for (;;) { + do { if (unlikely(val == UINT_MAX)) return true; @@ -208,24 +278,27 @@ bool refcount_dec_not_one(refcount_t *r) return true; } - old = atomic_cmpxchg_release(&r->refs, val, new); - if (old == val) - break; - - val = old; - } + } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); return true; } -EXPORT_SYMBOL_GPL(refcount_dec_not_one); +EXPORT_SYMBOL(refcount_dec_not_one); -/* +/** + * refcount_dec_and_mutex_lock - return holding mutex if able to decrement + * refcount to 0 + * @r: the refcount + * @lock: the mutex to be locked + * * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail * to decrement when saturated at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides a control dependency such that free() must come after. * See the comment on top. + * + * Return: true and hold mutex if able to decrement refcount to 0, false + * otherwise */ bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) { @@ -240,15 +313,23 @@ bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) return true; } -EXPORT_SYMBOL_GPL(refcount_dec_and_mutex_lock); +EXPORT_SYMBOL(refcount_dec_and_mutex_lock); -/* +/** + * refcount_dec_and_lock - return holding spinlock if able to decrement + * refcount to 0 + * @r: the refcount + * @lock: the spinlock to be locked + * * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to * decrement when saturated at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done * before, and provides a control dependency such that free() must come after. * See the comment on top. + * + * Return: true and hold spinlock if able to decrement refcount to 0, false + * otherwise */ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) { @@ -263,5 +344,5 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) return true; } -EXPORT_SYMBOL_GPL(refcount_dec_and_lock); +EXPORT_SYMBOL(refcount_dec_and_lock); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index f8635fd5744259431305b73599949011d67a78b0..d9e7274a04cd98d14456b1bba9ad2e241eae8895 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -86,16 +86,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, size = min(size, 1U << tbl->nest); if (sizeof(spinlock_t) != 0) { - tbl->locks = NULL; -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE && - gfp == GFP_KERNEL) - tbl->locks = vmalloc(size * sizeof(spinlock_t)); -#endif - if (gfp != GFP_KERNEL) - gfp |= __GFP_NOWARN | __GFP_NORETRY; - - if (!tbl->locks) + if (gfpflags_allow_blocking(gfp)) + tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp); + else tbl->locks = kmalloc_array(size, sizeof(spinlock_t), gfp); if (!tbl->locks) @@ -535,7 +528,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, struct rhash_head *head; int elasticity; - elasticity = ht->elasticity; + elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *list; @@ -958,35 +951,20 @@ int rhashtable_init(struct rhashtable *ht, if (params->min_size) ht->p.min_size = roundup_pow_of_two(params->min_size); - if (params->max_size) - ht->p.max_size = rounddown_pow_of_two(params->max_size); + /* Cap total entries at 2^31 to avoid nelems overflow. */ + ht->max_elems = 1u << 31; - if (params->insecure_max_entries) - ht->p.insecure_max_entries = - rounddown_pow_of_two(params->insecure_max_entries); - else - ht->p.insecure_max_entries = ht->p.max_size * 2; + if (params->max_size) { + ht->p.max_size = rounddown_pow_of_two(params->max_size); + if (ht->p.max_size < ht->max_elems / 2) + ht->max_elems = ht->p.max_size * 2; + } - ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); if (params->nelem_hint) size = rounded_hashtable_size(&ht->p); - /* The maximum (not average) chain length grows with the - * size of the hash table, at a rate of (log N)/(log log N). - * The value of 16 is selected so that even if the hash - * table grew to 2^32 you would not expect the maximum - * chain length to exceed it unless we are under attack - * (or extremely unlucky). - * - * As this limit is only to detect attacks, we don't need - * to set it to a lower value as you'd need the chain - * length to vastly exceed 16 to have any real effect - * on the system. - */ - if (!params->insecure_elasticity) - ht->elasticity = 16; - if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); else diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 60e800e0b5a0d986ec0e2664dd9e4185aa50328a..80aa8d5463faf9f4c39d5c58bb7efdb927836cdb 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -79,15 +79,15 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) } EXPORT_SYMBOL_GPL(sbitmap_resize); -static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, - bool wrap) +static int __sbitmap_get_word(unsigned long *word, unsigned long depth, + unsigned int hint, bool wrap) { unsigned int orig_hint = hint; int nr; while (1) { - nr = find_next_zero_bit(&word->word, word->depth, hint); - if (unlikely(nr >= word->depth)) { + nr = find_next_zero_bit(word, depth, hint); + if (unlikely(nr >= depth)) { /* * We started with an offset, and we didn't reset the * offset to 0 in a failure case, so start from 0 to @@ -100,11 +100,11 @@ static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, return -1; } - if (!test_and_set_bit(nr, &word->word)) + if (!test_and_set_bit(nr, word)) break; hint = nr + 1; - if (hint >= word->depth - 1) + if (hint >= depth - 1) hint = 0; } @@ -119,7 +119,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) index = SB_NR_TO_INDEX(sb, alloc_hint); for (i = 0; i < sb->map_nr; i++) { - nr = __sbitmap_get_word(&sb->map[index], + nr = __sbitmap_get_word(&sb->map[index].word, + sb->map[index].depth, SB_NR_TO_BIT(sb, alloc_hint), !round_robin); if (nr != -1) { @@ -141,6 +142,37 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) } EXPORT_SYMBOL_GPL(sbitmap_get); +int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, + unsigned long shallow_depth) +{ + unsigned int i, index; + int nr = -1; + + index = SB_NR_TO_INDEX(sb, alloc_hint); + + for (i = 0; i < sb->map_nr; i++) { + nr = __sbitmap_get_word(&sb->map[index].word, + min(sb->map[index].depth, shallow_depth), + SB_NR_TO_BIT(sb, alloc_hint), true); + if (nr != -1) { + nr += index << sb->shift; + break; + } + + /* Jump to next index. */ + index++; + alloc_hint = index << sb->shift; + + if (index >= sb->map_nr) { + index = 0; + alloc_hint = 0; + } + } + + return nr; +} +EXPORT_SYMBOL_GPL(sbitmap_get_shallow); + bool sbitmap_any_bit_set(const struct sbitmap *sb) { unsigned int i; @@ -342,6 +374,35 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq) } EXPORT_SYMBOL_GPL(__sbitmap_queue_get); +int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth) +{ + unsigned int hint, depth; + int nr; + + hint = this_cpu_read(*sbq->alloc_hint); + depth = READ_ONCE(sbq->sb.depth); + if (unlikely(hint >= depth)) { + hint = depth ? prandom_u32() % depth : 0; + this_cpu_write(*sbq->alloc_hint, hint); + } + nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); + + if (nr == -1) { + /* If the map is full, a hint won't do us much good. */ + this_cpu_write(*sbq->alloc_hint, 0); + } else if (nr == hint || unlikely(sbq->round_robin)) { + /* Only update the hint if we used it. */ + hint = nr + 1; + if (hint >= depth - 1) + hint = 0; + this_cpu_write(*sbq->alloc_hint, hint); + } + + return nr; +} +EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); + static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) { int i, wake_index; diff --git a/lib/string.c b/lib/string.c index ed83562a53ae5c57b7c55232cf703d3e4de90c58..1c1fc9187b054ccae0d55f6678a23971de109718 100644 --- a/lib/string.c +++ b/lib/string.c @@ -131,7 +131,7 @@ EXPORT_SYMBOL(strncpy); * @src: Where to copy the string from * @size: size of destination buffer * - * Compatible with *BSD: the result is always a valid + * Compatible with ``*BSD``: the result is always a valid * NUL-terminated string that fits in the buffer (unless, * of course, the buffer size is zero). It does not pad * out the result like strncpy() does. @@ -656,6 +656,32 @@ int match_string(const char * const *array, size_t n, const char *string) } EXPORT_SYMBOL(match_string); +/** + * __sysfs_match_string - matches given string in an array + * @array: array of strings + * @n: number of strings in the array or -1 for NULL terminated arrays + * @str: string to match with + * + * Returns index of @str in the @array or -EINVAL, just like match_string(). + * Uses sysfs_streq instead of strcmp for matching. + */ +int __sysfs_match_string(const char * const *array, size_t n, const char *str) +{ + const char *item; + int index; + + for (index = 0; index < n; index++) { + item = array[index]; + if (!item) + break; + if (sysfs_streq(item, str)) + return index; + } + + return -EINVAL; +} +EXPORT_SYMBOL(__sysfs_match_string); + #ifndef __HAVE_ARCH_MEMSET /** * memset - Fill a region of memory with the given value diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 0362da0b66c352e4cb3eb96748fe2db4955d6b11..be88cbaadde3aeed809b2997e2ef683fba62dba4 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -434,6 +434,41 @@ static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) return 0; } +static int __bpf_fill_stxdw(struct bpf_test *self, int size) +{ + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = BPF_ALU32_IMM(BPF_MOV, R0, 1); + insn[1] = BPF_ST_MEM(size, R10, -40, 42); + + for (i = 2; i < len - 2; i++) + insn[i] = BPF_STX_XADD(size, R10, R0, -40); + + insn[len - 2] = BPF_LDX_MEM(size, R0, R10, -40); + insn[len - 1] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + +static int bpf_fill_stxw(struct bpf_test *self) +{ + return __bpf_fill_stxdw(self, BPF_W); +} + +static int bpf_fill_stxdw(struct bpf_test *self) +{ + return __bpf_fill_stxdw(self, BPF_DW); +} + static struct bpf_test tests[] = { { "TAX", @@ -4302,6 +4337,41 @@ static struct bpf_test tests[] = { { }, { { 0, 0x22 } }, }, + { + "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU64_REG(BPF_MOV, R1, R10), + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_W, R10, -40, 0x10), + BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_ALU64_REG(BPF_MOV, R0, R10), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_W, R10, -40, 0x10), + BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12 } }, + }, + { + "STX_XADD_W: X + 1 + 1 + 1 + ...", + { }, + INTERNAL, + { }, + { { 0, 4134 } }, + .fill_helper = bpf_fill_stxw, + }, { "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", .u.insns_int = { @@ -4315,6 +4385,41 @@ static struct bpf_test tests[] = { { }, { { 0, 0x22 } }, }, + { + "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU64_REG(BPF_MOV, R1, R10), + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_DW, R10, -40, 0x10), + BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_ALU64_REG(BPF_MOV, R0, R10), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + }, + { + "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12), + BPF_ST_MEM(BPF_DW, R10, -40, 0x10), + BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12 } }, + }, + { + "STX_XADD_DW: X + 1 + 1 + 1 + ...", + { }, + INTERNAL, + { }, + { { 0, 4134 } }, + .fill_helper = bpf_fill_stxdw, + }, /* BPF_JMP | BPF_EXIT */ { "JMP_EXIT", @@ -4399,6 +4504,44 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "JMP_JSGE_K: Signed jump: value walk 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -3), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 6), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 4), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 2), + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JMP_JSGE_K: Signed jump: value walk 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -3), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 4), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 2), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_JMP_IMM(BPF_JSGE, R1, 0, 1), + BPF_EXIT_INSN(), /* bad exit */ + BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */ + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGT | BPF_K */ { "JMP_JGT_K: if (3 > 2) return 1", @@ -4656,6 +4799,51 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + /* Mainly testing JIT + imm64 here. */ + "JMP_JGE_X: ldimm64 test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 2), + BPF_LD_IMM64(R0, 0xffffffffffffffffULL), + BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xeeeeeeeeU } }, + }, + { + "JMP_JGE_X: ldimm64 test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 0), + BPF_LD_IMM64(R0, 0xffffffffffffffffULL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffffU } }, + }, + { + "JMP_JGE_X: ldimm64 test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_LD_IMM64(R1, 3), + BPF_LD_IMM64(R2, 2), + BPF_JMP_REG(BPF_JGE, R1, R2, 4), + BPF_LD_IMM64(R0, 0xffffffffffffffffULL), + BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JNE | BPF_X */ { "JMP_JNE_X: if (3 != 2) return 1", diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c new file mode 100644 index 0000000000000000000000000000000000000000..28e817387b04f1c8862c54deefd4e8309f26437e --- /dev/null +++ b/lib/test_list_sort.c @@ -0,0 +1,150 @@ +#define pr_fmt(fmt) "list_sort_test: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* + * The pattern of set bits in the list length determines which cases + * are hit in list_sort(). + */ +#define TEST_LIST_LEN (512+128+2) /* not including head */ + +#define TEST_POISON1 0xDEADBEEF +#define TEST_POISON2 0xA324354C + +struct debug_el { + unsigned int poison1; + struct list_head list; + unsigned int poison2; + int value; + unsigned serial; +}; + +/* Array, containing pointers to all elements in the test list */ +static struct debug_el **elts __initdata; + +static int __init check(struct debug_el *ela, struct debug_el *elb) +{ + if (ela->serial >= TEST_LIST_LEN) { + pr_err("error: incorrect serial %d\n", ela->serial); + return -EINVAL; + } + if (elb->serial >= TEST_LIST_LEN) { + pr_err("error: incorrect serial %d\n", elb->serial); + return -EINVAL; + } + if (elts[ela->serial] != ela || elts[elb->serial] != elb) { + pr_err("error: phantom element\n"); + return -EINVAL; + } + if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { + pr_err("error: bad poison: %#x/%#x\n", + ela->poison1, ela->poison2); + return -EINVAL; + } + if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { + pr_err("error: bad poison: %#x/%#x\n", + elb->poison1, elb->poison2); + return -EINVAL; + } + return 0; +} + +static int __init cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct debug_el *ela, *elb; + + ela = container_of(a, struct debug_el, list); + elb = container_of(b, struct debug_el, list); + + check(ela, elb); + return ela->value - elb->value; +} + +static int __init list_sort_test(void) +{ + int i, count = 1, err = -ENOMEM; + struct debug_el *el; + struct list_head *cur; + LIST_HEAD(head); + + pr_debug("start testing list_sort()\n"); + + elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); + if (!elts) { + pr_err("error: cannot allocate memory\n"); + return err; + } + + for (i = 0; i < TEST_LIST_LEN; i++) { + el = kmalloc(sizeof(*el), GFP_KERNEL); + if (!el) { + pr_err("error: cannot allocate memory\n"); + goto exit; + } + /* force some equivalencies */ + el->value = prandom_u32() % (TEST_LIST_LEN / 3); + el->serial = i; + el->poison1 = TEST_POISON1; + el->poison2 = TEST_POISON2; + elts[i] = el; + list_add_tail(&el->list, &head); + } + + list_sort(NULL, &head, cmp); + + err = -EINVAL; + for (cur = head.next; cur->next != &head; cur = cur->next) { + struct debug_el *el1; + int cmp_result; + + if (cur->next->prev != cur) { + pr_err("error: list is corrupted\n"); + goto exit; + } + + cmp_result = cmp(NULL, cur, cur->next); + if (cmp_result > 0) { + pr_err("error: list is not sorted\n"); + goto exit; + } + + el = container_of(cur, struct debug_el, list); + el1 = container_of(cur->next, struct debug_el, list); + if (cmp_result == 0 && el->serial >= el1->serial) { + pr_err("error: order of equivalent elements not " + "preserved\n"); + goto exit; + } + + if (check(el, el1)) { + pr_err("error: element check failed\n"); + goto exit; + } + count++; + } + if (head.prev != cur) { + pr_err("error: list is corrupted\n"); + goto exit; + } + + + if (count != TEST_LIST_LEN) { + pr_err("error: bad list length %d", count); + goto exit; + } + + err = 0; +exit: + for (i = 0; i < TEST_LIST_LEN; i++) + kfree(elts[i]); + kfree(elts); + return err; +} +module_init(list_sort_test); +MODULE_LICENSE("GPL"); diff --git a/lib/test_sort.c b/lib/test_sort.c index 4db3911db50ace76356b6530f03480955226b79a..d389c1cc2f6cf795783c0572771b54bb64f9b751 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -1,11 +1,8 @@ #include #include -#include +#include -/* - * A simple boot-time regression test - * License: GPL - */ +/* a simple boot-time regression test */ #define TEST_LEN 1000 @@ -41,4 +38,6 @@ static int __init test_sort_init(void) kfree(a); return err; } -subsys_initcall(test_sort_init); + +module_init(test_sort_init); +MODULE_LICENSE("GPL"); diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index 1a8d71a685318ca901090805e98f45e12fdde315..4621db801b233009c096452cbcd137d00574ad77 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -31,7 +31,6 @@ * their capability at compile-time, we just have to opt-out certain archs. */ #if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \ - !defined(CONFIG_AVR32) && \ !defined(CONFIG_BLACKFIN) && \ !defined(CONFIG_M32R) && \ !defined(CONFIG_M68K) && \ diff --git a/lib/usercopy.c b/lib/usercopy.c new file mode 100644 index 0000000000000000000000000000000000000000..1b6010a3beb8528a71676e2fdf0cf19924fd889a --- /dev/null +++ b/lib/usercopy.c @@ -0,0 +1,26 @@ +#include + +/* out-of-line parts */ + +#ifndef INLINE_COPY_FROM_USER +unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned long res = n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = raw_copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +EXPORT_SYMBOL(_copy_from_user); +#endif + +#ifndef INLINE_COPY_TO_USER +unsigned long _copy_to_user(void *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = raw_copy_to_user(to, from, n); + return n; +} +EXPORT_SYMBOL(_copy_to_user); +#endif diff --git a/lib/vsprintf.c b/lib/vsprintf.c index e3bf4e0f10b568ba3d74674642adeff013cb62c8..2d41de3f98a1c9a0e0883b3d73b6980b0110cff1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1477,6 +1477,9 @@ int kptr_restrict __read_mostly; * by an extra set of alphanumeric characters that are extended format * specifiers. * + * Please update scripts/checkpatch.pl when adding/removing conversion + * characters. (Search for "check for vsprintf extension"). + * * Right now we handle: * * - 'F' For symbolic function descriptor pointers with offset @@ -1954,13 +1957,13 @@ set_precision(struct printf_spec *spec, int prec) * This function generally follows C99 vsnprintf, but has some * extensions and a few limitations: * - * %n is unsupported - * %p* is handled by pointer() + * - ``%n`` is unsupported + * - ``%p*`` is handled by pointer() * * See pointer() or Documentation/printk-formats.txt for more * extensive description. * - * ** Please update the documentation in both places when making changes ** + * **Please update the documentation in both places when making changes** * * The return value is the number of characters which would * be generated for the given input, excluding the trailing diff --git a/lib/zlib_inflate/inftrees.c b/lib/zlib_inflate/inftrees.c index 3fe6ce5b53e51999b0ad15f97843b93c313cb60f..02894305292663abc7c4ac9b8e1fbaed18396f6b 100644 --- a/lib/zlib_inflate/inftrees.c +++ b/lib/zlib_inflate/inftrees.c @@ -109,7 +109,7 @@ int zlib_inflate_table(codetype type, unsigned short *lens, unsigned codes, *bits = 1; return 0; /* no symbols, but wait for decoding to report error */ } - for (min = 1; min <= MAXBITS; min++) + for (min = 1; min < MAXBITS; min++) if (count[min] != 0) break; if (root < min) root = min; diff --git a/mm/Kconfig b/mm/Kconfig index 9b8fccb969dc0fee5f5e0a623bc5b97a99bbc602..beb7a455915d062f75bce89477961e121f2bca32 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -312,7 +312,6 @@ config NEED_BOUNCE_POOL config NR_QUICK int depends on QUICKLIST - default "2" if AVR32 default "1" config VIRT_TO_BUS diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 79d0fd13b5b3c1a826f472398fd60a1ae1cb5da6..5b0adf1435de7dba61e7d0b34c6a7fd912041fd1 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -42,7 +42,6 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT config PAGE_POISONING bool "Poison pages after freeing" - select PAGE_EXTENSION select PAGE_POISONING_NO_SANITY if HIBERNATION ---help--- Fill the pages with poison patterns after free_pages() and verify diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c6f2a37028c205db8143ebe58677c790c66a0faf..f028a9a472fd9b2c7098bce8fe622fd58ba2f140 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -12,8 +12,6 @@ #include #include -static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); - struct backing_dev_info noop_backing_dev_info = { .name = "noop", .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, @@ -242,6 +240,8 @@ static __init int bdi_class_init(void) } postcore_initcall(bdi_class_init); +static int bdi_init(struct backing_dev_info *bdi); + static int __init default_bdi_init(void) { int err; @@ -294,6 +294,8 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, memset(wb, 0, sizeof(*wb)); + if (wb != &bdi->wb) + bdi_get(bdi); wb->bdi = bdi; wb->last_old_flush = jiffies; INIT_LIST_HEAD(&wb->b_dirty); @@ -314,8 +316,10 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, wb->dirty_sleep = jiffies; wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); - if (!wb->congested) - return -ENOMEM; + if (!wb->congested) { + err = -ENOMEM; + goto out_put_bdi; + } err = fprop_local_init_percpu(&wb->completions, gfp); if (err) @@ -335,9 +339,14 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, fprop_local_destroy_percpu(&wb->completions); out_put_cong: wb_congested_put(wb->congested); +out_put_bdi: + if (wb != &bdi->wb) + bdi_put(bdi); return err; } +static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb); + /* * Remove bdi from the global list and shutdown any threads we have running */ @@ -347,10 +356,18 @@ static void wb_shutdown(struct bdi_writeback *wb) spin_lock_bh(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { spin_unlock_bh(&wb->work_lock); + /* + * Wait for wb shutdown to finish if someone else is just + * running wb_shutdown(). Otherwise we could proceed to wb / + * bdi destruction before wb_shutdown() is finished. + */ + wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE); return; } + set_bit(WB_shutting_down, &wb->state); spin_unlock_bh(&wb->work_lock); + cgwb_remove_from_bdi_list(wb); /* * Drain work list and shutdown the delayed_work. !WB_registered * tells wb_workfn() that @wb is dying and its work_list needs to @@ -359,6 +376,12 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); + /* + * Make sure bit gets cleared after shutdown is finished. Matches with + * the barrier provided by test_and_clear_bit() above. + */ + smp_wmb(); + clear_bit(WB_shutting_down, &wb->state); } static void wb_exit(struct bdi_writeback *wb) @@ -372,6 +395,8 @@ static void wb_exit(struct bdi_writeback *wb) fprop_local_destroy_percpu(&wb->completions); wb_congested_put(wb->congested); + if (wb != &wb->bdi->wb) + bdi_put(wb->bdi); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -381,11 +406,9 @@ static void wb_exit(struct bdi_writeback *wb) /* * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree, * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU - * protected. cgwb_release_wait is used to wait for the completion of cgwb - * releases from bdi destruction path. + * protected. */ static DEFINE_SPINLOCK(cgwb_lock); -static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait); /** * wb_congested_get_create - get or create a wb_congested @@ -438,7 +461,7 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) return NULL; atomic_set(&new_congested->refcnt, 0); - new_congested->bdi = bdi; + new_congested->__bdi = bdi; new_congested->blkcg_id = blkcg_id; goto retry; @@ -466,10 +489,10 @@ void wb_congested_put(struct bdi_writeback_congested *congested) } /* bdi might already have been destroyed leaving @congested unlinked */ - if (congested->bdi) { + if (congested->__bdi) { rb_erase(&congested->rb_node, - &congested->bdi->cgwb_congested_tree); - congested->bdi = NULL; + &congested->__bdi->cgwb_congested_tree); + congested->__bdi = NULL; } spin_unlock_irqrestore(&cgwb_lock, flags); @@ -480,11 +503,6 @@ static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); - struct backing_dev_info *bdi = wb->bdi; - - spin_lock_irq(&cgwb_lock); - list_del_rcu(&wb->bdi_node); - spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); @@ -495,9 +513,6 @@ static void cgwb_release_workfn(struct work_struct *work) percpu_ref_exit(&wb->refcnt); wb_exit(wb); kfree_rcu(wb, rcu); - - if (atomic_dec_and_test(&bdi->usage_cnt)) - wake_up_all(&cgwb_release_wait); } static void cgwb_release(struct percpu_ref *refcnt) @@ -517,6 +532,13 @@ static void cgwb_kill(struct bdi_writeback *wb) percpu_ref_kill(&wb->refcnt); } +static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) +{ + spin_lock_irq(&cgwb_lock); + list_del_rcu(&wb->bdi_node); + spin_unlock_irq(&cgwb_lock); +} + static int cgwb_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) { @@ -580,7 +602,6 @@ static int cgwb_create(struct backing_dev_info *bdi, /* we might have raced another instance of this function */ ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb); if (!ret) { - atomic_inc(&bdi->usage_cnt); list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list); @@ -670,7 +691,6 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; - atomic_set(&bdi->usage_cnt, 1); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { @@ -680,29 +700,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return ret; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) +static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { struct radix_tree_iter iter; void **slot; + struct bdi_writeback *wb; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - spin_unlock_irq(&cgwb_lock); - /* - * All cgwb's must be shutdown and released before returning. Drain - * the usage counter to wait for all cgwb's ever created on @bdi. - */ - atomic_dec(&bdi->usage_cnt); - wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); - /* - * Grab back our reference so that we hold it when @bdi gets - * re-registered. - */ - atomic_inc(&bdi->usage_cnt); + while (!list_empty(&bdi->wb_list)) { + wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, + bdi_node); + spin_unlock_irq(&cgwb_lock); + wb_shutdown(wb); + spin_lock_irq(&cgwb_lock); + } + spin_unlock_irq(&cgwb_lock); } /** @@ -752,11 +769,18 @@ static void cgwb_bdi_exit(struct backing_dev_info *bdi) rb_entry(rbn, struct bdi_writeback_congested, rb_node); rb_erase(rbn, &bdi->cgwb_congested_tree); - congested->bdi = NULL; /* mark @congested unlinked */ + congested->__bdi = NULL; /* mark @congested unlinked */ } spin_unlock_irq(&cgwb_lock); } +static void cgwb_bdi_register(struct backing_dev_info *bdi) +{ + spin_lock_irq(&cgwb_lock); + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); + spin_unlock_irq(&cgwb_lock); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) @@ -777,16 +801,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return 0; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } +static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { } static void cgwb_bdi_exit(struct backing_dev_info *bdi) { wb_congested_put(bdi->wb_congested); } +static void cgwb_bdi_register(struct backing_dev_info *bdi) +{ + list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); +} + +static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) +{ + list_del_rcu(&wb->bdi_node); +} + #endif /* CONFIG_CGROUP_WRITEBACK */ -int bdi_init(struct backing_dev_info *bdi) +static int bdi_init(struct backing_dev_info *bdi) { int ret; @@ -802,11 +836,8 @@ int bdi_init(struct backing_dev_info *bdi) ret = cgwb_bdi_init(bdi); - list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list); - return ret; } -EXPORT_SYMBOL(bdi_init); struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) { @@ -823,22 +854,20 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) } return bdi; } +EXPORT_SYMBOL(bdi_alloc_node); -int bdi_register(struct backing_dev_info *bdi, struct device *parent, - const char *fmt, ...) +int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) { - va_list args; struct device *dev; if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; - va_start(args, fmt); - dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args); - va_end(args); + dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); if (IS_ERR(dev)) return PTR_ERR(dev); + cgwb_bdi_register(bdi); bdi->dev = dev; bdi_debug_register(bdi, dev_name(dev)); @@ -851,20 +880,25 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, trace_writeback_bdi_register(bdi); return 0; } -EXPORT_SYMBOL(bdi_register); +EXPORT_SYMBOL(bdi_register_va); -int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev) +int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...) { - return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev)); + va_list args; + int ret; + + va_start(args, fmt); + ret = bdi_register_va(bdi, fmt, args); + va_end(args); + return ret; } -EXPORT_SYMBOL(bdi_register_dev); +EXPORT_SYMBOL(bdi_register); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner) { int rc; - rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt), - MINOR(owner->devt)); + rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt)); if (rc) return rc; /* Leaking owner reference... */ @@ -892,7 +926,7 @@ void bdi_unregister(struct backing_dev_info *bdi) /* make sure nobody finds us on the bdi_list anymore */ bdi_remove_from_list(bdi); wb_shutdown(&bdi->wb); - cgwb_bdi_destroy(bdi); + cgwb_bdi_unregister(bdi); if (bdi->dev) { bdi_debug_unregister(bdi); @@ -906,19 +940,16 @@ void bdi_unregister(struct backing_dev_info *bdi) } } -static void bdi_exit(struct backing_dev_info *bdi) -{ - WARN_ON_ONCE(bdi->dev); - wb_exit(&bdi->wb); - cgwb_bdi_exit(bdi); -} - static void release_bdi(struct kref *ref) { struct backing_dev_info *bdi = container_of(ref, struct backing_dev_info, refcnt); - bdi_exit(bdi); + if (test_bit(WB_registered, &bdi->wb.state)) + bdi_unregister(bdi); + WARN_ON_ONCE(bdi->dev); + wb_exit(&bdi->wb); + cgwb_bdi_exit(bdi); kfree(bdi); } @@ -926,38 +957,7 @@ void bdi_put(struct backing_dev_info *bdi) { kref_put(&bdi->refcnt, release_bdi); } - -void bdi_destroy(struct backing_dev_info *bdi) -{ - bdi_unregister(bdi); - bdi_exit(bdi); -} -EXPORT_SYMBOL(bdi_destroy); - -/* - * For use from filesystems to quickly init and register a bdi associated - * with dirty writeback - */ -int bdi_setup_and_register(struct backing_dev_info *bdi, char *name) -{ - int err; - - bdi->name = name; - bdi->capabilities = 0; - err = bdi_init(bdi); - if (err) - return err; - - err = bdi_register(bdi, NULL, "%.28s-%ld", name, - atomic_long_inc_return(&bdi_seq)); - if (err) { - bdi_destroy(bdi); - return err; - } - - return 0; -} -EXPORT_SYMBOL(bdi_setup_and_register); +EXPORT_SYMBOL(bdi_put); static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), diff --git a/mm/cma.c b/mm/cma.c index a6033e3444304c95adb7c392984cc6600684c7ec..978b4a1441ef7120678e1fc7973b38c8ccf4dd5a 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -53,6 +53,11 @@ unsigned long cma_get_size(const struct cma *cma) return cma->count << PAGE_SHIFT; } +const char *cma_get_name(const struct cma *cma) +{ + return cma->name ? cma->name : "(undefined)"; +} + static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, int align_order) { @@ -168,6 +173,7 @@ core_initcall(cma_init_reserved_areas); */ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, + const char *name, struct cma **res_cma) { struct cma *cma; @@ -198,6 +204,13 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * subsystems (like slab allocator) are available. */ cma = &cma_areas[cma_area_count]; + if (name) { + cma->name = name; + } else { + cma->name = kasprintf(GFP_KERNEL, "cma%d\n", cma_area_count); + if (!cma->name) + return -ENOMEM; + } cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; @@ -229,7 +242,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, struct cma **res_cma) + bool fixed, const char *name, struct cma **res_cma) { phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start; @@ -335,7 +348,7 @@ int __init cma_declare_contiguous(phys_addr_t base, base = addr; } - ret = cma_init_reserved_mem(base, size, order_per_bit, res_cma); + ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); if (ret) goto err; @@ -491,3 +504,17 @@ bool cma_release(struct cma *cma, const struct page *pages, unsigned int count) return true; } + +int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data) +{ + int i; + + for (i = 0; i < cma_area_count; i++) { + int ret = it(&cma_areas[i], data); + + if (ret) + return ret; + } + + return 0; +} diff --git a/mm/cma.h b/mm/cma.h index 17c75a4246c8bbab8b56fe4d562cd85ea670a21f..49861286279d7dfb52b5643b59557aa0f9341d36 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -11,6 +11,7 @@ struct cma { struct hlist_head mem_head; spinlock_t mem_head_lock; #endif + const char *name; }; extern struct cma cma_areas[MAX_CMA_AREAS]; diff --git a/mm/cma_debug.c b/mm/cma_debug.c index ffc0c3d0ae64a610409d85a5ac5704c2a660ab0c..595b757bef72722fb2715afd9ab0cd1536181c9e 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -167,7 +167,7 @@ static void cma_debugfs_add_one(struct cma *cma, int idx) char name[16]; int u32s; - sprintf(name, "cma-%d", idx); + sprintf(name, "cma-%s", cma->name); tmp = debugfs_create_dir(name, cma_debugfs_root); diff --git a/mm/compaction.c b/mm/compaction.c index 81e1eaa2a2cf1bea89767185e9cb9549f2139ca2..613c59e928cb5b07075c197126e01b9cdafff992 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -89,11 +89,6 @@ static void map_pages(struct list_head *list) list_splice(&tmp_list, list); } -static inline bool migrate_async_suitable(int migratetype) -{ - return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; -} - #ifdef CONFIG_COMPACTION int PageMovable(struct page *page) @@ -988,13 +983,26 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, #endif /* CONFIG_COMPACTION || CONFIG_CMA */ #ifdef CONFIG_COMPACTION -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct compact_control *cc, +static bool suitable_migration_source(struct compact_control *cc, struct page *page) { - if (cc->ignore_block_suitable) + int block_mt; + + if ((cc->mode != MIGRATE_ASYNC) || !cc->direct_compaction) return true; + block_mt = get_pageblock_migratetype(page); + + if (cc->migratetype == MIGRATE_MOVABLE) + return is_migrate_movable(block_mt); + else + return block_mt == cc->migratetype; +} + +/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct compact_control *cc, + struct page *page) +{ /* If the page is a large free page, then disallow migration */ if (PageBuddy(page)) { /* @@ -1006,8 +1014,11 @@ static bool suitable_migration_target(struct compact_control *cc, return false; } + if (cc->ignore_block_suitable) + return true; + /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(get_pageblock_migratetype(page))) + if (is_migrate_movable(get_pageblock_migratetype(page))) return true; /* Otherwise skip the block */ @@ -1242,8 +1253,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, * Async compaction is optimistic to see if the minimum amount * of work satisfies the allocation. */ - if (cc->mode == MIGRATE_ASYNC && - !migrate_async_suitable(get_pageblock_migratetype(page))) + if (!suitable_migration_source(cc, page)) continue; /* Perform the isolation */ @@ -1276,11 +1286,11 @@ static inline bool is_via_compact_memory(int order) return order == -1; } -static enum compact_result __compact_finished(struct zone *zone, struct compact_control *cc, - const int migratetype) +static enum compact_result __compact_finished(struct zone *zone, + struct compact_control *cc) { unsigned int order; - unsigned long watermark; + const int migratetype = cc->migratetype; if (cc->contended || fatal_signal_pending(current)) return COMPACT_CONTENDED; @@ -1308,12 +1318,16 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_ if (is_via_compact_memory(cc->order)) return COMPACT_CONTINUE; - /* Compaction run is not finished if the watermark is not met */ - watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK]; - - if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx, - cc->alloc_flags)) - return COMPACT_CONTINUE; + if (cc->finishing_block) { + /* + * We have finished the pageblock, but better check again that + * we really succeeded. + */ + if (IS_ALIGNED(cc->migrate_pfn, pageblock_nr_pages)) + cc->finishing_block = false; + else + return COMPACT_CONTINUE; + } /* Direct compactor: Is a suitable page free? */ for (order = cc->order; order < MAX_ORDER; order++) { @@ -1335,20 +1349,40 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_ * other migratetype buddy lists. */ if (find_suitable_fallback(area, order, migratetype, - true, &can_steal) != -1) - return COMPACT_SUCCESS; + true, &can_steal) != -1) { + + /* movable pages are OK in any pageblock */ + if (migratetype == MIGRATE_MOVABLE) + return COMPACT_SUCCESS; + + /* + * We are stealing for a non-movable allocation. Make + * sure we finish compacting the current pageblock + * first so it is as free as possible and we won't + * have to steal another one soon. This only applies + * to sync compaction, as async compaction operates + * on pageblocks of the same migratetype. + */ + if (cc->mode == MIGRATE_ASYNC || + IS_ALIGNED(cc->migrate_pfn, + pageblock_nr_pages)) { + return COMPACT_SUCCESS; + } + + cc->finishing_block = true; + return COMPACT_CONTINUE; + } } return COMPACT_NO_SUITABLE_PAGE; } static enum compact_result compact_finished(struct zone *zone, - struct compact_control *cc, - const int migratetype) + struct compact_control *cc) { int ret; - ret = __compact_finished(zone, cc, migratetype); + ret = __compact_finished(zone, cc); trace_mm_compaction_finished(zone, cc->order, ret); if (ret == COMPACT_NO_SUITABLE_PAGE) ret = COMPACT_CONTINUE; @@ -1481,9 +1515,9 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro enum compact_result ret; unsigned long start_pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); - const int migratetype = gfpflags_to_migratetype(cc->gfp_mask); const bool sync = cc->mode != MIGRATE_ASYNC; + cc->migratetype = gfpflags_to_migratetype(cc->gfp_mask); ret = compaction_suitable(zone, cc->order, cc->alloc_flags, cc->classzone_idx); /* Compaction is likely to fail */ @@ -1533,8 +1567,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro migrate_prep_local(); - while ((ret = compact_finished(zone, cc, migratetype)) == - COMPACT_CONTINUE) { + while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) { int err; switch (isolate_migratepages(zone, cc)) { diff --git a/mm/filemap.c b/mm/filemap.c index 1694623a628902b76e11d6983eebd1ea7d33ddf1..6f1be573a5e60fbb0c4a6cbcf85df9b32b0fc673 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -519,7 +519,7 @@ EXPORT_SYMBOL(filemap_write_and_wait); * * Write out and wait upon file offsets lstart->lend, inclusive. * - * Note that `lend' is inclusive (describes the last byte to be written) so + * Note that @lend is inclusive (describes the last byte to be written) so * that this function can be used to write to the very end-of-file (end = -1). */ int filemap_write_and_wait_range(struct address_space *mapping, @@ -1277,12 +1277,14 @@ EXPORT_SYMBOL(find_lock_entry); * * PCG flags modify how the page is returned. * - * FGP_ACCESSED: the page will be marked accessed - * FGP_LOCK: Page is return locked - * FGP_CREAT: If page is not present then a new page is allocated using - * @gfp_mask and added to the page cache and the VM's LRU - * list. The page is returned locked and with an increased - * refcount. Otherwise, %NULL is returned. + * @fgp_flags can be: + * + * - FGP_ACCESSED: the page will be marked accessed + * - FGP_LOCK: Page is return locked + * - FGP_CREAT: If page is not present then a new page is allocated using + * @gfp_mask and added to the page cache and the VM's LRU + * list. The page is returned locked and with an increased + * refcount. Otherwise, NULL is returned. * * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even * if the GFP flags specified for FGP_CREAT are atomic. @@ -2033,7 +2035,6 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - struct iov_iter data = *iter; loff_t size; size = i_size_read(inode); @@ -2044,11 +2045,12 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) file_accessed(file); - retval = mapping->a_ops->direct_IO(iocb, &data); + retval = mapping->a_ops->direct_IO(iocb, iter); if (retval >= 0) { iocb->ki_pos += retval; - iov_iter_advance(iter, retval); + count -= retval; } + iov_iter_revert(iter, count - iov_iter_count(iter)); /* * Btrfs can have a short DIO read if we encounter @@ -2059,7 +2061,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) * the rest of the read. Buffered reads will not work for * DAX files, so don't bother trying. */ - if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size || + if (retval < 0 || !count || iocb->ki_pos >= size || IS_DAX(inode)) goto out; } @@ -2202,12 +2204,12 @@ int filemap_fault(struct vm_fault *vmf) struct file_ra_state *ra = &file->f_ra; struct inode *inode = mapping->host; pgoff_t offset = vmf->pgoff; + pgoff_t max_off; struct page *page; - loff_t size; int ret = 0; - size = round_up(i_size_read(inode), PAGE_SIZE); - if (offset >= size >> PAGE_SHIFT) + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) return VM_FAULT_SIGBUS; /* @@ -2256,8 +2258,8 @@ int filemap_fault(struct vm_fault *vmf) * Found the page and have a reference on it. * We must recheck i_size under page lock. */ - size = round_up(i_size_read(inode), PAGE_SIZE); - if (unlikely(offset >= size >> PAGE_SHIFT)) { + max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + if (unlikely(offset >= max_off)) { unlock_page(page); put_page(page); return VM_FAULT_SIGBUS; @@ -2323,7 +2325,7 @@ void filemap_map_pages(struct vm_fault *vmf, struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; pgoff_t last_pgoff = start_pgoff; - loff_t size; + unsigned long max_idx; struct page *head, *page; rcu_read_lock(); @@ -2369,8 +2371,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->mapping != mapping || !PageUptodate(page)) goto unlock; - size = round_up(i_size_read(mapping->host), PAGE_SIZE); - if (page->index >= size >> PAGE_SHIFT) + max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); + if (page->index >= max_idx) goto unlock; if (file->f_ra.mmap_miss > 0) @@ -2704,7 +2706,6 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) ssize_t written; size_t write_len; pgoff_t end; - struct iov_iter data; write_len = iov_iter_count(from); end = (pos + write_len - 1) >> PAGE_SHIFT; @@ -2719,22 +2720,19 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) * about to write. We do this *before* the write so that we can return * without clobbering -EIOCBQUEUED from ->direct_IO(). */ - if (mapping->nrpages) { - written = invalidate_inode_pages2_range(mapping, + written = invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end); - /* - * If a page can not be invalidated, return 0 to fall back - * to buffered write. - */ - if (written) { - if (written == -EBUSY) - return 0; - goto out; - } + /* + * If a page can not be invalidated, return 0 to fall back + * to buffered write. + */ + if (written) { + if (written == -EBUSY) + return 0; + goto out; } - data = *from; - written = mapping->a_ops->direct_IO(iocb, &data); + written = mapping->a_ops->direct_IO(iocb, from); /* * Finally, try again to invalidate clean pages which might have been @@ -2744,20 +2742,19 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) * so we don't support it 100%. If this invalidation * fails, tough, the write still worked... */ - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); if (written > 0) { pos += written; - iov_iter_advance(from, written); + write_len -= written; if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { i_size_write(inode, pos); mark_inode_dirty(inode); } iocb->ki_pos = pos; } + iov_iter_revert(from, write_len - iov_iter_count(from)); out: return written; } @@ -2794,12 +2791,6 @@ ssize_t generic_perform_write(struct file *file, ssize_t written = 0; unsigned int flags = 0; - /* - * Copies from kernel address space cannot fail (NFSD is a big user). - */ - if (!iter_is_iovec(i)) - flags |= AOP_FLAG_UNINTERRUPTIBLE; - do { struct page *page; unsigned long offset; /* Offset into pagecache page */ @@ -3001,7 +2992,7 @@ EXPORT_SYMBOL(generic_file_write_iter); * @gfp_mask: memory allocation flags (and I/O mode) * * The address_space is to try to release any data against the page - * (presumably at page->private). If the release was successful, return `1'. + * (presumably at page->private). If the release was successful, return '1'. * Otherwise return zero. * * This may also be called if PG_fscache is set on a page, indicating that the diff --git a/mm/frame_vector.c b/mm/frame_vector.c index db77dcb38afda3d3720a228e14236fb2e324f929..72ebec18629c2529fc6d2b6faeb46040adcac611 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -200,10 +200,7 @@ struct frame_vector *frame_vector_create(unsigned int nr_frames) * Avoid higher order allocations, use vmalloc instead. It should * be rare anyway. */ - if (size <= PAGE_SIZE) - vec = kmalloc(size, GFP_KERNEL); - else - vec = vmalloc(size); + vec = kvmalloc(size, GFP_KERNEL); if (!vec) return NULL; vec->nr_allocated = nr_frames; diff --git a/mm/gup.c b/mm/gup.c index 04aa405350dce8656db4293a34e95e9bfbe166d8..576c4df588823ab948c4a5d0dd12b6d54b94b430 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, /* mlock all present pages, but do not fault in new pages */ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) return -ENOENT; - /* For mm_populate(), just skip the stack guard page. */ - if ((*flags & FOLL_POPULATE) && - (stack_guard_page_start(vma, address) || - stack_guard_page_end(vma, address + PAGE_SIZE))) - return -ENOENT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) @@ -407,12 +402,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, ret = handle_mm_fault(vma, address, fault_flags); if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, *flags); + + if (err) + return err; BUG(); } @@ -723,12 +716,10 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, ret = handle_mm_fault(vma, address, fault_flags); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return -EHWPOISON; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, 0); + + if (err) + return err; BUG(); } @@ -1189,34 +1180,57 @@ struct page *get_dump_page(unsigned long addr) */ #ifdef CONFIG_HAVE_GENERIC_RCU_GUP +#ifndef gup_get_pte +/* + * We assume that the PTE can be read atomically. If this is not the case for + * your architecture, please provide the helper. + */ +static inline pte_t gup_get_pte(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + +static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) +{ + while ((*nr) - nr_start) { + struct page *page = pages[--(*nr)]; + + ClearPageReferenced(page); + put_page(page); + } +} + #ifdef __HAVE_ARCH_PTE_SPECIAL static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { + struct dev_pagemap *pgmap = NULL; + int nr_start = *nr, ret = 0; pte_t *ptep, *ptem; - int ret = 0; ptem = ptep = pte_offset_map(&pmd, addr); do { - /* - * In the line below we are assuming that the pte can be read - * atomically. If this is not the case for your architecture, - * please wrap this in a helper function! - * - * for an example see gup_get_pte in arch/x86/mm/gup.c - */ - pte_t pte = READ_ONCE(*ptep); + pte_t pte = gup_get_pte(ptep); struct page *head, *page; /* * Similar to the PMD case below, NUMA hinting must take slow * path using the pte_protnone check. */ - if (!pte_present(pte) || pte_special(pte) || - pte_protnone(pte) || (write && !pte_write(pte))) + if (pte_protnone(pte)) + goto pte_unmap; + + if (!pte_access_permitted(pte, write)) goto pte_unmap; - if (!arch_pte_access_permitted(pte, write)) + if (pte_devmap(pte)) { + pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + goto pte_unmap; + } + } else if (pte_special(pte)) goto pte_unmap; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); @@ -1232,6 +1246,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } VM_BUG_ON_PAGE(compound_head(page) != head, page); + + put_dev_pagemap(pgmap); + SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -1261,15 +1278,76 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } #endif /* __HAVE_ARCH_PTE_SPECIAL */ +#ifdef __HAVE_ARCH_PTE_DEVMAP +static int __gup_device_huge(unsigned long pfn, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + int nr_start = *nr; + struct dev_pagemap *pgmap = NULL; + + do { + struct page *page = pfn_to_page(pfn); + + pgmap = get_dev_pagemap(pfn, pgmap); + if (unlikely(!pgmap)) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + SetPageReferenced(page); + pages[*nr] = page; + get_page(page); + put_dev_pagemap(pgmap); + (*nr)++; + pfn++; + } while (addr += PAGE_SIZE, addr != end); + return 1; +} + +static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + unsigned long fault_pfn; + + fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + return __gup_device_huge(fault_pfn, addr, end, pages, nr); +} + +static int __gup_device_huge_pud(pud_t pud, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + unsigned long fault_pfn; + + fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + return __gup_device_huge(fault_pfn, addr, end, pages, nr); +} +#else +static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + BUILD_BUG(); + return 0; +} + +static int __gup_device_huge_pud(pud_t pud, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + BUILD_BUG(); + return 0; +} +#endif + static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct page *head, *page; int refs; - if (write && !pmd_write(orig)) + if (!pmd_access_permitted(orig, write)) return 0; + if (pmd_devmap(orig)) + return __gup_device_huge_pmd(orig, addr, end, pages, nr); + refs = 0; head = pmd_page(orig); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); @@ -1293,6 +1371,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; } + SetPageReferenced(head); return 1; } @@ -1302,9 +1381,12 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, struct page *head, *page; int refs; - if (write && !pud_write(orig)) + if (!pud_access_permitted(orig, write)) return 0; + if (pud_devmap(orig)) + return __gup_device_huge_pud(orig, addr, end, pages, nr); + refs = 0; head = pud_page(orig); page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); @@ -1328,6 +1410,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, return 0; } + SetPageReferenced(head); return 1; } @@ -1338,9 +1421,10 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, int refs; struct page *head, *page; - if (write && !pgd_write(orig)) + if (!pgd_access_permitted(orig, write)) return 0; + BUILD_BUG_ON(pgd_devmap(orig)); refs = 0; head = pgd_page(orig); page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); @@ -1364,6 +1448,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, return 0; } + SetPageReferenced(head); return 1; } @@ -1481,7 +1566,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, end = start + len; if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) + (void __user *)start, len))) return 0; /* @@ -1520,6 +1605,21 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, return nr; } +#ifndef gup_fast_permitted +/* + * Check if it's allowed to use __get_user_pages_fast() for the range, or + * we need to fall back to the slow version: + */ +bool gup_fast_permitted(unsigned long start, int nr_pages, int write) +{ + unsigned long len, end; + + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + return end >= start; +} +#endif + /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address @@ -1539,11 +1639,14 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - int nr, ret; + int nr = 0, ret = 0; start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; + + if (gup_fast_permitted(start, nr_pages, write)) { + nr = __get_user_pages_fast(start, nr_pages, write, pages); + ret = nr; + } if (nr < nr_pages) { /* Try to get the remaining pages with get_user_pages */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f3c4f9d22821f889104340332eee93c5e124df4d..88c6167f194db0ec07d707329569c2d4a9d34876 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -715,7 +715,8 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf) } static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write) + pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write, + pgtable_t pgtable) { struct mm_struct *mm = vma->vm_mm; pmd_t entry; @@ -729,6 +730,12 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, entry = pmd_mkyoung(pmd_mkdirty(entry)); entry = maybe_pmd_mkwrite(entry, vma); } + + if (pgtable) { + pgtable_trans_huge_deposit(mm, pmd, pgtable); + atomic_long_inc(&mm->nr_ptes); + } + set_pmd_at(mm, addr, pmd, entry); update_mmu_cache_pmd(vma, addr, pmd); spin_unlock(ptl); @@ -738,6 +745,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, pfn_t pfn, bool write) { pgprot_t pgprot = vma->vm_page_prot; + pgtable_t pgtable = NULL; /* * If we had pmd_special, we could avoid all these restrictions, * but we need to be consistent with PTEs and architectures that @@ -752,9 +760,15 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return VM_FAULT_SIGBUS; + if (arch_needs_pgtable_deposit()) { + pgtable = pte_alloc_one(vma->vm_mm, addr); + if (!pgtable) + return VM_FAULT_OOM; + } + track_pfn_insert(vma, &pgprot, pfn); - insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write); + insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable); return VM_FAULT_NOPAGE; } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); @@ -1412,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) */ if (unlikely(pmd_trans_migrating(*vmf->pmd))) { page = pmd_page(*vmf->pmd); + if (!get_page_unless_zero(page)) + goto out_unlock; spin_unlock(vmf->ptl); wait_on_page_locked(page); + put_page(page); goto out; } @@ -1445,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) /* Migration could have started since the pmd_trans_migrating check */ if (!page_locked) { + page_nid = -1; + if (!get_page_unless_zero(page)) + goto out_unlock; spin_unlock(vmf->ptl); wait_on_page_locked(page); - page_nid = -1; + put_page(page); goto out; } @@ -1564,9 +1584,6 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, ClearPageDirty(page); unlock_page(page); - if (PageActive(page)) - deactivate_page(page); - if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) { pmdp_invalidate(vma, addr, pmd); orig_pmd = pmd_mkold(orig_pmd); @@ -1575,6 +1592,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, set_pmd_at(mm, addr, pmd, orig_pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); } + + mark_page_lazyfree(page); ret = true; out: spin_unlock(ptl); @@ -1612,12 +1631,13 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb->fullmm); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); if (vma_is_dax(vma)) { + if (arch_needs_pgtable_deposit()) + zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); if (is_huge_zero_pmd(orig_pmd)) tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE); } else if (is_huge_zero_pmd(orig_pmd)) { - pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd)); - atomic_long_dec(&tlb->mm->nr_ptes); + zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE); } else { @@ -1626,10 +1646,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); VM_BUG_ON_PAGE(!PageHead(page), page); if (PageAnon(page)) { - pgtable_t pgtable; - pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); - pte_free(tlb->mm, pgtable); - atomic_long_dec(&tlb->mm->nr_ptes); + zap_deposited_table(tlb->mm, pmd); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); } else { if (arch_needs_pgtable_deposit()) @@ -2145,15 +2162,15 @@ static void freeze_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; - int ret; + bool unmap_success; VM_BUG_ON_PAGE(!PageHead(page), page); if (PageAnon(page)) ttu_flags |= TTU_MIGRATION; - ret = try_to_unmap(page, ttu_flags); - VM_BUG_ON_PAGE(ret, page); + unmap_success = try_to_unmap(page, ttu_flags); + VM_BUG_ON_PAGE(!unmap_success, page); } static void unfreeze_page(struct page *page) @@ -2399,7 +2416,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) VM_BUG_ON_PAGE(is_huge_zero_page(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(!PageSwapBacked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); if (PageAnon(head)) { diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e5828875f7bbd7a770d5c23334a0e3994ffe544f..3eedb187e5496f36f7f3186267f475254bcda5ab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { + int err = vm_fault_to_errno(ret, flags); + + if (err) + return err; + remainder = 0; break; } diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 9d26fd9fefe4a1f4ec78279455c3b27f4ddbb875..356df057a2a8d3752b9e058ce74fe7b48142e02b 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -34,8 +34,7 @@ static int hwpoison_inject(void *data, u64 val) if (!hwpoison_filter_enable) goto inject; - if (!PageLRU(hpage) && !PageHuge(p)) - shake_page(hpage, 0); + shake_page(hpage, 0); /* * This implies unable to support non-LRU pages. */ diff --git a/mm/internal.h b/mm/internal.h index 266efaeaa370a46debcc5b6b614a72e33833ac4d..0e4f558412fb195c0b2bd997fbc0ac1b9c1e4566 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -80,12 +80,17 @@ static inline void set_page_refcounted(struct page *page) extern unsigned long highest_memmap_pfn; +/* + * Maximum number of reclaim retries without progress before the OOM + * killer is consider the only way forward. + */ +#define MAX_RECLAIM_RETRIES 16 + /* * in mm/vmscan.c: */ extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); -extern bool pgdat_reclaimable(struct pglist_data *pgdat); /* * in mm/rmap.c: @@ -178,6 +183,7 @@ extern int user_min_free_kbytes; struct compact_control { struct list_head freepages; /* List of free pages to migrate to */ struct list_head migratepages; /* List of pages being migrated */ + struct zone *zone; unsigned long nr_freepages; /* Number of isolated free pages */ unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long total_migrate_scanned; @@ -185,17 +191,18 @@ struct compact_control { unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ unsigned long last_migrated_pfn;/* Not yet flushed page being freed */ + const gfp_t gfp_mask; /* gfp mask of a direct compactor */ + int order; /* order a direct compactor needs */ + int migratetype; /* migratetype of direct compactor */ + const unsigned int alloc_flags; /* alloc flags of a direct compactor */ + const int classzone_idx; /* zone index of a direct compactor */ enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ bool whole_zone; /* Whole zone should/has been scanned */ - int order; /* order a direct compactor needs */ - const gfp_t gfp_mask; /* gfp mask of a direct compactor */ - const unsigned int alloc_flags; /* alloc flags of a direct compactor */ - const int classzone_idx; /* zone index of a direct compactor */ - struct zone *zone; bool contended; /* Signal lock or sched contention */ + bool finishing_block; /* Finishing current pageblock */ }; unsigned long @@ -505,4 +512,14 @@ extern const struct trace_print_flags pageflag_names[]; extern const struct trace_print_flags vmaflag_names[]; extern const struct trace_print_flags gfpflag_names[]; +static inline bool is_migrate_highatomic(enum migratetype migratetype) +{ + return migratetype == MIGRATE_HIGHATOMIC; +} + +static inline bool is_migrate_highatomic_page(struct page *page) +{ + return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC; +} + #endif /* __MM_INTERNAL_H */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 98b27195e38b07fc1b6e20c1f9c49db9bc112303..c81549d5c8330f59bec68165127dff1d3aab85bd 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size, *size += sizeof(struct kasan_alloc_meta); /* Add free meta. */ - if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor || + if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor || cache->object_size < sizeof(struct kasan_free_meta)) { cache->kasan_info.free_meta_offset = *size; *size += sizeof(struct kasan_free_meta); @@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return; kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); @@ -572,12 +572,13 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) s8 shadow_byte; /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); if (shadow_byte < 0 || shadow_byte >= KASAN_SHADOW_SCALE_SIZE) { - kasan_report_double_free(cache, object, shadow_byte); + kasan_report_double_free(cache, object, + __builtin_return_address(1)); return true; } @@ -690,7 +691,7 @@ int kasan_module_alloc(void *addr, size_t size) ret = __vmalloc_node_range(shadow_size, 1, shadow_start, shadow_start + shadow_size, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, __builtin_return_address(0)); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index dd2dea8eb0771a506c0b510efc79c3fc5253bda5..1229298cce646ddc725c4f1ea9d823a0c71e3cd1 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -99,7 +99,7 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, - s8 shadow); + void *ip); #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index ab42a0803f161c6834b1362aefd5ded1990eb04f..beee0e980e2dd3385b007fbcb119f599207dacc0 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -51,7 +51,13 @@ static const void *find_first_bad_addr(const void *addr, size_t size) return first_bad_addr; } -static void print_error_description(struct kasan_access_info *info) +static bool addr_has_shadow(struct kasan_access_info *info) +{ + return (info->access_addr >= + kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +} + +static const char *get_shadow_bug_type(struct kasan_access_info *info) { const char *bug_type = "unknown-crash"; u8 *shadow_addr; @@ -98,12 +104,39 @@ static void print_error_description(struct kasan_access_info *info) break; } - pr_err("BUG: KASAN: %s in %pS at addr %p\n", - bug_type, (void *)info->ip, - info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? "Write" : "Read", - info->access_size, current->comm, task_pid_nr(current)); + return bug_type; +} + +const char *get_wild_bug_type(struct kasan_access_info *info) +{ + const char *bug_type = "unknown-crash"; + + if ((unsigned long)info->access_addr < PAGE_SIZE) + bug_type = "null-ptr-deref"; + else if ((unsigned long)info->access_addr < TASK_SIZE) + bug_type = "user-memory-access"; + else + bug_type = "wild-memory-access"; + + return bug_type; +} + +static const char *get_bug_type(struct kasan_access_info *info) +{ + if (addr_has_shadow(info)) + return get_shadow_bug_type(info); + return get_wild_bug_type(info); +} + +static void print_error_description(struct kasan_access_info *info) +{ + const char *bug_type = get_bug_type(info); + + pr_err("BUG: KASAN: %s in %pS\n", + bug_type, (void *)info->ip); + pr_err("%s of size %zu at addr %p by task %s/%d\n", + info->is_write ? "Write" : "Read", info->access_size, + info->access_addr, current->comm, task_pid_nr(current)); } static inline bool kernel_or_module_addr(const void *addr) @@ -144,9 +177,9 @@ static void kasan_end_report(unsigned long *flags) kasan_enable_current(); } -static void print_track(struct kasan_track *track) +static void print_track(struct kasan_track *track, const char *prefix) { - pr_err("PID = %u\n", track->pid); + pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { struct stack_trace trace; @@ -157,59 +190,84 @@ static void print_track(struct kasan_track *track) } } -static void kasan_object_err(struct kmem_cache *cache, void *object) +static struct page *addr_to_page(const void *addr) { - struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); + if ((addr >= (void *)PAGE_OFFSET) && + (addr < high_memory)) + return virt_to_head_page(addr); + return NULL; +} - dump_stack(); - pr_err("Object at %p, in cache %s size: %d\n", object, cache->name, - cache->object_size); +static void describe_object_addr(struct kmem_cache *cache, void *object, + const void *addr) +{ + unsigned long access_addr = (unsigned long)addr; + unsigned long object_addr = (unsigned long)object; + const char *rel_type; + int rel_bytes; - if (!(cache->flags & SLAB_KASAN)) + pr_err("The buggy address belongs to the object at %p\n" + " which belongs to the cache %s of size %d\n", + object, cache->name, cache->object_size); + + if (!addr) return; - pr_err("Allocated:\n"); - print_track(&alloc_info->alloc_track); - pr_err("Freed:\n"); - print_track(&alloc_info->free_track); + if (access_addr < object_addr) { + rel_type = "to the left"; + rel_bytes = object_addr - access_addr; + } else if (access_addr >= object_addr + cache->object_size) { + rel_type = "to the right"; + rel_bytes = access_addr - (object_addr + cache->object_size); + } else { + rel_type = "inside"; + rel_bytes = access_addr - object_addr; + } + + pr_err("The buggy address is located %d bytes %s of\n" + " %d-byte region [%p, %p)\n", + rel_bytes, rel_type, cache->object_size, (void *)object_addr, + (void *)(object_addr + cache->object_size)); } -void kasan_report_double_free(struct kmem_cache *cache, void *object, - s8 shadow) +static void describe_object(struct kmem_cache *cache, void *object, + const void *addr) { - unsigned long flags; + struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object); - kasan_start_report(&flags); - pr_err("BUG: Double free or freeing an invalid pointer\n"); - pr_err("Unexpected shadow byte: 0x%hhX\n", shadow); - kasan_object_err(cache, object); - kasan_end_report(&flags); + if (cache->flags & SLAB_KASAN) { + print_track(&alloc_info->alloc_track, "Allocated"); + pr_err("\n"); + print_track(&alloc_info->free_track, "Freed"); + pr_err("\n"); + } + + describe_object_addr(cache, object, addr); } -static void print_address_description(struct kasan_access_info *info) +static void print_address_description(void *addr) { - const void *addr = info->access_addr; + struct page *page = addr_to_page(addr); - if ((addr >= (void *)PAGE_OFFSET) && - (addr < high_memory)) { - struct page *page = virt_to_head_page(addr); - - if (PageSlab(page)) { - void *object; - struct kmem_cache *cache = page->slab_cache; - object = nearest_obj(cache, page, - (void *)info->access_addr); - kasan_object_err(cache, object); - return; - } - dump_page(page, "kasan: bad access detected"); + dump_stack(); + pr_err("\n"); + + if (page && PageSlab(page)) { + struct kmem_cache *cache = page->slab_cache; + void *object = nearest_obj(cache, page, addr); + + describe_object(cache, object, addr); } - if (kernel_or_module_addr(addr)) { - if (!init_task_stack_addr(addr)) - pr_err("Address belongs to variable %pS\n", addr); + if (kernel_or_module_addr(addr) && !init_task_stack_addr(addr)) { + pr_err("The buggy address belongs to the variable:\n"); + pr_err(" %pS\n", addr); + } + + if (page) { + pr_err("The buggy address belongs to the page:\n"); + dump_page(page, "kasan: bad access detected"); } - dump_stack(); } static bool row_is_guilty(const void *row, const void *guilty) @@ -264,31 +322,34 @@ static void print_shadow_for_address(const void *addr) } } +void kasan_report_double_free(struct kmem_cache *cache, void *object, + void *ip) +{ + unsigned long flags; + + kasan_start_report(&flags); + pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", ip); + pr_err("\n"); + print_address_description(object); + pr_err("\n"); + print_shadow_for_address(object); + kasan_end_report(&flags); +} + static void kasan_report_error(struct kasan_access_info *info) { unsigned long flags; - const char *bug_type; kasan_start_report(&flags); - if (info->access_addr < - kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) { - if ((unsigned long)info->access_addr < PAGE_SIZE) - bug_type = "null-ptr-deref"; - else if ((unsigned long)info->access_addr < TASK_SIZE) - bug_type = "user-memory-access"; - else - bug_type = "wild-memory-access"; - pr_err("BUG: KASAN: %s on address %p\n", - bug_type, info->access_addr); - pr_err("%s of size %zu by task %s/%d\n", - info->is_write ? "Write" : "Read", - info->access_size, current->comm, - task_pid_nr(current)); + print_error_description(info); + pr_err("\n"); + + if (!addr_has_shadow(info)) { dump_stack(); } else { - print_error_description(info); - print_address_description(info); + print_address_description((void *)info->access_addr); + pr_err("\n"); print_shadow_for_address(info->first_bad_addr); } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index ba40b7f673f4dd44af403c7ed33860c6e2094046..945fd1ca49b5af0bc3b87dbfe8098f0f602775a2 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -483,8 +483,7 @@ void __khugepaged_exit(struct mm_struct *mm) static void release_pte_page(struct page *page) { - /* 0 stands for page_is_file_cache(page) == false */ - dec_node_page_state(page, NR_ISOLATED_ANON + 0); + dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); unlock_page(page); putback_lru_page(page); } @@ -532,7 +531,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, VM_BUG_ON_PAGE(PageCompound(page), page); VM_BUG_ON_PAGE(!PageAnon(page), page); - VM_BUG_ON_PAGE(!PageSwapBacked(page), page); /* * We can do it before isolate_lru_page because the @@ -550,7 +548,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, * The page must only be referenced by the scanned process * and page swap cache. */ - if (page_count(page) != 1 + !!PageSwapCache(page)) { + if (page_count(page) != 1 + PageSwapCache(page)) { unlock_page(page); result = SCAN_PAGE_COUNT; goto out; @@ -579,8 +577,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, result = SCAN_DEL_PAGE_LRU; goto out; } - /* 0 stands for page_is_file_cache(page) == false */ - inc_node_page_state(page, NR_ISOLATED_ANON + 0); + inc_node_page_state(page, + NR_ISOLATED_ANON + page_is_file_cache(page)); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageLRU(page), page); @@ -614,7 +612,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, spinlock_t *ptl) { pte_t *_pte; - for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) { + for (_pte = pte; _pte < pte + HPAGE_PMD_NR; + _pte++, page++, address += PAGE_SIZE) { pte_t pteval = *_pte; struct page *src_page; @@ -653,9 +652,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, spin_unlock(ptl); free_page_and_swap_cache(src_page); } - - address += PAGE_SIZE; - page++; + cond_resched(); } } @@ -909,8 +906,10 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, return false; } /* check if the pmd is still valid */ - if (mm_find_pmd(mm, address) != pmd) + if (mm_find_pmd(mm, address) != pmd) { + trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; + } } if (ret & VM_FAULT_ERROR) { trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); @@ -1183,7 +1182,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, * The page must only be referenced by the scanned process * and page swap cache. */ - if (page_count(page) != 1 + !!PageSwapCache(page)) { + if (page_count(page) != 1 + PageSwapCache(page)) { result = SCAN_PAGE_COUNT; goto out_unmap; } diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index 5bf191756a4a07b04ffe1dd792f475e1e0af0492..2d5959c5f7c50469ca3d59da9c62c6c2dd932917 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) { /* TODO: RCU freeing is unsupported for now; hide false positives. */ - if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) + if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) kmemcheck_mark_freed(object, size); } diff --git a/mm/ksm.c b/mm/ksm.c index 19b4f2dea7a591793ff8e18b6eeeafdc5df1de30..216184af0e192b5405efc5a594129fa7c53ae953 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma, goto out; if (PageTransCompound(page)) { - err = split_huge_page(page); - if (err) + if (split_huge_page(page)) goto out_unlock; } @@ -1933,11 +1932,10 @@ struct page *ksm_might_need_to_copy(struct page *page, return new_page; } -int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) +void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) { struct stable_node *stable_node; struct rmap_item *rmap_item; - int ret = SWAP_AGAIN; int search_new_forks = 0; VM_BUG_ON_PAGE(!PageKsm(page), page); @@ -1950,7 +1948,7 @@ int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) stable_node = page_stable_node(page); if (!stable_node) - return ret; + return; again: hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; @@ -1978,23 +1976,20 @@ int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; - ret = rwc->rmap_one(page, vma, - rmap_item->address, rwc->arg); - if (ret != SWAP_AGAIN) { + if (!rwc->rmap_one(page, vma, + rmap_item->address, rwc->arg)) { anon_vma_unlock_read(anon_vma); - goto out; + return; } if (rwc->done && rwc->done(page)) { anon_vma_unlock_read(anon_vma); - goto out; + return; } } anon_vma_unlock_read(anon_vma); } if (!search_new_forks++) goto again; -out: - return ret; } #ifdef CONFIG_MIGRATION diff --git a/mm/madvise.c b/mm/madvise.c index 7a2abf0127aef7a9d4879278293d8cab766133e1..25b78ee4fc2c77addde06a22580e02bf05bb3b51 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -411,10 +411,9 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, ptent = pte_mkold(ptent); ptent = pte_mkclean(ptent); set_pte_at(mm, addr, pte, ptent); - if (PageActive(page)) - deactivate_page(page); tlb_remove_tlb_entry(tlb, pte, addr); } + mark_page_lazyfree(page); } out: if (nr_swap) { @@ -606,34 +605,40 @@ static long madvise_remove(struct vm_area_struct *vma, /* * Error injection support for memory error handling. */ -static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end) +static int madvise_inject_error(int behavior, + unsigned long start, unsigned long end) { - struct page *p; + struct page *page; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + for (; start < end; start += PAGE_SIZE << - compound_order(compound_head(p))) { + compound_order(compound_head(page))) { int ret; - ret = get_user_pages_fast(start, 1, 0, &p); + ret = get_user_pages_fast(start, 1, 0, &page); if (ret != 1) return ret; - if (PageHWPoison(p)) { - put_page(p); + if (PageHWPoison(page)) { + put_page(page); continue; } - if (bhv == MADV_SOFT_OFFLINE) { - pr_info("Soft offlining page %#lx at %#lx\n", - page_to_pfn(p), start); - ret = soft_offline_page(p, MF_COUNT_INCREASED); + + if (behavior == MADV_SOFT_OFFLINE) { + pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n", + page_to_pfn(page), start); + + ret = soft_offline_page(page, MF_COUNT_INCREASED); if (ret) return ret; continue; } - pr_info("Injecting memory failure for page %#lx at %#lx\n", - page_to_pfn(p), start); - ret = memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED); + pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", + page_to_pfn(page), start); + + ret = memory_failure(page_to_pfn(page), 0, MF_COUNT_INCREASED); if (ret) return ret; } @@ -651,13 +656,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, case MADV_WILLNEED: return madvise_willneed(vma, prev, start, end); case MADV_FREE: - /* - * XXX: In this implementation, MADV_FREE works like - * MADV_DONTNEED on swapless system or full swap. - */ - if (get_nr_swap_pages() > 0) - return madvise_free(vma, prev, start, end); - /* passthrough */ + return madvise_free(vma, prev, start, end); case MADV_DONTNEED: return madvise_dontneed(vma, prev, start, end); default: @@ -688,6 +687,10 @@ madvise_behavior_valid(int behavior) #endif case MADV_DONTDUMP: case MADV_DODUMP: +#ifdef CONFIG_MEMORY_FAILURE + case MADV_SOFT_OFFLINE: + case MADV_HWPOISON: +#endif return true; default: @@ -761,10 +764,6 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) size_t len; struct blk_plug plug; -#ifdef CONFIG_MEMORY_FAILURE - if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) - return madvise_hwpoison(behavior, start, start+len_in); -#endif if (!madvise_behavior_valid(behavior)) return error; @@ -784,6 +783,11 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) if (end == start) return error; +#ifdef CONFIG_MEMORY_FAILURE + if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) + return madvise_inject_error(behavior, start, start + len_in); +#endif + write = madvise_need_mmap_write(behavior); if (write) { if (down_write_killable(¤t->mm->mmap_sem)) diff --git a/mm/memblock.c b/mm/memblock.c index 696f06d17c4e89b676f19c3c3a5a4c1908697caf..7b8a5db76a2fec7331f09048f3c19ebdfddf9f16 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -804,6 +804,18 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); } +/** + * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region. + * @base: the base phys addr of the region + * @size: the size of the region + * + * Return 0 on success, -errno on failure. + */ +int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) +{ + return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP); +} + /** * __next_reserved_mem_region - next function for for_each_reserved_region() * @idx: pointer to u64 loop variable @@ -1531,11 +1543,37 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit) (phys_addr_t)ULLONG_MAX); } +void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size) +{ + int start_rgn, end_rgn; + int i, ret; + + if (!size) + return; + + ret = memblock_isolate_range(&memblock.memory, base, size, + &start_rgn, &end_rgn); + if (ret) + return; + + /* remove all the MAP regions */ + for (i = memblock.memory.cnt - 1; i >= end_rgn; i--) + if (!memblock_is_nomap(&memblock.memory.regions[i])) + memblock_remove_region(&memblock.memory, i); + + for (i = start_rgn - 1; i >= 0; i--) + if (!memblock_is_nomap(&memblock.memory.regions[i])) + memblock_remove_region(&memblock.memory, i); + + /* truncate the reserved regions */ + memblock_remove_range(&memblock.reserved, 0, base); + memblock_remove_range(&memblock.reserved, + base + size, (phys_addr_t)ULLONG_MAX); +} + void __init memblock_mem_limit_remove_map(phys_addr_t limit) { - struct memblock_type *type = &memblock.memory; phys_addr_t max_addr; - int i, ret, start_rgn, end_rgn; if (!limit) return; @@ -1546,19 +1584,7 @@ void __init memblock_mem_limit_remove_map(phys_addr_t limit) if (max_addr == (phys_addr_t)ULLONG_MAX) return; - ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX, - &start_rgn, &end_rgn); - if (ret) - return; - - /* remove all the MAP regions above the limit */ - for (i = end_rgn - 1; i >= start_rgn; i--) { - if (!memblock_is_nomap(&type->regions[i])) - memblock_remove_region(type, i); - } - /* truncate the reserved regions */ - memblock_remove_range(&memblock.reserved, max_addr, - (phys_addr_t)ULLONG_MAX); + memblock_cap_memory_range(0, max_addr); } static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) @@ -1713,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type) } } +extern unsigned long __init_memblock +memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr) +{ + struct memblock_region *rgn; + unsigned long size = 0; + int idx; + + for_each_memblock_type((&memblock.reserved), rgn) { + phys_addr_t start, end; + + if (rgn->base + rgn->size < start_addr) + continue; + if (rgn->base > end_addr) + continue; + + start = rgn->base; + end = start + rgn->size; + size += end - start; + } + + return size; +} + void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2bd7541d7c11231431c060ca6cfe84a89f096fe3..94172089f52fce369c57ce8d3e783a9480d522f2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -100,24 +100,7 @@ static bool do_memsw_account(void) return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && do_swap_account; } -static const char * const mem_cgroup_stat_names[] = { - "cache", - "rss", - "rss_huge", - "mapped_file", - "dirty", - "writeback", - "swap", -}; - -static const char * const mem_cgroup_events_names[] = { - "pgpgin", - "pgpgout", - "pgfault", - "pgmajfault", -}; - -static const char * const mem_cgroup_lru_names[] = { +static const char *const mem_cgroup_lru_names[] = { "inactive_anon", "active_anon", "inactive_file", @@ -568,32 +551,15 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) * common workload, threshold and synchronization as vmstat[] should be * implemented. */ -static unsigned long -mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) -{ - long val = 0; - int cpu; - - /* Per-cpu values can be negative, use a signed accumulator */ - for_each_possible_cpu(cpu) - val += per_cpu(memcg->stat->count[idx], cpu); - /* - * Summing races with updates, so val may be negative. Avoid exposing - * transient negative values. - */ - if (val < 0) - val = 0; - return val; -} -static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx) +static unsigned long memcg_sum_events(struct mem_cgroup *memcg, + enum memcg_event_item event) { unsigned long val = 0; int cpu; for_each_possible_cpu(cpu) - val += per_cpu(memcg->stat->events[idx], cpu); + val += per_cpu(memcg->stat->events[event], cpu); return val; } @@ -606,23 +572,23 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, * counted as CACHE even if it's on ANON LRU. */ if (PageAnon(page)) - __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], - nr_pages); - else - __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE], - nr_pages); + __this_cpu_add(memcg->stat->count[MEMCG_RSS], nr_pages); + else { + __this_cpu_add(memcg->stat->count[MEMCG_CACHE], nr_pages); + if (PageSwapBacked(page)) + __this_cpu_add(memcg->stat->count[NR_SHMEM], nr_pages); + } if (compound) { VM_BUG_ON_PAGE(!PageTransHuge(page), page); - __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], - nr_pages); + __this_cpu_add(memcg->stat->count[MEMCG_RSS_HUGE], nr_pages); } /* pagein of a big page is an event. So, ignore page size */ if (nr_pages > 0) - __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]); + __this_cpu_inc(memcg->stat->events[PGPGIN]); else { - __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]); + __this_cpu_inc(memcg->stat->events[PGPGOUT]); nr_pages = -nr_pages; /* for event */ } @@ -1144,6 +1110,28 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) return false; } +unsigned int memcg1_stats[] = { + MEMCG_CACHE, + MEMCG_RSS, + MEMCG_RSS_HUGE, + NR_SHMEM, + NR_FILE_MAPPED, + NR_FILE_DIRTY, + NR_WRITEBACK, + MEMCG_SWAP, +}; + +static const char *const memcg1_stat_names[] = { + "cache", + "rss", + "rss_huge", + "shmem", + "mapped_file", + "dirty", + "writeback", + "swap", +}; + #define K(x) ((x) << (PAGE_SHIFT-10)) /** * mem_cgroup_print_oom_info: Print OOM information relevant to memory controller. @@ -1188,11 +1176,11 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) pr_cont_cgroup_path(iter->css.cgroup); pr_cont(":"); - for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { - if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) + for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { + if (memcg1_stats[i] == MEMCG_SWAP && !do_swap_account) continue; - pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], - K(mem_cgroup_read_stat(iter, i))); + pr_cont(" %s:%luKB", memcg1_stat_names[i], + K(memcg_page_state(iter, memcg1_stats[i]))); } for (i = 0; i < NR_LRU_LISTS; i++) @@ -1837,7 +1825,7 @@ static void reclaim_high(struct mem_cgroup *memcg, do { if (page_counter_read(&memcg->memory) <= memcg->high) continue; - mem_cgroup_events(memcg, MEMCG_HIGH, 1); + mem_cgroup_event(memcg, MEMCG_HIGH); try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); } while ((memcg = parent_mem_cgroup(memcg))); } @@ -1928,7 +1916,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, if (!gfpflags_allow_blocking(gfp_mask)) goto nomem; - mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); + mem_cgroup_event(mem_over_limit, MEMCG_MAX); nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, gfp_mask, may_swap); @@ -1971,7 +1959,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, if (fatal_signal_pending(current)) goto force; - mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1); + mem_cgroup_event(mem_over_limit, MEMCG_OOM); mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages * PAGE_SIZE)); @@ -2381,7 +2369,7 @@ void mem_cgroup_split_huge_fixup(struct page *head) for (i = 1; i < HPAGE_PMD_NR; i++) head[i].mem_cgroup = head->mem_cgroup; - __this_cpu_sub(head->mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE], + __this_cpu_sub(head->mem_cgroup->stat->count[MEMCG_RSS_HUGE], HPAGE_PMD_NR); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -2391,7 +2379,7 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, bool charge) { int val = (charge) ? 1 : -1; - this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); + this_cpu_add(memcg->stat->count[MEMCG_SWAP], val); } /** @@ -2725,7 +2713,7 @@ static void tree_stat(struct mem_cgroup *memcg, unsigned long *stat) for_each_mem_cgroup_tree(iter, memcg) { for (i = 0; i < MEMCG_NR_STAT; i++) - stat[i] += mem_cgroup_read_stat(iter, i); + stat[i] += memcg_page_state(iter, i); } } @@ -2738,7 +2726,7 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events) for_each_mem_cgroup_tree(iter, memcg) { for (i = 0; i < MEMCG_NR_EVENTS; i++) - events[i] += mem_cgroup_read_events(iter, i); + events[i] += memcg_sum_events(iter, i); } } @@ -2750,13 +2738,10 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) struct mem_cgroup *iter; for_each_mem_cgroup_tree(iter, memcg) { - val += mem_cgroup_read_stat(iter, - MEM_CGROUP_STAT_CACHE); - val += mem_cgroup_read_stat(iter, - MEM_CGROUP_STAT_RSS); + val += memcg_page_state(iter, MEMCG_CACHE); + val += memcg_page_state(iter, MEMCG_RSS); if (swap) - val += mem_cgroup_read_stat(iter, - MEM_CGROUP_STAT_SWAP); + val += memcg_page_state(iter, MEMCG_SWAP); } } else { if (!swap) @@ -3131,6 +3116,21 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) } #endif /* CONFIG_NUMA */ +/* Universal VM events cgroup1 shows, original sort order */ +unsigned int memcg1_events[] = { + PGPGIN, + PGPGOUT, + PGFAULT, + PGMAJFAULT, +}; + +static const char *const memcg1_event_names[] = { + "pgpgin", + "pgpgout", + "pgfault", + "pgmajfault", +}; + static int memcg_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); @@ -3138,22 +3138,20 @@ static int memcg_stat_show(struct seq_file *m, void *v) struct mem_cgroup *mi; unsigned int i; - BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_stat_names) != - MEM_CGROUP_STAT_NSTATS); - BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_events_names) != - MEM_CGROUP_EVENTS_NSTATS); + BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); - for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { - if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account()) + for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { + if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; - seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], - mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); + seq_printf(m, "%s %lu\n", memcg1_stat_names[i], + memcg_page_state(memcg, memcg1_stats[i]) * + PAGE_SIZE); } - for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) - seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i], - mem_cgroup_read_events(memcg, i)); + for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) + seq_printf(m, "%s %lu\n", memcg1_event_names[i], + memcg_sum_events(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], @@ -3171,23 +3169,23 @@ static int memcg_stat_show(struct seq_file *m, void *v) seq_printf(m, "hierarchical_memsw_limit %llu\n", (u64)memsw * PAGE_SIZE); - for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { + for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { unsigned long long val = 0; - if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account()) + if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; for_each_mem_cgroup_tree(mi, memcg) - val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; - seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val); + val += memcg_page_state(mi, memcg1_stats[i]) * + PAGE_SIZE; + seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], val); } - for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { + for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) { unsigned long long val = 0; for_each_mem_cgroup_tree(mi, memcg) - val += mem_cgroup_read_events(mi, i); - seq_printf(m, "total_%s %llu\n", - mem_cgroup_events_names[i], val); + val += memcg_sum_events(mi, memcg1_events[i]); + seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], val); } for (i = 0; i < NR_LRU_LISTS; i++) { @@ -3652,10 +3650,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - *pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY); + *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ - *pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | (1 << LRU_ACTIVE_FILE)); *pheadroom = PAGE_COUNTER_MAX; @@ -4511,33 +4509,29 @@ static int mem_cgroup_move_account(struct page *page, spin_lock_irqsave(&from->move_lock, flags); if (!anon && page_mapped(page)) { - __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], - nr_pages); - __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], - nr_pages); + __this_cpu_sub(from->stat->count[NR_FILE_MAPPED], nr_pages); + __this_cpu_add(to->stat->count[NR_FILE_MAPPED], nr_pages); } /* * move_lock grabbed above and caller set from->moving_account, so - * mem_cgroup_update_page_stat() will serialize updates to PageDirty. + * mod_memcg_page_state will serialize updates to PageDirty. * So mapping should be stable for dirty pages. */ if (!anon && PageDirty(page)) { struct address_space *mapping = page_mapping(page); if (mapping_cap_account_dirty(mapping)) { - __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_DIRTY], + __this_cpu_sub(from->stat->count[NR_FILE_DIRTY], nr_pages); - __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_DIRTY], + __this_cpu_add(to->stat->count[NR_FILE_DIRTY], nr_pages); } } if (PageWriteback(page)) { - __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK], - nr_pages); - __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK], - nr_pages); + __this_cpu_sub(from->stat->count[NR_WRITEBACK], nr_pages); + __this_cpu_add(to->stat->count[NR_WRITEBACK], nr_pages); } /* @@ -5154,7 +5148,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, continue; } - mem_cgroup_events(memcg, MEMCG_OOM, 1); + mem_cgroup_event(memcg, MEMCG_OOM); if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) break; } @@ -5167,10 +5161,10 @@ static int memory_events_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); - seq_printf(m, "low %lu\n", mem_cgroup_read_events(memcg, MEMCG_LOW)); - seq_printf(m, "high %lu\n", mem_cgroup_read_events(memcg, MEMCG_HIGH)); - seq_printf(m, "max %lu\n", mem_cgroup_read_events(memcg, MEMCG_MAX)); - seq_printf(m, "oom %lu\n", mem_cgroup_read_events(memcg, MEMCG_OOM)); + seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW)); + seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH)); + seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX)); + seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM)); return 0; } @@ -5197,9 +5191,9 @@ static int memory_stat_show(struct seq_file *m, void *v) tree_events(memcg, events); seq_printf(m, "anon %llu\n", - (u64)stat[MEM_CGROUP_STAT_RSS] * PAGE_SIZE); + (u64)stat[MEMCG_RSS] * PAGE_SIZE); seq_printf(m, "file %llu\n", - (u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE); + (u64)stat[MEMCG_CACHE] * PAGE_SIZE); seq_printf(m, "kernel_stack %llu\n", (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024); seq_printf(m, "slab %llu\n", @@ -5208,12 +5202,14 @@ static int memory_stat_show(struct seq_file *m, void *v) seq_printf(m, "sock %llu\n", (u64)stat[MEMCG_SOCK] * PAGE_SIZE); + seq_printf(m, "shmem %llu\n", + (u64)stat[NR_SHMEM] * PAGE_SIZE); seq_printf(m, "file_mapped %llu\n", - (u64)stat[MEM_CGROUP_STAT_FILE_MAPPED] * PAGE_SIZE); + (u64)stat[NR_FILE_MAPPED] * PAGE_SIZE); seq_printf(m, "file_dirty %llu\n", - (u64)stat[MEM_CGROUP_STAT_DIRTY] * PAGE_SIZE); + (u64)stat[NR_FILE_DIRTY] * PAGE_SIZE); seq_printf(m, "file_writeback %llu\n", - (u64)stat[MEM_CGROUP_STAT_WRITEBACK] * PAGE_SIZE); + (u64)stat[NR_WRITEBACK] * PAGE_SIZE); for (i = 0; i < NR_LRU_LISTS; i++) { struct mem_cgroup *mi; @@ -5232,10 +5228,15 @@ static int memory_stat_show(struct seq_file *m, void *v) /* Accumulated memory events */ - seq_printf(m, "pgfault %lu\n", - events[MEM_CGROUP_EVENTS_PGFAULT]); - seq_printf(m, "pgmajfault %lu\n", - events[MEM_CGROUP_EVENTS_PGMAJFAULT]); + seq_printf(m, "pgfault %lu\n", events[PGFAULT]); + seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]); + + seq_printf(m, "workingset_refault %lu\n", + stat[WORKINGSET_REFAULT]); + seq_printf(m, "workingset_activate %lu\n", + stat[WORKINGSET_ACTIVATE]); + seq_printf(m, "workingset_nodereclaim %lu\n", + stat[WORKINGSET_NODERECLAIM]); return 0; } @@ -5476,8 +5477,8 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, unsigned long nr_anon, unsigned long nr_file, - unsigned long nr_huge, unsigned long nr_kmem, - struct page *dummy_page) + unsigned long nr_kmem, unsigned long nr_huge, + unsigned long nr_shmem, struct page *dummy_page) { unsigned long nr_pages = nr_anon + nr_file + nr_kmem; unsigned long flags; @@ -5492,10 +5493,11 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, } local_irq_save(flags); - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); - __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); - __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); + __this_cpu_sub(memcg->stat->count[MEMCG_RSS], nr_anon); + __this_cpu_sub(memcg->stat->count[MEMCG_CACHE], nr_file); + __this_cpu_sub(memcg->stat->count[MEMCG_RSS_HUGE], nr_huge); + __this_cpu_sub(memcg->stat->count[NR_SHMEM], nr_shmem); + __this_cpu_add(memcg->stat->events[PGPGOUT], pgpgout); __this_cpu_add(memcg->stat->nr_page_events, nr_pages); memcg_check_events(memcg, dummy_page); local_irq_restore(flags); @@ -5507,6 +5509,7 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, static void uncharge_list(struct list_head *page_list) { struct mem_cgroup *memcg = NULL; + unsigned long nr_shmem = 0; unsigned long nr_anon = 0; unsigned long nr_file = 0; unsigned long nr_huge = 0; @@ -5525,7 +5528,7 @@ static void uncharge_list(struct list_head *page_list) next = page->lru.next; VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); if (!page->mem_cgroup) continue; @@ -5539,9 +5542,9 @@ static void uncharge_list(struct list_head *page_list) if (memcg != page->mem_cgroup) { if (memcg) { uncharge_batch(memcg, pgpgout, nr_anon, nr_file, - nr_huge, nr_kmem, page); - pgpgout = nr_anon = nr_file = - nr_huge = nr_kmem = 0; + nr_kmem, nr_huge, nr_shmem, page); + pgpgout = nr_anon = nr_file = nr_kmem = 0; + nr_huge = nr_shmem = 0; } memcg = page->mem_cgroup; } @@ -5555,8 +5558,11 @@ static void uncharge_list(struct list_head *page_list) } if (PageAnon(page)) nr_anon += nr_pages; - else + else { nr_file += nr_pages; + if (PageSwapBacked(page)) + nr_shmem += nr_pages; + } pgpgout++; } else { nr_kmem += 1 << compound_order(page); @@ -5568,7 +5574,7 @@ static void uncharge_list(struct list_head *page_list) if (memcg) uncharge_batch(memcg, pgpgout, nr_anon, nr_file, - nr_huge, nr_kmem, page); + nr_kmem, nr_huge, nr_shmem, page); } /** diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 27f7210e7fabd1441d699d328213f95302c79378..ecc183fd94f36f35e91b69c6a06930bb456a7e86 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -220,6 +220,9 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, */ void shake_page(struct page *p, int access) { + if (PageHuge(p)) + return; + if (!PageSlab(p)) { lru_add_drain_all(); if (PageLRU(p)) @@ -322,7 +325,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, * wrong earlier. */ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, - int fail, struct page *page, unsigned long pfn, + bool fail, struct page *page, unsigned long pfn, int flags) { struct to_kill *tk, *next; @@ -536,6 +539,13 @@ static int delete_from_lru_cache(struct page *p) */ ClearPageActive(p); ClearPageUnevictable(p); + + /* + * Poisoned page might never drop its ref count to 0 so we have + * to uncharge it manually from its memcg. + */ + mem_cgroup_uncharge(p); + /* * drop the page count elevated by isolate_lru_page() */ @@ -904,35 +914,36 @@ EXPORT_SYMBOL_GPL(get_hwpoison_page); * Do all that is necessary to remove user space mappings. Unmap * the pages and send SIGBUS to the processes if the data was dirty. */ -static int hwpoison_user_mappings(struct page *p, unsigned long pfn, +static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, int trapno, int flags, struct page **hpagep) { - enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; + enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; struct address_space *mapping; LIST_HEAD(tokill); - int ret; + bool unmap_success; int kill = 1, forcekill; struct page *hpage = *hpagep; + bool mlocked = PageMlocked(hpage); /* * Here we are interested only in user-mapped pages, so skip any * other types of pages. */ if (PageReserved(p) || PageSlab(p)) - return SWAP_SUCCESS; + return true; if (!(PageLRU(hpage) || PageHuge(p))) - return SWAP_SUCCESS; + return true; /* * This check implies we don't kill processes if their pages * are in the swap cache early. Those are always late kills. */ if (!page_mapped(hpage)) - return SWAP_SUCCESS; + return true; if (PageKsm(p)) { pr_err("Memory failure: %#lx: can't handle KSM pages.\n", pfn); - return SWAP_FAIL; + return false; } if (PageSwapCache(p)) { @@ -971,11 +982,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, if (kill) collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED); - ret = try_to_unmap(hpage, ttu); - if (ret != SWAP_SUCCESS) + unmap_success = try_to_unmap(hpage, ttu); + if (!unmap_success) pr_err("Memory failure: %#lx: failed to unmap page (mapcount=%d)\n", pfn, page_mapcount(hpage)); + /* + * try_to_unmap() might put mlocked page in lru cache, so call + * shake_page() again to ensure that it's flushed. + */ + if (mlocked) + shake_page(hpage, 0); + /* * Now that the dirty bit has been propagated to the * struct page and all unmaps done we can decide if @@ -987,10 +1005,9 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * any accesses to the poisoned memory. */ forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL); - kill_procs(&tokill, forcekill, trapno, - ret != SWAP_SUCCESS, p, pfn, flags); + kill_procs(&tokill, forcekill, trapno, !unmap_success, p, pfn, flags); - return ret; + return unmap_success; } static void set_page_hwpoison_huge_page(struct page *hpage) @@ -1138,22 +1155,14 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * The check (unnecessarily) ignores LRU pages being isolated and * walked by the page reclaim code, however that's not a big loss. */ - if (!PageHuge(p)) { - if (!PageLRU(p)) - shake_page(p, 0); - if (!PageLRU(p)) { - /* - * shake_page could have turned it free. - */ - if (is_free_buddy_page(p)) { - if (flags & MF_COUNT_INCREASED) - action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); - else - action_result(pfn, MF_MSG_BUDDY_2ND, - MF_DELAYED); - return 0; - } - } + shake_page(p, 0); + /* shake_page could have turned it free. */ + if (!PageLRU(p) && is_free_buddy_page(p)) { + if (flags & MF_COUNT_INCREASED) + action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); + else + action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED); + return 0; } lock_page(hpage); @@ -1175,7 +1184,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * page_remove_rmap() in try_to_unmap_one(). So to determine page status * correctly, we save a copy of the page flags at this time. */ - page_flags = p->flags; + if (PageHuge(p)) + page_flags = hpage->flags; + else + page_flags = p->flags; /* * unpoison always clear PG_hwpoison inside page lock @@ -1230,8 +1242,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * When the raw error page is thp tail page, hpage points to the raw * page after thp split. */ - if (hwpoison_user_mappings(p, pfn, trapno, flags, &hpage) - != SWAP_SUCCESS) { + if (!hwpoison_user_mappings(p, pfn, trapno, flags, &hpage)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); res = -EBUSY; goto out; @@ -1543,8 +1554,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags) if (ret == 1 && !PageLRU(page)) { /* Drop page reference which is from __get_any_page() */ put_hwpoison_page(page); - pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n", - pfn, page->flags); + pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n", + pfn, page->flags, &page->flags); return -EIO; } } @@ -1585,14 +1596,10 @@ static int soft_offline_huge_page(struct page *page, int flags) ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { - pr_info("soft offline: %#lx: migration failed %d, type %lx\n", - pfn, ret, page->flags); - /* - * We know that soft_offline_huge_page() tries to migrate - * only one hugepage pointed to by hpage, so we need not - * run through the pagelist here. - */ - putback_active_hugepage(hpage); + pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n", + pfn, ret, page->flags, &page->flags); + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); if (ret > 0) ret = -EIO; } else { @@ -1677,14 +1684,14 @@ static int __soft_offline_page(struct page *page, int flags) if (!list_empty(&pagelist)) putback_movable_pages(&pagelist); - pr_info("soft offline: %#lx: migration failed %d, type %lx\n", - pfn, ret, page->flags); + pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n", + pfn, ret, page->flags, &page->flags); if (ret > 0) ret = -EIO; } } else { - pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n", - pfn, ret, page_count(page), page->flags); + pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n", + pfn, ret, page_count(page), page->flags, &page->flags); } return ret; } diff --git a/mm/memory.c b/mm/memory.c index 235ba51b2fbf07ffeeeb6b70d6522b4b0addb3de..bb11c474857e5ff2a3756724ca30139ff41ebbab 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2854,40 +2854,6 @@ int do_swap_page(struct vm_fault *vmf) return ret; } -/* - * This is like a special single-page "expand_{down|up}wards()", - * except we must first make sure that 'address{-|+}PAGE_SIZE' - * doesn't hit another vma. - */ -static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) { - struct vm_area_struct *prev = vma->vm_prev; - - /* - * Is there a mapping abutting this one below? - * - * That's only ok if it's the same stack mapping - * that has gotten split.. - */ - if (prev && prev->vm_end == address) - return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - - return expand_downwards(vma, address - PAGE_SIZE); - } - if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { - struct vm_area_struct *next = vma->vm_next; - - /* As VM_GROWSDOWN but s/below/above/ */ - if (next && next->vm_start == address + PAGE_SIZE) - return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; - - return expand_upwards(vma, address + PAGE_SIZE); - } - return 0; -} - /* * We enter with non-exclusive mmap_sem (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. @@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf) if (vma->vm_flags & VM_SHARED) return VM_FAULT_SIGBUS; - /* Check if we need to add a guard page to the stack */ - if (check_stack_guard_page(vma, vmf->address) < 0) - return VM_FAULT_SIGSEGV; - /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created @@ -3029,6 +2991,17 @@ static int __do_fault(struct vm_fault *vmf) return ret; } +/* + * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. + * If we check pmd_trans_unstable() first we will trip the bad_pmd() check + * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly + * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. + */ +static int pmd_devmap_trans_unstable(pmd_t *pmd) +{ + return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); +} + static int pte_alloc_one_map(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -3052,18 +3025,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf) map_pte: /* * If a huge pmd materialized under us just retry later. Use - * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd - * didn't become pmd_trans_huge under us and then back to pmd_none, as - * a result of MADV_DONTNEED running immediately after a huge pmd fault - * in a different thread of this mm, in turn leading to a misleading - * pmd_trans_huge() retval. All we have to ensure is that it is a - * regular pmd that we can walk with pte_offset_map() and we can do that - * through an atomic read in C, which is what pmd_trans_unstable() - * provides. + * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of + * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge + * under us and then back to pmd_none, as a result of MADV_DONTNEED + * running immediately after a huge pmd fault in a different thread of + * this mm, in turn leading to a misleading pmd_trans_huge() retval. + * All we have to ensure is that it is a regular pmd that we can walk + * with pte_offset_map() and we can do that through an atomic read in + * C, which is what pmd_trans_unstable() provides. */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; + /* + * At this point we know that our vmf->pmd points to a page of ptes + * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() + * for the duration of the fault. If a racing MADV_DONTNEED runs and + * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still + * be valid and we will re-check to make sure the vmf->pte isn't + * pte_none() under vmf->ptl protection when we return to + * alloc_set_pte(). + */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); return 0; @@ -3690,7 +3672,7 @@ static int handle_pte_fault(struct vm_fault *vmf) vmf->pte = NULL; } else { /* See comment in pte_alloc_one_map() */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge @@ -4298,7 +4280,7 @@ void __might_fault(const char *file, int line) * get paged out, therefore we'll never actually fault, and the * below annotations will generate false positives. */ - if (segment_eq(get_fs(), KERNEL_DS)) + if (uaccess_kernel()) return; if (pagefault_disabled()) return; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6fa7208bcd564ec8fb6bcf25e206aef9bd724ecb..b63d7d1239df22714da632e191ce4d03049daeb0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1208,7 +1208,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) arch_refresh_nodedata(nid, pgdat); } else { - /* Reset the nr_zones, order and classzone_idx before reuse */ + /* + * Reset the nr_zones, order and classzone_idx before reuse. + * Note that kswapd will init kswapd_classzone_idx properly + * when it starts in the near future. + */ pgdat->nr_zones = 0; pgdat->kswapd_order = 0; pgdat->kswapd_classzone_idx = 0; diff --git a/mm/migrate.c b/mm/migrate.c index ed97c2c14fa80b47ffbf7fa22ec6d4b9b57202b1..89a0a1707f4c67deb77dc466cb6f5c2ecfe19051 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -184,9 +184,9 @@ void putback_movable_pages(struct list_head *l) unlock_page(page); put_page(page); } else { - putback_lru_page(page); dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); + putback_lru_page(page); } } } @@ -194,7 +194,7 @@ void putback_movable_pages(struct list_head *l) /* * Restore a potential migration pte to a working pte entry */ -static int remove_migration_pte(struct page *page, struct vm_area_struct *vma, +static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *old) { struct page_vma_mapped_walk pvmw = { @@ -253,7 +253,7 @@ static int remove_migration_pte(struct page *page, struct vm_area_struct *vma, update_mmu_cache(vma, pvmw.address, pvmw.pte); } - return SWAP_AGAIN; + return true; } /* @@ -1722,9 +1722,6 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat, { int z; - if (!pgdat_reclaimable(pgdat)) - return false; - for (z = pgdat->nr_zones - 1; z >= 0; z--) { struct zone *zone = pgdat->node_zones + z; @@ -1947,7 +1944,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, /* Prepare a page as a migration target */ __SetPageLocked(new_page); - __SetPageSwapBacked(new_page); + if (PageSwapBacked(page)) + __SetPageSwapBacked(new_page); /* anon mapping, we can simply copy page->mapping to the new page: */ new_page->mapping = page->mapping; diff --git a/mm/mlock.c b/mm/mlock.c index 0dd9ca18e19ed7ddb499a480c5831c312791b10a..b562b5523a6544e6c0ae6e4f792943441f6217a3 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -123,17 +123,15 @@ static bool __munlock_isolate_lru_page(struct page *page, bool getpage) */ static void __munlock_isolated_page(struct page *page) { - int ret = SWAP_AGAIN; - /* * Optimization: if the page was mapped just once, that's our mapping * and we don't need to check all the other vmas. */ if (page_mapcount(page) > 1) - ret = try_to_munlock(page); + try_to_munlock(page); /* Did try_to_unlock() succeed or punt? */ - if (ret != SWAP_MLOCK) + if (!PageMlocked(page)) count_vm_event(UNEVICTABLE_PGMUNLOCKED); putback_lru_page(page); @@ -286,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked; + int delta_munlocked = -nr; struct pagevec pvec_putback; int pgrescued = 0; @@ -306,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) continue; else __munlock_isolation_failed(page); + } else { + delta_munlocked++; } /* @@ -317,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; } - delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(zone_lru_lock(zone)); diff --git a/mm/mmap.c b/mm/mmap.c index bfbe8856d134f367464e3582d9a432260487777c..8e07976d5e47727273e9b469992f76ab0c309d58 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) unsigned long retval; unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; + struct vm_area_struct *next; unsigned long min_brk; bool populate; LIST_HEAD(uf); @@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) } /* Check against existing mmap mappings. */ - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + next = find_vma(mm, oldbrk); + if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) goto out; /* Ok, looks good - let it rip. */ @@ -253,10 +255,22 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) static long vma_compute_subtree_gap(struct vm_area_struct *vma) { - unsigned long max, subtree_gap; - max = vma->vm_start; - if (vma->vm_prev) - max -= vma->vm_prev->vm_end; + unsigned long max, prev_end, subtree_gap; + + /* + * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we + * allow two stack_guard_gaps between them here, and when choosing + * an unmapped area; whereas when expanding we only require one. + * That's a little inconsistent, but keeps the code here simpler. + */ + max = vm_start_gap(vma); + if (vma->vm_prev) { + prev_end = vm_end_gap(vma->vm_prev); + if (max > prev_end) + max -= prev_end; + else + max = 0; + } if (vma->vm_rb.rb_left) { subtree_gap = rb_entry(vma->vm_rb.rb_left, struct vm_area_struct, vm_rb)->rb_subtree_gap; @@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm) anon_vma_unlock_read(anon_vma); } - highest_address = vma->vm_end; + highest_address = vm_end_gap(vma); vma = vma->vm_next; i++; } @@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_next) vma_gap_update(vma->vm_next); else - mm->highest_vm_end = vma->vm_end; + mm->highest_vm_end = vm_end_gap(vma); /* * vma->vm_prev wasn't known when we followed the rbtree to find the @@ -856,7 +870,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, vma_gap_update(vma); if (end_changed) { if (!next) - mm->highest_vm_end = end; + mm->highest_vm_end = vm_end_gap(vma); else if (!adjust_next) vma_gap_update(next); } @@ -941,7 +955,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, * mm->highest_vm_end doesn't need any update * in remove_next == 1 case. */ - VM_WARN_ON(mm->highest_vm_end != end); + VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma)); } } if (insert && file) @@ -1479,7 +1493,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct user_struct *user = NULL; struct hstate *hs; - hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK); + hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (!hs) return -EINVAL; @@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) while (true) { /* Visit left subtree if it looks promising */ - gap_end = vma->vm_start; + gap_end = vm_start_gap(vma); if (gap_end >= low_limit && vma->vm_rb.rb_left) { struct vm_area_struct *left = rb_entry(vma->vm_rb.rb_left, @@ -1798,7 +1812,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) } } - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; check_current: /* Check if current node has a suitable gap */ if (gap_start > high_limit) @@ -1825,8 +1839,8 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) vma = rb_entry(rb_parent(prev), struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_left) { - gap_start = vma->vm_prev->vm_end; - gap_end = vma->vm_start; + gap_start = vm_end_gap(vma->vm_prev); + gap_end = vm_start_gap(vma); goto check_current; } } @@ -1890,7 +1904,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) while (true) { /* Visit right subtree if it looks promising */ - gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; if (gap_start <= high_limit && vma->vm_rb.rb_right) { struct vm_area_struct *right = rb_entry(vma->vm_rb.rb_right, @@ -1903,7 +1917,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) check_current: /* Check if current node has a suitable gap */ - gap_end = vma->vm_start; + gap_end = vm_start_gap(vma); if (gap_end < low_limit) return -ENOMEM; if (gap_start <= high_limit && gap_end - gap_start >= length) @@ -1929,7 +1943,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) struct vm_area_struct, vm_rb); if (prev == vma->vm_rb.rb_right) { gap_start = vma->vm_prev ? - vma->vm_prev->vm_end : 0; + vm_end_gap(vma->vm_prev) : 0; goto check_current; } } @@ -1967,7 +1981,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct vm_unmapped_area_info info; if (len > TASK_SIZE - mmap_min_addr) @@ -1978,9 +1992,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + vma = find_vma_prev(mm, addr, &prev); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) return addr; } @@ -2003,7 +2018,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { - struct vm_area_struct *vma; + struct vm_area_struct *vma, *prev; struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; @@ -2018,9 +2033,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); + vma = find_vma_prev(mm, addr, &prev); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma)) && + (!prev || addr >= vm_end_gap(prev))) return addr; } @@ -2155,21 +2171,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, * update accounting. This is shared with both the * grow-up and grow-down cases. */ -static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) +static int acct_stack_growth(struct vm_area_struct *vma, + unsigned long size, unsigned long grow) { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; - unsigned long new_start, actual_size; + unsigned long new_start; /* address space limit tests */ if (!may_expand_vm(mm, vma->vm_flags, grow)) return -ENOMEM; /* Stack limit test */ - actual_size = size; - if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) - actual_size -= PAGE_SIZE; - if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur)) + if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM; /* mlock limit tests */ @@ -2207,17 +2221,30 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *next; + unsigned long gap_addr; int error = 0; if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; /* Guard against wrapping around to address 0. */ - if (address < PAGE_ALIGN(address+4)) - address = PAGE_ALIGN(address+4); - else + address &= PAGE_MASK; + address += PAGE_SIZE; + if (!address) return -ENOMEM; + /* Enforce stack_guard_gap */ + gap_addr = address + stack_guard_gap; + if (gap_addr < address) + return -ENOMEM; + next = vma->vm_next; + if (next && next->vm_start < gap_addr) { + if (!(next->vm_flags & VM_GROWSUP)) + return -ENOMEM; + /* Check that both stack segments have the same anon_vma? */ + } + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; @@ -2261,7 +2288,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) if (vma->vm_next) vma_gap_update(vma->vm_next); else - mm->highest_vm_end = address; + mm->highest_vm_end = vm_end_gap(vma); spin_unlock(&mm->page_table_lock); perf_event_mmap(vma); @@ -2282,6 +2309,8 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *prev; + unsigned long gap_addr; int error; address &= PAGE_MASK; @@ -2289,6 +2318,17 @@ int expand_downwards(struct vm_area_struct *vma, if (error) return error; + /* Enforce stack_guard_gap */ + gap_addr = address - stack_guard_gap; + if (gap_addr > address) + return -ENOMEM; + prev = vma->vm_prev; + if (prev && prev->vm_end > gap_addr) { + if (!(prev->vm_flags & VM_GROWSDOWN)) + return -ENOMEM; + /* Check that both stack segments have the same anon_vma? */ + } + /* We must make sure the anon_vma is allocated. */ if (unlikely(anon_vma_prepare(vma))) return -ENOMEM; @@ -2343,28 +2383,25 @@ int expand_downwards(struct vm_area_struct *vma, return error; } -/* - * Note how expand_stack() refuses to expand the stack all the way to - * abut the next virtual mapping, *unless* that mapping itself is also - * a stack mapping. We want to leave room for a guard page, after all - * (the guard page itself is not added here, that is done by the - * actual page faulting logic) - * - * This matches the behavior of the guard page logic (see mm/memory.c: - * check_stack_guard_page()), which only allows the guard page to be - * removed under these circumstances. - */ +/* enforced gap between the expanding stack and other mappings. */ +unsigned long stack_guard_gap = 256UL<lock); - init_timer_deferrable(&dom->period_timer); - dom->period_timer.function = writeout_period; - dom->period_timer.data = (unsigned long)dom; + setup_deferrable_timer(&dom->period_timer, writeout_period, + (unsigned long)dom); dom->dirty_limit_tstamp = jiffies; @@ -2353,10 +2352,16 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->nr_to_write <= 0) return 0; - if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); - else - ret = generic_writepages(mapping, wbc); + while (1) { + if (mapping->a_ops->writepages) + ret = mapping->a_ops->writepages(mapping, wbc); + else + ret = generic_writepages(mapping, wbc); + if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL)) + break; + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + } return ret; } @@ -2428,7 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) inode_attach_wb(inode, page); wb = inode_to_wb(inode); - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY); + inc_memcg_page_state(page, NR_FILE_DIRTY); __inc_node_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); __inc_node_page_state(page, NR_DIRTIED); @@ -2450,7 +2455,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb) { if (mapping_cap_account_dirty(mapping)) { - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); + dec_memcg_page_state(page, NR_FILE_DIRTY); dec_node_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); @@ -2707,7 +2712,7 @@ int clear_page_dirty_for_io(struct page *page) */ wb = unlocked_inode_to_wb_begin(inode, &locked); if (TestClearPageDirty(page)) { - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); + dec_memcg_page_state(page, NR_FILE_DIRTY); dec_node_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); @@ -2754,7 +2759,7 @@ int test_clear_page_writeback(struct page *page) ret = TestClearPageWriteback(page); } if (ret) { - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); + dec_memcg_page_state(page, NR_WRITEBACK); dec_node_page_state(page, NR_WRITEBACK); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); @@ -2809,7 +2814,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) ret = TestSetPageWriteback(page); } if (!ret) { - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); + inc_memcg_page_state(page, NR_WRITEBACK); inc_node_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f3d603cef2c0c0e5aef09540dd2f8d50da5a808c..2302f250d6b1ba150e3c2e4e17cfb6c99574ab5b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -291,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static inline void reset_deferred_meminit(pg_data_t *pgdat) { + unsigned long max_initialise; + unsigned long reserved_lowmem; + + /* + * Initialise at least 2G of a node but also take into account that + * two large system hashes that can take up 1GB for 0.25TB/node. + */ + max_initialise = max(2UL << (30 - PAGE_SHIFT), + (pgdat->node_spanned_pages >> 8)); + + /* + * Compensate the all the memblock reservations (e.g. crash kernel) + * from the initial estimation to make sure we will initialize enough + * memory to boot. + */ + reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, + pgdat->node_start_pfn + max_initialise); + max_initialise += reserved_lowmem; + + pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); pgdat->first_deferred_pfn = ULONG_MAX; } @@ -313,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat, unsigned long pfn, unsigned long zone_end, unsigned long *nr_initialised) { - unsigned long max_initialise; - /* Always populate low zones for address-contrained allocations */ if (zone_end < pgdat_end_pfn(pgdat)) return true; - /* - * Initialise at least 2G of a node but also take into account that - * two large system hashes that can take up 1GB for 0.25TB/node. - */ - max_initialise = max(2UL << (30 - PAGE_SHIFT), - (pgdat->node_spanned_pages >> 8)); - (*nr_initialised)++; - if ((*nr_initialised > max_initialise) && + if ((*nr_initialised > pgdat->static_init_size) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { pgdat->first_deferred_pfn = pfn; return false; @@ -1090,14 +1102,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, { int migratetype = 0; int batch_free = 0; - unsigned long nr_scanned, flags; bool isolated_pageblocks; - spin_lock_irqsave(&zone->lock, flags); + spin_lock(&zone->lock); isolated_pageblocks = has_isolate_pageblock(zone); - nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); - if (nr_scanned) - __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); while (count) { struct page *page; @@ -1142,7 +1150,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, trace_mm_page_pcpu_drain(page, 0, mt); } while (--count && --batch_free && !list_empty(list)); } - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); } static void free_one_page(struct zone *zone, @@ -1150,19 +1158,13 @@ static void free_one_page(struct zone *zone, unsigned int order, int migratetype) { - unsigned long nr_scanned, flags; - spin_lock_irqsave(&zone->lock, flags); - __count_vm_events(PGFREE, 1 << order); - nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); - if (nr_scanned) - __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); - + spin_lock(&zone->lock); if (unlikely(has_isolate_pageblock(zone) || is_migrate_isolate(migratetype))) { migratetype = get_pfnblock_migratetype(page, pfn); } __free_one_page(page, pfn, zone, order, migratetype); - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -1240,6 +1242,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) static void __free_pages_ok(struct page *page, unsigned int order) { + unsigned long flags; int migratetype; unsigned long pfn = page_to_pfn(page); @@ -1247,7 +1250,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) return; migratetype = get_pfnblock_migratetype(page, pfn); + local_irq_save(flags); + __count_vm_events(PGFREE, 1 << order); free_one_page(page_zone(page), page, pfn, order, migratetype); + local_irq_restore(flags); } static void __init __free_pages_boot_core(struct page *page, unsigned int order) @@ -1695,10 +1701,10 @@ static inline int check_new_page(struct page *page) return 1; } -static inline bool free_pages_prezeroed(bool poisoned) +static inline bool free_pages_prezeroed(void) { return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && - page_poisoning_enabled() && poisoned; + page_poisoning_enabled(); } #ifdef CONFIG_DEBUG_VM @@ -1752,17 +1758,10 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags unsigned int alloc_flags) { int i; - bool poisoned = true; - - for (i = 0; i < (1 << order); i++) { - struct page *p = page + i; - if (poisoned) - poisoned &= page_is_poisoned(p); - } post_alloc_hook(page, order, gfp_flags); - if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO)) + if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO)) for (i = 0; i < (1 << order); i++) clear_highpage(page + i); @@ -1844,9 +1843,9 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, * Note that start_page and end_pages are not aligned on a pageblock * boundary. If alignment is required, use move_freepages_block() */ -int move_freepages(struct zone *zone, +static int move_freepages(struct zone *zone, struct page *start_page, struct page *end_page, - int migratetype) + int migratetype, int *num_movable) { struct page *page; unsigned int order; @@ -1863,6 +1862,9 @@ int move_freepages(struct zone *zone, VM_BUG_ON(page_zone(start_page) != page_zone(end_page)); #endif + if (num_movable) + *num_movable = 0; + for (page = start_page; page <= end_page;) { if (!pfn_valid_within(page_to_pfn(page))) { page++; @@ -1873,6 +1875,15 @@ int move_freepages(struct zone *zone, VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); if (!PageBuddy(page)) { + /* + * We assume that pages that could be isolated for + * migration are movable. But we don't actually try + * isolating, as that would be expensive. + */ + if (num_movable && + (PageLRU(page) || __PageMovable(page))) + (*num_movable)++; + page++; continue; } @@ -1888,7 +1899,7 @@ int move_freepages(struct zone *zone, } int move_freepages_block(struct zone *zone, struct page *page, - int migratetype) + int migratetype, int *num_movable) { unsigned long start_pfn, end_pfn; struct page *start_page, *end_page; @@ -1905,7 +1916,8 @@ int move_freepages_block(struct zone *zone, struct page *page, if (!zone_spans_pfn(zone, end_pfn)) return 0; - return move_freepages(zone, start_page, end_page, migratetype); + return move_freepages(zone, start_page, end_page, migratetype, + num_movable); } static void change_pageblock_range(struct page *pageblock_page, @@ -1955,28 +1967,79 @@ static bool can_steal_fallback(unsigned int order, int start_mt) /* * This function implements actual steal behaviour. If order is large enough, * we can steal whole pageblock. If not, we first move freepages in this - * pageblock and check whether half of pages are moved or not. If half of - * pages are moved, we can change migratetype of pageblock and permanently - * use it's pages as requested migratetype in the future. + * pageblock to our migratetype and determine how many already-allocated pages + * are there in the pageblock with a compatible migratetype. If at least half + * of pages are free or compatible, we can change migratetype of the pageblock + * itself, so pages freed in the future will be put on the correct free list. */ static void steal_suitable_fallback(struct zone *zone, struct page *page, - int start_type) + int start_type, bool whole_block) { unsigned int current_order = page_order(page); - int pages; + struct free_area *area; + int free_pages, movable_pages, alike_pages; + int old_block_type; + + old_block_type = get_pageblock_migratetype(page); + + /* + * This can happen due to races and we want to prevent broken + * highatomic accounting. + */ + if (is_migrate_highatomic(old_block_type)) + goto single_page; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { change_pageblock_range(page, current_order, start_type); - return; + goto single_page; + } + + /* We are not allowed to try stealing from the whole block */ + if (!whole_block) + goto single_page; + + free_pages = move_freepages_block(zone, page, start_type, + &movable_pages); + /* + * Determine how many pages are compatible with our allocation. + * For movable allocation, it's the number of movable pages which + * we just obtained. For other types it's a bit more tricky. + */ + if (start_type == MIGRATE_MOVABLE) { + alike_pages = movable_pages; + } else { + /* + * If we are falling back a RECLAIMABLE or UNMOVABLE allocation + * to MOVABLE pageblock, consider all non-movable pages as + * compatible. If it's UNMOVABLE falling back to RECLAIMABLE or + * vice versa, be conservative since we can't distinguish the + * exact migratetype of non-movable pages. + */ + if (old_block_type == MIGRATE_MOVABLE) + alike_pages = pageblock_nr_pages + - (free_pages + movable_pages); + else + alike_pages = 0; } - pages = move_freepages_block(zone, page, start_type); + /* moving whole block can fail due to zone boundary conditions */ + if (!free_pages) + goto single_page; - /* Claim the whole block if over half of it is free */ - if (pages >= (1 << (pageblock_order-1)) || + /* + * If a sufficient number of pages in the block are either free or of + * comparable migratability as our allocation, claim the whole block. + */ + if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_type); + + return; + +single_page: + area = &zone->free_area[current_order]; + list_move(&page->lru, &area->free_list[start_type]); } /* @@ -2042,11 +2105,11 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone, /* Yoink! */ mt = get_pageblock_migratetype(page); - if (mt != MIGRATE_HIGHATOMIC && - !is_migrate_isolate(mt) && !is_migrate_cma(mt)) { + if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt) + && !is_migrate_cma(mt)) { zone->nr_reserved_highatomic += pageblock_nr_pages; set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); - move_freepages_block(zone, page, MIGRATE_HIGHATOMIC); + move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL); } out_unlock: @@ -2100,8 +2163,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * from highatomic to ac->migratetype. So we should * adjust the count once. */ - if (get_pageblock_migratetype(page) == - MIGRATE_HIGHATOMIC) { + if (is_migrate_highatomic_page(page)) { /* * It should never happen but changes to * locking could inadvertently allow a per-cpu @@ -2124,7 +2186,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * may increase. */ set_pageblock_migratetype(page, ac->migratetype); - ret = move_freepages_block(zone, page, ac->migratetype); + ret = move_freepages_block(zone, page, ac->migratetype, + NULL); if (ret) { spin_unlock_irqrestore(&zone->lock, flags); return ret; @@ -2136,8 +2199,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, return false; } -/* Remove an element from the buddy allocator from the fallback list */ -static inline struct page * +/* + * Try finding a free buddy page on the fallback list and put it on the free + * list of requested migratetype, possibly along with other pages from the same + * block, depending on fragmentation avoidance heuristics. Returns true if + * fallback was found so that __rmqueue_smallest() can grab it. + */ +static inline bool __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) { struct free_area *area; @@ -2158,33 +2226,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) page = list_first_entry(&area->free_list[fallback_mt], struct page, lru); - if (can_steal && - get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC) - steal_suitable_fallback(zone, page, start_migratetype); - /* Remove the page from the freelists */ - area->nr_free--; - list_del(&page->lru); - rmv_page_order(page); - - expand(zone, page, order, current_order, area, - start_migratetype); - /* - * The pcppage_migratetype may differ from pageblock's - * migratetype depending on the decisions in - * find_suitable_fallback(). This is OK as long as it does not - * differ for MIGRATE_CMA pageblocks. Those can be used as - * fallback only via special __rmqueue_cma_fallback() function - */ - set_pcppage_migratetype(page, start_migratetype); + steal_suitable_fallback(zone, page, start_migratetype, + can_steal); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, fallback_mt); - return page; + return true; } - return NULL; + return false; } /* @@ -2196,13 +2248,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order, { struct page *page; +retry: page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { if (migratetype == MIGRATE_MOVABLE) page = __rmqueue_cma_fallback(zone, order); - if (!page) - page = __rmqueue_fallback(zone, order, migratetype); + if (!page && __rmqueue_fallback(zone, order, migratetype)) + goto retry; } trace_mm_page_alloc_zone_locked(page, order, migratetype); @@ -2219,9 +2272,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, int migratetype, bool cold) { int i, alloced = 0; - unsigned long flags; - spin_lock_irqsave(&zone->lock, flags); + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) @@ -2257,7 +2309,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); return alloced; } @@ -2485,25 +2537,22 @@ void free_hot_cold_page(struct page *page, bool cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; + unsigned long flags; unsigned long pfn = page_to_pfn(page); int migratetype; - if (in_interrupt()) { - __free_pages_ok(page, 0); - return; - } - if (!free_pcp_prepare(page)) return; migratetype = get_pfnblock_migratetype(page, pfn); set_pcppage_migratetype(page, migratetype); - preempt_disable(); + local_irq_save(flags); + __count_vm_event(PGFREE); /* * We only track unmovable, reclaimable and movable on pcp lists. * Free ISOLATE pages back to the allocator because they are being - * offlined but treat RESERVE as movable pages so we can get those + * offlined but treat HIGHATOMIC as movable pages so we can get those * areas back if necessary. Otherwise, we may have to free * excessively into the page allocator */ @@ -2515,7 +2564,6 @@ void free_hot_cold_page(struct page *page, bool cold) migratetype = MIGRATE_MOVABLE; } - __count_vm_event(PGFREE); pcp = &this_cpu_ptr(zone->pageset)->pcp; if (!cold) list_add(&page->lru, &pcp->lists[migratetype]); @@ -2529,7 +2577,7 @@ void free_hot_cold_page(struct page *page, bool cold) } out: - preempt_enable(); + local_irq_restore(flags); } /* @@ -2614,7 +2662,7 @@ int __isolate_free_page(struct page *page, unsigned int order) for (; page < endpage; page += pageblock_nr_pages) { int mt = get_pageblock_migratetype(page); if (!is_migrate_isolate(mt) && !is_migrate_cma(mt) - && mt != MIGRATE_HIGHATOMIC) + && !is_migrate_highatomic(mt)) set_pageblock_migratetype(page, MIGRATE_MOVABLE); } @@ -2654,8 +2702,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, { struct page *page; - VM_BUG_ON(in_interrupt()); - do { if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, @@ -2686,8 +2732,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, struct list_head *list; bool cold = ((gfp_flags & __GFP_COLD) != 0); struct page *page; + unsigned long flags; - preempt_disable(); + local_irq_save(flags); pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); @@ -2695,7 +2742,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); } - preempt_enable(); + local_irq_restore(flags); return page; } @@ -2711,7 +2758,7 @@ struct page *rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; - if (likely(order == 0) && !in_interrupt()) { + if (likely(order == 0)) { page = rmqueue_pcplist(preferred_zone, zone, order, gfp_flags, migratetype); goto out; @@ -3113,8 +3160,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || - debug_guardpage_minorder() > 0) + if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) return; pr_warn("%s: ", current->comm); @@ -3248,14 +3294,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, enum compact_priority prio, enum compact_result *compact_result) { struct page *page; + unsigned int noreclaim_flag; if (!order) return NULL; - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, prio); - current->flags &= ~PF_MEMALLOC; + memalloc_noreclaim_restore(noreclaim_flag); if (*compact_result <= COMPACT_INACTIVE) return NULL; @@ -3402,12 +3449,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, { struct reclaim_state reclaim_state; int progress; + unsigned int noreclaim_flag; cond_resched(); /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); lockdep_set_current_reclaim_state(gfp_mask); reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; @@ -3417,7 +3465,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); - current->flags &= ~PF_MEMALLOC; + memalloc_noreclaim_restore(noreclaim_flag); cond_resched(); @@ -3524,20 +3572,13 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) return false; } -/* - * Maximum number of reclaim retries without any progress before OOM killer - * is consider as the only way to move forward. - */ -#define MAX_RECLAIM_RETRIES 16 - /* * Checks whether it makes sense to retry the reclaim to make a forward progress * for the given allocation request. - * The reclaim feedback represented by did_some_progress (any progress during - * the last reclaim round) and no_progress_loops (number of reclaim rounds without - * any progress in a row) is considered as well as the reclaimable pages on the - * applicable zone list (with a backoff mechanism which is a function of - * no_progress_loops). + * + * We give up when we either have tried MAX_RECLAIM_RETRIES in a row + * without success, or when we couldn't even meet the watermark if we + * reclaimed all remaining pages on the LRU lists. * * Returns true if a retry is viable or false to enter the oom path. */ @@ -3582,13 +3623,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, bool wmark; available = reclaimable = zone_reclaimable_pages(zone); - available -= DIV_ROUND_UP((*no_progress_loops) * available, - MAX_RECLAIM_RETRIES); available += zone_page_state_snapshot(zone, NR_FREE_PAGES); /* - * Would the allocation succeed if we reclaimed the whole - * available? + * Would the allocation succeed if we reclaimed all + * reclaimable pages? */ wmark = __zone_watermark_ok(zone, order, min_wmark, ac_classzone_idx(ac), alloc_flags, available); @@ -3639,6 +3678,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; + const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER; struct page *page = NULL; unsigned int alloc_flags; unsigned long did_some_progress; @@ -3706,12 +3746,17 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, /* * For costly allocations, try direct compaction first, as it's likely - * that we have enough base pages and don't need to reclaim. Don't try - * that for allocations that are allowed to ignore watermarks, as the - * ALLOC_NO_WATERMARKS attempt didn't yet happen. + * that we have enough base pages and don't need to reclaim. For non- + * movable high-order allocations, do that as well, as compaction will + * try prevent permanent fragmentation by migrating from blocks of the + * same migratetype. + * Don't try this for allocations that are allowed to ignore + * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. */ - if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER && - !gfp_pfmemalloc_allowed(gfp_mask)) { + if (can_direct_reclaim && + (costly_order || + (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) + && !gfp_pfmemalloc_allowed(gfp_mask)) { page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac, INIT_COMPACT_PRIORITY, @@ -3723,7 +3768,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * Checks for costly allocations with __GFP_NORETRY, which * includes THP page fault allocations */ - if (gfp_mask & __GFP_NORETRY) { + if (costly_order && (gfp_mask & __GFP_NORETRY)) { /* * If compaction is deferred for high-order allocations, * it is because sync compaction recently failed. If @@ -3774,7 +3819,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, /* Make sure we know about allocations which stall for too long */ if (time_after(jiffies, alloc_start + stall_timeout)) { - warn_alloc(gfp_mask, ac->nodemask, + warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask, "page allocation stalls for %ums, order:%u", jiffies_to_msecs(jiffies-alloc_start), order); stall_timeout += 10 * HZ; @@ -3804,7 +3849,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * Do not retry costly high order allocations unless they are * __GFP_REPEAT */ - if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT)) + if (costly_order && !(gfp_mask & __GFP_REPEAT)) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -3836,7 +3881,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto got_pg; /* Avoid allocations with no watermarks from looping endlessly */ - if (test_thread_flag(TIF_MEMDIE)) + if (test_thread_flag(TIF_MEMDIE) && + (alloc_flags == ALLOC_NO_WATERMARKS || + (gfp_mask & __GFP_NOMEMALLOC))) goto nopage; /* Retry as long as the OOM killer is making progress */ @@ -3974,10 +4021,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, goto out; /* - * Runtime PM, block IO and its error handling path can deadlock - * because I/O on the device might not complete. + * Apply scoped allocation constraints. This is mainly about GFP_NOFS + * resp. GFP_NOIO which has to be inherited for all allocation requests + * from a particular context which has been marked by + * memalloc_no{fs,io}_{save,restore}. */ - alloc_mask = memalloc_noio_flags(gfp_mask); + alloc_mask = current_gfp_context(gfp_mask); ac.spread_dirty_pages = false; /* @@ -4250,7 +4299,8 @@ EXPORT_SYMBOL(free_pages_exact); * nr_free_zone_pages() counts the number of counts pages which are beyond the * high watermark within all zones at or below a given zone index. For each * zone, the number of pages is calculated as: - * managed_pages - high_pages + * + * nr_free_zone_pages = managed_pages - high_pages */ static unsigned long nr_free_zone_pages(int offset) { @@ -4512,7 +4562,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) #endif " writeback_tmp:%lukB" " unstable:%lukB" - " pages_scanned:%lu" " all_unreclaimable? %s" "\n", pgdat->node_id, @@ -4535,8 +4584,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)), - node_page_state(pgdat, NR_PAGES_SCANNED), - !pgdat_reclaimable(pgdat) ? "yes" : "no"); + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? + "yes" : "no"); } for_each_populated_zone(zone) { @@ -6100,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, /* pg_data_t should be reset to zero when it's allocated */ WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx); - reset_deferred_meminit(pgdat); pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; pgdat->per_cpu_nodestats = NULL; @@ -6122,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, (unsigned long)pgdat->node_mem_map); #endif + reset_deferred_meminit(pgdat); free_area_init_core(pgdat); } @@ -7431,7 +7480,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, .zone = page_zone(pfn_to_page(start)), .mode = MIGRATE_SYNC, .ignore_skip_hint = true, - .gfp_mask = memalloc_noio_flags(gfp_mask), + .gfp_mask = current_gfp_context(gfp_mask), }; INIT_LIST_HEAD(&cc.migratepages); diff --git a/mm/page_ext.c b/mm/page_ext.c index 121dcffc4ec1768a6fc71dda8af878aa0d16cb92..88ccc044b09a41504fb212afdb5ca8a45e842998 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -59,9 +59,6 @@ static struct page_ext_operations *page_ext_ops[] = { &debug_guardpage_ops, -#ifdef CONFIG_PAGE_POISONING - &page_poisoning_ops, -#endif #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif @@ -127,15 +124,12 @@ struct page_ext *lookup_page_ext(struct page *page) struct page_ext *base; base = NODE_DATA(page_to_nid(page))->node_page_ext; -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) +#if defined(CONFIG_DEBUG_VM) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. - * - * This check is also necessary for ensuring page poisoning - * works as expected when enabled */ if (unlikely(!base)) return NULL; @@ -204,15 +198,12 @@ struct page_ext *lookup_page_ext(struct page *page) { unsigned long pfn = page_to_pfn(page); struct mem_section *section = __pfn_to_section(pfn); -#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PAGE_POISONING) +#if defined(CONFIG_DEBUG_VM) /* * The sanity checks the page allocator does upon freeing a * page can reach here before the page_ext arrays are * allocated when feeding a range of pages to the allocator * for the first time during bootup or memory hotplug. - * - * This check is also necessary for ensuring page poisoning - * works as expected when enabled */ if (!section->page_ext) return NULL; diff --git a/mm/page_idle.c b/mm/page_idle.c index b0ee56c56b5850f32f5b3157f9b142788840f50f..1b0f48c62316b3ebf1e09cc7b0a16f413db7e9ae 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -50,7 +50,7 @@ static struct page *page_idle_get_page(unsigned long pfn) return page; } -static int page_idle_clear_pte_refs_one(struct page *page, +static bool page_idle_clear_pte_refs_one(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg) { @@ -84,7 +84,7 @@ static int page_idle_clear_pte_refs_one(struct page *page, */ set_page_young(page); } - return SWAP_AGAIN; + return true; } static void page_idle_clear_pte_refs(struct page *page) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index f4e17a57926afffa33fceabf81a742ffc254d914..5092e4ef00c832fefb624a1d9d8f618402ad4402 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -66,7 +66,8 @@ static int set_migratetype_isolate(struct page *page, set_pageblock_migratetype(page, MIGRATE_ISOLATE); zone->nr_isolate_pageblock++; - nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); + nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE, + NULL); __mod_zone_freepage_state(zone, -nr_pages, migratetype); } @@ -88,7 +89,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) zone = page_zone(page); spin_lock_irqsave(&zone->lock, flags); - if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + if (!is_migrate_isolate_page(page)) goto out; /* @@ -120,7 +121,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) * pageblock scanning for freepage moving. */ if (!isolated_page) { - nr_pages = move_freepages_block(zone, page, migratetype); + nr_pages = move_freepages_block(zone, page, migratetype, NULL); __mod_zone_freepage_state(zone, nr_pages, migratetype); } set_pageblock_migratetype(page, migratetype); @@ -205,7 +206,7 @@ int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + if (!page || !is_migrate_isolate_page(page)) continue; unset_migratetype_isolate(page, migratetype); } @@ -262,7 +263,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, */ for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + if (page && !is_migrate_isolate_page(page)) break; } page = __first_valid_page(start_pfn, end_pfn - start_pfn); diff --git a/mm/page_poison.c b/mm/page_poison.c index 2e647c65916b91b00177837370e210d71c568f5e..be19e989ccff51f667c9698c9cb8fea3d5303f8e 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -6,7 +6,6 @@ #include #include -static bool __page_poisoning_enabled __read_mostly; static bool want_page_poisoning __read_mostly; static int early_page_poison_param(char *buf) @@ -18,75 +17,22 @@ static int early_page_poison_param(char *buf) early_param("page_poison", early_page_poison_param); bool page_poisoning_enabled(void) -{ - return __page_poisoning_enabled; -} - -static bool need_page_poisoning(void) -{ - return want_page_poisoning; -} - -static void init_page_poisoning(void) { /* - * page poisoning is debug page alloc for some arches. If either - * of those options are enabled, enable poisoning + * Assumes that debug_pagealloc_enabled is set before + * free_all_bootmem. + * Page poisoning is debug page alloc for some arches. If + * either of those options are enabled, enable poisoning. */ - if (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC)) { - if (!want_page_poisoning && !debug_pagealloc_enabled()) - return; - } else { - if (!want_page_poisoning) - return; - } - - __page_poisoning_enabled = true; -} - -struct page_ext_operations page_poisoning_ops = { - .need = need_page_poisoning, - .init = init_page_poisoning, -}; - -static inline void set_page_poison(struct page *page) -{ - struct page_ext *page_ext; - - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return; - - __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); -} - -static inline void clear_page_poison(struct page *page) -{ - struct page_ext *page_ext; - - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return; - - __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); -} - -bool page_is_poisoned(struct page *page) -{ - struct page_ext *page_ext; - - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return false; - - return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); + return (want_page_poisoning || + (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && + debug_pagealloc_enabled())); } static void poison_page(struct page *page) { void *addr = kmap_atomic(page); - set_page_poison(page); memset(addr, PAGE_POISON, PAGE_SIZE); kunmap_atomic(addr); } @@ -140,12 +86,13 @@ static void unpoison_page(struct page *page) { void *addr; - if (!page_is_poisoned(page)) - return; - addr = kmap_atomic(page); + /* + * Page poisoning when enabled poisons each and every page + * that is freed to buddy. Thus no extra check is done to + * see if a page was posioned. + */ check_poison_mem(addr, PAGE_SIZE); - clear_page_poison(page); kunmap_atomic(addr); } diff --git a/mm/percpu.c b/mm/percpu.c index 60a6488e9e6d49d5e9c5d4b611a5f5b037342316..e0aa8ae7bde708188e2d6ba84dbdc12dcf11f52e 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1284,18 +1284,7 @@ void free_percpu(void __percpu *ptr) } EXPORT_SYMBOL_GPL(free_percpu); -/** - * is_kernel_percpu_address - test whether address is from static percpu area - * @addr: address to test - * - * Test whether @addr belongs to in-kernel static percpu area. Module - * static percpu areas are not considered. For those, use - * is_module_percpu_address(). - * - * RETURNS: - * %true if @addr is from in-kernel static percpu area, %false otherwise. - */ -bool is_kernel_percpu_address(unsigned long addr) +bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr) { #ifdef CONFIG_SMP const size_t static_size = __per_cpu_end - __per_cpu_start; @@ -1304,15 +1293,38 @@ bool is_kernel_percpu_address(unsigned long addr) for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(base, cpu); + void *va = (void *)addr; - if ((void *)addr >= start && (void *)addr < start + static_size) + if (va >= start && va < start + static_size) { + if (can_addr) { + *can_addr = (unsigned long) (va - start); + *can_addr += (unsigned long) + per_cpu_ptr(base, get_boot_cpu_id()); + } return true; - } + } + } #endif /* on UP, can't distinguish from other static vars, always false */ return false; } +/** + * is_kernel_percpu_address - test whether address is from static percpu area + * @addr: address to test + * + * Test whether @addr belongs to in-kernel static percpu area. Module + * static percpu areas are not considered. For those, use + * is_module_percpu_address(). + * + * RETURNS: + * %true if @addr is from in-kernel static percpu area, %false otherwise. + */ +bool is_kernel_percpu_address(unsigned long addr) +{ + return __is_kernel_percpu_address(addr, NULL); +} + /** * per_cpu_ptr_to_phys - convert translated percpu address to physical address * @addr: the address to be converted to physical address diff --git a/mm/rmap.c b/mm/rmap.c index f6838015810f5610abe039daec170aa1da634422..d405f0e0ee9651b40dceac3f45a851469b576e48 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data) void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), - 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, + 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, anon_vma_ctor); anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC|SLAB_ACCOUNT); @@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page) * If this page is still mapped, then its anon_vma cannot have been * freed. But if it has been unmapped, we have no security against the * anon_vma structure being freed and reused (for another anon_vma: - * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero() + * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero() * above cannot corrupt). */ if (!page_mapped(page)) { @@ -724,7 +724,7 @@ struct page_referenced_arg { /* * arg: page_referenced_arg will be passed */ -static int page_referenced_one(struct page *page, struct vm_area_struct *vma, +static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { struct page_referenced_arg *pra = arg; @@ -741,7 +741,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKED) { page_vma_mapped_walk_done(&pvmw); pra->vm_flags |= VM_LOCKED; - return SWAP_FAIL; /* To break the loop */ + return false; /* To break the loop */ } if (pvmw.pte) { @@ -781,9 +781,9 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, } if (!pra->mapcount) - return SWAP_SUCCESS; /* To break the loop */ + return false; /* To break the loop */ - return SWAP_AGAIN; + return true; } static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) @@ -812,7 +812,6 @@ int page_referenced(struct page *page, struct mem_cgroup *memcg, unsigned long *vm_flags) { - int ret; int we_locked = 0; struct page_referenced_arg pra = { .mapcount = total_mapcount(page), @@ -846,7 +845,7 @@ int page_referenced(struct page *page, rwc.invalid_vma = invalid_page_referenced_vma; } - ret = rmap_walk(page, &rwc); + rmap_walk(page, &rwc); *vm_flags = pra.vm_flags; if (we_locked) @@ -855,7 +854,7 @@ int page_referenced(struct page *page, return pra.referenced; } -static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, +static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { struct page_vma_mapped_walk pvmw = { @@ -908,7 +907,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, } } - return SWAP_AGAIN; + return true; } static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) @@ -1159,7 +1158,7 @@ void page_add_file_rmap(struct page *page, bool compound) goto out; } __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); - mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); + mod_memcg_page_state(page, NR_FILE_MAPPED, nr); out: unlock_page_memcg(page); } @@ -1199,7 +1198,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) * pte lock(a spinlock) is held, which implies preemption disabled. */ __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); - mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); + mod_memcg_page_state(page, NR_FILE_MAPPED, -nr); if (unlikely(PageMlocked(page))) clear_page_mlock(page); @@ -1288,15 +1287,10 @@ void page_remove_rmap(struct page *page, bool compound) */ } -struct rmap_private { - enum ttu_flags flags; - int lazyfreed; -}; - /* * @arg: enum ttu_flags will be passed to this argument */ -static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, +static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { struct mm_struct *mm = vma->vm_mm; @@ -1307,13 +1301,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, }; pte_t pteval; struct page *subpage; - int ret = SWAP_AGAIN; - struct rmap_private *rp = arg; - enum ttu_flags flags = rp->flags; + bool ret = true; + enum ttu_flags flags = (enum ttu_flags)arg; /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) - return SWAP_AGAIN; + return true; if (flags & TTU_SPLIT_HUGE_PMD) { split_huge_pmd_address(vma, address, @@ -1336,7 +1329,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, */ mlock_vma_page(page); } - ret = SWAP_MLOCK; + ret = false; page_vma_mapped_walk_done(&pvmw); break; } @@ -1354,7 +1347,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { - ret = SWAP_FAIL; + ret = false; page_vma_mapped_walk_done(&pvmw); break; } @@ -1424,18 +1417,34 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Store the swap location in the pte. * See handle_pte_fault() ... */ - VM_BUG_ON_PAGE(!PageSwapCache(page), page); + if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) { + WARN_ON_ONCE(1); + ret = false; + page_vma_mapped_walk_done(&pvmw); + break; + } + + /* MADV_FREE page check */ + if (!PageSwapBacked(page)) { + if (!PageDirty(page)) { + dec_mm_counter(mm, MM_ANONPAGES); + goto discard; + } - if (!PageDirty(page) && (flags & TTU_LZFREE)) { - /* It's a freeable page by MADV_FREE */ - dec_mm_counter(mm, MM_ANONPAGES); - rp->lazyfreed++; - goto discard; + /* + * If the page was redirtied, it cannot be + * discarded. Remap the page to page table. + */ + set_pte_at(mm, address, pvmw.pte, pteval); + SetPageSwapBacked(page); + ret = false; + page_vma_mapped_walk_done(&pvmw); + break; } if (swap_duplicate(entry) < 0) { set_pte_at(mm, address, pvmw.pte, pteval); - ret = SWAP_FAIL; + ret = false; page_vma_mapped_walk_done(&pvmw); break; } @@ -1492,24 +1501,14 @@ static int page_mapcount_is_zero(struct page *page) * * Tries to remove all the page table entries which are mapping this * page, used in the pageout path. Caller must hold the page lock. - * Return values are: * - * SWAP_SUCCESS - we succeeded in removing all mappings - * SWAP_AGAIN - we missed a mapping, try again later - * SWAP_FAIL - the page is unswappable - * SWAP_MLOCK - page is mlocked. + * If unmap is successful, return true. Otherwise, false. */ -int try_to_unmap(struct page *page, enum ttu_flags flags) +bool try_to_unmap(struct page *page, enum ttu_flags flags) { - int ret; - struct rmap_private rp = { - .flags = flags, - .lazyfreed = 0, - }; - struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, - .arg = &rp, + .arg = (void *)flags, .done = page_mapcount_is_zero, .anon_lock = page_lock_anon_vma_read, }; @@ -1526,16 +1525,11 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) rwc.invalid_vma = invalid_migration_vma; if (flags & TTU_RMAP_LOCKED) - ret = rmap_walk_locked(page, &rwc); + rmap_walk_locked(page, &rwc); else - ret = rmap_walk(page, &rwc); + rmap_walk(page, &rwc); - if (ret != SWAP_MLOCK && !page_mapcount(page)) { - ret = SWAP_SUCCESS; - if (rp.lazyfreed && !PageDirty(page)) - ret = SWAP_LZFREE; - } - return ret; + return !page_mapcount(page) ? true : false; } static int page_not_mapped(struct page *page) @@ -1550,34 +1544,22 @@ static int page_not_mapped(struct page *page) * Called from munlock code. Checks all of the VMAs mapping the page * to make sure nobody else has this page mlocked. The page will be * returned with PG_mlocked cleared if no other vmas have it mlocked. - * - * Return values are: - * - * SWAP_AGAIN - no vma is holding page mlocked, or, - * SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem - * SWAP_FAIL - page cannot be located at present - * SWAP_MLOCK - page is now mlocked. */ -int try_to_munlock(struct page *page) -{ - int ret; - struct rmap_private rp = { - .flags = TTU_MUNLOCK, - .lazyfreed = 0, - }; +void try_to_munlock(struct page *page) +{ struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, - .arg = &rp, + .arg = (void *)TTU_MUNLOCK, .done = page_not_mapped, .anon_lock = page_lock_anon_vma_read, }; VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); + VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); - ret = rmap_walk(page, &rwc); - return ret; + rmap_walk(page, &rwc); } void __put_anon_vma(struct anon_vma *anon_vma) @@ -1625,13 +1607,12 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, * vm_flags for that VMA. That should be OK, because that vma shouldn't be * LOCKED. */ -static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, +static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, bool locked) { struct anon_vma *anon_vma; pgoff_t pgoff_start, pgoff_end; struct anon_vma_chain *avc; - int ret = SWAP_AGAIN; if (locked) { anon_vma = page_anon_vma(page); @@ -1641,7 +1622,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, anon_vma = rmap_walk_anon_lock(page, rwc); } if (!anon_vma) - return ret; + return; pgoff_start = page_to_pgoff(page); pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; @@ -1655,8 +1636,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; - ret = rwc->rmap_one(page, vma, address, rwc->arg); - if (ret != SWAP_AGAIN) + if (!rwc->rmap_one(page, vma, address, rwc->arg)) break; if (rwc->done && rwc->done(page)) break; @@ -1664,7 +1644,6 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, if (!locked) anon_vma_unlock_read(anon_vma); - return ret; } /* @@ -1680,13 +1659,12 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, * vm_flags for that VMA. That should be OK, because that vma shouldn't be * LOCKED. */ -static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, +static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, bool locked) { struct address_space *mapping = page_mapping(page); pgoff_t pgoff_start, pgoff_end; struct vm_area_struct *vma; - int ret = SWAP_AGAIN; /* * The page lock not only makes sure that page->mapping cannot @@ -1697,7 +1675,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, VM_BUG_ON_PAGE(!PageLocked(page), page); if (!mapping) - return ret; + return; pgoff_start = page_to_pgoff(page); pgoff_end = pgoff_start + hpage_nr_pages(page) - 1; @@ -1712,8 +1690,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; - ret = rwc->rmap_one(page, vma, address, rwc->arg); - if (ret != SWAP_AGAIN) + if (!rwc->rmap_one(page, vma, address, rwc->arg)) goto done; if (rwc->done && rwc->done(page)) goto done; @@ -1722,28 +1699,27 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, done: if (!locked) i_mmap_unlock_read(mapping); - return ret; } -int rmap_walk(struct page *page, struct rmap_walk_control *rwc) +void rmap_walk(struct page *page, struct rmap_walk_control *rwc) { if (unlikely(PageKsm(page))) - return rmap_walk_ksm(page, rwc); + rmap_walk_ksm(page, rwc); else if (PageAnon(page)) - return rmap_walk_anon(page, rwc, false); + rmap_walk_anon(page, rwc, false); else - return rmap_walk_file(page, rwc, false); + rmap_walk_file(page, rwc, false); } /* Like rmap_walk, but caller holds relevant rmap lock */ -int rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc) +void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc) { /* no ksm support for now */ VM_BUG_ON_PAGE(PageKsm(page), page); if (PageAnon(page)) - return rmap_walk_anon(page, rwc, true); + rmap_walk_anon(page, rwc, true); else - return rmap_walk_file(page, rwc, true); + rmap_walk_file(page, rwc, true); } #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 0fd21670b513f735e554b340adc0d86938e32d69..6bb4deb12e78b8e3724c40446069e2cd0731b4a4 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -9,11 +9,12 @@ * as published by the Free Software Foundation; version 2 * of the License. */ +#define pr_fmt(fmt) "rodata_test: " fmt + #include #include const int rodata_test_data = 0xC3; -EXPORT_SYMBOL_GPL(rodata_test_data); void rodata_test(void) { @@ -23,20 +24,20 @@ void rodata_test(void) /* test 1: read the value */ /* If this test fails, some previous testrun has clobbered the state */ if (!rodata_test_data) { - pr_err("rodata_test: test 1 fails (start data)\n"); + pr_err("test 1 fails (start data)\n"); return; } /* test 2: write to the variable; this should fault */ if (!probe_kernel_write((void *)&rodata_test_data, - (void *)&zero, sizeof(zero))) { - pr_err("rodata_test: test data was not read only\n"); + (void *)&zero, sizeof(zero))) { + pr_err("test data was not read only\n"); return; } /* test 3: check the value hasn't changed */ if (rodata_test_data == zero) { - pr_err("rodata_test: test data was changed\n"); + pr_err("test data was changed\n"); return; } @@ -44,13 +45,13 @@ void rodata_test(void) start = (unsigned long)__start_rodata; end = (unsigned long)__end_rodata; if (start & (PAGE_SIZE - 1)) { - pr_err("rodata_test: start of .rodata is not page size aligned\n"); + pr_err("start of .rodata is not page size aligned\n"); return; } if (end & (PAGE_SIZE - 1)) { - pr_err("rodata_test: end of .rodata is not page size aligned\n"); + pr_err("end of .rodata is not page size aligned\n"); return; } - pr_info("rodata_test: all tests were successful\n"); + pr_info("all tests were successful\n"); } diff --git a/mm/slab.c b/mm/slab.c index 807d86c769088681b47f41c0cc0a721307bd16c1..2a31ee3c5814f192234385303a0d86cc77790580 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) freelist = page->freelist; slab_destroy_debugcheck(cachep, page); - if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&page->rcu_head, kmem_rcu_free); else kmem_freepages(cachep, page); @@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, cachep->num = 0; - if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU) + if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) return false; left = calculate_slab_order(cachep, size, @@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN + 2 * sizeof(unsigned long long))) flags |= SLAB_RED_ZONE | SLAB_STORE_USER; - if (!(flags & SLAB_DESTROY_BY_RCU)) + if (!(flags & SLAB_TYPESAFE_BY_RCU)) flags |= SLAB_POISON; #endif #endif @@ -3879,7 +3879,12 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, prev = cachep->cpu_cache; cachep->cpu_cache = cpu_cache; - kick_all_cpus_sync(); + /* + * Without a previous cpu_cache there's no need to synchronize remote + * cpus, so skip the IPIs. + */ + if (prev) + kick_all_cpus_sync(); check_irq_on(); cachep->batchcount = batchcount; diff --git a/mm/slab.h b/mm/slab.h index 65e7c3fcac72790acece0ac140d864151f95f166..9cfcf099709c19cfc8b5070325a0527c763eddaa 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, /* Legal flag mask for kmem_cache_create(), for various configurations */ #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS ) + SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) #if defined(CONFIG_DEBUG_SLAB) #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) @@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) * back there or track user information then we can * only use the space before that information. */ - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) + if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) return s->inuse; /* * Else we can use all the padding etc for the allocation diff --git a/mm/slab_common.c b/mm/slab_common.c index 09d0e849b07f47d82f5d9a5cda4862517d297cb2..01a0fe2eb33267f8f04f7e90fd79358cd1f41d07 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, * Set of flags that will prevent slab merging */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ @@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) struct kmem_cache *s, *s2; /* - * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the + * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the * @slab_caches_to_rcu_destroy list. The slab pages are freed * through RCU and and the associated kmem_cache are dereferenced * while freeing the pages, so the kmem_caches should be freed only @@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s) memcg_unlink_cache(s); list_del(&s->list); - if (s->flags & SLAB_DESTROY_BY_RCU) { + if (s->flags & SLAB_TYPESAFE_BY_RCU) { list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { diff --git a/mm/slob.c b/mm/slob.c index eac04d4357ec6b8d653de4c30c96ffdd97974462..1bae78d71096ad26bbe4575f631f3fad7b694388 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp) /* * struct slob_rcu is inserted at the tail of allocated slob blocks, which - * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free + * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free * the block using call_rcu. */ struct slob_rcu { @@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize); int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) { - if (flags & SLAB_DESTROY_BY_RCU) { + if (flags & SLAB_TYPESAFE_BY_RCU) { /* leave room for rcu footer at the end of object */ c->size += sizeof(struct slob_rcu); } @@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { kmemleak_free_recursive(b, c->flags); - if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { + if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); slob_rcu->size = c->size; diff --git a/mm/slub.c b/mm/slub.c index 7f4bc7027ed53536efaaf5663f007bd2442de503..7449593fca724147cef5b8f7a46752333e5e0585 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h) static void free_slab(struct kmem_cache *s, struct page *page) { - if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { struct rcu_head *head; if (need_reserve_slab_rcu) { @@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, * slab_free_freelist_hook() could have put the items into quarantine. * If so, no need to free them. */ - if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU)) + if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU)) return; do_slab_free(s, page, head, tail, cnt, addr); } @@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * the slab may touch the object after free or before allocation * then we should never poison the object itself. */ - if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && + if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) && !s->ctor) s->flags |= __OBJECT_POISON; else @@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ s->inuse = size; - if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || + if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor)) { /* * Relocate free pointer after the object if it is not @@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); s->reserved = 0; - if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) + if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU)) s->reserved = sizeof(struct rcu_head); if (!calculate_sizes(s, -1)) @@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma); static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); + return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); } SLAB_ATTR_RO(destroy_by_rcu); @@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) char mbuf[64]; char *buf; struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); + ssize_t len; if (!attr || !attr->store || !attr->show) continue; @@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(root_cache, buf); - attr->store(s, buf, strlen(buf)); + len = attr->show(root_cache, buf); + if (len > 0) + attr->store(s, buf, len); } if (buffer) diff --git a/mm/sparse.c b/mm/sparse.c index db6bf3c97ea2cd7e593922ab88840b84d67cdd88..6903c8fc308502eab4ca3570075cd6a881efa724 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -248,10 +248,7 @@ static int __meminit sparse_init_one_section(struct mem_section *ms, unsigned long usemap_size(void) { - unsigned long size_bytes; - size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8; - size_bytes = roundup(size_bytes, sizeof(unsigned long)); - return size_bytes; + return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/mm/swap.c b/mm/swap.c index 5dabf444d724db98595567b0f7daed7d53fc877e..98d08b4579faa42b8ef43ba4563ed6cb76f19893 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -46,7 +46,7 @@ int page_cluster; static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs); -static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); +static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs); #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); #endif @@ -97,6 +97,16 @@ static void __put_compound_page(struct page *page) void __put_page(struct page *page) { + if (is_zone_device_page(page)) { + put_dev_pagemap(page->pgmap); + + /* + * The page belongs to the device that created pgmap. Do + * not return it to page allocator. + */ + return; + } + if (unlikely(PageCompound(page))) __put_compound_page(page); else @@ -561,20 +571,27 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, } -static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, +static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, void *arg) { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { - int file = page_is_file_cache(page); - int lru = page_lru_base_type(page); + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && + !PageUnevictable(page)) { + bool active = PageActive(page); - del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); + del_page_from_lru_list(page, lruvec, + LRU_INACTIVE_ANON + active); ClearPageActive(page); ClearPageReferenced(page); - add_page_to_lru_list(page, lruvec, lru); + /* + * lazyfree pages are clean anonymous pages. They have + * SwapBacked flag cleared to distinguish normal anonymous + * pages + */ + ClearPageSwapBacked(page); + add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); - __count_vm_event(PGDEACTIVATE); - update_page_reclaim_stat(lruvec, file, 0); + __count_vm_events(PGLAZYFREE, hpage_nr_pages(page)); + update_page_reclaim_stat(lruvec, 1, 0); } } @@ -604,9 +621,9 @@ void lru_add_drain_cpu(int cpu) if (pagevec_count(pvec)) pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); - pvec = &per_cpu(lru_deactivate_pvecs, cpu); + pvec = &per_cpu(lru_lazyfree_pvecs, cpu); if (pagevec_count(pvec)) - pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); + pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); activate_page_drain(cpu); } @@ -638,22 +655,22 @@ void deactivate_file_page(struct page *page) } /** - * deactivate_page - deactivate a page + * mark_page_lazyfree - make an anon page lazyfree * @page: page to deactivate * - * deactivate_page() moves @page to the inactive list if @page was on the active - * list and was not an unevictable page. This is done to accelerate the reclaim - * of @page. + * mark_page_lazyfree() moves @page to the inactive file list. + * This is done to accelerate the reclaim of @page. */ -void deactivate_page(struct page *page) +void mark_page_lazyfree(struct page *page) { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); + if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && + !PageUnevictable(page)) { + struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); get_page(page); if (!pagevec_add(pvec, page) || PageCompound(page)) - pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); - put_cpu_var(lru_deactivate_pvecs); + pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); + put_cpu_var(lru_lazyfree_pvecs); } } @@ -693,7 +710,7 @@ void lru_add_drain_all(void) if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || - pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || + pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || need_activate_page_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); queue_work_on(cpu, mm_percpu_wq, work); diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index ac6318a064d35e6dcc5385d1dc8062ff6e46554c..3405b4ee1757e3deb196c93e49b9641dad8d56fb 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -48,6 +48,9 @@ static int swap_cgroup_prepare(int type) if (!page) goto not_enough_page; ctrl->map[idx] = page; + + if (!(idx % SWAP_CLUSTER_MAX)) + cond_resched(); } return 0; not_enough_page: diff --git a/mm/swap_slots.c b/mm/swap_slots.c index b1ccb58ad397403214a220e4a0ac7901a6b6ae1e..58f6c78f1dad313dc7aa9b2ec31ddab361335fe1 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_SWAP @@ -119,16 +120,18 @@ static int alloc_swap_slot_cache(unsigned int cpu) /* * Do allocation outside swap_slots_cache_mutex - * as vzalloc could trigger reclaim and get_swap_page, + * as kvzalloc could trigger reclaim and get_swap_page, * which can lock swap_slots_cache_mutex. */ - slots = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE); + slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE, + GFP_KERNEL); if (!slots) return -ENOMEM; - slots_ret = vzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE); + slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE, + GFP_KERNEL); if (!slots_ret) { - vfree(slots); + kvfree(slots); return -ENOMEM; } @@ -152,9 +155,9 @@ static int alloc_swap_slot_cache(unsigned int cpu) out: mutex_unlock(&swap_slots_cache_mutex); if (slots) - vfree(slots); + kvfree(slots); if (slots_ret) - vfree(slots_ret); + kvfree(slots_ret); return 0; } @@ -171,7 +174,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type, cache->cur = 0; cache->nr = 0; if (free_slots && cache->slots) { - vfree(cache->slots); + kvfree(cache->slots); cache->slots = NULL; } mutex_unlock(&cache->alloc_lock); @@ -186,7 +189,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type, } spin_unlock_irq(&cache->free_lock); if (slots) - vfree(slots); + kvfree(slots); } } @@ -241,8 +244,10 @@ int enable_swap_slots_cache(void) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache", alloc_swap_slot_cache, free_slot_cache); - if (ret < 0) + if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating " + "without swap slots cache.\n", __func__)) goto out_unlock; + swap_slot_cache_initialized = true; __reenable_swap_slots_cache(); out_unlock: diff --git a/mm/swap_state.c b/mm/swap_state.c index 473b71e052a8ed29df7c496af747e2b43491c782..539b8885e3d1d4942dbb905b60b87d40726e3de5 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -360,17 +360,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, /* * We might race against get_swap_page() and stumble * across a SWAP_HAS_CACHE swap_map entry whose page - * has not been brought into the swapcache yet, while - * the other end is scheduled away waiting on discard - * I/O completion at scan_swap_map(). - * - * In order to avoid turning this transitory state - * into a permanent loop around this -EEXIST case - * if !CONFIG_PREEMPT and the I/O completion happens - * to be waiting on the CPU waitqueue where we are now - * busy looping, we just conditionally invoke the - * scheduler here, if there are some more important - * tasks to run. + * has not been brought into the swapcache yet. */ cond_resched(); continue; @@ -533,7 +523,7 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages) unsigned int i, nr; nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); - spaces = vzalloc(sizeof(struct address_space) * nr); + spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL); if (!spaces) return -ENOMEM; for (i = 0; i < nr; i++) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 178130880b908515a105eccf9fa428f7cf61719a..4f6cba1b66322f3500714950ef3a12eeda9e4455 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -335,7 +335,7 @@ static void cluster_list_add_tail(struct swap_cluster_list *list, ci_tail = ci + tail; spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING); cluster_set_next(ci_tail, idx); - unlock_cluster(ci_tail); + spin_unlock(&ci_tail->lock); cluster_set_next_flag(&list->tail, idx, 0); } } @@ -672,6 +672,9 @@ static int scan_swap_map_slots(struct swap_info_struct *si, else goto done; } + si->swap_map[offset] = usage; + inc_cluster_info_page(si, si->cluster_info, offset); + unlock_cluster(ci); if (offset == si->lowest_bit) si->lowest_bit++; @@ -685,9 +688,6 @@ static int scan_swap_map_slots(struct swap_info_struct *si, plist_del(&si->avail_list, &swap_avail_head); spin_unlock(&swap_avail_lock); } - si->swap_map[offset] = usage; - inc_cluster_info_page(si, si->cluster_info, offset); - unlock_cluster(ci); si->cluster_next = offset + 1; slots[n_ret++] = swp_entry(si->type, offset); @@ -1079,8 +1079,6 @@ void swapcache_free_entries(swp_entry_t *entries, int n) p = swap_info_get_cont(entries[i], prev); if (p) swap_entry_free(p, entries[i]); - else - break; prev = p; } if (p) @@ -1111,6 +1109,18 @@ int page_swapcount(struct page *page) return count; } +static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +{ + int count = 0; + pgoff_t offset = swp_offset(entry); + struct swap_cluster_info *ci; + + ci = lock_cluster_or_swap_info(si, offset); + count = swap_count(si->swap_map[offset]); + unlock_cluster_or_swap_info(si, ci); + return count; +} + /* * How many references to @entry are currently swapped out? * This does not give an exact answer when swap count is continued, @@ -1119,17 +1129,11 @@ int page_swapcount(struct page *page) int __swp_swapcount(swp_entry_t entry) { int count = 0; - pgoff_t offset; struct swap_info_struct *si; - struct swap_cluster_info *ci; si = __swap_info_get(entry); - if (si) { - offset = swp_offset(entry); - ci = lock_cluster_or_swap_info(si, offset); - count = swap_count(si->swap_map[offset]); - unlock_cluster_or_swap_info(si, ci); - } + if (si) + count = swap_swapcount(si, entry); return count; } @@ -1291,7 +1295,8 @@ int free_swap_and_cache(swp_entry_t entry) * Also recheck PageSwapCache now page is locked (above). */ if (PageSwapCache(page) && !PageWriteback(page) && - (!page_mapped(page) || mem_cgroup_swap_full(page))) { + (!page_mapped(page) || mem_cgroup_swap_full(page)) && + !swap_swapcount(p, entry)) { delete_from_swap_cache(page); SetPageDirty(page); } @@ -2265,8 +2270,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; vfree(swap_map); - vfree(cluster_info); - vfree(frontswap_map); + kvfree(cluster_info); + kvfree(frontswap_map); /* Destroy swap account information */ swap_cgroup_swapoff(p->type); exit_swap_address_space(p->type); @@ -2789,7 +2794,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->cluster_next = 1 + (prandom_u32() % p->highest_bit); nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER); - cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info)); + cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info), + GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; goto bad_swap; @@ -2822,7 +2828,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) - frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); + frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long), + GFP_KERNEL); if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) { /* diff --git a/mm/truncate.c b/mm/truncate.c index 6263affdef8866135f28256b5b1f5e598515d204..6479ed2afc53fb9dd8d9719051ea77e7a5b200af 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping, /* * Invalidate exceptional entry if easily possible. This handles exceptional - * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and - * clean entries. + * entries for invalidate_inode_pages(). */ static int invalidate_exceptional_entry(struct address_space *mapping, pgoff_t index, void *entry) { - /* Handled by shmem itself */ - if (shmem_mapping(mapping)) + /* Handled by shmem itself, or for DAX we do nothing. */ + if (shmem_mapping(mapping) || dax_mapping(mapping)) return 1; - if (dax_mapping(mapping)) - return dax_invalidate_mapping_entry(mapping, index); clear_shadow_entry(mapping, index, entry); return 1; } @@ -266,9 +263,8 @@ void truncate_inode_pages_range(struct address_space *mapping, pgoff_t index; int i; - cleancache_invalidate_inode(mapping); if (mapping->nrpages == 0 && mapping->nrexceptional == 0) - return; + goto out; /* Offsets within partial pages */ partial_start = lstart & (PAGE_SIZE - 1); @@ -363,7 +359,7 @@ void truncate_inode_pages_range(struct address_space *mapping, * will be released, just zeroed, so we can bail out now. */ if (start >= end) - return; + goto out; index = start; for ( ; ; ) { @@ -410,6 +406,8 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_release(&pvec); index++; } + +out: cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -623,7 +621,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int ret2 = 0; int did_range_unmap = 0; - cleancache_invalidate_inode(mapping); + if (mapping->nrpages == 0 && mapping->nrexceptional == 0) + goto out; + pagevec_init(&pvec, 0); index = start; while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, @@ -686,6 +686,18 @@ int invalidate_inode_pages2_range(struct address_space *mapping, cond_resched(); index++; } + /* + * For DAX we invalidate page tables after invalidating radix tree. We + * could invalidate page tables while invalidating each entry however + * that would be expensive. And doing range unmapping before doesn't + * work as we have no cheap way to find whether radix tree entry didn't + * get remapped later. + */ + if (dax_mapping(mapping)) { + unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT, + (loff_t)(end - start + 1) << PAGE_SHIFT, 0); + } +out: cleancache_invalidate_inode(mapping); return ret; } diff --git a/mm/usercopy.c b/mm/usercopy.c index d155e12563b139a9879bbc370902ea85a84bd0f3..a9852b24715d25598a7603560ffc042ca0153a2b 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -19,15 +19,9 @@ #include #include #include +#include #include -enum { - BAD_STACK = -1, - NOT_STACK = 0, - GOOD_FRAME, - GOOD_STACK, -}; - /* * Checks if a given pointer and length is contained by the current * stack frame (if possible). @@ -206,17 +200,6 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, { struct page *page; - /* - * Some architectures (arm64) return true for virt_addr_valid() on - * vmalloced addresses. Work around this by checking for vmalloc - * first. - * - * We also need to check for module addresses explicitly since we - * may copy static data from modules to userspace - */ - if (is_vmalloc_or_module_addr(ptr)) - return NULL; - if (!virt_addr_valid(ptr)) return NULL; diff --git a/mm/util.c b/mm/util.c index 656dc5e37a8721e892d1b2fd0a9f8c1808b17a2d..26be6407abd7efe452a585d341a8d7e5b53d2b32 100644 --- a/mm/util.c +++ b/mm/util.c @@ -329,6 +329,67 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, } EXPORT_SYMBOL(vm_mmap); +/** + * kvmalloc_node - attempt to allocate physically contiguous memory, but upon + * failure, fall back to non-contiguous (vmalloc) allocation. + * @size: size of the request. + * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL. + * @node: numa node to allocate from + * + * Uses kmalloc to get the memory but if the allocation fails then falls back + * to the vmalloc allocator. Use kvfree for freeing the memory. + * + * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT + * is supported only for large (>32kB) allocations, and it should be used only if + * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks. + * + * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. + */ +void *kvmalloc_node(size_t size, gfp_t flags, int node) +{ + gfp_t kmalloc_flags = flags; + void *ret; + + /* + * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) + * so the given set of flags has to be compatible. + */ + WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); + + /* + * We want to attempt a large physically contiguous block first because + * it is less likely to fragment multiple larger blocks and therefore + * contribute to a long term fragmentation less than vmalloc fallback. + * However make sure that larger requests are not too disruptive - no + * OOM killer and no allocation failure warnings as we have a fallback. + */ + if (size > PAGE_SIZE) { + kmalloc_flags |= __GFP_NOWARN; + + /* + * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly + * requests because there is no other way to tell the allocator + * that we want to fail rather than retry endlessly. + */ + if (!(kmalloc_flags & __GFP_REPEAT) || + (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + kmalloc_flags |= __GFP_NORETRY; + } + + ret = kmalloc_node(size, kmalloc_flags, node); + + /* + * It doesn't really make sense to fallback to vmalloc for sub page + * requests + */ + if (ret || size <= PAGE_SIZE) + return ret; + + return __vmalloc_node_flags_caller(size, node, flags, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(kvmalloc_node); + void kvfree(const void *addr) { if (is_vmalloc_addr(addr)) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0b057628a7ba5c45d722710082ce32df3f7e8e13..34a1c3e46ed72594b499e7f61e8aacdd4c5fe818 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -521,7 +521,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, } } - if (printk_ratelimit()) + if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) pr_warn("vmap allocation for size %lu failed: use vmalloc= to increase size\n", size); kfree(va); @@ -1579,7 +1579,7 @@ void vfree_atomic(const void *addr) * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling * conventions for vfree() arch-depenedent would be a really bad idea) * - * NOTE: assumes that the object at *addr has a size >= sizeof(llist_node) + * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node) */ void vfree(const void *addr) { @@ -1658,7 +1658,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, struct page **pages; unsigned int nr_pages, array_size, i; const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; - const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; + const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN; nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1786,6 +1786,13 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, * Allocate enough pages to cover @size from the page level * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. + * + * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT + * and __GFP_NOFAIL are not supported + * + * Any use of gfp flags outside of GFP_KERNEL should be consulted + * with mm people. + * */ static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, @@ -1809,6 +1816,13 @@ static inline void *__vmalloc_node_flags(unsigned long size, node, __builtin_return_address(0)); } + +void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, + void *caller) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); +} + /** * vmalloc - allocate virtually contiguous memory * @size: allocation size @@ -1821,7 +1835,7 @@ static inline void *__vmalloc_node_flags(unsigned long size, void *vmalloc(unsigned long size) { return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_HIGHMEM); + GFP_KERNEL); } EXPORT_SYMBOL(vmalloc); @@ -1838,7 +1852,7 @@ EXPORT_SYMBOL(vmalloc); void *vzalloc(unsigned long size) { return __vmalloc_node_flags(size, NUMA_NO_NODE, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc); @@ -1855,7 +1869,7 @@ void *vmalloc_user(unsigned long size) void *ret; ret = __vmalloc_node(size, SHMLBA, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL, NUMA_NO_NODE, __builtin_return_address(0)); if (ret) { @@ -1879,7 +1893,7 @@ EXPORT_SYMBOL(vmalloc_user); */ void *vmalloc_node(unsigned long size, int node) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, node, __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_node); @@ -1899,7 +1913,7 @@ EXPORT_SYMBOL(vmalloc_node); void *vzalloc_node(unsigned long size, int node) { return __vmalloc_node_flags(size, node, - GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + GFP_KERNEL | __GFP_ZERO); } EXPORT_SYMBOL(vzalloc_node); @@ -1921,7 +1935,7 @@ EXPORT_SYMBOL(vzalloc_node); void *vmalloc_exec(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 6063581f705c48b97a78a087d8c67127af9c8d68..ce0618bfa8d0643d0e65263ad65a9e7a7a4c92ed 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -115,9 +115,9 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, unsigned long pressure = 0; /* - * reclaimed can be greater than scanned in cases - * like THP, where the scanned is 1 and reclaimed - * could be 512 + * reclaimed can be greater than scanned for things such as reclaimed + * slab pages. shrink_node() just adds reclaimed pages without a + * related increment to scanned pages. */ if (reclaimed >= scanned) goto out; diff --git a/mm/vmscan.c b/mm/vmscan.c index bc8031ef994d57a1d1622468f8df6d745853562b..8ad39bbc79e67eaff24c42201ae2470ffc21d0a7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -97,8 +97,13 @@ struct scan_control { /* Can pages be swapped as part of reclaim? */ unsigned int may_swap:1; - /* Can cgroups be reclaimed below their normal consumption range? */ - unsigned int may_thrash:1; + /* + * Cgroups are not reclaimed below their configured memory.low, + * unless we threaten to OOM. If any cgroups are skipped due to + * memory.low and nothing was reclaimed, go back for memory.low. + */ + unsigned int memcg_low_reclaim:1; + unsigned int memcg_low_skipped:1; unsigned int hibernation_mode:1; @@ -230,12 +235,6 @@ unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat) return nr; } -bool pgdat_reclaimable(struct pglist_data *pgdat) -{ - return node_page_state_snapshot(pgdat, NR_PAGES_SCANNED) < - pgdat_reclaimable_pages(pgdat) * 6; -} - /** * lruvec_lru_size - Returns the number of pages on the given LRU list. * @lruvec: lru vector @@ -912,7 +911,8 @@ static void page_check_dirty_writeback(struct page *page, * Anonymous pages are not handled by flushers and must be written * from reclaim context. Do not stall reclaim based on them */ - if (!page_is_file_cache(page)) { + if (!page_is_file_cache(page) || + (PageAnon(page) && !PageSwapBacked(page))) { *dirty = false; *writeback = false; return; @@ -972,8 +972,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, int may_enter_fs; enum page_references references = PAGEREF_RECLAIM_CLEAN; bool dirty, writeback; - bool lazyfree = false; - int ret = SWAP_SUCCESS; cond_resched(); @@ -988,13 +986,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, sc->nr_scanned++; if (unlikely(!page_evictable(page))) - goto cull_mlocked; + goto activate_locked; if (!sc->may_unmap && page_mapped(page)) goto keep_locked; /* Double the slab pressure for mapped and swapcache pages */ - if (page_mapped(page) || PageSwapCache(page)) + if ((page_mapped(page) || PageSwapCache(page)) && + !(PageAnon(page) && !PageSwapBacked(page))) sc->nr_scanned++; may_enter_fs = (sc->gfp_mask & __GFP_FS) || @@ -1120,13 +1119,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, /* * Anonymous process memory has backing store? * Try to allocate it some swap space here. + * Lazyfree page could be freed directly */ - if (PageAnon(page) && !PageSwapCache(page)) { + if (PageAnon(page) && PageSwapBacked(page) && + !PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; if (!add_to_swap(page, page_list)) goto activate_locked; - lazyfree = true; may_enter_fs = 1; /* Adding to swap updated mapping */ @@ -1143,21 +1143,10 @@ static unsigned long shrink_page_list(struct list_head *page_list, * The page is mapped into the page tables of one or more * processes. Try to unmap it here. */ - if (page_mapped(page) && mapping) { - switch (ret = try_to_unmap(page, lazyfree ? - (ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) : - (ttu_flags | TTU_BATCH_FLUSH))) { - case SWAP_FAIL: + if (page_mapped(page)) { + if (!try_to_unmap(page, ttu_flags | TTU_BATCH_FLUSH)) { nr_unmap_fail++; goto activate_locked; - case SWAP_AGAIN: - goto keep_locked; - case SWAP_MLOCK: - goto cull_mlocked; - case SWAP_LZFREE: - goto lazyfree; - case SWAP_SUCCESS: - ; /* try to free the page below */ } } @@ -1267,10 +1256,18 @@ static unsigned long shrink_page_list(struct list_head *page_list, } } -lazyfree: - if (!mapping || !__remove_mapping(mapping, page, true)) - goto keep_locked; + if (PageAnon(page) && !PageSwapBacked(page)) { + /* follow __remove_mapping for reference */ + if (!page_ref_freeze(page, 1)) + goto keep_locked; + if (PageDirty(page)) { + page_ref_unfreeze(page, 1); + goto keep_locked; + } + count_vm_event(PGLAZYFREED); + } else if (!mapping || !__remove_mapping(mapping, page, true)) + goto keep_locked; /* * At this point, we have no other references and there is * no way to pick any more up (removed from LRU, removed @@ -1280,9 +1277,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, */ __ClearPageLocked(page); free_it: - if (ret == SWAP_LZFREE) - count_vm_event(PGLAZYFREED); - nr_reclaimed++; /* @@ -1292,20 +1286,16 @@ static unsigned long shrink_page_list(struct list_head *page_list, list_add(&page->lru, &free_pages); continue; -cull_mlocked: - if (PageSwapCache(page)) - try_to_free_swap(page); - unlock_page(page); - list_add(&page->lru, &ret_pages); - continue; - activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ - if (PageSwapCache(page) && mem_cgroup_swap_full(page)) + if (PageSwapCache(page) && (mem_cgroup_swap_full(page) || + PageMlocked(page))) try_to_free_swap(page); VM_BUG_ON_PAGE(PageActive(page), page); - SetPageActive(page); - pgactivate++; + if (!PageMlocked(page)) { + SetPageActive(page); + pgactivate++; + } keep_locked: unlock_page(page); keep: @@ -1354,7 +1344,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, } ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, - TTU_UNMAP|TTU_IGNORE_ACCESS, NULL, true); + TTU_IGNORE_ACCESS, NULL, true); list_splice(&clean_pages, page_list); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret); return ret; @@ -1459,7 +1449,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, * * Appropriate locks must be held before calling this function. * - * @nr_to_scan: The number of pages to look through on the list. + * @nr_to_scan: The number of eligible pages to look through on the list. * @lruvec: The LRU vector to pull pages from. * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. @@ -1478,12 +1468,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, unsigned long nr_taken = 0; unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 }; unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; - unsigned long skipped = 0, total_skipped = 0; - unsigned long scan, nr_pages; + unsigned long skipped = 0; + unsigned long scan, total_scan, nr_pages; LIST_HEAD(pages_skipped); - for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan && - !list_empty(src);) { + scan = 0; + for (total_scan = 0; + scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src); + total_scan++) { struct page *page; page = lru_to_page(src); @@ -1498,11 +1490,12 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, } /* - * Account for scanned and skipped separetly to avoid the pgdat - * being prematurely marked unreclaimable by pgdat_reclaimable. + * Do not count skipped pages because that makes the function + * return with no isolated pages if the LRU mostly contains + * ineligible pages. This causes the VM to not reclaim any + * pages, triggering a premature OOM. */ scan++; - switch (__isolate_lru_page(page, mode)) { case 0: nr_pages = hpage_nr_pages(page); @@ -1531,6 +1524,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, if (!list_empty(&pages_skipped)) { int zid; + list_splice(&pages_skipped, src); for (zid = 0; zid < MAX_NR_ZONES; zid++) { if (!nr_skipped[zid]) continue; @@ -1538,19 +1532,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, __count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]); skipped += nr_skipped[zid]; } - - /* - * Account skipped pages as a partial scan as the pgdat may be - * close to unreclaimable. If the LRU list is empty, account - * skipped pages as a full scan. - */ - total_skipped = list_empty(src) ? skipped : skipped >> 2; - - list_splice(&pages_skipped, src); } - *nr_scanned = scan + total_skipped; + *nr_scanned = total_scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, - scan, skipped, nr_taken, mode, lru); + total_scan, skipped, nr_taken, mode, lru); update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } @@ -1750,7 +1735,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) { - __mod_node_page_state(pgdat, NR_PAGES_SCANNED, nr_scanned); if (current_is_kswapd()) __count_vm_events(PGSCAN_KSWAPD, nr_scanned); else @@ -1761,7 +1745,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (nr_taken == 0) return 0; - nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP, + nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, 0, &stat, false); spin_lock_irq(&pgdat->lru_lock); @@ -1953,8 +1937,6 @@ static void shrink_active_list(unsigned long nr_to_scan, __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); reclaim_stat->recent_scanned[file] += nr_taken; - if (global_reclaim(sc)) - __mod_node_page_state(pgdat, NR_PAGES_SCANNED, nr_scanned); __count_vm_events(PGREFILL, nr_scanned); spin_unlock_irq(&pgdat->lru_lock); @@ -2033,6 +2015,8 @@ static void shrink_active_list(unsigned long nr_to_scan, * Both inactive lists should also be large enough that each inactive * page has a chance to be referenced again before it is reclaimed. * + * If that fails and refaulting is observed, the inactive list grows. + * * The inactive_ratio is the target ratio of ACTIVE to INACTIVE pages * on this LRU, maintained by the pageout code. A zone->inactive_ratio * of 3 means 3:1 or 25% of the pages are kept on the inactive list. @@ -2049,12 +2033,15 @@ static void shrink_active_list(unsigned long nr_to_scan, * 10TB 320 32GB */ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, - struct scan_control *sc, bool trace) + struct mem_cgroup *memcg, + struct scan_control *sc, bool actual_reclaim) { - unsigned long inactive_ratio; - unsigned long inactive, active; - enum lru_list inactive_lru = file * LRU_FILE; enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + enum lru_list inactive_lru = file * LRU_FILE; + unsigned long inactive, active; + unsigned long inactive_ratio; + unsigned long refaults; unsigned long gb; /* @@ -2067,27 +2054,42 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx); active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx); - gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb) - inactive_ratio = int_sqrt(10 * gb); + if (memcg) + refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); else - inactive_ratio = 1; + refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); - if (trace) - trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id, - sc->reclaim_idx, - lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, - lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, - inactive_ratio, file); + /* + * When refaults are being observed, it means a new workingset + * is being established. Disable active list protection to get + * rid of the stale workingset quickly. + */ + if (file && actual_reclaim && lruvec->refaults != refaults) { + inactive_ratio = 0; + } else { + gb = (inactive + active) >> (30 - PAGE_SHIFT); + if (gb) + inactive_ratio = int_sqrt(10 * gb); + else + inactive_ratio = 1; + } + + if (actual_reclaim) + trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx, + lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, + lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, + inactive_ratio, file); return inactive * inactive_ratio < active; } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct lruvec *lruvec, struct scan_control *sc) + struct lruvec *lruvec, struct mem_cgroup *memcg, + struct scan_control *sc) { if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true)) + if (inactive_list_is_low(lruvec, is_file_lru(lru), + memcg, sc, true)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -2123,30 +2125,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, unsigned long anon_prio, file_prio; enum scan_balance scan_balance; unsigned long anon, file; - bool force_scan = false; unsigned long ap, fp; enum lru_list lru; - bool some_scanned; - int pass; - - /* - * If the zone or memcg is small, nr[l] can be 0. This - * results in no scanning on this priority and a potential - * priority drop. Global direct reclaim can go to the next - * zone and tends to have no problems. Global kswapd is for - * zone balancing and it needs to scan a minimum amount. When - * reclaiming for a memcg, a priority drop can cause high - * latencies, so it's better to scan a minimum amount there as - * well. - */ - if (current_is_kswapd()) { - if (!pgdat_reclaimable(pgdat)) - force_scan = true; - if (!mem_cgroup_online(memcg)) - force_scan = true; - } - if (!global_reclaim(sc)) - force_scan = true; /* If we have no swap space, do not bother scanning anon pages. */ if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { @@ -2218,7 +2198,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, * lruvec even if it has plenty of old anonymous pages unless the * system is under heavy pressure. */ - if (!inactive_list_is_low(lruvec, true, sc, false) && + if (!inactive_list_is_low(lruvec, true, memcg, sc, false) && lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) { scan_balance = SCAN_FILE; goto out; @@ -2277,55 +2257,48 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, fraction[1] = fp; denominator = ap + fp + 1; out: - some_scanned = false; - /* Only use force_scan on second pass. */ - for (pass = 0; !some_scanned && pass < 2; pass++) { - *lru_pages = 0; - for_each_evictable_lru(lru) { - int file = is_file_lru(lru); - unsigned long size; - unsigned long scan; - - size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); - scan = size >> sc->priority; - - if (!scan && pass && force_scan) - scan = min(size, SWAP_CLUSTER_MAX); - - switch (scan_balance) { - case SCAN_EQUAL: - /* Scan lists relative to size */ - break; - case SCAN_FRACT: - /* - * Scan types proportional to swappiness and - * their relative recent reclaim efficiency. - */ - scan = div64_u64(scan * fraction[file], - denominator); - break; - case SCAN_FILE: - case SCAN_ANON: - /* Scan one type exclusively */ - if ((scan_balance == SCAN_FILE) != file) { - size = 0; - scan = 0; - } - break; - default: - /* Look ma, no brain */ - BUG(); - } + *lru_pages = 0; + for_each_evictable_lru(lru) { + int file = is_file_lru(lru); + unsigned long size; + unsigned long scan; - *lru_pages += size; - nr[lru] = scan; + size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); + scan = size >> sc->priority; + /* + * If the cgroup's already been deleted, make sure to + * scrape out the remaining cache. + */ + if (!scan && !mem_cgroup_online(memcg)) + scan = min(size, SWAP_CLUSTER_MAX); + switch (scan_balance) { + case SCAN_EQUAL: + /* Scan lists relative to size */ + break; + case SCAN_FRACT: /* - * Skip the second pass and don't force_scan, - * if we found something to scan. + * Scan types proportional to swappiness and + * their relative recent reclaim efficiency. */ - some_scanned |= !!scan; + scan = div64_u64(scan * fraction[file], + denominator); + break; + case SCAN_FILE: + case SCAN_ANON: + /* Scan one type exclusively */ + if ((scan_balance == SCAN_FILE) != file) { + size = 0; + scan = 0; + } + break; + default: + /* Look ma, no brain */ + BUG(); } + + *lru_pages += size; + nr[lru] = scan; } } @@ -2376,7 +2349,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); + lruvec, memcg, sc); } } @@ -2443,7 +2416,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_list_is_low(lruvec, false, sc, true)) + if (inactive_list_is_low(lruvec, false, memcg, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } @@ -2557,9 +2530,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long scanned; if (mem_cgroup_low(root, memcg)) { - if (!sc->may_thrash) + if (!sc->memcg_low_reclaim) { + sc->memcg_low_skipped = 1; continue; - mem_cgroup_events(memcg, MEMCG_LOW, 1); + } + mem_cgroup_event(memcg, MEMCG_LOW); } reclaimed = sc->nr_reclaimed; @@ -2620,6 +2595,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); + /* + * Kswapd gives up on balancing particular nodes after too + * many failures to reclaim anything from them and goes to + * sleep. On reclaim progress, reset the failure counter. A + * successful direct reclaim run will revive a dormant kswapd. + */ + if (reclaimable) + pgdat->kswapd_failures = 0; + return reclaimable; } @@ -2694,10 +2678,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) GFP_KERNEL | __GFP_HARDWALL)) continue; - if (sc->priority != DEF_PRIORITY && - !pgdat_reclaimable(zone->zone_pgdat)) - continue; /* Let kswapd poll it */ - /* * If we already have plenty of memory free for * compaction in this zone, don't free any more. @@ -2752,6 +2732,25 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) sc->gfp_mask = orig_mask; } +static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat) +{ + struct mem_cgroup *memcg; + + memcg = mem_cgroup_iter(root_memcg, NULL, NULL); + do { + unsigned long refaults; + struct lruvec *lruvec; + + if (memcg) + refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE); + else + refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE); + + lruvec = mem_cgroup_lruvec(pgdat, memcg); + lruvec->refaults = refaults; + } while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL))); +} + /* * This is the main entry point to direct page reclaim. * @@ -2772,6 +2771,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int initial_priority = sc->priority; + pg_data_t *last_pgdat; + struct zoneref *z; + struct zone *zone; retry: delayacct_freepages_start(); @@ -2798,6 +2800,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->may_writepage = 1; } while (--sc->priority >= 0); + last_pgdat = NULL; + for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, + sc->nodemask) { + if (zone->zone_pgdat == last_pgdat) + continue; + last_pgdat = zone->zone_pgdat; + snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); + } + delayacct_freepages_end(); if (sc->nr_reclaimed) @@ -2808,16 +2819,17 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, return 1; /* Untapped cgroup reserves? Don't OOM, retry. */ - if (!sc->may_thrash) { + if (sc->memcg_low_skipped) { sc->priority = initial_priority; - sc->may_thrash = 1; + sc->memcg_low_reclaim = 1; + sc->memcg_low_skipped = 0; goto retry; } return 0; } -static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) +static bool allow_direct_reclaim(pg_data_t *pgdat) { struct zone *zone; unsigned long pfmemalloc_reserve = 0; @@ -2825,10 +2837,15 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) int i; bool wmark_ok; + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; - if (!managed_zone(zone) || - pgdat_reclaimable_pages(pgdat) == 0) + if (!managed_zone(zone)) + continue; + + if (!zone_reclaimable_pages(zone)) continue; pfmemalloc_reserve += min_wmark_pages(zone); @@ -2905,7 +2922,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; - if (pfmemalloc_watermark_ok(pgdat)) + if (allow_direct_reclaim(pgdat)) goto out; break; } @@ -2927,14 +2944,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, */ if (!(gfp_mask & __GFP_FS)) { wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat), HZ); + allow_direct_reclaim(pgdat), HZ); goto check_pending; } /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat)); + allow_direct_reclaim(pgdat)); check_pending: if (fatal_signal_pending(current)) @@ -2950,7 +2967,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, unsigned long nr_reclaimed; struct scan_control sc = { .nr_to_reclaim = SWAP_CLUSTER_MAX, - .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), + .gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)), .reclaim_idx = gfp_zone(gfp_mask), .order = order, .nodemask = nodemask, @@ -3028,9 +3045,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, struct zonelist *zonelist; unsigned long nr_reclaimed; int nid; + unsigned int noreclaim_flag; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), - .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + .gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), .reclaim_idx = MAX_NR_ZONES - 1, .target_mem_cgroup = memcg, @@ -3054,9 +3072,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, sc.gfp_mask, sc.reclaim_idx); - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); - current->flags &= ~PF_MEMALLOC; + memalloc_noreclaim_restore(noreclaim_flag); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); @@ -3076,7 +3094,7 @@ static void age_active_anon(struct pglist_data *pgdat, do { struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg); - if (inactive_list_is_low(lruvec, false, sc, true)) + if (inactive_list_is_low(lruvec, false, memcg, sc, true)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); @@ -3084,22 +3102,44 @@ static void age_active_anon(struct pglist_data *pgdat, } while (memcg); } -static bool zone_balanced(struct zone *zone, int order, int classzone_idx) +/* + * Returns true if there is an eligible zone balanced for the request order + * and classzone_idx + */ +static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx) { - unsigned long mark = high_wmark_pages(zone); + int i; + unsigned long mark = -1; + struct zone *zone; - if (!zone_watermark_ok_safe(zone, order, mark, classzone_idx)) - return false; + for (i = 0; i <= classzone_idx; i++) { + zone = pgdat->node_zones + i; + + if (!managed_zone(zone)) + continue; + + mark = high_wmark_pages(zone); + if (zone_watermark_ok_safe(zone, order, mark, classzone_idx)) + return true; + } /* - * If any eligible zone is balanced then the node is not considered - * to be congested or dirty + * If a node has no populated zone within classzone_idx, it does not + * need balancing by definition. This can happen if a zone-restricted + * allocation tries to wake a remote kswapd. */ - clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags); - clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags); - clear_bit(PGDAT_WRITEBACK, &zone->zone_pgdat->flags); + if (mark == -1) + return true; - return true; + return false; +} + +/* Clear pgdat state for congested, dirty or under writeback. */ +static void clear_pgdat_congested(pg_data_t *pgdat) +{ + clear_bit(PGDAT_CONGESTED, &pgdat->flags); + clear_bit(PGDAT_DIRTY, &pgdat->flags); + clear_bit(PGDAT_WRITEBACK, &pgdat->flags); } /* @@ -3110,11 +3150,9 @@ static bool zone_balanced(struct zone *zone, int order, int classzone_idx) */ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) { - int i; - /* * The throttled processes are normally woken up in balance_pgdat() as - * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * soon as allow_direct_reclaim() is true. But there is a potential * race between when kswapd checks the watermarks and a process gets * throttled. There is also a potential race if processes get * throttled, kswapd wakes, a large process exits thereby balancing the @@ -3128,17 +3166,16 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); - for (i = 0; i <= classzone_idx; i++) { - struct zone *zone = pgdat->node_zones + i; - - if (!managed_zone(zone)) - continue; + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; - if (!zone_balanced(zone, order, classzone_idx)) - return false; + if (pgdat_balanced(pgdat, order, classzone_idx)) { + clear_pgdat_congested(pgdat); + return true; } - return true; + return false; } /* @@ -3214,9 +3251,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) count_vm_event(PAGEOUTRUN); do { + unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; - sc.nr_reclaimed = 0; sc.reclaim_idx = classzone_idx; /* @@ -3241,23 +3278,12 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) } /* - * Only reclaim if there are no eligible zones. Check from - * high to low zone as allocations prefer higher zones. - * Scanning from low to high zone would allow congestion to be - * cleared during a very small window when a small low - * zone was balanced even under extreme pressure when the - * overall node may be congested. Note that sc.reclaim_idx - * is not used as buffer_heads_over_limit may have adjusted - * it. + * Only reclaim if there are no eligible zones. Note that + * sc.reclaim_idx is not used as buffer_heads_over_limit may + * have adjusted it. */ - for (i = classzone_idx; i >= 0; i--) { - zone = pgdat->node_zones + i; - if (!managed_zone(zone)) - continue; - - if (zone_balanced(zone, sc.order, classzone_idx)) - goto out; - } + if (pgdat_balanced(pgdat, sc.order, classzone_idx)) + goto out; /* * Do some background aging of the anon list, to give @@ -3271,7 +3297,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * If we're getting trouble reclaiming, start doing writepage * even in laptop mode. */ - if (sc.priority < DEF_PRIORITY - 2 || !pgdat_reclaimable(pgdat)) + if (sc.priority < DEF_PRIORITY - 2) sc.may_writepage = 1; /* Call soft limit reclaim before calling shrink_node. */ @@ -3295,7 +3321,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && - pfmemalloc_watermark_ok(pgdat)) + allow_direct_reclaim(pgdat)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ @@ -3306,11 +3332,16 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ - if (raise_priority || !sc.nr_reclaimed) + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); + if (!sc.nr_reclaimed) + pgdat->kswapd_failures++; + out: + snapshot_refaults(NULL, pgdat); /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account. If another caller @@ -3320,6 +3351,22 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) return sc.order; } +/* + * pgdat->kswapd_classzone_idx is the highest zone index that a recent + * allocation request woke kswapd for. When kswapd has not woken recently, + * the value is MAX_NR_ZONES which is not a valid index. This compares a + * given classzone and returns it or the highest classzone index kswapd + * was recently woke for. + */ +static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat, + enum zone_type classzone_idx) +{ + if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES) + return classzone_idx; + + return max(pgdat->kswapd_classzone_idx, classzone_idx); +} + static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order, unsigned int classzone_idx) { @@ -3331,7 +3378,13 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); - /* Try to sleep for a short interval */ + /* + * Try to sleep for a short interval. Note that kcompactd will only be + * woken if it is possible to sleep for a short interval. This is + * deliberate on the assumption that if reclaim cannot keep an + * eligible zone balanced that it's also unlikely that compaction will + * succeed. + */ if (prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) { /* * Compaction records what page blocks it recently failed to @@ -3355,7 +3408,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o * the previous request that slept prematurely. */ if (remaining) { - pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx); + pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx); pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order); } @@ -3409,7 +3462,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o */ static int kswapd(void *p) { - unsigned int alloc_order, reclaim_order, classzone_idx; + unsigned int alloc_order, reclaim_order; + unsigned int classzone_idx = MAX_NR_ZONES - 1; pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; @@ -3439,20 +3493,23 @@ static int kswapd(void *p) tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; set_freezable(); - pgdat->kswapd_order = alloc_order = reclaim_order = 0; - pgdat->kswapd_classzone_idx = classzone_idx = 0; + pgdat->kswapd_order = 0; + pgdat->kswapd_classzone_idx = MAX_NR_ZONES; for ( ; ; ) { bool ret; + alloc_order = reclaim_order = pgdat->kswapd_order; + classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx); + kswapd_try_sleep: kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order, classzone_idx); /* Read the new order and classzone_idx */ alloc_order = reclaim_order = pgdat->kswapd_order; - classzone_idx = pgdat->kswapd_classzone_idx; + classzone_idx = kswapd_classzone_idx(pgdat, 0); pgdat->kswapd_order = 0; - pgdat->kswapd_classzone_idx = 0; + pgdat->kswapd_classzone_idx = MAX_NR_ZONES; ret = try_to_freeze(); if (kthread_should_stop()) @@ -3478,9 +3535,6 @@ static int kswapd(void *p) reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); if (reclaim_order < alloc_order) goto kswapd_try_sleep; - - alloc_order = reclaim_order = pgdat->kswapd_order; - classzone_idx = pgdat->kswapd_classzone_idx; } tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); @@ -3496,7 +3550,6 @@ static int kswapd(void *p) void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) { pg_data_t *pgdat; - int z; if (!managed_zone(zone)) return; @@ -3504,22 +3557,20 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL)) return; pgdat = zone->zone_pgdat; - pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx); + pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat, + classzone_idx); pgdat->kswapd_order = max(pgdat->kswapd_order, order); if (!waitqueue_active(&pgdat->kswapd_wait)) return; - /* Only wake kswapd if all zones are unbalanced */ - for (z = 0; z <= classzone_idx; z++) { - zone = pgdat->node_zones + z; - if (!managed_zone(zone)) - continue; + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return; - if (zone_balanced(zone, order, classzone_idx)) - return; - } + if (pgdat_balanced(pgdat, order, classzone_idx)) + return; - trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order); + trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, classzone_idx, order); wake_up_interruptible(&pgdat->kswapd_wait); } @@ -3548,8 +3599,9 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); struct task_struct *p = current; unsigned long nr_reclaimed; + unsigned int noreclaim_flag; - p->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); lockdep_set_current_reclaim_state(sc.gfp_mask); reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; @@ -3558,7 +3610,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) p->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); - p->flags &= ~PF_MEMALLOC; + memalloc_noreclaim_restore(noreclaim_flag); return nr_reclaimed; } @@ -3723,9 +3775,10 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in struct task_struct *p = current; struct reclaim_state reclaim_state; int classzone_idx = gfp_zone(gfp_mask); + unsigned int noreclaim_flag; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), - .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)), + .gfp_mask = (gfp_mask = current_gfp_context(gfp_mask)), .order = order, .priority = NODE_RECLAIM_PRIORITY, .may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE), @@ -3740,7 +3793,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in * and we also need to be able to write out pages for RECLAIM_WRITE * and RECLAIM_UNMAP. */ - p->flags |= PF_MEMALLOC | PF_SWAPWRITE; + noreclaim_flag = memalloc_noreclaim_save(); + p->flags |= PF_SWAPWRITE; lockdep_set_current_reclaim_state(gfp_mask); reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; @@ -3756,7 +3810,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in } p->reclaim_state = NULL; - current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); + current->flags &= ~PF_SWAPWRITE; + memalloc_noreclaim_restore(noreclaim_flag); lockdep_clear_current_reclaim_state(); return sc.nr_reclaimed >= nr_pages; } @@ -3779,9 +3834,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) return NODE_RECLAIM_FULL; - if (!pgdat_reclaimable(pgdat)) - return NODE_RECLAIM_FULL; - /* * Do not scan if the allocation should not be delayed. */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 809025ed97ea0eee97573a32ba2764c63ee2dffd..76f73670200ac1d34b5f17388df8356fadffaf71 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -954,7 +954,6 @@ const char * const vmstat_text[] = { "nr_unevictable", "nr_isolated_anon", "nr_isolated_file", - "nr_pages_scanned", "workingset_refault", "workingset_activate", "workingset_nodereclaim", @@ -992,6 +991,7 @@ const char * const vmstat_text[] = { "pgfree", "pgactivate", "pgdeactivate", + "pglazyfree", "pgfault", "pgmajfault", @@ -1124,8 +1124,12 @@ static void frag_stop(struct seq_file *m, void *arg) { } -/* Walk all the zones in a node and print using a callback */ +/* + * Walk zones in a node and print using a callback. + * If @assert_populated is true, only use callback for zones that are populated. + */ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, + bool assert_populated, void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) { struct zone *zone; @@ -1133,7 +1137,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, unsigned long flags; for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { - if (!populated_zone(zone)) + if (assert_populated && !populated_zone(zone)) continue; spin_lock_irqsave(&zone->lock, flags); @@ -1161,7 +1165,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, static int frag_show(struct seq_file *m, void *arg) { pg_data_t *pgdat = (pg_data_t *)arg; - walk_zones_in_node(m, pgdat, frag_show_print); + walk_zones_in_node(m, pgdat, true, frag_show_print); return 0; } @@ -1202,7 +1206,7 @@ static int pagetypeinfo_showfree(struct seq_file *m, void *arg) seq_printf(m, "%6d ", order); seq_putc(m, '\n'); - walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print); + walk_zones_in_node(m, pgdat, true, pagetypeinfo_showfree_print); return 0; } @@ -1254,7 +1258,7 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg) for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) seq_printf(m, "%12s ", migratetype_names[mtype]); seq_putc(m, '\n'); - walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print); + walk_zones_in_node(m, pgdat, true, pagetypeinfo_showblockcount_print); return 0; } @@ -1280,7 +1284,7 @@ static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) seq_printf(m, "%12s ", migratetype_names[mtype]); seq_putc(m, '\n'); - walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print); + walk_zones_in_node(m, pgdat, true, pagetypeinfo_showmixedcount_print); #endif /* CONFIG_PAGE_OWNER */ } @@ -1355,8 +1359,6 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone) return zone == compare; } - /* The zone must be somewhere! */ - WARN_ON_ONCE(1); return false; } @@ -1378,7 +1380,6 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n min %lu" "\n low %lu" "\n high %lu" - "\n node_scanned %lu" "\n spanned %lu" "\n present %lu" "\n managed %lu", @@ -1386,23 +1387,28 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), - node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED), zone->spanned_pages, zone->present_pages, zone->managed_pages); - for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) - seq_printf(m, "\n %-12s %lu", vmstat_text[i], - zone_page_state(zone, i)); - seq_printf(m, "\n protection: (%ld", zone->lowmem_reserve[0]); for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) seq_printf(m, ", %ld", zone->lowmem_reserve[i]); - seq_printf(m, - ")" - "\n pagesets"); + seq_putc(m, ')'); + + /* If unpopulated, no other information is useful */ + if (!populated_zone(zone)) { + seq_putc(m, '\n'); + return; + } + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + seq_printf(m, "\n %-12s %lu", vmstat_text[i], + zone_page_state(zone, i)); + + seq_printf(m, "\n pagesets"); for_each_online_cpu(i) { struct per_cpu_pageset *pageset; @@ -1425,19 +1431,22 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n node_unreclaimable: %u" "\n start_pfn: %lu" "\n node_inactive_ratio: %u", - !pgdat_reclaimable(zone->zone_pgdat), + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, zone->zone_start_pfn, zone->zone_pgdat->inactive_ratio); seq_putc(m, '\n'); } /* - * Output information about zones in @pgdat. + * Output information about zones in @pgdat. All zones are printed regardless + * of whether they are populated or not: lowmem_reserve_ratio operates on the + * set of all zones and userspace would not be aware of such zones if they are + * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio). */ static int zoneinfo_show(struct seq_file *m, void *arg) { pg_data_t *pgdat = (pg_data_t *)arg; - walk_zones_in_node(m, pgdat, zoneinfo_show_print); + walk_zones_in_node(m, pgdat, false, zoneinfo_show_print); return 0; } @@ -1586,22 +1595,9 @@ int vmstat_refresh(struct ctl_table *table, int write, for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { val = atomic_long_read(&vm_zone_stat[i]); if (val < 0) { - switch (i) { - case NR_PAGES_SCANNED: - /* - * This is often seen to go negative in - * recent kernels, but not to go permanently - * negative. Whilst it would be nicer not to - * have exceptions, rooting them out would be - * another task, of rather low priority. - */ - break; - default: - pr_warn("%s: %s %ld\n", - __func__, vmstat_text[i], val); - err = -EINVAL; - break; - } + pr_warn("%s: %s %ld\n", + __func__, vmstat_text[i], val); + err = -EINVAL; } } if (err) @@ -1768,8 +1764,7 @@ void __init init_mm_internals(void) { int ret __maybe_unused; - mm_percpu_wq = alloc_workqueue("mm_percpu_wq", - WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0); #ifdef CONFIG_SMP ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", @@ -1857,7 +1852,7 @@ static int unusable_show(struct seq_file *m, void *arg) if (!node_state(pgdat->node_id, N_MEMORY)) return 0; - walk_zones_in_node(m, pgdat, unusable_show_print); + walk_zones_in_node(m, pgdat, true, unusable_show_print); return 0; } @@ -1909,7 +1904,7 @@ static int extfrag_show(struct seq_file *m, void *arg) { pg_data_t *pgdat = (pg_data_t *)arg; - walk_zones_in_node(m, pgdat, extfrag_show_print); + walk_zones_in_node(m, pgdat, true, extfrag_show_print); return 0; } diff --git a/mm/workingset.c b/mm/workingset.c index eda05c71fa49e6e1e4f93a4029ddef04a4f8ab4c..b8c9ab6784795a0e7f8eff82a9a071d481e91a5f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -269,7 +269,6 @@ bool workingset_refault(void *shadow) lruvec = mem_cgroup_lruvec(pgdat, memcg); refault = atomic_long_read(&lruvec->inactive_age); active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); - rcu_read_unlock(); /* * The unsigned subtraction here gives an accurate distance @@ -290,11 +289,15 @@ bool workingset_refault(void *shadow) refault_distance = (refault - eviction) & EVICTION_MASK; inc_node_state(pgdat, WORKINGSET_REFAULT); + inc_memcg_state(memcg, WORKINGSET_REFAULT); if (refault_distance <= active_file) { inc_node_state(pgdat, WORKINGSET_ACTIVATE); + inc_memcg_state(memcg, WORKINGSET_ACTIVATE); + rcu_read_unlock(); return true; } + rcu_read_unlock(); return false; } @@ -472,6 +475,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->exceptional)) goto out_invalid; inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); + inc_memcg_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->page_tree, node, workingset_update_node, mapping); diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 5945f7e19c679cb882a5a5716a1de33bed50a8f1..40d3d72beb5384efee121bd4319a02b6d0821522 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -23,10 +23,18 @@ int lowpan_register_netdevice(struct net_device *dev, { int i, ret; - dev->addr_len = EUI64_ADDR_LEN; + switch (lltype) { + case LOWPAN_LLTYPE_IEEE802154: + dev->addr_len = EUI64_ADDR_LEN; + break; + + case LOWPAN_LLTYPE_BTLE: + dev->addr_len = ETH_ALEN; + break; + } + dev->type = ARPHRD_6LOWPAN; dev->mtu = IPV6_MIN_MTU; - dev->priv_flags |= IFF_NO_QUEUE; lowpan_dev(dev)->lltype = lltype; diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 79f1fa22509a350b35340897c1cb137357734713..6b1042e216565a85ecc1b1e6301172750c07a8ae 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -278,6 +278,23 @@ lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev, return ret; } +static void lowpan_iphc_uncompress_lladdr(const struct net_device *dev, + struct in6_addr *ipaddr, + const void *lladdr) +{ + switch (dev->addr_len) { + case ETH_ALEN: + lowpan_iphc_uncompress_eui48_lladdr(ipaddr, lladdr); + break; + case EUI64_ADDR_LEN: + lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + break; + default: + WARN_ON_ONCE(1); + break; + } +} + /* Uncompress address function for source and * destination address(non-multicast). * @@ -320,7 +337,7 @@ static int lowpan_iphc_uncompress_addr(struct sk_buff *skb, lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_lladdr(dev, ipaddr, lladdr); break; } break; @@ -381,7 +398,7 @@ static int lowpan_iphc_uncompress_ctx_addr(struct sk_buff *skb, lowpan_iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - lowpan_iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + lowpan_iphc_uncompress_lladdr(dev, ipaddr, lladdr); break; } ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen); @@ -666,6 +683,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) { case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC: + skb->pkt_type = PACKET_BROADCAST; + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); if (!ci) { @@ -681,11 +700,15 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); break; case LOWPAN_IPHC_M: + skb->pkt_type = PACKET_BROADCAST; + /* multicast */ err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr, iphc1 & LOWPAN_IPHC_DAM_MASK); break; case LOWPAN_IPHC_DAC: + skb->pkt_type = PACKET_HOST; + spin_lock_bh(&lowpan_dev(dev)->ctx.lock); ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid)); if (!ci) { @@ -701,6 +724,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, spin_unlock_bh(&lowpan_dev(dev)->ctx.lock); break; default: + skb->pkt_type = PACKET_HOST; + err = lowpan_iphc_uncompress_addr(skb, dev, &hdr.daddr, iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); @@ -802,6 +827,21 @@ lowpan_iphc_compress_ctx_802154_lladdr(const struct in6_addr *ipaddr, return lladdr_compress; } +static bool lowpan_iphc_addr_equal(const struct net_device *dev, + const struct lowpan_iphc_ctx *ctx, + const struct in6_addr *ipaddr, + const void *lladdr) +{ + struct in6_addr tmp = {}; + + lowpan_iphc_uncompress_lladdr(dev, &tmp, lladdr); + + if (ctx) + ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen); + + return ipv6_addr_equal(&tmp, ipaddr); +} + static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev, const struct in6_addr *ipaddr, const struct lowpan_iphc_ctx *ctx, @@ -819,13 +859,7 @@ static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct net_device *dev, } break; default: - /* check for SAM/DAM = 11 */ - memcpy(&tmp.s6_addr[8], lladdr, EUI64_ADDR_LEN); - /* second bit-flip (Universe/Local) is done according RFC2464 */ - tmp.s6_addr[8] ^= 0x02; - /* context information are always used */ - ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen); - if (ipv6_addr_equal(&tmp, ipaddr)) { + if (lowpan_iphc_addr_equal(dev, ctx, ipaddr, lladdr)) { dam = LOWPAN_IPHC_DAM_11; goto out; } @@ -921,11 +955,12 @@ static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct net_device *dev, } break; default: - if (is_addr_mac_addr_based(ipaddr, lladdr)) { - dam = LOWPAN_IPHC_DAM_11; /* 0-bits */ + if (lowpan_iphc_addr_equal(dev, NULL, ipaddr, lladdr)) { + dam = LOWPAN_IPHC_DAM_11; pr_debug("address compression 0 bits\n"); goto out; } + break; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e97ab824e368cc16f9609acd70d5337866eb2936..abc5f400fc71f2f57f3a029d1c196890ee4e39e0 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -562,8 +562,7 @@ static int vlan_dev_init(struct net_device *dev) NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; - dev->features |= real_dev->vlan_features | NETIF_F_LLTX | - NETIF_F_GSO_SOFTWARE; + dev->features |= dev->hw_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; dev->gso_max_segs = real_dev->gso_max_segs; if (dev->features & NETIF_F_VLAN_FEATURES) @@ -627,11 +626,18 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; + netdev_features_t lower_features; - features = netdev_intersect_features(features, real_dev->vlan_features); - features |= NETIF_F_RXCSUM; - features = netdev_intersect_features(features, real_dev->features); + lower_features = netdev_intersect_features((real_dev->vlan_features | + NETIF_F_RXCSUM), + real_dev->features); + /* Add HW_CSUM setting to preserve user ability to control + * checksum offload on the vlan device. + */ + if (lower_features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + lower_features |= NETIF_F_HW_CSUM; + features = netdev_intersect_features(features, lower_features); features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE); features |= NETIF_F_LLTX; @@ -807,7 +813,6 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; - free_netdev(dev); } void vlan_setup(struct net_device *dev) @@ -820,7 +825,8 @@ void vlan_setup(struct net_device *dev) netif_keep_dst(dev); dev->netdev_ops = &vlan_netdev_ops; - dev->destructor = vlan_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; dev->min_mtu = 0; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 1270207f3d7c9dd6fde0e1f7839acfe18a4d82c9..9c94aad153b308d4ca3e1fb098f045365895a218 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -35,7 +35,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr) { if (!attr) return 0; - return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy); + return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy, + NULL); } static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) diff --git a/net/9p/Kconfig b/net/9p/Kconfig index a75174a337237d5f113ca8c1616a01a46e72ac59..e6014e0e51f7a212dd48cdefcb26dffb44b2fba4 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -22,6 +22,15 @@ config NET_9P_VIRTIO This builds support for a transports between guest partitions and a host partition. +config NET_9P_XEN + depends on XEN + select XEN_XENBUS_FRONTEND + tristate "9P Xen Transport" + help + This builds support for a transport for 9pfs between + two Xen domains. + + config NET_9P_RDMA depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" diff --git a/net/9p/Makefile b/net/9p/Makefile index a0874cc1f718bcadbb604458127bebcf5664a2c5..697ea7caf46605d2b015200fb1dd126c92caaa30 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_NET_9P) := 9pnet.o +obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o @@ -14,5 +15,8 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o 9pnet_virtio-objs := \ trans_virtio.o \ +9pnet_xen-objs := \ + trans_xen.o \ + 9pnet_rdma-objs := \ trans_rdma.o \ diff --git a/net/9p/client.c b/net/9p/client.c index 3ce672af1596cfdb8fbd67ce558366e7997b7695..1218fb3b52dad115e4343c465ea891aed38d34c0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -592,9 +592,8 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, ename = &req->rc->sdata[req->rc->offset]; if (len > inline_len) { /* We have error in external buffer */ - err = copy_from_iter(ename + inline_len, - len - inline_len, uidata); - if (err != len - inline_len) { + if (!copy_from_iter_full(ename + inline_len, + len - inline_len, uidata)) { err = -EFAULT; goto out_err; } @@ -2101,6 +2100,10 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } + if (rsize < count) { + pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize); + count = rsize; + } p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 16d28756598789504f9a30a07476256a2146b2e3..16e10680518c4cba784296ff96ddcc2643cabf82 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -74,7 +74,7 @@ pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size) { size_t len = min(pdu->capacity - pdu->size, size); struct iov_iter i = *from; - if (copy_from_iter(&pdu->sdata[pdu->size], len, &i) != len) + if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, &i)) len = 0; pdu->size += len; diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c new file mode 100644 index 0000000000000000000000000000000000000000..6ad3e043c6174ae97a82525688988e740d87fd29 --- /dev/null +++ b/net/9p/trans_xen.c @@ -0,0 +1,545 @@ +/* + * linux/fs/9p/trans_xen + * + * Xen transport layer. + * + * Copyright (C) 2017 by Stefano Stabellini + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define XEN_9PFS_NUM_RINGS 2 +#define XEN_9PFS_RING_ORDER 6 +#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER) + +struct xen_9pfs_header { + uint32_t size; + uint8_t id; + uint16_t tag; + + /* uint8_t sdata[]; */ +} __attribute__((packed)); + +/* One per ring, more than one per 9pfs share */ +struct xen_9pfs_dataring { + struct xen_9pfs_front_priv *priv; + + struct xen_9pfs_data_intf *intf; + grant_ref_t ref; + int evtchn; + int irq; + /* protect a ring from concurrent accesses */ + spinlock_t lock; + + struct xen_9pfs_data data; + wait_queue_head_t wq; + struct work_struct work; +}; + +/* One per 9pfs share */ +struct xen_9pfs_front_priv { + struct list_head list; + struct xenbus_device *dev; + char *tag; + struct p9_client *client; + + int num_rings; + struct xen_9pfs_dataring *rings; +}; + +static LIST_HEAD(xen_9pfs_devs); +static DEFINE_RWLOCK(xen_9pfs_lock); + +/* We don't currently allow canceling of requests */ +static int p9_xen_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +static int p9_xen_create(struct p9_client *client, const char *addr, char *args) +{ + struct xen_9pfs_front_priv *priv; + + read_lock(&xen_9pfs_lock); + list_for_each_entry(priv, &xen_9pfs_devs, list) { + if (!strcmp(priv->tag, addr)) { + priv->client = client; + read_unlock(&xen_9pfs_lock); + return 0; + } + } + read_unlock(&xen_9pfs_lock); + return -EINVAL; +} + +static void p9_xen_close(struct p9_client *client) +{ + struct xen_9pfs_front_priv *priv; + + read_lock(&xen_9pfs_lock); + list_for_each_entry(priv, &xen_9pfs_devs, list) { + if (priv->client == client) { + priv->client = NULL; + read_unlock(&xen_9pfs_lock); + return; + } + } + read_unlock(&xen_9pfs_lock); +} + +static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size) +{ + RING_IDX cons, prod; + + cons = ring->intf->out_cons; + prod = ring->intf->out_prod; + virt_mb(); + + return XEN_9PFS_RING_SIZE - + xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) >= size; +} + +static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) +{ + struct xen_9pfs_front_priv *priv = NULL; + RING_IDX cons, prod, masked_cons, masked_prod; + unsigned long flags; + u32 size = p9_req->tc->size; + struct xen_9pfs_dataring *ring; + int num; + + read_lock(&xen_9pfs_lock); + list_for_each_entry(priv, &xen_9pfs_devs, list) { + if (priv->client == client) + break; + } + read_unlock(&xen_9pfs_lock); + if (!priv || priv->client != client) + return -EINVAL; + + num = p9_req->tc->tag % priv->num_rings; + ring = &priv->rings[num]; + +again: + while (wait_event_interruptible(ring->wq, + p9_xen_write_todo(ring, size)) != 0) + ; + + spin_lock_irqsave(&ring->lock, flags); + cons = ring->intf->out_cons; + prod = ring->intf->out_prod; + virt_mb(); + + if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons, + XEN_9PFS_RING_SIZE) < size) { + spin_unlock_irqrestore(&ring->lock, flags); + goto again; + } + + masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + + xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size, + &masked_prod, masked_cons, XEN_9PFS_RING_SIZE); + + p9_req->status = REQ_STATUS_SENT; + virt_wmb(); /* write ring before updating pointer */ + prod += size; + ring->intf->out_prod = prod; + spin_unlock_irqrestore(&ring->lock, flags); + notify_remote_via_irq(ring->irq); + + return 0; +} + +static void p9_xen_response(struct work_struct *work) +{ + struct xen_9pfs_front_priv *priv; + struct xen_9pfs_dataring *ring; + RING_IDX cons, prod, masked_cons, masked_prod; + struct xen_9pfs_header h; + struct p9_req_t *req; + int status; + + ring = container_of(work, struct xen_9pfs_dataring, work); + priv = ring->priv; + + while (1) { + cons = ring->intf->in_cons; + prod = ring->intf->in_prod; + virt_rmb(); + + if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) < + sizeof(h)) { + notify_remote_via_irq(ring->irq); + return; + } + + masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + + /* First, read just the header */ + xen_9pfs_read_packet(&h, ring->data.in, sizeof(h), + masked_prod, &masked_cons, + XEN_9PFS_RING_SIZE); + + req = p9_tag_lookup(priv->client, h.tag); + if (!req || req->status != REQ_STATUS_SENT) { + dev_warn(&priv->dev->dev, "Wrong req tag=%x\n", h.tag); + cons += h.size; + virt_mb(); + ring->intf->in_cons = cons; + continue; + } + + memcpy(req->rc, &h, sizeof(h)); + req->rc->offset = 0; + + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + /* Then, read the whole packet (including the header) */ + xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size, + masked_prod, &masked_cons, + XEN_9PFS_RING_SIZE); + + virt_mb(); + cons += h.size; + ring->intf->in_cons = cons; + + status = (req->status != REQ_STATUS_ERROR) ? + REQ_STATUS_RCVD : REQ_STATUS_ERROR; + + p9_client_cb(priv->client, req, status); + } +} + +static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r) +{ + struct xen_9pfs_dataring *ring = r; + + if (!ring || !ring->priv->client) { + /* ignore spurious interrupt */ + return IRQ_HANDLED; + } + + wake_up_interruptible(&ring->wq); + schedule_work(&ring->work); + + return IRQ_HANDLED; +} + +static struct p9_trans_module p9_xen_trans = { + .name = "xen", + .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT), + .def = 1, + .create = p9_xen_create, + .close = p9_xen_close, + .request = p9_xen_request, + .cancel = p9_xen_cancel, + .owner = THIS_MODULE, +}; + +static const struct xenbus_device_id xen_9pfs_front_ids[] = { + { "9pfs" }, + { "" } +}; + +static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) +{ + int i, j; + + write_lock(&xen_9pfs_lock); + list_del(&priv->list); + write_unlock(&xen_9pfs_lock); + + for (i = 0; i < priv->num_rings; i++) { + if (!priv->rings[i].intf) + break; + if (priv->rings[i].irq > 0) + unbind_from_irqhandler(priv->rings[i].irq, priv->dev); + if (priv->rings[i].data.in) { + for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) { + grant_ref_t ref; + + ref = priv->rings[i].intf->ref[j]; + gnttab_end_foreign_access(ref, 0, 0); + } + free_pages((unsigned long)priv->rings[i].data.in, + XEN_9PFS_RING_ORDER - + (PAGE_SHIFT - XEN_PAGE_SHIFT)); + } + gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); + free_page((unsigned long)priv->rings[i].intf); + } + kfree(priv->rings); + kfree(priv->tag); + kfree(priv); +} + +static int xen_9pfs_front_remove(struct xenbus_device *dev) +{ + struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); + + dev_set_drvdata(&dev->dev, NULL); + xen_9pfs_front_free(priv); + return 0; +} + +static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, + struct xen_9pfs_dataring *ring) +{ + int i = 0; + int ret = -ENOMEM; + void *bytes = NULL; + + init_waitqueue_head(&ring->wq); + spin_lock_init(&ring->lock); + INIT_WORK(&ring->work, p9_xen_response); + + ring->intf = (struct xen_9pfs_data_intf *)get_zeroed_page(GFP_KERNEL); + if (!ring->intf) + return ret; + ret = gnttab_grant_foreign_access(dev->otherend_id, + virt_to_gfn(ring->intf), 0); + if (ret < 0) + goto out; + ring->ref = ret; + bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + if (!bytes) { + ret = -ENOMEM; + goto out; + } + for (; i < (1 << XEN_9PFS_RING_ORDER); i++) { + ret = gnttab_grant_foreign_access( + dev->otherend_id, virt_to_gfn(bytes) + i, 0); + if (ret < 0) + goto out; + ring->intf->ref[i] = ret; + } + ring->intf->ring_order = XEN_9PFS_RING_ORDER; + ring->data.in = bytes; + ring->data.out = bytes + XEN_9PFS_RING_SIZE; + + ret = xenbus_alloc_evtchn(dev, &ring->evtchn); + if (ret) + goto out; + ring->irq = bind_evtchn_to_irqhandler(ring->evtchn, + xen_9pfs_front_event_handler, + 0, "xen_9pfs-frontend", ring); + if (ring->irq >= 0) + return 0; + + xenbus_free_evtchn(dev, ring->evtchn); + ret = ring->irq; +out: + if (bytes) { + for (i--; i >= 0; i--) + gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); + free_pages((unsigned long)bytes, + XEN_9PFS_RING_ORDER - + (PAGE_SHIFT - XEN_PAGE_SHIFT)); + } + gnttab_end_foreign_access(ring->ref, 0, 0); + free_page((unsigned long)ring->intf); + return ret; +} + +static int xen_9pfs_front_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int ret, i; + struct xenbus_transaction xbt; + struct xen_9pfs_front_priv *priv = NULL; + char *versions; + unsigned int max_rings, max_ring_order, len = 0; + + versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); + if (!len) + return -EINVAL; + if (strcmp(versions, "1")) { + kfree(versions); + return -EINVAL; + } + kfree(versions); + max_rings = xenbus_read_unsigned(dev->otherend, "max-rings", 0); + if (max_rings < XEN_9PFS_NUM_RINGS) + return -EINVAL; + max_ring_order = xenbus_read_unsigned(dev->otherend, + "max-ring-page-order", 0); + if (max_ring_order < XEN_9PFS_RING_ORDER) + return -EINVAL; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + priv->num_rings = XEN_9PFS_NUM_RINGS; + priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings), + GFP_KERNEL); + if (!priv->rings) { + kfree(priv); + return -ENOMEM; + } + + for (i = 0; i < priv->num_rings; i++) { + priv->rings[i].priv = priv; + ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]); + if (ret < 0) + goto error; + } + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + goto error; + } + ret = xenbus_printf(xbt, dev->nodename, "version", "%u", 1); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "num-rings", "%u", + priv->num_rings); + if (ret) + goto error_xenbus; + for (i = 0; i < priv->num_rings; i++) { + char str[16]; + + BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9); + sprintf(str, "ring-ref%u", i); + ret = xenbus_printf(xbt, dev->nodename, str, "%d", + priv->rings[i].ref); + if (ret) + goto error_xenbus; + + sprintf(str, "event-channel-%u", i); + ret = xenbus_printf(xbt, dev->nodename, str, "%u", + priv->rings[i].evtchn); + if (ret) + goto error_xenbus; + } + priv->tag = xenbus_read(xbt, dev->nodename, "tag", NULL); + if (IS_ERR(priv->tag)) { + ret = PTR_ERR(priv->tag); + goto error_xenbus; + } + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + goto error; + } + + write_lock(&xen_9pfs_lock); + list_add_tail(&priv->list, &xen_9pfs_devs); + write_unlock(&xen_9pfs_lock); + dev_set_drvdata(&dev->dev, priv); + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + error: + dev_set_drvdata(&dev->dev, NULL); + xen_9pfs_front_free(priv); + return ret; +} + +static int xen_9pfs_front_resume(struct xenbus_device *dev) +{ + dev_warn(&dev->dev, "suspsend/resume unsupported\n"); + return 0; +} + +static void xen_9pfs_front_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + switch (backend_state) { + case XenbusStateReconfiguring: + case XenbusStateReconfigured: + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + break; + + case XenbusStateInitWait: + break; + + case XenbusStateConnected: + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosed: + if (dev->state == XenbusStateClosed) + break; + /* Missed the backend's CLOSING state -- fallthrough */ + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct xenbus_driver xen_9pfs_front_driver = { + .ids = xen_9pfs_front_ids, + .probe = xen_9pfs_front_probe, + .remove = xen_9pfs_front_remove, + .resume = xen_9pfs_front_resume, + .otherend_changed = xen_9pfs_front_changed, +}; + +static int p9_trans_xen_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + pr_info("Initialising Xen transport for 9pfs\n"); + + v9fs_register_trans(&p9_xen_trans); + return xenbus_register_frontend(&xen_9pfs_front_driver); +} +module_init(p9_trans_xen_init); + +static void p9_trans_xen_exit(void) +{ + v9fs_unregister_trans(&p9_xen_trans); + return xenbus_unregister_driver(&xen_9pfs_front_driver); +} +module_exit(p9_trans_xen_exit); diff --git a/net/Makefile b/net/Makefile index 9b681550e3a3ea3c6146ac67572b6c97a28c9d2c..9086ffbb508514c1e4fb1a5d2d04d6c6b1cf5bea 100644 --- a/net/Makefile +++ b/net/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ diff --git a/net/atm/clip.c b/net/atm/clip.c index 53b4ac09e7b7d5d6a57f049dc1653c961b052622..ec527b62f79db1a4712d6fd654d2f899255de8bc 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -106,7 +106,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) entry->expires = jiffies - 1; /* force resolution or expiration */ error = neigh_update(entry->neigh, NULL, NUD_NONE, - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, 0); if (error) pr_crit("neigh_update failed with %d\n", error); goto out; @@ -481,7 +481,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) link_vcc(clip_vcc, entry); } error = neigh_update(neigh, llc_oui, NUD_PERMANENT, - NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); return error; } diff --git a/net/atm/common.c b/net/atm/common.c index 9613381f5db04e28ff66749706d1d61d82f77b88..f06422f4108d209fde356457c453164f2f4d7289 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -62,21 +62,16 @@ static void vcc_remove_socket(struct sock *sk) write_unlock_irq(&vcc_sklist_lock); } -static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size) +static bool vcc_tx_ready(struct atm_vcc *vcc, unsigned int size) { - struct sk_buff *skb; struct sock *sk = sk_atm(vcc); if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); - return NULL; + return false; } - while (!(skb = alloc_skb(size, GFP_KERNEL))) - schedule(); - pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); - return skb; + return true; } static void vcc_sock_destruct(struct sock *sk) @@ -606,7 +601,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) eff = (size+3) & ~3; /* align to word boundary */ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); error = 0; - while (!(skb = alloc_tx(vcc, eff))) { + while (!vcc_tx_ready(vcc, eff)) { if (m->msg_flags & MSG_DONTWAIT) { error = -EAGAIN; break; @@ -628,6 +623,15 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) finish_wait(sk_sleep(sk), &wait); if (error) goto out; + + skb = alloc_skb(eff, GFP_KERNEL); + if (!skb) { + error = -ENOMEM; + goto out; + } + pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); + atomic_add(skb->truesize, &sk->sk_wmem_alloc); + skb->dev = NULL; /* for paths shared with net_device interfaces */ ATM_SKB(skb)->atm_options = vcc->atm_options; if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 71343d0fec94b55f7318ec8578abc956148f7791..495ba7cdcb0451c997656116a300a49b7e43a089 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -679,15 +679,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_forw_packet *forw_packet_aggr; + struct sk_buff *skb; unsigned char *skb_buff; unsigned int skb_size; atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, - queue_left, bat_priv); - if (!forw_packet_aggr) - return; - if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) skb_size = BATADV_MAX_AGGREGATION_BYTES; @@ -696,9 +692,14 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; - forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) { - batadv_forw_packet_free(forw_packet_aggr, true); + skb = netdev_alloc_skb_ip_align(NULL, skb_size); + if (!skb) + return; + + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv, skb); + if (!forw_packet_aggr) { + kfree_skb(skb); return; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ba8420d8a992db2c14936f568f761869afd921c0..d07e89ec84677d22a74891406ab6fa5ee6087011 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): CLAIM %pM on vid %d\n", mac, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -404,7 +404,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -413,7 +413,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, BATADV_PRINT_VID(vid)); + ethhdr->h_source, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -425,14 +425,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_LOOPDETECT: ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; } @@ -475,9 +475,9 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) batadv_info(bat_priv->soft_iface, "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n", - BATADV_PRINT_VID(backbone_gw->vid)); + batadv_print_vid(backbone_gw->vid)); snprintf(vid_str, sizeof(vid_str), "%d", - BATADV_PRINT_VID(backbone_gw->vid)); + batadv_print_vid(backbone_gw->vid)); vid_str[sizeof(vid_str) - 1] = 0; batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT, @@ -510,7 +510,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, BATADV_PRINT_VID(vid)); + orig, batadv_print_vid(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -719,7 +719,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, BATADV_PRINT_VID(vid)); + mac, batadv_print_vid(vid)); kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, @@ -739,8 +739,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, goto claim_free_ref; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, BATADV_PRINT_VID(vid)); + "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n", + mac, batadv_print_vid(vid), backbone_gw->orig); remove_crc = true; } @@ -809,7 +809,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, BATADV_PRINT_VID(vid)); + mac, batadv_print_vid(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -849,7 +849,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - BATADV_PRINT_VID(vid), backbone_gw->orig, crc); + batadv_print_vid(vid), backbone_gw->orig, crc); spin_lock_bh(&backbone_gw->crc_lock); backbone_crc = backbone_gw->crc; @@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, - BATADV_PRINT_VID(backbone_gw->vid), + batadv_print_vid(backbone_gw->vid), backbone_crc, crc); batadv_bla_send_request(backbone_gw); @@ -904,7 +904,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - BATADV_PRINT_VID(vid), ethhdr->h_source); + batadv_print_vid(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return true; @@ -941,7 +941,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); + claim_addr, batadv_print_vid(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_put(backbone_gw); @@ -1161,7 +1161,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); if (ret < 2) @@ -1197,7 +1197,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); + ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); return true; } @@ -1295,10 +1295,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, goto skip; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_purge_claims(): %pM, vid %d, time out\n", - claim->addr, claim->vid); + "bla_purge_claims(): timed out.\n"); purge_now: + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_purge_claims(): %pM, vid %d\n", + claim->addr, claim->vid); + batadv_handle_unclaim(bat_priv, primary_if, backbone_gw->orig, claim->addr, claim->vid); @@ -1846,6 +1849,13 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* possible optimization: race for a claim */ /* No claim exists yet, claim it for us! */ + + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n", + ethhdr->h_source, + batadv_is_my_client(bat_priv, + ethhdr->h_source, vid) ? + "yes" : "no"); batadv_handle_claim(bat_priv, primary_if, primary_if->net_dev->dev_addr, ethhdr->h_source, vid); @@ -1963,10 +1973,22 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* if yes, the client has roamed and we have * to unclaim it. */ - batadv_handle_unclaim(bat_priv, primary_if, - primary_if->net_dev->dev_addr, - ethhdr->h_source, vid); - goto allow; + if (batadv_has_timed_out(claim->lasttime, 100)) { + /* only unclaim if the last claim entry is + * older than 100 ms to make sure we really + * have a roaming client here. + */ + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n", + ethhdr->h_source); + batadv_handle_unclaim(bat_priv, primary_if, + primary_if->net_dev->dev_addr, + ethhdr->h_source, vid); + goto allow; + } else { + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n", + ethhdr->h_source); + goto handled; + } } /* check if it is a multicast/broadcast frame */ @@ -2042,7 +2064,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) backbone_crc = backbone_gw->crc; spin_unlock_bh(&backbone_gw->crc_lock); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", - claim->addr, BATADV_PRINT_VID(claim->vid), + claim->addr, batadv_print_vid(claim->vid), backbone_gw->orig, (is_own ? 'x' : ' '), backbone_crc); @@ -2274,7 +2296,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", backbone_gw->orig, - BATADV_PRINT_VID(backbone_gw->vid), secs, + batadv_print_vid(backbone_gw->vid), secs, msecs, backbone_crc); } rcu_read_unlock(); @@ -2449,3 +2471,52 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) return ret; } + +#ifdef CONFIG_BATMAN_ADV_DAT +/** + * batadv_bla_check_claim - check if address is claimed + * + * @bat_priv: the bat priv with all the soft interface information + * @addr: mac address of which the claim status is checked + * @vid: the VLAN ID + * + * addr is checked if this address is claimed by the local device itself. + * + * Return: true if bla is disabled or the mac is claimed by the device, + * false if the device addr is already claimed by another gateway + */ +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, + u8 *addr, unsigned short vid) +{ + struct batadv_bla_claim search_claim; + struct batadv_bla_claim *claim = NULL; + struct batadv_hard_iface *primary_if = NULL; + bool ret = true; + + if (!atomic_read(&bat_priv->bridge_loop_avoidance)) + return ret; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + return ret; + + /* First look if the mac address is claimed */ + ether_addr_copy(search_claim.addr, addr); + search_claim.vid = vid; + + claim = batadv_claim_hash_find(bat_priv, &search_claim); + + /* If there is a claim and we are not owner of the claim, + * return false. + */ + if (claim) { + if (!batadv_compare_eth(claim->backbone_gw->orig, + primary_if->net_dev->dev_addr)) + ret = false; + batadv_claim_put(claim); + } + + batadv_hardif_put(primary_if); + return ret; +} +#endif diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index e157986bd01cf989dc70c93bae7a4fdf17cf3c4f..234775748b8eae9477f802804f004e50d2a4840c 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -69,6 +69,10 @@ void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); +#ifdef CONFIG_BATMAN_ADV_DAT +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid); +#endif #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -145,6 +149,13 @@ static inline int batadv_bla_backbone_dump(struct sk_buff *msg, return -EOPNOTSUPP; } +static inline +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid) +{ + return true; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 1bfd1dbc2feba7bf6c16004ea8ca85ed6066c929..000ca2f113ab857b4969a95b8fe1134806b05f15 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -43,6 +43,7 @@ #include #include +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -330,7 +331,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, batadv_dbg(BATADV_DBG_DAT, bat_priv, "Entry updated: %pI4 %pM (vid: %d)\n", &dat_entry->ip, dat_entry->mac_addr, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); goto out; } @@ -356,7 +357,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, } batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n", - &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid)); + &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid)); out: if (dat_entry) @@ -835,7 +836,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n", &dat_entry->ip, dat_entry->mac_addr, - BATADV_PRINT_VID(dat_entry->vid), + batadv_print_vid(dat_entry->vid), last_seen_mins, last_seen_secs); } rcu_read_unlock(); @@ -1002,6 +1003,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; + struct net_device *soft_iface = bat_priv->soft_iface; int hdr_size = 0; unsigned short vid; @@ -1040,16 +1042,31 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, goto out; } + /* If BLA is enabled, only send ARP replies if we have claimed + * the destination for the ARP request or if no one else of + * the backbone gws belonging to our backbone has claimed the + * destination. + */ + if (!batadv_bla_check_claim(bat_priv, + dat_entry->mac_addr, vid)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Device %pM claimed by another backbone gw. Don't send ARP reply!", + dat_entry->mac_addr); + ret = true; + goto out; + } + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, dat_entry->mac_addr, hw_src, vid); if (!skb_new) goto out; - skb_new->protocol = eth_type_trans(skb_new, - bat_priv->soft_iface); - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size; + skb_new->protocol = eth_type_trans(skb_new, soft_iface); + + batadv_inc_counter(bat_priv, BATADV_CNT_RX); + batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES, + skb->len + ETH_HLEN + hdr_size); netif_rx(skb_new); batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); @@ -1188,6 +1205,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) { + struct batadv_dat_entry *dat_entry = NULL; u16 type; __be32 ip_src, ip_dst; u8 *hw_src, *hw_dst; @@ -1210,12 +1228,41 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, hw_dst = batadv_arp_hw_dst(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); + /* If ip_dst is already in cache and has the right mac address, + * drop this frame if this ARP reply is destined for us because it's + * most probably an ARP reply generated by another node of the DHT. + * We have most probably received already a reply earlier. Delivering + * this frame would lead to doubled receive of an ARP reply. + */ + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_src, vid); + if (dat_entry && batadv_compare_eth(hw_src, dat_entry->mac_addr)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, "Doubled ARP reply removed: ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]; dat_entry: %pM-%pI4\n", + hw_src, &ip_src, hw_dst, &ip_dst, + dat_entry->mac_addr, &dat_entry->ip); + dropped = true; + goto out; + } + /* Update our internal cache with both the IP addresses the node got * within the ARP reply */ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); + /* If BLA is enabled, only forward ARP replies if we have claimed the + * source of the ARP reply or if no one else of the same backbone has + * already claimed that client. This prevents that different gateways + * to the same backbone all forward the ARP reply leading to multiple + * replies in the backbone. + */ + if (!batadv_bla_check_claim(bat_priv, hw_src, vid)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Device %pM claimed by another backbone gw. Drop ARP reply.\n", + hw_src); + dropped = true; + goto out; + } + /* if this REPLY is directed to a client of mine, let's deliver the * packet to the interface */ @@ -1228,6 +1275,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, out: if (dropped) kfree_skb(skb); + if (dat_entry) + batadv_dat_entry_put(dat_entry); /* if dropped == false -> deliver to the interface */ return dropped; } @@ -1256,7 +1305,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, /* If this packet is an ARP_REQUEST and the node already has the * information that it is going to ask, then the packet can be dropped */ - if (forw_packet->num_packets) + if (batadv_forw_packet_is_rebroadcast(forw_packet)) goto out; vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size); diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 7a2b9f4da07830103a8f00e4c3b488b0367a9dc2..65ce97efa6b5946175f64d300573a532f28387f8 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -73,9 +73,10 @@ __printf(2, 3); /* possibly ratelimited debug output */ #define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ do { \ - if (atomic_read(&(bat_priv)->log_level) & (type) && \ + struct batadv_priv *__batpriv = (bat_priv); \ + if (atomic_read(&__batpriv->log_level) & (type) && \ (!(ratelimited) || net_ratelimit())) \ - batadv_debug_log(bat_priv, fmt, ## arg); \ + batadv_debug_log(__batpriv, fmt, ## arg); \ } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5000c540614d0c0a866857e5245ff3f365d14223..fb381fb26a66196942401ec6cb636c66e648a36a 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -516,6 +516,9 @@ static void batadv_recv_handler_init(void) BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12); BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8); + i = FIELD_SIZEOF(struct sk_buff, cb); + BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i); + /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 57a8103dbce7f00fb58ff5ae252e12a0f6453aec..810f7d026f544027991af1714c169342792e1a68 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.0" +#define BATADV_SOURCE_VERSION "2017.1" #endif /* B.A.T.M.A.N. parameters */ @@ -193,6 +193,7 @@ enum batadv_uev_type { #include #include +#include "packet.h" #include "types.h" struct net_device; @@ -200,8 +201,19 @@ struct packet_type; struct seq_file; struct sk_buff; -#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \ - (int)((vid) & VLAN_VID_MASK) : -1) +/** + * batadv_print_vid - return printable version of vid information + * @vid: the VLAN identifier + * + * Return: -1 when no VLAN is used, VLAN id otherwise + */ +static inline int batadv_print_vid(unsigned short vid) +{ + if (vid & BATADV_VLAN_HAS_TAG) + return (int)(vid & VLAN_VID_MASK); + else + return -1; +} extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 952ba81a565b611ee0a83fc0bbfb719b54187407..d327670641ac336a14f0ecc85dd848d4952e8e6e 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -494,9 +494,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) if (!bridged) goto update; -#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) - pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); -#endif + if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)) + pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); @@ -671,7 +670,6 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, return 0; } -#if IS_ENABLED(CONFIG_IPV6) /** * batadv_mcast_is_report_ipv6 - check for MLD reports * @skb: the ethernet frame destined for the mesh @@ -736,7 +734,6 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, return 0; } -#endif /** * batadv_mcast_forw_mode_check - check for optimized forwarding potential @@ -765,11 +762,12 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable); -#if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: + if (!IS_ENABLED(CONFIG_IPV6)) + return -EINVAL; + return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable); -#endif default: return -EINVAL; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7fd740b6e36dfb0e11c67c283cec68a5cfd1b5f6..ae9f4d37d34f07182c3d2527186a0999fe5b639d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -941,15 +941,17 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_unicast_4addr_packet *unicast_4addr_packet; - u8 *orig_addr; - struct batadv_orig_node *orig_node = NULL; + u8 *orig_addr, *orig_addr_gw; + struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - bool is4addr; + struct ethhdr *ethhdr; int ret = NET_RX_DROP; + bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + ethhdr = eth_hdr(skb); is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ @@ -972,6 +974,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { + /* If this is a unicast packet from another backgone gw, + * drop it. + */ + orig_addr_gw = ethhdr->h_source; + orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); + if (orig_node_gw) { + is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, + hdr_size); + batadv_orig_node_put(orig_node_gw); + if (is_gw) { + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n", + orig_addr_gw); + goto free_skb; + } + } + if (is4addr) { subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 1489ec27daff5548b072e88648f5cca192f74afa..403df596a73d28afa8b31b46a2c0649052ce3db0 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -482,6 +482,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, * @if_outgoing: The (optional) if_outgoing to be grabbed * @queue_left: The (optional) queue counter to decrease * @bat_priv: The bat_priv for the mesh of this forw_packet + * @skb: The raw packet this forwarding packet shall contain * * Allocates a forwarding packet and tries to get a reference to the * (optional) if_incoming, if_outgoing and queue_left. If queue_left @@ -493,7 +494,8 @@ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, - struct batadv_priv *bat_priv) + struct batadv_priv *bat_priv, + struct sk_buff *skb) { struct batadv_forw_packet *forw_packet; const char *qname; @@ -525,7 +527,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, INIT_HLIST_NODE(&forw_packet->list); INIT_HLIST_NODE(&forw_packet->cleanup_list); - forw_packet->skb = NULL; + forw_packet->skb = skb; forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; @@ -756,22 +758,23 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, if (!primary_if) goto err; + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) { + batadv_hardif_put(primary_if); + goto err; + } + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, &bat_priv->bcast_queue_left, - bat_priv); + bat_priv, newskb); batadv_hardif_put(primary_if); if (!forw_packet) - goto err; - - newskb = skb_copy(skb, GFP_ATOMIC); - if (!newskb) goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; bcast_packet->ttl--; - forw_packet->skb = newskb; forw_packet->own = own_packet; INIT_DELAYED_WORK(&forw_packet->delayed_work, @@ -781,11 +784,60 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, return NETDEV_TX_OK; err_packet_free: - batadv_forw_packet_free(forw_packet, true); + kfree_skb(newskb); err: return NETDEV_TX_BUSY; } +/** + * batadv_forw_packet_bcasts_left - check if a retransmission is necessary + * @forw_packet: the forwarding packet to check + * @hard_iface: the interface to check on + * + * Checks whether a given packet has any (re)transmissions left on the provided + * interface. + * + * hard_iface may be NULL: In that case the number of transmissions this skb had + * so far is compared with the maximum amount of retransmissions independent of + * any interface instead. + * + * Return: True if (re)transmissions are left, false otherwise. + */ +static bool +batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet, + struct batadv_hard_iface *hard_iface) +{ + unsigned int max; + + if (hard_iface) + max = hard_iface->num_bcasts; + else + max = BATADV_NUM_BCASTS_MAX; + + return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max; +} + +/** + * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet + * @forw_packet: the packet to increase the counter for + */ +static void +batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet) +{ + BATADV_SKB_CB(forw_packet->skb)->num_bcasts++; +} + +/** + * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions + * @forw_packet: the packet to check + * + * Return: True if this packet was transmitted before, false otherwise. + */ +bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet) +{ + return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0; +} + static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; @@ -826,7 +878,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; - if (forw_packet->num_packets >= hard_iface->num_bcasts) + if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface)) continue; if (forw_packet->own) { @@ -884,10 +936,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) } rcu_read_unlock(); - forw_packet->num_packets++; + batadv_forw_packet_bcasts_inc(forw_packet); /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { + if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) { batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time); return; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index f21166d1032360a1febe3cebdfc9ee0e9958bb9c..a16b34f473ef02e46e642b46d4b31f588d89ea67 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -34,11 +34,13 @@ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, - struct batadv_priv *bat_priv); + struct batadv_priv *bat_priv, + struct sk_buff *skb); bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l); void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time); +bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet); int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index d042c99af028e2083307de1ba8978f2061fee45d..10f7edfb176ebd49c680ff4132db87aa00d3f04e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -64,28 +64,6 @@ #include "sysfs.h" #include "translation-table.h" -static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); -static void batadv_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info); -static u32 batadv_get_msglevel(struct net_device *dev); -static void batadv_set_msglevel(struct net_device *dev, u32 value); -static u32 batadv_get_link(struct net_device *dev); -static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data); -static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data); -static int batadv_get_sset_count(struct net_device *dev, int stringset); - -static const struct ethtool_ops batadv_ethtool_ops = { - .get_settings = batadv_get_settings, - .get_drvinfo = batadv_get_drvinfo, - .get_msglevel = batadv_get_msglevel, - .set_msglevel = batadv_set_msglevel, - .get_link = batadv_get_link, - .get_strings = batadv_get_strings, - .get_ethtool_stats = batadv_get_ethtool_stats, - .get_sset_count = batadv_get_sset_count, -}; - int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) { int result; @@ -140,7 +118,7 @@ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct net_device_stats *stats = &bat_priv->stats; + struct net_device_stats *stats = &dev->stats; stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); @@ -230,6 +208,9 @@ static int batadv_interface_tx(struct sk_buff *skb, if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); @@ -947,6 +928,98 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_del_slave = batadv_softif_slave_del, }; +static void batadv_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); + strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); +} + +/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 + * Declare each description string in struct.name[] to get fixed sized buffer + * and compile time checking for strings longer than ETH_GSTRING_LEN. + */ +static const struct { + const char name[ETH_GSTRING_LEN]; +} batadv_counters_strings[] = { + { "tx" }, + { "tx_bytes" }, + { "tx_dropped" }, + { "rx" }, + { "rx_bytes" }, + { "forward" }, + { "forward_bytes" }, + { "mgmt_tx" }, + { "mgmt_tx_bytes" }, + { "mgmt_rx" }, + { "mgmt_rx_bytes" }, + { "frag_tx" }, + { "frag_tx_bytes" }, + { "frag_rx" }, + { "frag_rx_bytes" }, + { "frag_fwd" }, + { "frag_fwd_bytes" }, + { "tt_request_tx" }, + { "tt_request_rx" }, + { "tt_response_tx" }, + { "tt_response_rx" }, + { "tt_roam_adv_tx" }, + { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_DAT + { "dat_get_tx" }, + { "dat_get_rx" }, + { "dat_put_tx" }, + { "dat_put_rx" }, + { "dat_cached_reply_tx" }, +#endif +#ifdef CONFIG_BATMAN_ADV_NC + { "nc_code" }, + { "nc_code_bytes" }, + { "nc_recode" }, + { "nc_recode_bytes" }, + { "nc_buffer" }, + { "nc_decode" }, + { "nc_decode_bytes" }, + { "nc_decode_failed" }, + { "nc_sniffed" }, +#endif +}; + +static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, batadv_counters_strings, + sizeof(batadv_counters_strings)); +} + +static void batadv_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + int i; + + for (i = 0; i < BATADV_CNT_NUM; i++) + data[i] = batadv_sum_counter(bat_priv, i); +} + +static int batadv_get_sset_count(struct net_device *dev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return BATADV_CNT_NUM; + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops batadv_ethtool_ops = { + .get_drvinfo = batadv_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = batadv_get_strings, + .get_ethtool_stats = batadv_get_ethtool_stats, + .get_sset_count = batadv_get_sset_count, +}; + /** * batadv_softif_free - Deconstructor of batadv_soft_interface * @dev: Device to cleanup and remove @@ -961,8 +1034,6 @@ static void batadv_softif_free(struct net_device *dev) * netdev and its private data (bat_priv) */ rcu_barrier(); - - free_netdev(dev); } /** @@ -971,12 +1042,11 @@ static void batadv_softif_free(struct net_device *dev) */ static void batadv_softif_init_early(struct net_device *dev) { - struct batadv_priv *priv = netdev_priv(dev); - ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; - dev->destructor = batadv_softif_free; + dev->needs_free_netdev = true; + dev->priv_destructor = batadv_softif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL; dev->priv_flags |= IFF_NO_QUEUE; @@ -989,8 +1059,6 @@ static void batadv_softif_init_early(struct net_device *dev) eth_hw_addr_random(dev); dev->ethtool_ops = &batadv_ethtool_ops; - - memset(priv, 0, sizeof(*priv)); } struct net_device *batadv_softif_create(struct net *net, const char *name) @@ -1083,118 +1151,3 @@ struct rtnl_link_ops batadv_link_ops __read_mostly = { .setup = batadv_softif_init_early, .dellink = batadv_softif_destroy_netlink, }; - -/* ethtool */ -static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - cmd->supported = 0; - cmd->advertising = 0; - ethtool_cmd_speed_set(cmd, SPEED_10); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_TP; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; - - return 0; -} - -static void batadv_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); - strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); -} - -static u32 batadv_get_msglevel(struct net_device *dev) -{ - return -EOPNOTSUPP; -} - -static void batadv_set_msglevel(struct net_device *dev, u32 value) -{ -} - -static u32 batadv_get_link(struct net_device *dev) -{ - return 1; -} - -/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 - * Declare each description string in struct.name[] to get fixed sized buffer - * and compile time checking for strings longer than ETH_GSTRING_LEN. - */ -static const struct { - const char name[ETH_GSTRING_LEN]; -} batadv_counters_strings[] = { - { "tx" }, - { "tx_bytes" }, - { "tx_dropped" }, - { "rx" }, - { "rx_bytes" }, - { "forward" }, - { "forward_bytes" }, - { "mgmt_tx" }, - { "mgmt_tx_bytes" }, - { "mgmt_rx" }, - { "mgmt_rx_bytes" }, - { "frag_tx" }, - { "frag_tx_bytes" }, - { "frag_rx" }, - { "frag_rx_bytes" }, - { "frag_fwd" }, - { "frag_fwd_bytes" }, - { "tt_request_tx" }, - { "tt_request_rx" }, - { "tt_response_tx" }, - { "tt_response_rx" }, - { "tt_roam_adv_tx" }, - { "tt_roam_adv_rx" }, -#ifdef CONFIG_BATMAN_ADV_DAT - { "dat_get_tx" }, - { "dat_get_rx" }, - { "dat_put_tx" }, - { "dat_put_rx" }, - { "dat_cached_reply_tx" }, -#endif -#ifdef CONFIG_BATMAN_ADV_NC - { "nc_code" }, - { "nc_code_bytes" }, - { "nc_recode" }, - { "nc_recode_bytes" }, - { "nc_buffer" }, - { "nc_decode" }, - { "nc_decode_bytes" }, - { "nc_decode_failed" }, - { "nc_sniffed" }, -#endif -}; - -static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - if (stringset == ETH_SS_STATS) - memcpy(data, batadv_counters_strings, - sizeof(batadv_counters_strings)); -} - -static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - int i; - - for (i = 0; i < BATADV_CNT_NUM; i++) - data[i] = batadv_sum_counter(bat_priv, i); -} - -static int batadv_get_sset_count(struct net_device *dev, int stringset) -{ - if (stringset == ETH_SS_STATS) - return BATADV_CNT_NUM; - - return -EOPNOTSUPP; -} diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index c94ebdecdc3d123f71c5af89783623e58d2f323d..556f9a865ddfb5e488c65e9edd708fad187f56b1 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -873,8 +873,8 @@ static int batadv_tp_send(void *arg) /* something went wrong during the preparation/transmission */ if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: batadv_tp_send() cannot send packets (%d)\n", - err); + "Meter: %s() cannot send packets (%d)\n", + __func__, err); /* ensure nobody else tries to stop the thread now */ if (atomic_dec_and_test(&tp_vars->sending)) tp_vars->reason = err; @@ -979,7 +979,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, if (!tp_vars) { spin_unlock_bh(&bat_priv->tp_list_lock); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: batadv_tp_start cannot allocate list elements\n"); + "Meter: %s cannot allocate list elements\n", + __func__); batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, dst, bat_priv, session_cookie); return; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 6077a87d46f0f781ac72dcc3cc1d9f84c814ad19..e75b4937b497401284bc553182737a2f7af54605 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -617,7 +617,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(tt_global->common.vid), message); + batadv_print_vid(tt_global->common.vid), message); batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, &tt_global->common); @@ -671,7 +671,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Re-adding pending client %pM (vid: %d)\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); /* whatever the reason why the PENDING flag was set, * this is a client which was enqueued to be removed in * this orig_interval. Since it popped up again, the @@ -684,7 +684,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Roaming client %pM (vid: %d) came back to its original location\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); /* the ROAM flag is set because this client roamed away * and the node got a roaming_advertisement message. Now * that the client popped up again at its original @@ -716,7 +716,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (!vlan) { net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; @@ -724,7 +724,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", - addr, BATADV_PRINT_VID(vid), + addr, batadv_print_vid(vid), (u8)atomic_read(&bat_priv->tt.vn)); ether_addr_copy(tt_local->common.addr, addr); @@ -1097,7 +1097,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), ((tt_common_entry->flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), no_purge ? 'P' : '.', @@ -1296,7 +1296,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Local tt entry (%pM, vid: %d) pending to be removed: %s\n", tt_local_entry->common.addr, - BATADV_PRINT_VID(tt_local_entry->common.vid), message); + batadv_print_vid(tt_local_entry->common.vid), message); } /** @@ -1727,7 +1727,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (vid: %d, via %pM)\n", - common->addr, BATADV_PRINT_VID(common->vid), + common->addr, batadv_print_vid(common->vid), orig_node->orig); ret = true; @@ -1835,7 +1835,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, if (!vlan) { seq_printf(seq, " * Cannot retrieve VLAN %d for originator %pM\n", - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), best_entry->orig_node->orig); goto print_list; } @@ -1844,7 +1844,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, seq_printf(seq, " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '*', tt_global_entry->common.addr, - BATADV_PRINT_VID(tt_global_entry->common.vid), + batadv_print_vid(tt_global_entry->common.vid), best_entry->ttvn, best_entry->orig_node->orig, last_ttvn, vlan->tt.crc, ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), @@ -1867,7 +1867,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, if (!vlan) { seq_printf(seq, " + Cannot retrieve VLAN %d for originator %pM\n", - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), orig_entry->orig_node->orig); continue; } @@ -1876,7 +1876,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, seq_printf(seq, " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '+', tt_global_entry->common.addr, - BATADV_PRINT_VID(tt_global_entry->common.vid), + batadv_print_vid(tt_global_entry->common.vid), orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, vlan->tt.crc, ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), @@ -2213,7 +2213,7 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv, "Deleting %pM from global tt entry %pM (vid: %d): %s\n", orig_node->orig, tt_global_entry->common.addr, - BATADV_PRINT_VID(vid), message); + batadv_print_vid(vid), message); _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); } @@ -2253,12 +2253,13 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, /* its the last one, mark for roaming. */ tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = jiffies; - } else + } else { /* there is another entry, we can simply delete this * one and can still use the other one. */ batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, orig_node, message); + } } /** @@ -2314,10 +2315,11 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, /* local entry exists, case 2: client roamed to us. */ batadv_tt_global_del_orig_list(tt_global_entry); batadv_tt_global_free(bat_priv, tt_global_entry, message); - } else + } else { /* no local entry exists, case 1: check for roaming */ batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); + } out: if (tt_global_entry) @@ -2375,7 +2377,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(vid), message); + batadv_print_vid(vid), message); hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_put(tt_global); } @@ -2435,7 +2437,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(tt_global->common.vid), + batadv_print_vid(tt_global->common.vid), msg); hlist_del_rcu(&tt_common->hash_entry); @@ -3650,7 +3652,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n", - orig_node->orig, client, BATADV_PRINT_VID(vid)); + orig_node->orig, client, batadv_print_vid(vid)); batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); @@ -3773,7 +3775,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting local tt entry (%pM, vid: %d): pending\n", tt_common->addr, - BATADV_PRINT_VID(tt_common->vid)); + batadv_print_vid(tt_common->vid)); batadv_tt_local_size_dec(bat_priv, tt_common->vid); hlist_del_rcu(&tt_common->hash_entry); @@ -4017,7 +4019,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n", - addr, BATADV_PRINT_VID(vid), orig_node->orig); + addr, batadv_print_vid(vid), orig_node->orig); ret = true; out: return ret; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 246f21b4973bc39d0678273ad831da1f5b7e0df3..ea43a64492479809fe6bdf95b436792078f50e9f 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1000,7 +1000,6 @@ struct batadv_priv_bat_v { * struct batadv_priv - per mesh interface data * @mesh_state: current status of the mesh (inactive/active/deactivating) * @soft_iface: net device which holds this struct as private data - * @stats: structure holding the data for the ndo_get_stats() call * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) * @aggregated_ogms: bool indicating whether OGM aggregation is enabled * @bonding: bool indicating whether traffic bonding is enabled @@ -1055,7 +1054,6 @@ struct batadv_priv_bat_v { struct batadv_priv { atomic_t mesh_state; struct net_device *soft_iface; - struct net_device_stats stats; u64 __percpu *bat_counters; /* Per cpu counters */ atomic_t aggregated_ogms; atomic_t bonding; @@ -1377,9 +1375,11 @@ struct batadv_nc_packet { * relevant to batman-adv in the skb->cb buffer in skbs. * @decoded: Marks a skb as decoded, which is checked when searching for coding * opportunities in network-coding.c + * @num_bcasts: Counter for broadcast packet retransmissions */ struct batadv_skb_cb { bool decoded; + unsigned int num_bcasts; }; /** @@ -1392,7 +1392,7 @@ struct batadv_skb_cb { * @skb: bcast packet's skb buffer * @packet_len: size of aggregated OGM packet inside the skb buffer * @direct_link_flags: direct link flags for aggregated OGM packets - * @num_packets: counter for bcast packet retransmission + * @num_packets: counter for aggregated OGMv1 packets * @delayed_work: work queue callback item for packet sending * @if_incoming: pointer to incoming hard-iface or primary iface if * locally generated packet diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index d491529332f4568b532759331bd8f88653573ba0..ab3b654b05cc87e19a965d0419efb7d45bd4b94e 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ struct skb_cb { struct in6_addr addr; struct in6_addr gw; struct l2cap_chan *chan; - int status; }; #define lowpan_cb(skb) ((struct skb_cb *)((skb)->cb)) @@ -64,7 +64,7 @@ struct lowpan_peer { struct l2cap_chan *chan; /* peer addresses in various formats */ - unsigned char eui64_addr[EUI64_ADDR_LEN]; + unsigned char lladdr[ETH_ALEN]; struct in6_addr peer_addr; }; @@ -270,28 +270,20 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) } static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, - struct l2cap_chan *chan) + struct lowpan_peer *peer) { - const u8 *saddr, *daddr; + const u8 *saddr; struct lowpan_btle_dev *dev; - struct lowpan_peer *peer; dev = lowpan_btle_dev(netdev); - rcu_read_lock(); - peer = __peer_lookup_chan(dev, chan); - rcu_read_unlock(); - if (!peer) - return -EINVAL; - - saddr = peer->eui64_addr; - daddr = dev->netdev->dev_addr; + saddr = peer->lladdr; - return lowpan_header_decompress(skb, netdev, daddr, saddr); + return lowpan_header_decompress(skb, netdev, netdev->dev_addr, saddr); } static int recv_pkt(struct sk_buff *skb, struct net_device *dev, - struct l2cap_chan *chan) + struct lowpan_peer *peer) { struct sk_buff *local_skb; int ret; @@ -344,8 +336,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->dev = dev; - ret = iphc_decompress(local_skb, dev, chan); + ret = iphc_decompress(local_skb, dev, peer); if (ret < 0) { + BT_DBG("iphc_decompress failed: %d", ret); kfree_skb(local_skb); goto drop; } @@ -365,6 +358,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, consume_skb(local_skb); consume_skb(skb); } else { + BT_DBG("unknown packet type"); goto drop; } @@ -390,7 +384,7 @@ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) if (!dev || !dev->netdev) return -ENOENT; - err = recv_pkt(skb, dev->netdev, chan); + err = recv_pkt(skb, dev->netdev, peer); if (err) { BT_DBG("recv pkt %d", err); err = -EAGAIN; @@ -399,37 +393,6 @@ static int chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) return err; } -static u8 get_addr_type_from_eui64(u8 byte) -{ - /* Is universal(0) or local(1) bit */ - return ((byte & 0x02) ? BDADDR_LE_RANDOM : BDADDR_LE_PUBLIC); -} - -static void copy_to_bdaddr(struct in6_addr *ip6_daddr, bdaddr_t *addr) -{ - u8 *eui64 = ip6_daddr->s6_addr + 8; - - addr->b[0] = eui64[7]; - addr->b[1] = eui64[6]; - addr->b[2] = eui64[5]; - addr->b[3] = eui64[2]; - addr->b[4] = eui64[1]; - addr->b[5] = eui64[0]; -} - -static void convert_dest_bdaddr(struct in6_addr *ip6_daddr, - bdaddr_t *addr, u8 *addr_type) -{ - copy_to_bdaddr(ip6_daddr, addr); - - /* We need to toggle the U/L bit that we got from IPv6 address - * so that we get the proper address and type of the BD address. - */ - addr->b[5] ^= 0x02; - - *addr_type = get_addr_type_from_eui64(addr->b[5]); -} - static int setup_header(struct sk_buff *skb, struct net_device *netdev, bdaddr_t *peer_addr, u8 *peer_addr_type) { @@ -437,8 +400,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev, struct ipv6hdr *hdr; struct lowpan_btle_dev *dev; struct lowpan_peer *peer; - bdaddr_t addr, *any = BDADDR_ANY; - u8 *daddr = any->b; + u8 *daddr; int err, status = 0; hdr = ipv6_hdr(skb); @@ -449,34 +411,24 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev, if (ipv6_addr_is_multicast(&ipv6_daddr)) { lowpan_cb(skb)->chan = NULL; + daddr = NULL; } else { - u8 addr_type; + BT_DBG("dest IP %pI6c", &ipv6_daddr); - /* Get destination BT device from skb. - * If there is no such peer then discard the packet. + /* The packet might be sent to 6lowpan interface + * because of routing (either via default route + * or user set route) so get peer according to + * the destination address. */ - convert_dest_bdaddr(&ipv6_daddr, &addr, &addr_type); - - BT_DBG("dest addr %pMR type %d IP %pI6c", &addr, - addr_type, &ipv6_daddr); - - peer = peer_lookup_ba(dev, &addr, addr_type); + peer = peer_lookup_dst(dev, &ipv6_daddr, skb); if (!peer) { - /* The packet might be sent to 6lowpan interface - * because of routing (either via default route - * or user set route) so get peer according to - * the destination address. - */ - peer = peer_lookup_dst(dev, &ipv6_daddr, skb); - if (!peer) { - BT_DBG("no such peer %pMR found", &addr); - return -ENOENT; - } + BT_DBG("no such peer"); + return -ENOENT; } - daddr = peer->eui64_addr; - *peer_addr = addr; - *peer_addr_type = addr_type; + daddr = peer->lladdr; + *peer_addr = peer->chan->dst; + *peer_addr_type = peer->chan->dst_type; lowpan_cb(skb)->chan = peer->chan; status = 1; @@ -527,15 +479,8 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb, return 0; } - if (!err) - err = lowpan_cb(skb)->status; - - if (err < 0) { - if (err == -EAGAIN) - netdev->stats.tx_dropped++; - else - netdev->stats.tx_errors++; - } + if (err < 0) + netdev->stats.tx_errors++; return err; } @@ -647,47 +592,19 @@ static void netdev_setup(struct net_device *dev) { dev->hard_header_len = 0; dev->needed_tailroom = 0; - dev->flags = IFF_RUNNING | IFF_POINTOPOINT | - IFF_MULTICAST; + dev->flags = IFF_RUNNING | IFF_MULTICAST; dev->watchdog_timeo = 0; + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->netdev_ops = &netdev_ops; dev->header_ops = &header_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static struct device_type bt_type = { .name = "bluetooth", }; -static void set_addr(u8 *eui, u8 *addr, u8 addr_type) -{ - /* addr is the BT address in little-endian format */ - eui[0] = addr[5]; - eui[1] = addr[4]; - eui[2] = addr[3]; - eui[3] = 0xFF; - eui[4] = 0xFE; - eui[5] = addr[2]; - eui[6] = addr[1]; - eui[7] = addr[0]; - - /* Universal/local bit set, BT 6lowpan draft ch. 3.2.1 */ - if (addr_type == BDADDR_LE_PUBLIC) - eui[0] &= ~0x02; - else - eui[0] |= 0x02; - - BT_DBG("type %d addr %*phC", addr_type, 8, eui); -} - -static void set_dev_addr(struct net_device *netdev, bdaddr_t *addr, - u8 addr_type) -{ - netdev->addr_assign_type = NET_ADDR_PERM; - set_addr(netdev->dev_addr, addr->b, addr_type); -} - static void ifup(struct net_device *netdev) { int err; @@ -746,16 +663,9 @@ static struct l2cap_chan *chan_create(void) return chan; } -static void set_ip_addr_bits(u8 addr_type, u8 *addr) -{ - if (addr_type == BDADDR_LE_PUBLIC) - *addr |= 0x02; - else - *addr &= ~0x02; -} - static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, - struct lowpan_btle_dev *dev) + struct lowpan_btle_dev *dev, + bool new_netdev) { struct lowpan_peer *peer; @@ -766,19 +676,9 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, peer->chan = chan; memset(&peer->peer_addr, 0, sizeof(struct in6_addr)); - /* RFC 2464 ch. 5 */ - peer->peer_addr.s6_addr[0] = 0xFE; - peer->peer_addr.s6_addr[1] = 0x80; - set_addr((u8 *)&peer->peer_addr.s6_addr + 8, chan->dst.b, - chan->dst_type); - - memcpy(&peer->eui64_addr, (u8 *)&peer->peer_addr.s6_addr + 8, - EUI64_ADDR_LEN); + baswap((void *)peer->lladdr, &chan->dst); - /* IPv6 address needs to have the U/L bit set properly so toggle - * it back here. - */ - set_ip_addr_bits(chan->dst_type, (u8 *)&peer->peer_addr.s6_addr + 8); + lowpan_iphc_uncompress_eui48_lladdr(&peer->peer_addr, peer->lladdr); spin_lock(&devices_lock); INIT_LIST_HEAD(&peer->list); @@ -786,7 +686,8 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan, spin_unlock(&devices_lock); /* Notifying peers about us needs to be done without locks held */ - INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers); + if (new_netdev) + INIT_DELAYED_WORK(&dev->notify_peers, do_notify_peers); schedule_delayed_work(&dev->notify_peers, msecs_to_jiffies(100)); return peer->chan; @@ -803,7 +704,8 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) if (!netdev) return -ENOMEM; - set_dev_addr(netdev, &chan->src, chan->src_type); + netdev->addr_assign_type = NET_ADDR_PERM; + baswap((void *)netdev->dev_addr, &chan->src); netdev->netdev_ops = &netdev_ops; SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev); @@ -843,6 +745,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev) static inline void chan_ready_cb(struct l2cap_chan *chan) { struct lowpan_btle_dev *dev; + bool new_netdev = false; dev = lookup_dev(chan->conn); @@ -853,12 +756,13 @@ static inline void chan_ready_cb(struct l2cap_chan *chan) l2cap_chan_del(chan, -ENOENT); return; } + new_netdev = true; } if (!try_module_get(THIS_MODULE)) return; - add_peer_chan(chan, dev); + add_peer_chan(chan, dev, new_netdev); ifup(dev->netdev); } @@ -964,26 +868,28 @@ static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan, static void chan_suspend_cb(struct l2cap_chan *chan) { - struct sk_buff *skb = chan->data; + struct lowpan_btle_dev *dev; - BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb); + BT_DBG("chan %p suspend", chan); - if (!skb) + dev = lookup_dev(chan->conn); + if (!dev || !dev->netdev) return; - lowpan_cb(skb)->status = -EAGAIN; + netif_stop_queue(dev->netdev); } static void chan_resume_cb(struct l2cap_chan *chan) { - struct sk_buff *skb = chan->data; + struct lowpan_btle_dev *dev; - BT_DBG("chan %p conn %p skb %p", chan, chan->conn, skb); + BT_DBG("chan %p resume", chan); - if (!skb) + dev = lookup_dev(chan->conn); + if (!dev || !dev->netdev) return; - lowpan_cb(skb)->status = 0; + netif_wake_queue(dev->netdev); } static long chan_get_sndtimeo_cb(struct l2cap_chan *chan) diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 06c31b9a68b0bce3cc4f28224c3c23864f78e7c9..68f951b3e85aa725485bef77de63f162f0241d97 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -13,6 +13,7 @@ menuconfig BT select CRYPTO_CMAC select CRYPTO_ECB select CRYPTO_SHA256 + select CRYPTO_ECDH help Bluetooth is low-cost, low-power, short-range wireless technology. It was designed as a replacement for cables and other short-range diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 4bfaa19a557382311369b229b0d26846d5a968dc..5d0a113e2e4049e6043099ddcf1fdd50c22ce0ab 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,7 +13,7 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ - ecc.o hci_request.o mgmt_util.o + ecdh_helper.o hci_request.o mgmt_util.o bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 69e1f7d362a8b71ed61cc12c66e8b81837712447..42d0997e2fbb78af58970504baf0d855d72a964a 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -159,12 +159,17 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk) BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); + lock_sock(sk); list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; + release_sock(sk); parent->sk_ack_backlog++; } EXPORT_SYMBOL(bt_accept_enqueue); +/* Calling function must hold the sk lock. + * bt_sk(sk)->parent must be non-NULL meaning sk is in the parent list. + */ void bt_accept_unlink(struct sock *sk) { BT_DBG("sk %p state %d", sk, sk->sk_state); @@ -183,11 +188,32 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) BT_DBG("parent %p", parent); +restart: list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; + /* Prevent early freeing of sk due to unlink and sock_kill */ + sock_hold(sk); lock_sock(sk); + /* Check sk has not already been unlinked via + * bt_accept_unlink() due to serialisation caused by sk locking + */ + if (!bt_sk(sk)->parent) { + BT_DBG("sk %p, already unlinked", sk); + release_sock(sk); + sock_put(sk); + + /* Restart the loop as sk is no longer in the list + * and also avoid a potential infinite loop because + * list_for_each_entry_safe() is not thread safe. + */ + goto restart; + } + + /* sk is safely in the parent list so reduce reference count */ + sock_put(sk); + /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { bt_accept_unlink(sk); diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 02a4ccc04e1ebba5301f8d2bfec55229d428ffd8..ebcab5bbadd7eb174f8bb2aff10b4e0d5283e182 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -263,7 +263,7 @@ void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle) struct hci_cp_read_local_amp_assoc cp; struct amp_assoc *loc_assoc = &hdev->loc_assoc; struct hci_request req; - int err = 0; + int err; BT_DBG("%s handle %d", hdev->name, phy_handle); @@ -282,7 +282,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) { struct hci_cp_read_local_amp_assoc cp; struct hci_request req; - int err = 0; + int err; memset(&hdev->loc_assoc, 0, sizeof(struct amp_assoc)); memset(&cp, 0, sizeof(cp)); @@ -292,7 +292,7 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr) set_bit(READ_LOC_AMP_ASSOC, &mgr->state); hci_req_init(&req, hdev); hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); - hci_req_run_skb(&req, read_local_amp_assoc_complete); + err = hci_req_run_skb(&req, read_local_amp_assoc_complete); if (err < 0) a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID); } @@ -303,7 +303,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, struct hci_cp_read_local_amp_assoc cp; struct amp_mgr *mgr = hcon->amp_mgr; struct hci_request req; - int err = 0; + int err; cp.phy_handle = hcon->handle; cp.len_so_far = cpu_to_le16(0); @@ -314,7 +314,7 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, /* Read Local AMP Assoc final link information data */ hci_req_init(&req, hdev); hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp); - hci_req_run_skb(&req, read_local_amp_assoc_complete); + err = hci_req_run_skb(&req, read_local_amp_assoc_complete); if (err < 0) a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID); } diff --git a/net/bluetooth/ecc.c b/net/bluetooth/ecc.c deleted file mode 100644 index e1709f8467acacb216241ec03f845a0043464c3b..0000000000000000000000000000000000000000 --- a/net/bluetooth/ecc.c +++ /dev/null @@ -1,816 +0,0 @@ -/* - * Copyright (c) 2013, Kenneth MacKay - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ecc.h" - -/* 256-bit curve */ -#define ECC_BYTES 32 - -#define MAX_TRIES 16 - -/* Number of u64's needed */ -#define NUM_ECC_DIGITS (ECC_BYTES / 8) - -struct ecc_point { - u64 x[NUM_ECC_DIGITS]; - u64 y[NUM_ECC_DIGITS]; -}; - -typedef struct { - u64 m_low; - u64 m_high; -} uint128_t; - -#define CURVE_P_32 { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull, \ - 0x0000000000000000ull, 0xFFFFFFFF00000001ull } - -#define CURVE_G_32 { \ - { 0xF4A13945D898C296ull, 0x77037D812DEB33A0ull, \ - 0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull }, \ - { 0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull, \ - 0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull } \ -} - -#define CURVE_N_32 { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull, \ - 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull } - -static u64 curve_p[NUM_ECC_DIGITS] = CURVE_P_32; -static struct ecc_point curve_g = CURVE_G_32; -static u64 curve_n[NUM_ECC_DIGITS] = CURVE_N_32; - -static void vli_clear(u64 *vli) -{ - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) - vli[i] = 0; -} - -/* Returns true if vli == 0, false otherwise. */ -static bool vli_is_zero(const u64 *vli) -{ - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) { - if (vli[i]) - return false; - } - - return true; -} - -/* Returns nonzero if bit bit of vli is set. */ -static u64 vli_test_bit(const u64 *vli, unsigned int bit) -{ - return (vli[bit / 64] & ((u64) 1 << (bit % 64))); -} - -/* Counts the number of 64-bit "digits" in vli. */ -static unsigned int vli_num_digits(const u64 *vli) -{ - int i; - - /* Search from the end until we find a non-zero digit. - * We do it in reverse because we expect that most digits will - * be nonzero. - */ - for (i = NUM_ECC_DIGITS - 1; i >= 0 && vli[i] == 0; i--); - - return (i + 1); -} - -/* Counts the number of bits required for vli. */ -static unsigned int vli_num_bits(const u64 *vli) -{ - unsigned int i, num_digits; - u64 digit; - - num_digits = vli_num_digits(vli); - if (num_digits == 0) - return 0; - - digit = vli[num_digits - 1]; - for (i = 0; digit; i++) - digit >>= 1; - - return ((num_digits - 1) * 64 + i); -} - -/* Sets dest = src. */ -static void vli_set(u64 *dest, const u64 *src) -{ - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) - dest[i] = src[i]; -} - -/* Returns sign of left - right. */ -static int vli_cmp(const u64 *left, const u64 *right) -{ - int i; - - for (i = NUM_ECC_DIGITS - 1; i >= 0; i--) { - if (left[i] > right[i]) - return 1; - else if (left[i] < right[i]) - return -1; - } - - return 0; -} - -/* Computes result = in << c, returning carry. Can modify in place - * (if result == in). 0 < shift < 64. - */ -static u64 vli_lshift(u64 *result, const u64 *in, - unsigned int shift) -{ - u64 carry = 0; - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) { - u64 temp = in[i]; - - result[i] = (temp << shift) | carry; - carry = temp >> (64 - shift); - } - - return carry; -} - -/* Computes vli = vli >> 1. */ -static void vli_rshift1(u64 *vli) -{ - u64 *end = vli; - u64 carry = 0; - - vli += NUM_ECC_DIGITS; - - while (vli-- > end) { - u64 temp = *vli; - *vli = (temp >> 1) | carry; - carry = temp << 63; - } -} - -/* Computes result = left + right, returning carry. Can modify in place. */ -static u64 vli_add(u64 *result, const u64 *left, - const u64 *right) -{ - u64 carry = 0; - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) { - u64 sum; - - sum = left[i] + right[i] + carry; - if (sum != left[i]) - carry = (sum < left[i]); - - result[i] = sum; - } - - return carry; -} - -/* Computes result = left - right, returning borrow. Can modify in place. */ -static u64 vli_sub(u64 *result, const u64 *left, const u64 *right) -{ - u64 borrow = 0; - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) { - u64 diff; - - diff = left[i] - right[i] - borrow; - if (diff != left[i]) - borrow = (diff > left[i]); - - result[i] = diff; - } - - return borrow; -} - -static uint128_t mul_64_64(u64 left, u64 right) -{ - u64 a0 = left & 0xffffffffull; - u64 a1 = left >> 32; - u64 b0 = right & 0xffffffffull; - u64 b1 = right >> 32; - u64 m0 = a0 * b0; - u64 m1 = a0 * b1; - u64 m2 = a1 * b0; - u64 m3 = a1 * b1; - uint128_t result; - - m2 += (m0 >> 32); - m2 += m1; - - /* Overflow */ - if (m2 < m1) - m3 += 0x100000000ull; - - result.m_low = (m0 & 0xffffffffull) | (m2 << 32); - result.m_high = m3 + (m2 >> 32); - - return result; -} - -static uint128_t add_128_128(uint128_t a, uint128_t b) -{ - uint128_t result; - - result.m_low = a.m_low + b.m_low; - result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low); - - return result; -} - -static void vli_mult(u64 *result, const u64 *left, const u64 *right) -{ - uint128_t r01 = { 0, 0 }; - u64 r2 = 0; - unsigned int i, k; - - /* Compute each digit of result in sequence, maintaining the - * carries. - */ - for (k = 0; k < NUM_ECC_DIGITS * 2 - 1; k++) { - unsigned int min; - - if (k < NUM_ECC_DIGITS) - min = 0; - else - min = (k + 1) - NUM_ECC_DIGITS; - - for (i = min; i <= k && i < NUM_ECC_DIGITS; i++) { - uint128_t product; - - product = mul_64_64(left[i], right[k - i]); - - r01 = add_128_128(r01, product); - r2 += (r01.m_high < product.m_high); - } - - result[k] = r01.m_low; - r01.m_low = r01.m_high; - r01.m_high = r2; - r2 = 0; - } - - result[NUM_ECC_DIGITS * 2 - 1] = r01.m_low; -} - -static void vli_square(u64 *result, const u64 *left) -{ - uint128_t r01 = { 0, 0 }; - u64 r2 = 0; - int i, k; - - for (k = 0; k < NUM_ECC_DIGITS * 2 - 1; k++) { - unsigned int min; - - if (k < NUM_ECC_DIGITS) - min = 0; - else - min = (k + 1) - NUM_ECC_DIGITS; - - for (i = min; i <= k && i <= k - i; i++) { - uint128_t product; - - product = mul_64_64(left[i], left[k - i]); - - if (i < k - i) { - r2 += product.m_high >> 63; - product.m_high = (product.m_high << 1) | - (product.m_low >> 63); - product.m_low <<= 1; - } - - r01 = add_128_128(r01, product); - r2 += (r01.m_high < product.m_high); - } - - result[k] = r01.m_low; - r01.m_low = r01.m_high; - r01.m_high = r2; - r2 = 0; - } - - result[NUM_ECC_DIGITS * 2 - 1] = r01.m_low; -} - -/* Computes result = (left + right) % mod. - * Assumes that left < mod and right < mod, result != mod. - */ -static void vli_mod_add(u64 *result, const u64 *left, const u64 *right, - const u64 *mod) -{ - u64 carry; - - carry = vli_add(result, left, right); - - /* result > mod (result = mod + remainder), so subtract mod to - * get remainder. - */ - if (carry || vli_cmp(result, mod) >= 0) - vli_sub(result, result, mod); -} - -/* Computes result = (left - right) % mod. - * Assumes that left < mod and right < mod, result != mod. - */ -static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right, - const u64 *mod) -{ - u64 borrow = vli_sub(result, left, right); - - /* In this case, p_result == -diff == (max int) - diff. - * Since -x % d == d - x, we can get the correct result from - * result + mod (with overflow). - */ - if (borrow) - vli_add(result, result, mod); -} - -/* Computes result = product % curve_p - from http://www.nsa.gov/ia/_files/nist-routines.pdf */ -static void vli_mmod_fast(u64 *result, const u64 *product) -{ - u64 tmp[NUM_ECC_DIGITS]; - int carry; - - /* t */ - vli_set(result, product); - - /* s1 */ - tmp[0] = 0; - tmp[1] = product[5] & 0xffffffff00000000ull; - tmp[2] = product[6]; - tmp[3] = product[7]; - carry = vli_lshift(tmp, tmp, 1); - carry += vli_add(result, result, tmp); - - /* s2 */ - tmp[1] = product[6] << 32; - tmp[2] = (product[6] >> 32) | (product[7] << 32); - tmp[3] = product[7] >> 32; - carry += vli_lshift(tmp, tmp, 1); - carry += vli_add(result, result, tmp); - - /* s3 */ - tmp[0] = product[4]; - tmp[1] = product[5] & 0xffffffff; - tmp[2] = 0; - tmp[3] = product[7]; - carry += vli_add(result, result, tmp); - - /* s4 */ - tmp[0] = (product[4] >> 32) | (product[5] << 32); - tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull); - tmp[2] = product[7]; - tmp[3] = (product[6] >> 32) | (product[4] << 32); - carry += vli_add(result, result, tmp); - - /* d1 */ - tmp[0] = (product[5] >> 32) | (product[6] << 32); - tmp[1] = (product[6] >> 32); - tmp[2] = 0; - tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32); - carry -= vli_sub(result, result, tmp); - - /* d2 */ - tmp[0] = product[6]; - tmp[1] = product[7]; - tmp[2] = 0; - tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull); - carry -= vli_sub(result, result, tmp); - - /* d3 */ - tmp[0] = (product[6] >> 32) | (product[7] << 32); - tmp[1] = (product[7] >> 32) | (product[4] << 32); - tmp[2] = (product[4] >> 32) | (product[5] << 32); - tmp[3] = (product[6] << 32); - carry -= vli_sub(result, result, tmp); - - /* d4 */ - tmp[0] = product[7]; - tmp[1] = product[4] & 0xffffffff00000000ull; - tmp[2] = product[5]; - tmp[3] = product[6] & 0xffffffff00000000ull; - carry -= vli_sub(result, result, tmp); - - if (carry < 0) { - do { - carry += vli_add(result, result, curve_p); - } while (carry < 0); - } else { - while (carry || vli_cmp(curve_p, result) != 1) - carry -= vli_sub(result, result, curve_p); - } -} - -/* Computes result = (left * right) % curve_p. */ -static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right) -{ - u64 product[2 * NUM_ECC_DIGITS]; - - vli_mult(product, left, right); - vli_mmod_fast(result, product); -} - -/* Computes result = left^2 % curve_p. */ -static void vli_mod_square_fast(u64 *result, const u64 *left) -{ - u64 product[2 * NUM_ECC_DIGITS]; - - vli_square(product, left); - vli_mmod_fast(result, product); -} - -#define EVEN(vli) (!(vli[0] & 1)) -/* Computes result = (1 / p_input) % mod. All VLIs are the same size. - * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide" - * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf - */ -static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod) -{ - u64 a[NUM_ECC_DIGITS], b[NUM_ECC_DIGITS]; - u64 u[NUM_ECC_DIGITS], v[NUM_ECC_DIGITS]; - u64 carry; - int cmp_result; - - if (vli_is_zero(input)) { - vli_clear(result); - return; - } - - vli_set(a, input); - vli_set(b, mod); - vli_clear(u); - u[0] = 1; - vli_clear(v); - - while ((cmp_result = vli_cmp(a, b)) != 0) { - carry = 0; - - if (EVEN(a)) { - vli_rshift1(a); - - if (!EVEN(u)) - carry = vli_add(u, u, mod); - - vli_rshift1(u); - if (carry) - u[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull; - } else if (EVEN(b)) { - vli_rshift1(b); - - if (!EVEN(v)) - carry = vli_add(v, v, mod); - - vli_rshift1(v); - if (carry) - v[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull; - } else if (cmp_result > 0) { - vli_sub(a, a, b); - vli_rshift1(a); - - if (vli_cmp(u, v) < 0) - vli_add(u, u, mod); - - vli_sub(u, u, v); - if (!EVEN(u)) - carry = vli_add(u, u, mod); - - vli_rshift1(u); - if (carry) - u[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull; - } else { - vli_sub(b, b, a); - vli_rshift1(b); - - if (vli_cmp(v, u) < 0) - vli_add(v, v, mod); - - vli_sub(v, v, u); - if (!EVEN(v)) - carry = vli_add(v, v, mod); - - vli_rshift1(v); - if (carry) - v[NUM_ECC_DIGITS - 1] |= 0x8000000000000000ull; - } - } - - vli_set(result, u); -} - -/* ------ Point operations ------ */ - -/* Returns true if p_point is the point at infinity, false otherwise. */ -static bool ecc_point_is_zero(const struct ecc_point *point) -{ - return (vli_is_zero(point->x) && vli_is_zero(point->y)); -} - -/* Point multiplication algorithm using Montgomery's ladder with co-Z - * coordinates. From http://eprint.iacr.org/2011/338.pdf - */ - -/* Double in place */ -static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1) -{ - /* t1 = x, t2 = y, t3 = z */ - u64 t4[NUM_ECC_DIGITS]; - u64 t5[NUM_ECC_DIGITS]; - - if (vli_is_zero(z1)) - return; - - vli_mod_square_fast(t4, y1); /* t4 = y1^2 */ - vli_mod_mult_fast(t5, x1, t4); /* t5 = x1*y1^2 = A */ - vli_mod_square_fast(t4, t4); /* t4 = y1^4 */ - vli_mod_mult_fast(y1, y1, z1); /* t2 = y1*z1 = z3 */ - vli_mod_square_fast(z1, z1); /* t3 = z1^2 */ - - vli_mod_add(x1, x1, z1, curve_p); /* t1 = x1 + z1^2 */ - vli_mod_add(z1, z1, z1, curve_p); /* t3 = 2*z1^2 */ - vli_mod_sub(z1, x1, z1, curve_p); /* t3 = x1 - z1^2 */ - vli_mod_mult_fast(x1, x1, z1); /* t1 = x1^2 - z1^4 */ - - vli_mod_add(z1, x1, x1, curve_p); /* t3 = 2*(x1^2 - z1^4) */ - vli_mod_add(x1, x1, z1, curve_p); /* t1 = 3*(x1^2 - z1^4) */ - if (vli_test_bit(x1, 0)) { - u64 carry = vli_add(x1, x1, curve_p); - vli_rshift1(x1); - x1[NUM_ECC_DIGITS - 1] |= carry << 63; - } else { - vli_rshift1(x1); - } - /* t1 = 3/2*(x1^2 - z1^4) = B */ - - vli_mod_square_fast(z1, x1); /* t3 = B^2 */ - vli_mod_sub(z1, z1, t5, curve_p); /* t3 = B^2 - A */ - vli_mod_sub(z1, z1, t5, curve_p); /* t3 = B^2 - 2A = x3 */ - vli_mod_sub(t5, t5, z1, curve_p); /* t5 = A - x3 */ - vli_mod_mult_fast(x1, x1, t5); /* t1 = B * (A - x3) */ - vli_mod_sub(t4, x1, t4, curve_p); /* t4 = B * (A - x3) - y1^4 = y3 */ - - vli_set(x1, z1); - vli_set(z1, y1); - vli_set(y1, t4); -} - -/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ -static void apply_z(u64 *x1, u64 *y1, u64 *z) -{ - u64 t1[NUM_ECC_DIGITS]; - - vli_mod_square_fast(t1, z); /* z^2 */ - vli_mod_mult_fast(x1, x1, t1); /* x1 * z^2 */ - vli_mod_mult_fast(t1, t1, z); /* z^3 */ - vli_mod_mult_fast(y1, y1, t1); /* y1 * z^3 */ -} - -/* P = (x1, y1) => 2P, (x2, y2) => P' */ -static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, - u64 *p_initial_z) -{ - u64 z[NUM_ECC_DIGITS]; - - vli_set(x2, x1); - vli_set(y2, y1); - - vli_clear(z); - z[0] = 1; - - if (p_initial_z) - vli_set(z, p_initial_z); - - apply_z(x1, y1, z); - - ecc_point_double_jacobian(x1, y1, z); - - apply_z(x2, y2, z); -} - -/* Input P = (x1, y1, Z), Q = (x2, y2, Z) - * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) - * or P => P', Q => P + Q - */ -static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2) -{ - /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ - u64 t5[NUM_ECC_DIGITS]; - - vli_mod_sub(t5, x2, x1, curve_p); /* t5 = x2 - x1 */ - vli_mod_square_fast(t5, t5); /* t5 = (x2 - x1)^2 = A */ - vli_mod_mult_fast(x1, x1, t5); /* t1 = x1*A = B */ - vli_mod_mult_fast(x2, x2, t5); /* t3 = x2*A = C */ - vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y2 - y1 */ - vli_mod_square_fast(t5, y2); /* t5 = (y2 - y1)^2 = D */ - - vli_mod_sub(t5, t5, x1, curve_p); /* t5 = D - B */ - vli_mod_sub(t5, t5, x2, curve_p); /* t5 = D - B - C = x3 */ - vli_mod_sub(x2, x2, x1, curve_p); /* t3 = C - B */ - vli_mod_mult_fast(y1, y1, x2); /* t2 = y1*(C - B) */ - vli_mod_sub(x2, x1, t5, curve_p); /* t3 = B - x3 */ - vli_mod_mult_fast(y2, y2, x2); /* t4 = (y2 - y1)*(B - x3) */ - vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y3 */ - - vli_set(x2, t5); -} - -/* Input P = (x1, y1, Z), Q = (x2, y2, Z) - * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) - * or P => P - Q, Q => P + Q - */ -static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2) -{ - /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ - u64 t5[NUM_ECC_DIGITS]; - u64 t6[NUM_ECC_DIGITS]; - u64 t7[NUM_ECC_DIGITS]; - - vli_mod_sub(t5, x2, x1, curve_p); /* t5 = x2 - x1 */ - vli_mod_square_fast(t5, t5); /* t5 = (x2 - x1)^2 = A */ - vli_mod_mult_fast(x1, x1, t5); /* t1 = x1*A = B */ - vli_mod_mult_fast(x2, x2, t5); /* t3 = x2*A = C */ - vli_mod_add(t5, y2, y1, curve_p); /* t4 = y2 + y1 */ - vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y2 - y1 */ - - vli_mod_sub(t6, x2, x1, curve_p); /* t6 = C - B */ - vli_mod_mult_fast(y1, y1, t6); /* t2 = y1 * (C - B) */ - vli_mod_add(t6, x1, x2, curve_p); /* t6 = B + C */ - vli_mod_square_fast(x2, y2); /* t3 = (y2 - y1)^2 */ - vli_mod_sub(x2, x2, t6, curve_p); /* t3 = x3 */ - - vli_mod_sub(t7, x1, x2, curve_p); /* t7 = B - x3 */ - vli_mod_mult_fast(y2, y2, t7); /* t4 = (y2 - y1)*(B - x3) */ - vli_mod_sub(y2, y2, y1, curve_p); /* t4 = y3 */ - - vli_mod_square_fast(t7, t5); /* t7 = (y2 + y1)^2 = F */ - vli_mod_sub(t7, t7, t6, curve_p); /* t7 = x3' */ - vli_mod_sub(t6, t7, x1, curve_p); /* t6 = x3' - B */ - vli_mod_mult_fast(t6, t6, t5); /* t6 = (y2 + y1)*(x3' - B) */ - vli_mod_sub(y1, t6, y1, curve_p); /* t2 = y3' */ - - vli_set(x1, t7); -} - -static void ecc_point_mult(struct ecc_point *result, - const struct ecc_point *point, u64 *scalar, - u64 *initial_z, int num_bits) -{ - /* R0 and R1 */ - u64 rx[2][NUM_ECC_DIGITS]; - u64 ry[2][NUM_ECC_DIGITS]; - u64 z[NUM_ECC_DIGITS]; - int i, nb; - - vli_set(rx[1], point->x); - vli_set(ry[1], point->y); - - xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z); - - for (i = num_bits - 2; i > 0; i--) { - nb = !vli_test_bit(scalar, i); - xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb]); - xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb]); - } - - nb = !vli_test_bit(scalar, 0); - xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb]); - - /* Find final 1/Z value. */ - vli_mod_sub(z, rx[1], rx[0], curve_p); /* X1 - X0 */ - vli_mod_mult_fast(z, z, ry[1 - nb]); /* Yb * (X1 - X0) */ - vli_mod_mult_fast(z, z, point->x); /* xP * Yb * (X1 - X0) */ - vli_mod_inv(z, z, curve_p); /* 1 / (xP * Yb * (X1 - X0)) */ - vli_mod_mult_fast(z, z, point->y); /* yP / (xP * Yb * (X1 - X0)) */ - vli_mod_mult_fast(z, z, rx[1 - nb]); /* Xb * yP / (xP * Yb * (X1 - X0)) */ - /* End 1/Z calculation */ - - xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb]); - - apply_z(rx[0], ry[0], z); - - vli_set(result->x, rx[0]); - vli_set(result->y, ry[0]); -} - -static void ecc_bytes2native(const u8 bytes[ECC_BYTES], - u64 native[NUM_ECC_DIGITS]) -{ - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) { - const u8 *digit = bytes + 8 * (NUM_ECC_DIGITS - 1 - i); - - native[NUM_ECC_DIGITS - 1 - i] = - ((u64) digit[0] << 0) | - ((u64) digit[1] << 8) | - ((u64) digit[2] << 16) | - ((u64) digit[3] << 24) | - ((u64) digit[4] << 32) | - ((u64) digit[5] << 40) | - ((u64) digit[6] << 48) | - ((u64) digit[7] << 56); - } -} - -static void ecc_native2bytes(const u64 native[NUM_ECC_DIGITS], - u8 bytes[ECC_BYTES]) -{ - int i; - - for (i = 0; i < NUM_ECC_DIGITS; i++) { - u8 *digit = bytes + 8 * (NUM_ECC_DIGITS - 1 - i); - - digit[0] = native[NUM_ECC_DIGITS - 1 - i] >> 0; - digit[1] = native[NUM_ECC_DIGITS - 1 - i] >> 8; - digit[2] = native[NUM_ECC_DIGITS - 1 - i] >> 16; - digit[3] = native[NUM_ECC_DIGITS - 1 - i] >> 24; - digit[4] = native[NUM_ECC_DIGITS - 1 - i] >> 32; - digit[5] = native[NUM_ECC_DIGITS - 1 - i] >> 40; - digit[6] = native[NUM_ECC_DIGITS - 1 - i] >> 48; - digit[7] = native[NUM_ECC_DIGITS - 1 - i] >> 56; - } -} - -bool ecc_make_key(u8 public_key[64], u8 private_key[32]) -{ - struct ecc_point pk; - u64 priv[NUM_ECC_DIGITS]; - unsigned int tries = 0; - - do { - if (tries++ >= MAX_TRIES) - return false; - - get_random_bytes(priv, ECC_BYTES); - - if (vli_is_zero(priv)) - continue; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve_n, priv) != 1) - continue; - - ecc_point_mult(&pk, &curve_g, priv, NULL, vli_num_bits(priv)); - } while (ecc_point_is_zero(&pk)); - - ecc_native2bytes(priv, private_key); - ecc_native2bytes(pk.x, public_key); - ecc_native2bytes(pk.y, &public_key[32]); - - return true; -} - -bool ecdh_shared_secret(const u8 public_key[64], const u8 private_key[32], - u8 secret[32]) -{ - u64 priv[NUM_ECC_DIGITS]; - u64 rand[NUM_ECC_DIGITS]; - struct ecc_point product, pk; - - get_random_bytes(rand, ECC_BYTES); - - ecc_bytes2native(public_key, pk.x); - ecc_bytes2native(&public_key[32], pk.y); - ecc_bytes2native(private_key, priv); - - ecc_point_mult(&product, &pk, priv, rand, vli_num_bits(priv)); - - ecc_native2bytes(product.x, secret); - - return !ecc_point_is_zero(&product); -} diff --git a/net/bluetooth/ecc.h b/net/bluetooth/ecc.h deleted file mode 100644 index 8d6a2f4d1905f33df35e1d2380f7e49cfe0f6c5f..0000000000000000000000000000000000000000 --- a/net/bluetooth/ecc.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2013, Kenneth MacKay - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* Create a public/private key pair. - * Outputs: - * public_key - Will be filled in with the public key. - * private_key - Will be filled in with the private key. - * - * Returns true if the key pair was generated successfully, false - * if an error occurred. The keys are with the LSB first. - */ -bool ecc_make_key(u8 public_key[64], u8 private_key[32]); - -/* Compute a shared secret given your secret key and someone else's - * public key. - * Note: It is recommended that you hash the result of ecdh_shared_secret - * before using it for symmetric encryption or HMAC. - * - * Inputs: - * public_key - The public key of the remote party - * private_key - Your private key. - * - * Outputs: - * secret - Will be filled in with the shared secret value. - * - * Returns true if the shared secret was generated successfully, false - * if an error occurred. Both input and output parameters are with the - * LSB first. - */ -bool ecdh_shared_secret(const u8 public_key[64], const u8 private_key[32], - u8 secret[32]); diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..24d4e60f8c48b8c02532558f7a90bf273f586274 --- /dev/null +++ b/net/bluetooth/ecdh_helper.c @@ -0,0 +1,231 @@ +/* + * ECDH helper functions - KPP wrappings + * + * Copyright (C) 2017 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation; + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + * CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + * SOFTWARE IS DISCLAIMED. + */ +#include "ecdh_helper.h" + +#include +#include +#include + +struct ecdh_completion { + struct completion completion; + int err; +}; + +static void ecdh_complete(struct crypto_async_request *req, int err) +{ + struct ecdh_completion *res = req->data; + + if (err == -EINPROGRESS) + return; + + res->err = err; + complete(&res->completion); +} + +static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + out[i] = __swab64(in[ndigits - 1 - i]); +} + +bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], + u8 secret[32]) +{ + struct crypto_kpp *tfm; + struct kpp_request *req; + struct ecdh p; + struct ecdh_completion result; + struct scatterlist src, dst; + u8 *tmp, *buf; + unsigned int buf_len; + int err = -ENOMEM; + + tmp = kmalloc(64, GFP_KERNEL); + if (!tmp) + return false; + + tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm)) { + pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", + PTR_ERR(tfm)); + goto free_tmp; + } + + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) + goto free_kpp; + + init_completion(&result.completion); + + /* Security Manager Protocol holds digits in litte-endian order + * while ECC API expect big-endian data + */ + swap_digits((u64 *)private_key, (u64 *)tmp, 4); + p.key = (char *)tmp; + p.key_size = 32; + /* Set curve_id */ + p.curve_id = ECC_CURVE_NIST_P256; + buf_len = crypto_ecdh_key_len(&p); + buf = kmalloc(buf_len, GFP_KERNEL); + if (!buf) { + pr_err("alg: kpp: Failed to allocate %d bytes for buf\n", + buf_len); + goto free_req; + } + crypto_ecdh_encode_key(buf, buf_len, &p); + + /* Set A private Key */ + err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len); + if (err) + goto free_all; + + swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */ + swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */ + + sg_init_one(&src, tmp, 64); + sg_init_one(&dst, secret, 32); + kpp_request_set_input(req, &src, 64); + kpp_request_set_output(req, &dst, 32); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ecdh_complete, &result); + err = crypto_kpp_compute_shared_secret(req); + if (err == -EINPROGRESS) { + wait_for_completion(&result.completion); + err = result.err; + } + if (err < 0) { + pr_err("alg: ecdh: compute shared secret failed. err %d\n", + err); + goto free_all; + } + + swap_digits((u64 *)secret, (u64 *)tmp, 4); + memcpy(secret, tmp, 32); + +free_all: + kzfree(buf); +free_req: + kpp_request_free(req); +free_kpp: + crypto_free_kpp(tfm); +free_tmp: + kfree(tmp); + return (err == 0); +} + +bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) +{ + struct crypto_kpp *tfm; + struct kpp_request *req; + struct ecdh p; + struct ecdh_completion result; + struct scatterlist dst; + u8 *tmp, *buf; + unsigned int buf_len; + int err = -ENOMEM; + const unsigned short max_tries = 16; + unsigned short tries = 0; + + tmp = kmalloc(64, GFP_KERNEL); + if (!tmp) + return false; + + tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm)) { + pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", + PTR_ERR(tfm)); + goto free_tmp; + } + + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) + goto free_kpp; + + init_completion(&result.completion); + + /* Set curve_id */ + p.curve_id = ECC_CURVE_NIST_P256; + p.key_size = 32; + buf_len = crypto_ecdh_key_len(&p); + buf = kmalloc(buf_len, GFP_KERNEL); + if (!buf) { + pr_err("alg: kpp: Failed to allocate %d bytes for buf\n", + buf_len); + goto free_req; + } + + do { + if (tries++ >= max_tries) + goto free_all; + + /* Set private Key */ + p.key = (char *)private_key; + crypto_ecdh_encode_key(buf, buf_len, &p); + err = crypto_kpp_set_secret(tfm, buf, buf_len); + if (err) + goto free_all; + + sg_init_one(&dst, tmp, 64); + kpp_request_set_input(req, NULL, 0); + kpp_request_set_output(req, &dst, 64); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ecdh_complete, &result); + + err = crypto_kpp_generate_public_key(req); + + if (err == -EINPROGRESS) { + wait_for_completion(&result.completion); + err = result.err; + } + + /* Private key is not valid. Regenerate */ + if (err == -EINVAL) + continue; + + if (err < 0) + goto free_all; + else + break; + + } while (true); + + /* Keys are handed back in little endian as expected by Security + * Manager Protocol + */ + swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */ + swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */ + swap_digits((u64 *)private_key, (u64 *)tmp, 4); + memcpy(private_key, tmp, 32); + +free_all: + kzfree(buf); +free_req: + kpp_request_free(req); +free_kpp: + crypto_free_kpp(tfm); +free_tmp: + kfree(tmp); + return (err == 0); +} diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..7a423faf76e5657c6a4c860031138f59eef0dcc0 --- /dev/null +++ b/net/bluetooth/ecdh_helper.h @@ -0,0 +1,27 @@ +/* + * ECDH helper functions - KPP wrappings + * + * Copyright (C) 2017 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation; + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + * CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + * SOFTWARE IS DISCLAIMED. + */ +#include + +bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32], + u8 secret[32]); +bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3ac89e9ace71556c66804b19e8dde8d397eb308f..05686776a5fb78d67dd38b22632ccfd1355b857e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2950,8 +2950,8 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_adv_max_interval = 0x0800; hdev->le_scan_interval = 0x0060; hdev->le_scan_window = 0x0030; - hdev->le_conn_min_interval = 0x0028; - hdev->le_conn_max_interval = 0x0038; + hdev->le_conn_min_interval = 0x0018; + hdev->le_conn_max_interval = 0x0028; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; hdev->le_def_tx_len = 0x001b; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f64d6566021fccab9208518a1c53934b2da7f708..638bf0e1a2e390133fc7bcfa2041fc41a94d413f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1680,7 +1680,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE)) + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE| + MSG_CMSG_COMPAT)) return -EINVAL; if (len < 4 || len > HCI_MAX_FRAME_SIZE) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fc7f321a382369f0d5097ce785921b1298a237f0..f88ac99528ce448d8e0f8ea5eb06ee2d7719afab 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2425,6 +2425,22 @@ static int l2cap_segment_le_sdu(struct l2cap_chan *chan, return 0; } +static void l2cap_le_flowctl_send(struct l2cap_chan *chan) +{ + int sent = 0; + + BT_DBG("chan %p", chan); + + while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { + l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); + chan->tx_credits--; + sent++; + } + + BT_DBG("Sent %d credits %u queued %u", sent, chan->tx_credits, + skb_queue_len(&chan->tx_q)); +} + int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) { struct sk_buff *skb; @@ -2458,9 +2474,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) if (len > chan->omtu) return -EMSGSIZE; - if (!chan->tx_credits) - return -EAGAIN; - __skb_queue_head_init(&seg_queue); err = l2cap_segment_le_sdu(chan, &seg_queue, msg, len); @@ -2475,10 +2488,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len) skb_queue_splice_tail_init(&seg_queue, &chan->tx_q); - while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { - l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); - chan->tx_credits--; - } + l2cap_le_flowctl_send(chan); if (!chan->tx_credits) chan->ops->suspend(chan); @@ -5570,10 +5580,8 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, chan->tx_credits += credits; - while (chan->tx_credits && !skb_queue_empty(&chan->tx_q)) { - l2cap_do_send(chan, skb_dequeue(&chan->tx_q)); - chan->tx_credits--; - } + /* Resume sending */ + l2cap_le_flowctl_send(chan); if (chan->tx_credits) chan->ops->resume(chan); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index f7eb02f09b54055c9b8c17b6fdcf53ec5041aa11..8ebca9033d60621ba686656242864dcf6a721dd7 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -311,7 +311,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio) skb_queue_head_init(&d->tx_queue); mutex_init(&d->lock); - atomic_set(&d->refcnt, 1); + refcount_set(&d->refcnt, 1); rfcomm_dlc_clear_state(d); @@ -342,7 +342,7 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) { struct rfcomm_session *s = d->session; - BT_DBG("dlc %p refcnt %d session %p", d, atomic_read(&d->refcnt), s); + BT_DBG("dlc %p refcnt %d session %p", d, refcount_read(&d->refcnt), s); list_del(&d->list); d->session = NULL; diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index dc688f13e49612cd74decf8853b8c270803942f3..ee92c925ecc5d3ac42bc041f60924da80672e1b8 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -26,7 +26,7 @@ #include #include -#include "ecc.h" +#include "ecdh_helper.h" #include "smp.h" #include "selftest.h" @@ -142,18 +142,30 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], const u8 pub_a[64], const u8 pub_b[64], const u8 dhkey[32]) { - u8 dhkey_a[32], dhkey_b[32]; + u8 *tmp, *dhkey_a, *dhkey_b; + int ret = 0; - ecdh_shared_secret(pub_b, priv_a, dhkey_a); - ecdh_shared_secret(pub_a, priv_b, dhkey_b); - - if (memcmp(dhkey_a, dhkey, 32)) + tmp = kmalloc(64, GFP_KERNEL); + if (!tmp) return -EINVAL; + dhkey_a = &tmp[0]; + dhkey_b = &tmp[32]; + + compute_ecdh_secret(pub_b, priv_a, dhkey_a); + compute_ecdh_secret(pub_a, priv_b, dhkey_b); + + if (memcmp(dhkey_a, dhkey, 32)) { + ret = -EINVAL; + goto out; + } + if (memcmp(dhkey_b, dhkey, 32)) - return -EINVAL; + ret = -EINVAL; - return 0; +out: + kfree(dhkey_a); + return ret; } static char test_ecdh_buffer[32]; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index fae391f1871f138c802722d8d8d86d817a97947e..14585edc94397c6f42e5f5fcee2f7f471019819e 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -31,7 +31,7 @@ #include #include -#include "ecc.h" +#include "ecdh_helper.h" #include "smp.h" #define SMP_DEV(hdev) \ @@ -569,8 +569,11 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) smp->debug_key = true; } else { while (true) { + /* Seed private key with random number */ + get_random_bytes(smp->local_sk, 32); + /* Generate local key pair for Secure Connections */ - if (!ecc_make_key(smp->local_pk, smp->local_sk)) + if (!generate_ecdh_keys(smp->local_pk, smp->local_sk)) return -EIO; /* This is unlikely, but we need to check that @@ -1895,8 +1898,11 @@ static u8 sc_send_public_key(struct smp_chan *smp) set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); } else { while (true) { + /* Seed private key with random number */ + get_random_bytes(smp->local_sk, 32); + /* Generate local key pair for Secure Connections */ - if (!ecc_make_key(smp->local_pk, smp->local_sk)) + if (!generate_ecdh_keys(smp->local_pk, smp->local_sk)) return SMP_UNSPECIFIED; /* This is unlikely, but we need to check that @@ -2670,7 +2676,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); - if (!ecdh_shared_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) + if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) return SMP_UNSPECIFIED; SMP_DBG("DHKey %32phN", smp->dhkey); @@ -3483,6 +3489,32 @@ void smp_unregister(struct hci_dev *hdev) #if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) +static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) +{ + int i; + + for (i = 0; i < ndigits; i++) + out[i] = __swab64(in[ndigits - 1 - i]); +} + +static int __init test_debug_key(void) +{ + u8 pk[64], sk[32]; + + swap_digits((u64 *)debug_sk, (u64 *)sk, 4); + + if (!generate_ecdh_keys(pk, sk)) + return -EINVAL; + + if (memcmp(sk, debug_sk, 32)) + return -EINVAL; + + if (memcmp(pk, debug_pk, 64)) + return -EINVAL; + + return 0; +} + static int __init test_ah(struct crypto_cipher *tfm_aes) { const u8 irk[16] = { @@ -3738,6 +3770,12 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes, calltime = ktime_get(); + err = test_debug_key(); + if (err) { + BT_ERR("debug_key test failed"); + goto done; + } + err = test_ah(tfm_aes); if (err) { BT_ERR("smp_ah test failed"); diff --git a/net/bpf/Makefile b/net/bpf/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..27b2992a06925240a8118e8182e8c7f048a73bbf --- /dev/null +++ b/net/bpf/Makefile @@ -0,0 +1 @@ +obj-y := test_run.o diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c new file mode 100644 index 0000000000000000000000000000000000000000..6be41a44d688a44970981fc9c2f1aa35666807fa --- /dev/null +++ b/net/bpf/test_run.c @@ -0,0 +1,173 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +{ + u32 ret; + + preempt_disable(); + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + preempt_enable(); + + return ret; +} + +static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) +{ + u64 time_start, time_spent = 0; + u32 ret = 0, i; + + if (!repeat) + repeat = 1; + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + ret = bpf_test_run_one(prog, ctx); + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + + return ret; +} + +static int bpf_test_finish(const union bpf_attr *kattr, + union bpf_attr __user *uattr, const void *data, + u32 size, u32 retval, u32 duration) +{ + void __user *data_out = u64_to_user_ptr(kattr->test.data_out); + int err = -EFAULT; + + if (data_out && copy_to_user(data_out, data, size)) + goto out; + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) + goto out; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) + goto out; + err = 0; +out: + return err; +} + +static void *bpf_test_init(const union bpf_attr *kattr, u32 size, + u32 headroom, u32 tailroom) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + void *data; + + if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + return ERR_PTR(-EINVAL); + + data = kzalloc(size + headroom + tailroom, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data + headroom, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + return data; +} + +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + bool is_l2 = false, is_direct_pkt_access = false; + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + u32 retval, duration; + struct sk_buff *skb; + void *data; + int ret; + + data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + switch (prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + is_l2 = true; + /* fall through */ + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + is_direct_pkt_access = true; + break; + default: + break; + } + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + return -ENOMEM; + } + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + __skb_put(skb, size); + skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + if (is_l2) + __skb_push(skb, ETH_HLEN); + if (is_direct_pkt_access) + bpf_compute_data_end(skb); + retval = bpf_test_run(prog, skb, repeat, &duration); + if (!is_l2) + __skb_push(skb, ETH_HLEN); + size = skb->len; + /* bpf program can never convert linear skb to non-linear */ + if (WARN_ON_ONCE(skb_is_nonlinear(skb))) + size = skb_headlen(skb); + ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + kfree_skb(skb); + return ret; +} + +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct xdp_buff xdp = {}; + u32 retval, duration; + void *data; + int ret; + + data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + xdp.data_hard_start = data; + xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; + xdp.data_end = xdp.data + size; + + retval = bpf_test_run(prog, &xdp, repeat, &duration); + if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN) + size = xdp.data_end - xdp.data; + ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); + kfree(data); + return ret; +} diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 90f49a194249ff5fecb3d81a05837ea8b75cc586..f0f3447e8aa48ff02c3f11f6da0e65e79cb28e90 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -123,6 +123,7 @@ static void br_dev_uninit(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + br_multicast_dev_del(br); br_multicast_uninit_stats(br); br_vlan_flush(br); free_percpu(br->stats); @@ -378,7 +379,7 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6e08b7199dd7442acdbd4f85e5ef6315b121ca06..ab0c7cc8448f4824d69b9260e79ede7aac14dd9e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -589,6 +589,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (unlikely(source != fdb->dst)) { fdb->dst = source; fdb_modified = true; + /* Take over HW learned entry */ + if (unlikely(fdb->added_by_external_learn)) + fdb->added_by_external_learn = 0; } if (now != fdb->updated) fdb->updated = now; @@ -854,6 +857,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, br_fdb_update(br, p, addr, vid, true); rcu_read_unlock(); local_bh_enable(); + } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { + err = br_fdb_external_learn_add(br, p, addr, vid); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm->ndm_state, diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 902af6ba481c999f81ed2fba488d91665e03c02e..48fb17417fac3397e74cb712388076ed1ee87865 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -183,13 +183,23 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, struct net_bridge_port *p; list_for_each_entry_rcu(p, &br->port_list, list) { - /* Do not flood unicast traffic to ports that turn it off */ - if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) - continue; - /* Do not flood if mc off, except for traffic we originate */ - if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) - continue; + /* Do not flood unicast traffic to ports that turn it off, nor + * other traffic if flood off, except for traffic we originate + */ + switch (pkt_type) { + case BR_PKT_UNICAST: + if (!(p->flags & BR_FLOOD)) + continue; + break; + case BR_PKT_MULTICAST: + if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) + continue; + break; + case BR_PKT_BROADCAST: + if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) + continue; + break; + } /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 56a2a72e7738c9869e27e77ca13140aed4bd530a..7f8d05cf90656e43211d9682657b788f99736722 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -311,7 +312,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) br_fdb_delete_by_port(br, NULL, 0, 1); - br_multicast_dev_del(br); cancel_delayed_work_sync(&br->gc_work); br_sysfs_delbr(br->dev); @@ -361,7 +361,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD; + p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; br_init_port(p); br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 056e6ac49d8fc7727fce8fc8320b8db4ac5351d2..b0845480a3ae7f65f347f7ad9399bc6a5d21d806 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -464,7 +464,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL, + NULL); if (err < 0) return err; @@ -568,7 +569,8 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; @@ -662,7 +664,8 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 1f1e62095464f99eaca8de49a772289f057bd943..067cf03134492a33f10982755105e103666cd1ef 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -997,13 +997,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, if (!elem) return okfn(net, sk, skb); - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, elem); - rcu_read_unlock(); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 225ef7d5370166baff69080996cf64ff68b055f6..32bd3ead9ba14a0b42bda3fc959f9134a8c9cc36 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -133,6 +133,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MCAST_TO_UCAST */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_MCAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_BCAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ @@ -189,6 +191,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, !!(p->flags & BR_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD, !!(p->flags & BR_MCAST_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_BCAST_FLOOD, + !!(p->flags & BR_BCAST_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, !!(p->flags & BR_PROXYARP_WIFI)) || @@ -591,7 +595,7 @@ static int br_afspec(struct net_bridge *br, err = 0; switch (nla_type(attr)) { case IFLA_BRIDGE_VLAN_TUNNEL_INFO: - if (!(p->flags & BR_VLAN_TUNNEL)) + if (!p || !(p->flags & BR_VLAN_TUNNEL)) return -EINVAL; err = br_parse_vlan_tunnel_info(attr, &tinfo_curr); if (err) @@ -631,6 +635,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, + [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -683,6 +689,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST); + br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); @@ -748,8 +755,8 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) if (p && protinfo) { if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, br_port_policy); + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo, + br_port_policy, NULL); if (err) return err; @@ -828,6 +835,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EPROTONOSUPPORT; } } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + if (defpvid >= VLAN_VID_MASK) + return -EINVAL; + } #endif return 0; diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index c913491495ab21ade17dec3512bd1d0a829b8e67..3712c7f0e00cd1addd73cdfe4504b687e425d0ec 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -227,8 +227,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, memset(tinfo, 0, sizeof(*tinfo)); - err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, - attr, vlan_tunnel_policy); + err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr, + vlan_tunnel_policy, NULL); if (err < 0) return err; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 08341d2aa9c946d7bdd6e0d599e31ba96557a290..6f12a5271219f071ed2d0bacb263561cb9e0f605 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -179,6 +179,8 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ + if (br->dev->flags & IFF_UP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); br_port_state_selection(br); } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c98b3e5c140a5f30a28fd748408cf5e949a032b6..60b6fe277a8b0c90faad0dfcda87f149ceaa5552 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - if (br->stp_enabled != BR_USER_STP) + if (br->stp_enabled == BR_KERNEL_STP) mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 79aee759aba5906692d0c715c851cd3dca96801b..5d5d413a6cf8a311ddde9e341caa39a8f3640b35 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -173,6 +173,7 @@ BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); +BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -221,6 +222,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, &brport_attr_multicast_flood, + &brport_attr_broadcast_flood, NULL }; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 5929309beaa1d5d310a19029364693c0b0355772..db85230e49c3b7e97093d54a1d7625d9145dfeb4 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -68,6 +68,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) return -EINVAL; + if (ebt_invalid_target(info->target)) + return -EINVAL; + return 0; } diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 4e0b0c3593250bd8a1be0cdafca49ce7e4684f94..e0bb624c3845eff5d756830504709eeb3d5bf960 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -9,6 +9,7 @@ */ #include #include +#include "../br_private.h" #include #include #include @@ -18,11 +19,30 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; + struct net_device *dev; if (!skb_make_writable(skb, 0)) return EBT_DROP; ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); + + if (is_multicast_ether_addr(info->mac)) { + if (is_broadcast_ether_addr(info->mac)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + if (xt_hooknum(par) != NF_BR_BROUTING) + dev = br_port_get_rcu(xt_in(par))->br->dev; + else + dev = xt_in(par); + + if (ether_addr_equal(info->mac, dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + else + skb->pkt_type = PACKET_OTHERHOST; + } + return info->target; } diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 98b9c8e8615ebc6e2ddefd1885a01af3ef58781b..707caea397433b66c887e527f0e4532eaf260880 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -62,10 +62,10 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) pptr = skb_header_pointer(skb, offset, sizeof(_ports), &_ports); if (pptr == NULL) { - printk(" INCOMPLETE TCP/UDP header"); + pr_cont(" INCOMPLETE TCP/UDP header"); return; } - printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + pr_cont(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); } } @@ -100,11 +100,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) { - printk(" INCOMPLETE IP header"); + pr_cont(" INCOMPLETE IP header"); goto out; } - printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", - &ih->saddr, &ih->daddr, ih->tos, ih->protocol); + pr_cont(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", + &ih->saddr, &ih->daddr, ih->tos, ih->protocol); print_ports(skb, ih->protocol, ih->ihl*4); goto out; } @@ -120,11 +120,11 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) { - printk(" INCOMPLETE IPv6 header"); + pr_cont(" INCOMPLETE IPv6 header"); goto out; } - printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", - &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); + pr_cont(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", + &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off); if (offset_ph == -1) @@ -142,12 +142,12 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) { - printk(" INCOMPLETE ARP header"); + pr_cont(" INCOMPLETE ARP header"); goto out; } - printk(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", - ntohs(ah->ar_hrd), ntohs(ah->ar_pro), - ntohs(ah->ar_op)); + pr_cont(" ARP HTYPE=%d, PTYPE=0x%04x, OPCODE=%d", + ntohs(ah->ar_hrd), ntohs(ah->ar_pro), + ntohs(ah->ar_op)); /* If it's for Ethernet and the lengths are OK, * then log the ARP payload @@ -161,17 +161,17 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); if (ap == NULL) { - printk(" INCOMPLETE ARP payload"); + pr_cont(" INCOMPLETE ARP payload"); goto out; } - printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4", - ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); + pr_cont(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4", + ap->mac_src, ap->ip_src, + ap->mac_dst, ap->ip_dst); } } out: - printk("\n"); + pr_cont("\n"); spin_unlock_bh(&ebt_log_lock); - } static unsigned int diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8fe36dc3aab29ceac90443f229b9d745b6dfe5d6..2585b100ebbbc332a33209c2b0bf3643eb0dd961 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -65,13 +65,13 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { - net->xt.broute_table = ebt_register_table(net, &broute_table); + net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); return PTR_ERR_OR_ZERO(net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table); + ebt_unregister_table(net, net->xt.broute_table, NULL); } static struct pernet_operations broute_net_ops = { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 593a1bdc079e8ee55165bd13dfef23a0689f3362..f22ef7c219137231e13e756b2695b281b90b9dd8 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __net_init frame_filter_net_init(struct net *net) { - net->xt.frame_filter = ebt_register_table(net, &frame_filter); + net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); return PTR_ERR_OR_ZERO(net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_filter); + ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter); } static struct pernet_operations frame_filter_net_ops = { @@ -109,20 +109,11 @@ static struct pernet_operations frame_filter_net_ops = { static int __init ebtable_filter_init(void) { - int ret; - - ret = register_pernet_subsys(&frame_filter_net_ops); - if (ret < 0) - return ret; - ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); - if (ret < 0) - unregister_pernet_subsys(&frame_filter_net_ops); - return ret; + return register_pernet_subsys(&frame_filter_net_ops); } static void __exit ebtable_filter_fini(void) { - nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); unregister_pernet_subsys(&frame_filter_net_ops); } diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index eb33919821ee12e1b74610c894c22d3d0fa05f2a..2f7a4f314406382a655a94d2ce384226ab906bfb 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __net_init frame_nat_net_init(struct net *net) { - net->xt.frame_nat = ebt_register_table(net, &frame_nat); + net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); return PTR_ERR_OR_ZERO(net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_nat); + ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat); } static struct pernet_operations frame_nat_net_ops = { @@ -109,20 +109,11 @@ static struct pernet_operations frame_nat_net_ops = { static int __init ebtable_nat_init(void) { - int ret; - - ret = register_pernet_subsys(&frame_nat_net_ops); - if (ret < 0) - return ret; - ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); - if (ret < 0) - unregister_pernet_subsys(&frame_nat_net_ops); - return ret; + return register_pernet_subsys(&frame_nat_net_ops); } static void __exit ebtable_nat_fini(void) { - nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); unregister_pernet_subsys(&frame_nat_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 79b69917f5210c0065ab5c9c37a497ede7cc273b..9c6e619f452bc96770a8e340d3879adce6dadd2a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1157,8 +1157,30 @@ static int do_replace(struct net *net, const void __user *user, return ret; } +static void __ebt_unregister_table(struct net *net, struct ebt_table *table) +{ + int i; + + mutex_lock(&ebt_mutex); + list_del(&table->list); + mutex_unlock(&ebt_mutex); + EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, + ebt_cleanup_entry, net, NULL); + if (table->private->nentries) + module_put(table->me); + vfree(table->private->entries); + if (table->private->chainstack) { + for_each_possible_cpu(i) + vfree(table->private->chainstack[i]); + vfree(table->private->chainstack); + } + vfree(table->private); + kfree(table); +} + struct ebt_table * -ebt_register_table(struct net *net, const struct ebt_table *input_table) +ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops) { struct ebt_table_info *newinfo; struct ebt_table *t, *table; @@ -1238,6 +1260,16 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) } list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); + + if (!ops) + return table; + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret) { + __ebt_unregister_table(net, table); + return ERR_PTR(ret); + } + return table; free_unlock: mutex_unlock(&ebt_mutex); @@ -1256,29 +1288,12 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) return ERR_PTR(ret); } -void ebt_unregister_table(struct net *net, struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table, + const struct nf_hook_ops *ops) { - int i; - - if (!table) { - BUGPRINT("Request to unregister NULL table!!!\n"); - return; - } - mutex_lock(&ebt_mutex); - list_del(&table->list); - mutex_unlock(&ebt_mutex); - EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, - ebt_cleanup_entry, net, NULL); - if (table->private->nentries) - module_put(table->me); - vfree(table->private->entries); - if (table->private->chainstack) { - for_each_possible_cpu(i) - vfree(table->private->chainstack[i]); - vfree(table->private->chainstack); - } - vfree(table->private); - kfree(table); + if (ops) + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ebt_unregister_table(net, table); } /* userspace just supplied us with counters */ @@ -1358,7 +1373,8 @@ static inline int ebt_obj_to_user(char __user *um, const char *_name, strlcpy(name, _name, sizeof(name)); if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) || put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) || - xt_data_to_user(um + entrysize, data, usersize, datasize)) + xt_data_to_user(um + entrysize, data, usersize, datasize, + XT_ALIGN(datasize))) return -EFAULT; return 0; @@ -1643,7 +1659,8 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, if (match->compat_to_user(cm->data, m->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, m->data, match->usersize, msize)) + if (xt_data_to_user(cm->data, m->data, match->usersize, msize, + COMPAT_XT_ALIGN(msize))) return -EFAULT; } @@ -1672,7 +1689,8 @@ static int compat_target_to_user(struct ebt_entry_target *t, if (target->compat_to_user(cm->data, t->data)) return -EFAULT; } else { - if (xt_data_to_user(cm->data, t->data, target->usersize, tsize)) + if (xt_data_to_user(cm->data, t->data, target->usersize, tsize, + COMPAT_XT_ALIGN(tsize))) return -EFAULT; } @@ -1713,7 +1731,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, if (*size < sizeof(*ce)) return -EINVAL; - ce = (struct ebt_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(*ce))) return -EFAULT; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 5974dbc1ea240fa6b8b2d109891ab211a8d556ff..bb63c9aed55d22af0f9d6538267e50900701a351 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -111,7 +111,7 @@ nft_meta_bridge_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_meta_bridge_type __read_mostly = { .family = NFPROTO_BRIDGE, .name = "meta", - .select_ops = &nft_meta_bridge_select_ops, + .select_ops = nft_meta_bridge_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 206dc266ecd237c2874d25352dd631e3bc31b002..346ef6b00b8f05b62edc911d06c01692624596d9 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -375,11 +375,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code, err; - - err = nft_reject_bridge_validate(ctx, expr, NULL); - if (err < 0) - return err; + int icmp_code; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index adcad344c843985435958890583761e865a25374..21f18ea2fce440c1a0c8dcddc335e327dbf9bced 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -754,6 +754,10 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, lock_sock(sk); + err = -EINVAL; + if (addr_len < offsetofend(struct sockaddr, sa_family)) + goto out; + err = -EAFNOSUPPORT; if (uaddr->sa_family != AF_CAIF) goto out; diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 59ce1fcc220ce0a71fb57733be7cc91e6b8ac7fc..71b6ab240dea26b228be35ba24ec3f43772f19f7 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -81,11 +81,7 @@ static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx) { struct sk_buff *skb; - if (likely(in_interrupt())) - skb = alloc_skb(len + pfx, GFP_ATOMIC); - else - skb = alloc_skb(len + pfx, GFP_KERNEL); - + skb = alloc_skb(len + pfx, GFP_ATOMIC); if (unlikely(skb == NULL)) return NULL; diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 1816fc9f1ee779f85874e886a9ba6db721c60680..fe3c53efb949ef29a3dc1f6278d9abc05b50f579 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -392,14 +392,14 @@ static void chnl_net_destructor(struct net_device *dev) { struct chnl_net *priv = netdev_priv(dev); caif_free_client(&priv->chnl); - free_netdev(dev); } static void ipcaif_net_setup(struct net_device *dev) { struct chnl_net *priv; dev->netdev_ops = &netdev_ops; - dev->destructor = chnl_net_destructor; + dev->needs_free_netdev = true; + dev->priv_destructor = chnl_net_destructor; dev->flags |= IFF_NOARP; dev->flags |= IFF_POINTOPOINT; dev->mtu = GPRS_PDP_MTU; diff --git a/net/can/af_can.c b/net/can/af_can.c index 5488e4a6ccd062e6f6e7e2b841dde5ef055d4337..88edac0f3e366398d0c1e0de023b90b0669498f9 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -2,7 +2,7 @@ * af_can.c - Protocol family CAN core module * (used by different CAN protocol modules) * - * Copyright (c) 2002-2007 Volkswagen Group Electronic Research + * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -75,20 +75,12 @@ static int stats_timer __read_mostly = 1; module_param(stats_timer, int, S_IRUGO); MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); -/* receive filters subscribed for 'all' CAN devices */ -struct dev_rcv_lists can_rx_alldev_list; -static DEFINE_SPINLOCK(can_rcvlists_lock); - static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; static DEFINE_MUTEX(proto_tab_lock); -struct timer_list can_stattimer; /* timer for statistics update */ -struct s_stats can_stats; /* packet statistics */ -struct s_pstats can_pstats; /* receive list statistics */ - static atomic_t skbcounter = ATOMIC_INIT(0); /* @@ -145,9 +137,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= CAN_NPROTO) return -EINVAL; - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - cp = can_get_proto(protocol); #ifdef CONFIG_MODULES @@ -228,6 +217,7 @@ int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats; int err = -EINVAL; if (skb->len == CAN_MTU) { @@ -316,8 +306,8 @@ int can_send(struct sk_buff *skb, int loop) netif_rx_ni(newskb); /* update statistics */ - can_stats.tx_frames++; - can_stats.tx_frames_delta++; + can_stats->tx_frames++; + can_stats->tx_frames_delta++; return 0; @@ -331,10 +321,11 @@ EXPORT_SYMBOL(can_send); * af_can rx path */ -static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) +static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net, + struct net_device *dev) { if (!dev) - return &can_rx_alldev_list; + return net->can.can_rx_alldev_list; else return (struct dev_rcv_lists *)dev->ml_priv; } @@ -467,13 +458,14 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * -ENOMEM on missing cache mem to create subscription entry * -ENODEV unknown device */ -int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, - void (*func)(struct sk_buff *, void *), void *data, - char *ident, struct sock *sk) +int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, + canid_t mask, void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; struct dev_rcv_lists *d; + struct s_pstats *can_pstats = net->can.can_pstats; int err = 0; /* insert new receiver (dev,canid,mask) -> (func,data) */ @@ -481,13 +473,16 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, if (dev && dev->type != ARPHRD_CAN) return -ENODEV; + if (dev && !net_eq(net, dev_net(dev))) + return -ENODEV; + r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); if (!r) return -ENOMEM; - spin_lock(&can_rcvlists_lock); + spin_lock(&net->can.can_rcvlists_lock); - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(net, dev); if (d) { rl = find_rcv_list(&can_id, &mask, d); @@ -502,15 +497,15 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, hlist_add_head_rcu(&r->list, rl); d->entries++; - can_pstats.rcv_entries++; - if (can_pstats.rcv_entries_max < can_pstats.rcv_entries) - can_pstats.rcv_entries_max = can_pstats.rcv_entries; + can_pstats->rcv_entries++; + if (can_pstats->rcv_entries_max < can_pstats->rcv_entries) + can_pstats->rcv_entries_max = can_pstats->rcv_entries; } else { kmem_cache_free(rcv_cache, r); err = -ENODEV; } - spin_unlock(&can_rcvlists_lock); + spin_unlock(&net->can.can_rcvlists_lock); return err; } @@ -540,19 +535,24 @@ static void can_rx_delete_receiver(struct rcu_head *rp) * Description: * Removes subscription entry depending on given (subscription) values. */ -void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, - void (*func)(struct sk_buff *, void *), void *data) +void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, + canid_t mask, void (*func)(struct sk_buff *, void *), + void *data) { struct receiver *r = NULL; struct hlist_head *rl; + struct s_pstats *can_pstats = net->can.can_pstats; struct dev_rcv_lists *d; if (dev && dev->type != ARPHRD_CAN) return; - spin_lock(&can_rcvlists_lock); + if (dev && !net_eq(net, dev_net(dev))) + return; + + spin_lock(&net->can.can_rcvlists_lock); - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(net, dev); if (!d) { pr_err("BUG: receive list not found for " "dev %s, id %03X, mask %03X\n", @@ -588,8 +588,8 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, hlist_del_rcu(&r->list); d->entries--; - if (can_pstats.rcv_entries > 0) - can_pstats.rcv_entries--; + if (can_pstats->rcv_entries > 0) + can_pstats->rcv_entries--; /* remove device structure requested by NETDEV_UNREGISTER */ if (d->remove_on_zero_entries && !d->entries) { @@ -598,7 +598,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, } out: - spin_unlock(&can_rcvlists_lock); + spin_unlock(&net->can.can_rcvlists_lock); /* schedule the receiver item for deletion */ if (r) { @@ -683,11 +683,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) static void can_receive(struct sk_buff *skb, struct net_device *dev) { struct dev_rcv_lists *d; + struct net *net = dev_net(dev); + struct s_stats *can_stats = net->can.can_stats; int matches; /* update statistics */ - can_stats.rx_frames++; - can_stats.rx_frames_delta++; + can_stats->rx_frames++; + can_stats->rx_frames_delta++; /* create non-zero unique skb identifier together with *skb */ while (!(can_skb_prv(skb)->skbcnt)) @@ -696,10 +698,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ - matches = can_rcv_filter(&can_rx_alldev_list, skb); + matches = can_rcv_filter(net->can.can_rx_alldev_list, skb); /* find receive list for this device */ - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(net, dev); if (d) matches += can_rcv_filter(d, skb); @@ -709,8 +711,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); if (matches > 0) { - can_stats.matches++; - can_stats.matches_delta++; + can_stats->matches++; + can_stats->matches_delta++; } } @@ -719,9 +721,6 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(!net_eq(dev_net(dev), &init_net))) - goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || cfd->len > CAN_MAX_DLEN, @@ -743,9 +742,6 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(!net_eq(dev_net(dev), &init_net))) - goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || cfd->len > CANFD_MAX_DLEN, @@ -835,9 +831,6 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; @@ -855,7 +848,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, break; case NETDEV_UNREGISTER: - spin_lock(&can_rcvlists_lock); + spin_lock(&dev_net(dev)->can.can_rcvlists_lock); d = dev->ml_priv; if (d) { @@ -869,7 +862,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, pr_err("can: notifier: receive list not found for dev " "%s\n", dev->name); - spin_unlock(&can_rcvlists_lock); + spin_unlock(&dev_net(dev)->can.can_rcvlists_lock); break; } @@ -877,6 +870,58 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static int can_pernet_init(struct net *net) +{ + spin_lock_init(&net->can.can_rcvlists_lock); + net->can.can_rx_alldev_list = + kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); + + net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); + net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); + + if (IS_ENABLED(CONFIG_PROC_FS)) { + /* the statistics are updated every second (timer triggered) */ + if (stats_timer) { + setup_timer(&net->can.can_stattimer, can_stat_update, + (unsigned long)net); + mod_timer(&net->can.can_stattimer, + round_jiffies(jiffies + HZ)); + } + net->can.can_stats->jiffies_init = jiffies; + can_init_proc(net); + } + + return 0; +} + +static void can_pernet_exit(struct net *net) +{ + struct net_device *dev; + + if (IS_ENABLED(CONFIG_PROC_FS)) { + can_remove_proc(net); + if (stats_timer) + del_timer_sync(&net->can.can_stattimer); + } + + /* remove created dev_rcv_lists from still registered CAN devices */ + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + struct dev_rcv_lists *d = dev->ml_priv; + + BUG_ON(d->entries); + kfree(d); + dev->ml_priv = NULL; + } + } + rcu_read_unlock(); + + kfree(net->can.can_rx_alldev_list); + kfree(net->can.can_stats); + kfree(net->can.can_pstats); +} + /* * af_can module init/exit functions */ @@ -902,6 +947,11 @@ static struct notifier_block can_netdev_notifier __read_mostly = { .notifier_call = can_notifier, }; +static struct pernet_operations can_pernet_ops __read_mostly = { + .init = can_pernet_init, + .exit = can_pernet_exit, +}; + static __init int can_init(void) { /* check for correct padding to be able to use the structs similarly */ @@ -912,21 +962,12 @@ static __init int can_init(void) pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n"); - memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list)); - rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver), 0, 0, NULL); if (!rcv_cache) return -ENOMEM; - if (IS_ENABLED(CONFIG_PROC_FS)) { - if (stats_timer) { - /* the statistics are updated every second (timer triggered) */ - setup_timer(&can_stattimer, can_stat_update, 0); - mod_timer(&can_stattimer, round_jiffies(jiffies + HZ)); - } - can_init_proc(); - } + register_pernet_subsys(&can_pernet_ops); /* protocol register */ sock_register(&can_family_ops); @@ -939,34 +980,13 @@ static __init int can_init(void) static __exit void can_exit(void) { - struct net_device *dev; - - if (IS_ENABLED(CONFIG_PROC_FS)) { - if (stats_timer) - del_timer_sync(&can_stattimer); - - can_remove_proc(); - } - /* protocol unregister */ dev_remove_pack(&canfd_packet); dev_remove_pack(&can_packet); unregister_netdevice_notifier(&can_netdev_notifier); sock_unregister(PF_CAN); - /* remove created dev_rcv_lists from still registered CAN devices */ - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - - struct dev_rcv_lists *d = dev->ml_priv; - - BUG_ON(d->entries); - kfree(d); - dev->ml_priv = NULL; - } - } - rcu_read_unlock(); + unregister_pernet_subsys(&can_pernet_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/can/af_can.h b/net/can/af_can.h index b86f5129e8385fe84ef671bb914e8e05c2977ca0..d0ef45bb2a72748e95f4ed0c0156e723ea01b7a3 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -110,18 +110,9 @@ struct s_pstats { unsigned long rcv_entries_max; }; -/* receive filters subscribed for 'all' CAN devices */ -extern struct dev_rcv_lists can_rx_alldev_list; - /* function prototypes for the CAN networklayer procfs (proc.c) */ -void can_init_proc(void); -void can_remove_proc(void); +void can_init_proc(struct net *net); +void can_remove_proc(struct net *net); void can_stat_update(unsigned long data); -/* structures and variables from af_can.c needed in proc.c for reading */ -extern struct timer_list can_stattimer; /* timer for statistics update */ -extern struct s_stats can_stats; /* packet statistics */ -extern struct s_pstats can_pstats; /* receive list statistics */ -extern struct hlist_head can_rx_dev_list; /* rx dispatcher structures */ - #endif /* AF_CAN_H */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d13b233c65161cf3595a8b0036207f5c2892e3..65432633a25000361c720058f8c3e8499fff9529 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1,7 +1,7 @@ /* * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content * - * Copyright (c) 2002-2016 Volkswagen Group Electronic Research + * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -77,7 +77,7 @@ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20161123" +#define CAN_BCM_VERSION "20170425" MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); @@ -118,8 +118,6 @@ struct bcm_op { struct net_device *rx_reg_dev; }; -static struct proc_dir_entry *proc_dir; - struct bcm_sock { struct sock sk; int bound; @@ -149,7 +147,8 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) /* * procfs functions */ -static char *bcm_proc_getifname(char *result, int ifindex) +#if IS_ENABLED(CONFIG_PROC_FS) +static char *bcm_proc_getifname(struct net *net, char *result, int ifindex) { struct net_device *dev; @@ -157,7 +156,7 @@ static char *bcm_proc_getifname(char *result, int ifindex) return "any"; rcu_read_lock(); - dev = dev_get_by_index_rcu(&init_net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev) strcpy(result, dev->name); else @@ -170,7 +169,8 @@ static char *bcm_proc_getifname(char *result, int ifindex) static int bcm_proc_show(struct seq_file *m, void *v) { char ifname[IFNAMSIZ]; - struct sock *sk = (struct sock *)m->private; + struct net *net = m->private; + struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode); struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; @@ -178,7 +178,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, " / sk %pK", sk); seq_printf(m, " / bo %pK", bo); seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs); - seq_printf(m, " / bound %s", bcm_proc_getifname(ifname, bo->ifindex)); + seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); list_for_each_entry(op, &bo->rx_ops, list) { @@ -190,7 +190,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) continue; seq_printf(m, "rx_op: %03X %-5s ", op->can_id, - bcm_proc_getifname(ifname, op->ifindex)); + bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u)", op->nframes); @@ -219,7 +219,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) list_for_each_entry(op, &bo->tx_ops, list) { seq_printf(m, "tx_op: %03X %s ", op->can_id, - bcm_proc_getifname(ifname, op->ifindex)); + bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u) ", op->nframes); @@ -242,7 +242,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) static int bcm_proc_open(struct inode *inode, struct file *file) { - return single_open(file, bcm_proc_show, PDE_DATA(inode)); + return single_open_net(inode, file, bcm_proc_show); } static const struct file_operations bcm_proc_fops = { @@ -252,6 +252,7 @@ static const struct file_operations bcm_proc_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif /* CONFIG_PROC_FS */ /* * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface @@ -267,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op) if (!op->ifindex) return; - dev = dev_get_by_index(&init_net, op->ifindex); + dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? */ return; @@ -764,8 +765,8 @@ static void bcm_remove_op(struct bcm_op *op) static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { - can_rx_unregister(dev, op->can_id, REGMASK(op->can_id), - bcm_rx_handler, op); + can_rx_unregister(dev_net(dev), dev, op->can_id, + REGMASK(op->can_id), bcm_rx_handler, op); /* mark as removed subscription */ op->rx_reg_dev = NULL; @@ -800,7 +801,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, if (op->rx_reg_dev) { struct net_device *dev; - dev = dev_get_by_index(&init_net, + dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (dev) { bcm_rx_unreg(dev, op); @@ -808,7 +809,8 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, } } } else - can_rx_unregister(NULL, op->can_id, + can_rx_unregister(sock_net(op->sk), NULL, + op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); @@ -1220,9 +1222,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); if (dev) { - err = can_rx_register(dev, op->can_id, + err = can_rx_register(sock_net(sk), dev, + op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); @@ -1232,7 +1235,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } } else - err = can_rx_register(NULL, op->can_id, + err = can_rx_register(sock_net(sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); if (err) { @@ -1272,7 +1275,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk, return err; } - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) { kfree_skb(skb); return -ENODEV; @@ -1337,7 +1340,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENODEV; @@ -1418,7 +1421,7 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, struct bcm_op *op; int notify_enodev = 0; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), sock_net(sk))) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) @@ -1490,6 +1493,7 @@ static int bcm_init(struct sock *sk) static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); struct bcm_sock *bo; struct bcm_op *op, *next; @@ -1521,23 +1525,25 @@ static int bcm_release(struct socket *sock) if (op->rx_reg_dev) { struct net_device *dev; - dev = dev_get_by_index(&init_net, op->ifindex); + dev = dev_get_by_index(net, op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else - can_rx_unregister(NULL, op->can_id, + can_rx_unregister(net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); bcm_remove_op(op); } +#if IS_ENABLED(CONFIG_PROC_FS) /* remove procfs entry */ - if (proc_dir && bo->bcm_proc_read) - remove_proc_entry(bo->procname, proc_dir); + if (net->can.bcmproc_dir && bo->bcm_proc_read) + remove_proc_entry(bo->procname, net->can.bcmproc_dir); +#endif /* CONFIG_PROC_FS */ /* remove device reference */ if (bo->bound) { @@ -1560,6 +1566,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + struct net *net = sock_net(sk); int ret = 0; if (len < sizeof(*addr)) @@ -1576,7 +1583,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, if (addr->can_ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, addr->can_ifindex); + dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { ret = -ENODEV; goto fail; @@ -1595,17 +1602,19 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } - if (proc_dir) { +#if IS_ENABLED(CONFIG_PROC_FS) + if (net->can.bcmproc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, - proc_dir, + net->can.bcmproc_dir, &bcm_proc_fops, sk); if (!bo->bcm_proc_read) { ret = -ENOMEM; goto fail; } } +#endif /* CONFIG_PROC_FS */ bo->bound = 1; @@ -1686,6 +1695,30 @@ static const struct can_proto bcm_can_proto = { .prot = &bcm_proto, }; +static int canbcm_pernet_init(struct net *net) +{ +#if IS_ENABLED(CONFIG_PROC_FS) + /* create /proc/net/can-bcm directory */ + net->can.bcmproc_dir = proc_net_mkdir(net, "can-bcm", net->proc_net); +#endif /* CONFIG_PROC_FS */ + + return 0; +} + +static void canbcm_pernet_exit(struct net *net) +{ +#if IS_ENABLED(CONFIG_PROC_FS) + /* remove /proc/net/can-bcm directory */ + if (net->can.bcmproc_dir) + remove_proc_entry("can-bcm", net->proc_net); +#endif /* CONFIG_PROC_FS */ +} + +static struct pernet_operations canbcm_pernet_ops __read_mostly = { + .init = canbcm_pernet_init, + .exit = canbcm_pernet_exit, +}; + static int __init bcm_module_init(void) { int err; @@ -1698,17 +1731,14 @@ static int __init bcm_module_init(void) return err; } - /* create /proc/net/can-bcm directory */ - proc_dir = proc_mkdir("can-bcm", init_net.proc_net); + register_pernet_subsys(&canbcm_pernet_ops); return 0; } static void __exit bcm_module_exit(void) { can_proto_unregister(&bcm_can_proto); - - if (proc_dir) - remove_proc_entry("can-bcm", init_net.proc_net); + unregister_pernet_subsys(&canbcm_pernet_ops); } module_init(bcm_module_init); diff --git a/net/can/gw.c b/net/can/gw.c index 7056a1a2bb70098e691ce557f05e5bc1f27cb42f..29748d844c3fac1bda35d494cdc1474fe81849a9 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1,7 +1,7 @@ /* * gw.c - CAN frame Gateway/Router/Bridge with netlink interface * - * Copyright (c) 2011 Volkswagen Group Electronic Research + * Copyright (c) 2017 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,7 +59,7 @@ #include #include -#define CAN_GW_VERSION "20130117" +#define CAN_GW_VERSION "20170425" #define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); @@ -79,9 +79,7 @@ MODULE_PARM_DESC(max_hops, __stringify(CGW_MAX_HOPS) " hops, " "default: " __stringify(CGW_DEFAULT_HOPS) ")"); -static HLIST_HEAD(cgw_list); static struct notifier_block notifier; - static struct kmem_cache *cgw_cache __read_mostly; /* structure that contains the (on-the-fly) CAN frame modifications */ @@ -438,16 +436,16 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->handled_frames++; } -static inline int cgw_register_filter(struct cgw_job *gwj) +static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) { - return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + return can_rx_register(net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj, "gw", NULL); } -static inline void cgw_unregister_filter(struct cgw_job *gwj) +static inline void cgw_unregister_filter(struct net *net, struct cgw_job *gwj) { - can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id, + can_rx_unregister(net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); } @@ -455,9 +453,8 @@ static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; @@ -468,11 +465,11 @@ static int cgw_notifier(struct notifier_block *nb, ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); - cgw_unregister_filter(gwj); + cgw_unregister_filter(net, gwj); kmem_cache_free(cgw_cache, gwj); } } @@ -592,12 +589,13 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, /* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct cgw_job *gwj = NULL; int idx = 0; int s_idx = cb->args[0]; rcu_read_lock(); - hlist_for_each_entry_rcu(gwj, &cgw_list, list) { + hlist_for_each_entry_rcu(gwj, &net->can.cgw_list, list) { if (idx < s_idx) goto cont; @@ -641,7 +639,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, memset(mod, 0, sizeof(*mod)); err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, - cgw_policy); + cgw_policy, NULL); if (err < 0) return err; @@ -809,8 +807,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, return 0; } -static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { + struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; struct cf_mod mod; @@ -841,7 +841,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); /* check for updating an existing job with identical uid */ - hlist_for_each_entry(gwj, &cgw_list, list) { + hlist_for_each_entry(gwj, &net->can.cgw_list, list) { if (gwj->mod.uid != mod.uid) continue; @@ -879,7 +879,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) err = -ENODEV; - gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx); + gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx); if (!gwj->src.dev) goto out; @@ -887,7 +887,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (gwj->src.dev->type != ARPHRD_CAN) goto out; - gwj->dst.dev = __dev_get_by_index(&init_net, gwj->ccgw.dst_idx); + gwj->dst.dev = __dev_get_by_index(net, gwj->ccgw.dst_idx); if (!gwj->dst.dev) goto out; @@ -897,9 +897,9 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = cgw_register_filter(gwj); + err = cgw_register_filter(net, gwj); if (!err) - hlist_add_head_rcu(&gwj->list, &cgw_list); + hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: if (err) kmem_cache_free(cgw_cache, gwj); @@ -907,22 +907,24 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static void cgw_remove_all_jobs(void) +static void cgw_remove_all_jobs(struct net *net) { struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); - hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { hlist_del(&gwj->list); - cgw_unregister_filter(gwj); + cgw_unregister_filter(net, gwj); kmem_cache_free(cgw_cache, gwj); } } -static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { + struct net *net = sock_net(skb->sk); struct cgw_job *gwj = NULL; struct hlist_node *nx; struct rtcanmsg *r; @@ -951,7 +953,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) /* two interface indices both set to 0 => remove all entries */ if (!ccgw.src_idx && !ccgw.dst_idx) { - cgw_remove_all_jobs(); + cgw_remove_all_jobs(net); return 0; } @@ -960,7 +962,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); /* remove only the first matching entry */ - hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { + hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { if (gwj->flags != r->flags) continue; @@ -983,7 +985,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) continue; hlist_del(&gwj->list); - cgw_unregister_filter(gwj); + cgw_unregister_filter(net, gwj); kmem_cache_free(cgw_cache, gwj); err = 0; break; @@ -992,6 +994,24 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } +static int __net_init cangw_pernet_init(struct net *net) +{ + INIT_HLIST_HEAD(&net->can.cgw_list); + return 0; +} + +static void __net_exit cangw_pernet_exit(struct net *net) +{ + rtnl_lock(); + cgw_remove_all_jobs(net); + rtnl_unlock(); +} + +static struct pernet_operations cangw_pernet_ops = { + .init = cangw_pernet_init, + .exit = cangw_pernet_exit, +}; + static __init int cgw_module_init(void) { /* sanitize given module parameter */ @@ -1000,6 +1020,7 @@ static __init int cgw_module_init(void) pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n", max_hops); + register_pernet_subsys(&cangw_pernet_ops); cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job), 0, 0, NULL); @@ -1029,10 +1050,7 @@ static __exit void cgw_module_exit(void) unregister_netdevice_notifier(¬ifier); - rtnl_lock(); - cgw_remove_all_jobs(); - rtnl_unlock(); - + unregister_pernet_subsys(&cangw_pernet_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ kmem_cache_destroy(cgw_cache); diff --git a/net/can/proc.c b/net/can/proc.c index 85ef7bb0f1768fdc34e41baae297893e4758d16c..83045f00c63c147dd4664c76b17a98a9e040c45e 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -62,17 +62,6 @@ #define CAN_PROC_RCVLIST_EFF "rcvlist_eff" #define CAN_PROC_RCVLIST_ERR "rcvlist_err" -static struct proc_dir_entry *can_dir; -static struct proc_dir_entry *pde_version; -static struct proc_dir_entry *pde_stats; -static struct proc_dir_entry *pde_reset_stats; -static struct proc_dir_entry *pde_rcvlist_all; -static struct proc_dir_entry *pde_rcvlist_fil; -static struct proc_dir_entry *pde_rcvlist_inv; -static struct proc_dir_entry *pde_rcvlist_sff; -static struct proc_dir_entry *pde_rcvlist_eff; -static struct proc_dir_entry *pde_rcvlist_err; - static int user_reset; static const char rx_list_name[][8] = { @@ -86,21 +75,23 @@ static const char rx_list_name[][8] = { * af_can statistics stuff */ -static void can_init_stats(void) +static void can_init_stats(struct net *net) { + struct s_stats *can_stats = net->can.can_stats; + struct s_pstats *can_pstats = net->can.can_pstats; /* * This memset function is called from a timer context (when * can_stattimer is active which is the default) OR in a process * context (reading the proc_fs when can_stattimer is disabled). */ - memset(&can_stats, 0, sizeof(can_stats)); - can_stats.jiffies_init = jiffies; + memset(can_stats, 0, sizeof(struct s_stats)); + can_stats->jiffies_init = jiffies; - can_pstats.stats_reset++; + can_pstats->stats_reset++; if (user_reset) { user_reset = 0; - can_pstats.user_reset++; + can_pstats->user_reset++; } } @@ -126,64 +117,66 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, void can_stat_update(unsigned long data) { + struct net *net = (struct net *)data; + struct s_stats *can_stats = net->can.can_stats; unsigned long j = jiffies; /* snapshot */ /* restart counting in timer context on user request */ if (user_reset) - can_init_stats(); + can_init_stats(net); /* restart counting on jiffies overflow */ - if (j < can_stats.jiffies_init) - can_init_stats(); + if (j < can_stats->jiffies_init) + can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats.rx_frames > (ULONG_MAX / HZ)) - can_init_stats(); + if (can_stats->rx_frames > (ULONG_MAX / HZ)) + can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats.tx_frames > (ULONG_MAX / HZ)) - can_init_stats(); + if (can_stats->tx_frames > (ULONG_MAX / HZ)) + can_init_stats(net); /* matches overflow - very improbable */ - if (can_stats.matches > (ULONG_MAX / 100)) - can_init_stats(); + if (can_stats->matches > (ULONG_MAX / 100)) + can_init_stats(net); /* calc total values */ - if (can_stats.rx_frames) - can_stats.total_rx_match_ratio = (can_stats.matches * 100) / - can_stats.rx_frames; + if (can_stats->rx_frames) + can_stats->total_rx_match_ratio = (can_stats->matches * 100) / + can_stats->rx_frames; - can_stats.total_tx_rate = calc_rate(can_stats.jiffies_init, j, - can_stats.tx_frames); - can_stats.total_rx_rate = calc_rate(can_stats.jiffies_init, j, - can_stats.rx_frames); + can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j, + can_stats->tx_frames); + can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j, + can_stats->rx_frames); /* calc current values */ - if (can_stats.rx_frames_delta) - can_stats.current_rx_match_ratio = - (can_stats.matches_delta * 100) / - can_stats.rx_frames_delta; + if (can_stats->rx_frames_delta) + can_stats->current_rx_match_ratio = + (can_stats->matches_delta * 100) / + can_stats->rx_frames_delta; - can_stats.current_tx_rate = calc_rate(0, HZ, can_stats.tx_frames_delta); - can_stats.current_rx_rate = calc_rate(0, HZ, can_stats.rx_frames_delta); + can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta); + can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta); /* check / update maximum values */ - if (can_stats.max_tx_rate < can_stats.current_tx_rate) - can_stats.max_tx_rate = can_stats.current_tx_rate; + if (can_stats->max_tx_rate < can_stats->current_tx_rate) + can_stats->max_tx_rate = can_stats->current_tx_rate; - if (can_stats.max_rx_rate < can_stats.current_rx_rate) - can_stats.max_rx_rate = can_stats.current_rx_rate; + if (can_stats->max_rx_rate < can_stats->current_rx_rate) + can_stats->max_rx_rate = can_stats->current_rx_rate; - if (can_stats.max_rx_match_ratio < can_stats.current_rx_match_ratio) - can_stats.max_rx_match_ratio = can_stats.current_rx_match_ratio; + if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio) + can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio; /* clear values for 'current rate' calculation */ - can_stats.tx_frames_delta = 0; - can_stats.rx_frames_delta = 0; - can_stats.matches_delta = 0; + can_stats->tx_frames_delta = 0; + can_stats->rx_frames_delta = 0; + can_stats->matches_delta = 0; /* restart timer (one second) */ - mod_timer(&can_stattimer, round_jiffies(jiffies + HZ)); + mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ)); } /* @@ -217,57 +210,61 @@ static void can_print_recv_banner(struct seq_file *m) static int can_stats_proc_show(struct seq_file *m, void *v) { + struct net *net = m->private; + struct s_stats *can_stats = net->can.can_stats; + struct s_pstats *can_pstats = net->can.can_pstats; + seq_putc(m, '\n'); - seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats.tx_frames); - seq_printf(m, " %8ld received frames (RXF)\n", can_stats.rx_frames); - seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats.matches); + seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames); + seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames); + seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches); seq_putc(m, '\n'); - if (can_stattimer.function == can_stat_update) { + if (net->can.can_stattimer.function == can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", - can_stats.total_rx_match_ratio); + can_stats->total_rx_match_ratio); seq_printf(m, " %8ld frames/s total tx rate (TXR)\n", - can_stats.total_tx_rate); + can_stats->total_tx_rate); seq_printf(m, " %8ld frames/s total rx rate (RXR)\n", - can_stats.total_rx_rate); + can_stats->total_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% current match ratio (CRXMR)\n", - can_stats.current_rx_match_ratio); + can_stats->current_rx_match_ratio); seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n", - can_stats.current_tx_rate); + can_stats->current_tx_rate); seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n", - can_stats.current_rx_rate); + can_stats->current_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% max match ratio (MRXMR)\n", - can_stats.max_rx_match_ratio); + can_stats->max_rx_match_ratio); seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n", - can_stats.max_tx_rate); + can_stats->max_tx_rate); seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n", - can_stats.max_rx_rate); + can_stats->max_rx_rate); seq_putc(m, '\n'); } seq_printf(m, " %8ld current receive list entries (CRCV)\n", - can_pstats.rcv_entries); + can_pstats->rcv_entries); seq_printf(m, " %8ld maximum receive list entries (MRCV)\n", - can_pstats.rcv_entries_max); + can_pstats->rcv_entries_max); - if (can_pstats.stats_reset) + if (can_pstats->stats_reset) seq_printf(m, "\n %8ld statistic resets (STR)\n", - can_pstats.stats_reset); + can_pstats->stats_reset); - if (can_pstats.user_reset) + if (can_pstats->user_reset) seq_printf(m, " %8ld user statistic resets (USTR)\n", - can_pstats.user_reset); + can_pstats->user_reset); seq_putc(m, '\n'); return 0; @@ -275,7 +272,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v) static int can_stats_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_stats_proc_show, NULL); + return single_open_net(inode, file, can_stats_proc_show); } static const struct file_operations can_stats_proc_fops = { @@ -288,25 +285,28 @@ static const struct file_operations can_stats_proc_fops = { static int can_reset_stats_proc_show(struct seq_file *m, void *v) { + struct net *net = m->private; + struct s_pstats *can_pstats = net->can.can_pstats; + struct s_stats *can_stats = net->can.can_stats; + user_reset = 1; - if (can_stattimer.function == can_stat_update) { + if (net->can.can_stattimer.function == can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", - can_pstats.stats_reset + 1); - + can_pstats->stats_reset + 1); } else { - if (can_stats.jiffies_init != jiffies) - can_init_stats(); + if (can_stats->jiffies_init != jiffies) + can_init_stats(net); seq_printf(m, "Performed statistic reset #%ld.\n", - can_pstats.stats_reset); + can_pstats->stats_reset); } return 0; } static int can_reset_stats_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_reset_stats_proc_show, NULL); + return single_open_net(inode, file, can_reset_stats_proc_show); } static const struct file_operations can_reset_stats_proc_fops = { @@ -325,7 +325,7 @@ static int can_version_proc_show(struct seq_file *m, void *v) static int can_version_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_version_proc_show, NULL); + return single_open_net(inode, file, can_version_proc_show); } static const struct file_operations can_version_proc_fops = { @@ -351,20 +351,21 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, static int can_rcvlist_proc_show(struct seq_file *m, void *v) { /* double cast to prevent GCC warning */ - int idx = (int)(long)m->private; + int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_one(m, idx, NULL, d); /* receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv); } @@ -377,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) static int can_rcvlist_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode)); + return single_open_net(inode, file, can_rcvlist_proc_show); } static const struct file_operations can_rcvlist_proc_fops = { @@ -417,6 +418,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; /* RX_SFF */ seq_puts(m, "\nreceive list 'rx_sff':\n"); @@ -424,11 +426,11 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); /* sff receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { d = dev->ml_priv; can_rcvlist_proc_show_array(m, dev, d->rx_sff, @@ -444,7 +446,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_sff_proc_show, NULL); + return single_open_net(inode, file, can_rcvlist_sff_proc_show); } static const struct file_operations can_rcvlist_sff_proc_fops = { @@ -460,6 +462,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; /* RX_EFF */ seq_puts(m, "\nreceive list 'rx_eff':\n"); @@ -467,11 +470,11 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); /* eff receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { d = dev->ml_priv; can_rcvlist_proc_show_array(m, dev, d->rx_eff, @@ -487,7 +490,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_eff_proc_show, NULL); + return single_open_net(inode, file, can_rcvlist_eff_proc_show); } static const struct file_operations can_rcvlist_eff_proc_fops = { @@ -498,82 +501,86 @@ static const struct file_operations can_rcvlist_eff_proc_fops = { .release = single_release, }; -/* - * proc utility functions - */ - -static void can_remove_proc_readentry(const char *name) -{ - if (can_dir) - remove_proc_entry(name, can_dir); -} - /* * can_init_proc - create main CAN proc directory and procfs entries */ -void can_init_proc(void) +void can_init_proc(struct net *net) { /* create /proc/net/can directory */ - can_dir = proc_mkdir("can", init_net.proc_net); + net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net); - if (!can_dir) { - pr_info("can: failed to create /proc/net/can.\n"); + if (!net->can.proc_dir) { + printk(KERN_INFO "can: failed to create /proc/net/can . " + "CONFIG_PROC_FS missing?\n"); return; } /* own procfs entries from the AF_CAN core */ - pde_version = proc_create(CAN_PROC_VERSION, 0644, can_dir, - &can_version_proc_fops); - pde_stats = proc_create(CAN_PROC_STATS, 0644, can_dir, - &can_stats_proc_fops); - pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, can_dir, - &can_reset_stats_proc_fops); - pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_ERR); - pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_ALL); - pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_FIL); - pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_INV); - pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir, - &can_rcvlist_eff_proc_fops); - pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir, - &can_rcvlist_sff_proc_fops); + net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644, + net->can.proc_dir, + &can_version_proc_fops); + net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644, + net->can.proc_dir, + &can_stats_proc_fops); + net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, + net->can.proc_dir, + &can_reset_stats_proc_fops); + net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_ERR); + net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_ALL); + net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_FIL); + net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_INV); + net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, + net->can.proc_dir, + &can_rcvlist_eff_proc_fops); + net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, + net->can.proc_dir, + &can_rcvlist_sff_proc_fops); } /* * can_remove_proc - remove procfs entries and main CAN proc directory */ -void can_remove_proc(void) +void can_remove_proc(struct net *net) { - if (pde_version) - can_remove_proc_readentry(CAN_PROC_VERSION); + if (net->can.pde_version) + remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir); - if (pde_stats) - can_remove_proc_readentry(CAN_PROC_STATS); + if (net->can.pde_stats) + remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir); - if (pde_reset_stats) - can_remove_proc_readentry(CAN_PROC_RESET_STATS); + if (net->can.pde_reset_stats) + remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir); - if (pde_rcvlist_err) - can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR); + if (net->can.pde_rcvlist_err) + remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir); - if (pde_rcvlist_all) - can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL); + if (net->can.pde_rcvlist_all) + remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir); - if (pde_rcvlist_fil) - can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL); + if (net->can.pde_rcvlist_fil) + remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir); - if (pde_rcvlist_inv) - can_remove_proc_readentry(CAN_PROC_RCVLIST_INV); + if (net->can.pde_rcvlist_inv) + remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir); - if (pde_rcvlist_eff) - can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF); + if (net->can.pde_rcvlist_eff) + remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir); - if (pde_rcvlist_sff) - can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF); + if (net->can.pde_rcvlist_sff) + remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (can_dir) - remove_proc_entry("can", init_net.proc_net); + if (net->can.proc_dir) + remove_proc_entry("can", net->proc_net); } diff --git a/net/can/raw.c b/net/can/raw.c index 6dc546a06673ff41fc121c546ebd0567bb0da05f..864c80dbdb72d11cdd24183cec3769222b339ccc 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -181,20 +181,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data) kfree_skb(skb); } -static int raw_enable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, int count) +static int raw_enable_filters(struct net *net, struct net_device *dev, + struct sock *sk, struct can_filter *filter, + int count) { int err = 0; int i; for (i = 0; i < count; i++) { - err = can_rx_register(dev, filter[i].can_id, + err = can_rx_register(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) - can_rx_unregister(dev, filter[i].can_id, + can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); break; @@ -204,57 +205,62 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, return err; } -static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, - can_err_mask_t err_mask) +static int raw_enable_errfilter(struct net *net, struct net_device *dev, + struct sock *sk, can_err_mask_t err_mask) { int err = 0; if (err_mask) - err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, + err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk, "raw", sk); return err; } -static void raw_disable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, int count) +static void raw_disable_filters(struct net *net, struct net_device *dev, + struct sock *sk, struct can_filter *filter, + int count) { int i; for (i = 0; i < count; i++) - can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk); + can_rx_unregister(net, dev, filter[i].can_id, + filter[i].can_mask, raw_rcv, sk); } -static inline void raw_disable_errfilter(struct net_device *dev, +static inline void raw_disable_errfilter(struct net *net, + struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { if (err_mask) - can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG, + can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk); } -static inline void raw_disable_allfilters(struct net_device *dev, +static inline void raw_disable_allfilters(struct net *net, + struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); - raw_disable_filters(dev, sk, ro->filter, ro->count); - raw_disable_errfilter(dev, sk, ro->err_mask); + raw_disable_filters(net, dev, sk, ro->filter, ro->count); + raw_disable_errfilter(net, dev, sk, ro->err_mask); } -static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) +static int raw_enable_allfilters(struct net *net, struct net_device *dev, + struct sock *sk) { struct raw_sock *ro = raw_sk(sk); int err; - err = raw_enable_filters(dev, sk, ro->filter, ro->count); + err = raw_enable_filters(net, dev, sk, ro->filter, ro->count); if (!err) { - err = raw_enable_errfilter(dev, sk, ro->err_mask); + err = raw_enable_errfilter(net, dev, sk, ro->err_mask); if (err) - raw_disable_filters(dev, sk, ro->filter, ro->count); + raw_disable_filters(net, dev, sk, ro->filter, + ro->count); } return err; @@ -267,7 +273,7 @@ static int raw_notifier(struct notifier_block *nb, struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), sock_net(sk))) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) @@ -282,7 +288,7 @@ static int raw_notifier(struct notifier_block *nb, lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), dev, sk); if (ro->count > 1) kfree(ro->filter); @@ -358,13 +364,13 @@ static int raw_release(struct socket *sock) if (ro->ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); if (dev) { - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), dev, sk); dev_put(dev); } } else - raw_disable_allfilters(NULL, sk); + raw_disable_allfilters(sock_net(sk), NULL, sk); } if (ro->count > 1) @@ -404,7 +410,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (addr->can_ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, addr->can_ifindex); + dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; goto out; @@ -420,13 +426,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; /* filters set by default/setsockopt */ - err = raw_enable_allfilters(dev, sk); + err = raw_enable_allfilters(sock_net(sk), dev, sk); dev_put(dev); } else { ifindex = 0; /* filters set by default/setsockopt */ - err = raw_enable_allfilters(NULL, sk); + err = raw_enable_allfilters(sock_net(sk), NULL, sk); } if (!err) { @@ -435,13 +441,15 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (ro->ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), + ro->ifindex); if (dev) { - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), + dev, sk); dev_put(dev); } } else - raw_disable_allfilters(NULL, sk); + raw_disable_allfilters(sock_net(sk), NULL, sk); } ro->ifindex = ifindex; ro->bound = 1; @@ -517,15 +525,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (ro->bound && ro->ifindex) - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); if (ro->bound) { /* (try to) register the new filters */ if (count == 1) - err = raw_enable_filters(dev, sk, &sfilter, 1); + err = raw_enable_filters(sock_net(sk), dev, sk, + &sfilter, 1); else - err = raw_enable_filters(dev, sk, filter, - count); + err = raw_enable_filters(sock_net(sk), dev, sk, + filter, count); if (err) { if (count > 1) kfree(filter); @@ -533,7 +542,8 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } /* remove old filter registrations */ - raw_disable_filters(dev, sk, ro->filter, ro->count); + raw_disable_filters(sock_net(sk), dev, sk, ro->filter, + ro->count); } /* remove old filter space */ @@ -569,18 +579,20 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (ro->bound && ro->ifindex) - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); /* remove current error mask */ if (ro->bound) { /* (try to) register the new err_mask */ - err = raw_enable_errfilter(dev, sk, err_mask); + err = raw_enable_errfilter(sock_net(sk), dev, sk, + err_mask); if (err) goto out_err; /* remove old err_mask registration */ - raw_disable_errfilter(dev, sk, ro->err_mask); + raw_disable_errfilter(sock_net(sk), dev, sk, + ro->err_mask); } /* link new err_mask to the socket */ @@ -741,7 +753,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return -EINVAL; } - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENXIO; diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 2034fb9266700e6b456b141db9b5d264aed77d62..8757fb87dab871faaaeccaed0ac11d76521ad594 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -151,7 +151,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, struct timespec validity; void *tp, *tpend; void **ptp; - struct ceph_crypto_key new_session_key; + struct ceph_crypto_key new_session_key = { 0 }; struct ceph_buffer *new_ticket_blob; unsigned long new_expires, new_renew_after; u64 new_secret_id; @@ -215,6 +215,9 @@ static int process_one_ticket(struct ceph_auth_client *ac, dout(" ticket blob is %d bytes\n", dlen); ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad); blob_struct_v = ceph_decode_8(ptp); + if (blob_struct_v != 1) + goto bad; + new_secret_id = ceph_decode_64(ptp); ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend); if (ret) @@ -234,13 +237,13 @@ static int process_one_ticket(struct ceph_auth_client *ac, type, ceph_entity_type_name(type), th->secret_id, (int)th->ticket_blob->vec.iov_len); xi->have_keys |= th->service; - -out: - return ret; + return 0; bad: ret = -EINVAL; - goto out; +out: + ceph_crypto_key_destroy(&new_session_key); + return ret; } static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 108533859a53292cde61a3cedd052a2579684e87..47e94b560ba0d37daf2fb801fa11b9aa39720013 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -45,18 +45,16 @@ bool libceph_compatible(void *data) } EXPORT_SYMBOL(libceph_compatible); -/* - * find filename portion of a path (/foo/bar/baz -> baz) - */ -const char *ceph_file_part(const char *s, int len) +static int param_get_supported_features(char *buffer, + const struct kernel_param *kp) { - const char *e = s + len; - - while (e != s && *(e-1) != '/') - e--; - return e; + return sprintf(buffer, "0x%llx", CEPH_FEATURES_SUPPORTED_DEFAULT); } -EXPORT_SYMBOL(ceph_file_part); +static const struct kernel_param_ops param_ops_supported_features = { + .get = param_get_supported_features, +}; +module_param_cb(supported_features, ¶m_ops_supported_features, NULL, + S_IRUGO); const char *ceph_msg_type_name(int type) { @@ -187,7 +185,7 @@ void *ceph_kvmalloc(size_t size, gfp_t flags) return ptr; } - return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL); + return __vmalloc(size, flags, PAGE_KERNEL); } @@ -596,9 +594,7 @@ EXPORT_SYMBOL(ceph_client_gid); /* * create a fresh client instance */ -struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, - u64 supported_features, - u64 required_features) +struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) { struct ceph_client *client; struct ceph_entity_addr *myaddr = NULL; @@ -615,14 +611,12 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, init_waitqueue_head(&client->auth_wq); client->auth_err = 0; - if (!ceph_test_opt(client, NOMSGAUTH)) - required_features |= CEPH_FEATURE_MSG_AUTH; - client->extra_mon_dispatch = NULL; - client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | - supported_features; - client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT | - required_features; + client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT; + client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT; + + if (!ceph_test_opt(client, NOMSGAUTH)) + client->required_features |= CEPH_FEATURE_MSG_AUTH; /* msgr */ if (ceph_test_opt(client, MYIP)) diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c index b9233b9903990bd38721213ce287a8045debd8c7..08ada893f01e6acb61b5f91425539a33d7b2c0d4 100644 --- a/net/ceph/cls_lock_client.c +++ b/net/ceph/cls_lock_client.c @@ -179,6 +179,57 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc, } EXPORT_SYMBOL(ceph_cls_break_lock); +int ceph_cls_set_cookie(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 type, char *old_cookie, + char *tag, char *new_cookie) +{ + int cookie_op_buf_size; + int name_len = strlen(lock_name); + int old_cookie_len = strlen(old_cookie); + int tag_len = strlen(tag); + int new_cookie_len = strlen(new_cookie); + void *p, *end; + struct page *cookie_op_page; + int ret; + + cookie_op_buf_size = name_len + sizeof(__le32) + + old_cookie_len + sizeof(__le32) + + tag_len + sizeof(__le32) + + new_cookie_len + sizeof(__le32) + + sizeof(u8) + CEPH_ENCODING_START_BLK_LEN; + if (cookie_op_buf_size > PAGE_SIZE) + return -E2BIG; + + cookie_op_page = alloc_page(GFP_NOIO); + if (!cookie_op_page) + return -ENOMEM; + + p = page_address(cookie_op_page); + end = p + cookie_op_buf_size; + + /* encode cls_lock_set_cookie_op struct */ + ceph_start_encoding(&p, 1, 1, + cookie_op_buf_size - CEPH_ENCODING_START_BLK_LEN); + ceph_encode_string(&p, end, lock_name, name_len); + ceph_encode_8(&p, type); + ceph_encode_string(&p, end, old_cookie, old_cookie_len); + ceph_encode_string(&p, end, tag, tag_len); + ceph_encode_string(&p, end, new_cookie, new_cookie_len); + + dout("%s lock_name %s type %d old_cookie %s tag %s new_cookie %s\n", + __func__, lock_name, type, old_cookie, tag, new_cookie); + ret = ceph_osdc_call(osdc, oid, oloc, "lock", "set_cookie", + CEPH_OSD_FLAG_WRITE, cookie_op_page, + cookie_op_buf_size, NULL, NULL); + + dout("%s: status %d\n", __func__, ret); + __free_page(cookie_op_page); + return ret; +} +EXPORT_SYMBOL(ceph_cls_set_cookie); + void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers) { int i; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index c62b2b029a6e6fe66fa3440551d0602fc8c4123e..71ba13927b3d1726bd488403de8274478a956091 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -62,7 +62,8 @@ static int osdmap_show(struct seq_file *s, void *p) return 0; down_read(&osdc->lock); - seq_printf(s, "epoch %d flags 0x%x\n", map->epoch, map->flags); + seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch, + osdc->epoch_barrier, map->flags); for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pi = @@ -177,9 +178,7 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req) seq_printf(s, "%llu\t", req->r_tid); dump_target(s, &req->r_t); - seq_printf(s, "\t%d\t%u'%llu", req->r_attempts, - le32_to_cpu(req->r_replay_version.epoch), - le64_to_cpu(req->r_replay_version.version)); + seq_printf(s, "\t%d", req->r_attempts); for (i = 0; i < req->r_num_ops; i++) { struct ceph_osd_req_op *op = &req->r_ops[i]; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f76bb333261384257490b0f5125207028e8352aa..588a919300514ad2cd4b843c528bb41e601fa7d8 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1174,8 +1174,8 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, * Returns true if the result moves the cursor on to the next piece * of the data item. */ -static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, - size_t bytes) +static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) { bool new_piece; @@ -1207,8 +1207,6 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, new_piece = true; } cursor->need_crc = new_piece; - - return new_piece; } static size_t sizeof_footer(struct ceph_connection *con) @@ -1386,8 +1384,9 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec now = CURRENT_TIME; + struct timespec now; + ktime_get_real_ts(&now); con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); ceph_encode_timespec(&con->out_temp_keepalive2, &now); con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), @@ -1576,7 +1575,6 @@ static int write_partial_message_data(struct ceph_connection *con) size_t page_offset; size_t length; bool last_piece; - bool need_crc; int ret; page = ceph_msg_data_next(cursor, &page_offset, &length, @@ -1591,7 +1589,7 @@ static int write_partial_message_data(struct ceph_connection *con) } if (do_datacrc && cursor->need_crc) crc = ceph_crc32c_page(crc, page, page_offset, length); - need_crc = ceph_msg_data_advance(cursor, (size_t)ret); + ceph_msg_data_advance(cursor, (size_t)ret); } dout("%s %p msg %p done\n", __func__, con, msg); @@ -2230,10 +2228,18 @@ static void process_ack(struct ceph_connection *con) struct ceph_msg *m; u64 ack = le64_to_cpu(con->in_temp_ack); u64 seq; + bool reconnect = (con->in_tag == CEPH_MSGR_TAG_SEQ); + struct list_head *list = reconnect ? &con->out_queue : &con->out_sent; - while (!list_empty(&con->out_sent)) { - m = list_first_entry(&con->out_sent, struct ceph_msg, - list_head); + /* + * In the reconnect case, con_fault() has requeued messages + * in out_sent. We should cleanup old messages according to + * the reconnect seq. + */ + while (!list_empty(list)) { + m = list_first_entry(list, struct ceph_msg, list_head); + if (reconnect && m->needs_out_seq) + break; seq = le64_to_cpu(m->hdr.seq); if (seq > ack) break; @@ -2242,6 +2248,7 @@ static void process_ack(struct ceph_connection *con) m->ack_stamp = jiffies; ceph_msg_remove(m); } + prepare_read_tag(con); } @@ -2298,7 +2305,7 @@ static int read_partial_msg_data(struct ceph_connection *con) if (do_datacrc) crc = ceph_crc32c_page(crc, page, page_offset, ret); - (void) ceph_msg_data_advance(cursor, (size_t)ret); + ceph_msg_data_advance(cursor, (size_t)ret); } if (do_datacrc) con->in_data_crc = crc; @@ -3176,8 +3183,9 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, { if (interval > 0 && (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) { - struct timespec now = CURRENT_TIME; + struct timespec now; struct timespec ts; + ktime_get_real_ts(&now); jiffies_to_timespec(interval, &ts); ts = timespec_add(con->last_keepalive_ack, ts); return timespec_compare(&now, &ts) >= 0; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 29a0ef351c5e068838fc1ed8f634439853ff0a3a..250f11f786092d902b52da5af9a228d9786acf62 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -43,15 +43,13 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) int i, err = -EINVAL; struct ceph_fsid fsid; u32 epoch, num_mon; - u16 version; u32 len; ceph_decode_32_safe(&p, end, len, bad); ceph_decode_need(&p, end, len, bad); dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p)); - - ceph_decode_16_safe(&p, end, version, bad); + p += sizeof(u16); /* skip version */ ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); ceph_decode_copy(&p, &fsid, sizeof(fsid)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e15ea9e4c4955fbd697e545cedfdb7f7925c347e..924f07c36ddbc2864c7428723766d0a64729c7c4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -961,6 +961,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, truncate_size, truncate_seq); } + req->r_abort_on_full = true; req->r_flags = flags; req->r_base_oloc.pool = layout->pool_id; req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); @@ -1005,7 +1006,7 @@ static bool osd_registered(struct ceph_osd *osd) */ static void osd_init(struct ceph_osd *osd) { - atomic_set(&osd->o_ref, 1); + refcount_set(&osd->o_ref, 1); RB_CLEAR_NODE(&osd->o_node); osd->o_requests = RB_ROOT; osd->o_linger_requests = RB_ROOT; @@ -1050,9 +1051,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) static struct ceph_osd *get_osd(struct ceph_osd *osd) { - if (atomic_inc_not_zero(&osd->o_ref)) { - dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1, - atomic_read(&osd->o_ref)); + if (refcount_inc_not_zero(&osd->o_ref)) { + dout("get_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref)-1, + refcount_read(&osd->o_ref)); return osd; } else { dout("get_osd %p FAIL\n", osd); @@ -1062,9 +1063,9 @@ static struct ceph_osd *get_osd(struct ceph_osd *osd) static void put_osd(struct ceph_osd *osd) { - dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref), - atomic_read(&osd->o_ref) - 1); - if (atomic_dec_and_test(&osd->o_ref)) { + dout("put_osd %p %d -> %d\n", osd, refcount_read(&osd->o_ref), + refcount_read(&osd->o_ref) - 1); + if (refcount_dec_and_test(&osd->o_ref)) { osd_cleanup(osd); kfree(osd); } @@ -1297,8 +1298,9 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc, __pool_full(pi); WARN_ON(pi->id != t->base_oloc.pool); - return (t->flags & CEPH_OSD_FLAG_READ && pauserd) || - (t->flags & CEPH_OSD_FLAG_WRITE && pausewr); + return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) || + ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) || + (osdc->osdmap->epoch < osdc->epoch_barrier); } enum calc_target_result { @@ -1503,9 +1505,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg) ceph_encode_32(&p, req->r_flags); ceph_encode_timespec(p, &req->r_mtime); p += sizeof(struct ceph_timespec); - /* aka reassert_version */ - memcpy(p, &req->r_replay_version, sizeof(req->r_replay_version)); - p += sizeof(req->r_replay_version); + + /* reassert_version */ + memset(p, 0, sizeof(struct ceph_eversion)); + p += sizeof(struct ceph_eversion); /* oloc */ ceph_start_encoding(&p, 5, 4, @@ -1626,6 +1629,7 @@ static void maybe_request_map(struct ceph_osd_client *osdc) ceph_monc_renew_subs(&osdc->client->monc); } +static void complete_request(struct ceph_osd_request *req, int err); static void send_map_check(struct ceph_osd_request *req); static void __submit_request(struct ceph_osd_request *req, bool wrlocked) @@ -1635,6 +1639,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) enum calc_target_result ct_res; bool need_send = false; bool promoted = false; + bool need_abort = false; WARN_ON(req->r_tid); dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); @@ -1650,8 +1655,13 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) goto promote; } - if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && - ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { + if (osdc->osdmap->epoch < osdc->epoch_barrier) { + dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch, + osdc->epoch_barrier); + req->r_t.paused = true; + maybe_request_map(osdc); + } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("req %p pausewr\n", req); req->r_t.paused = true; maybe_request_map(osdc); @@ -1669,6 +1679,8 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) pr_warn_ratelimited("FULL or reached pool quota\n"); req->r_t.paused = true; maybe_request_map(osdc); + if (req->r_abort_on_full) + need_abort = true; } else if (!osd_homeless(osd)) { need_send = true; } else { @@ -1685,6 +1697,8 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) link_request(osd, req); if (need_send) send_request(req); + else if (need_abort) + complete_request(req, -ENOSPC); mutex_unlock(&osd->lock); if (ct_res == CALC_TARGET_POOL_DNE) @@ -1799,6 +1813,97 @@ static void abort_request(struct ceph_osd_request *req, int err) complete_request(req, err); } +static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) +{ + if (likely(eb > osdc->epoch_barrier)) { + dout("updating epoch_barrier from %u to %u\n", + osdc->epoch_barrier, eb); + osdc->epoch_barrier = eb; + /* Request map if we're not to the barrier yet */ + if (eb > osdc->osdmap->epoch) + maybe_request_map(osdc); + } +} + +void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) +{ + down_read(&osdc->lock); + if (unlikely(eb > osdc->epoch_barrier)) { + up_read(&osdc->lock); + down_write(&osdc->lock); + update_epoch_barrier(osdc, eb); + up_write(&osdc->lock); + } else { + up_read(&osdc->lock); + } +} +EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier); + +/* + * Drop all pending requests that are stalled waiting on a full condition to + * clear, and complete them with ENOSPC as the return code. Set the + * osdc->epoch_barrier to the latest map epoch that we've seen if any were + * cancelled. + */ +static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc) +{ + struct rb_node *n; + bool victims = false; + + dout("enter abort_on_full\n"); + + if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc)) + goto out; + + /* Scan list and see if there is anything to abort */ + for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { + struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); + struct rb_node *m; + + m = rb_first(&osd->o_requests); + while (m) { + struct ceph_osd_request *req = rb_entry(m, + struct ceph_osd_request, r_node); + m = rb_next(m); + + if (req->r_abort_on_full) { + victims = true; + break; + } + } + if (victims) + break; + } + + if (!victims) + goto out; + + /* + * Update the barrier to current epoch if it's behind that point, + * since we know we have some calls to be aborted in the tree. + */ + update_epoch_barrier(osdc, osdc->osdmap->epoch); + + for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { + struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); + struct rb_node *m; + + m = rb_first(&osd->o_requests); + while (m) { + struct ceph_osd_request *req = rb_entry(m, + struct ceph_osd_request, r_node); + m = rb_next(m); + + if (req->r_abort_on_full && + (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || + pool_full(osdc, req->r_t.target_oloc.pool))) + abort_request(req, -ENOSPC); + } + } +out: + dout("return abort_on_full barrier=%u\n", osdc->epoch_barrier); +} + static void check_pool_dne(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; @@ -3252,11 +3357,13 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); - if (was_pauserd || was_pausewr || pauserd || pausewr) + if (was_pauserd || was_pausewr || pauserd || pausewr || + osdc->osdmap->epoch < osdc->epoch_barrier) maybe_request_map(osdc); kick_requests(osdc, &need_resend, &need_resend_linger); + ceph_osdc_abort_on_full(osdc); ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP, osdc->osdmap->epoch); up_write(&osdc->lock); @@ -3574,7 +3681,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); lreq->t.flags = CEPH_OSD_FLAG_WRITE; - lreq->mtime = CURRENT_TIME; + ktime_get_real_ts(&lreq->mtime); lreq->reg_req = alloc_linger_request(lreq); if (!lreq->reg_req) { @@ -3632,7 +3739,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); req->r_flags = CEPH_OSD_FLAG_WRITE; - req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&req->r_mtime); osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); @@ -4126,7 +4233,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) close_osd(osd); } up_write(&osdc->lock); - WARN_ON(atomic_read(&osdc->homeless_osd.o_ref) != 1); + WARN_ON(refcount_read(&osdc->homeless_osd.o_ref) != 1); osd_cleanup(&osdc->homeless_osd); WARN_ON(!list_empty(&osdc->osd_lru)); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ffe9e904d4d1d130b0353edbe45d50d236b4f74e..55e3a477f92d4cba92e342d13686b6e3b94204c0 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -317,6 +317,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) u32 yes; struct crush_rule *r; + err = -EINVAL; ceph_decode_32_safe(p, end, yes, bad); if (!yes) { dout("crush_decode NO rule %d off %x %p to %p\n", diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 6864007e64fc3236f6d118f8a4a1869255bbba92..ce09f73be759c562cb9ffe093eb1c44350c8adde 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -16,7 +16,7 @@ static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) void ceph_pagelist_release(struct ceph_pagelist *pl) { - if (!atomic_dec_and_test(&pl->refcnt)) + if (!refcount_dec_and_test(&pl->refcnt)) return; ceph_pagelist_unmap_tail(pl); while (!list_empty(&pl->head)) { diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c index 705414e78ae0b05d2d1b8d5d8f8e8fbb6007bfb4..e14a5d038656f004da16aa044fdecca9d094a71b 100644 --- a/net/ceph/snapshot.c +++ b/net/ceph/snapshot.c @@ -49,7 +49,7 @@ struct ceph_snap_context *ceph_create_snap_context(u32 snap_count, if (!snapc) return NULL; - atomic_set(&snapc->nref, 1); + refcount_set(&snapc->nref, 1); snapc->num_snaps = snap_count; return snapc; @@ -59,7 +59,7 @@ EXPORT_SYMBOL(ceph_create_snap_context); struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc) { if (sc) - atomic_inc(&sc->nref); + refcount_inc(&sc->nref); return sc; } EXPORT_SYMBOL(ceph_get_snap_context); @@ -68,7 +68,7 @@ void ceph_put_snap_context(struct ceph_snap_context *sc) { if (!sc) return; - if (atomic_dec_and_test(&sc->nref)) { + if (refcount_dec_and_test(&sc->nref)) { /*printk(" deleting snap_context %p\n", sc);*/ kfree(sc); } diff --git a/net/core/datagram.c b/net/core/datagram.c index f4947e737f34a0d5dafb2e8232260f46a43fbc27..db1866f2ffcf6e3ed505ce5c1c5a4f11e6a908bd 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); - } while (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)); + + if (!sk_can_busy_loop(sk)) + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); + } while (!skb_queue_empty(&sk->sk_receive_queue)); error = -EAGAIN; @@ -760,7 +764,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (msg_data_left(msg) < chunk) { if (__skb_checksum_complete(skb)) - goto csum_error; + return -EINVAL; if (skb_copy_datagram_msg(skb, hlen, msg, chunk)) goto fault; } else { @@ -768,15 +772,16 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter, chunk, &csum)) goto fault; - if (csum_fold(csum)) - goto csum_error; + + if (csum_fold(csum)) { + iov_iter_revert(&msg->msg_iter, chunk); + return -EINVAL; + } + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); } return 0; -csum_error: - iov_iter_revert(&msg->msg_iter, chunk); - return -EINVAL; fault: return -EFAULT; } diff --git a/net/core/dev.c b/net/core/dev.c index 533a6d6f60920a01e8a9cef8fdb408950b914b05..6d60149287a1868cd65fcc55a4e29edd7611def3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -1251,8 +1253,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) if (!new_ifalias) return -ENOMEM; dev->ifalias = new_ifalias; + memcpy(dev->ifalias, alias, len); + dev->ifalias[len] = 0; - strlcpy(dev->ifalias, alias, len+1); return len; } @@ -2450,6 +2453,9 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; + if (unlikely(!skb)) + return; + if (likely(atomic_read(&skb->users) == 1)) { smp_rmb(); atomic_set(&skb->users, 0); @@ -2972,6 +2978,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device __skb_linearize(skb)) goto out_kfree_skb; + if (validate_xmit_xfrm(skb, features)) + goto out_kfree_skb; + /* If packet is not checksummed and device does not * support checksumming for this protocol, complete * checksumming here. @@ -3441,6 +3450,7 @@ EXPORT_SYMBOL(netdev_max_backlog); int netdev_tstamp_prequeue __read_mostly = 1; int netdev_budget __read_mostly = 300; +unsigned int __read_mostly netdev_budget_usecs = 2000; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ @@ -4227,7 +4237,7 @@ static int __netif_receive_skb(struct sk_buff *skb) int ret; if (sk_memalloc_socks() && skb_pfmemalloc(skb)) { - unsigned long pflags = current->flags; + unsigned int noreclaim_flag; /* * PFMEMALLOC skbs are special, they should @@ -4238,15 +4248,134 @@ static int __netif_receive_skb(struct sk_buff *skb) * Use PF_MEMALLOC as this saves us from propagating the allocation * context down to all allocation sites. */ - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); ret = __netif_receive_skb_core(skb, true); - tsk_restore_flags(current, pflags, PF_MEMALLOC); + memalloc_noreclaim_restore(noreclaim_flag); } else ret = __netif_receive_skb_core(skb, false); return ret; } +static struct static_key generic_xdp_needed __read_mostly; + +static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) +{ + struct bpf_prog *new = xdp->prog; + int ret = 0; + + switch (xdp->command) { + case XDP_SETUP_PROG: { + struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); + + rcu_assign_pointer(dev->xdp_prog, new); + if (old) + bpf_prog_put(old); + + if (old && !new) { + static_key_slow_dec(&generic_xdp_needed); + } else if (new && !old) { + static_key_slow_inc(&generic_xdp_needed); + dev_disable_lro(dev); + } + break; + } + + case XDP_QUERY_PROG: + xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct xdp_buff xdp; + u32 act = XDP_DROP; + void *orig_data; + int hlen, off; + u32 mac_len; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_cloned(skb)) + return XDP_PASS; + + if (skb_linearize(skb)) + goto do_drop; + + /* The XDP program wants to see the packet starting at the MAC + * header. + */ + mac_len = skb->data - skb_mac_header(skb); + hlen = skb_headlen(skb) + mac_len; + xdp.data = skb->data - mac_len; + xdp.data_end = xdp.data + hlen; + xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + off = xdp.data - orig_data; + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + switch (act) { + case XDP_TX: + __skb_push(skb, mac_len); + /* fall through */ + case XDP_PASS: + break; + + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(skb->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + do_drop: + kfree_skb(skb); + break; + } + + return act; +} + +/* When doing generic XDP we have to bypass the qdisc layer and the + * network taps in order to match in-driver-XDP behavior. + */ +static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + txq = netdev_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + if (free_skb) { + trace_xdp_exception(dev, xdp_prog, XDP_TX); + kfree_skb(skb); + } +} + static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; @@ -4258,6 +4387,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); + if (static_key_false(&generic_xdp_needed)) { + struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + + if (xdp_prog) { + u32 act = netif_receive_generic_xdp(skb, xdp_prog); + + if (act != XDP_PASS) { + rcu_read_unlock(); + if (act == XDP_TX) + generic_xdp_tx(skb, xdp_prog); + return NET_RX_DROP; + } + } + } + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4490,7 +4634,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff enum gro_result ret; int grow; - if (!(skb->dev->features & NETIF_F_GRO)) + if (netif_elide_gro(skb->dev)) goto normal; if (skb->csum_bad) @@ -4805,6 +4949,19 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb) } EXPORT_SYMBOL(__skb_gro_checksum_complete); +static void net_rps_send_ipi(struct softnet_data *remsd) +{ +#ifdef CONFIG_RPS + while (remsd) { + struct softnet_data *next = remsd->rps_ipi_next; + + if (cpu_online(remsd->cpu)) + smp_call_function_single_async(remsd->cpu, &remsd->csd); + remsd = next; + } +#endif +} + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. @@ -4820,14 +4977,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) local_irq_enable(); /* Send pending IPI's to kick RPS processing on remote cpus. */ - while (remsd) { - struct softnet_data *next = remsd->rps_ipi_next; - - if (cpu_online(remsd->cpu)) - smp_call_function_single_async(remsd->cpu, - &remsd->csd); - remsd = next; - } + net_rps_send_ipi(remsd); } else #endif local_irq_enable(); @@ -5060,27 +5210,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) do_softirq(); } -bool sk_busy_loop(struct sock *sk, int nonblock) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); void *have_poll_lock = NULL; struct napi_struct *napi; - int rc; restart: - rc = false; napi_poll = NULL; rcu_read_lock(); - napi = napi_by_id(sk->sk_napi_id); + napi = napi_by_id(napi_id); if (!napi) goto out; preempt_disable(); for (;;) { - rc = 0; + int work = 0; + local_bh_disable(); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); @@ -5098,16 +5249,15 @@ bool sk_busy_loop(struct sock *sk, int nonblock) have_poll_lock = netpoll_poll_lock(napi); napi_poll = napi->poll; } - rc = napi_poll(napi, BUSY_POLL_BUDGET); - trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); + work = napi_poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi, work, BUSY_POLL_BUDGET); count: - if (rc > 0) - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); + if (work > 0) + __NET_ADD_STATS(dev_net(napi->dev), + LINUX_MIB_BUSYPOLLRXPACKETS, work); local_bh_enable(); - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || - busy_loop_timeout(end_time)) + if (!loop_end || loop_end(loop_end_arg, start_time)) break; if (unlikely(need_resched())) { @@ -5116,9 +5266,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock) preempt_enable(); rcu_read_unlock(); cond_resched(); - rc = !skb_queue_empty(&sk->sk_receive_queue); - if (rc || busy_loop_timeout(end_time)) - return rc; + if (loop_end(loop_end_arg, start_time)) + return; goto restart; } cpu_relax(); @@ -5126,12 +5275,10 @@ bool sk_busy_loop(struct sock *sk, int nonblock) if (napi_poll) busy_poll_stop(napi, have_poll_lock); preempt_enable(); - rc = !skb_queue_empty(&sk->sk_receive_queue); out: rcu_read_unlock(); - return rc; } -EXPORT_SYMBOL(sk_busy_loop); +EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ @@ -5143,10 +5290,10 @@ static void napi_hash_add(struct napi_struct *napi) spin_lock(&napi_hash_lock); - /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + /* 0..NR_CPUS range is reserved for sender_cpu use */ do { - if (unlikely(++napi_gen_id < NR_CPUS + 1)) - napi_gen_id = NR_CPUS + 1; + if (unlikely(++napi_gen_id < MIN_NAPI_ID)) + napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); napi->napi_id = napi_gen_id; @@ -5310,7 +5457,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + + usecs_to_jiffies(netdev_budget_usecs); int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); @@ -6711,47 +6859,72 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_QUERY_PROG; + + /* Query must always succeed. */ + WARN_ON(xdp_op(dev, &xdp) < 0); + return xdp.prog_attached; +} + +static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, + struct netlink_ext_ack *extack, + struct bpf_prog *prog) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_SETUP_PROG; + xdp.extack = extack; + xdp.prog = prog; + + return xdp_op(dev, &xdp); +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device + * @extack: netlink extended ack * @fd: new program fd or negative value to clear * @flags: xdp-related flags * * Set or clear a bpf program for a device */ -int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags) { const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - struct netdev_xdp xdp; + xdp_op_t xdp_op, xdp_chk; int err; ASSERT_RTNL(); - if (!ops->ndo_xdp) + xdp_op = xdp_chk = ops->ndo_xdp; + if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) return -EOPNOTSUPP; + if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) + xdp_op = generic_xdp_install; + if (xdp_op == xdp_chk) + xdp_chk = generic_xdp_install; + if (fd >= 0) { - if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - - err = ops->ndo_xdp(dev, &xdp); - if (err < 0) - return err; - if (xdp.prog_attached) - return -EBUSY; - } + if (xdp_chk && __dev_xdp_attached(dev, xdp_chk)) + return -EEXIST; + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && + __dev_xdp_attached(dev, xdp_op)) + return -EBUSY; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_SETUP_PROG; - xdp.prog = prog; - - err = ops->ndo_xdp(dev, &xdp); + err = dev_xdp_install(dev, xdp_op, extack, prog); if (err < 0 && prog) bpf_prog_put(prog); @@ -7102,13 +7275,10 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, else netif_dormant_off(dev); - if (netif_carrier_ok(rootdev)) { - if (!netif_carrier_ok(dev)) - netif_carrier_on(dev); - } else { - if (netif_carrier_ok(dev)) - netif_carrier_off(dev); - } + if (netif_carrier_ok(rootdev)) + netif_carrier_on(dev); + else + netif_carrier_off(dev); } EXPORT_SYMBOL(netif_stacked_transfer_operstate); @@ -7121,12 +7291,10 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); - rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!rx) { - rx = vzalloc(sz); - if (!rx) - return -ENOMEM; - } + rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + if (!rx) + return -ENOMEM; + dev->_rx = rx; for (i = 0; i < count; i++) @@ -7163,12 +7331,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) if (count < 1 || count > 0xffff) return -EINVAL; - tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!tx) { - tx = vzalloc(sz); - if (!tx) - return -ENOMEM; - } + tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + if (!tx) + return -ENOMEM; + dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -7342,6 +7508,8 @@ int register_netdevice(struct net_device *dev) err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + if (dev->priv_destructor) + dev->priv_destructor(dev); goto out; } EXPORT_SYMBOL(register_netdevice); @@ -7549,8 +7717,10 @@ void netdev_run_todo(void) WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); - if (dev->destructor) - dev->destructor(dev); + if (dev->priv_destructor) + dev->priv_destructor(dev); + if (dev->needs_free_netdev) + free_netdev(dev); /* Report a network device has been unregistered */ rtnl_lock(); @@ -7702,9 +7872,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); - if (!p) - p = vzalloc(alloc_size); + p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT); if (!p) return NULL; @@ -7791,6 +7959,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs); void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; + struct bpf_prog *prog; might_sleep(); netif_free_tx_queues(dev); @@ -7809,6 +7978,12 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + prog = rcu_dereference_protected(dev->xdp_prog, 1); + if (prog) { + bpf_prog_put(prog); + static_key_slow_dec(&generic_xdp_needed); + } + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); @@ -8028,7 +8203,7 @@ static int dev_cpu_dead(unsigned int oldcpu) struct sk_buff **list_skb; struct sk_buff *skb; unsigned int cpu; - struct softnet_data *sd, *oldsd; + struct softnet_data *sd, *oldsd, *remsd = NULL; local_irq_disable(); cpu = smp_processor_id(); @@ -8069,6 +8244,13 @@ static int dev_cpu_dead(unsigned int oldcpu) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); +#ifdef CONFIG_RPS + remsd = oldsd->rps_ipi_list; + oldsd->rps_ipi_list = NULL; +#endif + /* send out pending IPI's on offline CPU */ + net_rps_send_ipi(remsd); + /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx_ni(skb); diff --git a/net/core/devlink.c b/net/core/devlink.c index e9c1e6acfb6d196d4373dcc36bcf76577952dd32..a0adfc31a3fe854cd52600f6b2924255aee521c2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1397,10 +1397,10 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; + u8 inline_mode, encap_mode; void *hdr; int err = 0; u16 mode; - u8 inline_mode; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) @@ -1429,6 +1429,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; } + if (ops->eswitch_encap_mode_get) { + err = ops->eswitch_encap_mode_get(devlink, &encap_mode); + if (err) + goto nla_put_failure; + err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode); + if (err) + goto nla_put_failure; + } + genlmsg_end(msg, hdr); return 0; @@ -1468,9 +1477,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; - u16 mode; - u8 inline_mode; + u8 inline_mode, encap_mode; int err = 0; + u16 mode; if (!ops) return -EOPNOTSUPP; @@ -1494,7 +1503,699 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, return err; } + if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) { + if (!ops->eswitch_encap_mode_set) + return -EOPNOTSUPP; + encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]); + err = ops->eswitch_encap_mode_set(devlink, encap_mode); + if (err) + return err; + } + + return 0; +} + +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + struct devlink_dpipe_header *header = match->header; + struct devlink_dpipe_field *field = &header->fields[match->field_id]; + struct nlattr *match_attr; + + match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH); + if (!match_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, match_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, match_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_match_put); + +static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *matches_attr; + + matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES); + if (!matches_attr) + return -EMSGSIZE; + + if (table->table_ops->matches_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, matches_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, matches_attr); + return -EMSGSIZE; +} + +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + struct devlink_dpipe_header *header = action->header; + struct devlink_dpipe_field *field = &header->fields[action->field_id]; + struct nlattr *action_attr; + + action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION); + if (!action_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, action_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, action_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_action_put); + +static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *actions_attr; + + actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); + if (!actions_attr) + return -EMSGSIZE; + + if (table->table_ops->actions_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, actions_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, actions_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_table_put(struct sk_buff *skb, + struct devlink_dpipe_table *table) +{ + struct nlattr *table_attr; + + table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); + if (!table_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + table->counters_enabled)) + goto nla_put_failure; + + if (devlink_dpipe_matches_put(table, skb)) + goto nla_put_failure; + + if (devlink_dpipe_actions_put(table, skb)) + goto nla_put_failure; + + nla_nest_end(skb, table_attr); return 0; + +nla_put_failure: + nla_nest_cancel(skb, table_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb, + struct genl_info *info) +{ + int err; + + if (*pskb) { + err = genlmsg_reply(*pskb, info); + if (err) + return err; + } + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!*pskb) + return -ENOMEM; + return 0; +} + +static int devlink_dpipe_tables_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + struct nlattr *tables_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + bool incomplete; + void *hdr; + int i; + int err; + + table = list_first_entry(dpipe_tables, + struct devlink_dpipe_table, list); +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES); + if (!tables_attr) + goto nla_put_failure; + + i = 0; + incomplete = false; + list_for_each_entry_from(table, dpipe_tables, list) { + if (!table_name) { + err = devlink_dpipe_table_put(skb, table); + if (err) { + if (!i) + goto err_table_put; + incomplete = true; + break; + } + } else { + if (!strcmp(table->name, table_name)) { + err = devlink_dpipe_table_put(skb, table); + if (err) + break; + } + } + i++; + } + + nla_nest_end(skb, tables_attr); + genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name = NULL; + + if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + + return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0, + &devlink->dpipe_table_list, + table_name); +} + +static int devlink_dpipe_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE, + value->value_size, value->value)) + return -EMSGSIZE; + if (value->mask) + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK, + value->value_size, value->mask)) + return -EMSGSIZE; + if (value->mapping_valid) + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING, + value->mapping_value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->action) + return -EINVAL; + if (devlink_dpipe_action_put(skb, value->action)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *action_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + action_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ACTION_VALUE); + if (!action_attr) + return -EMSGSIZE; + err = devlink_dpipe_action_value_put(skb, &values[i]); + if (err) + goto err_action_value_put; + nla_nest_end(skb, action_attr); + } + return 0; + +err_action_value_put: + nla_nest_cancel(skb, action_attr); + return err; +} + +static int devlink_dpipe_match_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->match) + return -EINVAL; + if (devlink_dpipe_match_put(skb, value->match)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_match_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *match_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + match_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_MATCH_VALUE); + if (!match_attr) + return -EMSGSIZE; + err = devlink_dpipe_match_value_put(skb, &values[i]); + if (err) + goto err_match_value_put; + nla_nest_end(skb, match_attr); + } + return 0; + +err_match_value_put: + nla_nest_cancel(skb, match_attr); + return err; +} + +static int devlink_dpipe_entry_put(struct sk_buff *skb, + struct devlink_dpipe_entry *entry) +{ + struct nlattr *entry_attr, *matches_attr, *actions_attr; + int err; + + entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (entry->counter_valid) + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, + entry->counter, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + matches_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); + if (!matches_attr) + goto nla_put_failure; + + err = devlink_dpipe_match_values_put(skb, entry->match_values, + entry->match_values_count); + if (err) { + nla_nest_cancel(skb, matches_attr); + goto err_match_values_put; + } + nla_nest_end(skb, matches_attr); + + actions_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); + if (!actions_attr) + goto nla_put_failure; + + err = devlink_dpipe_action_values_put(skb, entry->action_values, + entry->action_values_count); + if (err) { + nla_nest_cancel(skb, actions_attr); + goto err_action_values_put; + } + nla_nest_end(skb, actions_attr); + + nla_nest_end(skb, entry_attr); + return 0; + +nla_put_failure: + err = -EMSGSIZE; +err_match_values_put: +err_action_values_put: + nla_nest_cancel(skb, entry_attr); + return err; +} + +static struct devlink_dpipe_table * +devlink_dpipe_table_find(struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + list_for_each_entry_rcu(table, dpipe_tables, list) { + if (!strcmp(table->name, table_name)) + return table; + } + return NULL; +} + +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink *devlink; + int err; + + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb, + dump_ctx->info); + if (err) + return err; + + dump_ctx->hdr = genlmsg_put(dump_ctx->skb, + dump_ctx->info->snd_portid, + dump_ctx->info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, + dump_ctx->cmd); + if (!dump_ctx->hdr) + goto nla_put_failure; + + devlink = dump_ctx->info->user_ptr[0]; + if (devlink_nl_put_handle(dump_ctx->skb, devlink)) + goto nla_put_failure; + dump_ctx->nest = nla_nest_start(dump_ctx->skb, + DEVLINK_ATTR_DPIPE_ENTRIES); + if (!dump_ctx->nest) + goto nla_put_failure; + return 0; + +nla_put_failure: + genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr); + nlmsg_free(dump_ctx->skb); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare); + +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return devlink_dpipe_entry_put(dump_ctx->skb, entry); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append); + +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + nla_nest_end(dump_ctx->skb, dump_ctx->nest); + genlmsg_end(dump_ctx->skb, dump_ctx->hdr); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close); + +static int devlink_dpipe_entries_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_table *table) +{ + struct devlink_dpipe_dump_ctx dump_ctx; + struct nlmsghdr *nlh; + int err; + + dump_ctx.skb = NULL; + dump_ctx.cmd = cmd; + dump_ctx.info = info; + + err = table->table_ops->entries_dump(table->priv, + table->counters_enabled, + &dump_ctx); + if (err) + goto err_entries_dump; + +send_done: + nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(dump_ctx.skb, info); + +err_entries_dump: +err_skb_send_alloc: + genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); + nlmsg_free(dump_ctx.skb); + return err; +} + +static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + const char *table_name; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (!table->table_ops->entries_dump) + return -EINVAL; + + return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET, + 0, table); +} + +static int devlink_dpipe_fields_put(struct sk_buff *skb, + const struct devlink_dpipe_header *header) +{ + struct devlink_dpipe_field *field; + struct nlattr *field_attr; + int i; + + for (i = 0; i < header->fields_count; i++) { + field = &header->fields[i]; + field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD); + if (!field_attr) + return -EMSGSIZE; + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type)) + goto nla_put_failure; + nla_nest_end(skb, field_attr); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, field_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_header_put(struct sk_buff *skb, + struct devlink_dpipe_header *header) +{ + struct nlattr *fields_attr, *header_attr; + int err; + + header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER); + if (!header_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS); + if (!fields_attr) + goto nla_put_failure; + + err = devlink_dpipe_fields_put(skb, header); + if (err) { + nla_nest_cancel(skb, fields_attr); + goto nla_put_failure; + } + nla_nest_end(skb, fields_attr); + nla_nest_end(skb, header_attr); + return 0; + +nla_put_failure: + err = -EMSGSIZE; + nla_nest_cancel(skb, header_attr); + return err; +} + +static int devlink_dpipe_headers_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_headers * + dpipe_headers) +{ + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *headers_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + void *hdr; + int i, j; + int err; + + i = 0; +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS); + if (!headers_attr) + goto nla_put_failure; + + j = 0; + for (; i < dpipe_headers->headers_count; i++) { + err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]); + if (err) { + if (!j) + goto err_table_put; + break; + } + j++; + } + nla_nest_end(skb, headers_attr); + genlmsg_end(skb, hdr); + if (i != dpipe_headers->headers_count) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + if (!devlink->dpipe_headers) + return -EOPNOTSUPP; + return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET, + 0, devlink->dpipe_headers); +} + +static int devlink_dpipe_table_counters_set(struct devlink *devlink, + const char *table_name, + bool enable) +{ + struct devlink_dpipe_table *table; + + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (table->counter_control_extern) + return -EOPNOTSUPP; + + if (!(table->counters_enabled ^ enable)) + return 0; + + table->counters_enabled = enable; + if (table->table_ops->counters_set_update) + table->table_ops->counters_set_update(table->priv, enable); + return 0; +} + +static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name; + bool counters_enable; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] || + !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]); + + return devlink_dpipe_table_counters_set(devlink, table_name, + counters_enable); } static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { @@ -1512,6 +2213,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1644,6 +2348,34 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .doit = devlink_nl_cmd_dpipe_table_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .doit = devlink_nl_cmd_dpipe_entries_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .doit = devlink_nl_cmd_dpipe_headers_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .doit = devlink_nl_cmd_dpipe_table_counters_set, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -1680,6 +2412,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->sb_list); + INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -1880,6 +2613,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) } EXPORT_SYMBOL_GPL(devlink_sb_unregister); +/** + * devlink_dpipe_headers_register - register dpipe headers + * + * @devlink: devlink + * @dpipe_headers: dpipe header array + * + * Register the headers supported by hardware. + */ +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = dpipe_headers; + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); + +/** + * devlink_dpipe_headers_unregister - unregister dpipe headers + * + * @devlink: devlink + * + * Unregister the headers supported by hardware. + */ +void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = NULL; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); + +/** + * devlink_dpipe_table_counter_enabled - check if counter allocation + * required + * @devlink: devlink + * @table_name: tables name + * + * Used by driver to check if counter allocation is required. + * After counter allocation is turned on the table entries + * are updated to include counter statistics. + * + * After that point on the driver must respect the counter + * state so that each entry added to the table is added + * with a counter. + */ +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + bool enabled; + + rcu_read_lock(); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + enabled = false; + if (table) + enabled = table->counters_enabled; + rcu_read_unlock(); + return enabled; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); + +/** + * devlink_dpipe_table_register - register dpipe table + * + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @size: size + * @counter_control_extern: external control for counters + */ +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + struct devlink_dpipe_table *table; + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) + return -EEXIST; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->name = table_name; + table->table_ops = table_ops; + table->priv = priv; + table->size = size; + table->counter_control_extern = counter_control_extern; + + mutex_lock(&devlink_mutex); + list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); + +/** + * devlink_dpipe_table_unregister - unregister dpipe table + * + * @devlink: devlink + * @table_name: table name + */ +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + mutex_lock(&devlink_mutex); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + goto unlock; + list_del_rcu(&table->list); + mutex_unlock(&devlink_mutex); + kfree_rcu(table, rcu); + return; +unlock: + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); + static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index fb55327dcfeabdaf3eeecc3a8d176ae215612649..70ccda233bd1f1aab18535e6d9d0419bb9a1a23b 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -412,9 +412,8 @@ static int __init init_net_drop_monitor(void) for_each_possible_cpu(cpu) { data = &per_cpu(dm_cpu_data, cpu); INIT_WORK(&data->dm_alert_work, send_dm_alert); - init_timer(&data->send_timer); - data->send_timer.data = (unsigned long)data; - data->send_timer.function = sched_send_work; + setup_timer(&data->send_timer, sched_send_work, + (unsigned long)data); spin_lock_init(&data->lock); reset_per_cpu_data(data); } diff --git a/net/core/dst.c b/net/core/dst.c index 960e503b5a529a2c4f1866f49c150493ee98d7da..13ba4a090c410e40853178df4ffca85f02b71b03 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard_out); -const u32 dst_default_metrics[RTAX_MAX + 1] = { +const struct dst_metrics dst_default_metrics = { /* This initializer is needed to force linker to place this variable * into const section. Otherwise it might end into bss section. * We really want to avoid false sharing on this variable, and catch * any writes on it. */ - [RTAX_MAX] = 0xdeadbeef, + .refcnt = ATOMIC_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, if (dev) dev_hold(dev); dst->ops = ops; - dst_init_metrics(dst, dst_default_metrics, true); + dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; dst->path = dst; dst->from = NULL; @@ -314,25 +314,30 @@ EXPORT_SYMBOL(dst_release); u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p) { - u32 *old_p = __DST_METRICS_PTR(old); + struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + atomic_set(&p->refcnt, 1); + memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { kfree(p); - p = __DST_METRICS_PTR(prev); + p = (struct dst_metrics *)__DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; + } else if (prev & DST_METRICS_REFCOUNTED) { + if (atomic_dec_and_test(&old_p->refcnt)) + kfree(old_p); } } - return p; + BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0); + return (u32 *)p; } EXPORT_SYMBOL(dst_cow_metrics_generic); @@ -341,7 +346,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) { unsigned long prev, new; - new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY; + new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY; prev = cmpxchg(&dst->_metrics, old, new); if (prev == old) kfree(__DST_METRICS_PTR(old)); @@ -464,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, spin_lock_bh(&dst_garbage.lock); dst = dst_garbage.list; dst_garbage.list = NULL; + /* The code in dst_ifdown places a hold on the loopback device. + * If the gc entry processing is set to expire after a lengthy + * interval, this hold can cause netdev_wait_allrefs() to hang + * out and wait for a long time -- until the the loopback + * interface is released. If we're really unlucky, it'll emit + * pr_emerg messages to console too. Reset the interval here, + * so dst cleanups occur in a more timely fashion. + */ + if (dst_garbage.timer_inc > DST_GC_INC) { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + mod_delayed_work(system_wq, &dst_gc_work, + dst_garbage.timer_expires); + } spin_unlock_bh(&dst_garbage.lock); if (last) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index aecb2c7241b697e79628fdb79467f5087b2bbf9f..03111a2d6653b8f06427540eb5864b255af4ead3 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -90,6 +90,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", + [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", @@ -103,12 +104,15 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_HW_TC_BIT] = "hw-tc-offload", + [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", + [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", }; static const char rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { [ETH_RSS_HASH_TOP_BIT] = "toeplitz", [ETH_RSS_HASH_XOR_BIT] = "xor", + [ETH_RSS_HASH_CRC32_BIT] = "crc32", }; static const char diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index b6791d94841d56cf8b1027d3ba2d71dd21302caf..f21c4d3aeae0cf59a8d9239cf1e581e6d136c740 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -23,6 +23,20 @@ static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(~0), }; +bool fib_rule_matchall(const struct fib_rule *rule) +{ + if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || + rule->flags) + return false; + if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) + return false; + if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || + !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib_rule_matchall); + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -354,7 +368,8 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, return 0; } -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -372,7 +387,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); if (err < 0) goto errout; @@ -425,6 +440,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[FRA_TUN_ID]) rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); + err = -EINVAL; if (tb[FRA_L3MDEV]) { #ifdef CONFIG_NET_L3_MASTER_DEV rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]); @@ -446,7 +462,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) else rule->suppress_ifgroup = -1; - err = -EINVAL; if (tb[FRA_GOTO]) { if (rule->action != FR_ACT_GOTO) goto errout_free; @@ -547,7 +562,8 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) } EXPORT_SYMBOL_GPL(fib_nl_newrule); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -566,7 +582,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); if (err < 0) goto errout; @@ -576,8 +592,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[FRA_UID_RANGE]) { range = nla_get_kuid_range(tb); - if (!uid_range_set(&range)) + if (!uid_range_set(&range)) { + err = -EINVAL; goto errout; + } } else { range = fib_kuid_range_unset; } diff --git a/net/core/filter.c b/net/core/filter.c index ebaeaf2e46e8bd0171379604930d232f205afd07..a6bb95fa87b26a6627ba38cd9a94269c7e59f73d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -91,7 +93,12 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); + struct sock *save_sk = skb->sk; + unsigned int pkt_len; + + skb->sk = sk; + pkt_len = bpf_prog_run_save_cb(filter->prog, skb); + skb->sk = save_sk; err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; } rcu_read_unlock(); @@ -348,7 +355,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, * @new_prog: buffer where converted program will be stored * @new_len: pointer to store length of converted program * - * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. + * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' + * style extended BPF (eBPF). * Conversion workflow: * * 1) First pass for calculating the new program length: @@ -928,7 +936,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu) */ static void sk_filter_release(struct sk_filter *fp) { - if (atomic_dec_and_test(&fp->refcnt)) + if (refcount_dec_and_test(&fp->refcnt)) call_rcu(&fp->rcu, sk_filter_release_rcu); } @@ -943,20 +951,27 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) /* try to charge the socket memory if there is space available * return true on success */ -bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) +static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) { u32 filter_size = bpf_prog_size(fp->prog->len); /* same check as in sock_kmalloc() */ if (filter_size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { - atomic_inc(&fp->refcnt); atomic_add(filter_size, &sk->sk_omem_alloc); return true; } return false; } +bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) +{ + bool ret = __sk_filter_charge(sk, fp); + if (ret) + refcount_inc(&fp->refcnt); + return ret; +} + static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) { struct sock_filter *old_prog; @@ -1179,12 +1194,12 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return -ENOMEM; fp->prog = prog; - atomic_set(&fp->refcnt, 0); - if (!sk_filter_charge(sk, fp)) { + if (!__sk_filter_charge(sk, fp)) { kfree(fp); return -ENOMEM; } + refcount_set(&fp->refcnt, 1); old_fp = rcu_dereference_protected(sk->sk_filter, lockdep_sock_is_held(sk)); @@ -2266,6 +2281,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_skb_change_head || func == bpf_skb_change_tail || func == bpf_skb_pull_data || + func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head) @@ -2599,6 +2615,36 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) +{ + return skb->sk ? sock_gen_cookie(skb->sk) : 0; +} + +static const struct bpf_func_proto bpf_get_socket_cookie_proto = { + .func = bpf_get_socket_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) +{ + struct sock *sk = sk_to_full_sk(skb->sk); + kuid_t kuid; + + if (!sk || !sk_fullsock(sk)) + return overflowuid; + kuid = sock_net_uid(sock_net(sk), sk); + return from_kuid_munged(sock_net(sk)->user_ns, kuid); +} + +static const struct bpf_func_proto bpf_get_socket_uid_proto = { + .func = bpf_get_socket_uid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2633,6 +2679,10 @@ sk_filter_func_proto(enum bpf_func_id func_id) switch (func_id) { case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; default: return bpf_base_func_proto(func_id); } @@ -2692,6 +2742,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; default: return bpf_base_func_proto(func_id); } @@ -2715,12 +2769,7 @@ xdp_func_proto(enum bpf_func_id func_id) static const struct bpf_func_proto * cg_skb_func_proto(enum bpf_func_id func_id) { - switch (func_id) { - case BPF_FUNC_skb_load_bytes: - return &bpf_skb_load_bytes_proto; - default: - return bpf_base_func_proto(func_id); - } + return sk_filter_func_proto(func_id); } static const struct bpf_func_proto * @@ -3154,6 +3203,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct __sk_buff, napi_id): +#if defined(CONFIG_NET_RX_BUSY_POLL) + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sk_buff, napi_id)); + *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +#else + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; } @@ -3252,111 +3314,55 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -static const struct bpf_verifier_ops sk_filter_ops = { +const struct bpf_verifier_ops sk_filter_prog_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -static const struct bpf_verifier_ops tc_cls_act_ops = { +const struct bpf_verifier_ops tc_cls_act_prog_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops xdp_ops = { +const struct bpf_verifier_ops xdp_prog_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .test_run = bpf_prog_test_run_xdp, }; -static const struct bpf_verifier_ops cg_skb_ops = { +const struct bpf_verifier_ops cg_skb_prog_ops = { .get_func_proto = cg_skb_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops lwt_inout_ops = { +const struct bpf_verifier_ops lwt_inout_prog_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops lwt_xmit_ops = { +const struct bpf_verifier_ops lwt_xmit_prog_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; -static const struct bpf_verifier_ops cg_sock_ops = { +const struct bpf_verifier_ops cg_sock_prog_ops = { .get_func_proto = bpf_base_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -static struct bpf_prog_type_list sk_filter_type __ro_after_init = { - .ops = &sk_filter_ops, - .type = BPF_PROG_TYPE_SOCKET_FILTER, -}; - -static struct bpf_prog_type_list sched_cls_type __ro_after_init = { - .ops = &tc_cls_act_ops, - .type = BPF_PROG_TYPE_SCHED_CLS, -}; - -static struct bpf_prog_type_list sched_act_type __ro_after_init = { - .ops = &tc_cls_act_ops, - .type = BPF_PROG_TYPE_SCHED_ACT, -}; - -static struct bpf_prog_type_list xdp_type __ro_after_init = { - .ops = &xdp_ops, - .type = BPF_PROG_TYPE_XDP, -}; - -static struct bpf_prog_type_list cg_skb_type __ro_after_init = { - .ops = &cg_skb_ops, - .type = BPF_PROG_TYPE_CGROUP_SKB, -}; - -static struct bpf_prog_type_list lwt_in_type __ro_after_init = { - .ops = &lwt_inout_ops, - .type = BPF_PROG_TYPE_LWT_IN, -}; - -static struct bpf_prog_type_list lwt_out_type __ro_after_init = { - .ops = &lwt_inout_ops, - .type = BPF_PROG_TYPE_LWT_OUT, -}; - -static struct bpf_prog_type_list lwt_xmit_type __ro_after_init = { - .ops = &lwt_xmit_ops, - .type = BPF_PROG_TYPE_LWT_XMIT, -}; - -static struct bpf_prog_type_list cg_sock_type __ro_after_init = { - .ops = &cg_sock_ops, - .type = BPF_PROG_TYPE_CGROUP_SOCK -}; - -static int __init register_sk_filter_ops(void) -{ - bpf_register_prog_type(&sk_filter_type); - bpf_register_prog_type(&sched_cls_type); - bpf_register_prog_type(&sched_act_type); - bpf_register_prog_type(&xdp_type); - bpf_register_prog_type(&cg_skb_type); - bpf_register_prog_type(&cg_sock_type); - bpf_register_prog_type(&lwt_in_type); - bpf_register_prog_type(&lwt_out_type); - bpf_register_prog_type(&lwt_xmit_type); - - return 0; -} -late_initcall(register_sk_filter_ops); - int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/flow.c b/net/core/flow.c index f765c11d8df567d704998185482c3d220280c148..f7f5d1932a2720767dd31f4033f196815ff08447 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -47,7 +47,7 @@ struct flow_flush_info { static struct kmem_cache *flow_cachep __read_mostly; -#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) +#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) static void flow_cache_new_hashrnd(unsigned long arg) @@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work) } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - int deleted, struct list_head *gc_list, + unsigned int deleted, + struct list_head *gc_list, struct netns_xfrm *xfrm) { if (deleted) { @@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, static void __flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp, - int shrink_to) + unsigned int shrink_to) { struct flow_cache_entry *fle; struct hlist_node *tmp; LIST_HEAD(gc_list); - int i, deleted = 0; + unsigned int deleted = 0; struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, flow_cache_global); + unsigned int i; for (i = 0; i < flow_cache_hash_size(fc); i++) { - int saved = 0; + unsigned int saved = 0; hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { @@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, static void flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp) { - int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); + unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); __flow_cache_shrink(fc, fcp, shrink_to); } @@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, const struct flowi *key, - size_t keysize) + unsigned int keysize) { const u32 *k = (const u32 *) key; const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); @@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc, * important assumptions that we can here, such as alignment. */ static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - size_t keysize) + unsigned int keysize) { const flow_compare_t *k1, *k1_lim, *k2; @@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; struct flow_cache_object *flo; - size_t keysize; + unsigned int keysize; unsigned int hash; local_bh_disable(); @@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache_entry *fle; struct hlist_node *tmp; LIST_HEAD(gc_list); - int i, deleted = 0; + unsigned int deleted = 0; struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, flow_cache_global); + unsigned int i; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { @@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data) static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp; - int i; + unsigned int i; fcp = per_cpu_ptr(fc->percpu, cpu); for (i = 0; i < flow_cache_hash_size(fc); i++) @@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net) static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); + unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); if (!fcp->hash_table) { fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %zu\n", sz); + pr_err("NET: failed to allocate flow cache sz %u\n", sz); return -ENOMEM; } fcp->hash_rnd_recalc = 1; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d98d4998213da6103665d62d5a85613631236f19..28d94bce4df8eb046dd6a03103c9249b29e74cd8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -113,6 +113,235 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); +enum flow_dissect_ret { + FLOW_DISSECT_RET_OUT_GOOD, + FLOW_DISSECT_RET_OUT_BAD, + FLOW_DISSECT_RET_OUT_PROTO_AGAIN, +}; + +static enum flow_dissect_ret +__skb_flow_dissect_mpls(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int nhoff, int hlen) +{ + struct flow_dissector_key_keyid *key_keyid; + struct mpls_label *hdr, _hdr[2]; + u32 entry, label; + + if (!dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY) && + !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) + return FLOW_DISSECT_RET_OUT_GOOD; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, + hlen, &_hdr); + if (!hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + entry = ntohl(hdr[0].entry); + label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_dissector_key_mpls *key_mpls; + + key_mpls = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS, + target_container); + key_mpls->mpls_label = label; + key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK) + >> MPLS_LS_TTL_SHIFT; + key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK) + >> MPLS_LS_TC_SHIFT; + key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK) + >> MPLS_LS_S_SHIFT; + } + + if (label == MPLS_LABEL_ENTROPY) { + key_keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY, + target_container); + key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK); + } + return FLOW_DISSECT_RET_OUT_GOOD; +} + +static enum flow_dissect_ret +__skb_flow_dissect_arp(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, void *data, int nhoff, int hlen) +{ + struct flow_dissector_key_arp *key_arp; + struct { + unsigned char ar_sha[ETH_ALEN]; + unsigned char ar_sip[4]; + unsigned char ar_tha[ETH_ALEN]; + unsigned char ar_tip[4]; + } *arp_eth, _arp_eth; + const struct arphdr *arp; + struct arphdr _arp; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP)) + return FLOW_DISSECT_RET_OUT_GOOD; + + arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, + hlen, &_arp); + if (!arp) + return FLOW_DISSECT_RET_OUT_BAD; + + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_hln != ETH_ALEN || + arp->ar_pln != 4 || + (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST))) + return FLOW_DISSECT_RET_OUT_BAD; + + arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), + sizeof(_arp_eth), data, + hlen, &_arp_eth); + if (!arp_eth) + return FLOW_DISSECT_RET_OUT_BAD; + + key_arp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ARP, + target_container); + + memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip)); + memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip)); + + /* Only store the lower byte of the opcode; + * this covers ARPOP_REPLY and ARPOP_REQUEST. + */ + key_arp->op = ntohs(arp->ar_op) & 0xff; + + ether_addr_copy(key_arp->sha, arp_eth->ar_sha); + ether_addr_copy(key_arp->tha, arp_eth->ar_tha); + + return FLOW_DISSECT_RET_OUT_GOOD; +} + +static enum flow_dissect_ret +__skb_flow_dissect_gre(const struct sk_buff *skb, + struct flow_dissector_key_control *key_control, + struct flow_dissector *flow_dissector, + void *target_container, void *data, + __be16 *p_proto, int *p_nhoff, int *p_hlen, + unsigned int flags) +{ + struct flow_dissector_key_keyid *key_keyid; + struct gre_base_hdr *hdr, _hdr; + int offset = 0; + u16 gre_ver; + + hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), + data, *p_hlen, &_hdr); + if (!hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) + return FLOW_DISSECT_RET_OUT_GOOD; + + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + return FLOW_DISSECT_RET_OUT_GOOD; + + *p_proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + return FLOW_DISSECT_RET_OUT_GOOD; + } + + offset += sizeof(struct gre_base_hdr); + + if (hdr->flags & GRE_CSUM) + offset += sizeof(((struct gre_full_hdr *) 0)->csum) + + sizeof(((struct gre_full_hdr *) 0)->reserved1); + + if (hdr->flags & GRE_KEY) { + const __be32 *keyid; + __be32 _keyid; + + keyid = __skb_header_pointer(skb, *p_nhoff + offset, + sizeof(_keyid), + data, *p_hlen, &_keyid); + if (!keyid) + return FLOW_DISSECT_RET_OUT_BAD; + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_GRE_KEYID)) { + key_keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_GRE_KEYID, + target_container); + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; + } + offset += sizeof(((struct gre_full_hdr *) 0)->key); + } + + if (hdr->flags & GRE_SEQ) + offset += sizeof(((struct pptp_gre_header *) 0)->seq); + + if (gre_ver == 0) { + if (*p_proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, *p_nhoff + offset, + sizeof(_eth), + data, *p_hlen, &_eth); + if (!eth) + return FLOW_DISSECT_RET_OUT_BAD; + *p_proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + *p_hlen = *p_nhoff + offset; + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *) 0)->ack); + + ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset, + sizeof(_ppp_hdr), + data, *p_hlen, _ppp_hdr); + if (!ppp_hdr) + return FLOW_DISSECT_RET_OUT_BAD; + + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + *p_proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + *p_proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; + } + + *p_nhoff += offset; + key_control->flags |= FLOW_DIS_ENCAPSULATION; + if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return FLOW_DISSECT_RET_OUT_GOOD; + + return FLOW_DISSECT_RET_OUT_PROTO_AGAIN; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -138,12 +367,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_arp *key_arp; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; bool ret; @@ -181,7 +408,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs)); } -again: +proto_again: switch (proto) { case htons(ETH_P_IP): { const struct iphdr *iph; @@ -284,7 +511,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); if (skip_vlan) - goto again; + goto proto_again; } skip_vlan = true; @@ -307,7 +534,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, } } - goto again; + goto proto_again; } case htons(ETH_P_PPP_SES): { struct { @@ -349,31 +576,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb, } case htons(ETH_P_MPLS_UC): - case htons(ETH_P_MPLS_MC): { - struct mpls_label *hdr, _hdr[2]; + case htons(ETH_P_MPLS_MC): mpls: - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, - hlen, &_hdr); - if (!hdr) - goto out_bad; - - if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >> - MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) { - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) { - key_keyid = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_MPLS_ENTROPY, - target_container); - key_keyid->keyid = hdr[1].entry & - htonl(MPLS_LS_LABEL_MASK); - } - + switch (__skb_flow_dissect_mpls(skb, flow_dissector, + target_container, data, + nhoff, hlen)) { + case FLOW_DISSECT_RET_OUT_GOOD: goto out_good; + case FLOW_DISSECT_RET_OUT_BAD: + default: + goto out_bad; } - - goto out_good; - } - case htons(ETH_P_FCOE): if ((hlen - nhoff) < FCOE_HEADER_LEN) goto out_bad; @@ -382,177 +595,33 @@ bool __skb_flow_dissect(const struct sk_buff *skb, goto out_good; case htons(ETH_P_ARP): - case htons(ETH_P_RARP): { - struct { - unsigned char ar_sha[ETH_ALEN]; - unsigned char ar_sip[4]; - unsigned char ar_tha[ETH_ALEN]; - unsigned char ar_tip[4]; - } *arp_eth, _arp_eth; - const struct arphdr *arp; - struct arphdr _arp; - - arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, - hlen, &_arp); - if (!arp) - goto out_bad; - - if (arp->ar_hrd != htons(ARPHRD_ETHER) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_hln != ETH_ALEN || - arp->ar_pln != 4 || - (arp->ar_op != htons(ARPOP_REPLY) && - arp->ar_op != htons(ARPOP_REQUEST))) - goto out_bad; - - arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp), - sizeof(_arp_eth), data, - hlen, - &_arp_eth); - if (!arp_eth) + case htons(ETH_P_RARP): + switch (__skb_flow_dissect_arp(skb, flow_dissector, + target_container, data, + nhoff, hlen)) { + case FLOW_DISSECT_RET_OUT_GOOD: + goto out_good; + case FLOW_DISSECT_RET_OUT_BAD: + default: goto out_bad; - - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ARP)) { - - key_arp = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_ARP, - target_container); - - memcpy(&key_arp->sip, arp_eth->ar_sip, - sizeof(key_arp->sip)); - memcpy(&key_arp->tip, arp_eth->ar_tip, - sizeof(key_arp->tip)); - - /* Only store the lower byte of the opcode; - * this covers ARPOP_REPLY and ARPOP_REQUEST. - */ - key_arp->op = ntohs(arp->ar_op) & 0xff; - - ether_addr_copy(key_arp->sha, arp_eth->ar_sha); - ether_addr_copy(key_arp->tha, arp_eth->ar_tha); } - - goto out_good; - } - default: goto out_bad; } ip_proto_again: switch (ip_proto) { - case IPPROTO_GRE: { - struct gre_base_hdr *hdr, _hdr; - u16 gre_ver; - int offset = 0; - - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); - if (!hdr) + case IPPROTO_GRE: + switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector, + target_container, data, + &proto, &nhoff, &hlen, flags)) { + case FLOW_DISSECT_RET_OUT_GOOD: + goto out_good; + case FLOW_DISSECT_RET_OUT_BAD: goto out_bad; - - /* Only look inside GRE without routing */ - if (hdr->flags & GRE_ROUTING) - break; - - /* Only look inside GRE for version 0 and 1 */ - gre_ver = ntohs(hdr->flags & GRE_VERSION); - if (gre_ver > 1) - break; - - proto = hdr->protocol; - if (gre_ver) { - /* Version1 must be PPTP, and check the flags */ - if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) - break; - } - - offset += sizeof(struct gre_base_hdr); - - if (hdr->flags & GRE_CSUM) - offset += sizeof(((struct gre_full_hdr *)0)->csum) + - sizeof(((struct gre_full_hdr *)0)->reserved1); - - if (hdr->flags & GRE_KEY) { - const __be32 *keyid; - __be32 _keyid; - - keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), - data, hlen, &_keyid); - if (!keyid) - goto out_bad; - - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_GRE_KEYID)) { - key_keyid = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_GRE_KEYID, - target_container); - if (gre_ver == 0) - key_keyid->keyid = *keyid; - else - key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; - } - offset += sizeof(((struct gre_full_hdr *)0)->key); - } - - if (hdr->flags & GRE_SEQ) - offset += sizeof(((struct pptp_gre_header *)0)->seq); - - if (gre_ver == 0) { - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; - - eth = __skb_header_pointer(skb, nhoff + offset, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) - goto out_bad; - proto = eth->h_proto; - offset += sizeof(*eth); - - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - hlen = (nhoff + offset); - } - } else { /* version 1, must be PPTP */ - u8 _ppp_hdr[PPP_HDRLEN]; - u8 *ppp_hdr; - - if (hdr->flags & GRE_ACK) - offset += sizeof(((struct pptp_gre_header *)0)->ack); - - ppp_hdr = __skb_header_pointer(skb, nhoff + offset, - sizeof(_ppp_hdr), - data, hlen, _ppp_hdr); - if (!ppp_hdr) - goto out_bad; - - switch (PPP_PROTOCOL(ppp_hdr)) { - case PPP_IP: - proto = htons(ETH_P_IP); - break; - case PPP_IPV6: - proto = htons(ETH_P_IPV6); - break; - default: - /* Could probably catch some more like MPLS */ - break; - } - - offset += PPP_HDRLEN; + case FLOW_DISSECT_RET_OUT_PROTO_AGAIN: + goto proto_again; } - - nhoff += offset; - key_control->flags |= FLOW_DIS_ENCAPSULATION; - if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) - goto out_good; - - goto again; - } case NEXTHDR_HOP: case NEXTHDR_ROUTING: case NEXTHDR_DEST: { diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index c98bbfbd26b8645bb2a273bdee61f285606e215d..814e58a3ce8b6a16a3fa60917b359f0fd3bf969d 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) struct net_device *dev = skb->dev; struct gro_cell *cell; - if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) return netif_rx(skb); cell = this_cpu_ptr(gcells->cells); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 0cfe7b0216c3522a8d05d404423de939ad2cc2d5..b3bc0a31af9f3bd2d21fd12b9b2885d9b1573a4d 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -209,7 +209,8 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, int ret; u32 fd; - ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy); + ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy, + NULL); if (ret < 0) return ret; @@ -249,7 +250,7 @@ static int bpf_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EAFNOSUPPORT; - ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy); + ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, NULL); if (ret < 0) return ret; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 6df9f8fabf0ca5d2ced3070406900b7ec28a7924..cfae3d5fe11f10035394aea48fb5332244058a03 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -162,7 +162,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; struct nlattr *attrs; - struct nlattr *nla; u16 encap_type; int attrlen; @@ -170,7 +169,6 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining) attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { attrs = rtnh_attrs(rtnh); - nla = nla_find(attrs, attrlen, RTA_ENCAP); nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { @@ -205,7 +203,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { const struct lwtunnel_encap_ops *ops; struct nlattr *nest; - int ret = -EINVAL; + int ret; if (!lwtstate) return 0; @@ -214,8 +212,11 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; - ret = -EOPNOTSUPP; nest = nla_nest_start(skb, RTA_ENCAP); + if (!nest) + return -EMSGSIZE; + + ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->fill_encap)) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4526cbd7e28a1fcdecfc06a41985fd4d19634457..d274f81fcc2c08f1e85df4ed00b9a034f3ae0739 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -52,8 +52,9 @@ do { \ #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(unsigned long arg); -static void __neigh_notify(struct neighbour *n, int type, int flags); -static void neigh_update_notify(struct neighbour *neigh); +static void __neigh_notify(struct neighbour *n, int type, int flags, + u32 pid); +static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); #ifdef CONFIG_PROC_FS @@ -99,7 +100,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) if (neigh->parms->neigh_cleanup) neigh->parms->neigh_cleanup(neigh); - __neigh_notify(neigh, RTM_DELNEIGH, 0); + __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); neigh_release(neigh); } @@ -949,7 +950,7 @@ static void neigh_timer_handler(unsigned long arg) } if (notify) - neigh_update_notify(neigh); + neigh_update_notify(neigh, 0); neigh_release(neigh); } @@ -1073,7 +1074,7 @@ static void neigh_update_hhs(struct neighbour *neigh) */ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, - u32 flags) + u32 flags, u32 nlmsg_pid) { u8 old; int err; @@ -1131,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } - if (new & NUD_CONNECTED) - neigh->confirmed = jiffies; - neigh->updated = jiffies; - /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ @@ -1156,6 +1153,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } } + /* Update timestamps only once we know we will make a change to the + * neighbour entry. Otherwise we risk to move the locktime window with + * noop updates and ignore relevant ARP updates. + */ + if (new != old || lladdr != neigh->ha) { + if (new & NUD_CONNECTED) + neigh->confirmed = jiffies; + neigh->updated = jiffies; + } + if (new != old) { neigh_del_timer(neigh); if (new & NUD_PROBE) @@ -1230,7 +1237,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_unlock_bh(&neigh->lock); if (notify) - neigh_update_notify(neigh); + neigh_update_notify(neigh, nlmsg_pid); return err; } @@ -1261,7 +1268,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, lladdr || !dev->addr_len); if (neigh) neigh_update(neigh, lladdr, NUD_STALE, - NEIGH_UPDATE_F_OVERRIDE); + NEIGH_UPDATE_F_OVERRIDE, 0); return neigh; } EXPORT_SYMBOL(neigh_event_ns); @@ -1589,7 +1596,8 @@ static struct neigh_table *neigh_find_table(int family) return tbl; } -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1639,14 +1647,16 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, + NETLINK_CB(skb).portid); neigh_release(neigh); out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; struct net *net = sock_net(skb->sk); @@ -1659,7 +1669,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) int err; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); if (err < 0) goto out; @@ -1730,7 +1740,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) neigh_event_send(neigh, NULL); err = 0; } else - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags, + NETLINK_CB(skb).portid); neigh_release(neigh); out: @@ -1933,7 +1944,8 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; @@ -1943,7 +1955,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) int err, tidx; err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, - nl_neightbl_policy); + nl_neightbl_policy, extack); if (err < 0) goto errout; @@ -1981,7 +1993,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) int i, ifindex = 0; err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], - nl_ntbl_parm_policy); + nl_ntbl_parm_policy, extack); if (err < 0) goto errout_tbl_lock; @@ -2230,10 +2242,10 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, return -EMSGSIZE; } -static void neigh_update_notify(struct neighbour *neigh) +static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid) { call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); - __neigh_notify(neigh, RTM_NEWNEIGH, 0); + __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid); } static bool neigh_master_filtered(struct net_device *dev, int master_idx) @@ -2272,7 +2284,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, unsigned int flags = NLM_F_MULTI; int err; - err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL); if (!err) { if (tb[NDA_IFINDEX]) filter_idx = nla_get_u32(tb[NDA_IFINDEX]); @@ -2831,7 +2843,8 @@ static inline size_t neigh_nlmsg_size(void) + nla_total_size(4); /* NDA_PROBES */ } -static void __neigh_notify(struct neighbour *n, int type, int flags) +static void __neigh_notify(struct neighbour *n, int type, int flags, + u32 pid) { struct net *net = dev_net(n->dev); struct sk_buff *skb; @@ -2841,7 +2854,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) if (skb == NULL) goto errout; - err = neigh_fill_info(skb, n, 0, 0, type, flags); + err = neigh_fill_info(skb, n, pid, 0, type, flags); if (err < 0) { /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2857,7 +2870,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) void neigh_app_ns(struct neighbour *n) { - __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); + __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0); } EXPORT_SYMBOL(neigh_app_ns); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 652468ff65b79d5c9d45ece0d3289de480bbc603..26bbfababff27cecc589bca69e3eb14739187f5b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -35,7 +35,8 @@ LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net = { - .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), + .count = ATOMIC_INIT(1), + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), }; EXPORT_SYMBOL(init_net); @@ -314,6 +315,25 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) goto out; } +static int __net_init net_defaults_init_net(struct net *net) +{ + net->core.sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations net_defaults_ops = { + .init = net_defaults_init_net, +}; + +static __init int net_defaults_init(void) +{ + if (register_pernet_subsys(&net_defaults_ops)) + panic("Cannot initialize net default settings"); + + return 0; +} + +core_initcall(net_defaults_init); #ifdef CONFIG_NET_NS static struct ucounts *inc_net_namespaces(struct user_namespace *ns) @@ -571,7 +591,8 @@ static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { [NETNSA_FD] = { .type = NLA_U32 }, }; -static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; @@ -579,7 +600,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) int nsid, err; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy); + rtnl_net_policy, extack); if (err < 0) return err; if (!tb[NETNSA_NSID]) @@ -644,7 +665,8 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, return -EMSGSIZE; } -static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; @@ -653,7 +675,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) int err, id; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy); + rtnl_net_policy, extack); if (err < 0) return err; if (tb[NETNSA_PID]) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 9424673009c14e0fb288b8e4041dba596b37ee8d..29be2466970cd670daa7a8abdd54929c9af39026 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -105,15 +105,21 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; struct netdev_queue *txq; + unsigned int q_index; if (!netif_device_present(dev) || !netif_running(dev)) { kfree_skb(skb); continue; } - txq = skb_get_tx_queue(dev, skb); - local_irq_save(flags); + /* check if skb->queue_mapping is still valid */ + q_index = skb_get_queue_mapping(skb); + if (unlikely(q_index >= dev->real_num_tx_queues)) { + q_index = q_index % dev->real_num_tx_queues; + skb_set_queue_mapping(skb, q_index); + } + txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0f9275ee55958156a6cbac3f0d2b1ff54c3c89a5..1c4810919a0a35900d45a659de0cd780b7e500d3 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c4e84c55824085b343679cc295a81f7b35e3acaf..5e61456f6bc795cfb75db2d530eb3b1872c82989 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -896,15 +896,12 @@ static size_t rtnl_port_size(const struct net_device *dev, return port_self_size; } -static size_t rtnl_xdp_size(const struct net_device *dev) +static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ nla_total_size(1); /* XDP_ATTACHED */ - if (!dev->netdev_ops->ndo_xdp) - return 0; - else - return xdp_size; + return xdp_size; } static noinline size_t if_nlmsg_size(const struct net_device *dev, @@ -943,7 +940,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ - + rtnl_xdp_size(dev) /* IFLA_XDP */ + + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1056,7 +1053,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) return err; } - if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name)) return -EMSGSIZE; return 0; @@ -1127,6 +1124,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct ifla_vf_mac vf_mac; struct ifla_vf_info ivi; + memset(&ivi, 0, sizeof(ivi)); + /* Not all SR-IOV capable drivers support the * spoofcheck and "RSS query enable" query. Preset to * -1 so the user space tool can detect that the driver @@ -1135,7 +1134,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, ivi.spoofchk = -1; ivi.rss_query_en = -1; ivi.trusted = -1; - memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero */ @@ -1249,22 +1247,31 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } +static u8 rtnl_xdp_attached_mode(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + ASSERT_RTNL(); + + if (rcu_access_pointer(dev->xdp_prog)) + return XDP_ATTACHED_SKB; + if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp)) + return XDP_ATTACHED_DRV; + + return XDP_ATTACHED_NONE; +} + static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { - struct netdev_xdp xdp_op = {}; struct nlattr *xdp; int err; - if (!dev->netdev_ops->ndo_xdp) - return 0; xdp = nla_nest_start(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; - xdp_op.command = XDP_QUERY_PROG; - err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); - if (err) - goto err_cancel; - err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached); + + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, + rtnl_xdp_attached_mode(dev)); if (err) goto err_cancel; @@ -1515,7 +1522,8 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; - if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0) + if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, + ifla_info_policy, NULL) < 0) return NULL; if (linfo[IFLA_INFO_KIND]) { @@ -1592,8 +1600,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { - + if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, + ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1620,13 +1628,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, flags, ext_filter_mask); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err < 0) - goto out; + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: @@ -1634,15 +1642,18 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) } } out: + err = skb->len; +out_err: cb->args[1] = idx; cb->args[0] = h; - return skb->len; + return err; } -int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) +int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, + struct netlink_ext_ack *exterr) { - return nla_parse(tb, IFLA_MAX, head, len, ifla_policy); + return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr); } EXPORT_SYMBOL(rtnl_nla_parse_ifla); @@ -1907,6 +1918,7 @@ static int do_set_master(struct net_device *dev, int ifindex) #define DO_SETLINK_NOTIFY 0x03 static int do_setlink(const struct sk_buff *skb, struct net_device *dev, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, struct nlattr **tb, char *ifname, int status) { const struct net_device_ops *ops = dev->netdev_ops; @@ -2078,7 +2090,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, - ifla_vf_policy); + ifla_vf_policy, NULL); if (err < 0) goto errout; err = do_setvfinfo(dev, vfinfo); @@ -2106,7 +2118,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } err = nla_parse_nested(port, IFLA_PORT_MAX, attr, - ifla_port_policy); + ifla_port_policy, NULL); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { @@ -2126,7 +2138,8 @@ static int do_setlink(const struct sk_buff *skb, struct nlattr *port[IFLA_PORT_MAX+1]; err = nla_parse_nested(port, IFLA_PORT_MAX, - tb[IFLA_PORT_SELF], ifla_port_policy); + tb[IFLA_PORT_SELF], ifla_port_policy, + NULL); if (err < 0) goto errout; @@ -2170,7 +2183,7 @@ static int do_setlink(const struct sk_buff *skb, u32 xdp_flags = 0; err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], - ifla_xdp_policy); + ifla_xdp_policy, NULL); if (err < 0) goto errout; @@ -2185,10 +2198,15 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } + if ((xdp_flags & XDP_FLAGS_SKB_MODE) && + (xdp_flags & XDP_FLAGS_DRV_MODE)) { + err = -EINVAL; + goto errout; + } } if (xdp[IFLA_XDP_FD]) { - err = dev_change_xdp_fd(dev, + err = dev_change_xdp_fd(dev, extack, nla_get_s32(xdp[IFLA_XDP_FD]), xdp_flags); if (err) @@ -2210,7 +2228,8 @@ static int do_setlink(const struct sk_buff *skb, return err; } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2219,7 +2238,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); if (err < 0) goto errout; @@ -2246,7 +2266,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - err = do_setlink(skb, dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0); errout: return err; } @@ -2303,7 +2323,8 @@ int rtnl_delete_link(struct net_device *dev) } EXPORT_SYMBOL_GPL(rtnl_delete_link); -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct net_device *dev; @@ -2312,7 +2333,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) return err; @@ -2407,6 +2428,7 @@ EXPORT_SYMBOL(rtnl_create_link); static int rtnl_group_changelink(const struct sk_buff *skb, struct net *net, int group, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, struct nlattr **tb) { struct net_device *dev, *aux; @@ -2414,7 +2436,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { - err = do_setlink(skb, dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0); if (err < 0) return err; } @@ -2423,7 +2445,8 @@ static int rtnl_group_changelink(const struct sk_buff *skb, return 0; } -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -2441,7 +2464,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) #ifdef CONFIG_MODULES replay: #endif - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) return err; @@ -2472,7 +2495,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (tb[IFLA_LINKINFO]) { err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], ifla_info_policy); + tb[IFLA_LINKINFO], ifla_info_policy, + NULL); if (err < 0) return err; } else @@ -2497,7 +2521,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { err = nla_parse_nested(attr, ops->maxtype, linkinfo[IFLA_INFO_DATA], - ops->policy); + ops->policy, NULL); if (err < 0) return err; data = attr; @@ -2515,7 +2539,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) err = nla_parse_nested(slave_attr, m_ops->slave_maxtype, linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy); + m_ops->slave_policy, + NULL); if (err < 0) return err; slave_data = slave_attr; @@ -2557,14 +2582,15 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) status |= DO_SETLINK_NOTIFY; } - return do_setlink(skb, dev, ifm, tb, ifname, status); + return do_setlink(skb, dev, ifm, extack, tb, ifname, + status); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), - ifm, tb); + ifm, extack, tb); return -ENODEV; } @@ -2673,7 +2699,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) } } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2684,7 +2711,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) int err; u32 ext_filter_mask = 0; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) return err; @@ -2734,7 +2761,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { + if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -2955,7 +2982,8 @@ static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid) return 0; } -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2965,7 +2993,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) u16 vid; int err; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); if (err < 0) return err; @@ -3055,7 +3083,8 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -3068,7 +3097,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); if (err < 0) return err; @@ -3203,8 +3232,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) == 0) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 0) { + return -EINVAL; + } else if (err == 0) { if (tb[IFLA_MASTER]) br_idx = nla_get_u32(tb[IFLA_MASTER]); } @@ -3427,8 +3459,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) err = br_dev->netdev_ops->ndo_bridge_getlink( skb, portid, seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } @@ -3439,16 +3475,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) seq, dev, filter_mask, NLM_F_MULTI); - if (err < 0 && err != -EOPNOTSUPP) - break; + if (err < 0 && err != -EOPNOTSUPP) { + if (likely(skb->len)) + break; + + goto out_err; + } } idx++; } } + err = skb->len; +out_err: rcu_read_unlock(); cb->args[0] = idx; - return skb->len; + return err; } static inline size_t bridge_nlmsg_size(void) @@ -3498,7 +3540,8 @@ static int rtnl_bridge_notify(struct net_device *dev) return err; } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -3572,7 +3615,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -3940,7 +3984,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, return size; } -static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; @@ -4046,7 +4091,8 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) /* Process one rtnetlink message. */ -static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); rtnl_doit_func doit; @@ -4101,7 +4147,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (doit == NULL) return -EOPNOTSUPP; - return doit(skb, nlh); + return doit(skb, nlh, extack); } static void rtnetlink_rcv(struct sk_buff *skb) @@ -4116,22 +4162,16 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_PRE_UP: - case NETDEV_POST_INIT: - case NETDEV_REGISTER: - case NETDEV_CHANGE: - case NETDEV_PRE_TYPE_CHANGE: - case NETDEV_GOING_DOWN: - case NETDEV_UNREGISTER: - case NETDEV_UNREGISTER_FINAL: - case NETDEV_RELEASE: - case NETDEV_JOIN: - case NETDEV_BONDING_INFO: + case NETDEV_REBOOT: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + case NETDEV_BONDING_FAILOVER: + case NETDEV_NOTIFY_PEERS: + case NETDEV_RESEND_IGMP: + case NETDEV_CHANGEINFODATA: + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; } return NOTIFY_DONE; @@ -4185,6 +4225,7 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL); rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index d28da7d363f170f35d88623e2b864f04a67c3de5..ae35cce3a40d70387bee815798933aa43a0e6d84 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { - net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } + +static __always_inline void ts_secret_init(void) +{ + net_get_random_once(&ts_secret, sizeof(ts_secret)); +} #endif #ifdef CONFIG_INET @@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; @@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } +EXPORT_SYMBOL(secure_tcpv6_ts_off); -u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; @@ -78,14 +84,14 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, .sport = sport, .dport = dport }; - u64 hash; + u32 hash; + net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } -EXPORT_SYMBOL(secure_tcpv6_sequence_number); +EXPORT_SYMBOL(secure_tcpv6_seq); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -107,30 +113,30 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) { if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash_2u32((__force u32)saddr, (__force u32)daddr, &ts_secret); } -/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), +/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ - -u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) { - u64 hash; + u32 hash; + net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f781092fda9cb8cac22b0743b4bc7666a3bd91a..b1be7c01efe269d2bc97be7dcd1cc5485d29fa7b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1576,6 +1576,8 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } + if (!skb->sk || skb->destructor == sock_edemux) + skb_condense(skb); return 0; } EXPORT_SYMBOL(___pskb_trim); @@ -1980,7 +1982,6 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, .pages = pages, .partial = partial, .nr_pages_max = MAX_SKB_FRAGS, - .flags = flags, .ops = &nosteal_pipe_buf_ops, .spd_release = sock_spd_release, }; @@ -3082,22 +3083,32 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (sg && csum && (mss != GSO_BY_FRAGS)) { if (!(features & NETIF_F_GSO_PARTIAL)) { struct sk_buff *iter; + unsigned int frag_len; if (!list_skb || !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) goto normal; - /* Split the buffer at the frag_list pointer. - * This is based on the assumption that all - * buffers in the chain excluding the last - * containing the same amount of data. + /* If we get here then all the required + * GSO features except frag_list are supported. + * Try to split the SKB to multiple GSO SKBs + * with no frag_list. + * Currently we can do that only when the buffers don't + * have a linear part and all the buffers except + * the last are of the same length. */ + frag_len = list_skb->len; skb_walk_frags(head_skb, iter) { - if (skb_headlen(iter)) + if (frag_len != iter->len && iter->next) + goto normal; + if (skb_headlen(iter) && !iter->head_frag) goto normal; len -= iter->len; } + + if (len != frag_len) + goto normal; } /* GSO partial only requires that we trim off any excess that @@ -3743,8 +3754,11 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); - if (skb && (skb_next = skb_peek(q))) + if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); + if (icmp_next) + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; + } spin_unlock_irqrestore(&q->lock, flags); if (is_icmp_err_skb(skb) && !icmp_next) @@ -3807,6 +3821,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; + serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && diff --git a/net/core/sock.c b/net/core/sock.c index 2c4f574168fbdcebc0cc82c0c8a36214992d6224..727f924b7f91f495d9e7a4e7297c9c937d3258ed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -102,6 +102,7 @@ #include #include #include +#include #include #include #include @@ -138,10 +139,7 @@ #include -#ifdef CONFIG_INET #include -#endif - #include static DEFINE_MUTEX(proto_list_mutex); @@ -247,12 +245,66 @@ static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { _sock_locks("k-clock-") }; +static const char *const af_family_rlock_key_strings[AF_MAX+1] = { + "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" , + "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK", + "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" , + "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" , + "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" , + "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" , + "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" , + "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" , + "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" , + "rlock-27" , "rlock-28" , "rlock-AF_CAN" , + "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" , + "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" , + "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" , + "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" , + "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX" +}; +static const char *const af_family_wlock_key_strings[AF_MAX+1] = { + "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" , + "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK", + "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" , + "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" , + "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" , + "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" , + "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" , + "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" , + "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" , + "wlock-27" , "wlock-28" , "wlock-AF_CAN" , + "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" , + "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" , + "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" , + "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" , + "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX" +}; +static const char *const af_family_elock_key_strings[AF_MAX+1] = { + "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" , + "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK", + "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" , + "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" , + "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" , + "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" , + "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" , + "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" , + "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" , + "elock-27" , "elock-28" , "elock-AF_CAN" , + "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" , + "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" , + "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" , + "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" , + "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX" +}; /* - * sk_callback_lock locking rules are per-address-family, + * sk_callback_lock and sk queues locking rules are per-address-family, * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; +static struct lock_class_key af_rlock_keys[AF_MAX]; +static struct lock_class_key af_wlock_keys[AF_MAX]; +static struct lock_class_key af_elock_keys[AF_MAX]; static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Take into consideration the size of the struct sk_buff overhead in the @@ -318,14 +370,14 @@ EXPORT_SYMBOL_GPL(sk_clear_memalloc); int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int ret; - unsigned long pflags = current->flags; + unsigned int noreclaim_flag; /* these should have been dropped before queueing */ BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); ret = sk->sk_backlog_rcv(sk, skb); - tsk_restore_flags(current, pflags, PF_MEMALLOC); + memalloc_noreclaim_restore(noreclaim_flag); return ret; } @@ -1029,6 +1081,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, union { int val; + u64 val64; struct linger ling; struct timeval tm; } v; @@ -1259,6 +1312,40 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_incoming_cpu; break; + case SO_MEMINFO: + { + u32 meminfo[SK_MEMINFO_VARS]; + + if (get_user(len, optlen)) + return -EFAULT; + + sk_get_meminfo(sk, meminfo); + + len = min_t(unsigned int, len, sizeof(meminfo)); + if (copy_to_user(optval, &meminfo, len)) + return -EFAULT; + + goto lenout; + } + +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_INCOMING_NAPI_ID: + v.val = READ_ONCE(sk->sk_napi_id); + + /* aggregate non-NAPI IDs down to 0 */ + if (v.val < MIN_NAPI_ID) + v.val = 0; + + break; +#endif + + case SO_COOKIE: + lv = sizeof(u64); + if (len < lv) + return -EINVAL; + v.val64 = sock_gen_cookie(sk); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1483,6 +1570,27 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); +static void sk_init_common(struct sock *sk) +{ + skb_queue_head_init(&sk->sk_receive_queue); + skb_queue_head_init(&sk->sk_write_queue); + skb_queue_head_init(&sk->sk_error_queue); + + rwlock_init(&sk->sk_callback_lock); + lockdep_set_class_and_name(&sk->sk_receive_queue.lock, + af_rlock_keys + sk->sk_family, + af_family_rlock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_write_queue.lock, + af_wlock_keys + sk->sk_family, + af_family_wlock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_error_queue.lock, + af_elock_keys + sk->sk_family, + af_family_elock_key_strings[sk->sk_family]); + lockdep_set_class_and_name(&sk->sk_callback_lock, + af_callback_keys + sk->sk_family, + af_family_clock_key_strings[sk->sk_family]); +} + /** * sk_clone_lock - clone a socket, and lock its clone * @sk: the socket to clone @@ -1516,13 +1624,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ atomic_set(&newsk->sk_wmem_alloc, 1); atomic_set(&newsk->sk_omem_alloc, 0); - skb_queue_head_init(&newsk->sk_receive_queue); - skb_queue_head_init(&newsk->sk_write_queue); - - rwlock_init(&newsk->sk_callback_lock); - lockdep_set_class_and_name(&newsk->sk_callback_lock, - af_callback_keys + newsk->sk_family, - af_family_clock_key_strings[newsk->sk_family]); + sk_init_common(newsk); newsk->sk_dst_cache = NULL; newsk->sk_dst_pending_confirm = 0; @@ -1533,7 +1635,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; sock_reset_flag(newsk, SOCK_DONE); - skb_queue_head_init(&newsk->sk_error_queue); filter = rcu_dereference_protected(newsk->sk_filter, 1); if (filter != NULL) @@ -1699,28 +1800,24 @@ EXPORT_SYMBOL(skb_set_owner_w); * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. * But we also want to keep skb->sk set because some packet schedulers - * rely on it (sch_fq for example). So we set skb->truesize to a small - * amount (1) and decrease sk_wmem_alloc accordingly. + * rely on it (sch_fq for example). */ void skb_orphan_partial(struct sk_buff *skb) { - /* If this skb is a TCP pure ACK or already went here, - * we have nothing to do. 2 is already a very small truesize. - */ - if (skb->truesize <= 2) + if (skb_is_tcp_pure_ack(skb)) return; - /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, - * so we do not completely orphan skb, but transfert all - * accounted bytes but one, to avoid unexpected reorders. - */ if (skb->destructor == sock_wfree #ifdef CONFIG_INET || skb->destructor == tcp_wfree #endif ) { - atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); - skb->truesize = 1; + struct sock *sk = skb->sk; + + if (atomic_inc_not_zero(&sk->sk_refcnt)) { + atomic_sub(skb->truesize, &sk->sk_wmem_alloc); + skb->destructor = sock_efree; + } } else { skb_orphan(skb); } @@ -2466,10 +2563,7 @@ EXPORT_SYMBOL(sk_stop_timer); void sock_init_data(struct socket *sock, struct sock *sk) { - skb_queue_head_init(&sk->sk_receive_queue); - skb_queue_head_init(&sk->sk_write_queue); - skb_queue_head_init(&sk->sk_error_queue); - + sk_init_common(sk); sk->sk_send_head = NULL; init_timer(&sk->sk_timer); @@ -2521,7 +2615,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp = ktime_set(-1L, 0); + sk->sk_stamp = SK_DEFAULT_STAMP; #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; @@ -2802,6 +2896,21 @@ void sk_common_release(struct sock *sk) } EXPORT_SYMBOL(sk_common_release); +void sk_get_meminfo(const struct sock *sk, u32 *mem) +{ + memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); + + mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); + mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); + mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; + mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); + mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; + mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); +} + #ifdef CONFIG_PROC_FS #define PROTO_INUSE_NR 64 /* should be enough for the first time */ struct prot_inuse { @@ -3142,3 +3251,14 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ + +#ifdef CONFIG_NET_RX_BUSY_POLL +bool sk_busy_loop_end(void *p, unsigned long start_time) +{ + struct sock *sk = p; + + return !skb_queue_empty(&sk->sk_receive_queue) || + sk_busy_loop_timeout(sk, start_time); +} +EXPORT_SYMBOL(sk_busy_loop_end); +#endif /* CONFIG_NET_RX_BUSY_POLL */ diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 6b10573cc9faa790fe261b452b85f3b774c3ec21..217f4e3b82f6edca841b66089ab0772bd8d391e7 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,7 +19,7 @@ static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; -static u64 sock_gen_cookie(struct sock *sk) +u64 sock_gen_cookie(struct sock *sk) { while (1) { u64 res = atomic64_read(&sk->sk_cookie); @@ -59,15 +59,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) { u32 mem[SK_MEMINFO_VARS]; - mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; - mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); - mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; - mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; - mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); - mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len; - mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); + sk_get_meminfo(sk, mem); return nla_put(skb, attrtype, sizeof(mem), &mem); } @@ -246,7 +238,8 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { int ret; diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 9a1a352fd1ebe598e4925bcda037dc0e4a2288bc..eed1ebf7f29d0fac552074b127e5636fecede65f 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -13,9 +13,9 @@ static DEFINE_SPINLOCK(reuseport_lock); -static struct sock_reuseport *__reuseport_alloc(u16 max_socks) +static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) { - size_t size = sizeof(struct sock_reuseport) + + unsigned int size = sizeof(struct sock_reuseport) + sizeof(struct sock *) * max_socks; struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7f9cc400eca08c01c9014476aa4daf0852505b20..b7cd9aafe99e9b9216b13cce617133cdf832eeeb 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -452,6 +452,14 @@ static struct ctl_table net_core_table[] = { .extra1 = &one, .extra2 = &max_skb_frags, }, + { + .procname = "netdev_budget_usecs", + .data = &netdev_budget_usecs, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + }, { } }; @@ -471,8 +479,6 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl; - net->core.sysctl_somaxconn = SOMAXCONN; - tbl = netns_core_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); diff --git a/net/core/utils.c b/net/core/utils.c index 6592d7bbed394086a8ba8efcb370fb1d75db4449..93066bd0305ab73dbee756125e2cb77b7f46aa44 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -26,9 +26,11 @@ #include #include #include +#include #include #include +#include #include #include @@ -51,7 +53,7 @@ EXPORT_SYMBOL(net_ratelimit); __be32 in_aton(const char *str) { - unsigned long l; + unsigned int l; unsigned int val; int i; @@ -300,6 +302,107 @@ int in6_pton(const char *src, int srclen, } EXPORT_SYMBOL(in6_pton); +static int inet4_pton(const char *src, u16 port_num, + struct sockaddr_storage *addr) +{ + struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; + int srclen = strlen(src); + + if (srclen > INET_ADDRSTRLEN) + return -EINVAL; + + if (in4_pton(src, srclen, (u8 *)&addr4->sin_addr.s_addr, + '\n', NULL) == 0) + return -EINVAL; + + addr4->sin_family = AF_INET; + addr4->sin_port = htons(port_num); + + return 0; +} + +static int inet6_pton(struct net *net, const char *src, u16 port_num, + struct sockaddr_storage *addr) +{ + struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; + const char *scope_delim; + int srclen = strlen(src); + + if (srclen > INET6_ADDRSTRLEN) + return -EINVAL; + + if (in6_pton(src, srclen, (u8 *)&addr6->sin6_addr.s6_addr, + '%', &scope_delim) == 0) + return -EINVAL; + + if (ipv6_addr_type(&addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL && + src + srclen != scope_delim && *scope_delim == '%') { + struct net_device *dev; + char scope_id[16]; + size_t scope_len = min_t(size_t, sizeof(scope_id) - 1, + src + srclen - scope_delim - 1); + + memcpy(scope_id, scope_delim + 1, scope_len); + scope_id[scope_len] = '\0'; + + dev = dev_get_by_name(net, scope_id); + if (dev) { + addr6->sin6_scope_id = dev->ifindex; + dev_put(dev); + } else if (kstrtouint(scope_id, 0, &addr6->sin6_scope_id)) { + return -EINVAL; + } + } + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(port_num); + + return 0; +} + +/** + * inet_pton_with_scope - convert an IPv4/IPv6 and port to socket address + * @net: net namespace (used for scope handling) + * @af: address family, AF_INET, AF_INET6 or AF_UNSPEC for either + * @src: the start of the address string + * @port: the start of the port string (or NULL for none) + * @addr: output socket address + * + * Return zero on success, return errno when any error occurs. + */ +int inet_pton_with_scope(struct net *net, __kernel_sa_family_t af, + const char *src, const char *port, struct sockaddr_storage *addr) +{ + u16 port_num; + int ret = -EINVAL; + + if (port) { + if (kstrtou16(port, 0, &port_num)) + return -EINVAL; + } else { + port_num = 0; + } + + switch (af) { + case AF_INET: + ret = inet4_pton(src, port_num, addr); + break; + case AF_INET6: + ret = inet6_pton(net, src, port_num, addr); + break; + case AF_UNSPEC: + ret = inet4_pton(src, port_num, addr); + if (ret) + ret = inet6_pton(net, src, port_num, addr); + break; + default: + pr_err("unexpected address family %d\n", af); + }; + + return ret; +} +EXPORT_SYMBOL(inet_pton_with_scope); + void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr) { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3202d75329b504b6442441becc31157e0c2be7a5..93106120f98770699d28e8b82701b132144baa87 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -245,8 +245,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], - dcbnl_pfc_up_nest); + tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -304,7 +303,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], - dcbnl_cap_nest); + dcbnl_cap_nest, NULL); if (ret) return ret; @@ -348,7 +347,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest); + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -393,7 +392,7 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest); + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -452,7 +451,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, return -EINVAL; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest); + dcbnl_app_nest, NULL); if (ret) return ret; @@ -520,7 +519,7 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh, return -EINVAL; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest); + dcbnl_app_nest, NULL); if (ret) return ret; @@ -577,8 +576,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, - tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], + dcbnl_pg_nest, NULL); if (ret) return ret; @@ -597,8 +596,8 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, data = pg_tb[DCB_PG_ATTR_TC_ALL]; else data = pg_tb[i]; - ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, - data, dcbnl_tc_param_nest); + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data, + dcbnl_tc_param_nest, NULL); if (ret) goto err_pg; @@ -735,8 +734,7 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], - dcbnl_pfc_up_nest); + tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -791,8 +789,8 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, - tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], + dcbnl_pg_nest, NULL); if (ret) return ret; @@ -801,7 +799,7 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, continue; ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, - pg_tb[i], dcbnl_tc_param_nest); + pg_tb[i], dcbnl_tc_param_nest, NULL); if (ret) return ret; @@ -889,8 +887,8 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getbcncfg) return -EOPNOTSUPP; - ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, - tb[DCB_ATTR_BCN], dcbnl_bcn_nest); + ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], + dcbnl_bcn_nest, NULL); if (ret) return ret; @@ -948,9 +946,8 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setbcnrp) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, - tb[DCB_ATTR_BCN], - dcbnl_pfc_up_nest); + ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], + dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -1424,8 +1421,8 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1508,8 +1505,8 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1581,8 +1578,8 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], - dcbnl_featcfg_nest); + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); if (ret) return ret; @@ -1625,8 +1622,8 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], - dcbnl_featcfg_nest); + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); if (ret) goto err; @@ -1699,7 +1696,8 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct net_device *netdev; @@ -1715,7 +1713,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, - dcbnl_rtnl_policy); + dcbnl_rtnl_policy, extack); if (ret < 0) return ret; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b99168b0fabf2a8c65defdd0b93d362630774e1a..f75482bdee9a0e5cc9ef89806b7d2d67ddb38ba1 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -951,7 +951,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d9b6a4e403e701fd9b9ecf92bac496e45570054e..992621172220d136a1114a58f1c0e75219c57ea4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -426,6 +426,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -490,6 +493,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1014,7 +1020,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7de5b40a5d0d1245ad995877f779e0d87d1cf398..405483a07efc7ac2efcfe86e285a7673547c9691 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -132,6 +132,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include +#include #include #include #include @@ -1469,18 +1470,18 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us case DSO_NODELAY: if (optlen != sizeof(int)) return -EINVAL; - if (scp->nonagle == 2) + if (scp->nonagle == TCP_NAGLE_CORK) return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 1; + scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF; /* if (scp->nonagle == 1) { Push pending frames } */ break; case DSO_CORK: if (optlen != sizeof(int)) return -EINVAL; - if (scp->nonagle == 1) + if (scp->nonagle == TCP_NAGLE_OFF) return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 2; + scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK; /* if (scp->nonagle == 0) { Push pending frames } */ break; @@ -1608,14 +1609,14 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us case DSO_NODELAY: if (r_len > sizeof(int)) r_len = sizeof(int); - val = (scp->nonagle == 1); + val = (scp->nonagle == TCP_NAGLE_OFF); r_data = &val; break; case DSO_CORK: if (r_len > sizeof(int)) r_len = sizeof(int); - val = (scp->nonagle == 2); + val = (scp->nonagle == TCP_NAGLE_CORK); r_data = &val; break; @@ -2360,7 +2361,8 @@ MODULE_AUTHOR("Linux DECnet Project Team"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_DECnet); -static char banner[] __initdata = KERN_INFO "NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n"; +static const char banner[] __initconst = KERN_INFO +"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n"; static int __init decnet_init(void) { diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 8fdd9f492b0ea2201db70cfe83ebe1f3384c7b04..9017a9a73ab5885002b9d63047e6bd5a7b91cc4b 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -565,7 +565,8 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { [IFA_FLAGS] = { .type = NLA_U32 }, }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -581,7 +582,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) goto errout; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, + extack); if (err < 0) goto errout; @@ -609,7 +611,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -625,7 +628,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, + extack); if (err < 0) return err; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 7af0ba6157a108578182eda3ec75ec7bd0c27859..f9058ebeb635c2efff457bf73d7895129b443cde 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -501,7 +501,8 @@ static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) return table; } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; @@ -515,7 +516,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, + extack); if (err < 0) return err; @@ -526,7 +528,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; @@ -540,7 +543,8 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, + extack); if (err < 0) return err; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 482730cd8a562e048b08f275551361ca813a8792..eeb5fc561f800f042023f95e88f0fe5fd64d3f52 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -110,7 +110,7 @@ struct neigh_table dn_neigh_table = { static int dn_neigh_construct(struct neighbour *neigh) { struct net_device *dev = neigh->dev; - struct dn_neigh *dn = (struct dn_neigh *)neigh; + struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); struct dn_dev *dn_db; struct neigh_parms *parms; @@ -339,7 +339,7 @@ int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; struct neighbour *neigh = rt->n; - struct dn_neigh *dn = (struct dn_neigh *)neigh; + struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n); struct dn_dev *dn_db; bool use_long; @@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - dn = (struct dn_neigh *)neigh; + dn = container_of(neigh, struct dn_neigh, n); if (neigh) { write_lock(&neigh->lock); @@ -451,7 +451,7 @@ int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1); - dn = (struct dn_neigh *)neigh; + dn = container_of(neigh, struct dn_neigh, n); if (neigh) { write_lock(&neigh->lock); @@ -510,7 +510,7 @@ static void neigh_elist_cb(struct neighbour *neigh, void *_info) if (neigh->dev != s->dev) return; - dn = (struct dn_neigh *) neigh; + dn = container_of(neigh, struct dn_neigh, n); if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2))) return; @@ -549,7 +549,7 @@ int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n) static inline void dn_neigh_format_entry(struct seq_file *seq, struct neighbour *n) { - struct dn_neigh *dn = (struct dn_neigh *) n; + struct dn_neigh *dn = container_of(n, struct dn_neigh, n); char buf[DN_ASCBUF_LEN]; read_lock(&n->lock); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b1dc096d22f8c83e771b1df68d7815661ac51bae..4b9518a0d2489494ed88b6808f95803cf1b543ad 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1640,7 +1640,8 @@ const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { /* * This is called by both endnodes and routers now. */ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm = nlmsg_data(nlh); @@ -1654,7 +1655,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy, + extack); if (err < 0) return err; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 85f2fdc360c27b21cb5b9c486e2d05ffb1d557d7..aa8ffecc46a439fa129dfea9c1f4d4e525ad3796 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -96,13 +96,15 @@ static unsigned int dnrmg_hook(void *priv, } -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0) static inline void dnrmg_receive_user_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + if (skb->len < sizeof(*nlh) || + nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) return; if (!netlink_capable(skb, CAP_NET_ADMIN)) @@ -134,7 +136,7 @@ static int __init dn_rtmsg_init(void) return -ENOMEM; } - rv = nf_register_hook(&dnrmg_ops); + rv = nf_register_net_hook(&init_net, &dnrmg_ops); if (rv) { netlink_kernel_release(dnrmg); } @@ -144,7 +146,7 @@ static int __init dn_rtmsg_init(void) static void __exit dn_rtmsg_fini(void) { - nf_unregister_hook(&dnrmg_ops); + nf_unregister_net_hook(&init_net, &dnrmg_ops); netlink_kernel_release(dnrmg); } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 9649238eef404095a89d34a006dc0b504bc47038..81a0868edb1d619a6ade13b7817db90e7a2d69a2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -6,7 +6,7 @@ config HAVE_NET_DSA config NET_DSA tristate "Distributed Switch Architecture" - depends on HAVE_NET_DSA + depends on HAVE_NET_DSA && MAY_USE_DEVLINK select NET_SWITCHDEV select PHYLIB ---help--- @@ -31,4 +31,10 @@ config NET_DSA_TAG_TRAILER config NET_DSA_TAG_QCA bool +config NET_DSA_TAG_MTK + bool + +config NET_DSA_TAG_LAN9303 + bool + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 31d343796251da06c979b3428f275f5911dfb2ab..0b747d75e65a265dc5059586e250a2b97ae3f067 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o slave.o dsa2.o switch.o +dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o @@ -8,3 +8,5 @@ dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o +dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b6d4f6a23f06c9d794a5eedc4c9f79810d5b06e5..90038d45a54764df55017b46258dc772b0af1c96 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -14,15 +14,17 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include #include +#include +#include #include "dsa_priv.h" static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, @@ -52,63 +54,16 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #endif #ifdef CONFIG_NET_DSA_TAG_QCA [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_MTK + [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_LAN9303 + [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, #endif [DSA_TAG_PROTO_NONE] = &none_ops, }; -/* switch driver registration ***********************************************/ -static DEFINE_MUTEX(dsa_switch_drivers_mutex); -static LIST_HEAD(dsa_switch_drivers); - -void register_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(register_switch_driver); - -void unregister_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(unregister_switch_driver); - -static const struct dsa_switch_ops * -dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, - const char **_name, void **priv) -{ - const struct dsa_switch_ops *ret; - struct list_head *list; - const char *name; - - ret = NULL; - name = NULL; - - mutex_lock(&dsa_switch_drivers_mutex); - list_for_each(list, &dsa_switch_drivers) { - const struct dsa_switch_ops *ops; - struct dsa_switch_driver *drv; - - drv = list_entry(list, struct dsa_switch_driver, list); - ops = drv->ops; - - name = ops->probe(parent, host_dev, sw_addr, priv); - if (name != NULL) { - ret = ops; - break; - } - } - mutex_unlock(&dsa_switch_drivers_mutex); - - *_name = name; - - return ret; -} - -/* basic switch operations **************************************************/ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, struct dsa_port *dport, int port) { @@ -140,23 +95,6 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, return 0; } -static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) -{ - struct dsa_port *dport; - int ret, port; - - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - - dport = &ds->ports[port]; - ret = dsa_cpu_dsa_setup(ds, dev, dport, port); - if (ret) - return ret; - } - return 0; -} - const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) { const struct dsa_device_ops *ops; @@ -206,168 +144,6 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) master->ethtool_ops = ds->dst->master_orig_ethtool_ops; } -static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) -{ - const struct dsa_switch_ops *ops = ds->ops; - struct dsa_switch_tree *dst = ds->dst; - struct dsa_chip_data *cd = ds->cd; - bool valid_name_found = false; - int index = ds->index; - int i, ret; - - /* - * Validate supplied switch configuration. - */ - for (i = 0; i < ds->num_ports; i++) { - char *name; - - name = cd->port_names[i]; - if (name == NULL) - continue; - - if (!strcmp(name, "cpu")) { - if (dst->cpu_switch) { - netdev_err(dst->master_netdev, - "multiple cpu ports?!\n"); - return -EINVAL; - } - dst->cpu_switch = ds; - dst->cpu_port = i; - ds->cpu_port_mask |= 1 << i; - } else if (!strcmp(name, "dsa")) { - ds->dsa_port_mask |= 1 << i; - } else { - ds->enabled_port_mask |= 1 << i; - } - valid_name_found = true; - } - - if (!valid_name_found && i == ds->num_ports) - return -EINVAL; - - /* Make the built-in MII bus mask match the number of ports, - * switch drivers can override this later - */ - ds->phys_mii_mask = ds->enabled_port_mask; - - /* - * If the CPU connects to this switch, set the switch tree - * tagging protocol to the preferred tagging format of this - * switch. - */ - if (dst->cpu_switch == ds) { - enum dsa_tag_protocol tag_protocol; - - tag_protocol = ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) - return PTR_ERR(dst->tag_ops); - - dst->rcv = dst->tag_ops->rcv; - } - - memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); - - /* - * Do basic register setup. - */ - ret = ops->setup(ds); - if (ret < 0) - return ret; - - ret = dsa_switch_register_notifier(ds); - if (ret) - return ret; - - if (ops->set_addr) { - ret = ops->set_addr(ds, dst->master_netdev->dev_addr); - if (ret < 0) - return ret; - } - - if (!ds->slave_mii_bus && ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(parent); - if (!ds->slave_mii_bus) - return -ENOMEM; - dsa_slave_mii_bus_init(ds); - - ret = mdiobus_register(ds->slave_mii_bus); - if (ret < 0) - return ret; - } - - /* - * Create network devices for physical switch ports. - */ - for (i = 0; i < ds->num_ports; i++) { - ds->ports[i].dn = cd->port_dn[i]; - - if (!(ds->enabled_port_mask & (1 << i))) - continue; - - ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); - if (ret < 0) - netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", - index, i, cd->port_names[i], ret); - } - - /* Perform configuration of the CPU and DSA ports */ - ret = dsa_cpu_dsa_setups(ds, parent); - if (ret < 0) - netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n", - index); - - ret = dsa_cpu_port_ethtool_setup(ds); - if (ret) - return ret; - - return 0; -} - -static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, int index, - struct device *parent, struct device *host_dev) -{ - struct dsa_chip_data *cd = dst->pd->chip + index; - const struct dsa_switch_ops *ops; - struct dsa_switch *ds; - int ret; - const char *name; - void *priv; - - /* - * Probe for switch model. - */ - ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (!ops) { - netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", - index); - return ERR_PTR(-EINVAL); - } - netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", - index, name); - - - /* - * Allocate and initialise switch state. - */ - ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); - if (!ds) - return ERR_PTR(-ENOMEM); - - ds->dst = dst; - ds->index = index; - ds->cd = cd; - ds->ops = ops; - ds->priv = priv; - - ret = dsa_switch_setup_one(ds, parent); - if (ret) - return ERR_PTR(ret); - - return ds; -} - void dsa_cpu_dsa_destroy(struct dsa_port *port) { struct device_node *port_dn = port->dn; @@ -376,86 +152,6 @@ void dsa_cpu_dsa_destroy(struct dsa_port *port) of_phy_deregister_fixed_link(port_dn); } -static void dsa_switch_destroy(struct dsa_switch *ds) -{ - int port; - - /* Destroy network devices for physical switch ports. */ - for (port = 0; port < ds->num_ports; port++) { - if (!(ds->enabled_port_mask & (1 << port))) - continue; - - if (!ds->ports[port].netdev) - continue; - - dsa_slave_destroy(ds->ports[port].netdev); - } - - /* Disable configuration of the CPU and DSA ports */ - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - dsa_cpu_dsa_destroy(&ds->ports[port]); - - /* Clearing a bit which is not set does no harm */ - ds->cpu_port_mask |= ~(1 << port); - ds->dsa_port_mask |= ~(1 << port); - } - - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_unregister(ds->slave_mii_bus); - - dsa_switch_unregister_notifier(ds); -} - -#ifdef CONFIG_PM_SLEEP -int dsa_switch_suspend(struct dsa_switch *ds) -{ - int i, ret = 0; - - /* Suspend slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) - continue; - - ret = dsa_slave_suspend(ds->ports[i].netdev); - if (ret) - return ret; - } - - if (ds->ops->suspend) - ret = ds->ops->suspend(ds); - - return ret; -} -EXPORT_SYMBOL_GPL(dsa_switch_suspend); - -int dsa_switch_resume(struct dsa_switch *ds) -{ - int i, ret = 0; - - if (ds->ops->resume) - ret = ds->ops->resume(ds); - - if (ret) - return ret; - - /* Resume slave network devices */ - for (i = 0; i < ds->num_ports; i++) { - if (!dsa_is_port_initialized(ds, i)) - continue; - - ret = dsa_slave_resume(ds->ports[i].netdev); - if (ret) - return ret; - } - - return 0; -} -EXPORT_SYMBOL_GPL(dsa_switch_resume); -#endif - -/* platform driver init and cleanup *****************************************/ static int dev_is_class(struct device *dev, void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) @@ -474,24 +170,6 @@ static struct device *dev_find_class(struct device *parent, char *class) return device_find_child(parent, class, dev_is_class); } -struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) -{ - struct device *d; - - d = dev_find_class(dev, "mdio_bus"); - if (d != NULL) { - struct mii_bus *bus; - - bus = to_mii_bus(d); - put_device(d); - - return bus; - } - - return NULL; -} -EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); - struct net_device *dsa_dev_to_net_device(struct device *dev) { struct device *d; @@ -511,458 +189,92 @@ struct net_device *dsa_dev_to_net_device(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); -#ifdef CONFIG_OF -static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *link) -{ - const __be32 *reg; - int link_sw_addr; - struct device_node *parent_sw; - int len; - - parent_sw = of_get_parent(link); - if (!parent_sw) - return -EINVAL; - - reg = of_get_property(parent_sw, "reg", &len); - if (!reg || (len != sizeof(*reg) * 2)) - return -EINVAL; - - /* - * Get the destination switch number from the second field of its 'reg' - * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. - */ - link_sw_addr = be32_to_cpup(reg + 1); - - if (link_sw_addr >= pd->nr_chips) - return -EINVAL; - - cd->rtable[link_sw_addr] = port_index; - - return 0; -} - -static int dsa_of_probe_links(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *port, - const char *port_name) -{ - struct device_node *link; - int link_index; - int ret; - - for (link_index = 0;; link_index++) { - link = of_parse_phandle(port, "link", link_index); - if (!link) - break; - - if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { - ret = dsa_of_setup_routing_table(pd, cd, chip_index, - port_index, link); - if (ret) - return ret; - } - } - return 0; -} - -static void dsa_of_free_platform_data(struct dsa_platform_data *pd) -{ - int i; - int port_index; - - for (i = 0; i < pd->nr_chips; i++) { - port_index = 0; - while (port_index < DSA_MAX_PORTS) { - kfree(pd->chip[i].port_names[port_index]); - port_index++; - } - - /* Drop our reference to the MDIO bus device */ - if (pd->chip[i].host_dev) - put_device(pd->chip[i].host_dev); - } - kfree(pd->chip); -} - -static int dsa_of_probe(struct device *dev) +static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) { - struct device_node *np = dev->of_node; - struct device_node *child, *mdio, *ethernet, *port; - struct mii_bus *mdio_bus, *mdio_bus_switch; - struct net_device *ethernet_dev; - struct dsa_platform_data *pd; - struct dsa_chip_data *cd; - const char *port_name; - int chip_index, port_index; - const unsigned int *sw_addr, *port_reg; - u32 eeprom_len; - int ret; - - mdio = of_parse_phandle(np, "dsa,mii-bus", 0); - if (!mdio) - return -EINVAL; - - mdio_bus = of_mdio_find_bus(mdio); - if (!mdio_bus) - return -EPROBE_DEFER; - - ethernet = of_parse_phandle(np, "dsa,ethernet", 0); - if (!ethernet) { - ret = -EINVAL; - goto out_put_mdio; - } - - ethernet_dev = of_find_net_device_by_node(ethernet); - if (!ethernet_dev) { - ret = -EPROBE_DEFER; - goto out_put_mdio; - } - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = -ENOMEM; - goto out_put_ethernet; - } + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct sk_buff *nskb = NULL; - dev->platform_data = pd; - pd->of_netdev = ethernet_dev; - pd->nr_chips = of_get_available_child_count(np); - if (pd->nr_chips > DSA_MAX_SWITCHES) - pd->nr_chips = DSA_MAX_SWITCHES; - - pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data), - GFP_KERNEL); - if (!pd->chip) { - ret = -ENOMEM; - goto out_free; + if (unlikely(dst == NULL)) { + kfree_skb(skb); + return 0; } - chip_index = -1; - for_each_available_child_of_node(np, child) { - int i; - - chip_index++; - cd = &pd->chip[chip_index]; - - cd->of_node = child; - - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - cd->rtable[i] = DSA_RTABLE_NONE; - - /* When assigning the host device, increment its refcount */ - cd->host_dev = get_device(&mdio_bus->dev); - - sw_addr = of_get_property(child, "reg", NULL); - if (!sw_addr) - continue; - - cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr >= PHY_MAX_ADDR) - continue; - - if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) - cd->eeprom_len = eeprom_len; - - mdio = of_parse_phandle(child, "mii-bus", 0); - if (mdio) { - mdio_bus_switch = of_mdio_find_bus(mdio); - if (!mdio_bus_switch) { - ret = -EPROBE_DEFER; - goto out_free_chip; - } - - /* Drop the mdio_bus device ref, replacing the host - * device with the mdio_bus_switch device, keeping - * the refcount from of_mdio_find_bus() above. - */ - put_device(cd->host_dev); - cd->host_dev = &mdio_bus_switch->dev; - } - - for_each_available_child_of_node(child, port) { - port_reg = of_get_property(port, "reg", NULL); - if (!port_reg) - continue; - - port_index = be32_to_cpup(port_reg); - if (port_index >= DSA_MAX_PORTS) - break; - - port_name = of_get_property(port, "label", NULL); - if (!port_name) - continue; - - cd->port_dn[port_index] = port; - - cd->port_names[port_index] = kstrdup(port_name, - GFP_KERNEL); - if (!cd->port_names[port_index]) { - ret = -ENOMEM; - goto out_free_chip; - } - - ret = dsa_of_probe_links(pd, cd, chip_index, - port_index, port, port_name); - if (ret) - goto out_free_chip; + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return 0; - } + nskb = dst->rcv(skb, dev, pt, orig_dev); + if (!nskb) { + kfree_skb(skb); + return 0; } - /* The individual chips hold their own refcount on the mdio bus, - * so drop ours */ - put_device(&mdio_bus->dev); - - return 0; - -out_free_chip: - dsa_of_free_platform_data(pd); -out_free: - kfree(pd); - dev->platform_data = NULL; -out_put_ethernet: - put_device(ðernet_dev->dev); -out_put_mdio: - put_device(&mdio_bus->dev); - return ret; -} + skb = nskb; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); -static void dsa_of_remove(struct device *dev) -{ - struct dsa_platform_data *pd = dev->platform_data; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; - if (!dev->of_node) - return; + netif_receive_skb(skb); - dsa_of_free_platform_data(pd); - put_device(&pd->of_netdev->dev); - kfree(pd); -} -#else -static inline int dsa_of_probe(struct device *dev) -{ return 0; } -static inline void dsa_of_remove(struct device *dev) -{ -} -#endif - -static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, - struct device *parent, struct dsa_platform_data *pd) +#ifdef CONFIG_PM_SLEEP +int dsa_switch_suspend(struct dsa_switch *ds) { - int i; - unsigned configured = 0; - - dst->pd = pd; - dst->master_netdev = dev; - dst->cpu_port = -1; - - for (i = 0; i < pd->nr_chips; i++) { - struct dsa_switch *ds; + int i, ret = 0; - ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); - if (IS_ERR(ds)) { - netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", - i, PTR_ERR(ds)); + /* Suspend slave network devices */ + for (i = 0; i < ds->num_ports; i++) { + if (!dsa_is_port_initialized(ds, i)) continue; - } - - dst->ds[i] = ds; - - ++configured; - } - - /* - * If no switch was found, exit cleanly - */ - if (!configured) - return -EPROBE_DEFER; - - /* - * If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dev->dsa_ptr = (void *)dst; - - return 0; -} -static int dsa_probe(struct platform_device *pdev) -{ - struct dsa_platform_data *pd = pdev->dev.platform_data; - struct net_device *dev; - struct dsa_switch_tree *dst; - int ret; - - if (pdev->dev.of_node) { - ret = dsa_of_probe(&pdev->dev); + ret = dsa_slave_suspend(ds->ports[i].netdev); if (ret) return ret; - - pd = pdev->dev.platform_data; - } - - if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) - return -EINVAL; - - if (pd->of_netdev) { - dev = pd->of_netdev; - dev_hold(dev); - } else { - dev = dsa_dev_to_net_device(pd->netdev); - } - if (dev == NULL) { - ret = -EPROBE_DEFER; - goto out; - } - - if (dev->dsa_ptr != NULL) { - dev_put(dev); - ret = -EEXIST; - goto out; } - dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); - if (dst == NULL) { - dev_put(dev); - ret = -ENOMEM; - goto out; - } - - platform_set_drvdata(pdev, dst); - - ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) { - dev_put(dev); - goto out; - } - - return 0; - -out: - dsa_of_remove(&pdev->dev); + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); return ret; } +EXPORT_SYMBOL_GPL(dsa_switch_suspend); -static void dsa_remove_dst(struct dsa_switch_tree *dst) +int dsa_switch_resume(struct dsa_switch *ds) { - int i; + int i, ret = 0; - dst->master_netdev->dsa_ptr = NULL; + if (ds->ops->resume) + ret = ds->ops->resume(ds); - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + if (ret) + return ret; - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; + /* Resume slave network devices */ + for (i = 0; i < ds->num_ports; i++) { + if (!dsa_is_port_initialized(ds, i)) + continue; - if (ds) - dsa_switch_destroy(ds); + ret = dsa_slave_resume(ds->ports[i].netdev); + if (ret) + return ret; } - dsa_cpu_port_ethtool_restore(dst->cpu_switch); - - dev_put(dst->master_netdev); -} - -static int dsa_remove(struct platform_device *pdev) -{ - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - - dsa_remove_dst(dst); - dsa_of_remove(&pdev->dev); - return 0; } - -static void dsa_shutdown(struct platform_device *pdev) -{ -} - -static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - - if (unlikely(dst == NULL)) { - kfree_skb(skb); - return 0; - } - - return dst->rcv(skb, dev, pt, orig_dev); -} +EXPORT_SYMBOL_GPL(dsa_switch_resume); +#endif static struct packet_type dsa_pack_type __read_mostly = { .type = cpu_to_be16(ETH_P_XDSA), .func = dsa_switch_rcv, }; -#ifdef CONFIG_PM_SLEEP -static int dsa_suspend(struct device *d) -{ - struct platform_device *pdev = to_platform_device(d); - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL) - ret = dsa_switch_suspend(ds); - } - - return ret; -} - -static int dsa_resume(struct device *d) -{ - struct platform_device *pdev = to_platform_device(d); - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL) - ret = dsa_switch_resume(ds); - } - - return ret; -} -#endif - -static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); - -static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "marvell,dsa", }, - {} -}; -MODULE_DEVICE_TABLE(of, dsa_of_match_table); - -static struct platform_driver dsa_driver = { - .probe = dsa_probe, - .remove = dsa_remove, - .shutdown = dsa_shutdown, - .driver = { - .name = "dsa", - .of_match_table = dsa_of_match_table, - .pm = &dsa_pm_ops, - }, -}; - static int __init dsa_init_module(void) { int rc; @@ -971,7 +283,7 @@ static int __init dsa_init_module(void) if (rc) return rc; - rc = platform_driver_register(&dsa_driver); + rc = dsa_legacy_register(); if (rc) return rc; @@ -985,7 +297,7 @@ static void __exit dsa_cleanup_module(void) { dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); - platform_driver_unregister(&dsa_driver); + dsa_legacy_unregister(); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 737be6470c7f27ba032d01667e039f3c03c17ae8..7796580e99ee2c57cdd5503130c8ec2e121d879a 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -13,16 +13,20 @@ #include #include #include +#include #include #include -#include #include #include +#include #include "dsa_priv.h" static LIST_HEAD(dsa_switch_trees); static DEFINE_MUTEX(dsa2_mutex); +static const struct devlink_ops dsa_devlink_ops = { +}; + static struct dsa_switch_tree *dsa_get_dst(u32 tree) { struct dsa_switch_tree *dst; @@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index, return err; } - return 0; + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + + return devlink_port_register(ds->devlink, + &ds->ports[index].devlink_port, + index); } static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); dsa_cpu_dsa_destroy(port); } @@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index, ds->cpu_port_mask |= BIT(index); - return 0; + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, + index); + return err; } static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); dsa_cpu_dsa_destroy(port); ds->cpu_port_mask &= ~BIT(index); @@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index, return err; } + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, + index); + if (err) + return err; + + devlink_port_type_eth_set(&ds->ports[index].devlink_port, + ds->ports[index].netdev); + return 0; } static void dsa_user_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); if (ds->ports[index].netdev) { dsa_slave_destroy(ds->ports[index].netdev); ds->ports[index].netdev = NULL; @@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) */ ds->phys_mii_mask = ds->enabled_port_mask; + /* Add the switch to devlink before calling setup, so that setup can + * add dpipe tables + */ + ds->devlink = devlink_alloc(&dsa_devlink_ops, 0); + if (!ds->devlink) + return -ENOMEM; + + err = devlink_register(ds->devlink, ds->dev); + if (err) + return err; + err = ds->ops->setup(ds); if (err < 0) return err; @@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) mdiobus_unregister(ds->slave_mii_bus); dsa_switch_unregister_notifier(ds); + + if (ds->devlink) { + devlink_unregister(ds->devlink); + devlink_free(ds->devlink); + ds->devlink = NULL; + } + } static int dsa_dst_apply(struct dsa_switch_tree *dst) @@ -440,8 +484,10 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - if (dst->cpu_switch) + if (dst->cpu_switch) { dsa_cpu_port_ethtool_restore(dst->cpu_switch); + dst->cpu_switch = NULL; + } pr_info("DSA: tree %d unapplied\n", dst->tree); dst->applied = false; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 0706a511244e92ff0174173eb388a41ff59141f4..f4a88e4852138864081c1871fc882803e53247db 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -17,8 +17,9 @@ struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev); }; struct dsa_slave_priv { @@ -54,6 +55,10 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); int dsa_cpu_port_ethtool_setup(struct dsa_switch *ds); void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds); +/* legacy.c */ +int dsa_legacy_register(void); +void dsa_legacy_unregister(void); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -85,4 +90,10 @@ extern const struct dsa_device_ops brcm_netdev_ops; /* tag_qca.c */ extern const struct dsa_device_ops qca_netdev_ops; +/* tag_mtk.c */ +extern const struct dsa_device_ops mtk_netdev_ops; + +/* tag_lan9303.c */ +extern const struct dsa_device_ops lan9303_netdev_ops; + #endif diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c new file mode 100644 index 0000000000000000000000000000000000000000..7281098df04ecd597b7824a9c0d2ec1d7f60e2b9 --- /dev/null +++ b/net/dsa/legacy.c @@ -0,0 +1,771 @@ +/* + * net/dsa/legacy.c - Hardware switch handling + * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "dsa_priv.h" + +/* switch driver registration ***********************************************/ +static DEFINE_MUTEX(dsa_switch_drivers_mutex); +static LIST_HEAD(dsa_switch_drivers); + +void register_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_add_tail(&drv->list, &dsa_switch_drivers); + mutex_unlock(&dsa_switch_drivers_mutex); +} +EXPORT_SYMBOL_GPL(register_switch_driver); + +void unregister_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_del_init(&drv->list); + mutex_unlock(&dsa_switch_drivers_mutex); +} +EXPORT_SYMBOL_GPL(unregister_switch_driver); + +static const struct dsa_switch_ops * +dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, + const char **_name, void **priv) +{ + const struct dsa_switch_ops *ret; + struct list_head *list; + const char *name; + + ret = NULL; + name = NULL; + + mutex_lock(&dsa_switch_drivers_mutex); + list_for_each(list, &dsa_switch_drivers) { + const struct dsa_switch_ops *ops; + struct dsa_switch_driver *drv; + + drv = list_entry(list, struct dsa_switch_driver, list); + ops = drv->ops; + + name = ops->probe(parent, host_dev, sw_addr, priv); + if (name != NULL) { + ret = ops; + break; + } + } + mutex_unlock(&dsa_switch_drivers_mutex); + + *_name = name; + + return ret; +} + +/* basic switch operations **************************************************/ +static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) +{ + struct dsa_port *dport; + int ret, port; + + for (port = 0; port < ds->num_ports; port++) { + if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) + continue; + + dport = &ds->ports[port]; + ret = dsa_cpu_dsa_setup(ds, dev, dport, port); + if (ret) + return ret; + } + return 0; +} + +static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) +{ + const struct dsa_switch_ops *ops = ds->ops; + struct dsa_switch_tree *dst = ds->dst; + struct dsa_chip_data *cd = ds->cd; + bool valid_name_found = false; + int index = ds->index; + int i, ret; + + /* + * Validate supplied switch configuration. + */ + for (i = 0; i < ds->num_ports; i++) { + char *name; + + name = cd->port_names[i]; + if (name == NULL) + continue; + + if (!strcmp(name, "cpu")) { + if (dst->cpu_switch) { + netdev_err(dst->master_netdev, + "multiple cpu ports?!\n"); + return -EINVAL; + } + dst->cpu_switch = ds; + dst->cpu_port = i; + ds->cpu_port_mask |= 1 << i; + } else if (!strcmp(name, "dsa")) { + ds->dsa_port_mask |= 1 << i; + } else { + ds->enabled_port_mask |= 1 << i; + } + valid_name_found = true; + } + + if (!valid_name_found && i == ds->num_ports) + return -EINVAL; + + /* Make the built-in MII bus mask match the number of ports, + * switch drivers can override this later + */ + ds->phys_mii_mask = ds->enabled_port_mask; + + /* + * If the CPU connects to this switch, set the switch tree + * tagging protocol to the preferred tagging format of this + * switch. + */ + if (dst->cpu_switch == ds) { + enum dsa_tag_protocol tag_protocol; + + tag_protocol = ops->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(dst->tag_ops)) + return PTR_ERR(dst->tag_ops); + + dst->rcv = dst->tag_ops->rcv; + } + + memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); + + /* + * Do basic register setup. + */ + ret = ops->setup(ds); + if (ret < 0) + return ret; + + ret = dsa_switch_register_notifier(ds); + if (ret) + return ret; + + if (ops->set_addr) { + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + if (ret < 0) + return ret; + } + + if (!ds->slave_mii_bus && ops->phy_read) { + ds->slave_mii_bus = devm_mdiobus_alloc(parent); + if (!ds->slave_mii_bus) + return -ENOMEM; + dsa_slave_mii_bus_init(ds); + + ret = mdiobus_register(ds->slave_mii_bus); + if (ret < 0) + return ret; + } + + /* + * Create network devices for physical switch ports. + */ + for (i = 0; i < ds->num_ports; i++) { + ds->ports[i].dn = cd->port_dn[i]; + + if (!(ds->enabled_port_mask & (1 << i))) + continue; + + ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); + if (ret < 0) + netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", + index, i, cd->port_names[i], ret); + } + + /* Perform configuration of the CPU and DSA ports */ + ret = dsa_cpu_dsa_setups(ds, parent); + if (ret < 0) + netdev_err(dst->master_netdev, "[%d] : can't configure CPU and DSA ports\n", + index); + + ret = dsa_cpu_port_ethtool_setup(ds); + if (ret) + return ret; + + return 0; +} + +static struct dsa_switch * +dsa_switch_setup(struct dsa_switch_tree *dst, int index, + struct device *parent, struct device *host_dev) +{ + struct dsa_chip_data *cd = dst->pd->chip + index; + const struct dsa_switch_ops *ops; + struct dsa_switch *ds; + int ret; + const char *name; + void *priv; + + /* + * Probe for switch model. + */ + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { + netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", + index); + return ERR_PTR(-EINVAL); + } + netdev_info(dst->master_netdev, "[%d]: detected a %s switch\n", + index, name); + + + /* + * Allocate and initialise switch state. + */ + ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); + if (!ds) + return ERR_PTR(-ENOMEM); + + ds->dst = dst; + ds->index = index; + ds->cd = cd; + ds->ops = ops; + ds->priv = priv; + + ret = dsa_switch_setup_one(ds, parent); + if (ret) + return ERR_PTR(ret); + + return ds; +} + +static void dsa_switch_destroy(struct dsa_switch *ds) +{ + int port; + + /* Destroy network devices for physical switch ports. */ + for (port = 0; port < ds->num_ports; port++) { + if (!(ds->enabled_port_mask & (1 << port))) + continue; + + if (!ds->ports[port].netdev) + continue; + + dsa_slave_destroy(ds->ports[port].netdev); + } + + /* Disable configuration of the CPU and DSA ports */ + for (port = 0; port < ds->num_ports; port++) { + if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) + continue; + dsa_cpu_dsa_destroy(&ds->ports[port]); + + /* Clearing a bit which is not set does no harm */ + ds->cpu_port_mask |= ~(1 << port); + ds->dsa_port_mask |= ~(1 << port); + } + + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_unregister(ds->slave_mii_bus); + + dsa_switch_unregister_notifier(ds); +} + +/* platform driver init and cleanup *****************************************/ +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "mdio_bus"); + if (d != NULL) { + struct mii_bus *bus; + + bus = to_mii_bus(d); + put_device(d); + + return bus; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); + +#ifdef CONFIG_OF +static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, int port_index, + struct device_node *link) +{ + const __be32 *reg; + int link_sw_addr; + struct device_node *parent_sw; + int len; + + parent_sw = of_get_parent(link); + if (!parent_sw) + return -EINVAL; + + reg = of_get_property(parent_sw, "reg", &len); + if (!reg || (len != sizeof(*reg) * 2)) + return -EINVAL; + + /* + * Get the destination switch number from the second field of its 'reg' + * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. + */ + link_sw_addr = be32_to_cpup(reg + 1); + + if (link_sw_addr >= pd->nr_chips) + return -EINVAL; + + cd->rtable[link_sw_addr] = port_index; + + return 0; +} + +static int dsa_of_probe_links(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, int port_index, + struct device_node *port, + const char *port_name) +{ + struct device_node *link; + int link_index; + int ret; + + for (link_index = 0;; link_index++) { + link = of_parse_phandle(port, "link", link_index); + if (!link) + break; + + if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { + ret = dsa_of_setup_routing_table(pd, cd, chip_index, + port_index, link); + if (ret) + return ret; + } + } + return 0; +} + +static void dsa_of_free_platform_data(struct dsa_platform_data *pd) +{ + int i; + int port_index; + + for (i = 0; i < pd->nr_chips; i++) { + port_index = 0; + while (port_index < DSA_MAX_PORTS) { + kfree(pd->chip[i].port_names[port_index]); + port_index++; + } + + /* Drop our reference to the MDIO bus device */ + if (pd->chip[i].host_dev) + put_device(pd->chip[i].host_dev); + } + kfree(pd->chip); +} + +static int dsa_of_probe(struct device *dev) +{ + struct device_node *np = dev->of_node; + struct device_node *child, *mdio, *ethernet, *port; + struct mii_bus *mdio_bus, *mdio_bus_switch; + struct net_device *ethernet_dev; + struct dsa_platform_data *pd; + struct dsa_chip_data *cd; + const char *port_name; + int chip_index, port_index; + const unsigned int *sw_addr, *port_reg; + u32 eeprom_len; + int ret; + + mdio = of_parse_phandle(np, "dsa,mii-bus", 0); + if (!mdio) + return -EINVAL; + + mdio_bus = of_mdio_find_bus(mdio); + if (!mdio_bus) + return -EPROBE_DEFER; + + ethernet = of_parse_phandle(np, "dsa,ethernet", 0); + if (!ethernet) { + ret = -EINVAL; + goto out_put_mdio; + } + + ethernet_dev = of_find_net_device_by_node(ethernet); + if (!ethernet_dev) { + ret = -EPROBE_DEFER; + goto out_put_mdio; + } + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) { + ret = -ENOMEM; + goto out_put_ethernet; + } + + dev->platform_data = pd; + pd->of_netdev = ethernet_dev; + pd->nr_chips = of_get_available_child_count(np); + if (pd->nr_chips > DSA_MAX_SWITCHES) + pd->nr_chips = DSA_MAX_SWITCHES; + + pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data), + GFP_KERNEL); + if (!pd->chip) { + ret = -ENOMEM; + goto out_free; + } + + chip_index = -1; + for_each_available_child_of_node(np, child) { + int i; + + chip_index++; + cd = &pd->chip[chip_index]; + + cd->of_node = child; + + /* Initialize the routing table */ + for (i = 0; i < DSA_MAX_SWITCHES; ++i) + cd->rtable[i] = DSA_RTABLE_NONE; + + /* When assigning the host device, increment its refcount */ + cd->host_dev = get_device(&mdio_bus->dev); + + sw_addr = of_get_property(child, "reg", NULL); + if (!sw_addr) + continue; + + cd->sw_addr = be32_to_cpup(sw_addr); + if (cd->sw_addr >= PHY_MAX_ADDR) + continue; + + if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) + cd->eeprom_len = eeprom_len; + + mdio = of_parse_phandle(child, "mii-bus", 0); + if (mdio) { + mdio_bus_switch = of_mdio_find_bus(mdio); + if (!mdio_bus_switch) { + ret = -EPROBE_DEFER; + goto out_free_chip; + } + + /* Drop the mdio_bus device ref, replacing the host + * device with the mdio_bus_switch device, keeping + * the refcount from of_mdio_find_bus() above. + */ + put_device(cd->host_dev); + cd->host_dev = &mdio_bus_switch->dev; + } + + for_each_available_child_of_node(child, port) { + port_reg = of_get_property(port, "reg", NULL); + if (!port_reg) + continue; + + port_index = be32_to_cpup(port_reg); + if (port_index >= DSA_MAX_PORTS) + break; + + port_name = of_get_property(port, "label", NULL); + if (!port_name) + continue; + + cd->port_dn[port_index] = port; + + cd->port_names[port_index] = kstrdup(port_name, + GFP_KERNEL); + if (!cd->port_names[port_index]) { + ret = -ENOMEM; + goto out_free_chip; + } + + ret = dsa_of_probe_links(pd, cd, chip_index, + port_index, port, port_name); + if (ret) + goto out_free_chip; + + } + } + + /* The individual chips hold their own refcount on the mdio bus, + * so drop ours */ + put_device(&mdio_bus->dev); + + return 0; + +out_free_chip: + dsa_of_free_platform_data(pd); +out_free: + kfree(pd); + dev->platform_data = NULL; +out_put_ethernet: + put_device(ðernet_dev->dev); +out_put_mdio: + put_device(&mdio_bus->dev); + return ret; +} + +static void dsa_of_remove(struct device *dev) +{ + struct dsa_platform_data *pd = dev->platform_data; + + if (!dev->of_node) + return; + + dsa_of_free_platform_data(pd); + put_device(&pd->of_netdev->dev); + kfree(pd); +} +#else +static inline int dsa_of_probe(struct device *dev) +{ + return 0; +} + +static inline void dsa_of_remove(struct device *dev) +{ +} +#endif + +static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, + struct device *parent, struct dsa_platform_data *pd) +{ + int i; + unsigned configured = 0; + + dst->pd = pd; + dst->master_netdev = dev; + dst->cpu_port = -1; + + for (i = 0; i < pd->nr_chips; i++) { + struct dsa_switch *ds; + + ds = dsa_switch_setup(dst, i, parent, pd->chip[i].host_dev); + if (IS_ERR(ds)) { + netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", + i, PTR_ERR(ds)); + continue; + } + + dst->ds[i] = ds; + + ++configured; + } + + /* + * If no switch was found, exit cleanly + */ + if (!configured) + return -EPROBE_DEFER; + + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. + */ + wmb(); + dev->dsa_ptr = (void *)dst; + + return 0; +} + +static int dsa_probe(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct dsa_switch_tree *dst; + int ret; + + if (pdev->dev.of_node) { + ret = dsa_of_probe(&pdev->dev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + + if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) + return -EINVAL; + + if (pd->of_netdev) { + dev = pd->of_netdev; + dev_hold(dev); + } else { + dev = dsa_dev_to_net_device(pd->netdev); + } + if (dev == NULL) { + ret = -EPROBE_DEFER; + goto out; + } + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + ret = -EEXIST; + goto out; + } + + dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); + if (dst == NULL) { + dev_put(dev); + ret = -ENOMEM; + goto out; + } + + platform_set_drvdata(pdev, dst); + + ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); + if (ret) { + dev_put(dev); + goto out; + } + + return 0; + +out: + dsa_of_remove(&pdev->dev); + + return ret; +} + +static void dsa_remove_dst(struct dsa_switch_tree *dst) +{ + int i; + + dst->master_netdev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); + + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds) + dsa_switch_destroy(ds); + } + + dsa_cpu_port_ethtool_restore(dst->cpu_switch); + + dev_put(dst->master_netdev); +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + + dsa_remove_dst(dst); + dsa_of_remove(&pdev->dev); + + return 0; +} + +static void dsa_shutdown(struct platform_device *pdev) +{ +} + +#ifdef CONFIG_PM_SLEEP +static int dsa_suspend(struct device *d) +{ + struct platform_device *pdev = to_platform_device(d); + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + int i, ret = 0; + + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds != NULL) + ret = dsa_switch_suspend(ds); + } + + return ret; +} + +static int dsa_resume(struct device *d) +{ + struct platform_device *pdev = to_platform_device(d); + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + int i, ret = 0; + + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds != NULL) + ret = dsa_switch_resume(ds); + } + + return ret; +} +#endif + +static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); + +static const struct of_device_id dsa_of_match_table[] = { + { .compatible = "marvell,dsa", }, + {} +}; +MODULE_DEVICE_TABLE(of, dsa_of_match_table); + +static struct platform_driver dsa_driver = { + .probe = dsa_probe, + .remove = dsa_remove, + .shutdown = dsa_shutdown, + .driver = { + .name = "dsa", + .of_match_table = dsa_of_match_table, + .pm = &dsa_pm_ops, + }, +}; + +int dsa_legacy_register(void) +{ + return platform_driver_register(&dsa_driver); +} + +void dsa_legacy_unregister(void) +{ + platform_driver_unregister(&dsa_driver); +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c34872e1febc4b75d1b69b18a8a1189405ca30fa..7693182df81e61d14540cd43be3ae7e134eef576 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -419,8 +420,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, return 0; } -static int dsa_fastest_ageing_time(struct dsa_switch *ds, - unsigned int ageing_time) +static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds, + unsigned int ageing_time) { int i; @@ -443,9 +444,13 @@ static int dsa_slave_ageing_time(struct net_device *dev, unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time); unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_prepare(trans)) { + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; return 0; + } /* Keep the fastest ageing time in case of multiple bridges */ p->dp->ageing_time = ageing_time; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 6456dacf9ae9e0b88585defc026179423a80e4e4..ca6e26e514f089cfa8991ef6a7bd790bd8db14fe 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -1,7 +1,8 @@ /* * Handling of a single switch chip, part of a switch fabric * - * Copyright (c) 2017 Vivien Didelot + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, if (ds->index == info->sw_index && ds->ops->port_bridge_join) return ds->ops->port_bridge_join(ds, info->port, info->br); - if (ds->index != info->sw_index) - dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n", - info->sw_index, info->port, netdev_name(info->br)); + if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join) + return ds->ops->crosschip_bridge_join(ds, info->sw_index, + info->port, info->br); return 0; } @@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, if (ds->index == info->sw_index && ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, info->port, info->br); - if (ds->index != info->sw_index) - dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n", - info->sw_index, info->port, netdev_name(info->br)); + if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, + info->br); return 0; } diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 5d925b6b2bb14f78f84a06b84b4fa19bd6846e82..2a9b52c5af86b5308d7d71a3340a0e48768bdb60 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "dsa_priv.h" /* This tag length is 4 bytes, older ones were 6 bytes, we do not @@ -91,23 +92,17 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev return NULL; } -static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; int source_port; u8 *brcm_tag; - if (unlikely(dst == NULL)) - goto out_drop; - ds = dst->cpu_switch; - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) goto out_drop; @@ -139,22 +134,12 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; skb->dev = ds->ports[source_port].netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops brcm_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 72579ceea381b7e2bce99a28208810b707434f09..1c6633f0de01909f950a03349b1b4c08c2151839 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dsa_priv.h" #define DSA_HLEN 4 @@ -67,8 +68,9 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) return NULL; } -static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -76,13 +78,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_device; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) goto out_drop; @@ -164,21 +159,11 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, } skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops dsa_netdev_ops = { diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 648c051817a1b4a4e64cda67bab8c81288027a4e..d9c668aa5e54682914a0e61b3cb6373a71a0ee8b 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dsa_priv.h" #define DSA_HLEN 4 @@ -80,8 +81,9 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) return NULL; } -static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -89,13 +91,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_device; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) goto out_drop; @@ -183,21 +178,11 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, } skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops edsa_netdev_ops = { diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c new file mode 100644 index 0000000000000000000000000000000000000000..70130ed5c21a9fa5e218e51ab4448c35c9005eba --- /dev/null +++ b/net/dsa/tag_lan9303.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2017 Pengutronix, Juergen Borleis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include +#include +#include +#include +#include "dsa_priv.h" + +/* To define the outgoing port and to discover the incoming port a regular + * VLAN tag is used by the LAN9303. But its VID meaning is 'special': + * + * Dest MAC Src MAC TAG Type + * ...| 1 2 3 4 5 6 | 1 2 3 4 5 6 | 1 2 3 4 | 1 2 |... + * |<------->| + * TAG: + * |<------------->| + * | 1 2 | 3 4 | + * TPID VID + * 0x8100 + * + * VID bit 3 indicates a request for an ALR lookup. + * + * If VID bit 3 is zero, then bits 0 and 1 specify the destination port + * (0, 1, 2) or broadcast (3) or the source port (1, 2). + * + * VID bit 4 is used to specify if the STP port state should be overridden. + * Required when no forwarding between the external ports should happen. + */ + +#define LAN9303_TAG_LEN 4 +#define LAN9303_MAX_PORTS 3 + +static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u16 *lan9303_tag; + + /* insert a special VLAN tag between the MAC addresses + * and the current ethertype field. + */ + if (skb_cow_head(skb, LAN9303_TAG_LEN) < 0) { + dev_dbg(&dev->dev, + "Cannot make room for the special tag. Dropping packet\n"); + goto out_free; + } + + /* provide 'LAN9303_TAG_LEN' bytes additional space */ + skb_push(skb, LAN9303_TAG_LEN); + + /* make room between MACs and Ether-Type */ + memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN); + + lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); + lan9303_tag[0] = htons(ETH_P_8021Q); + lan9303_tag[1] = htons(p->dp->index | BIT(4)); + + return skb; +out_free: + kfree_skb(skb); + return NULL; +} + +static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + u16 *lan9303_tag; + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + unsigned int source_port; + + ds = dst->ds[0]; + + if (unlikely(!ds)) { + dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n"); + return NULL; + } + + if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { + dev_warn_ratelimited(&dev->dev, + "Dropping packet, cannot pull\n"); + return NULL; + } + + /* '->data' points into the middle of our special VLAN tag information: + * + * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type + * ^ + * ->data + */ + lan9303_tag = (u16 *)(skb->data - 2); + + if (lan9303_tag[0] != htons(ETH_P_8021Q)) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n"); + return NULL; + } + + source_port = ntohs(lan9303_tag[1]) & 0x3; + + if (source_port >= LAN9303_MAX_PORTS) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); + return NULL; + } + + if (!ds->ports[source_port].netdev) { + dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n"); + return NULL; + } + + /* remove the special VLAN tag between the MAC addresses + * and the current ethertype field. + */ + skb_pull_rcsum(skb, 2 + 2); + memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), + 2 * ETH_ALEN); + + /* forward the packet to the dedicated interface */ + skb->dev = ds->ports[source_port].netdev; + + return skb; +} + +const struct dsa_device_ops lan9303_netdev_ops = { + .xmit = lan9303_xmit, + .rcv = lan9303_rcv, +}; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c new file mode 100644 index 0000000000000000000000000000000000000000..837cdddb53f093c6283451f7626612312008bd0d --- /dev/null +++ b/net/dsa/tag_mtk.c @@ -0,0 +1,100 @@ +/* + * Mediatek DSA Tag support + * Copyright (C) 2017 Landen Chao + * Sean Wang + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include "dsa_priv.h" + +#define MTK_HDR_LEN 4 +#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) +#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) + +static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *mtk_tag; + + if (skb_cow_head(skb, MTK_HDR_LEN) < 0) + goto out_free; + + skb_push(skb, MTK_HDR_LEN); + + memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); + + /* Build the tag after the MAC Source Address */ + mtk_tag = skb->data + 2 * ETH_ALEN; + mtk_tag[0] = 0; + mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + mtk_tag[2] = 0; + mtk_tag[3] = 0; + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + int port; + __be16 *phdr, hdr; + + if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) + goto out_drop; + + /* The MTK header is added by the switch between src addr + * and ethertype at this point, skb->data points to 2 bytes + * after src addr so header should be 2 bytes right before. + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Remove MTK tag and recalculate checksum. */ + skb_pull_rcsum(skb, MTK_HDR_LEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - MTK_HDR_LEN, + 2 * ETH_ALEN); + + /* This protocol doesn't support cascading multiple + * switches so it's safe to assume the switch is first + * in the tree. + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + skb->dev = ds->ports[port].netdev; + + return skb; + +out_drop: + return NULL; +} + +const struct dsa_device_ops mtk_netdev_ops = { + .xmit = mtk_tag_xmit, + .rcv = mtk_tag_rcv, +}; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 30240f343aea8450b13936159b4b9a76126a8977..3ba3f59f7a3433b3731a5b3dc501eb22660d7cce 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -12,6 +12,7 @@ */ #include +#include #include "dsa_priv.h" #define QCA_HDR_LEN 2 @@ -65,8 +66,9 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) return NULL; } -static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -74,13 +76,6 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, int port; __be16 *phdr, hdr; - if (unlikely(!dst)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - goto out; - if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) goto out_drop; @@ -114,22 +109,12 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, goto out_drop; /* Update skb & forward the frame accordingly */ - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; skb->dev = ds->ports[port].netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops qca_netdev_ops = { diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 26f977176978085af9c034319c754a1ac7501d4c..aafc2fc74c3067dd05c67c409e40e8ceef33cf0c 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "dsa_priv.h" static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) @@ -57,22 +58,17 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) return nskb; } -static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; u8 *trailer; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; ds = dst->cpu_switch; - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (skb_linearize(skb)) goto out_drop; @@ -88,21 +84,11 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, pskb_trim_rcsum(skb, skb->len - 4); skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops trailer_netdev_ops = { diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index c73160fb11e7c666f8ac8ed4103b7312ff769a58..0a0a392dc2bd64b8c4202cc1361d828f9f984dd1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -378,7 +378,6 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) del_timer_sync(&hsr->announce_timer); synchronize_rcu(); - free_netdev(hsr_dev); } static const struct net_device_ops hsr_device_ops = { @@ -404,7 +403,8 @@ void hsr_dev_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &hsr_type); dev->priv_flags |= IFF_NO_QUEUE; - dev->destructor = hsr_dev_destroy; + dev->needs_free_netdev = true; + dev->priv_destructor = hsr_dev_destroy; dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 4ebe2aa3e7d3e944295e9d53890e3cb9b7a90139..04b5450c5a5572e875f7900a3676fd80259b9b4b 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -324,8 +324,7 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, unsigned long irqflags; frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(&port->hsr->node_db, skb, - frame->is_supervision); + frame->node_src = hsr_get_node(port, skb, frame->is_supervision); if (frame->node_src == NULL) return -1; /* Unknown node and !is_supervision, or no mem */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 7ea925816f79d8a0e547a0feb1257fda23097ba4..284a9b820df8db51a0dbee9737db5a8127eeaad7 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -158,9 +158,10 @@ struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], /* Get the hsr_node from which 'skb' was sent. */ -struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, +struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, bool is_sup) { + struct list_head *node_db = &port->hsr->node_db; struct hsr_node *node; struct ethhdr *ethhdr; u16 seq_out; @@ -186,7 +187,11 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, */ seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { - WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); + /* this is called also for frames from master port and + * so warn only for non master ports + */ + if (port->type != HSR_PT_MASTER) + WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); seq_out = HSR_SEQNR_START; } diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 438b40f98f5a986e50180c351d55c9dbb0b66977..4e04f0e868e95044ac27f77ca11d004d928326cd 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -18,7 +18,7 @@ struct hsr_node; struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], u16 seq_out); -struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb, +struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, bool is_sup); void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, struct hsr_port *port); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 1ab30e7d3f99e19c5e54fd9747cfce7a5c1f559b..81dac16933fc05a19224a7c9ef450d34dea1be91 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -350,7 +350,7 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) return 0; invalid: - netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: @@ -432,7 +432,7 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; invalid: - netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL); + netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; nla_put_failure: diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index d7efbf0dad20f8c4d3c42d039eff528c678bc04c..0a866f3322901e357091955bf24ac16fc5f6b94f 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -107,7 +107,7 @@ static void lowpan_setup(struct net_device *ldev) ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; - ldev->destructor = free_netdev; + ldev->needs_free_netdev = true; ldev->features |= NETIF_F_NETNS_LOCAL; } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index fc60cd061f3966a2381803a8e4d85206770253dc..99f6c254ea777c8786421d069f8f2414d0eb34e5 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -249,8 +249,7 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, genl_family_attrbuf(&nl802154_fam), - nl802154_fam.maxattr, - nl802154_policy); + nl802154_fam.maxattr, nl802154_policy, NULL); if (err) goto out_unlock; @@ -562,8 +561,8 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct nl802154_dump_wpan_phy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - tb, nl802154_fam.maxattr, nl802154_policy); + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, + nl802154_fam.maxattr, nl802154_policy, NULL); /* TODO check if we can handle error here, * we have no backward compatibility @@ -1308,7 +1307,7 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, - nl802154_dev_addr_policy)) + nl802154_dev_addr_policy, NULL)) return -EINVAL; if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || @@ -1348,7 +1347,7 @@ ieee802154_llsec_parse_key_id(struct nlattr *nla, struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1]; if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla, - nl802154_key_id_policy)) + nl802154_key_id_policy, NULL)) return -EINVAL; if (!attrs[NL802154_KEY_ID_ATTR_MODE]) @@ -1565,7 +1564,7 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy)) + nl802154_key_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || @@ -1615,7 +1614,7 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy)) + nl802154_key_policy, info->extack)) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) @@ -1729,8 +1728,8 @@ ieee802154_llsec_parse_device(struct nlattr *nla, { struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla, - nl802154_dev_policy)) + if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, + nla, nl802154_dev_policy, NULL)) return -EINVAL; memset(dev, 0, sizeof(*dev)); @@ -1783,7 +1782,7 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], - nl802154_dev_policy)) + nl802154_dev_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) @@ -1911,7 +1910,7 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy) < 0) + nl802154_devkey_policy, info->extack) < 0) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] || @@ -1943,7 +1942,7 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy)) + nl802154_devkey_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) @@ -2063,8 +2062,8 @@ llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl) { struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, - nl802154_seclevel_policy)) + if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, + nla, nl802154_seclevel_policy, NULL)) return -EINVAL; memset(sl, 0, sizeof(*sl)); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index c6d4238ff94a8a329bf44bf59b30fb09fb07f707..f83de23a30e7e77303d011dee35eb92342adab5c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - fib_frontend.o fib_semantics.o fib_trie.o \ + fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b1fc6e4278ef4f1cba58412977918af31d73e62..58925b6597de83e7d643fb9b1c7e992c9748ae1c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1043,7 +1043,7 @@ static struct inet_protosw inetsw_array[] = .type = SOCK_DGRAM, .protocol = IPPROTO_ICMP, .prot = &ping_prot, - .ops = &inet_dgram_ops, + .ops = &inet_sockraw_ops, .flags = INET_PROTOSW_REUSE, }, @@ -1343,6 +1343,9 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (*(u8 *)iph != 0x45) goto out_unlock; + if (ip_is_fragment(iph)) + goto out_unlock; + if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out_unlock; @@ -1599,8 +1602,9 @@ static const struct net_protocol igmp_protocol = { }; #endif -static const struct net_protocol tcp_protocol = { +static struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, + .early_demux_handler = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, @@ -1608,8 +1612,9 @@ static const struct net_protocol tcp_protocol = { .icmp_strict_tag_validation = 1, }; -static const struct net_protocol udp_protocol = { +static struct net_protocol udp_protocol = { .early_demux = udp_v4_early_demux, + .early_demux_handler = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, @@ -1720,6 +1725,8 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; + net->ipv4.sysctl_udp_early_demux = 1; + net->ipv4.sysctl_tcp_early_demux = 1; #ifdef CONFIG_SYSCTL net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 51b27ae09fbd725bcd8030982e5850215ac4ce5c..e9f3386a528b4a792839d5e50894a68d3097eb42 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -641,6 +641,32 @@ void arp_xmit(struct sk_buff *skb) } EXPORT_SYMBOL(arp_xmit); +static bool arp_is_garp(struct net *net, struct net_device *dev, + int *addr_type, __be16 ar_op, + __be32 sip, __be32 tip, + unsigned char *sha, unsigned char *tha) +{ + bool is_garp = tip == sip; + + /* Gratuitous ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); + + if (is_garp) { + *addr_type = inet_addr_type_dev_table(net, dev, sip); + if (*addr_type != RTN_UNICAST) + is_garp = false; + } + return is_garp; +} + /* * Process an arp request. */ @@ -653,6 +679,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha; + unsigned char *tha = NULL; __be32 sip, tip; u16 dev_type = dev->type; int addr_type; @@ -724,6 +751,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) break; #endif default: + tha = arp_ptr; arp_ptr += dev->addr_len; } memcpy(&tip, arp_ptr, 4); @@ -835,19 +863,25 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IN_DEV_ARP_ACCEPT(in_dev)) { - unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip); + addr_type = -1; + if (n || IN_DEV_ARP_ACCEPT(in_dev)) { + is_garp = arp_is_garp(net, dev, &addr_type, arp->ar_op, + sip, tip, sha, tha); + } + if (IN_DEV_ARP_ACCEPT(in_dev)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && - addr_type == RTN_UNICAST; - if (!n && - ((arp->ar_op == htons(ARPOP_REPLY) && - addr_type == RTN_UNICAST) || is_garp)) + (is_garp || + (arp->ar_op == htons(ARPOP_REPLY) && + (addr_type == RTN_UNICAST || + (addr_type < 0 && + /* postpone calculation to as late as possible */ + inet_addr_type_dev_table(net, dev, sip) == + RTN_UNICAST))))) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -872,7 +906,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) skb->pkt_type != PACKET_HOST) state = NUD_STALE; neigh_update(n, sha, state, - override ? NEIGH_UPDATE_F_OVERRIDE : 0); + override ? NEIGH_UPDATE_F_OVERRIDE : 0, 0); neigh_release(n); } @@ -1033,7 +1067,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? r->arp_ha.sa_data : NULL, state, NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); } return err; @@ -1084,7 +1118,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip) if (neigh->nud_state & ~NUD_NOARP) err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE| - NEIGH_UPDATE_F_ADMIN); + NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cebedd545e5e2863afcfe116309725e2cd57206c..df14815a3b8ce74aeb613458ffaf5f6eee4a263d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -571,7 +571,8 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) return ret; } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -582,7 +583,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, + extack); if (err < 0) goto errout; @@ -752,7 +754,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, struct in_device *in_dev; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, + NULL); if (err < 0) goto errout; @@ -843,7 +846,8 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) return NULL; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; @@ -1192,6 +1196,18 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) return done; } +static __be32 in_dev_select_addr(const struct in_device *in_dev, + int scope) +{ + for_primary_ifa(in_dev) { + if (ifa->ifa_scope != RT_SCOPE_LINK && + ifa->ifa_scope <= scope) + return ifa->ifa_local; + } endfor_ifa(in_dev); + + return 0; +} + __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { __be32 addr = 0; @@ -1228,13 +1244,9 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) if (master_idx && (dev = dev_get_by_index_rcu(net, master_idx)) && (in_dev = __in_dev_get_rcu(dev))) { - for_primary_ifa(in_dev) { - if (ifa->ifa_scope != RT_SCOPE_LINK && - ifa->ifa_scope <= scope) { - addr = ifa->ifa_local; - goto out_unlock; - } - } endfor_ifa(in_dev); + addr = in_dev_select_addr(in_dev, scope); + if (addr) + goto out_unlock; } /* Not loopback addresses on loopback should be preferred @@ -1249,13 +1261,9 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) if (!in_dev) continue; - for_primary_ifa(in_dev) { - if (ifa->ifa_scope != RT_SCOPE_LINK && - ifa->ifa_scope <= scope) { - addr = ifa->ifa_local; - goto out_unlock; - } - } endfor_ifa(in_dev); + addr = in_dev_select_addr(in_dev, scope); + if (addr) + goto out_unlock; } out_unlock: rcu_read_unlock(); @@ -1713,7 +1721,7 @@ static int inet_validate_link_af(const struct net_device *dev, if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; - err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); + err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); if (err < 0) return err; @@ -1741,7 +1749,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!in_dev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) + if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET_CONF]) { @@ -1798,6 +1806,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; + if (!devconf) + goto out; + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF(*devconf, FORWARDING)) < 0) @@ -1819,6 +1830,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; +out: nlmsg_end(skb, nlh); return 0; @@ -1827,8 +1839,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, return -EMSGSIZE; } -void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf) +void inet_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; @@ -1838,7 +1850,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, - RTM_NEWNETCONF, 0, type); + event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -1861,7 +1873,8 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -1874,7 +1887,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv4_policy); + devconf_ipv4_policy, extack); if (err < 0) goto errout; @@ -2017,10 +2030,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); @@ -2033,7 +2048,8 @@ static void inet_forward_change(struct net *net) in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, dev->ifindex, &in_dev->cnf); } } @@ -2078,19 +2094,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_RP_FILTER, ifindex, cnf); } if (i == IPV4_DEVCONF_PROXY_ARP - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, ifindex, cnf); } if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, ifindex, cnf); } } @@ -2125,7 +2144,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, container_of(cnf, struct in_device, cnf); if (*valp) dev_disable_lro(idev->dev); - inet_netconf_notify_devconf(net, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, idev->dev->ifindex, cnf); @@ -2133,7 +2152,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, rtnl_unlock(); rt_cache_flush(net); } else - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); } @@ -2255,7 +2275,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, p->sysctl = t; - inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, + ifindex, p); return 0; free: @@ -2264,16 +2285,18 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, return -ENOBUFS; } -static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) +static void __devinet_sysctl_unregister(struct net *net, + struct ipv4_devconf *cnf, int ifindex) { struct devinet_sysctl_table *t = cnf->sysctl; - if (!t) - return; + if (t) { + cnf->sysctl = NULL; + unregister_net_sysctl_table(t->sysctl_header); + kfree(t); + } - cnf->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int devinet_sysctl_register(struct in_device *idev) @@ -2295,7 +2318,9 @@ static int devinet_sysctl_register(struct in_device *idev) static void devinet_sysctl_unregister(struct in_device *idev) { - __devinet_sysctl_unregister(&idev->cnf); + struct net *net = dev_net(idev->dev); + + __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex); neigh_sysctl_unregister(idev->arp_parms); } @@ -2370,9 +2395,9 @@ static __net_init int devinet_init_net(struct net *net) #ifdef CONFIG_SYSCTL err_reg_ctl: - __devinet_sysctl_unregister(dflt); + __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT); err_reg_dflt: - __devinet_sysctl_unregister(all); + __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: if (tbl != ctl_forward_entry) kfree(tbl); @@ -2394,8 +2419,10 @@ static __net_exit void devinet_exit_net(struct net *net) tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); - __devinet_sysctl_unregister(net->ipv4.devconf_dflt); - __devinet_sysctl_unregister(net->ipv4.devconf_all); + __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, + NETCONFA_IFINDEX_DEFAULT); + __devinet_sysctl_unregister(net, net->ipv4.devconf_all, + NETCONFA_IFINDEX_ALL); kfree(tbl); #endif kfree(net->ipv4.devconf_dflt); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b1e24446e2972444835dd85c02434f3b089ba552..93322f895eab6136adbe22aa37a4eedc9687a0e4 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -152,21 +152,28 @@ static void esp_output_restore_header(struct sk_buff *skb) } static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb, + struct xfrm_state *x, struct ip_esp_hdr *esph, struct esp_output_extra *extra) { - struct xfrm_state *x = skb_dst(skb)->xfrm; - /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + __u32 seqhi; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo) + seqhi = xo->seq.hi; + else + seqhi = XFRM_SKB_CB(skb)->seq.output.hi; + extra->esphoff = (unsigned char *)esph - skb_transport_header(skb); esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); extra->seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; @@ -198,98 +205,57 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static int esp_output(struct xfrm_state *x, struct sk_buff *skb) +static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { - struct esp_output_extra *extra; - int err = -ENOMEM; - struct ip_esp_hdr *esph; - struct crypto_aead *aead; - struct aead_request *req; - struct scatterlist *sg, *dsg; - struct sk_buff *trailer; - struct page *page; - void *tmp; - u8 *iv; - u8 *tail; - u8 *vaddr; - int blksize; - int clen; - int alen; - int plen; - int ivlen; - int tfclen; - int nfrags; - int assoclen; - int extralen; - int tailen; - __be64 seqno; - __u8 proto = *skb_mac_header(skb); - - /* skb is pure payload to encrypt */ - - aead = x->data; - alen = crypto_aead_authsize(aead); - ivlen = crypto_aead_ivsize(aead); - - tfclen = 0; - if (x->tfcpad) { - struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); - u32 padto; - - padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached)); - if (skb->len < padto) - tfclen = padto - skb->len; + int encap_type; + struct udphdr *uh; + __be32 *udpdata32; + __be16 sport, dport; + struct xfrm_encap_tmpl *encap = x->encap; + struct ip_esp_hdr *esph = esp->esph; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); + + uh = (struct udphdr *)esph; + uh->source = sport; + uh->dest = dport; + uh->len = htons(skb->len + esp->tailen + - skb_transport_offset(skb)); + uh->check = 0; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + esph = (struct ip_esp_hdr *)(uh + 1); + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + udpdata32 = (__be32 *)(uh + 1); + udpdata32[0] = udpdata32[1] = 0; + esph = (struct ip_esp_hdr *)(udpdata32 + 2); + break; } - blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(skb->len + 2 + tfclen, blksize); - plen = clen - skb->len - tfclen; - tailen = tfclen + plen + alen; - assoclen = sizeof(*esph); - extralen = 0; - if (x->props.flags & XFRM_STATE_ESN) { - extralen += sizeof(*extra); - assoclen += sizeof(__be32); - } + *skb_mac_header(skb) = IPPROTO_UDP; + esp->esph = esph; +} - *skb_mac_header(skb) = IPPROTO_ESP; - esph = ip_esp_hdr(skb); +int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +{ + u8 *tail; + u8 *vaddr; + int nfrags; + int esph_offset; + struct page *page; + struct sk_buff *trailer; + int tailen = esp->tailen; /* this is non-NULL only with UDP Encapsulation */ - if (x->encap) { - struct xfrm_encap_tmpl *encap = x->encap; - struct udphdr *uh; - __be32 *udpdata32; - __be16 sport, dport; - int encap_type; - - spin_lock_bh(&x->lock); - sport = encap->encap_sport; - dport = encap->encap_dport; - encap_type = encap->encap_type; - spin_unlock_bh(&x->lock); - - uh = (struct udphdr *)esph; - uh->source = sport; - uh->dest = dport; - uh->len = htons(skb->len + tailen - - skb_transport_offset(skb)); - uh->check = 0; - - switch (encap_type) { - default: - case UDP_ENCAP_ESPINUDP: - esph = (struct ip_esp_hdr *)(uh + 1); - break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - udpdata32 = (__be32 *)(uh + 1); - udpdata32[0] = udpdata32[1] = 0; - esph = (struct ip_esp_hdr *)(udpdata32 + 2); - break; - } - - *skb_mac_header(skb) = IPPROTO_UDP; - } + if (x->encap) + esp_output_udp_encap(x, skb, esp); if (!skb_cloned(skb)) { if (tailen <= skb_availroom(skb)) { @@ -304,6 +270,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct sock *sk = skb->sk; struct page_frag *pfrag = &x->xfrag; + esp->inplace = false; + allocsize = ALIGN(tailen, L1_CACHE_BYTES); spin_lock_bh(&x->lock); @@ -320,10 +288,12 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) tail = vaddr + pfrag->offset; - esp_output_fill_trailer(tail, tfclen, plen, proto); + esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); kunmap_atomic(vaddr); + spin_unlock_bh(&x->lock); + nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, @@ -339,107 +309,115 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) if (sk) atomic_add(tailen, &sk->sk_wmem_alloc); - skb_push(skb, -skb_network_offset(skb)); - - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - esph->spi = x->id.spi; - - tmp = esp_alloc_tmp(aead, nfrags + 2, extralen); - if (!tmp) { - spin_unlock_bh(&x->lock); - err = -ENOMEM; - goto error; - } - - extra = esp_tmp_extra(tmp); - iv = esp_tmp_iv(aead, tmp, extralen); - req = esp_tmp_req(aead, iv); - sg = esp_req_sg(aead, req); - dsg = &sg[nfrags]; - - esph = esp_output_set_extra(skb, esph, extra); - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); + goto out; + } + } - allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); +cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); - if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { - spin_unlock_bh(&x->lock); - err = -ENOMEM; - goto error; - } + nfrags = skb_cow_data(skb, tailen, &trailer); + if (nfrags < 0) + goto out; + tail = skb_tail_pointer(trailer); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); - skb_shinfo(skb)->nr_frags = 1; +skip_cow: + esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); + pskb_put(skb, trailer, tailen); - page = pfrag->page; - get_page(page); - /* replace page frags in skb with new page */ - __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); - pfrag->offset = pfrag->offset + allocsize; +out: + return nfrags; +} +EXPORT_SYMBOL_GPL(esp_output_head); - sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); - skb_to_sgvec(skb, dsg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); +int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +{ + u8 *iv; + int alen; + void *tmp; + int ivlen; + int assoclen; + int extralen; + struct page *page; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct aead_request *req; + struct scatterlist *sg, *dsg; + struct esp_output_extra *extra; + int err = -ENOMEM; - spin_unlock_bh(&x->lock); + assoclen = sizeof(struct ip_esp_hdr); + extralen = 0; - goto skip_cow2; - } + if (x->props.flags & XFRM_STATE_ESN) { + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } -cow: - err = skb_cow_data(skb, tailen, &trailer); - if (err < 0) - goto error; - nfrags = err; - tail = skb_tail_pointer(trailer); - esph = ip_esp_hdr(skb); - -skip_cow: - esp_output_fill_trailer(tail, tfclen, plen, proto); - - pskb_put(skb, trailer, clen - skb->len + alen); - skb_push(skb, -skb_network_offset(skb)); - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - esph->spi = x->id.spi; + aead = x->data; + alen = crypto_aead_authsize(aead); + ivlen = crypto_aead_ivsize(aead); - tmp = esp_alloc_tmp(aead, nfrags, extralen); - if (!tmp) { - err = -ENOMEM; + tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen); + if (!tmp) goto error; - } extra = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); - dsg = sg; - esph = esp_output_set_extra(skb, esph, extra); + if (esp->inplace) + dsg = sg; + else + dsg = &sg[esp->nfrags]; - sg_init_table(sg, nfrags); + esph = esp_output_set_extra(skb, x, esp->esph, extra); + esp->esph = esph; + + sg_init_table(sg, esp->nfrags); skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); + assoclen + ivlen + esp->clen + alen); + + if (!esp->inplace) { + int allocsize; + struct page_frag *pfrag = &x->xfrag; + + allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); + + spin_lock_bh(&x->lock); + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + goto error; + } + + skb_shinfo(skb)->nr_frags = 1; + + page = pfrag->page; + get_page(page); + /* replace page frags in skb with new page */ + __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); + pfrag->offset = pfrag->offset + allocsize; + spin_unlock_bh(&x->lock); + + sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); + skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + } -skip_cow2: if ((x->props.flags & XFRM_STATE_ESN)) aead_request_set_callback(req, 0, esp_output_done_esn, skb); else aead_request_set_callback(req, 0, esp_output_done, skb); - aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); + aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv); aead_request_set_ad(req, assoclen); - seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + - ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); - memset(iv, 0, ivlen); - memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), + memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8), min(ivlen, 8)); ESP_SKB_CB(skb)->tmp = tmp; @@ -465,11 +443,63 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) error: return err; } +EXPORT_SYMBOL_GPL(esp_output_tail); -static int esp_input_done2(struct sk_buff *skb, int err) +static int esp_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int alen; + int blksize; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct esp_info esp; + + esp.inplace = true; + + esp.proto = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + /* skb is pure payload to encrypt */ + + aead = x->data; + alen = crypto_aead_authsize(aead); + + esp.tfclen = 0; + if (x->tfcpad) { + struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); + u32 padto; + + padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached)); + if (skb->len < padto) + esp.tfclen = padto - skb->len; + } + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); + esp.plen = esp.clen - skb->len - esp.tfclen; + esp.tailen = esp.tfclen + esp.plen + alen; + + esp.esph = ip_esp_hdr(skb); + + esp.nfrags = esp_output_head(x, skb, &esp); + if (esp.nfrags < 0) + return esp.nfrags; + + esph = esp.esph; + esph->spi = x->id.spi; + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); + + skb_push(skb, -skb_network_offset(skb)); + + return esp_output_tail(x, skb, &esp); +} + +int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -478,7 +508,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) u8 nexthdr[2]; int padlen; - kfree(ESP_SKB_CB(skb)->tmp); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; @@ -549,6 +580,7 @@ static int esp_input_done2(struct sk_buff *skb, int err) out: return err; } +EXPORT_SYMBOL_GPL(esp_input_done2); static void esp_input_done(struct crypto_async_request *base, int err) { @@ -751,13 +783,17 @@ static int esp_init_aead(struct xfrm_state *x) char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; + u32 mask = 0; err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) goto error; - aead = crypto_alloc_aead(aead_name, 0, 0); + if (x->xso.offload_handle) + mask |= CRYPTO_ALG_ASYNC; + + aead = crypto_alloc_aead(aead_name, 0, mask); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -787,6 +823,7 @@ static int esp_init_authenc(struct xfrm_state *x) char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; + u32 mask = 0; err = -EINVAL; if (!x->ealg) @@ -812,7 +849,10 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; } - aead = crypto_alloc_aead(authenc_name, 0, 0); + if (x->xso.offload_handle) + mask |= CRYPTO_ALG_ASYNC; + + aead = crypto_alloc_aead(authenc_name, 0, mask); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -931,7 +971,7 @@ static const struct xfrm_type esp_type = .destructor = esp_destroy, .get_mtu = esp4_get_mtu, .input = esp_input, - .output = esp_output + .output = esp_output, }; static struct xfrm4_protocol esp4_protocol = { diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 1de4426324064ff991399e7ef0f0b98c68386a5c..e0666016a7642c017b4693d32723245adc484982 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -43,27 +43,31 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head, if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; - err = secpath_set(skb); - if (err) - goto out; + xo = xfrm_offload(skb); + if (!xo || !(xo->flags & CRYPTO_DONE)) { + err = secpath_set(skb); + if (err) + goto out; - if (skb->sp->len == XFRM_MAX_DEPTH) - goto out; + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, - (xfrm_address_t *)&ip_hdr(skb)->daddr, - spi, IPPROTO_ESP, AF_INET); - if (!x) - goto out; + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ip_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET); + if (!x) + goto out; - skb->sp->xvec[skb->sp->len++] = x; - skb->sp->olen++; + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; - xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); - goto out; + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } } + xo->flags |= XFRM_GRO; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; @@ -84,19 +88,214 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head, return NULL; } +static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_esp_hdr *esph; + struct iphdr *iph = ip_hdr(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + int proto = iph->protocol; + + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + esph->spi = x->id.spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + + xo->proto = proto; +} + +static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + __u32 seq; + int err = 0; + struct sk_buff *skb2; + struct xfrm_state *x; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t esp_features = features; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (!xo) + goto out; + + seq = xo->seq.low; + + x = skb->sp->xvec[skb->sp->len - 1]; + aead = x->data; + esph = ip_esp_hdr(skb); + + if (esph->spi != x->id.spi) + goto out; + + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) + goto out; + + __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); + + skb->encap_hdr_csum = 1; + + if (!(features & NETIF_F_HW_ESP)) + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + + segs = x->outer_mode->gso_segment(x, skb, esp_features); + if (IS_ERR_OR_NULL(segs)) + goto out; + + __skb_pull(skb, skb->data - skb_mac_header(skb)); + + skb2 = segs; + do { + struct sk_buff *nskb = skb2->next; + + xo = xfrm_offload(skb2); + xo->flags |= XFRM_GSO_SEGMENT; + xo->seq.low = seq; + xo->seq.hi = xfrm_replay_seqhi(x, seq); + + if(!(features & NETIF_F_HW_ESP)) + xo->flags |= CRYPTO_FALLBACK; + + x->outer_mode->xmit(x, skb2); + + err = x->type_offload->xmit(x, skb2, esp_features); + if (err) { + kfree_skb_list(segs); + return ERR_PTR(err); + } + + if (!skb_is_gso(skb2)) + seq++; + else + seq += skb_shinfo(skb2)->gso_segs; + + skb_push(skb2, skb2->mac_len); + skb2 = nskb; + } while (skb2); + +out: + return segs; +} + +static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) +{ + struct crypto_aead *aead = x->data; + + if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead))) + return -EINVAL; + + skb->ip_summed = CHECKSUM_NONE; + + return esp_input_done2(skb, 0); +} + +static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) +{ + int err; + int alen; + int blksize; + struct xfrm_offload *xo; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct esp_info esp; + bool hw_offload = true; + + esp.inplace = true; + + xo = xfrm_offload(skb); + + if (!xo) + return -EINVAL; + + if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || + (x->xso.dev != skb->dev)) { + xo->flags |= CRYPTO_FALLBACK; + hw_offload = false; + } + + esp.proto = xo->proto; + + /* skb is pure payload to encrypt */ + + aead = x->data; + alen = crypto_aead_authsize(aead); + + esp.tfclen = 0; + /* XXX: Add support for tfc padding here. */ + + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); + esp.plen = esp.clen - skb->len - esp.tfclen; + esp.tailen = esp.tfclen + esp.plen + alen; + + esp.esph = ip_esp_hdr(skb); + + + if (!hw_offload || (hw_offload && !skb_is_gso(skb))) { + esp.nfrags = esp_output_head(x, skb, &esp); + if (esp.nfrags < 0) + return esp.nfrags; + } + + esph = esp.esph; + esph->spi = x->id.spi; + + skb_push(skb, -skb_network_offset(skb)); + + if (xo->flags & XFRM_GSO_SEGMENT) { + esph->seq_no = htonl(xo->seq.low); + } else { + ip_hdr(skb)->tot_len = htons(skb->len); + ip_send_check(ip_hdr(skb)); + } + + if (hw_offload) + return 0; + + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + + err = esp_output_tail(x, skb, &esp); + if (err < 0) + return err; + + secpath_reset(skb); + + return 0; +} + static const struct net_offload esp4_offload = { .callbacks = { .gro_receive = esp4_gro_receive, + .gso_segment = esp4_gso_segment, }, }; +static const struct xfrm_type_offload esp_type_offload = { + .description = "ESP4 OFFLOAD", + .owner = THIS_MODULE, + .proto = IPPROTO_ESP, + .input_tail = esp_input_tail, + .xmit = esp_xmit, + .encap = esp4_gso_encap, +}; + static int __init esp4_offload_init(void) { + if (xfrm_register_type_offload(&esp_type_offload, AF_INET) < 0) { + pr_info("%s: can't add xfrm type offload\n", __func__); + return -EAGAIN; + } + return inet_add_offload(&esp4_offload, IPPROTO_ESP); } static void __exit esp4_offload_exit(void) { + if (xfrm_unregister_type_offload(&esp_type_offload, AF_INET) < 0) + pr_info("%s: can't remove xfrm type offload\n", __func__); + inet_del_offload(&esp4_offload, IPPROTO_ESP); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8f2133ffc2ff1b94871408a5f934cb938d3462b5..83e3ed258467dbfd620bd27d48a0b33f5ef7a067 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -632,7 +632,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, int err, remaining; struct rtmsg *rtm; - err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); + err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, + NULL); if (err < 0) goto errout; @@ -709,7 +710,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -731,7 +733,8 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -760,7 +763,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0; + int dumped = 0, err; if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) @@ -780,20 +783,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - if (fib_table_dump(tb, skb, cb) < 0) - goto out; + err = fib_table_dump(tb, skb, cb); + if (err < 0) { + if (likely(skb->len)) + goto out; + + goto out_err; + } dumped = 1; next: e++; } } out: + err = skb->len; +out_err: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; - return skb->len; + return err; } /* Prepare and feed intra-kernel routing request. @@ -1127,7 +1137,8 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event, { if (fib_sync_down_dev(dev, event, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev)); + else + rt_cache_flush(dev_net(dev)); arp_ifdown(dev); } diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c new file mode 100644 index 0000000000000000000000000000000000000000..e0714d975947160565dfb1bd9a86bc7a592c73b1 --- /dev/null +++ b/net/ipv4/fib_notifier.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include +#include + +static ATOMIC_NOTIFIER_HEAD(fib_chain); + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return nb->notifier_call(nb, event_type, info); +} + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + net->ipv4.fib_seq++; + info->net = net; + return atomic_notifier_call_chain(&fib_chain, event_type, info); +} + +static unsigned int fib_seq_sum(void) +{ + unsigned int fib_seq = 0; + struct net *net; + + rtnl_lock(); + for_each_net(net) + fib_seq += net->ipv4.fib_seq; + rtnl_unlock(); + + return fib_seq; +} + +static bool fib_dump_is_consistent(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + unsigned int fib_seq) +{ + atomic_notifier_chain_register(&fib_chain, nb); + if (fib_seq == fib_seq_sum()) + return true; + atomic_notifier_chain_unregister(&fib_chain, nb); + if (cb) + cb(nb); + return false; +} + +#define FIB_DUMP_MAX_RETRIES 5 +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)) +{ + int retries = 0; + + do { + unsigned int fib_seq = fib_seq_sum(); + struct net *net; + + /* Mutex semantics guarantee that every change done to + * FIB tries before we read the change sequence counter + * is now visible to us. + */ + rcu_read_lock(); + for_each_net_rcu(net) { + fib_rules_notify(net, nb); + fib_notify(net, nb); + } + rcu_read_unlock(); + + if (fib_dump_is_consistent(nb, cb, fib_seq)) + return 0; + } while (++retries < FIB_DUMP_MAX_RETRIES); + + return -EBUSY; +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2e50062f642d61bdc0c0893de4e4ff84b5be6c8d..778ecf977eb2bd7b7b3808691aaf818fc4bd680d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,6 +47,27 @@ struct fib4_rule { #endif }; +static bool fib4_rule_matchall(const struct fib_rule *rule) +{ + struct fib4_rule *r = container_of(rule, struct fib4_rule, common); + + if (r->dst_len || r->src_len || r->tos) + return false; + return fib_rule_matchall(rule); +} + +bool fib4_rule_default(const struct fib_rule *rule) +{ + if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || + rule->l3mdev) + return false; + if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN && + rule->table != RT_TABLE_DEFAULT) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib4_rule_default); + int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { @@ -164,12 +185,36 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule) +{ + struct fib_rule_notifier_info info = { + .rule = rule, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + static int call_fib_rule_notifiers(struct net *net, - enum fib_event_type event_type) + enum fib_event_type event_type, + struct fib_rule *rule) +{ + struct fib_rule_notifier_info info = { + .rule = rule, + }; + + return call_fib_notifiers(net, event_type, &info.info); +} + +/* Called with rcu_read_lock() */ +void fib_rules_notify(struct net *net, struct notifier_block *nb) { - struct fib_notifier_info info; + struct fib_rules_ops *ops = net->ipv4.rules_ops; + struct fib_rule *rule; - return call_fib_notifiers(net, event_type, &info); + list_for_each_entry_rcu(rule, &ops->rules_list, list) + call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule); } static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { @@ -228,7 +273,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule); err = 0; errout: @@ -250,7 +295,7 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule); errout: return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 317026a39cfa2b49bf06182d89a11af0fa2688af..ad9ad4aab5da7c7d11c3b80edbdfcbdd3d7153fe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -57,7 +57,6 @@ static unsigned int fib_info_cnt; static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH -u32 fib_multipath_secret __read_mostly; #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ @@ -204,6 +203,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + struct dst_metrics *m; change_nexthops(fi) { if (nexthop_nh->nh_dev) @@ -214,8 +214,9 @@ static void free_fib_info_rcu(struct rcu_head *head) rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); - if (fi->fib_metrics != (u32 *) dst_default_metrics) - kfree(fi->fib_metrics); + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); kfree(fi); } @@ -576,9 +577,6 @@ static void fib_rebalance(struct fib_info *fi) atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); } endfor_nexthops(fi); - - net_get_random_once(&fib_multipath_secret, - sizeof(fib_multipath_secret)); } static inline void fib_add_weight(struct fib_info *fi, @@ -975,11 +973,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; - fi->fib_metrics[type - 1] = val; + fi->fib_metrics->metrics[type - 1] = val; } if (ecn_ca) - fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; return 0; } @@ -1037,11 +1035,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; if (cfg->fc_mx) { - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); if (!fi->fib_metrics) goto failure; + atomic_set(&fi->fib_metrics->refcnt, 1); } else - fi->fib_metrics = (u32 *) dst_default_metrics; + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; @@ -1242,7 +1241,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_priority && nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; - if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) + if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure; if (fi->fib_prefsrc && @@ -1641,7 +1640,7 @@ void fib_select_multipath(struct fib_result *res, int hash) #endif void fib_select_path(struct net *net, struct fib_result *res, - struct flowi4 *fl4, int mp_hash) + struct flowi4 *fl4, const struct sk_buff *skb) { bool oif_check; @@ -1650,10 +1649,9 @@ void fib_select_path(struct net *net, struct fib_result *res, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi->fib_nhs > 1 && oif_check) { - if (mp_hash < 0) - mp_hash = get_hash_from_flowi4(fl4) >> 1; + int h = fib_multipath_hash(res->fi, fl4, skb); - fib_select_multipath(res, mp_hash); + fib_select_multipath(res, h); } else #endif diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2f0d8233950faeac91f287644bc9476f19f74578..51182ff2b4415238210e83208bf7f45b5fb55326 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -84,43 +84,6 @@ #include #include "fib_lookup.h" -static unsigned int fib_seq_sum(void) -{ - unsigned int fib_seq = 0; - struct net *net; - - rtnl_lock(); - for_each_net(net) - fib_seq += net->ipv4.fib_seq; - rtnl_unlock(); - - return fib_seq; -} - -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -static int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info) -{ - info->net = net; - return nb->notifier_call(nb, event_type, info); -} - -static void fib_rules_notify(struct net *net, struct notifier_block *nb, - enum fib_event_type event_type) -{ -#ifdef CONFIG_IP_MULTIPLE_TABLES - struct fib_notifier_info info; - - if (net->ipv4.fib_has_custom_rules) - call_fib_notifier(nb, net, event_type, &info); -#endif -} - -static void fib_notify(struct net *net, struct notifier_block *nb, - enum fib_event_type event_type); - static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, @@ -137,62 +100,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, return call_fib_notifier(nb, net, event_type, &info.info); } -static bool fib_dump_is_consistent(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb), - unsigned int fib_seq) -{ - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) - return true; - atomic_notifier_chain_unregister(&fib_chain, nb); - if (cb) - cb(nb); - return false; -} - -#define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) -{ - int retries = 0; - - do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; - - /* Mutex semantics guarantee that every change done to - * FIB tries before we read the change sequence counter - * is now visible to us. - */ - rcu_read_lock(); - for_each_net_rcu(net) { - fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD); - fib_notify(net, nb, FIB_EVENT_ENTRY_ADD); - } - rcu_read_unlock(); - - if (fib_dump_is_consistent(nb, cb, fib_seq)) - return 0; - } while (++retries < FIB_DUMP_MAX_RETRIES); - - return -EBUSY; -} -EXPORT_SYMBOL(register_fib_notifier); - -int unregister_fib_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&fib_chain, nb); -} -EXPORT_SYMBOL(unregister_fib_notifier); - -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, - struct fib_notifier_info *info) -{ - net->ipv4.fib_seq++; - info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); -} - static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, @@ -1995,8 +1902,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb) } static void fib_leaf_notify(struct net *net, struct key_vector *l, - struct fib_table *tb, struct notifier_block *nb, - enum fib_event_type event_type) + struct fib_table *tb, struct notifier_block *nb) { struct fib_alias *fa; @@ -2012,22 +1918,21 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, if (tb->tb_id != fa->tb_id) continue; - call_fib_entry_notifier(nb, net, event_type, l->key, + call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, fa->tb_id); } } static void fib_table_notify(struct net *net, struct fib_table *tb, - struct notifier_block *nb, - enum fib_event_type event_type) + struct notifier_block *nb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; t_key key = 0; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - fib_leaf_notify(net, l, tb, nb, event_type); + fib_leaf_notify(net, l, tb, nb); key = l->key + 1; /* stop in case of wrap around */ @@ -2036,8 +1941,7 @@ static void fib_table_notify(struct net *net, struct fib_table *tb, } } -static void fib_notify(struct net *net, struct notifier_block *nb, - enum fib_event_type event_type) +void fib_notify(struct net *net, struct notifier_block *nb) { unsigned int h; @@ -2046,7 +1950,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb, struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) - fib_table_notify(net, tb, nb, event_type); + fib_table_notify(net, tb, nb); } } @@ -2079,6 +1983,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + int err; + if (i < s_i) { i++; continue; @@ -2089,17 +1995,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - xkey, - KEYLENGTH - fa->fa_slen, - fa->fa_tos, - fa->fa_info, NLM_F_MULTI) < 0) { + err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE, + tb->tb_id, fa->fa_type, + xkey, KEYLENGTH - fa->fa_slen, + fa->fa_tos, fa->fa_info, NLM_F_MULTI); + if (err < 0) { cb->args[4] = i; - return -1; + return err; } i++; } @@ -2121,10 +2024,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, t_key key = cb->args[3]; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { + int err; + + err = fn_trie_dump_leaf(l, tb, skb, cb); + if (err < 0) { cb->args[3] = key; cb->args[2] = count; - return -1; + return err; } ++count; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fc310db2708bf6c9e96befe413e89ac931818f74..9144fa7df2ad51372ba1c1debf54a41460a8cbec 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -464,22 +464,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) local_bh_enable(); } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - -/* Source and destination is swapped. See ip_multipath_icmp_hash */ -static int icmp_multipath_hash_skb(const struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - return fib_multipath_hash(iph->daddr, iph->saddr); -} - -#else - -#define icmp_multipath_hash_skb(skb) (-1) - -#endif - static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, @@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key_hash(net, fl4, - icmp_multipath_hash_skb(skb_in)); + rt = __ip_route_output_key_hash(net, fl4, skb_in); if (IS_ERR(rt)) return rt; @@ -674,8 +657,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* Needed by both icmp_global_allow and icmp_xmit_lock */ local_bh_disable(); - /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!icmpv4_global_allow(net, type, code)) + /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless + * incoming dev is loopback. If outgoing dev change to not be + * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow) + */ + if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) && + !icmpv4_global_allow(net, type, code)) goto out_bh_enable; sk = icmp_xmit_lock(net); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44fd86de2823dd17de16276a8ec01b190e69b8b4..8f6b5bbcbf69f54f354d3678cf6e5f8374602edb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2071,21 +2071,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, static void ip_mc_clear_src(struct ip_mc_list *pmc) { - struct ip_sf_list *psf, *nextpsf; + struct ip_sf_list *psf, *nextpsf, *tomb, *sources; - for (psf = pmc->tomb; psf; psf = nextpsf) { + spin_lock_bh(&pmc->lock); + tomb = pmc->tomb; + pmc->tomb = NULL; + sources = pmc->sources; + pmc->sources = NULL; + pmc->sfmode = MCAST_EXCLUDE; + pmc->sfcount[MCAST_INCLUDE] = 0; + pmc->sfcount[MCAST_EXCLUDE] = 1; + spin_unlock_bh(&pmc->lock); + + for (psf = tomb; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } - pmc->tomb = NULL; - for (psf = pmc->sources; psf; psf = nextpsf) { + for (psf = sources; psf; psf = nextpsf) { nextpsf = psf->sf_next; kfree(psf); } - pmc->sources = NULL; - pmc->sfmode = MCAST_EXCLUDE; - pmc->sfcount[MCAST_INCLUDE] = 0; - pmc->sfcount[MCAST_EXCLUDE] = 1; } /* Join a multicast group diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5e313c1ac94fc88eca5fe3a0e9e46e551e955ff0..1054d330bf9df3189a21dbb08e27c0e6ad136775 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -794,6 +794,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, /* listeners have SOCK_RCU_FREE, not the children */ sock_reset_flag(newsk, SOCK_RCU_FREE); + inet_sk(newsk)->mc_list = NULL; + newsk->sk_mark = inet_rsk(req)->ir_mark; atomic64_set(&newsk->sk_cookie, atomic64_read(&inet_rsk(req)->ir_cookie)); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8bea74298173f5198e7b3083b5afb5e1398df977..e9a59d2d91d4061f299065173d9212eefe72ef89 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -678,11 +678,7 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); - hashinfo->ehash_locks = kmalloc_array(nblocks, locksz, - GFP_KERNEL | __GFP_NOWARN); - if (!hashinfo->ehash_locks) - hashinfo->ehash_locks = vmalloc(nblocks * locksz); - + hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); if (!hashinfo->ehash_locks) return -ENOMEM; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c9c1cb635d9afd0c5ccdec4402aa2aac29cc889a..e90c80a548ad8f61b8542fdd8a2a55a2562a0e07 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -829,7 +829,8 @@ static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) static int ipgre_netlink_parms(struct net_device *dev, struct nlattr *data[], struct nlattr *tb[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, + __u32 *fwmark) { struct ip_tunnel *t = netdev_priv(dev); @@ -886,6 +887,9 @@ static int ipgre_netlink_parms(struct net_device *dev, t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]); } + if (data[IFLA_GRE_FWMARK]) + *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + return 0; } @@ -957,6 +961,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, { struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + __u32 fwmark = 0; int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { @@ -967,31 +972,32 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, return err; } - err = ipgre_netlink_parms(dev, data, tb, &p); + err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_newlink(dev, tb, &p); + return ip_tunnel_newlink(dev, tb, &p, fwmark); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + __u32 fwmark = t->fwmark; int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { - struct ip_tunnel *t = netdev_priv(dev); err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } - err = ipgre_netlink_parms(dev, data, tb, &p); + err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_changelink(dev, tb, &p); + return ip_tunnel_changelink(dev, tb, &p, fwmark); } static size_t ipgre_get_size(const struct net_device *dev) @@ -1029,6 +1035,8 @@ static size_t ipgre_get_size(const struct net_device *dev) nla_total_size(0) + /* IFLA_GRE_IGNORE_DF */ nla_total_size(1) + + /* IFLA_GRE_FWMARK */ + nla_total_size(4) + 0; } @@ -1049,7 +1057,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || nla_put_u8(skb, IFLA_GRE_PMTUDISC, - !!(p->iph.frag_off & htons(IP_DF)))) + !!(p->iph.frag_off & htons(IP_DF))) || + nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -1093,6 +1102,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, + [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ipgre_link_ops __read_mostly = { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d6feabb0351607f282e1f78f159c0ccb88bcec96..fa2dc8f692c631f1ff7fe814c3ee27f0de2a41d8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct net_device *dev = skb->dev; + void (*edemux)(struct sk_buff *skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) int protocol = iph->protocol; ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) { - ipprot->early_demux(skb); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { + edemux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ebd953bc5607f3b25fffddcb26a5c65e5490cb2b..ec4fe3d4b5c9c17d53d7464b42b82a564ae48e54 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -330,7 +330,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, sent to multicast group to reach destination designated router. */ struct ip_ra_chain __rcu *ip_ra_chain; -static DEFINE_SPINLOCK(ip_ra_lock); static void ip_ra_destroy_rcu(struct rcu_head *head) @@ -352,21 +351,17 @@ int ip_ra_control(struct sock *sk, unsigned char on, new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; - (ra = rcu_dereference_protected(*rap, - lockdep_is_held(&ip_ra_lock))) != NULL; + (ra = rtnl_dereference(*rap)) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { - spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } /* dont let ip_call_ra_chain() use sk again */ ra->sk = NULL; RCU_INIT_POINTER(*rap, ra->next); - spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); @@ -380,17 +375,14 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (!new_ra) { - spin_unlock_bh(&ip_ra_lock); + if (!new_ra) return -ENOBUFS; - } new_ra->sk = sk; new_ra->destructor = destructor; RCU_INIT_POINTER(new_ra->next, ra); rcu_assign_pointer(*rap, new_ra); sock_hold(sk); - spin_unlock_bh(&ip_ra_lock); return 0; } @@ -488,16 +480,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk, return false; /* Support IP_PKTINFO on tstamp packets if requested, to correlate - * timestamp with egress dev. Not possible for packets without dev + * timestamp with egress dev. Not possible for packets without iif * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). */ - if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || - (!skb->dev)) + info = PKTINFO_SKB_CB(skb); + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) || + !info->ipi_ifindex) return false; - info = PKTINFO_SKB_CB(skb); info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; - info->ipi_ifindex = skb->dev->ifindex; return true; } @@ -591,6 +582,7 @@ static bool setsockopt_needs_rtnl(int optname) case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_UNBLOCK_SOURCE: + case IP_ROUTER_ALERT: return true; } return false; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 823abaef006bd353cf0466f14dd3abaa26f80c07..b436d077563174c22b48a81a6a856f30dd831a5e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -293,7 +293,8 @@ static struct net_device *__ip_tunnel_create(struct net *net, static inline void init_tunnel_flow(struct flowi4 *fl4, int proto, __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif) + __be32 key, __u8 tos, int oif, + __u32 mark) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -302,6 +303,7 @@ static inline void init_tunnel_flow(struct flowi4 *fl4, fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; + fl4->flowi4_mark = mark; } static int ip_tunnel_bind_dev(struct net_device *dev) @@ -322,7 +324,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) init_tunnel_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, - RT_TOS(iph->tos), tunnel->parms.link); + RT_TOS(iph->tos), tunnel->parms.link, + tunnel->fwmark); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -578,7 +581,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, - RT_TOS(tos), tunnel->parms.link); + RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; rt = ip_route_output_key(tunnel->net, &fl4); @@ -707,7 +710,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; @@ -795,7 +799,8 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, struct ip_tunnel *t, struct net_device *dev, struct ip_tunnel_parm *p, - bool set_mtu) + bool set_mtu, + __u32 fwmark) { ip_tunnel_del(itn, t); t->parms.iph.saddr = p->iph.saddr; @@ -812,10 +817,11 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, t->parms.iph.tos = p->iph.tos; t->parms.iph.frag_off = p->iph.frag_off; - if (t->parms.link != p->link) { + if (t->parms.link != p->link || t->fwmark != fwmark) { int mtu; t->parms.link = p->link; + t->fwmark = fwmark; mtu = ip_tunnel_bind_dev(dev); if (set_mtu) dev->mtu = mtu; @@ -893,7 +899,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) if (t) { err = 0; - ip_tunnel_update(itn, t, dev, p, true); + ip_tunnel_update(itn, t, dev, p, true, 0); } else { err = -ENOENT; } @@ -961,7 +967,6 @@ static void ip_tunnel_dev_free(struct net_device *dev) gro_cells_destroy(&tunnel->gro_cells); dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) @@ -1066,7 +1071,7 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p) + struct ip_tunnel_parm *p, __u32 fwmark) { struct ip_tunnel *nt; struct net *net = dev_net(dev); @@ -1087,6 +1092,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], nt->net = net; nt->parms = *p; + nt->fwmark = fwmark; err = register_netdevice(dev); if (err) goto out; @@ -1105,7 +1111,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], EXPORT_SYMBOL_GPL(ip_tunnel_newlink); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p) + struct ip_tunnel_parm *p, __u32 fwmark) { struct ip_tunnel *t; struct ip_tunnel *tunnel = netdev_priv(dev); @@ -1137,7 +1143,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], } } - ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); + ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark); return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_changelink); @@ -1148,7 +1154,8 @@ int ip_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; int err; - dev->destructor = ip_tunnel_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip_tunnel_dev_free; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index a31f47ccaad90deadb686b2a9091694c8c1ecb9d..baf196eaf1d81809a9264fb94daa58eae51f1bff 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -235,7 +235,7 @@ static int ip_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy); + err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, NULL); if (err < 0) return err; @@ -332,7 +332,8 @@ static int ip6_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy); + err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, + NULL); if (err < 0) return err; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 8b14f1404c8f7315495877a34ad6757aee4232a8..4ec9affb2252408b0c218ad63b17a891ff2ab8ed 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -471,7 +471,8 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) } static void vti_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, + __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); @@ -497,24 +498,29 @@ static void vti_netlink_parms(struct nlattr *data[], if (data[IFLA_VTI_REMOTE]) parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]); + if (data[IFLA_VTI_FWMARK]) + *fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } static int vti_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip_tunnel_parm parms; + __u32 fwmark = 0; - vti_netlink_parms(data, &parms); - return ip_tunnel_newlink(dev, tb, &parms); + vti_netlink_parms(data, &parms, &fwmark); + return ip_tunnel_newlink(dev, tb, &parms, fwmark); } static int vti_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); + __u32 fwmark = t->fwmark; struct ip_tunnel_parm p; - vti_netlink_parms(data, &p); - return ip_tunnel_changelink(dev, tb, &p); + vti_netlink_parms(data, &p, &fwmark); + return ip_tunnel_changelink(dev, tb, &p, fwmark); } static size_t vti_get_size(const struct net_device *dev) @@ -530,6 +536,8 @@ static size_t vti_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_VTI_REMOTE */ nla_total_size(4) + + /* IFLA_VTI_FWMARK */ + nla_total_size(4) + 0; } @@ -538,11 +546,13 @@ static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; - nla_put_u32(skb, IFLA_VTI_LINK, p->link); - nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); - nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); - nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr); - nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr); + if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) || + nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key) || + nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr) || + nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr) || + nla_put_u32(skb, IFLA_VTI_FWMARK, t->fwmark)) + return -EMSGSIZE; return 0; } @@ -553,6 +563,7 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_OKEY] = { .type = NLA_U32 }, [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VTI_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vti_link_ops __read_mostly = { diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index dfb2ab2dd3c84d93b8d77df41d1160d26f162fc3..c3b12b1c71621b942dd4f9ad1d60ab5bb3c66e20 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 00d4229b6954262e556f7a8fd9c072638d49a8d6..1e441c6f216025339caf5d82a04ea3570566641a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -390,7 +390,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) } static void ipip_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms, bool *collect_md) + struct ip_tunnel_parm *parms, bool *collect_md, + __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); @@ -428,6 +429,9 @@ static void ipip_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_COLLECT_METADATA]) *collect_md = true; + + if (data[IFLA_IPTUN_FWMARK]) + *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -470,6 +474,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + __u32 fwmark = 0; if (ipip_netlink_encap_parms(data, &ipencap)) { int err = ip_tunnel_encap_setup(t, &ipencap); @@ -478,26 +483,27 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, return err; } - ipip_netlink_parms(data, &p, &t->collect_md); - return ip_tunnel_newlink(dev, tb, &p); + ipip_netlink_parms(data, &p, &t->collect_md, &fwmark); + return ip_tunnel_newlink(dev, tb, &p, fwmark); } static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; bool collect_md; + __u32 fwmark = t->fwmark; if (ipip_netlink_encap_parms(data, &ipencap)) { - struct ip_tunnel *t = netdev_priv(dev); int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } - ipip_netlink_parms(data, &p, &collect_md); + ipip_netlink_parms(data, &p, &collect_md, &fwmark); if (collect_md) return -EINVAL; @@ -505,7 +511,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) return -EINVAL; - return ip_tunnel_changelink(dev, tb, &p); + return ip_tunnel_changelink(dev, tb, &p, fwmark); } static size_t ipip_get_size(const struct net_device *dev) @@ -535,6 +541,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_COLLECT_METADATA */ nla_total_size(0) + + /* IFLA_IPTUN_FWMARK */ + nla_total_size(4) + 0; } @@ -550,7 +558,8 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, - !!(parm->iph.frag_off & htons(IP_DF)))) + !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, @@ -585,6 +594,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, + [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ipip_link_ops __read_mostly = { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c0317c940bcdc303015f500b52198e0862440e17..8ae425cad81858b3a79719d47b43f8baa346a06a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local); + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, @@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev) dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } @@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; - inet_netconf_notify_devconf(dev_net(dev), + inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); @@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex, - &in_dev->cnf); + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, + dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ @@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { - ip_mr_forward(net, mrt, skb, c, 0); + ip_mr_forward(net, mrt, skb->dev, skb, c, 0); } } } @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Queue a packet for resolution. It gets locked cache entry! */ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, - struct sk_buff *skb) + struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph = ip_hdr(skb); struct mfc_cache *c; @@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, kfree_skb(skb); err = -ENOBUFS; } else { + if (dev) { + skb->dev = dev; + skb->skb_iif = dev->ifindex; + } skb_queue_tail(&c->mfc_un.unres.unresolved, skb); err = 0; } @@ -1278,18 +1282,18 @@ static void mrtsock_destruct(struct sock *sk) struct net *net = sock_net(sk); struct mr_table *mrt; - rtnl_lock(); + ASSERT_RTNL(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt, false); } } - rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole @@ -1344,7 +1348,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } @@ -1353,13 +1358,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { - /* We need to unlock here because mrtsock_destruct takes - * care of rtnl itself and we can't change that due to - * the IP_ROUTER_ALERT setsockopt which runs without it. - */ - rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); - goto out; + goto out_unlock; } break; case MRT_ADD_VIF: @@ -1470,7 +1470,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, } out_unlock: rtnl_unlock(); -out: return ret; } @@ -1833,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *cache, - int local) + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *cache, int local) { - int true_vifi = ipmr_find_vif(mrt, skb->dev); + int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; @@ -1858,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != skb->dev) { - struct net_device *mdev; - - mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev); - if (mdev == skb->dev) - goto forward; - + if (mrt->vif_table[vif].dev != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -1985,6 +1978,20 @@ int ip_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; + struct net_device *dev; + + /* skb->dev passed in is the loX master dev for vrfs. + * As there are no vifs associated with loopback devices, + * get the proper interface that does have a vif associated with it. + */ + dev = skb->dev; + if (netif_is_l3_master(skb->dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); + if (!dev) { + kfree_skb(skb); + return -ENODEV; + } + } /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. @@ -2022,7 +2029,7 @@ int ip_mr_input(struct sk_buff *skb) /* already under rcu_read_lock() */ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); if (!cache) { - int vif = ipmr_find_vif(mrt, skb->dev); + int vif = ipmr_find_vif(mrt, dev); if (vif >= 0) cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, @@ -2042,9 +2049,9 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(&mrt_lock); - vif = ipmr_find_vif(mrt, skb->dev); + vif = ipmr_find_vif(mrt, dev); if (vif >= 0) { - int err2 = ipmr_cache_unresolved(mrt, vif, skb); + int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); read_unlock(&mrt_lock); return err2; @@ -2055,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(&mrt_lock); - ip_mr_forward(net, mrt, skb, cache, local); + ip_mr_forward(net, mrt, dev, skb, cache, local); read_unlock(&mrt_lock); if (local) @@ -2229,7 +2236,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, iph->saddr = saddr; iph->daddr = daddr; iph->version = 0; - err = ipmr_cache_unresolved(mrt, vif, skb2); + err = ipmr_cache_unresolved(mrt, vif, skb2, dev); read_unlock(&mrt_lock); rcu_read_unlock(); return err; @@ -2428,7 +2435,8 @@ static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc) /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, struct mfcctl *mfcc, int *mrtsock, - struct mr_table **mrtret) + struct mr_table **mrtret, + struct netlink_ext_ack *extack) { struct net_device *dev = NULL; u32 tblid = RT_TABLE_DEFAULT; @@ -2437,7 +2445,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, struct rtmsg *rtm; int ret, rem; - ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy); + ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, + extack); if (ret < 0) goto out; rtm = nlmsg_data(nlh); @@ -2496,7 +2505,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, } /* takes care of both newroute and delroute */ -static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh) +static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); int ret, mrtsock, parent; @@ -2505,7 +2515,7 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh) mrtsock = 0; tbl = NULL; - ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl); + ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); if (ret < 0) return ret; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6241a81fd7f5a3df8fb3cf251bfdd407dda6a1f6..0bc3c3d73e61e00a04d73f32800256a62c8943a5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -309,8 +309,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, */ for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct arpt_entry *e - = (struct arpt_entry *)(entry0 + pos); + struct arpt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; @@ -354,14 +353,12 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (pos == oldpos) goto next; - e = (struct arpt_entry *) - (entry0 + pos); + e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; - e = (struct arpt_entry *) - (entry0 + pos + size); + e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; @@ -376,16 +373,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = (struct arpt_entry *) - (entry0 + newpos); + e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } - e = (struct arpt_entry *) - (entry0 + newpos); + e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } @@ -562,8 +557,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } - if (ret != 0) - goto out_free; ret = -EINVAL; if (i != repl->num_entries) @@ -683,7 +676,7 @@ static int copy_entries_to_user(unsigned int total_size, for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ const struct xt_entry_target *t; - e = (struct arpt_entry *)(loc_cpu_entry + off); + e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; @@ -1130,7 +1123,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, int h; origsize = *size; - de = (struct arpt_entry *)*dstptr; + de = *dstptr; memcpy(de, e, sizeof(struct arpt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); @@ -1324,7 +1317,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, int ret; origsize = *size; - ce = (struct compat_arpt_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 384b85713e062881d3d2554b0828f08ff28f443d..2a55a40211cbfb94a9ade41c33678bba4be2a7e4 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -382,7 +382,7 @@ mark_source_chains(const struct xt_table_info *newinfo, to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos); + struct ipt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; @@ -424,14 +424,12 @@ mark_source_chains(const struct xt_table_info *newinfo, if (pos == oldpos) goto next; - e = (struct ipt_entry *) - (entry0 + pos); + e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; - e = (struct ipt_entry *) - (entry0 + pos + size); + e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; @@ -446,16 +444,14 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = (struct ipt_entry *) - (entry0 + newpos); + e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } - e = (struct ipt_entry *) - (entry0 + newpos); + e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } @@ -834,7 +830,7 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_match *m; const struct xt_entry_target *t; - e = (struct ipt_entry *)(loc_cpu_entry + off); + e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; @@ -1229,7 +1225,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, int ret = 0; origsize = *size; - ce = (struct compat_ipt_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) @@ -1366,7 +1362,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_entry_match *ematch; origsize = *size; - de = (struct ipt_entry *)*dstptr; + de = *dstptr; memcpy(de, e, sizeof(struct ipt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 9b8841316e7b94e375cc52d0dfd7f9fe89205195..038f293c23766de6cd77f361269743498c2dd18e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -40,8 +41,8 @@ MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); struct clusterip_config { struct list_head list; /* list of all configs */ - atomic_t refcount; /* reference count */ - atomic_t entries; /* number of entries/rules + refcount_t refcount; /* reference count */ + refcount_t entries; /* number of entries/rules * referencing us */ __be32 clusterip; /* the IP address */ @@ -77,7 +78,7 @@ struct clusterip_net { static inline void clusterip_config_get(struct clusterip_config *c) { - atomic_inc(&c->refcount); + refcount_inc(&c->refcount); } @@ -89,7 +90,7 @@ static void clusterip_config_rcu_free(struct rcu_head *head) static inline void clusterip_config_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->refcount)) + if (refcount_dec_and_test(&c->refcount)) call_rcu_bh(&c->rcu, clusterip_config_rcu_free); } @@ -103,7 +104,7 @@ clusterip_config_entry_put(struct clusterip_config *c) struct clusterip_net *cn = net_generic(net, clusterip_net_id); local_bh_disable(); - if (atomic_dec_and_lock(&c->entries, &cn->lock)) { + if (refcount_dec_and_lock(&c->entries, &cn->lock)) { list_del_rcu(&c->list); spin_unlock(&cn->lock); local_bh_enable(); @@ -149,10 +150,10 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) c = NULL; else #endif - if (unlikely(!atomic_inc_not_zero(&c->refcount))) + if (unlikely(!refcount_inc_not_zero(&c->refcount))) c = NULL; else if (entry) - atomic_inc(&c->entries); + refcount_inc(&c->entries); } rcu_read_unlock_bh(); @@ -188,8 +189,8 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; - atomic_set(&c->refcount, 1); - atomic_set(&c->entries, 1); + refcount_set(&c->refcount, 1); + refcount_set(&c->entries, 1); spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 3240a2614e82bd82c674aac84ab140c91670f163..af2b69b6895f5ae8e326b27e05efa01cc1405e1b 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -293,12 +293,16 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_ECN); synproxy_send_client_synack(net, skb, th, &opts); - return NF_DROP; - + consume_skb(skb); + return NF_STOLEN; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); - return NF_DROP; + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) { + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } return XT_CONTINUE; @@ -367,10 +371,13 @@ static unsigned int ipv4_synproxy_hook(void *priv, * number match the one of first SYN. */ if (synproxy_recv_client_ack(net, skb, th, &opts, - ntohl(th->seq) + 1)) + ntohl(th->seq) + 1)) { this_cpu_inc(snet->stats->cookie_retrans); - - return NF_DROP; + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } synproxy->isn = ntohl(th->ack_seq); @@ -409,19 +416,56 @@ static unsigned int ipv4_synproxy_hook(void *priv, return NF_ACCEPT; } +static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { + { + .hook = ipv4_synproxy_hook, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv4_synproxy_hook, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + static int synproxy_tg4_check(const struct xt_tgchk_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); const struct ipt_entry *e = par->entryinfo; + int err; if (e->ip.proto != IPPROTO_TCP || e->ip.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_netns_get(par->net, par->family); + err = nf_ct_netns_get(par->net, par->family); + if (err) + return err; + + if (snet->hook_ref4 == 0) { + err = nf_register_net_hooks(par->net, ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); + if (err) { + nf_ct_netns_put(par->net, par->family); + return err; + } + } + + snet->hook_ref4++; + return err; } static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); + + snet->hook_ref4--; + if (snet->hook_ref4 == 0) + nf_unregister_net_hooks(par->net, ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); nf_ct_netns_put(par->net, par->family); } @@ -436,46 +480,14 @@ static struct xt_target synproxy_tg4_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { - { - .hook = ipv4_synproxy_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, - { - .hook = ipv4_synproxy_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, -}; - static int __init synproxy_tg4_init(void) { - int err; - - err = nf_register_hooks(ipv4_synproxy_ops, - ARRAY_SIZE(ipv4_synproxy_ops)); - if (err < 0) - goto err1; - - err = xt_register_target(&synproxy_tg4_reg); - if (err < 0) - goto err2; - - return 0; - -err2: - nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); -err1: - return err; + return xt_register_target(&synproxy_tg4_reg); } static void __exit synproxy_tg4_exit(void) { xt_unregister_target(&synproxy_tg4_reg); - nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); } module_init(synproxy_tg4_init); diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index f0dbff05fc28174de694ecad1d1adcde63c314ec..39895b9ddeb9601307f073298ab690ae1f9972d4 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ nf_reset(skb); - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif /* * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 6f5e8d01b876933a68e5f6cf8b2a48f8c4e17262..feedd759ca8043c3eff37e22d0f1c8301b229da2 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -264,13 +264,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, if (!ct) return NF_ACCEPT; - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; + nat = nfct_nat(ct); switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index ea91058b5f6f4245a84179f81a8641756f60efff..dc1dea15c1b4fb0d7ad3ad02779cde147e52c2cf 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -37,7 +37,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); - nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY)); @@ -56,7 +55,9 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, return NF_DROP; } - nat->masq_index = out->ifindex; + nat = nf_ct_nat_ext_add(ct); + if (nat) + nat->masq_index = out->ifindex; /* Transfer from original range. */ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index b3ca21b2ba9b638550b552c85e297234ccf39caa..8a69363b48846c628994e54c92a354ca46f71ebc 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -49,9 +49,14 @@ static void pptp_nat_expected(struct nf_conn *ct, const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_range range; + struct nf_conn_nat *nat; + nat = nf_ct_nat_ext_add(ct); + if (WARN_ON_ONCE(!nat)) + return; + + nat_pptp_info = &nat->help.nat_pptp_info; ct_pptp_info = nfct_help_data(master); - nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; /* And here goes the grand finale of corrosion... */ if (exp->dir == IP_CT_DIR_ORIGINAL) { @@ -120,13 +125,17 @@ pptp_outbound_pkt(struct sk_buff *skb, { struct nf_ct_pptp_master *ct_pptp_info; + struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_pptp *nat_pptp_info; u_int16_t msg; __be16 new_callid; unsigned int cid_off; + if (WARN_ON_ONCE(!nat)) + return NF_DROP; + + nat_pptp_info = &nat->help.nat_pptp_info; ct_pptp_info = nfct_help_data(ct); - nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; new_callid = ct_pptp_info->pns_call_id; @@ -177,11 +186,11 @@ pptp_outbound_pkt(struct sk_buff *skb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_callid), (char *)&new_callid, - sizeof(new_callid)) == 0) + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid))) return NF_DROP; return NF_ACCEPT; } @@ -191,11 +200,15 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_conntrack_expect *expect_reply) { const struct nf_conn *ct = expect_orig->master; + struct nf_conn_nat *nat = nfct_nat(ct); struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; + if (WARN_ON_ONCE(!nat)) + return; + + nat_pptp_info = &nat->help.nat_pptp_info; ct_pptp_info = nfct_help_data(ct); - nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; @@ -223,11 +236,15 @@ pptp_inbound_pkt(struct sk_buff *skb, union pptp_ctrl_union *pptpReq) { const struct nf_nat_pptp *nat_pptp_info; + struct nf_conn_nat *nat = nfct_nat(ct); u_int16_t msg; __be16 new_pcid; unsigned int pcid_off; - nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + if (WARN_ON_ONCE(!nat)) + return NF_DROP; + + nat_pptp_info = &nat->help.nat_pptp_info; new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { @@ -271,11 +288,11 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, - pcid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_pcid), (char *)&new_pcid, - sizeof(new_pcid)) == 0) + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + pcid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid))) return NF_DROP; return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 53e49f5011d3ce482c5180edc47da7928db0f224..d5b1e0b3f6876995b6987aa47515ece9c7beaca3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -827,8 +827,8 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, return 1; } -static unsigned char snmp_request_decode(struct asn1_ctx *ctx, - struct snmp_request *request) +static unsigned char noinline_for_stack +snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request) { unsigned int cls, con, tag; unsigned char *end; @@ -920,10 +920,10 @@ static inline void mangle_address(unsigned char *begin, } } -static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, - struct snmp_v1_trap *trap, - const struct oct1_map *map, - __sum16 *check) +static unsigned char noinline_for_stack +snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap, + const struct oct1_map *map, + __sum16 *check) { unsigned int cls, con, tag, len; unsigned char *end; @@ -998,18 +998,6 @@ static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, * *****************************************************************************/ -static void hex_dump(const unsigned char *buf, size_t len) -{ - size_t i; - - for (i = 0; i < len; i++) { - if (i && !(i % 16)) - printk("\n"); - printk("%02x ", *(buf + i)); - } - printk("\n"); -} - /* * Parse and mangle SNMP message according to mapping. * (And this is the fucking 'basic' method). @@ -1026,7 +1014,8 @@ static int snmp_parse_mangle(unsigned char *msg, struct snmp_object *obj; if (debug > 1) - hex_dump(msg, len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1, + msg, len, 0); asn1_open(&ctx, msg, len); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 146d86105183e1a456a0f17ed6bb5371aa1e8f76..7cd8d0d918f82e275e0ecd31c1cea8ec8fcf345d 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -104,7 +104,6 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; - const struct iphdr *oiph; struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; @@ -116,8 +115,6 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) return; - oiph = ip_hdr(oldskb); - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + LL_MAX_HEADER, GFP_ATOMIC); if (!nskb) diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index a83d558e1aaed62b6bce05d9cce36405b0c210ef..e9293bdebba04f5b329cd4fd4a6b5b676ade5988 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -139,7 +139,7 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, * SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && + if (ct && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 2981291910dd2cac2d508fcde89083afc22affd4..de3681df2ce71c7341d96dd5d2540ab110ac80eb 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -90,7 +90,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, nft_in(pkt)->ifindex); return; } @@ -99,7 +99,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, get_ifindex(pkt->skb->dev)); return; } @@ -212,7 +212,7 @@ nft_fib4_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_fib4_type __read_mostly = { .name = "fib", - .select_ops = &nft_fib4_select_ops, + .select_ops = nft_fib4_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV4, diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 69cf49e8356d0184f774840c9dc96560f2ae2f2b..fa44e752a9a3f8eb9957314149ae15e6df10465a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED), SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED), SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), - SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), @@ -282,6 +281,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), + SNMP_MIB_ITEM("TCPFastOpenBlackhole", LINUX_MIB_TCPFASTOPENBLACKHOLE), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING), diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 4b7c0ec65251ef40577a2d5e360fcbaed391a566..32a691b7ce2c7e79eab6491b52457a11e666f7d3 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -28,7 +28,7 @@ #include #include -const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; +struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; EXPORT_SYMBOL(inet_offloads); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8119e1f66e036ad2a8372bf24dd943c7d9631d8e..bdffad875691ce7240040cfaf188eb4cf3ec7307 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -358,6 +358,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct iphdr)) + return -EINVAL; + if (flags&MSG_PROBE) goto out; @@ -682,7 +685,9 @@ static void raw_close(struct sock *sk, long timeout) /* * Raw sockets may have direct kernel references. Kill them. */ + rtnl_lock(); ip_ra_control(sk, 0, NULL); + rtnl_unlock(); sk_common_release(sk); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index acd69cfe2951ed89213403b3e9556da6ddd50ba8..6883b3d4ba8f69de2cb924612d60f5671a219a84 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1250,15 +1250,11 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { - unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); + unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); + unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size, + ip_rt_min_advmss); - if (advmss == 0) { - advmss = max_t(unsigned int, dst->dev->mtu - 40, - ip_rt_min_advmss); - if (advmss > 65535 - 40) - advmss = 65535 - 40; - } - return advmss; + return min(advmss, IPV4_MAX_PMTU - header_size); } static unsigned int ipv4_mtu(const struct dst_entry *dst) @@ -1389,8 +1385,12 @@ static void rt_add_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; + if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + kfree(p); + if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1442,7 +1442,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; rt->rt_uses_gateway = 1; } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); + if (fi->fib_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + atomic_inc(&fi->fib_metrics->refcnt); + } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif @@ -1734,45 +1738,97 @@ static int __mkroute_input(struct sk_buff *skb, } #ifdef CONFIG_IP_ROUTE_MULTIPATH - /* To make ICMP packets follow the right flow, the multipath hash is - * calculated from the inner IP addresses in reverse order. + * calculated from the inner IP addresses. */ -static int ip_multipath_icmp_hash(struct sk_buff *skb) +static void ip_multipath_l3_keys(const struct sk_buff *skb, + struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); - struct icmphdr _icmph; + const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; - const struct iphdr *inner_iph; + struct icmphdr _icmph; + + hash_keys->addrs.v4addrs.src = outer_iph->saddr; + hash_keys->addrs.v4addrs.dst = outer_iph->daddr; + if (likely(outer_iph->protocol != IPPROTO_ICMP)) + return; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) - goto standard_hash; + return; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) - goto standard_hash; + return; if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_REDIRECT && icmph->type != ICMP_TIME_EXCEEDED && - icmph->type != ICMP_PARAMETERPROB) { - goto standard_hash; - } + icmph->type != ICMP_PARAMETERPROB) + return; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) - goto standard_hash; + return; + hash_keys->addrs.v4addrs.src = inner_iph->saddr; + hash_keys->addrs.v4addrs.dst = inner_iph->daddr; +} - return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); +/* if skb is set it will be used and fl4 can be NULL */ +int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, + const struct sk_buff *skb) +{ + struct net *net = fi->fib_net; + struct flow_keys hash_keys; + u32 mhash; -standard_hash: - return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); -} + switch (net->ipv4.sysctl_fib_multipath_hash_policy) { + case 0: + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (skb) { + ip_multipath_l3_keys(skb, &hash_keys); + } else { + hash_keys.addrs.v4addrs.src = fl4->saddr; + hash_keys.addrs.v4addrs.dst = fl4->daddr; + } + break; + case 1: + /* skb is currently provided only when forwarding */ + if (skb) { + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + struct flow_keys keys; + + /* short-circuit if we already have L4 hash present */ + if (skb->l4_hash) + return skb_get_hash_raw(skb) >> 1; + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, flag); + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + hash_keys.ports.src = keys.ports.src; + hash_keys.ports.dst = keys.ports.dst; + hash_keys.basic.ip_proto = keys.basic.ip_proto; + } else { + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + hash_keys.addrs.v4addrs.src = fl4->saddr; + hash_keys.addrs.v4addrs.dst = fl4->daddr; + hash_keys.ports.src = fl4->fl4_sport; + hash_keys.ports.dst = fl4->fl4_dport; + hash_keys.basic.ip_proto = fl4->flowi4_proto; + } + break; + } + mhash = flow_hash_from_keys(&hash_keys); + return mhash >> 1; +} +EXPORT_SYMBOL_GPL(fib_multipath_hash); #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int ip_mkroute_input(struct sk_buff *skb, @@ -1782,12 +1838,8 @@ static int ip_mkroute_input(struct sk_buff *skb, { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) { - int h; + int h = fib_multipath_hash(res->fi, NULL, skb); - if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) - h = ip_multipath_icmp_hash(skb); - else - h = fib_multipath_hash(saddr, daddr); fib_select_multipath(res, h); } #endif @@ -2203,7 +2255,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, - int mp_hash) + const struct sk_buff *skb) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2359,13 +2411,14 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, } /* L3 master device is the loopback for that domain */ - dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev; + dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(res)) ? : + net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; } - fib_select_path(net, &res, fl4, mp_hash); + fib_select_path(net, &res, fl4, skb); dev_out = FIB_RES_DEV(res); fl4->flowi4_oif = dev_out->ifindex; @@ -2585,7 +2638,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; @@ -2601,7 +2655,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) u32 table_id = RT_TABLE_MAIN; kuid_t uid; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, + extack); if (err < 0) goto errout; @@ -2619,10 +2674,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) skb_reset_mac_header(skb); skb_reset_network_header(skb); - /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ - ip_hdr(skb)->protocol = IPPROTO_UDP; - skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; @@ -2632,6 +2683,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) else uid = (iif ? INVALID_UID : current_uid()); + /* Bugfix: need to give ip_route_input enough of an IP header to + * not gag. + */ + ip_hdr(skb)->protocol = IPPROTO_UDP; + ip_hdr(skb)->saddr = src; + ip_hdr(skb)->daddr = dst; + + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; fl4.saddr = src; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 496b97e17aaf7ed2cf41cef303cb0696927f66ac..0257d965f11119acf8c55888d6e672d171ef5f08 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, u32 tsoff) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { atomic_set(&req->rsk_refcnt, 1); + tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); } else { @@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d6880a6149ee80c6c75f4fe75b46a9d18d204d5d..86957e9cd6c6748ac00aa0307154bb131c43f1da 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -24,6 +24,7 @@ #include #include #include +#include static int zero; static int one = 1; @@ -294,6 +295,74 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, return ret; } +static void proc_configure_early_demux(int enabled, int protocol) +{ + struct net_protocol *ipprot; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol *ip6prot; +#endif + + rcu_read_lock(); + + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot) + ipprot->early_demux = enabled ? ipprot->early_demux_handler : + NULL; + +#if IS_ENABLED(CONFIG_IPV6) + ip6prot = rcu_dereference(inet6_protos[protocol]); + if (ip6prot) + ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : + NULL; +#endif + rcu_read_unlock(); +} + +static int proc_tcp_early_demux(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (write && !ret) { + int enabled = init_net.ipv4.sysctl_tcp_early_demux; + + proc_configure_early_demux(enabled, IPPROTO_TCP); + } + + return ret; +} + +static int proc_udp_early_demux(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (write && !ret) { + int enabled = init_net.ipv4.sysctl_udp_early_demux; + + proc_configure_early_demux(enabled, IPPROTO_UDP); + } + + return ret; +} + +static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, + int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + tcp_fastopen_active_timeout_reset(); + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -343,6 +412,14 @@ static struct ctl_table ipv4_table[] = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), .proc_handler = proc_tcp_fastopen_key, }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, + .extra1 = &zero, + }, { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, @@ -749,6 +826,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "udp_early_demux", + .data = &init_net.ipv4.sysctl_udp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_udp_early_demux + }, + { + .procname = "tcp_early_demux", + .data = &init_net.ipv4.sysctl_tcp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tcp_early_demux + }, { .procname = "ip_default_ttl", .data = &init_net.ipv4.sysctl_ip_default_ttl, @@ -980,13 +1071,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_tw_recycle", - .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, @@ -1004,6 +1088,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "fib_multipath_hash_policy", + .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif { .procname = "ip_unprivileged_port_start", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 40ba4249a58677671b68bf495f32f15b7c5f62d7..b5ea036ca78144b86622cb0944d0b840f7225ec5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -533,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; - } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect * Return POLLOUT so application can call write() * in order for kernel to generate SYN+data @@ -1084,9 +1084,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); + struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && + uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; if (tp->fastopen_req) return -EALREADY; /* Another Fast Open is in progress */ @@ -1108,7 +1111,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, } } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; - err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + err = __inet_stream_connect(sk->sk_socket, uaddr, msg->msg_namelen, flags, 1); /* fastopen_req could already be freed in __inet_stream_connect * if the connection times out or gets rst @@ -2296,6 +2299,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); + tcp_fastopen_active_disable_ofo_check(sk); skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; @@ -2319,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); @@ -2373,9 +2381,10 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l return 0; } -static int tcp_repair_options_est(struct tcp_sock *tp, +static int tcp_repair_options_est(struct sock *sk, struct tcp_repair_opt __user *optbuf, unsigned int len) { + struct tcp_sock *tp = tcp_sk(sk); struct tcp_repair_opt opt; while (len >= sizeof(opt)) { @@ -2388,13 +2397,14 @@ static int tcp_repair_options_est(struct tcp_sock *tp, switch (opt.opt_code) { case TCPOPT_MSS: tp->rx_opt.mss_clamp = opt.opt_val; + tcp_mtup_init(sk); break; case TCPOPT_WINDOW: { u16 snd_wscale = opt.opt_val & 0xFFFF; u16 rcv_wscale = opt.opt_val >> 16; - if (snd_wscale > 14 || rcv_wscale > 14) + if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE) return -EFBIG; tp->rx_opt.snd_wscale = snd_wscale; @@ -2471,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, /* Values greater than interface MTU won't take effect. However * at the point when this call is done we typically don't yet * know which interface is going to be used */ - if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { + if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) { err = -EINVAL; break; } @@ -2547,7 +2557,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (!tp->repair) err = -EINVAL; else if (sk->sk_state == TCP_ESTABLISHED) - err = tcp_repair_options_est(tp, + err = tcp_repair_options_est(sk, (struct tcp_repair_opt __user *)optval, optlen); else @@ -2852,7 +2862,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_ssthresh = tp->snd_ssthresh; info->tcpi_advmss = tp->advmss; - info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; + info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 79c4817abc94d08265edb2dfa995e3e479148a16..324c9bcc5456b499b59cef40838b4e9829119e13 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -168,12 +168,8 @@ void tcp_assign_congestion_control(struct sock *sk) } out: rcu_read_unlock(); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - /* Clear out private data before diag gets it and - * the ca has not been initialized. - */ - if (ca->get_info) - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else @@ -184,6 +180,7 @@ void tcp_init_congestion_control(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); + tcp_sk(sk)->prior_ssthresh = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) @@ -200,11 +197,10 @@ static void tcp_reinit_congestion_control(struct sock *sk, tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - if (sk->sk_state != TCP_CLOSE) { - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (sk->sk_state != TCP_CLOSE) tcp_init_congestion_control(sk); - } } /* Manage refcounts on socket close. */ diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index c99230efcd52d2233ba22fbc117101c8e8cee113..0683ba447d775b6101a929a6aca3eb255cff8932 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -72,7 +72,7 @@ MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); module_param(hystart, int, 0644); MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); module_param(hystart_detect, int, 0644); -MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" +MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8ea4e9787f82ba65cd07b4c2b663df76fe4eb143..4af82b914dd4bbdc47e37cf1cf70f206bd186db5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -341,6 +341,13 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, cookie->len = -1; return false; } + + /* Firewall blackhole issue check */ + if (tcp_fastopen_active_should_disable(sk)) { + cookie->len = -1; + return false; + } + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { cookie->len = -1; return true; @@ -380,3 +387,98 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) return false; } EXPORT_SYMBOL(tcp_fastopen_defer_connect); + +/* + * The following code block is to deal with middle box issues with TFO: + * Middlebox firewall issues can potentially cause server's data being + * blackholed after a successful 3WHS using TFO. + * The proposed solution is to disable active TFO globally under the + * following circumstances: + * 1. client side TFO socket receives out of order FIN + * 2. client side TFO socket receives out of order RST + * We disable active side TFO globally for 1hr at first. Then if it + * happens again, we disable it for 2h, then 4h, 8h, ... + * And we reset the timeout back to 1hr when we see a successful active + * TFO connection with data exchanges. + */ + +/* Default to 1hr */ +unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60; +static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0); +static unsigned long tfo_active_disable_stamp __read_mostly; + +/* Disable active TFO and record current jiffies and + * tfo_active_disable_times + */ +void tcp_fastopen_active_disable(struct sock *sk) +{ + atomic_inc(&tfo_active_disable_times); + tfo_active_disable_stamp = jiffies; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE); +} + +/* Reset tfo_active_disable_times to 0 */ +void tcp_fastopen_active_timeout_reset(void) +{ + atomic_set(&tfo_active_disable_times, 0); +} + +/* Calculate timeout for tfo active disable + * Return true if we are still in the active TFO disable period + * Return false if timeout already expired and we should use active TFO + */ +bool tcp_fastopen_active_should_disable(struct sock *sk) +{ + int tfo_da_times = atomic_read(&tfo_active_disable_times); + int multiplier; + unsigned long timeout; + + if (!tfo_da_times) + return false; + + /* Limit timout to max: 2^6 * initial timeout */ + multiplier = 1 << min(tfo_da_times - 1, 6); + timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; + if (time_before(jiffies, tfo_active_disable_stamp + timeout)) + return true; + + /* Mark check bit so we can check for successful active TFO + * condition and reset tfo_active_disable_times + */ + tcp_sk(sk)->syn_fastopen_ch = 1; + return false; +} + +/* Disable active TFO if FIN is the only packet in the ofo queue + * and no data is received. + * Also check if we can reset tfo_active_disable_times if data is + * received successfully on a marked active TFO sockets opened on + * a non-loopback interface + */ +void tcp_fastopen_active_disable_ofo_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct rb_node *p; + struct sk_buff *skb; + struct dst_entry *dst; + + if (!tp->syn_fastopen) + return; + + if (!tp->data_segs_in) { + p = rb_first(&tp->out_of_order_queue); + if (p && !rb_next(p)) { + skb = rb_entry(p, struct sk_buff, rbnode); + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { + tcp_fastopen_active_disable(sk); + return; + } + } + } else if (tp->syn_fastopen_ch && + atomic_read(&tfo_active_disable_times)) { + dst = sk_dst_get(sk); + if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) + tcp_fastopen_active_timeout_reset(); + dst_release(dst); + } +} diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 659d1baefb2bba36d96e412eb7ca5a02996fb6dd..174d4376baa5374c11caecc0e0452fa938d63561 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); -EXPORT_SYMBOL(sysctl_tcp_timestamps); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -442,7 +441,8 @@ void tcp_init_buffer_space(struct sock *sk) tcp_sndbuf_expand(sk); tp->rcvq_space.space = tp->rcv_wnd; - tp->rcvq_space.time = tcp_time_stamp; + skb_mstamp_get(&tp->tcp_mstamp); + tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; maxwin = tcp_full_space(sk); @@ -518,7 +518,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss); */ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) { - u32 new_sample = tp->rcv_rtt_est.rtt; + u32 new_sample = tp->rcv_rtt_est.rtt_us; long m = sample; if (m == 0) @@ -548,21 +548,23 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) new_sample = m << 3; } - if (tp->rcv_rtt_est.rtt != new_sample) - tp->rcv_rtt_est.rtt = new_sample; + tp->rcv_rtt_est.rtt_us = new_sample; } static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) { - if (tp->rcv_rtt_est.time == 0) + u32 delta_us; + + if (tp->rcv_rtt_est.time.v64 == 0) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; - tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1); + delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time); + tcp_rcv_rtt_update(tp, delta_us, 1); new_measure: tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; - tp->rcv_rtt_est.time = tcp_time_stamp; + tp->rcv_rtt_est.time = tp->tcp_mstamp; } static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, @@ -572,7 +574,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) - tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); + tcp_rcv_rtt_update(tp, + jiffies_to_usecs(tcp_time_stamp - + tp->rx_opt.rcv_tsecr), + 0); } /* @@ -585,8 +590,8 @@ void tcp_rcv_space_adjust(struct sock *sk) int time; int copied; - time = tcp_time_stamp - tp->rcvq_space.time; - if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) + time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time); + if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) return; /* Number of bytes copied to user in last RTT */ @@ -642,7 +647,7 @@ void tcp_rcv_space_adjust(struct sock *sk) new_measure: tp->rcvq_space.seq = tp->copied_seq; - tp->rcvq_space.time = tcp_time_stamp; + tp->rcvq_space.time = tp->tcp_mstamp; } /* There is something which you must keep in mind when you analyze the @@ -1131,7 +1136,6 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; - struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */ struct rate_sample *rate; int flag; }; @@ -1175,13 +1179,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, */ if (pkt_len > mss) { unsigned int new_len = (pkt_len / mss) * mss; - if (!in_sack && new_len < pkt_len) { + if (!in_sack && new_len < pkt_len) new_len += mss; - if (new_len >= skb->len) - return 0; - } pkt_len = new_len; } + + if (pkt_len >= skb->len && !in_sack) + return 0; + err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; @@ -1214,8 +1219,7 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { - tcp_rack_advance(tp, sacked, end_seq, - xmit_time, &state->ack_time); + tcp_rack_advance(tp, sacked, end_seq, xmit_time); if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, @@ -2760,8 +2764,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked) return false; } -static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag, - const struct skb_mstamp *ack_time) +static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2769,7 +2772,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag, if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { u32 prior_retrans = tp->retrans_out; - tcp_rack_mark_lost(sk, ack_time); + tcp_rack_mark_lost(sk); if (prior_retrans > tp->retrans_out) *ack_flag |= FLAG_LOST_RETRANS; } @@ -2788,8 +2791,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag, * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const int acked, - bool is_dupack, int *ack_flag, int *rexmit, - const struct skb_mstamp *ack_time) + bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2857,11 +2859,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, tcp_try_keep_open(sk); return; } - tcp_rack_identify_loss(sk, ack_flag, ack_time); + tcp_rack_identify_loss(sk, ack_flag); break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack, rexmit); - tcp_rack_identify_loss(sk, ack_flag, ack_time); + tcp_rack_identify_loss(sk, ack_flag); if (!(icsk->icsk_ca_state == TCP_CA_Open || (*ack_flag & FLAG_LOST_RETRANS))) return; @@ -2877,7 +2879,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); - tcp_rack_identify_loss(sk, ack_flag, ack_time); + tcp_rack_identify_loss(sk, ack_flag); if (!tcp_time_to_recover(sk, flag)) { tcp_try_to_open(sk, flag); return; @@ -3059,8 +3061,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, { const struct inet_connection_sock *icsk = inet_csk(sk); struct skb_mstamp first_ackt, last_ackt; - struct skb_mstamp *now = &sack->ack_time; struct tcp_sock *tp = tcp_sk(sk); + struct skb_mstamp *now = &tp->tcp_mstamp; u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; bool fully_acked = true; @@ -3120,8 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->delivered += acked_pcount; if (!tcp_skb_spurious_retrans(tp, skb)) tcp_rack_advance(tp, sacked, scb->end_seq, - &skb->skb_mstamp, - &sack->ack_time); + &skb->skb_mstamp); } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3189,7 +3190,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets) + if (reord < prior_fackets && reord <= tp->fackets_out) tcp_update_reordering(sk, tp->fackets_out - reord, 0); delta = tcp_is_fack(tp) ? pkts_acked : @@ -3576,8 +3577,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - skb_mstamp_get(&sack_state.ack_time); - if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -3647,8 +3646,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, - &sack_state.ack_time); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -3660,8 +3658,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ - tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time, - sack_state.rate); + tcp_rate_gen(sk, delivered, lost, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); return 1; @@ -3669,8 +3666,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, - &sack_state.ack_time); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3691,11 +3687,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * If data was DSACKed, see if we can undo a cwnd reduction. */ if (TCP_SKB_CB(skb)->sacked) { - skb_mstamp_get(&sack_state.ack_time); flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit, - &sack_state.ack_time); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); tcp_xmit_recovery(sk, rexmit); } @@ -3768,11 +3762,12 @@ void tcp_parse_options(const struct sk_buff *skb, !estab && sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; - if (snd_wscale > 14) { - net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", + if (snd_wscale > TCP_MAX_WSCALE) { + net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n", __func__, - snd_wscale); - snd_wscale = 14; + snd_wscale, + TCP_MAX_WSCALE); + snd_wscale = TCP_MAX_WSCALE; } opt_rx->snd_wscale = snd_wscale; } @@ -4007,10 +4002,10 @@ void tcp_reset(struct sock *sk) /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); + tcp_done(sk); + if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); - - tcp_done(sk); } /* @@ -5299,8 +5294,16 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (rst_seq_match) tcp_reset(sk); - else + else { + /* Disable TFO if RST is out-of-order + * and no data has been received + * for current active TFO socket + */ + if (tp->syn_fastopen && !tp->data_segs_in && + sk->sk_state == TCP_ESTABLISHED) + tcp_fastopen_active_disable(sk); tcp_send_challenge_ack(sk, skb); + } goto discard; } @@ -5353,6 +5356,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); + skb_mstamp_get(&tp->tcp_mstamp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* @@ -5579,10 +5583,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) else tp->pred_flags = 0; - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); - } } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -5651,6 +5651,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; + bool fastopen_fail; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) @@ -5754,10 +5755,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_finish_connect(sk, skb); - if ((tp->syn_fastopen || tp->syn_data) && - tcp_rcv_fastopen_synack(sk, skb, &foc)) - return -1; + fastopen_fail = (tp->syn_fastopen || tp->syn_data) && + tcp_rcv_fastopen_synack(sk, skb, &foc); + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + } + if (fastopen_fail) + return -1; if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) { @@ -5911,6 +5917,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_SYN_SENT: tp->rx_opt.saw_tstamp = 0; + skb_mstamp_get(&tp->tcp_mstamp); queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5922,6 +5929,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; } + skb_mstamp_get(&tp->tcp_mstamp); tp->rx_opt.saw_tstamp = 0; req = tp->fastopen_rsk; if (req) { @@ -6041,9 +6049,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; } - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + if (tp->linger2 < 0) { + tcp_done(sk); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { + /* Receive out of order FIN after close() */ + if (tp->syn_fastopen && th->fin) + tcp_fastopen_active_disable(sk); tcp_done(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; @@ -6332,37 +6347,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - if (isn && tmp_opt.tstamp_ok) - af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + if (tmp_opt.tstamp_ok) + tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb); if (!want_cookie && !isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { - bool strict; - - dst = af_ops->route_req(sk, &fl, req, &strict); - - if (dst && strict && - !tcp_peer_is_proven(req, dst, true, - tmp_opt.saw_tstamp)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } /* Kill the following clause, if you dislike this way. */ - else if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false, - tmp_opt.saw_tstamp)) { + if (!net->ipv4.sysctl_tcp_syncookies && + (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. @@ -6375,10 +6368,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + isn = af_ops->init_seq(skb); } if (!dst) { - dst = af_ops->route_req(sk, &fl, req, NULL); + dst = af_ops->route_req(sk, &fl, req); if (!dst) goto drop_and_free; } @@ -6387,7 +6380,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - tcp_rsk(req)->ts_off = 0; req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 575e19dcc01763ef3fa938dea3ea51995b573163..5ab2aac5ca191075383fc75214da816873bb222c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v4_init_seq(const struct sk_buff *skb) { - return secure_tcp_sequence_number(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcp_seq(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcp_ts_off(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 *fl4; struct rtable *rt; int err; - u32 seq; struct ip_options_rcu *inet_opt; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -198,10 +203,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (tcp_death_row->sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) - tcp_fetch_timewait_stamp(sk, &rt->dst); - inet->inet_dport = usin->sin_port; sk_daddr_set(sk, daddr); @@ -236,13 +237,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; if (likely(!tp->repair)) { - seq = secure_tcp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcp_seq(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); + tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + inet->inet_daddr); } inet->inet_id = tp->write_seq ^ jiffies; @@ -1217,19 +1218,9 @@ static void tcp_v4_init_req(struct request_sock *req, static struct dst_entry *tcp_v4_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); - - if (strict) { - if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) - *strict = true; - else - *strict = false; - } - - return dst; + return inet_csk_route_req(sk, &fl->u.ip4, req); } struct request_sock_ops tcp_request_sock_ops __read_mostly = { @@ -1253,7 +1244,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_sequence, + .init_seq = tcp_v4_init_seq, + .init_ts_off = tcp_v4_init_ts_off, .send_synack = tcp_v4_send_synack, }; @@ -1423,8 +1415,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (!nsk) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1871,6 +1861,9 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Cleanup up the write buffer. */ tcp_write_queue_purge(sk); + /* Check if we want to disable active TFO */ + tcp_fastopen_active_disable_ofo_check(sk); + /* Cleans up our, hopefully empty, out_of_order_queue. */ skb_rbtree_purge(&tp->out_of_order_queue); @@ -2402,7 +2395,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, @@ -2466,7 +2459,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 0; cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 046fd3910873306d74207615d6997e1c847ea361..d6fb6c067af4641f232b94e7c590c212648e8173 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -264,13 +264,15 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); + u32 delta; if (sample->rtt_us > 0) tcp_lp_rtt_sample(sk, sample->rtt_us); /* calc inference */ - if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) - lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); + delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + if ((s32)delta > 0) + lp->inference = 3 * delta; /* test if within inference */ if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0f46e5fe31ad1b6809ada1f70bce7b63df4f8c9c..653bbd67e3a39b68d27d26d17571c00ce2854bfd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -45,8 +45,6 @@ struct tcp_metrics_block { struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; - u32 tcpm_ts; - u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; struct tcp_fastopen_metrics tcpm_fastopen; @@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); - tm->tcpm_ts = 0; - tm->tcpm_ts_stamp = 0; if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; @@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return tm; } -static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) -{ - struct tcp_metrics_block *tm; - struct inetpeer_addr saddr, daddr; - unsigned int hash; - struct net *net; - - if (tw->tw_family == AF_INET) { - inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); - inetpeer_set_addr_v4(&daddr, tw->tw_daddr); - hash = ipv4_addr_hash(tw->tw_daddr); - } -#if IS_ENABLED(CONFIG_IPV6) - else if (tw->tw_family == AF_INET6) { - if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) { - inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); - inetpeer_set_addr_v4(&daddr, tw->tw_daddr); - hash = ipv4_addr_hash(tw->tw_daddr); - } else { - inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr); - inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr); - hash = ipv6_addr_hash(&tw->tw_v6_daddr); - } - } -#endif - else - return NULL; - - net = twsk_net(tw); - hash ^= net_hash_mix(net); - hash = hash_32(hash, tcp_metrics_hash_log); - - for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; - tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr) && - net_eq(tm_net(tm), net)) - break; - } - return tm; -} - static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct dst_entry *dst, bool create) @@ -573,8 +527,7 @@ void tcp_init_metrics(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, - bool paws_check, bool timestamps) +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; bool ret; @@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, rcu_read_lock(); tm = __tcp_get_metrics_req(req, dst); - if (paws_check) { - if (tm && - (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && - ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW || - !timestamps)) - ret = false; - else - ret = true; - } else { - if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp) - ret = true; - else - ret = false; - } - rcu_read_unlock(); - - return ret; -} - -void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) -{ - struct tcp_metrics_block *tm; - - rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); - if (tm) { - struct tcp_sock *tp = tcp_sk(sk); - - if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp; - tp->rx_opt.ts_recent = tm->tcpm_ts; - } - } - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp); - -/* VJ's idea. Save last timestamp seen from this destination and hold - * it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter - * synchronized state. - */ -bool tcp_remember_stamp(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - bool ret = false; - - if (dst) { - struct tcp_metrics_block *tm; - - rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); - if (tm) { - struct tcp_sock *tp = tcp_sk(sk); - - if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && - tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - tm->tcpm_ts = tp->rx_opt.ts_recent; - } - ret = true; - } - rcu_read_unlock(); - } - return ret; -} - -bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) -{ - struct tcp_metrics_block *tm; - bool ret = false; - - rcu_read_lock(); - tm = __tcp_get_metrics_tw(tw); - if (tm) { - const struct tcp_timewait_sock *tcptw; - struct sock *sk = (struct sock *) tw; - - tcptw = tcp_twsk(sk); - if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && - tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - tm->tcpm_ts = tcptw->tw_ts_recent; - } + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) ret = true; - } + else + ret = false; rcu_read_unlock(); return ret; @@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, jiffies - tm->tcpm_stamp, TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; - if (tm->tcpm_ts_stamp) { - if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, - (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) - goto nla_put_failure; - if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, - tm->tcpm_ts) < 0) - goto nla_put_failure; - } { int n = 0; @@ -1150,10 +1011,7 @@ static int __net_init tcp_net_metrics_init(struct net *net) tcp_metrics_hash_log = order_base_2(slots); size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log; - tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!tcp_metrics_hash) - tcp_metrics_hash = vzalloc(size); - + tcp_metrics_hash = kvzalloc(size, GFP_KERNEL); if (!tcp_metrics_hash) return -ENOMEM; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 65c0f3d13eca47c6394c09925decf54287d01b48..717be4de53248352c758b50557987d898340dd4f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -26,6 +26,7 @@ #include #include #include +#include int sysctl_tcp_abort_on_overflow __read_mostly; @@ -94,7 +95,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; - struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { @@ -149,12 +149,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - if (tcp_death_row->sysctl_tw_recycle && - tcptw->tw_ts_recent_stamp && - tcp_tw_remember_stamp(tw)) - inet_twsk_reschedule(tw, tw->tw_timeout); - else - inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -259,12 +254,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct inet_timewait_sock *tw; - bool recycle_ok = false; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; - if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = tcp_remember_stamp(sk); - tw = inet_twsk_alloc(sk, tcp_death_row, state); if (tw) { @@ -317,13 +308,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (timeo < rto) timeo = rto; - if (recycle_ok) { - tw->tw_timeout = rto; - } else { - tw->tw_timeout = TCP_TIMEWAIT_LEN; - if (state == TCP_TIME_WAIT) - timeo = TCP_TIMEWAIT_LEN; - } + tw->tw_timeout = TCP_TIMEWAIT_LEN; + if (state == TCP_TIME_WAIT) + timeo = TCP_TIMEWAIT_LEN; inet_twsk_schedule(tw, timeo); /* Linkage updates. */ @@ -536,6 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; tcp_ecn_openreq_child(newtp, req); + newtp->fastopen_req = NULL; newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; newtp->rack.mstamp.v64 = 0; @@ -813,6 +801,9 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + /* record NAPI ID of child */ + sk_mark_napi_id(child, skb); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c3c082ed38796f65948e7a1042e37813b71d5abf..4858e190f6ac130c9441f58cb8944cc82bf67270 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -212,12 +212,12 @@ void tcp_select_initial_window(int __space, __u32 mss, /* If no clamp set the clamp to the max possible scaled window */ if (*window_clamp == 0) - (*window_clamp) = (65535 << 14); + (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE); space = min(*window_clamp, space); /* Quantize space offering to a multiple of mss if possible. */ if (space > mss) - space = (space / mss) * mss; + space = rounddown(space, mss); /* NOTE: offering an initial window larger than 32767 * will break some buggy TCP stacks. If the admin tells us @@ -234,13 +234,11 @@ void tcp_select_initial_window(int __space, __u32 mss, (*rcv_wscale) = 0; if (wscale_ok) { - /* Set window scaling on max possible window - * See RFC1323 for an explanation of the limit to 14 - */ + /* Set window scaling on max possible window */ space = max_t(u32, space, sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); - while (space > 65535 && (*rcv_wscale) < 14) { + while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { space >>= 1; (*rcv_wscale)++; } @@ -253,7 +251,7 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set the clamp no higher than max representable value */ - (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); + (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); } EXPORT_SYMBOL(tcp_select_initial_window); @@ -1267,7 +1265,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, * eventually). The difference is that pulled data not copied, but * immediately discarded. */ -static void __pskb_trim_head(struct sk_buff *skb, int len) +static int __pskb_trim_head(struct sk_buff *skb, int len) { struct skb_shared_info *shinfo; int i, k, eat; @@ -1277,7 +1275,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) __skb_pull(skb, eat); len -= eat; if (!len) - return; + return 0; } eat = len; k = 0; @@ -1303,23 +1301,28 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) skb_reset_tail_pointer(skb); skb->data_len -= len; skb->len = skb->data_len; + return len; } /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { + u32 delta_truesize; + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; - __pskb_trim_head(skb, len); + delta_truesize = __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; - skb->truesize -= len; - sk->sk_wmem_queued -= len; - sk_mem_uncharge(sk, len); - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + if (delta_truesize) { + skb->truesize -= delta_truesize; + sk->sk_wmem_queued -= delta_truesize; + sk_mem_uncharge(sk, delta_truesize); + sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + } /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) @@ -1511,6 +1514,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) { + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; struct tcp_sock *tp = tcp_sk(sk); /* Track the maximum number of outstanding packets in each @@ -1533,7 +1537,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) tp->snd_cwnd_used = tp->packets_out; if (sysctl_tcp_slow_start_after_idle && - (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) + (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && + !ca_ops->cong_control) tcp_cwnd_application_limited(sk); /* The following conditions together indicate the starvation @@ -2561,7 +2566,6 @@ u32 __tcp_select_window(struct sock *sk) /* Don't do rounding if we are using window scaling, since the * scaled window will not line up with the MSS boundary anyway. */ - window = tp->rcv_wnd; if (tp->rx_opt.rcv_wscale) { window = free_space; @@ -2569,10 +2573,9 @@ u32 __tcp_select_window(struct sock *sk) * Import case: prevent zero window announcement if * 1< mss. */ - if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) - window = (((window >> tp->rx_opt.rcv_wscale) + 1) - << tp->rx_opt.rcv_wscale); + window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale)); } else { + window = tp->rcv_wnd; /* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the @@ -2582,7 +2585,7 @@ u32 __tcp_select_window(struct sock *sk) * is too small. */ if (window <= free_space - mss || window > free_space) - window = (free_space / mss) * mss; + window = rounddown(free_space, mss); else if (mss == full_space && free_space > window + (full_space >> 1)) window = free_space; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 9be1581a5a08c36f4544fbdabedd9741fb266a1e..c6a9fa8946462100947ab62d86464ff8f99565c2 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -106,7 +106,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - struct skb_mstamp *now, struct rate_sample *rs) + struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; @@ -120,7 +120,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * to carry current time, flags, stats like "tcp_sacktag_state". */ if (delivered) - tp->delivered_mstamp = *now; + tp->delivered_mstamp = tp->tcp_mstamp; rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ @@ -138,7 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */ + ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp, + &rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); /* Normally we expect interval_us >= min-rtt. diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index d8acbd9f477a2ac6b0f8eee1bf59f3ab43abff07..362b8c75bfab44cf87c2a01398a146a271bc1119 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -45,8 +45,7 @@ static bool tcp_rack_sent_after(const struct skb_mstamp *t1, * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will * make us enter the CA_Recovery state. */ -static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, - u32 *reo_timeout) +static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -79,7 +78,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, * A packet is lost if its elapsed time is beyond * the recent RTT plus the reordering window. */ - u32 elapsed = skb_mstamp_us_delta(now, + u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp, &skb->skb_mstamp); s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed; @@ -105,7 +104,7 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, } } -void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) +void tcp_rack_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout; @@ -115,7 +114,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; - tcp_rack_detect_loss(sk, now, &timeout); + tcp_rack_detect_loss(sk, &timeout); if (timeout) { timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, @@ -128,8 +127,7 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) * draft-cheng-tcpm-rack-00.txt */ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, - const struct skb_mstamp *xmit_time, - const struct skb_mstamp *ack_time) + const struct skb_mstamp *xmit_time) { u32 rtt_us; @@ -138,7 +136,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, end_seq, tp->rack.end_seq)) return; - rtt_us = skb_mstamp_us_delta(ack_time, xmit_time); + rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time); if (sacked & TCPCB_RETRANS) { /* If the sacked packet was retransmitted, it's ambiguous * whether the retransmission or the original (or the prior @@ -165,12 +163,11 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, void tcp_rack_reo_timeout(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct skb_mstamp now; u32 timeout, prior_inflight; - skb_mstamp_get(&now); prior_inflight = tcp_packets_in_flight(tp); - tcp_rack_detect_loss(sk, &now, &timeout); + skb_mstamp_get(&tp->tcp_mstamp); + tcp_rack_detect_loss(sk, &timeout); if (prior_inflight != tcp_packets_in_flight(tp)) { if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) { tcp_enter_recovery(sk, false); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b2ab411c6d3728fa7dbdebde045532a7317f5166..14672543cf0bd27bc59976d5cec38d2d3bbcdd2c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -201,11 +201,10 @@ static int tcp_write_timeout(struct sock *sk) if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) { /* Some middle-boxes may black-hole Fast Open _after_ * the handshake. Therefore we conservatively disable - * Fast Open on this path on recurring timeouts with - * few or zero bytes acked after Fast Open. + * Fast Open on this path on recurring timeouts after + * successful Fast Open. */ - if (tp->syn_data_acked && - tp->bytes_acked <= tp->rx_opt.mss_clamp) { + if (tp->syn_data_acked) { tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) NET_INC_STATS(sock_net(sk), diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index fed66dc0e0f5f242cf0af25434fa9cfa89998958..9775453b8d174c848dc09df83d1fa185422cd8cc 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -265,8 +265,8 @@ static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr, if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = 1; info->vegas.tcpv_rttcnt = 0; - info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt), - info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), + info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt); + info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min); *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ea6e4cff9fafe99af23fd8ea666cd979d5af9104..1d6219bf2d6b48abaa73ad7f178049d95124cc90 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1612,7 +1612,7 @@ static void udp_v4_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1657,7 +1657,7 @@ EXPORT_SYMBOL(udp_encap_enable); * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ -int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index feb50a16398dfa856fd928fe823b4f6556d2caa1..a8cf8c6fb60ccf532fda1b109f902ead378acf22 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -25,7 +25,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); -int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b2be1d9757efb8ce8b82dc0a0fe3a475d193ea5b..781250151d40ee4559f7b90d15dccad8ffaeafd0 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, u16 mac_len = skb->mac_len; int udp_offset, outer_hlen; __wsum partial; + bool need_ipsec; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); + need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && + !need_ipsec && (skb->dev->features & (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 4acc0508c5ebc65dc392de50a207901b2ea8d305..3d36644890bb6d3b0a755c811c60e920ad5cd8b8 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -12,6 +12,7 @@ #include #include #include +#include /* Add encapsulation header. * @@ -23,6 +24,8 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); @@ -56,9 +59,40 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) return 0; } +static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len); + + if (xo->flags & XFRM_GSO_SEGMENT) { + skb_reset_transport_header(skb); + skb->transport_header -= x->props.header_len; + } +} + static struct xfrm_mode xfrm4_transport_mode = { .input = xfrm4_transport_input, .output = xfrm4_transport_output, + .gso_segment = xfrm4_transport_gso_segment, + .xmit = xfrm4_transport_xmit, .owner = THIS_MODULE, .encap = XFRM_MODE_TRANSPORT, }; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 35feda67646494c92263cf30109432fb395fa1df..e6265e2c274e48a54cfc08afb9fee6c9fc2ad194 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -33,6 +33,9 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *top_iph; int flags; + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); @@ -96,11 +99,36 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) return err; } +static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); + +} + +static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo->flags & XFRM_GSO_SEGMENT) { + skb->network_header = skb->network_header - x->props.header_len; + skb->transport_header = skb->network_header + + sizeof(struct iphdr); + } + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + x->props.header_len); +} + static struct xfrm_mode xfrm4_tunnel_mode = { .input2 = xfrm4_mode_tunnel_input, .input = xfrm_prepare_input, .output2 = xfrm4_mode_tunnel_output, .output = xfrm4_prepare_output, + .gso_segment = xfrm4_mode_tunnel_gso_segment, + .xmit = xfrm4_mode_tunnel_xmit, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, .flags = XFRM_MODE_FLAG_TUNNEL, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 7ee6518afa86ff785cacda0f115297ff6e5d0fa5..94b8702603bc54f36542977ed0d9c3b93d43b6b7 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -29,7 +29,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) goto out; mtu = dst_mtu(skb_dst(skb)); - if (skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) { skb->protocol = htons(ETH_P_IP); if (skb->sk) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e2afe677a9d944a2c6c27a2e7b2d06227712cf89..48c452959d2c2fe687472c3732fe40246a8c863a 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL bool "IPv6: Segment Routing Header encapsulation support" depends on IPV6 select LWTUNNEL + select DST_CACHE ---help--- Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 80ce478c4851318f6eef9320bbdb8548641f920c..6a4fb1e629fb7609048156974ae2eb322cebddae 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -224,6 +224,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -245,6 +246,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, + .disable_policy = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -276,6 +278,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rtr_pref = 1, .rtr_probe_interval = 60 * HZ, #ifdef CONFIG_IPV6_ROUTE_INFO + .accept_ra_rt_info_min_plen = 0, .accept_ra_rt_info_max_plen = 0, #endif #endif @@ -297,6 +300,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #endif .enhanced_dad = 1, .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, + .disable_policy = 0, }; /* Check if a valid qdisc is available */ @@ -545,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; + if (!devconf) + goto out; + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; @@ -563,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->ignore_routes_with_linkdown) < 0) goto nla_put_failure; +out: nlmsg_end(skb, nlh); return 0; @@ -571,8 +579,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, return -EMSGSIZE; } -void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf) +void inet6_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; @@ -582,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, - RTM_NEWNETCONF, 0, type); + event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -603,7 +611,8 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -616,7 +625,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy); + devconf_ipv6_policy, extack); if (err < 0) goto errout; @@ -765,7 +774,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } - inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, + NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); } @@ -800,7 +810,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); rtnl_unlock(); @@ -812,13 +823,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net->ipv6.devconf_dflt->forwarding = newf; if ((!newf) ^ (!old_dflt)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); addrconf_forward_change(net, newf); if ((!newf) ^ (!old)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) @@ -843,6 +856,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) idev->cnf.ignore_routes_with_linkdown = newf; if (changed) inet6_netconf_notify_devconf(dev_net(dev), + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, dev->ifindex, &idev->cnf); @@ -865,6 +879,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); @@ -877,6 +892,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); @@ -944,6 +960,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { + struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; unsigned int hash; @@ -990,6 +1007,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } + if (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy) + rt->dst.flags |= DST_NOPOLICY; + neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *addr; @@ -1001,7 +1022,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; - ifa->flags = flags | IFA_F_TENTATIVE; + ifa->flags = flags; + /* No need to add the TENTATIVE flag for addresses with NODAD */ + if (!(flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; ifa->valid_lft = valid_lft; ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; @@ -2053,12 +2077,23 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) __ipv6_dev_ac_dec(ifp->idev, &addr); } -static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) +static int addrconf_ifid_6lowpan(u8 *eui, struct net_device *dev) { - if (dev->addr_len != EUI64_ADDR_LEN) + switch (dev->addr_len) { + case ETH_ALEN: + memcpy(eui, dev->dev_addr, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + memcpy(eui + 5, dev->dev_addr + 3, 3); + break; + case EUI64_ADDR_LEN: + memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN); + eui[0] ^= 2; + break; + default: return -1; - memcpy(eui, dev->dev_addr, EUI64_ADDR_LEN); - eui[0] ^= 2; + } + return 0; } @@ -2150,7 +2185,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) case ARPHRD_TUNNEL: return addrconf_ifid_gre(eui, dev); case ARPHRD_6LOWPAN: - return addrconf_ifid_eui64(eui, dev); + return addrconf_ifid_6lowpan(eui, dev); case ARPHRD_IEEE1394: return addrconf_ifid_ieee1394(eui, dev); case ARPHRD_TUNNEL6: @@ -3271,14 +3306,24 @@ static void addrconf_gre_config(struct net_device *dev) static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - if (!ifp->rt) { - struct rt6_info *rt; + /* rt6i_ref == 0 means the host route was removed from the + * FIB, for example, if 'lo' device is taken down. In that + * case regenerate the host route. + */ + if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) { + struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); if (unlikely(IS_ERR(rt))) return PTR_ERR(rt); + /* ifp->rt can be accessed outside of rtnl */ + spin_lock(&ifp->lock); + prev = ifp->rt; ifp->rt = rt; + spin_unlock(&ifp->lock); + + ip6_rt_put(prev); } if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { @@ -3286,7 +3331,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev, idev->dev, 0, 0); } - addrconf_dad_start(ifp); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) + addrconf_dad_start(ifp); return 0; } @@ -3505,6 +3551,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, */ static struct notifier_block ipv6_dev_notf = { .notifier_call = addrconf_notify, + .priority = ADDRCONF_NOTIFY_PRIORITY, }; static void addrconf_type_change(struct net_device *dev, unsigned long event) @@ -3641,7 +3688,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (keep) { /* set state to skip the notifier below */ state = INET6_IFADDR_STATE_DEAD; - ifa->state = 0; + ifa->state = INET6_IFADDR_STATE_PREDAD; if (!(ifa->flags & IFA_F_NODAD)) ifa->flags |= IFA_F_TENTATIVE; @@ -4382,7 +4429,8 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { }; static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -4391,7 +4439,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ifa_flags; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, + extack); if (err < 0) return err; @@ -4491,7 +4540,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, } static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -4503,7 +4553,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) u32 ifa_flags; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, + extack); if (err < 0) return err; @@ -4853,7 +4904,8 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; @@ -4864,7 +4916,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, + extack); if (err < 0) goto errout; @@ -4975,6 +5028,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_PROBE_INTERVAL] = jiffies_to_msecs(cnf->rtr_probe_interval); #ifdef CONFIG_IPV6_ROUTE_INFO + array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen; array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif @@ -5006,6 +5060,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #endif array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; + array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; } static inline size_t inet6_ifla6_size(void) @@ -5232,7 +5287,8 @@ static int inet6_validate_link_af(const struct net_device *dev, if (dev && !__in6_dev_get(dev)) return -EAFNOSUPPORT; - return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy); + return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy, + NULL); } static int check_addr_gen_mode(int mode) @@ -5264,7 +5320,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!idev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) + if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET6_TOKEN]) { @@ -5667,17 +5723,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return restart_syscall(); if (valp == &net->ipv6.devconf_dflt->proxy_ndp) - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); else if (valp == &net->ipv6.devconf_all->proxy_ndp) - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); else { struct inet6_dev *idev = ctl->extra1; - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, idev->dev->ifindex, &idev->cnf); } @@ -5830,6 +5889,105 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static +void addrconf_set_nopolicy(struct rt6_info *rt, int action) +{ + if (rt) { + if (action) + rt->dst.flags |= DST_NOPOLICY; + else + rt->dst.flags &= ~DST_NOPOLICY; + } +} + +static +void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) +{ + struct inet6_ifaddr *ifa; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + spin_lock(&ifa->lock); + if (ifa->rt) { + struct rt6_info *rt = ifa->rt; + struct fib6_table *table = rt->rt6i_table; + int cpu; + + read_lock(&table->tb6_lock); + addrconf_set_nopolicy(ifa->rt, val); + if (rt->rt6i_pcpu) { + for_each_possible_cpu(cpu) { + struct rt6_info **rtp; + + rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu); + addrconf_set_nopolicy(*rtp, val); + } + } + read_unlock(&table->tb6_lock); + } + spin_unlock(&ifa->lock); + } + read_unlock_bh(&idev->lock); +} + +static +int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) +{ + struct inet6_dev *idev; + struct net *net; + + if (!rtnl_trylock()) + return restart_syscall(); + + *valp = val; + + net = (struct net *)ctl->extra2; + if (valp == &net->ipv6.devconf_dflt->disable_policy) { + rtnl_unlock(); + return 0; + } + + if (valp == &net->ipv6.devconf_all->disable_policy) { + struct net_device *dev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev) + addrconf_disable_policy_idev(idev, val); + } + } else { + idev = (struct inet6_dev *)ctl->extra1; + addrconf_disable_policy_idev(idev, val); + } + + rtnl_unlock(); + return 0; +} + +static +int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int *valp = ctl->data; + int val = *valp; + loff_t pos = *ppos; + struct ctl_table lctl; + int ret; + + lctl = *ctl; + lctl.data = &val; + ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); + + if (write && (*valp != val)) + ret = addrconf_disable_policy(ctl, valp, val); + + if (ret) + *ppos = pos; + + return ret; +} + static int minus_one = -1; static const int one = 1; static const int two_five_five = 255; @@ -6017,6 +6175,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO + { + .procname = "accept_ra_rt_info_min_plen", + .data = &ipv6_devconf.accept_ra_rt_info_min_plen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, @@ -6187,6 +6352,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, + { + .procname = "disable_policy", + .data = &ipv6_devconf.disable_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_disable_policy, + }, { /* sentinel */ } @@ -6227,7 +6399,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, ifindex = NETCONFA_IFINDEX_DEFAULT; else ifindex = idev->dev->ifindex; - inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, + ifindex, p); return 0; free: @@ -6236,7 +6409,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, return -ENOBUFS; } -static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) +static void __addrconf_sysctl_unregister(struct net *net, + struct ipv6_devconf *p, int ifindex) { struct ctl_table *table; @@ -6247,6 +6421,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) unregister_net_sysctl_table(p->sysctl_header); p->sysctl_header = NULL; kfree(table); + + inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int addrconf_sysctl_register(struct inet6_dev *idev) @@ -6270,7 +6446,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev) static void addrconf_sysctl_unregister(struct inet6_dev *idev) { - __addrconf_sysctl_unregister(&idev->cnf); + __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf, + idev->dev->ifindex); neigh_sysctl_unregister(idev->nd_parms); } @@ -6313,7 +6490,7 @@ static int __net_init addrconf_init_net(struct net *net) #ifdef CONFIG_SYSCTL err_reg_dflt: - __addrconf_sysctl_unregister(all); + __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(dflt); #endif @@ -6326,8 +6503,10 @@ static int __net_init addrconf_init_net(struct net *net) static void __net_exit addrconf_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL - __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); - __addrconf_sysctl_unregister(net->ipv6.devconf_all); + __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, + NETCONFA_IFINDEX_DEFAULT); + __addrconf_sysctl_unregister(net, net->ipv6.devconf_all, + NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); kfree(net->ipv6.devconf_all); @@ -6398,6 +6577,8 @@ int __init addrconf_init(void) goto errlo; } + ip6_route_init_special_entries(); + for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_addr_lst[i]); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index a8f6986dcbe5ea5a36a3cc6fcf51becebb50c33b..07cd7d248bb6822b787e4c74c8ef3d27d38e9d99 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -404,7 +404,8 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; @@ -413,7 +414,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) u32 label; int err = 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, + extack); if (err < 0) return err; @@ -521,7 +523,8 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -532,7 +535,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, + extack); if (err < 0) return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a9a9553ee63df8eb6e16e00d5da8c29406435350..a88b5b5b79558948dcbad84cbdda99c1d574102e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -933,8 +933,6 @@ static int __init inet6_init(void) if (err) goto igmp_fail; - ipv6_stub = &ipv6_stub_impl; - err = ipv6_netfilter_init(); if (err) goto netfilter_fail; @@ -1005,18 +1003,28 @@ static int __init inet6_init(void) if (err) goto seg6_fail; + err = igmp6_late_init(); + if (err) + goto igmp6_late_err; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) goto sysctl_fail; #endif + + /* ensure that ipv6 stubs are visible only after ipv6 is ready */ + wmb(); + ipv6_stub = &ipv6_stub_impl; out: return err; #ifdef CONFIG_SYSCTL sysctl_fail: - seg6_exit(); + igmp6_late_cleanup(); #endif +igmp6_late_err: + seg6_exit(); seg6_fail: calipso_exit(); calipso_fail: diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 37ac9de713c69af30ae50d03e53ee472a7520b98..8d772fea1ddecd427a66c18f34d50f969186f02a 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1319,7 +1319,7 @@ static int calipso_skbuff_setattr(struct sk_buff *skb, struct ipv6hdr *ip6_hdr; struct ipv6_opt_hdr *hop; unsigned char buf[CALIPSO_MAX_BUFFER]; - int len_delta, new_end, pad; + int len_delta, new_end, pad, payload; unsigned int start, end; ip6_hdr = ipv6_hdr(skb); @@ -1346,6 +1346,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb, if (ret_val < 0) return ret_val; + ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */ + if (len_delta) { if (len_delta > 0) skb_push(skb, len_delta); @@ -1355,6 +1357,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb, sizeof(*ip6_hdr) + start); skb_reset_network_header(skb); ip6_hdr = ipv6_hdr(skb); + payload = ntohs(ip6_hdr->payload_len); + ip6_hdr->payload_len = htons(payload + len_delta); } hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index eec27f87efaca15133cf1d5225e37e6a2f6a6f8a..e011122ebd43c190aec3812099345ec852444284 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) * At one point, excluding local errors was a quick test to identify icmp/icmp6 * errors. This is no longer true, but the test remained, so the v6 stack, * unlike v4, also honors cmsg requests on all wifi and timestamp errors. - * - * Timestamp code paths do not initialize the fields expected by cmsg: - * the PKTINFO fields in skb->cb[]. Fill those in here. */ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, struct sock_exterr_skb *serr) @@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) return false; - if (!skb->dev) + if (!IP6CB(skb)->iif) return false; - if (skb->protocol == htons(ETH_P_IPV6)) - IP6CB(skb)->iif = skb->dev->ifindex; - else - PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex; - return true; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ff54faa756317047af5af661d070ba9a5eb65429..1fe99ba8066c8de2a5717f0b611a152c94837734 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -170,19 +170,23 @@ static void esp_output_restore_header(struct sk_buff *skb) } static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, + struct xfrm_state *x, struct ip_esp_hdr *esph, __be32 *seqhi) { - struct xfrm_state *x = skb_dst(skb)->xfrm; - /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + struct xfrm_offload *xo = xfrm_offload(skb); + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); *seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + if (xo) + esph->seq_no = htonl(xo->seq.hi); + else + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); } esph->spi = x->id.spi; @@ -214,61 +218,16 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) +int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { - int err; - struct ip_esp_hdr *esph; - struct crypto_aead *aead; - struct aead_request *req; - struct scatterlist *sg, *dsg; - struct sk_buff *trailer; - struct page *page; - void *tmp; - int blksize; - int clen; - int alen; - int plen; - int ivlen; - int tfclen; - int nfrags; - int assoclen; - int seqhilen; - int tailen; - u8 *iv; u8 *tail; u8 *vaddr; - __be32 *seqhi; - __be64 seqno; - __u8 proto = *skb_mac_header(skb); - - /* skb is pure payload to encrypt */ - aead = x->data; - alen = crypto_aead_authsize(aead); - ivlen = crypto_aead_ivsize(aead); - - tfclen = 0; - if (x->tfcpad) { - struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); - u32 padto; - - padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached)); - if (skb->len < padto) - tfclen = padto - skb->len; - } - blksize = ALIGN(crypto_aead_blocksize(aead), 4); - clen = ALIGN(skb->len + 2 + tfclen, blksize); - plen = clen - skb->len - tfclen; - tailen = tfclen + plen + alen; - - assoclen = sizeof(*esph); - seqhilen = 0; - - if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; - } + int nfrags; + struct page *page; + struct ip_esp_hdr *esph; + struct sk_buff *trailer; + int tailen = esp->tailen; - *skb_mac_header(skb) = IPPROTO_ESP; esph = ip_esp_hdr(skb); if (!skb_cloned(skb)) { @@ -284,6 +243,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct sock *sk = skb->sk; struct page_frag *pfrag = &x->xfrag; + esp->inplace = false; + allocsize = ALIGN(tailen, L1_CACHE_BYTES); spin_lock_bh(&x->lock); @@ -300,10 +261,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) tail = vaddr + pfrag->offset; - esp_output_fill_trailer(tail, tfclen, plen, proto); + esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); kunmap_atomic(vaddr); + spin_unlock_bh(&x->lock); + nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, @@ -319,108 +282,111 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) if (sk) atomic_add(tailen, &sk->sk_wmem_alloc); - skb_push(skb, -skb_network_offset(skb)); - - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - esph->spi = x->id.spi; - - tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen); - if (!tmp) { - spin_unlock_bh(&x->lock); - err = -ENOMEM; - goto error; - } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); - req = esp_tmp_req(aead, iv); - sg = esp_req_sg(aead, req); - dsg = &sg[nfrags]; - - esph = esp_output_set_esn(skb, esph, seqhi); - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); - - allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); - - if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { - spin_unlock_bh(&x->lock); - err = -ENOMEM; - goto error; - } - - skb_shinfo(skb)->nr_frags = 1; - - page = pfrag->page; - get_page(page); - /* replace page frags in skb with new page */ - __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); - pfrag->offset = pfrag->offset + allocsize; - - sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); - skb_to_sgvec(skb, dsg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); - - spin_unlock_bh(&x->lock); - - goto skip_cow2; + goto out; } } cow: - err = skb_cow_data(skb, tailen, &trailer); - if (err < 0) - goto error; - nfrags = err; - + nfrags = skb_cow_data(skb, tailen, &trailer); + if (nfrags < 0) + goto out; tail = skb_tail_pointer(trailer); - esph = ip_esp_hdr(skb); skip_cow: - esp_output_fill_trailer(tail, tfclen, plen, proto); + esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); + pskb_put(skb, trailer, tailen); - pskb_put(skb, trailer, clen - skb->len + alen); - skb_push(skb, -skb_network_offset(skb)); +out: + return nfrags; +} +EXPORT_SYMBOL_GPL(esp6_output_head); - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - esph->spi = x->id.spi; +int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +{ + u8 *iv; + int alen; + void *tmp; + int ivlen; + int assoclen; + int seqhilen; + __be32 *seqhi; + struct page *page; + struct ip_esp_hdr *esph; + struct aead_request *req; + struct crypto_aead *aead; + struct scatterlist *sg, *dsg; + int err = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); - if (!tmp) { - err = -ENOMEM; - goto error; + assoclen = sizeof(struct ip_esp_hdr); + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + seqhilen += sizeof(__be32); + assoclen += sizeof(__be32); } + aead = x->data; + alen = crypto_aead_authsize(aead); + ivlen = crypto_aead_ivsize(aead); + + tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen); + if (!tmp) + goto error; + seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); - dsg = sg; - esph = esp_output_set_esn(skb, esph, seqhi); + if (esp->inplace) + dsg = sg; + else + dsg = &sg[esp->nfrags]; - sg_init_table(sg, nfrags); + esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); + + sg_init_table(sg, esp->nfrags); skb_to_sgvec(skb, sg, (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); + assoclen + ivlen + esp->clen + alen); + + if (!esp->inplace) { + int allocsize; + struct page_frag *pfrag = &x->xfrag; + + allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES); + + spin_lock_bh(&x->lock); + if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { + spin_unlock_bh(&x->lock); + goto error; + } + + skb_shinfo(skb)->nr_frags = 1; + + page = pfrag->page; + get_page(page); + /* replace page frags in skb with new page */ + __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len); + pfrag->offset = pfrag->offset + allocsize; + spin_unlock_bh(&x->lock); + + sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1); + skb_to_sgvec(skb, dsg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + esp->clen + alen); + } -skip_cow2: if ((x->props.flags & XFRM_STATE_ESN)) aead_request_set_callback(req, 0, esp_output_done_esn, skb); else aead_request_set_callback(req, 0, esp_output_done, skb); - aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); + aead_request_set_crypt(req, sg, dsg, ivlen + esp->clen, iv); aead_request_set_ad(req, assoclen); - seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + - ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); - memset(iv, 0, ivlen); - memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), + memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&esp->seqno + 8 - min(ivlen, 8), min(ivlen, 8)); ESP_SKB_CB(skb)->tmp = tmp; @@ -446,10 +412,60 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) error: return err; } +EXPORT_SYMBOL_GPL(esp6_output_tail); -static int esp_input_done2(struct sk_buff *skb, int err) +static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int alen; + int blksize; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct esp_info esp; + + esp.inplace = true; + + esp.proto = *skb_mac_header(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + /* skb is pure payload to encrypt */ + + aead = x->data; + alen = crypto_aead_authsize(aead); + + esp.tfclen = 0; + if (x->tfcpad) { + struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); + u32 padto; + + padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached)); + if (skb->len < padto) + esp.tfclen = padto - skb->len; + } + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); + esp.plen = esp.clen - skb->len - esp.tfclen; + esp.tailen = esp.tfclen + esp.plen + alen; + + esp.nfrags = esp6_output_head(x, skb, &esp); + if (esp.nfrags < 0) + return esp.nfrags; + + esph = ip_esp_hdr(skb); + esph->spi = x->id.spi; + + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + esp.seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); + + skb_push(skb, -skb_network_offset(skb)); + + return esp6_output_tail(x, skb, &esp); +} + +int esp6_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -458,7 +474,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) int padlen; u8 nexthdr[2]; - kfree(ESP_SKB_CB(skb)->tmp); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; @@ -492,12 +509,13 @@ static int esp_input_done2(struct sk_buff *skb, int err) out: return err; } +EXPORT_SYMBOL_GPL(esp6_input_done2); static void esp_input_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; - xfrm_input_resume(skb, esp_input_done2(skb, err)); + xfrm_input_resume(skb, esp6_input_done2(skb, err)); } static void esp_input_restore_header(struct sk_buff *skb) @@ -619,7 +637,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) if ((x->props.flags & XFRM_STATE_ESN)) esp_input_restore_header(skb); - ret = esp_input_done2(skb, ret); + ret = esp6_input_done2(skb, ret); out: return ret; @@ -682,13 +700,17 @@ static int esp_init_aead(struct xfrm_state *x) char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; + u32 mask = 0; err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) goto error; - aead = crypto_alloc_aead(aead_name, 0, 0); + if (x->xso.offload_handle) + mask |= CRYPTO_ALG_ASYNC; + + aead = crypto_alloc_aead(aead_name, 0, mask); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -718,6 +740,7 @@ static int esp_init_authenc(struct xfrm_state *x) char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; + u32 mask = 0; err = -EINVAL; if (!x->ealg) @@ -743,7 +766,10 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; } - aead = crypto_alloc_aead(authenc_name, 0, 0); + if (x->xso.offload_handle) + mask |= CRYPTO_ALG_ASYNC; + + aead = crypto_alloc_aead(authenc_name, 0, mask); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d914eb93204adbe02dceca9254da2598b6c8d99b..d950d43ba255442cf3079546a46b95693029f10c 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -45,27 +45,31 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; - err = secpath_set(skb); - if (err) - goto out; + xo = xfrm_offload(skb); + if (!xo || !(xo->flags & CRYPTO_DONE)) { + err = secpath_set(skb); + if (err) + goto out; - if (skb->sp->len == XFRM_MAX_DEPTH) - goto out; + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, - (xfrm_address_t *)&ipv6_hdr(skb)->daddr, - spi, IPPROTO_ESP, AF_INET6); - if (!x) - goto out; + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); + if (!x) + goto out; - skb->sp->xvec[skb->sp->len++] = x; - skb->sp->olen++; + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; - xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); - goto out; + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } } + xo->flags |= XFRM_GRO; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; @@ -86,19 +90,216 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, return NULL; } +static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_esp_hdr *esph; + struct ipv6hdr *iph = ipv6_hdr(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + int proto = iph->nexthdr; + + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + esph->spi = x->id.spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + + xo->proto = proto; +} + +static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + __u32 seq; + int err = 0; + struct sk_buff *skb2; + struct xfrm_state *x; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t esp_features = features; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (!xo) + goto out; + + seq = xo->seq.low; + + x = skb->sp->xvec[skb->sp->len - 1]; + aead = x->data; + esph = ip_esp_hdr(skb); + + if (esph->spi != x->id.spi) + goto out; + + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) + goto out; + + __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); + + skb->encap_hdr_csum = 1; + + if (!(features & NETIF_F_HW_ESP)) + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + + segs = x->outer_mode->gso_segment(x, skb, esp_features); + if (IS_ERR_OR_NULL(segs)) + goto out; + + __skb_pull(skb, skb->data - skb_mac_header(skb)); + + skb2 = segs; + do { + struct sk_buff *nskb = skb2->next; + + xo = xfrm_offload(skb2); + xo->flags |= XFRM_GSO_SEGMENT; + xo->seq.low = seq; + xo->seq.hi = xfrm_replay_seqhi(x, seq); + + if(!(features & NETIF_F_HW_ESP)) + xo->flags |= CRYPTO_FALLBACK; + + x->outer_mode->xmit(x, skb2); + + err = x->type_offload->xmit(x, skb2, esp_features); + if (err) { + kfree_skb_list(segs); + return ERR_PTR(err); + } + + if (!skb_is_gso(skb2)) + seq++; + else + seq += skb_shinfo(skb2)->gso_segs; + + skb_push(skb2, skb2->mac_len); + skb2 = nskb; + } while (skb2); + +out: + return segs; +} + +static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) +{ + struct crypto_aead *aead = x->data; + + if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead))) + return -EINVAL; + + skb->ip_summed = CHECKSUM_NONE; + + return esp6_input_done2(skb, 0); +} + +static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) +{ + int err; + int alen; + int blksize; + struct xfrm_offload *xo; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct esp_info esp; + bool hw_offload = true; + + esp.inplace = true; + + xo = xfrm_offload(skb); + + if (!xo) + return -EINVAL; + + if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || + (x->xso.dev != skb->dev)) { + xo->flags |= CRYPTO_FALLBACK; + hw_offload = false; + } + + esp.proto = xo->proto; + + /* skb is pure payload to encrypt */ + + aead = x->data; + alen = crypto_aead_authsize(aead); + + esp.tfclen = 0; + /* XXX: Add support for tfc padding here. */ + + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); + esp.plen = esp.clen - skb->len - esp.tfclen; + esp.tailen = esp.tfclen + esp.plen + alen; + + if (!hw_offload || (hw_offload && !skb_is_gso(skb))) { + esp.nfrags = esp6_output_head(x, skb, &esp); + if (esp.nfrags < 0) + return esp.nfrags; + } + + esph = ip_esp_hdr(skb); + esph->spi = x->id.spi; + + skb_push(skb, -skb_network_offset(skb)); + + if (xo->flags & XFRM_GSO_SEGMENT) { + esph->seq_no = htonl(xo->seq.low); + } else { + int len; + + len = skb->len - sizeof(struct ipv6hdr); + if (len > IPV6_MAXPLEN) + len = 0; + + ipv6_hdr(skb)->payload_len = htons(len); + } + + if (hw_offload) + return 0; + + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + + err = esp6_output_tail(x, skb, &esp); + if (err < 0) + return err; + + secpath_reset(skb); + + return 0; +} + static const struct net_offload esp6_offload = { .callbacks = { .gro_receive = esp6_gro_receive, + .gso_segment = esp6_gso_segment, }, }; +static const struct xfrm_type_offload esp6_type_offload = { + .description = "ESP6 OFFLOAD", + .owner = THIS_MODULE, + .proto = IPPROTO_ESP, + .input_tail = esp6_input_tail, + .xmit = esp6_xmit, + .encap = esp6_gso_encap, +}; + static int __init esp6_offload_init(void) { + if (xfrm_register_type_offload(&esp6_type_offload, AF_INET6) < 0) { + pr_info("%s: can't add xfrm type offload\n", __func__); + return -EAGAIN; + } + return inet6_add_offload(&esp6_offload, IPPROTO_ESP); } static void __exit esp6_offload_exit(void) { + if (xfrm_unregister_type_offload(&esp6_type_offload, AF_INET6) < 0) + pr_info("%s: can't remove xfrm type offload\n", __func__); + inet6_del_offload(&esp6_offload, IPPROTO_ESP); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 275cac628a95066f0a27e93f5015ddeb0172c28c..b636f1da9aece33532ccab5482aa72696f12db94 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -388,7 +388,6 @@ static int ipv6_srh_rcv(struct sk_buff *skb) icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); - kfree_skb(skb); return -1; } @@ -910,6 +909,8 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, { switch (opt->type) { case IPV6_SRCRT_TYPE_0: + case IPV6_SRCRT_STRICT: + case IPV6_SRCRT_TYPE_2: ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); break; case IPV6_SRCRT_TYPE_4: @@ -945,13 +946,13 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } -EXPORT_SYMBOL(ipv6_push_nfrag_opts); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) { if (opt->dst1opt) ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); } +EXPORT_SYMBOL(ipv6_push_frag_opts); struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) @@ -1164,6 +1165,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, switch (opt->srcrt->type) { case IPV6_SRCRT_TYPE_0: + case IPV6_SRCRT_STRICT: + case IPV6_SRCRT_TYPE_2: fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; break; case IPV6_SRCRT_TYPE_4: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 230b5aac9f03eadb775eea9cb3d9b4cce571cc32..8d7b113958b1332be11545069fafa8072a64e6a2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -491,7 +491,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!icmpv6_global_allow(type)) + if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type)) goto out_bh_enable; mip6_addr_swap(skb); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index ce1aae4a7fc8fa9daf9f1502d0ac77d2be2aee31..b3df03e3faa07d96f170e5ff0b535ebb16216c05 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -146,8 +146,7 @@ static int ila_build_state(struct nlattr *nla, return -EINVAL; } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, - ila_nl_policy); + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, NULL); if (ret < 0) return ret; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index af8f52ee7180868294930629f4f312c012a7a35c..77f7f8c7d93d67483f241616123380f9d8e6ba84 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -41,13 +41,7 @@ static int alloc_ila_locks(struct ila_net *ilan) size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); if (sizeof(spinlock_t) != 0) { -#ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) - ilan->locks = vmalloc(size * sizeof(spinlock_t)); - else -#endif - ilan->locks = kmalloc_array(size, sizeof(spinlock_t), - GFP_KERNEL); + ilan->locks = kvmalloc(size * sizeof(spinlock_t), GFP_KERNEL); if (!ilan->locks) return -ENOMEM; for (i = 0; i < size; i++) @@ -68,6 +62,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc) { u32 *v = (u32 *)loc.v32; + __ila_hash_secret_init(); return jhash_2words(v[0], v[1], hashrnd); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6fcb7cb49bb20dcfe518177177e3e0ac1c7d1091..64eea3962733a323fbae25b3c7b6de658a8cfdea 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -537,13 +537,14 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv4_get_dsfield(iph); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -596,13 +597,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -780,6 +785,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t, t->parms.o_key = p->o_key; t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; + t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); ip6gre_tnl_link_config(t, set_mtu); return 0; @@ -985,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev) dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } static void ip6gre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ip6gre_netdev_ops; - dev->destructor = ip6gre_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; dev->type = ARPHRD_IP6GRE; @@ -1142,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net) return 0; err_reg_dev: - ip6gre_dev_free(ign->fb_tunnel_dev); + free_netdev(ign->fb_tunnel_dev); err_alloc_dev: return err; } @@ -1249,6 +1255,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[], if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); + + if (data[IFLA_GRE_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); } static int ip6gre_tap_init(struct net_device *dev) @@ -1291,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &ip6gre_tap_netdev_ops; - dev->destructor = ip6gre_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; @@ -1470,6 +1480,8 @@ static size_t ip6gre_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_GRE_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_GRE_FWMARK */ + nla_total_size(4) + 0; } @@ -1490,7 +1502,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || - nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) || + nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, @@ -1525,6 +1538,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4bce1533b3f3b38e1173c18cb1bb6b33099..9ee208a348f559b27dcc56ee80bf9ad51630cb45 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,6 +49,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + void (*edemux)(struct sk_buff *skb); + /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ @@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && ipprot->early_demux) - ipprot->early_demux(skb); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) + edemux(skb); } if (!skb_valid_dst(skb)) ip6_route_input(skb); @@ -122,11 +124,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); /* * RFC4291 2.5.3 + * The loopback address must not be used as the source address in IPv6 + * packets that are sent outside of a single node. [..] * A packet received on an interface with a destination address * of loopback must be dropped. */ - if (!(dev->flags & IFF_LOOPBACK) && - ipv6_addr_loopback(&hdr->daddr)) + if ((ipv6_addr_loopback(&hdr->saddr) || + ipv6_addr_loopback(&hdr->daddr)) && + !(dev->flags & IFF_LOOPBACK)) goto err; /* RFC4291 Errata ID: 3480 diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 93e58a5e18374bee41f5a17f0c5911e381acb142..cdb3728faca7746d91e2430f6024f060a82b24fd 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, const struct net_offload *ops; int proto; struct frag_hdr *fptr; - unsigned int unfrag_ip6hlen; unsigned int payload_len; u8 *prevhdr; int offset = 0; @@ -116,8 +115,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, skb->network_header = (u8 *)ipv6h - skb->head; if (udpfrag) { - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); + int err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) { + kfree_skb_list(segs); + return ERR_PTR(err); + } + fptr = (struct frag_hdr *)((u8 *)ipv6h + err); fptr->frag_off = htons(offset); if (skb->next) fptr->frag_off |= htons(IP6_MF); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 58f6288e9ba53e6964b74d71dde7615ead695c06..bf8a58a1c32d83a9605844075da5815be23a6bf1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -597,7 +597,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - hlen = ip6_find_1stfragopt(skb, &prevhdr); + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + goto fail; + hlen = err; nexthdr = *prevhdr; mtu = ip6_skb_dst_mtu(skb); @@ -1463,6 +1466,11 @@ static int __ip6_append_data(struct sock *sk, */ alloclen += sizeof(struct frag_hdr); + copy = datalen - transhdrlen - fraggap; + if (copy < 0) { + err = -EINVAL; + goto error; + } if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len, @@ -1512,13 +1520,9 @@ static int __ip6_append_data(struct sock *sk, data += fraggap; pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; - - if (copy < 0) { - err = -EINVAL; - kfree_skb(skb); - goto error; - } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + getfrag(from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 75fac933c209a0f430279dea10b5dd2426a7ed31..c3581973f5d7265a574ae69416a516526ed64e44 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev) gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } static int ip6_tnl_create2(struct net_device *dev) @@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) return t; failed_free: - ip6_dev_free(dev); + free_netdev(dev); failed: return ERR_PTR(err); } @@ -954,7 +953,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) opt->dst_opt[5] = IPV6_TLV_PADN; opt->dst_opt[6] = 1; - opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt; opt->ops.opt_nflen = 8; } @@ -1037,7 +1036,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct net_device_stats *stats = &t->dev->stats; - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; struct ipv6_tel_txoption opt; struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; @@ -1057,26 +1056,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, /* NBMA tunnel */ if (ipv6_addr_any(&t->parms.raddr)) { - struct in6_addr *addr6; - struct neighbour *neigh; - int addr_type; + if (skb->protocol == htons(ETH_P_IPV6)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; - if (!skb_dst(skb)) - goto tx_err_link_failure; + if (!skb_dst(skb)) + goto tx_err_link_failure; - neigh = dst_neigh_lookup(skb_dst(skb), - &ipv6_hdr(skb)->daddr); - if (!neigh) - goto tx_err_link_failure; + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; - addr6 = (struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); - if (addr_type == IPV6_ADDR_ANY) - addr6 = &ipv6_hdr(skb)->daddr; + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; - memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); - neigh_release(neigh); + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } } else if (!(t->parms.flags & (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { /* enable the cache only only if the routing decision does @@ -1093,6 +1094,9 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, if (!dst) { route_lookup: + /* add dsfield to flowlabel for route lookup */ + fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel); + dst = ip6_route_output(net, NULL, fl6); if (dst->error) @@ -1176,7 +1180,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL); + ipv6_push_frag_opts(skb, &opt.ops, &proto); } /* Calculate max headroom for all the headers and adjust @@ -1194,7 +1198,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), + ip6_flow_hdr(ipv6h, dsfield, ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; @@ -1229,8 +1233,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - dsfield = ipv4_get_dsfield(iph); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1244,6 +1246,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -1252,10 +1255,13 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPIP; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; + dsfield = ipv4_get_dsfield(iph); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -1263,6 +1269,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); + skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1296,8 +1304,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.collect_md) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -1311,6 +1317,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; + dsfield = ip6_tclass(key->label); } else { offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ @@ -1333,11 +1340,15 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_proto = IPPROTO_IPV6; if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + dsfield = ipv6_get_dsfield(ipv6h); + else + dsfield = ip6_tclass(t->parms.flowinfo); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + else + fl6.flowi6_mark = t->parms.fwmark; } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); @@ -1345,6 +1356,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1467,6 +1480,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; + t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); ip6_tnl_link_config(t); return 0; @@ -1762,7 +1776,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; - dev->destructor = ip6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->flags |= IFF_NOARP; @@ -1918,6 +1933,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_COLLECT_METADATA]) parms->collect_md = true; + + if (data[IFLA_IPTUN_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], @@ -2054,6 +2072,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_COLLECT_METADATA */ nla_total_size(0) + + /* IFLA_IPTUN_FWMARK */ + nla_total_size(4) + 0; } @@ -2069,7 +2089,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || - nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) || + nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || @@ -2081,6 +2102,7 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) if (parm->collect_md) if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) goto nla_put_failure; + return 0; nla_put_failure: @@ -2109,6 +2131,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, + [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { @@ -2201,7 +2224,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) return 0; err_register: - ip6_dev_free(ip6n->fb_tnl_dev); + free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3d8a3b63b4fdbec7d488194e21e0c9013f0ff6da..837ea1eefe7f8cc85924a9604d58cd702af94667 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) static void vti6_dev_free(struct net_device *dev) { free_percpu(dev->tstats); - free_netdev(dev); } static int vti6_tnl_create2(struct net_device *dev) @@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p return t; failed_free: - vti6_dev_free(dev); + free_netdev(dev); failed: return NULL; } @@ -657,6 +656,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) t->parms.i_key = p->i_key; t->parms.o_key = p->o_key; t->parms.proto = p->proto; + t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); vti6_link_config(t); return 0; @@ -841,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = { static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; - dev->destructor = vti6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); @@ -933,6 +934,9 @@ static void vti6_netlink_parms(struct nlattr *data[], if (data[IFLA_VTI_OKEY]) parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); + + if (data[IFLA_VTI_FWMARK]) + parms->fwmark = nla_get_u32(data[IFLA_VTI_FWMARK]); } static int vti6_newlink(struct net *src_net, struct net_device *dev, @@ -998,6 +1002,8 @@ static size_t vti6_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_VTI_OKEY */ nla_total_size(4) + + /* IFLA_VTI_FWMARK */ + nla_total_size(4) + 0; } @@ -1010,7 +1016,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || - nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) + nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key) || + nla_put_u32(skb, IFLA_VTI_FWMARK, parm->fwmark)) goto nla_put_failure; return 0; @@ -1024,6 +1031,7 @@ static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { [IFLA_VTI_REMOTE] = { .len = sizeof(struct in6_addr) }, [IFLA_VTI_IKEY] = { .type = NLA_U32 }, [IFLA_VTI_OKEY] = { .type = NLA_U32 }, + [IFLA_VTI_FWMARK] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vti6_link_ops __read_mostly = { @@ -1092,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net) return 0; err_register: - vti6_dev_free(ip6n->fb_tnl_dev); + free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6ba6c900ebcf430cf313a2bef55ff69c114af218..2ecb39b943b5002e63bfa5b045919fbfd9fd64f6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev) dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } @@ -774,7 +774,8 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) * Delete a VIF entry */ -static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) +static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, + struct list_head *head) { struct mif_device *v; struct net_device *dev; @@ -815,12 +816,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) in6_dev = __in6_dev_get(dev); if (in6_dev) { in6_dev->cnf.mc_forwarding--; - inet6_netconf_notify_devconf(dev_net(dev), + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } - if (v->flags & MIFF_REGISTER) + if ((v->flags & MIFF_REGISTER) && !notify) unregister_netdevice_queue(dev, head); dev_put(dev); @@ -974,7 +975,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { in6_dev->cnf.mc_forwarding++; - inet6_netconf_notify_devconf(dev_net(dev), + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } @@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct notifier_block *this, struct mr6_table *mrt; struct mif_device *v; int ct; - LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; @@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct notifier_block *this, v = &mrt->vif6_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(mrt, ct, &list); + mif6_delete(mrt, ct, 1, NULL); } } - unregister_netdevice_many(&list); return NOTIFY_DONE; } @@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all) for (i = 0; i < mrt->maxvif; i++) { if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) continue; - mif6_delete(mrt, i, &list); + mif6_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1599,7 +1598,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) write_unlock_bh(&mrt_lock); if (!err) - inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); rtnl_unlock(); @@ -1620,7 +1620,7 @@ int ip6mr_sk_done(struct sock *sk) mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); - inet6_netconf_notify_devconf(net, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); @@ -1707,7 +1707,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(mrt, mifi, NULL); + ret = mif6_delete(mrt, mifi, 0, NULL); rtnl_unlock(); return ret; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1bdc703cb9668bd77690c3d8f1ec0062d7b88c43..07403fa164e18aac704e3b77d1e2a094ad53c04c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev) mld_ifc_start_timer(idev, 1); } - static void igmp6_timer_handler(unsigned long data) { struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; @@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) write_unlock_bh(&idev->lock); } +static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) +{ + struct ifmcaddr6 *pmc; + + ASSERT_RTNL(); + + if (mld_in_v1_mode(idev)) { + read_lock_bh(&idev->lock); + for (pmc = idev->mc_list; pmc; pmc = pmc->next) + igmp6_join_group(pmc); + read_unlock_bh(&idev->lock); + } else + mld_send_report(idev, NULL); +} + +static int ipv6_mc_netdev_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct inet6_dev *idev = __in6_dev_get(dev); + + switch (event) { + case NETDEV_RESEND_IGMP: + if (idev) + ipv6_mc_rejoin_groups(idev); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block igmp6_netdev_notifier = { + .notifier_call = ipv6_mc_netdev_event, +}; + #ifdef CONFIG_PROC_FS struct igmp6_mc_iter_state { struct seq_net_private p; @@ -2970,7 +3007,17 @@ int __init igmp6_init(void) return register_pernet_subsys(&igmp6_net_ops); } +int __init igmp6_late_init(void) +{ + return register_netdevice_notifier(&igmp6_netdev_notifier); +} + void igmp6_cleanup(void) { unregister_pernet_subsys(&igmp6_net_ops); } + +void igmp6_late_cleanup(void) +{ + unregister_netdevice_notifier(&igmp6_netdev_notifier); +} diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ebac630d3c603186be2fc0dcbaac7d7e74bfde6..d310dc41209a39647b0ee7e8da2d38e90ac4333e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -732,7 +732,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts) { - neigh_update(neigh, lladdr, new, flags); + neigh_update(neigh, lladdr, new, flags, 0); /* report ndisc ops about neighbour update */ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); } @@ -1418,6 +1418,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ri->prefix_len == 0 && !in6_dev->cnf.accept_ra_defrtr) continue; + if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, @@ -1746,10 +1748,13 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); + /* fallthrough */ + case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) break; - if (idev->cnf.ndisc_notify) + if (idev->cnf.ndisc_notify || + net->ipv6.devconf_all->ndisc_notify) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1e15c54fd5e27dbafaf4d4b0b3de9008a724559f..1f90644056ac784f1a455751796d5fa0d9441915 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -51,15 +51,6 @@ void *ip6t_alloc_initial_table(const struct xt_table *info) } EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); -/* - We keep a set of rules for each CPU, so we can avoid write-locking - them in the softirq when updating the counters and therefore - only need to read-lock in the softirq; doing a write_lock_bh() in user - context stops packets coming through and allows user context to read - the counters or update the rules. - - Hence the start of any table is given by get_table() below. */ - /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -411,7 +402,7 @@ mark_source_chains(const struct xt_table_info *newinfo, to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); + struct ip6t_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; @@ -453,14 +444,12 @@ mark_source_chains(const struct xt_table_info *newinfo, if (pos == oldpos) goto next; - e = (struct ip6t_entry *) - (entry0 + pos); + e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; - e = (struct ip6t_entry *) - (entry0 + pos + size); + e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; @@ -475,16 +464,14 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = (struct ip6t_entry *) - (entry0 + newpos); + e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } - e = (struct ip6t_entry *) - (entry0 + newpos); + e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } @@ -863,7 +850,7 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_match *m; const struct xt_entry_target *t; - e = (struct ip6t_entry *)(loc_cpu_entry + off); + e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; @@ -1258,7 +1245,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, int ret = 0; origsize = *size; - ce = (struct compat_ip6t_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) @@ -1394,7 +1381,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct xt_entry_match *ematch; origsize = *size; - de = (struct ip6t_entry *)*dstptr; + de = *dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 4ef1ddd4bbbd813ff8e6ed46275ecdf48d5ff9a8..d3c4daa708b9014378f5cdec4b0cc811f9999d92 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -307,12 +307,17 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_ECN); synproxy_send_client_synack(net, skb, th, &opts); - return NF_DROP; + consume_skb(skb); + return NF_STOLEN; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); - return NF_DROP; + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) { + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } return XT_CONTINUE; @@ -388,10 +393,13 @@ static unsigned int ipv6_synproxy_hook(void *priv, * number match the one of first SYN. */ if (synproxy_recv_client_ack(net, skb, th, &opts, - ntohl(th->seq) + 1)) + ntohl(th->seq) + 1)) { this_cpu_inc(snet->stats->cookie_retrans); - - return NF_DROP; + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } synproxy->isn = ntohl(th->ack_seq); @@ -430,20 +438,57 @@ static unsigned int ipv6_synproxy_hook(void *priv, return NF_ACCEPT; } +static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { + { + .hook = ipv6_synproxy_hook, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv6_synproxy_hook, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + static int synproxy_tg6_check(const struct xt_tgchk_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); const struct ip6t_entry *e = par->entryinfo; + int err; if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || e->ipv6.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_netns_get(par->net, par->family); + err = nf_ct_netns_get(par->net, par->family); + if (err) + return err; + + if (snet->hook_ref6 == 0) { + err = nf_register_net_hooks(par->net, ipv6_synproxy_ops, + ARRAY_SIZE(ipv6_synproxy_ops)); + if (err) { + nf_ct_netns_put(par->net, par->family); + return err; + } + } + + snet->hook_ref6++; + return err; } static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); + + snet->hook_ref6--; + if (snet->hook_ref6 == 0) + nf_unregister_net_hooks(par->net, ipv6_synproxy_ops, + ARRAY_SIZE(ipv6_synproxy_ops)); nf_ct_netns_put(par->net, par->family); } @@ -458,46 +503,14 @@ static struct xt_target synproxy_tg6_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { - { - .hook = ipv6_synproxy_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, - { - .hook = ipv6_synproxy_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, -}; - static int __init synproxy_tg6_init(void) { - int err; - - err = nf_register_hooks(ipv6_synproxy_ops, - ARRAY_SIZE(ipv6_synproxy_ops)); - if (err < 0) - goto err1; - - err = xt_register_target(&synproxy_tg6_reg); - if (err < 0) - goto err2; - - return 0; - -err2: - nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); -err1: - return err; + return xt_register_target(&synproxy_tg6_reg); } static void __exit synproxy_tg6_exit(void) { xt_unregister_target(&synproxy_tg6_reg); - nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); } module_init(synproxy_tg6_init); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d2c2ccbfbe728f8ff2d7bc42c5e7feb2ae4ad97b..d5f028e33f658bd62deb5b9b0737c33366a22a83 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -221,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 888ecd106e5f37ba6c9e93bf1d1f161ddbaa280c..4a7ddeddbaabf1866a65cfda5d27e8e9c1e75b57 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index e0be97e636a48f54c1488ca70ae97a9a13e8be61..b2b4f031b3a16b1f9f374221396ad02ccc79744e 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -235,7 +235,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, inside->icmp6.icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len - hdrlen, IPPROTO_ICMPV6, - csum_partial(&inside->icmp6, + skb_checksum(skb, hdrlen, skb->len - hdrlen, 0)); } @@ -273,13 +273,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, if (!ct) return NF_ACCEPT; - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; + nat = nfct_nat(ct); switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 051b6a6bfff6ca7c0dd42ceb43a417260a11bd23..2297c9f073bac63c90ed0578b474fd53b556a6e2 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -30,6 +30,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, const struct net_device *out) { enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; struct in6_addr src; struct nf_conn *ct; struct nf_nat_range newrange; @@ -42,7 +43,9 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; - nfct_nat(ct)->masq_index = out->ifindex; + nat = nf_ct_nat_ext_add(ct); + if (nat) + nat->masq_index = out->ifindex; newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.in6 = src; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 765facf03d45c47b9913b1adcdaf59b6fe09383c..43f91d9b086c7d5c66860ae2eef4ace040acbcff 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -159,7 +159,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv->result, pkt, + nft_fib_store_result(dest, priv, pkt, nft_in(pkt)->ifindex); return; } @@ -246,7 +246,7 @@ nft_fib6_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_fib6_type __read_mostly = { .name = "fib", - .select_ops = &nft_fib6_select_ops, + .select_ops = nft_fib6_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV6, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index cd4252346a32d90e8e133bd985811b6241e4bcd7..e9065b8d3af852c6e9a7359667f68a5ad00bfe75 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -79,14 +79,13 @@ EXPORT_SYMBOL(ipv6_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); unsigned int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; - while (offset + 1 <= packet_len) { + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; switch (**nexthdr) { @@ -107,13 +106,16 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; } - return offset; + return -EINVAL; } EXPORT_SYMBOL(ip6_find_1stfragopt); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 9b522fa90e6d8f4a87ebed7cf574a36ceea89c61..ac826dd338ff0825eaf0d2d74cee92d008e018bb 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -192,7 +192,7 @@ static struct inet_protosw pingv6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_ICMPV6, .prot = &pingv6_prot, - .ops = &inet6_dgram_ops, + .ops = &inet6_sockraw_ops, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index cc8e3ae9ca736490c3f689297d190f930fdd2ac9..e88bcb8ff0fd73b8377f5e0d3c3d1fa524ef1da8 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -219,7 +219,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, u64 buff64[SNMP_MIB_MAX]; int i; - memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX); snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index e3770abe688a3a9059456fe9195adbfcdfb73157..b5d54d4f995c0f4bade2e3f1c4def9616252ca55 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -26,7 +26,7 @@ #include #if IS_ENABLED(CONFIG_IPV6) -const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; +struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; EXPORT_SYMBOL(inet6_protos); int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f174e76e6505d4045e940c9fceef765d2aaa937d..60be012fe7085cc7a199e84333cef5ee95ed1f04 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -632,6 +632,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } + if (length < sizeof(struct ipv6hdr)) + return -EINVAL; if (flags&MSG_PROBE) goto out; @@ -1178,8 +1180,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) - amount = skb_tail_pointer(skb) - - skb_transport_header(skb); + amount = skb->len; spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } @@ -1337,7 +1338,7 @@ void raw6_proc_exit(void) #endif /* CONFIG_PROC_FS */ /* Same as inet6_dgram_ops, sans udp_poll. */ -static const struct proto_ops inet6_sockraw_ops = { +const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9db1418993f2b8a5b4194895f243441033d4729a..7cebd954d5bb4263017f8f2d577ecfb88ca7c093 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1854,6 +1854,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) int addr_type; int err = -EINVAL; + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) + goto out; + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) goto out; #ifndef CONFIG_IPV6_SUBTREES @@ -2800,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) if ((rt->dst.dev == dev || !dev) && rt != adn->net->ipv6.ip6_null_entry && (rt->rt6i_nsiblings == 0 || + (dev && netdev_unregistering(dev)) || !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) return -1; @@ -2906,7 +2911,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int pref; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, + NULL); if (err < 0) goto errout; @@ -3259,7 +3265,8 @@ static int ip6_route_multipath_del(struct fib6_config *cfg) return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct fib6_config cfg; int err; @@ -3276,7 +3283,8 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) } } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct fib6_config cfg; int err; @@ -3564,7 +3572,8 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; @@ -3574,7 +3583,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct flowi6 fl6; int err, iif = 0, oif = 0; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, + extack); if (err < 0) goto errout; @@ -3700,7 +3710,10 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + if (!(dev->flags & IFF_LOOPBACK)) + return NOTIFY_OK; + + if (event == NETDEV_REGISTER) { net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -3708,6 +3721,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); +#endif + } else if (event == NETDEV_UNREGISTER) { + in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); + in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); #endif } @@ -4015,9 +4034,24 @@ static struct pernet_operations ip6_route_net_late_ops = { static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, - .priority = 0, + .priority = ADDRCONF_NOTIFY_PRIORITY - 10, }; +void __init ip6_route_init_special_entries(void) +{ + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif +} + int __init ip6_route_init(void) { int ret; @@ -4044,17 +4078,6 @@ int __init ip6_route_init(void) ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; - /* Registering of the loopback is done before this portion of code, - * the loopback reference in rt6_info will not be taken, do it - * manually for init_net */ - init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #endif ret = fib6_init(); if (ret) goto out_register_subsys; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index a855eb325b030a666fe92c56a2d432c77d9dfe7a..5f44ffed25768d83c31b31295474c5ecf623e986 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -53,6 +53,9 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) struct sr6_tlv *tlv; unsigned int tlv_len; + if (trailing < sizeof(*tlv)) + return false; + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); tlv_len = sizeof(*tlv) + tlv->len; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 85582257d3af88146d435ef6c2e98f0bbef94a41..6a495490d43e1018206e628b917840d7ae8f9085 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,17 +26,13 @@ #include #include #include -#ifdef CONFIG_DST_CACHE #include -#endif #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif struct seg6_lwt { -#ifdef CONFIG_DST_CACHE struct dst_cache cache; -#endif struct seg6_iptunnel_encap tuninfo[0]; }; @@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; tot_len = hdrlen + sizeof(*hdr); - err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + err = skb_cow_head(skb, tot_len); if (unlikely(err)) return err; @@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; - err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + err = skb_cow_head(skb, hdrlen); if (unlikely(err)) return err; @@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb) static int seg6_input(struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; int err; err = seg6_do_srh(skb); @@ -245,8 +244,30 @@ static int seg6_input(struct sk_buff *skb) return err; } + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + + preempt_disable(); + dst = dst_cache_get(&slwt->cache); + preempt_enable(); + skb_dst_drop(skb); - ip6_route_input(skb); + + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { + preempt_disable(); + dst_cache_set_ip6(&slwt->cache, dst, + &ipv6_hdr(skb)->saddr); + preempt_enable(); + } + } else { + skb_dst_set(skb, dst); + } + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + return err; return dst_input(skb); } @@ -264,11 +285,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); -#ifdef CONFIG_DST_CACHE preempt_disable(); dst = dst_cache_get(&slwt->cache); preempt_enable(); -#endif if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -287,16 +306,18 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } -#ifdef CONFIG_DST_CACHE preempt_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); preempt_enable(); -#endif } skb_dst_drop(skb); skb_dst_set(skb, dst); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; + return dst_output(net, sk, skb); drop: kfree_skb(skb); @@ -315,7 +336,7 @@ static int seg6_build_state(struct nlattr *nla, int err; err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, - seg6_iptunnel_policy); + seg6_iptunnel_policy, NULL); if (err < 0) return err; @@ -355,13 +376,11 @@ static int seg6_build_state(struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); -#ifdef CONFIG_DST_CACHE err = dst_cache_init(&slwt->cache, GFP_KERNEL); if (err) { kfree(newts); return err; } -#endif memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); @@ -375,12 +394,10 @@ static int seg6_build_state(struct nlattr *nla, return 0; } -#ifdef CONFIG_DST_CACHE static void seg6_destroy_state(struct lwtunnel_state *lwt) { dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); } -#endif static int seg6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) @@ -414,9 +431,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops seg6_iptun_ops = { .build_state = seg6_build_state, -#ifdef CONFIG_DST_CACHE .destroy_state = seg6_destroy_state, -#endif .output = seg6_output, .input = seg6_input, .fill_encap = seg6_fill_encap_info, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 99853c6e33a8c3def99ecb56e288cce4a38a997b..2378503577b0c8823049b7d17f857466481077b3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, return nt; failed_free: - ipip6_dev_free(dev); + free_netdev(dev); failed: return NULL; } @@ -881,11 +881,12 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - rt = ip_route_output_ports(tunnel->net, &fl4, NULL, - dst, tiph->saddr, - 0, 0, - IPPROTO_IPV6, RT_TOS(tos), - tunnel->parms.link); + flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark, + RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6, + 0, dst, tiph->saddr, 0, 0, + sock_net_uid(tunnel->net, NULL)); + rt = ip_route_output_flow(tunnel->net, &fl4, NULL); + if (IS_ERR(rt)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; @@ -1071,7 +1072,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) } } -static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) +static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p, + __u32 fwmark) { struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); @@ -1085,8 +1087,9 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) ipip6_tunnel_link(sitn, t); t->parms.iph.ttl = p->iph.ttl; t->parms.iph.tos = p->iph.tos; - if (t->parms.link != p->link) { + if (t->parms.link != p->link || t->fwmark != fwmark) { t->parms.link = p->link; + t->fwmark = fwmark; ipip6_tunnel_bind_dev(t->dev); } dst_cache_reset(&t->dst_cache); @@ -1220,7 +1223,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) t = netdev_priv(dev); } - ipip6_tunnel_update(t, &p); + ipip6_tunnel_update(t, &p, t->fwmark); } if (t) { @@ -1333,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev) dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } #define SIT_FEATURES (NETIF_F_SG | \ @@ -1348,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; - dev->destructor = ipip6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + t_hlen; @@ -1418,7 +1421,8 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) } static void ipip6_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, + __u32 *fwmark) { memset(parms, 0, sizeof(*parms)); @@ -1457,6 +1461,8 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_PROTO]) parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (data[IFLA_IPTUN_FWMARK]) + *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -1549,7 +1555,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, return err; } - ipip6_netlink_parms(data, &nt->parms); + ipip6_netlink_parms(data, &nt->parms, &nt->fwmark); if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; @@ -1577,6 +1583,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; #endif + __u32 fwmark = t->fwmark; int err; if (dev == sitn->fb_tunnel_dev) @@ -1588,7 +1595,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], return err; } - ipip6_netlink_parms(data, &p); + ipip6_netlink_parms(data, &p, &fwmark); if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) @@ -1602,7 +1609,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], } else t = netdev_priv(dev); - ipip6_tunnel_update(t, &p); + ipip6_tunnel_update(t, &p, fwmark); #ifdef CONFIG_IPV6_SIT_6RD if (ipip6_netlink_6rd_parms(data, &ip6rd)) @@ -1649,6 +1656,8 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_FWMARK */ + nla_total_size(4) + 0; } @@ -1665,7 +1674,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || - nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) + nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) || + nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark)) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD @@ -1715,6 +1725,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, }; static void ipip6_dellink(struct net_device *dev, struct list_head *head) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 895ff650db43017ef39344679771d94ad6eaaf00..5abc3692b9011b140816dc4ce6223e79e5defddb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); out: return ret; out_free: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 49fa2e8c3fa9212eef1198a1077a6726f0f1b6fc..4f4310a36a0481e2bd068e39285011ff28377ea5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v6_init_seq(const struct sk_buff *skb) { - return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v6_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct flowi6 fl6; struct dst_entry *dst; int addr_type; - u32 seq; int err; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -265,11 +270,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); - if (tcp_death_row->sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) - tcp_fetch_timewait_stamp(sk, dst); - icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + @@ -287,13 +287,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { - seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport); + tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) @@ -727,11 +727,8 @@ static void tcp_v6_init_req(struct request_sock *req, static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - if (strict) - *strict = true; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } @@ -757,7 +754,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq = tcp_v6_init_sequence, + .init_seq = tcp_v6_init_seq, + .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; @@ -1064,6 +1062,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; @@ -1133,6 +1132,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; @@ -1301,8 +1301,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1921,7 +1919,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, @@ -1933,8 +1931,9 @@ struct proto tcpv6_prot = { .diag_destroy = tcp_abort, }; -static const struct inet6_protocol tcpv6_protocol = { +static struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, + .early_demux_handler = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e28082f0a307eb68ac13987580d8d9f65358212f..06ec39b796092ad5e8954c0cfd10e75205ffce54 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -525,7 +526,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } -int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -568,7 +569,7 @@ void udpv6_encap_enable(void) } EXPORT_SYMBOL(udpv6_encap_enable); -int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -864,6 +865,69 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; } + +static struct sock *__udp6_lib_demux_lookup(struct net *net, + __be16 loc_port, const struct in6_addr *loc_addr, + __be16 rmt_port, const struct in6_addr *rmt_addr, + int dif) +{ + unsigned short hnum = ntohs(loc_port); + unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); + unsigned int slot2 = hash2 & udp_table.mask; + struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; + const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + struct sock *sk; + + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { + if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) + return sk; + /* Only check first socket in chain */ + break; + } + return NULL; +} + +static void udp_v6_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + const struct udphdr *uh; + struct sock *sk; + struct dst_entry *dst; + int dif = skb->dev->ifindex; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct udphdr))) + return; + + uh = udp_hdr(skb); + + if (skb->pkt_type == PACKET_HOST) + sk = __udp6_lib_demux_lookup(net, uh->dest, + &ipv6_hdr(skb)->daddr, + uh->source, &ipv6_hdr(skb)->saddr, + dif); + else + return; + + if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) + return; + + skb->sk = sk; + skb->destructor = sock_efree; + dst = READ_ONCE(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + if (dst) { + if (dst->flags & DST_NOCACHE) { + if (likely(atomic_inc_not_zero(&dst->__refcnt))) + skb_dst_set(skb, dst); + } else { + skb_dst_set_noref(skb, dst); + } + } +} + static __inline__ int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); @@ -1378,7 +1442,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif -static const struct inet6_protocol udpv6_protocol = { +static struct inet6_protocol udpv6_protocol = { + .early_demux = udp_v6_early_demux, + .early_demux_handler = udp_v6_early_demux, .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index e78bdc76dcc33ceda888fb323a530f774057edbb..f180b3d85e3147facb487720bb3d98722ae76a45 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -26,7 +26,6 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); -int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ac858c480f2f272f37275cddfd9ffe889a91af97..a2267f80febbb6f31459097f27bd89d51d0f2b11 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u8 frag_hdr_sz = sizeof(struct frag_hdr); __wsum csum; int tnl_hlen; + int err; mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -90,7 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Find the unfragmentable header and shift it left by frag_hdr_sz * bytes to insert fragment header. */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + err = ip6_find_1stfragopt(skb, &prevhdr); + if (err < 0) + return ERR_PTR(err); + unfrag_ip6hlen = err; nexthdr = *prevhdr; *prevhdr = NEXTHDR_FRAGMENT; unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 0e015906f9ca91e11d3e9e124c532c6a7cb04c5d..07d36573f50b9451e4c2bfee331ac2c023791a7a 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -47,6 +47,8 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + if (hdr_len < 0) + return hdr_len; skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); skb_set_network_header(skb, -x->props.header_len); skb->transport_header = skb->network_header + hdr_len; diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 4439ee44c8b05461b8a66190c7800379ca5f105c..9ad07a91708ef7a1008d469766ab39b9b882883f 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -13,6 +13,7 @@ #include #include #include +#include /* Add encapsulation header. * @@ -26,8 +27,11 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; iph = ipv6_hdr(skb); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + if (hdr_len < 0) + return hdr_len; skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); skb_set_network_header(skb, -x->props.header_len); skb->transport_header = skb->network_header + hdr_len; @@ -61,9 +65,41 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) return 0; } +static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet6_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len); + + if (xo->flags & XFRM_GSO_SEGMENT) { + skb_reset_transport_header(skb); + skb->transport_header -= x->props.header_len; + } +} + + static struct xfrm_mode xfrm6_transport_mode = { .input = xfrm6_transport_input, .output = xfrm6_transport_output, + .gso_segment = xfrm4_transport_gso_segment, + .xmit = xfrm6_transport_xmit, .owner = THIS_MODULE, .encap = XFRM_MODE_TRANSPORT, }; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 372855eeaf42551208adeeaff16b1cd06f08d3f2..02556e356f87e94830e76bb6f4f519e4cf1eb4d7 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -36,6 +36,9 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *top_iph; int dsfield; + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); @@ -96,11 +99,35 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) return err; } +static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); + +} + +static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo->flags & XFRM_GSO_SEGMENT) { + skb->network_header = skb->network_header - x->props.header_len; + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + } + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + x->props.header_len); +} + static struct xfrm_mode xfrm6_tunnel_mode = { .input2 = xfrm6_mode_tunnel_input, .input = xfrm_prepare_input, .output2 = xfrm6_mode_tunnel_output, .output = xfrm6_prepare_output, + .gso_segment = xfrm6_mode_tunnel_gso_segment, + .xmit = xfrm6_mode_tunnel_xmit, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, .flags = XFRM_MODE_FLAG_TUNNEL, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4d09ce6fa90e666bfdeda09cbdc8c7b0cb8b5824..8ae87d4ec5ff607d431513bb2ef42d5c2c93450a 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -73,11 +73,16 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); + if (skb->ignore_df) + goto out; + mtu = dst_mtu(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->ignore_df && skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) { skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); @@ -89,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } - +out: return ret; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 8a9219ff2e77e0205de652974a44c5e4ba33b2c0..fa31ef29e3fa0bf3973e12e43c57b48eb7d1be45 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1168,11 +1168,10 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) sipx->sipx_network = ipxif->if_netnum; memcpy(sipx->sipx_node, ipxif->if_node, sizeof(sipx->sipx_node)); - rc = -EFAULT; + rc = 0; if (copy_to_user(arg, &ifr, sizeof(ifr))) - break; + rc = -EFAULT; ipxitf_put(ipxif); - rc = 0; break; } case SIOCAIPXITFCRT: diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 74d09f91709e6121ed80b8f089283615a5ffce5e..3be852808a9d1f7a2767f482b334279a95e90642 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -65,7 +65,7 @@ static void irlan_eth_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &irlan_eth_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->min_mtu = 0; dev->max_mtu = ETH_MAX_MTU; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 31762f76cdb5f2a3ec322135068402be532218ed..deca20fb2ce2e5e50063e58ec2c0d9d6239d3bd4 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1707,11 +1707,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_clone info; struct socket *newsock = NULL; - if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - return -EFAULT; - err = kcm_clone(sock, &info, &newsock); - if (!err) { if (copy_to_user((void __user *)arg, &info, sizeof(info))) { diff --git a/net/key/af_key.c b/net/key/af_key.c index c6252ed42c1de65dee149d7d869b62b96616e22a..512dc43d0ce6814f0a6d0507805025d75234eece 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -63,8 +63,13 @@ struct pfkey_sock { } u; struct sk_buff *skb; } dump; + struct mutex dump_lock; }; +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, + xfrm_address_t *saddr, xfrm_address_t *daddr, + u16 *family); + static inline struct pfkey_sock *pfkey_sk(struct sock *sk) { return (struct pfkey_sock *)sk; @@ -139,6 +144,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; + struct pfkey_sock *pfk; int err; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -153,6 +159,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, if (sk == NULL) goto out; + pfk = pfkey_sk(sk); + mutex_init(&pfk->dump_lock); + sock->ops = &pfkey_ops; sock_init_data(sock, sk); @@ -281,13 +290,23 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) struct sadb_msg *hdr; int rc; + mutex_lock(&pfk->dump_lock); + if (!pfk->dump.dump) { + rc = 0; + goto out; + } + rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; + if (rc == -ENOBUFS) { + rc = 0; + goto out; + } if (pfk->dump.skb) { - if (!pfkey_can_dump(&pfk->sk)) - return 0; + if (!pfkey_can_dump(&pfk->sk)) { + rc = 0; + goto out; + } hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; @@ -298,6 +317,9 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) } pfkey_terminate_dump(pfk); + +out: + mutex_unlock(&pfk->dump_lock); return rc; } @@ -1793,19 +1815,26 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); - if (pfk->dump.dump != NULL) + mutex_lock(&pfk->dump_lock); + if (pfk->dump.dump != NULL) { + mutex_unlock(&pfk->dump_lock); return -EBUSY; + } proto = pfkey_satype2proto(hdr->sadb_msg_satype); - if (proto == 0) + if (proto == 0) { + mutex_unlock(&pfk->dump_lock); return -EINVAL; + } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; filter = kmalloc(sizeof(*filter), GFP_KERNEL); - if (filter == NULL) + if (filter == NULL) { + mutex_unlock(&pfk->dump_lock); return -ENOMEM; + } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); @@ -1821,6 +1850,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); + mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } @@ -1913,19 +1943,14 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { - u8 *sa = (u8 *) (rq + 1); - int family, socklen; + int err; - family = pfkey_sockaddr_extract((struct sockaddr *)sa, - &t->saddr); - if (!family) - return -EINVAL; - - socklen = pfkey_sockaddr_len(family); - if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen), - &t->id.daddr) != family) - return -EINVAL; - t->encap_family = family; + err = parse_sockaddr_pair( + (struct sockaddr *)(rq + 1), + rq->sadb_x_ipsecrequest_len - sizeof(*rq), + &t->saddr, &t->id.daddr, &t->encap_family); + if (err) + return err; } else t->encap_family = xp->family; @@ -1945,7 +1970,11 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; - while (len >= sizeof(struct sadb_x_ipsecrequest)) { + while (len >= sizeof(*rq)) { + if (len < rq->sadb_x_ipsecrequest_len || + rq->sadb_x_ipsecrequest_len < sizeof(*rq)) + return -EINVAL; + if ((err = parse_ipsecrequest(xp, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; @@ -2408,7 +2437,6 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc return err; } -#ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_sockaddr_pair_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); @@ -2420,7 +2448,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, { int af, socklen; - if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) + if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; af = pfkey_sockaddr_extract(sa, saddr); @@ -2436,6 +2464,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, return 0; } +#ifdef CONFIG_NET_KEY_MIGRATE static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct xfrm_migrate *m) { @@ -2443,13 +2472,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct sadb_x_ipsecrequest *rq2; int mode; - if (len <= sizeof(struct sadb_x_ipsecrequest) || - len < rq1->sadb_x_ipsecrequest_len) + if (len < sizeof(*rq1) || + len < rq1->sadb_x_ipsecrequest_len || + rq1->sadb_x_ipsecrequest_len < sizeof(*rq1)) return -EINVAL; /* old endoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), - rq1->sadb_x_ipsecrequest_len, + rq1->sadb_x_ipsecrequest_len - sizeof(*rq1), &m->old_saddr, &m->old_daddr, &m->old_family); if (err) @@ -2458,13 +2488,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); len -= rq1->sadb_x_ipsecrequest_len; - if (len <= sizeof(struct sadb_x_ipsecrequest) || - len < rq2->sadb_x_ipsecrequest_len) + if (len <= sizeof(*rq2) || + len < rq2->sadb_x_ipsecrequest_len || + rq2->sadb_x_ipsecrequest_len < sizeof(*rq2)) return -EINVAL; /* new endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), - rq2->sadb_x_ipsecrequest_len, + rq2->sadb_x_ipsecrequest_len - sizeof(*rq2), &m->new_saddr, &m->new_daddr, &m->new_family); if (err) @@ -2679,14 +2710,18 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb { struct pfkey_sock *pfk = pfkey_sk(sk); - if (pfk->dump.dump != NULL) + mutex_lock(&pfk->dump_lock); + if (pfk->dump.dump != NULL) { + mutex_unlock(&pfk->dump_lock); return -EBUSY; + } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); + mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } @@ -3250,7 +3285,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) { + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } @@ -3792,7 +3827,6 @@ static inline void pfkey_exit_proc(struct net *net) static struct xfrm_mgr pfkeyv2_mgr = { - .id = "pfkeyv2", .notify = pfkey_send_notify, .acquire = pfkey_send_acquire, .compile_policy = pfkey_compile_policy, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e37d9554da7b47df0571c41478e6e758b8a8e6f9..fa0342574b8986ad98f458febe8e3e6314171eae 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -120,7 +120,7 @@ static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) return sk->sk_user_data; } -static inline struct l2tp_net *l2tp_pernet(struct net *net) +static inline struct l2tp_net *l2tp_pernet(const struct net *net) { BUG_ON(!net); @@ -217,27 +217,6 @@ static void l2tp_tunnel_sock_put(struct sock *sk) sock_put(sk); } -/* Lookup a session by id in the global session list - */ -static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id) -{ - struct l2tp_net *pn = l2tp_pernet(net); - struct hlist_head *session_list = - l2tp_session_id_hash_2(pn, session_id); - struct l2tp_session *session; - - rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, session_list, global_hlist) { - if (session->session_id == session_id) { - rcu_read_unlock_bh(); - return session; - } - } - rcu_read_unlock_bh(); - - return NULL; -} - /* Session hash list. * The session_id SHOULD be random according to RFC2661, but several * L2TP implementations (Cisco and Microsoft) use incrementing @@ -250,38 +229,10 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } -/* Lookup a session by id - */ -struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id) -{ - struct hlist_head *session_list; - struct l2tp_session *session; - - /* In L2TPv3, session_ids are unique over all tunnels and we - * sometimes need to look them up before we know the - * tunnel. - */ - if (tunnel == NULL) - return l2tp_session_find_2(net, session_id); - - session_list = l2tp_session_id_hash(tunnel, session_id); - read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, session_list, hlist) { - if (session->session_id == session_id) { - read_unlock_bh(&tunnel->hlist_lock); - return session; - } - } - read_unlock_bh(&tunnel->hlist_lock); - - return NULL; -} -EXPORT_SYMBOL_GPL(l2tp_session_find); - -/* Like l2tp_session_find() but takes a reference on the returned session. +/* Lookup a session. A new reference is held on the returned session. * Optionally calls session->ref() too if do_ref is true. */ -struct l2tp_session *l2tp_session_get(struct net *net, +struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, bool do_ref) { @@ -356,7 +307,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, +struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, + const char *ifname, bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); @@ -427,7 +379,7 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, /* Lookup a tunnel by id */ -struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) +struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id) { struct l2tp_tunnel *tunnel; struct l2tp_net *pn = l2tp_pernet(net); @@ -445,7 +397,7 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) } EXPORT_SYMBOL_GPL(l2tp_tunnel_find); -struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth) +struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8ce7818c7a9d0578b79e70eff1916d3248e7604a..eec5ad2ebb93c32bca7a57b377f80df8e10b2591 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,18 +230,16 @@ static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk) return tunnel; } -struct l2tp_session *l2tp_session_get(struct net *net, +struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, bool do_ref); -struct l2tp_session *l2tp_session_find(struct net *net, - struct l2tp_tunnel *tunnel, - u32 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, bool do_ref); -struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, +struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, + const char *ifname, bool do_ref); -struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); -struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); +struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id); +struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 6fd41d7afe1ef27592970de333c75928264dc275..4de2ec94b08cbf5aba016da754b799c46d5f378a 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include #include "l2tp_core.h" @@ -111,12 +114,13 @@ static void l2tp_eth_get_stats64(struct net_device *dev, { struct l2tp_eth *priv = netdev_priv(dev); - stats->tx_bytes = atomic_long_read(&priv->tx_bytes); - stats->tx_packets = atomic_long_read(&priv->tx_packets); - stats->tx_dropped = atomic_long_read(&priv->tx_dropped); - stats->rx_bytes = atomic_long_read(&priv->rx_bytes); - stats->rx_packets = atomic_long_read(&priv->rx_packets); - stats->rx_errors = atomic_long_read(&priv->rx_errors); + stats->tx_bytes = (unsigned long) atomic_long_read(&priv->tx_bytes); + stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets); + stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped); + stats->rx_bytes = (unsigned long) atomic_long_read(&priv->rx_bytes); + stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets); + stats->rx_errors = (unsigned long) atomic_long_read(&priv->rx_errors); + } static const struct net_device_ops l2tp_eth_netdev_ops = { @@ -127,13 +131,18 @@ static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; +static struct device_type l2tpeth_type = { + .name = "l2tpeth", +}; + static void l2tp_eth_dev_setup(struct net_device *dev) { + SET_NETDEV_DEVTYPE(dev, &l2tpeth_type); ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->features |= NETIF_F_LLTX; dev->netdev_ops = &l2tp_eth_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) @@ -204,8 +213,58 @@ static void l2tp_eth_show(struct seq_file *m, void *arg) } #endif +static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel, + struct l2tp_session *session, + struct net_device *dev) +{ + unsigned int overhead = 0; + struct dst_entry *dst; + u32 l3_overhead = 0; + + /* if the encap is UDP, account for UDP header size */ + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + overhead += sizeof(struct udphdr); + dev->needed_headroom += sizeof(struct udphdr); + } + if (session->mtu != 0) { + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; + return; + } + lock_sock(tunnel->sock); + l3_overhead = kernel_sock_ip_overhead(tunnel->sock); + release_sock(tunnel->sock); + if (l3_overhead == 0) { + /* L3 Overhead couldn't be identified, this could be + * because tunnel->sock was NULL or the socket's + * address family was not IPv4 or IPv6, + * dev mtu stays at 1500. + */ + return; + } + /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr + * UDP overhead, if any, was already factored in above. + */ + overhead += session->hdr_len + ETH_HLEN + l3_overhead; + + /* If PMTU discovery was enabled, use discovered MTU on L2TP device */ + dst = sk_dst_get(tunnel->sock); + if (dst) { + /* dst_mtu will use PMTU if found, else fallback to intf MTU */ + u32 pmtu = dst_mtu(dst); + + if (pmtu != 0) + dev->mtu = pmtu; + dst_release(dst); + } + session->mtu = dev->mtu - overhead; + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; +} + static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { + unsigned char name_assign_type; struct net_device *dev; char name[IFNAMSIZ]; struct l2tp_tunnel *tunnel; @@ -222,15 +281,12 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p } if (cfg->ifname) { - dev = dev_get_by_name(net, cfg->ifname); - if (dev) { - dev_put(dev); - rc = -EEXIST; - goto out; - } strlcpy(name, cfg->ifname, IFNAMSIZ); - } else + name_assign_type = NET_NAME_USER; + } else { strcpy(name, L2TP_ETH_DEV_NAME); + name_assign_type = NET_NAME_ENUM; + } session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); @@ -239,7 +295,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - dev = alloc_netdev(sizeof(*priv), name, NET_NAME_UNKNOWN, + dev = alloc_netdev(sizeof(*priv), name, name_assign_type, l2tp_eth_dev_setup); if (!dev) { rc = -ENOMEM; @@ -247,12 +303,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p } dev_net_set(dev, net); - if (session->mtu == 0) - session->mtu = dev->mtu - session->hdr_len; - dev->mtu = session->mtu; - dev->needed_headroom += session->hdr_len; dev->min_mtu = 0; dev->max_mtu = ETH_MAX_MTU; + l2tp_eth_adjust_mtu(tunnel, session, dev); priv = netdev_priv(dev); priv->dev = dev; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 7e3e669baac42df9d0be2864b927ba4f390258e9..12cfcd0ca807396d18e061e9bcf4c29e760b2563 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -521,11 +521,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf goto out; } session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - ret = -EEXIST; - goto out; - } if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) { ret = -EINVAL; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index cb4fff785cbf5aaad520442dc243ae62dc5750ea..c38d16f22d2a7ff265b8729d43b164312598fd9f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -142,7 +142,7 @@ static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, }; /** @@ -311,6 +311,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); + + lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; @@ -382,6 +384,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + release_sock(sk); return rc; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 8bc5a1bd2d453542df31506f543feb64b64cdd96..9b02c13d258b005bb10029b3baf8ec4db71f18b4 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || @@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 5404d0d195cc581613e356b75bd70321e617673e..63b6ab0563705f4b15c6bc8e3d9c7ad85a2af381 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 4456559cb056d1e32a621351120327cf27541bf7..1b7a4daf283c5191c19eaa4f3571d316c866906a 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -357,14 +357,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, spin_lock_init(&tid_agg_rx->reorder_lock); /* rx timer */ - tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; - tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; - init_timer_deferrable(&tid_agg_rx->session_timer); + setup_deferrable_timer(&tid_agg_rx->session_timer, + sta_rx_agg_session_timer_expired, + (unsigned long)&sta->timer_to_tid[tid]); /* rx reorder timer */ - tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired; - tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&tid_agg_rx->reorder_timer); + setup_timer(&tid_agg_rx->reorder_timer, + sta_rx_agg_reorder_timer_expired, + (unsigned long)&sta->timer_to_tid[tid]); /* prepare reordering buffer */ tid_agg_rx->reorder_buf = diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 45319cc01121a9eb17d49185a07a9675e7b2995d..cf2392b2ac717972e4354346fd086ec802370de3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -670,14 +670,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, tid_tx->timeout = timeout; /* response timer */ - tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; - tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&tid_tx->addba_resp_timer); + setup_timer(&tid_tx->addba_resp_timer, + sta_addba_resp_timer_expired, + (unsigned long)&sta->timer_to_tid[tid]); /* tx timer */ - tid_tx->session_timer.function = sta_tx_agg_session_timer_expired; - tid_tx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; - init_timer_deferrable(&tid_tx->session_timer); + setup_deferrable_timer(&tid_tx->session_timer, + sta_tx_agg_session_timer_expired, + (unsigned long)&sta->timer_to_tid[tid]); /* assign a dialog token */ sta->ampdu_mlme.dialog_token_allocator++; @@ -741,46 +741,43 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, ieee80211_agg_start_txq(sta, tid, true); } -void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) +void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, + struct tid_ampdu_tx *tid_tx) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - struct tid_ampdu_tx *tid_tx; - trace_api_start_tx_ba_cb(sdata, ra, tid); + if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) + return; + + if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) + ieee80211_agg_tx_operational(local, sta, tid); +} + +static struct tid_ampdu_tx * +ieee80211_lookup_tid_tx(struct ieee80211_sub_if_data *sdata, + const u8 *ra, u16 tid, struct sta_info **sta) +{ + struct tid_ampdu_tx *tid_tx; if (tid >= IEEE80211_NUM_TIDS) { ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", tid, IEEE80211_NUM_TIDS); - return; + return NULL; } - mutex_lock(&local->sta_mtx); - sta = sta_info_get_bss(sdata, ra); - if (!sta) { - mutex_unlock(&local->sta_mtx); + *sta = sta_info_get_bss(sdata, ra); + if (!*sta) { ht_dbg(sdata, "Could not find station: %pM\n", ra); - return; + return NULL; } - mutex_lock(&sta->ampdu_mlme.mtx); - tid_tx = rcu_dereference_protected_tid_tx(sta, tid); + tid_tx = rcu_dereference((*sta)->ampdu_mlme.tid_tx[tid]); - if (WARN_ON(!tid_tx)) { + if (WARN_ON(!tid_tx)) ht_dbg(sdata, "addBA was not requested!\n"); - goto unlock; - } - if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) - goto unlock; - - if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) - ieee80211_agg_tx_operational(local, sta, tid); - - unlock: - mutex_unlock(&sta->ampdu_mlme.mtx); - mutex_unlock(&local->sta_mtx); + return tid_tx; } void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, @@ -788,19 +785,20 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); + struct sta_info *sta; + struct tid_ampdu_tx *tid_tx; - if (unlikely(!skb)) - return; + trace_api_start_tx_ba_cb(sdata, ra, tid); - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; + rcu_read_lock(); + tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta); + if (!tid_tx) + goto out; - skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); + set_bit(HT_AGG_STATE_START_CB, &tid_tx->state); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + out: + rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); @@ -860,37 +858,18 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); -void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) +void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, + struct tid_ampdu_tx *tid_tx) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - struct tid_ampdu_tx *tid_tx; + struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; - trace_api_stop_tx_ba_cb(sdata, ra, tid); - - if (tid >= IEEE80211_NUM_TIDS) { - ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", - tid, IEEE80211_NUM_TIDS); - return; - } - - ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", ra, tid); - - mutex_lock(&local->sta_mtx); - - sta = sta_info_get_bss(sdata, ra); - if (!sta) { - ht_dbg(sdata, "Could not find station: %pM\n", ra); - goto unlock; - } + ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", + sta->sta.addr, tid); - mutex_lock(&sta->ampdu_mlme.mtx); spin_lock_bh(&sta->lock); - tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + if (!test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { ht_dbg(sdata, "unexpected callback to A-MPDU stop for %pM tid %d\n", sta->sta.addr, tid); @@ -906,12 +885,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) spin_unlock_bh(&sta->lock); if (send_delba) - ieee80211_send_delba(sdata, ra, tid, + ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - - mutex_unlock(&sta->ampdu_mlme.mtx); - unlock: - mutex_unlock(&local->sta_mtx); } void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, @@ -919,19 +894,20 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); + struct sta_info *sta; + struct tid_ampdu_tx *tid_tx; - if (unlikely(!skb)) - return; + trace_api_stop_tx_ba_cb(sdata, ra, tid); - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; + rcu_read_lock(); + tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta); + if (!tid_tx) + goto out; - skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->work); + set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + out: + rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ac879bb17870d4602fd151504f68d26c21eeac9b..4a388fe8c2d1363b300becd61664d333b128e845 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3,7 +3,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright (C) 2015-2016 Intel Deutschland GmbH + * Copyright (C) 2015-2017 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -22,11 +22,98 @@ #include "mesh.h" #include "wme.h" +static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata, + struct vif_params *params) +{ + bool mu_mimo_groups = false; + bool mu_mimo_follow = false; + + if (params->vht_mumimo_groups) { + u64 membership; + + BUILD_BUG_ON(sizeof(membership) != WLAN_MEMBERSHIP_LEN); + + memcpy(sdata->vif.bss_conf.mu_group.membership, + params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); + memcpy(sdata->vif.bss_conf.mu_group.position, + params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, + WLAN_USER_POSITION_LEN); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS); + /* don't care about endianness - just check for 0 */ + memcpy(&membership, params->vht_mumimo_groups, + WLAN_MEMBERSHIP_LEN); + mu_mimo_groups = membership != 0; + } + + if (params->vht_mumimo_follow_addr) { + mu_mimo_follow = + is_valid_ether_addr(params->vht_mumimo_follow_addr); + ether_addr_copy(sdata->u.mntr.mu_follow_addr, + params->vht_mumimo_follow_addr); + } + + sdata->vif.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; +} + +static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, + struct vif_params *params) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *monitor_sdata; + + /* check flags first */ + if (params->flags && ieee80211_sdata_running(sdata)) { + u32 mask = MONITOR_FLAG_COOK_FRAMES | MONITOR_FLAG_ACTIVE; + + /* + * Prohibit MONITOR_FLAG_COOK_FRAMES and + * MONITOR_FLAG_ACTIVE to be changed while the + * interface is up. + * Else we would need to add a lot of cruft + * to update everything: + * cooked_mntrs, monitor and all fif_* counters + * reconfigure hardware + */ + if ((params->flags & mask) != (sdata->u.mntr.flags & mask)) + return -EBUSY; + } + + /* also validate MU-MIMO change */ + monitor_sdata = rtnl_dereference(local->monitor_sdata); + + if (!monitor_sdata && + (params->vht_mumimo_groups || params->vht_mumimo_follow_addr)) + return -EOPNOTSUPP; + + /* apply all changes now - no failures allowed */ + + if (monitor_sdata) + ieee80211_set_mu_mimo_follow(monitor_sdata, params); + + if (params->flags) { + if (ieee80211_sdata_running(sdata)) { + ieee80211_adjust_monitor_flags(sdata, -1); + sdata->u.mntr.flags = params->flags; + ieee80211_adjust_monitor_flags(sdata, 1); + + ieee80211_configure_filter(local); + } else { + /* + * Because the interface is down, ieee80211_do_stop + * and ieee80211_do_open take care of "everything" + * mentioned in the comment above. + */ + sdata->u.mntr.flags = params->flags; + } + } + + return 0; +} + static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, const char *name, unsigned char name_assign_type, enum nl80211_iftype type, - u32 *flags, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -38,9 +125,14 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, if (err) return ERR_PTR(err); - if (type == NL80211_IFTYPE_MONITOR && flags) { - sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - sdata->u.mntr.flags = *flags; + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + if (type == NL80211_IFTYPE_MONITOR) { + err = ieee80211_set_mon_options(sdata, params); + if (err) { + ieee80211_if_remove(sdata); + return NULL; + } } return wdev; @@ -55,7 +147,7 @@ static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) static int ieee80211_change_iface(struct wiphy *wiphy, struct net_device *dev, - enum nl80211_iftype type, u32 *flags, + enum nl80211_iftype type, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -75,58 +167,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_sub_if_data *monitor_sdata; - u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; - - monitor_sdata = rtnl_dereference(local->monitor_sdata); - if (monitor_sdata && - wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) { - memcpy(monitor_sdata->vif.bss_conf.mu_group.membership, - params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); - memcpy(monitor_sdata->vif.bss_conf.mu_group.position, - params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, - WLAN_USER_POSITION_LEN); - monitor_sdata->vif.mu_mimo_owner = true; - ieee80211_bss_info_change_notify(monitor_sdata, - BSS_CHANGED_MU_GROUPS); - - ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr, - params->macaddr); - } - - if (!flags) - return 0; - - if (ieee80211_sdata_running(sdata)) { - u32 mask = MONITOR_FLAG_COOK_FRAMES | - MONITOR_FLAG_ACTIVE; - - /* - * Prohibit MONITOR_FLAG_COOK_FRAMES and - * MONITOR_FLAG_ACTIVE to be changed while the - * interface is up. - * Else we would need to add a lot of cruft - * to update everything: - * cooked_mntrs, monitor and all fif_* counters - * reconfigure hardware - */ - if ((*flags & mask) != (sdata->u.mntr.flags & mask)) - return -EBUSY; - - ieee80211_adjust_monitor_flags(sdata, -1); - sdata->u.mntr.flags = *flags; - ieee80211_adjust_monitor_flags(sdata, 1); - - ieee80211_configure_filter(local); - } else { - /* - * Because the interface is down, ieee80211_do_stop - * and ieee80211_do_open take care of "everything" - * mentioned in the comment above. - */ - sdata->u.mntr.flags = *flags; - } + ret = ieee80211_set_mon_options(sdata, params); + if (ret) + return ret; } return 0; @@ -617,10 +660,11 @@ void sta_set_rate_info_tx(struct sta_info *sta, int shift = ieee80211_vif_get_shift(&sta->sdata->vif); u16 brate; - sband = sta->local->hw.wiphy->bands[ - ieee80211_get_sdata_band(sta->sdata)]; - brate = sband->bitrates[rate->idx].bitrate; - rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); + sband = ieee80211_get_sband(sta->sdata); + if (sband) { + brate = sband->bitrates[rate->idx].bitrate; + rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); + } } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; @@ -696,11 +740,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, return 0; mutex_lock(&local->mtx); - mutex_lock(&local->iflist_mtx); if (local->use_chanctx) { - sdata = rcu_dereference_protected( - local->monitor_sdata, - lockdep_is_held(&local->iflist_mtx)); + sdata = rtnl_dereference(local->monitor_sdata); if (sdata) { ieee80211_vif_release_channel(sdata); ret = ieee80211_vif_use_channel(sdata, chandef, @@ -713,7 +754,6 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, if (ret == 0) local->monitor_chandef = *chandef; - mutex_unlock(&local->iflist_mtx); mutex_unlock(&local->mtx); return ret; @@ -862,6 +902,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, default: return -EINVAL; } + sdata->u.ap.req_smps = sdata->smps_mode; + sdata->needed_rx_chains = sdata->local->rx_chains; sdata->vif.bss_conf.beacon_int = params->beacon_interval; @@ -1214,10 +1256,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, int ret = 0; struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); u32 mask, set; - sband = local->hw.wiphy->bands[band]; + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; mask = params->sta_flags_mask; set = params->sta_flags_set; @@ -1350,7 +1393,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef, sband, params->supported_rates, params->supported_rates_len, - &sta->sta.supp_rates[band]); + &sta->sta.supp_rates[sband->band]); } if (params->ht_capa) @@ -1366,8 +1409,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ - __ieee80211_vht_handle_opmode(sdata, sta, - params->opmode_notif, band); + __ieee80211_vht_handle_opmode(sdata, sta, params->opmode_notif, + sband->band); } if (params->support_p2p_ps >= 0) @@ -2005,13 +2048,15 @@ static int ieee80211_change_bss(struct wiphy *wiphy, struct bss_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - enum nl80211_band band; + struct ieee80211_supported_band *sband; u32 changed = 0; if (!sdata_dereference(sdata->u.ap.beacon, sdata)) return -ENOENT; - band = ieee80211_get_sdata_band(sdata); + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; if (params->use_cts_prot >= 0) { sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot; @@ -2024,7 +2069,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (!sdata->vif.bss_conf.use_short_slot && - band == NL80211_BAND_5GHZ) { + sband->band == NL80211_BAND_5GHZ) { sdata->vif.bss_conf.use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } @@ -2037,11 +2082,12 @@ static int ieee80211_change_bss(struct wiphy *wiphy, if (params->basic_rates) { ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef, - wiphy->bands[band], + wiphy->bands[sband->band], params->basic_rates, params->basic_rates_len, &sdata->vif.bss_conf.basic_rates); changed |= BSS_CHANGED_BASIC_RATES; + ieee80211_check_rate_mask(sdata); } if (params->ap_isolate >= 0) { @@ -2198,7 +2244,8 @@ ieee80211_sched_scan_start(struct wiphy *wiphy, } static int -ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) +ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev, + u64 reqid) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -2630,6 +2677,33 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy, bss_conf->cqm_rssi_thold = rssi_thold; bss_conf->cqm_rssi_hyst = rssi_hyst; + bss_conf->cqm_rssi_low = 0; + bss_conf->cqm_rssi_high = 0; + sdata->u.mgd.last_cqm_event_signal = 0; + + /* tell the driver upon association, unless already associated */ + if (sdata->u.mgd.associated && + sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM); + + return 0; +} + +static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, + struct net_device *dev, + s32 rssi_low, s32 rssi_high) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_vif *vif = &sdata->vif; + struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; + + if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) + return -EOPNOTSUPP; + + bss_conf->cqm_rssi_low = rssi_low; + bss_conf->cqm_rssi_high = rssi_high; + bss_conf->cqm_rssi_thold = 0; + bss_conf->cqm_rssi_hyst = 0; sdata->u.mgd.last_cqm_event_signal = 0; /* tell the driver upon association, unless already associated */ @@ -2658,6 +2732,21 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return ret; } + /* + * If active validate the setting and reject it if it doesn't leave + * at least one basic rate usable, since we really have to be able + * to send something, and if we're an AP we have to be able to do + * so at a basic rate so that all clients can receive it. + */ + if (rcu_access_pointer(sdata->vif.chanctx_conf) && + sdata->vif.bss_conf.chandef.chan) { + u32 basic_rates = sdata->vif.bss_conf.basic_rates; + enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band; + + if (!(mask->control[band].legacy & basic_rates)) + return -EINVAL; + } + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; @@ -3639,6 +3728,7 @@ const struct cfg80211_ops mac80211_config_ops = { .mgmt_tx = ieee80211_mgmt_tx, .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, + .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config, .mgmt_frame_register = ieee80211_mgmt_frame_register, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index f4a52877356349abed96d0a77d6ae75f301181e4..6ca5442b1e03b18aa81764089bf74c002d337690 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -7,6 +7,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation + * Copyright 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -289,8 +290,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, { int i; - cancel_work_sync(&sta->ampdu_mlme.work); - for (i = 0; i < IEEE80211_NUM_TIDS; i++) { __ieee80211_stop_tx_ba_session(sta, i, reason); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, @@ -298,6 +297,9 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, reason != AGG_STOP_DESTROY_STA && reason != AGG_STOP_PEER_REQUEST); } + + /* stopping might queue the work again - so cancel only afterwards */ + cancel_work_sync(&sta->ampdu_mlme.work); } void ieee80211_ba_session_work(struct work_struct *work) @@ -352,10 +354,16 @@ void ieee80211_ba_session_work(struct work_struct *work) spin_unlock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, - &tid_tx->state)) + if (!tid_tx) + continue; + + if (test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state)) + ieee80211_start_tx_ba_cb(sta, tid, tid_tx); + if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_LOCAL_REQUEST); + if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) + ieee80211_stop_tx_ba_cb(sta, tid, tid_tx); } mutex_unlock(&sta->ampdu_mlme.mtx); } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 98999d3d5262743cf32ef92480772f034e70baf1..364d4e13764942db0b41eee7a9561b3c00e946ae 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -66,6 +66,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, 2 + (IEEE80211_MAX_SUPP_RATES - 8) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + + 2 + sizeof(struct ieee80211_vht_cap) + + 2 + sizeof(struct ieee80211_vht_operation) + ifibss->ie_len; presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); if (!presp) @@ -425,7 +427,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: cfg80211_chandef_create(&chandef, cbss->channel, - NL80211_CHAN_WIDTH_20_NOHT); + NL80211_CHAN_NO_HT); chandef.width = sdata->u.ibss.chandef.width; break; case NL80211_CHAN_WIDTH_80: @@ -437,7 +439,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, default: /* fall back to 20 MHz for unsupported modes */ cfg80211_chandef_create(&chandef, cbss->channel, - NL80211_CHAN_WIDTH_20_NOHT); + NL80211_CHAN_NO_HT); break; } @@ -992,7 +994,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, enum nl80211_band band = rx_status->band; enum nl80211_bss_scan_width scan_width; struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; + struct ieee80211_supported_band *sband; bool rates_updated = false; u32 supp_rates = 0; @@ -1002,6 +1004,10 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, if (!ether_addr_equal(mgmt->bssid, sdata->u.ibss.bssid)) return; + sband = local->hw.wiphy->bands[band]; + if (WARN_ON(!sband)) + return; + rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -1014,9 +1020,9 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->flag & RX_FLAG_5MHZ) + if (rx_status->bw == RATE_INFO_BW_5) scan_width = NL80211_BSS_CHAN_WIDTH_5; - if (rx_status->flag & RX_FLAG_10MHZ) + else if (rx_status->bw == RATE_INFO_BW_10) scan_width = NL80211_BSS_CHAN_WIDTH_10; sta->sta.supp_rates[band] = supp_rates | diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0e718437d080e7258efe75bcba26ce65990671ce..5e002f62c235fbae4c5154ef9eab65c0e5f842b8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -839,6 +839,8 @@ struct txq_info { struct ieee80211_if_mntr { u32 flags; u8 mu_follow_addr[ETH_ALEN] __aligned(2); + + struct list_head list; }; /** @@ -999,21 +1001,6 @@ sdata_assert_lock(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&sdata->wdev.mtx); } -static inline enum nl80211_band -ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) -{ - enum nl80211_band band = NL80211_BAND_2GHZ; - struct ieee80211_chanctx_conf *chanctx_conf; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (!WARN_ON(!chanctx_conf)) - band = chanctx_conf->def.chan->band; - rcu_read_unlock(); - - return band; -} - static inline int ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef) { @@ -1049,8 +1036,6 @@ struct ieee80211_rx_agg { enum sdata_queue_type { IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, - IEEE80211_SDATA_QUEUE_AGG_START = 1, - IEEE80211_SDATA_QUEUE_AGG_STOP = 2, IEEE80211_SDATA_QUEUE_RX_AGG_START = 3, IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4, }; @@ -1259,6 +1244,7 @@ struct ieee80211_local { /* see iface.c */ struct list_head interfaces; + struct list_head mon_list; /* only that are IFF_UP && !cooked */ struct mutex iflist_mtx; /* @@ -1418,11 +1404,26 @@ IEEE80211_WDEV_TO_SUB_IF(struct wireless_dev *wdev) return container_of(wdev, struct ieee80211_sub_if_data, wdev); } -/* this struct represents 802.11n's RA/TID combination */ -struct ieee80211_ra_tid { - u8 ra[ETH_ALEN]; - u16 tid; -}; +static inline struct ieee80211_supported_band * +ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *chanctx_conf; + enum nl80211_band band; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + + if (WARN_ON(!chanctx_conf)) { + rcu_read_unlock(); + return NULL; + } + + band = chanctx_conf->def.chan->band; + rcu_read_unlock(); + + return local->hw.wiphy->bands[band]; +} /* this struct holds the value parsing from channel switch IE */ struct ieee80211_csa_ie { @@ -1474,6 +1475,7 @@ struct ieee802_11_elems { const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; + const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie; /* length of them, respectively */ u8 ext_capab_len; @@ -1527,9 +1529,9 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status) status->flag & RX_FLAG_MACTIME_END); if (status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END)) return true; - /* can't handle HT/VHT preamble yet */ + /* can't handle non-legacy preamble yet */ if (status->flag & RX_FLAG_MACTIME_PLCP_START && - !(status->flag & (RX_FLAG_HT | RX_FLAG_VHT))) + status->encoding == RX_ENC_LEGACY) return true; return false; } @@ -1784,8 +1786,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_agg_stop_reason reason); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_agg_stop_reason reason); -void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); -void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); +void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, + struct tid_ampdu_tx *tid_tx); +void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, + struct tid_ampdu_tx *tid_tx); void ieee80211_ba_session_work(struct work_struct *work); void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5bb0c501281954dfe656c5e886c9032b958061be..f5f50150ba1cd7f53689a4e5efac3235f04679cf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -676,7 +676,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) set_bit(SDATA_STATE_RUNNING, &sdata->state); - if (sdata->vif.type == NL80211_IFTYPE_WDS) { + switch (sdata->vif.type) { + case NL80211_IFTYPE_WDS: /* Create STA entry for the WDS peer */ sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, GFP_KERNEL); @@ -697,8 +698,17 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) rate_control_rate_init(sta); netif_carrier_on(dev); - } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) { + break; + case NL80211_IFTYPE_P2P_DEVICE: rcu_assign_pointer(local->p2p_sdata, sdata); + break; + case NL80211_IFTYPE_MONITOR: + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) + break; + list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list); + break; + default: + break; } /* @@ -817,6 +827,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP: cancel_work_sync(&sdata->u.ap.request_smps_work); break; + case NL80211_IFTYPE_MONITOR: + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) + break; + list_del_rcu(&sdata->u.mntr.list); + break; default: break; } @@ -1198,7 +1213,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = { static void ieee80211_if_free(struct net_device *dev) { free_percpu(dev->tstats); - free_netdev(dev); } static void ieee80211_if_setup(struct net_device *dev) @@ -1206,7 +1220,8 @@ static void ieee80211_if_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &ieee80211_dataif_ops; - dev->destructor = ieee80211_if_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ieee80211_if_free; } static void ieee80211_if_setup_no_queue(struct net_device *dev) @@ -1222,7 +1237,6 @@ static void ieee80211_iface_work(struct work_struct *work) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; - struct ieee80211_ra_tid *ra_tid; struct ieee80211_rx_agg *rx_agg; if (!ieee80211_sdata_running(sdata)) @@ -1238,15 +1252,7 @@ static void ieee80211_iface_work(struct work_struct *work) while ((skb = skb_dequeue(&sdata->skb_queue))) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) { - ra_tid = (void *)&skb->cb; - ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra, - ra_tid->tid); - } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) { - ra_tid = (void *)&skb->cb; - ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra, - ra_tid->tid); - } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) { + if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) { rx_agg = (void *)&skb->cb; mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, rx_agg->addr); @@ -1810,6 +1816,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { ieee80211_if_free(ndev); + free_netdev(ndev); return ret; } @@ -1899,7 +1906,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - ieee80211_if_free(ndev); + free_netdev(ndev); return ret; } } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 56fb47953b72420b3ceb4e3f5d27dc9c4d23928b..8aa1f5b6a05145b0838b494abdf2b2c5b0aa17ab 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -253,6 +253,7 @@ static void ieee80211_restart_work(struct work_struct *work) WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with hardware scan in progress\n", __func__); + flush_work(&local->radar_detected_work); rtnl_lock(); list_for_each_entry(sdata, &local->interfaces, list) flush_delayed_work(&sdata->dec_tailroom_needed_wk); @@ -603,6 +604,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ARRAY_SIZE(local->ext_capa); INIT_LIST_HEAD(&local->interfaces); + INIT_LIST_HEAD(&local->mon_list); __hw_addr_init(&local->mc_list); @@ -1186,6 +1188,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->reconfig_filter); cancel_work_sync(&local->tdls_chsw_work); flush_work(&local->sched_scan_stopped_work); + flush_work(&local->radar_detected_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6e7b6a07b7d536a64f7e9ec296adf2bbba8d961c..737e1f082b0ddd55e03b30bf215a5fbb68943143 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -63,6 +63,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 basic_rates = 0; struct cfg80211_chan_def sta_chan_def; + struct ieee80211_supported_band *sband; /* * As support for each feature is added, check for matching @@ -83,7 +84,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth))) return false; - ieee80211_sta_get_rates(sdata, ie, ieee80211_get_sdata_band(sdata), + sband = ieee80211_get_sband(sdata); + if (!sband) + return false; + + ieee80211_sta_get_rates(sdata, ie, sband->band, &basic_rates); if (sdata->vif.bss_conf.basic_rates != basic_rates) @@ -399,12 +404,13 @@ static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_local *local = sdata->local; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u8 *pos; - sband = local->hw.wiphy->bands[band]; + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + if (!sband->ht_cap.ht_supported || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || @@ -462,12 +468,13 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_local *local = sdata->local; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u8 *pos; - sband = local->hw.wiphy->bands[band]; + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + if (!sband->vht_cap.vht_supported || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || @@ -916,12 +923,16 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings params; struct ieee80211_csa_ie csa_ie; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband; int err; u32 sta_flags; sdata_assert_lock(sdata); + sband = ieee80211_get_sband(sdata); + if (!sband) + return false; + sta_flags = IEEE80211_STA_DISABLE_VHT; switch (sdata->vif.bss_conf.chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: @@ -935,7 +946,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, memset(¶ms, 0, sizeof(params)); memset(&csa_ie, 0, sizeof(csa_ie)); - err = ieee80211_parse_ch_switch_ie(sdata, elems, band, + err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band, sta_flags, sdata->vif.addr, &csa_ie); if (err < 0) @@ -1100,8 +1111,14 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; - if (mesh_matches_local(sdata, &elems)) - mesh_neighbour_update(sdata, mgmt->sa, &elems); + if (mesh_matches_local(sdata, &elems)) { + mpl_dbg(sdata, "rssi_threshold=%d,rx_status->signal=%d\n", + sdata->u.mesh.mshcfg.rssi_threshold, rx_status->signal); + if (!sdata->u.mesh.user_mpm || + sdata->u.mesh.mshcfg.rssi_threshold == 0 || + sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal) + mesh_neighbour_update(sdata, mgmt->sa, &elems); + } if (ifmsh->sync_ops) ifmsh->sync_ops->rx_bcn_presp(sdata, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index b747c9645e432bffe5b9d266a51e7ffca3b75ec3..4005edd71fe86db5415bf0bb9803dac7248ae77a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -16,6 +16,7 @@ #define TEST_FRAME_LEN 8192 #define MAX_METRIC 0xffffffff #define ARITH_SHIFT 8 +#define LINK_FAIL_THRESH 95 #define MAX_PREQ_QUEUE_LEN 64 @@ -307,10 +308,12 @@ void ieee80211s_update_metric(struct ieee80211_local *local, failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); - /* moving average, scaled to 100 */ - sta->mesh->fail_avg = - ((80 * sta->mesh->fail_avg + 5) / 100 + 20 * failed); - if (sta->mesh->fail_avg > 95) + /* moving average, scaled to 100. + * feed failure as 100 and success as 0 + */ + ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, failed * 100); + if (ewma_mesh_fail_avg_read(&sta->mesh->fail_avg) > + LINK_FAIL_THRESH) mesh_plink_broken(sta); } @@ -325,6 +328,8 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, int rate, err; u32 tx_time, estimated_retx; u64 result; + unsigned long fail_avg = + ewma_mesh_fail_avg_read(&sta->mesh->fail_avg); /* Try to get rate based on HW/SW RC algorithm. * Rate is returned in units of Kbps, correct this @@ -336,7 +341,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (rate) { err = 0; } else { - if (sta->mesh->fail_avg >= 100) + if (fail_avg > LINK_FAIL_THRESH) return MAX_METRIC; sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); @@ -344,7 +349,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (WARN_ON(!rate)) return MAX_METRIC; - err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + err = (fail_avg << ARITH_SHIFT) / 100; } /* bitrate is in units of 100 Kbps, while we need rate in units of @@ -484,6 +489,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, ? mpath->exp_time : exp_time; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); + ewma_mesh_fail_avg_init(&sta->mesh->fail_avg); + /* init it at a low value - 0 start is tricky */ + ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1); mesh_path_tx_pending(mpath); /* draft says preq_id should be saved to, but there does * not seem to be any use for it, skipping by now @@ -522,6 +530,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, ? mpath->exp_time : exp_time; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); + ewma_mesh_fail_avg_init(&sta->mesh->fail_avg); + /* init it at a low value - 0 start is tricky */ + ewma_mesh_fail_avg_add(&sta->mesh->fail_avg, 1); mesh_path_tx_pending(mpath); } else spin_unlock_bh(&mpath->state_lock); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index f0e6175a9821f01d7aac2dfbda02c1ee5eeb31ec..97269caafecd7b52e644e9bb645d305fdfb67196 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -397,11 +397,10 @@ struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, new_mpath->sdata = sdata; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); - new_mpath->timer.data = (unsigned long) new_mpath; - new_mpath->timer.function = mesh_path_timer; new_mpath->exp_time = jiffies; spin_lock_init(&new_mpath->state_lock); - init_timer(&new_mpath->timer); + setup_timer(&new_mpath->timer, mesh_path_timer, + (unsigned long) new_mpath); return new_mpath; } @@ -829,6 +828,9 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); + ewma_mesh_fail_avg_init(&next_hop->mesh->fail_avg); + /* init it at a low value - 0 start is tricky */ + ewma_mesh_fail_avg_add(&next_hop->mesh->fail_avg, 1); mesh_path_tx_pending(mpath); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 953d71e784a9ab71cb8494478e39eaf3b9464211..1131cd504a15ef5fa507c37c8a5464aac173321f 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -95,19 +95,23 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; + struct ieee80211_supported_band *sband; struct sta_info *sta; u32 erp_rates = 0, changed = 0; int i; bool short_slot = false; - if (band == NL80211_BAND_5GHZ) { + sband = ieee80211_get_sband(sdata); + if (!sband) + return changed; + + if (sband->band == NL80211_BAND_5GHZ) { /* (IEEE 802.11-2012 19.4.5) */ short_slot = true; goto out; - } else if (band != NL80211_BAND_2GHZ) + } else if (sband->band != NL80211_BAND_2GHZ) { goto out; + } for (i = 0; i < sband->n_bitrates; i++) if (sband->bitrates[i].flags & IEEE80211_RATE_ERP_G) @@ -123,7 +127,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata) continue; short_slot = false; - if (erp_rates & sta->sta.supp_rates[band]) + if (erp_rates & sta->sta.supp_rates[sband->band]) short_slot = true; else break; @@ -249,7 +253,15 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.self_prot.action_code = action; if (action != WLAN_SP_MESH_PEERING_CLOSE) { - enum nl80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband; + enum nl80211_band band; + + sband = ieee80211_get_sband(sdata); + if (!sband) { + err = -EINVAL; + goto free; + } + band = sband->band; /* capability info */ pos = skb_put(skb, 2); @@ -395,13 +407,16 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, bool insert) { struct ieee80211_local *local = sdata->local; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband; u32 rates, basic_rates = 0, changed = 0; enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; - sband = local->hw.wiphy->bands[band]; - rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + + rates = ieee80211_sta_get_rates(sdata, elems, sband->band, + &basic_rates); spin_lock_bh(&sta->mesh->plink_lock); sta->rx_stats.last_rx = jiffies; @@ -412,9 +427,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, goto out; sta->mesh->processed_beacon = true; - if (sta->sta.supp_rates[band] != rates) + if (sta->sta.supp_rates[sband->band] != rates) changed |= IEEE80211_RC_SUPP_RATES_CHANGED; - sta->sta.supp_rates[band] = rates; + sta->sta.supp_rates[sband->band] = rates; if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, elems->ht_cap_elem, sta)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6e90301154d5a6ba4a7e9bb612ac8243cf09a23c..cc8e6ea1b27e95e36412d840cd394039f5df4d5d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6,7 +6,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015 - 2016 Intel Deutschland GmbH + * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -601,7 +601,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; - u32 rate_flags, rates = 0; + u32 rates = 0; sdata_assert_lock(sdata); @@ -612,7 +612,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) return; } chan = chanctx_conf->def.chan; - rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); rcu_read_unlock(); sband = local->hw.wiphy->bands[chan->band]; shift = ieee80211_vif_get_shift(&sdata->vif); @@ -636,9 +635,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) */ rates_len = 0; for (i = 0; i < sband->n_bitrates; i++) { - if ((rate_flags & sband->bitrates[i].flags) - != rate_flags) - continue; rates |= BIT(i); rates_len++; } @@ -1855,11 +1851,16 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, u16 capab, bool erp_valid, u8 erp) { struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + struct ieee80211_supported_band *sband; u32 changed = 0; bool use_protection; bool use_short_preamble; bool use_short_slot; + sband = ieee80211_get_sband(sdata); + if (!sband) + return changed; + if (erp_valid) { use_protection = (erp & WLAN_ERP_USE_PROTECTION) != 0; use_short_preamble = (erp & WLAN_ERP_BARKER_PREAMBLE) == 0; @@ -1869,7 +1870,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); - if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_5GHZ) + if (sband->band == NL80211_BAND_5GHZ) use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { @@ -1908,6 +1909,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.associated = cbss; memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); + ieee80211_check_rate_mask(sdata); + sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE; if (sdata->vif.p2p || @@ -2797,8 +2800,9 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - sdata_info(sdata, "disassociated from %pM (Reason: %u)\n", - mgmt->sa, reason_code); + sdata_info(sdata, "disassociated from %pM (Reason: %u=%s)\n", + mgmt->sa, reason_code, + ieee80211_get_reason_code_string(reason_code)); ieee80211_set_disassoc(sdata, 0, 0, false, NULL); @@ -2810,7 +2814,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, u32 *rates, u32 *basic_rates, bool *have_higher_than_11mbit, int *min_rate, int *min_rate_index, - int shift, u32 rate_flags) + int shift) { int i, j; @@ -2822,15 +2826,15 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, *have_higher_than_11mbit = true; /* - * BSS_MEMBERSHIP_SELECTOR_HT_PHY is defined in 802.11n-2009 - * 7.3.2.2 as a magic value instead of a rate. Hence, skip it. + * Skip HT and VHT BSS membership selectors since they're not + * rates. * - * Note: Even through the membership selector and the basic + * Note: Even though the membership selector and the basic * rate flag share the same bit, they are not exactly * the same. */ - if (!!(supp_rates[i] & 0x80) && - (supp_rates[i] & 0x7f) == BSS_MEMBERSHIP_SELECTOR_HT_PHY) + if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) || + supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY)) continue; for (j = 0; j < sband->n_bitrates; j++) { @@ -2838,8 +2842,6 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, int brate; br = &sband->bitrates[j]; - if ((rate_flags & br->flags) != rate_flags) - continue; brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5); if (brate == rate) { @@ -3001,7 +3003,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, goto out; } - sband = local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)]; + sband = ieee80211_get_sband(sdata); + if (!sband) { + mutex_unlock(&sdata->local->sta_mtx); + ret = false; + goto out; + } /* Set up internal HT/VHT capabilities */ if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) @@ -3085,6 +3092,18 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, } changed |= BSS_CHANGED_QOS; + if (elems.max_idle_period_ie) { + bss_conf->max_idle_period = + le16_to_cpu(elems.max_idle_period_ie->max_idle_period); + bss_conf->protected_keep_alive = + !!(elems.max_idle_period_ie->idle_options & + WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE); + changed |= BSS_CHANGED_KEEP_ALIVE; + } else { + bss_conf->max_idle_period = 0; + bss_conf->protected_keep_alive = false; + } + /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; @@ -3430,6 +3449,30 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } } + if (bss_conf->cqm_rssi_low && + ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { + int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal); + int last_event = ifmgd->last_cqm_event_signal; + int low = bss_conf->cqm_rssi_low; + int high = bss_conf->cqm_rssi_high; + + if (sig < low && + (last_event == 0 || last_event >= low)) { + ifmgd->last_cqm_event_signal = sig; + ieee80211_cqm_rssi_notify( + &sdata->vif, + NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, + sig, GFP_KERNEL); + } else if (sig > high && + (last_event == 0 || last_event <= high)) { + ifmgd->last_cqm_event_signal = sig; + ieee80211_cqm_rssi_notify( + &sdata->vif, + NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, + sig, GFP_KERNEL); + } + } + if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) { mlme_dbg_ratelimited(sdata, "cancelling AP probe due to a received beacon\n"); @@ -4333,6 +4376,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; + /* If a reconfig is happening, bail out */ + if (local->in_reconfig) + return -EBUSY; + if (assoc) { rcu_read_lock(); have_sta = sta_info_get(sdata, cbss->bssid); @@ -4345,40 +4392,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - if (new_sta || override) { - err = ieee80211_prep_channel(sdata, cbss); - if (err) { - if (new_sta) - sta_info_free(local, new_sta); - return -EINVAL; - } - } - + /* + * Set up the information for the new channel before setting the + * new channel. We can't - completely race-free - change the basic + * rates bitmap and the channel (sband) that it refers to, but if + * we set it up before we at least avoid calling into the driver's + * bss_info_changed() method with invalid information (since we do + * call that from changing the channel - only for IDLE and perhaps + * some others, but ...). + * + * So to avoid that, just set up all the new information before the + * channel, but tell the driver to apply it only afterwards, since + * it might need the new channel for that. + */ if (new_sta) { u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit; int min_rate = INT_MAX, min_rate_index = -1; - struct ieee80211_chanctx_conf *chanctx_conf; const struct cfg80211_bss_ies *ies; int shift = ieee80211_vif_get_shift(&sdata->vif); - u32 rate_flags; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - rcu_read_unlock(); - sta_info_free(local, new_sta); - return -EINVAL; - } - rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); - rcu_read_unlock(); ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len, &rates, &basic_rates, &have_higher_than_11mbit, &min_rate, &min_rate_index, - shift, rate_flags); + shift); /* * This used to be a workaround for basic rates missing @@ -4436,8 +4475,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.sync_dtim_count = 0; } rcu_read_unlock(); + } + + if (new_sta || override) { + err = ieee80211_prep_channel(sdata, cbss); + if (err) { + if (new_sta) + sta_info_free(local, new_sta); + return -EINVAL; + } + } - /* tell driver about BSSID, basic rates and timing */ + if (new_sta) { + /* + * tell driver about BSSID, basic rates and timing + * this was set up above, before setting the channel + */ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 76a8bcd8ef11237cf8715bf6a10c9085a2152d4d..a87d195c4a615761ed6041296b417ec3c9a8f996 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -10,7 +10,7 @@ static void ieee80211_sched_scan_cancel(struct ieee80211_local *local) { if (ieee80211_request_sched_scan_stop(local)) return; - cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0); } int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 206698bc93f406939bb5d883b6ab2f04bc1a3bed..ea1f4315c521be976c822918d7d821cffd3cc0e9 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc + * Copyright 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -61,6 +62,28 @@ void rate_control_rate_init(struct sta_info *sta) set_sta_flag(sta, WLAN_STA_RATE_CONTROL); } +void rate_control_tx_status(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_status *st) +{ + struct rate_control_ref *ref = local->rate_ctrl; + struct sta_info *sta = container_of(st->sta, struct sta_info, sta); + void *priv_sta = sta->rate_ctrl_priv; + + if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) + return; + + spin_lock_bh(&sta->rate_ctrl_lock); + if (ref->ops->tx_status_ext) + ref->ops->tx_status_ext(ref->priv, sband, priv_sta, st); + else if (st->skb) + ref->ops->tx_status(ref->priv, sband, st->sta, priv_sta, st->skb); + else + WARN_ON_ONCE(1); + + spin_unlock_bh(&sta->rate_ctrl_lock); +} + void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, struct sta_info *sta, u32 changed) @@ -173,9 +196,11 @@ ieee80211_rate_control_ops_get(const char *name) /* try default if specific alg requested but not found */ ops = ieee80211_try_rate_control_ops_get(ieee80211_default_rc_algo); - /* try built-in one if specific alg requested but not found */ - if (!ops && strlen(CONFIG_MAC80211_RC_DEFAULT)) + /* Note: check for > 0 is intentional to avoid clang warning */ + if (!ops && (strlen(CONFIG_MAC80211_RC_DEFAULT) > 0)) + /* try built-in one if specific alg requested but not found */ ops = ieee80211_try_rate_control_ops_get(CONFIG_MAC80211_RC_DEFAULT); + kernel_param_unlock(THIS_MODULE); return ops; @@ -208,7 +233,6 @@ static struct rate_control_ref *rate_control_alloc(const char *name, ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) return NULL; - ref->local = local; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto free; @@ -229,18 +253,45 @@ static struct rate_control_ref *rate_control_alloc(const char *name, return NULL; } -static void rate_control_free(struct rate_control_ref *ctrl_ref) +static void rate_control_free(struct ieee80211_local *local, + struct rate_control_ref *ctrl_ref) { ctrl_ref->ops->free(ctrl_ref->priv); #ifdef CONFIG_MAC80211_DEBUGFS - debugfs_remove_recursive(ctrl_ref->local->debugfs.rcdir); - ctrl_ref->local->debugfs.rcdir = NULL; + debugfs_remove_recursive(local->debugfs.rcdir); + local->debugfs.rcdir = NULL; #endif kfree(ctrl_ref); } +void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + u32 user_mask, basic_rates = sdata->vif.bss_conf.basic_rates; + enum nl80211_band band; + + if (WARN_ON(!sdata->vif.bss_conf.chandef.chan)) + return; + + if (WARN_ON_ONCE(!basic_rates)) + return; + + band = sdata->vif.bss_conf.chandef.chan->band; + user_mask = sdata->rc_rateidx_mask[band]; + sband = local->hw.wiphy->bands[band]; + + if (user_mask & basic_rates) + return; + + sdata_dbg(sdata, + "no overlap between basic rates (0x%x) and user mask (0x%x on band %d) - clearing the latter", + basic_rates, user_mask, band); + sdata->rc_rateidx_mask[band] = (1 << sband->n_bitrates) - 1; +} + static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; @@ -875,7 +926,9 @@ int rate_control_set_rates(struct ieee80211_hw *hw, struct ieee80211_sta_rates *old; struct ieee80211_supported_band *sband; - sband = hw->wiphy->bands[ieee80211_get_sdata_band(sta->sdata)]; + sband = ieee80211_get_sband(sta->sdata); + if (!sband) + return -EINVAL; rate_control_apply_mask_ratetbl(sta, sband, rates); /* * mac80211 guarantees that this function will not be called @@ -936,6 +989,6 @@ void rate_control_deinitialize(struct ieee80211_local *local) return; local->rate_ctrl = NULL; - rate_control_free(ref); + rate_control_free(local, ref); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 8d3260785b940d3ea87679f86b98015ce1b4fbbd..8212bfeb71d6b382fda7e2efe243304c7475db2a 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -20,7 +20,6 @@ #include "driver-ops.h" struct rate_control_ref { - struct ieee80211_local *local; const struct rate_control_ops *ops; void *priv; }; @@ -29,47 +28,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc); -static inline void rate_control_tx_status(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta, - struct sk_buff *skb) -{ - struct rate_control_ref *ref = local->rate_ctrl; - struct ieee80211_sta *ista = &sta->sta; - void *priv_sta = sta->rate_ctrl_priv; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) - return; - - spin_lock_bh(&sta->rate_ctrl_lock); - if (ref->ops->tx_status) - ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); - else - ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info); - spin_unlock_bh(&sta->rate_ctrl_lock); -} - -static inline void -rate_control_tx_status_noskb(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta, - struct ieee80211_tx_info *info) -{ - struct rate_control_ref *ref = local->rate_ctrl; - struct ieee80211_sta *ista = &sta->sta; - void *priv_sta = sta->rate_ctrl_priv; - - if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) - return; - - if (WARN_ON_ONCE(!ref->ops->tx_status_noskb)) - return; - - spin_lock_bh(&sta->rate_ctrl_lock); - ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info); - spin_unlock_bh(&sta->rate_ctrl_lock); -} +void rate_control_tx_status(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct ieee80211_tx_status *st); void rate_control_rate_init(struct sta_info *sta); void rate_control_rate_update(struct ieee80211_local *local, @@ -111,6 +72,8 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } +void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata); + /* Get a reference to the rate control algorithm. If `name' is NULL, get the * first available algorithm. */ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 3ebe4405a2d43d66f76bdef3fd4f10cd3fa82b8f..9766c1cc4b0a5d59ae4c71aea92921391a1576b3 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -264,9 +264,9 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) static void minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct ieee80211_tx_info *info) + void *priv_sta, struct ieee80211_tx_status *st) { + struct ieee80211_tx_info *info = st->info; struct minstrel_priv *mp = priv; struct minstrel_sta_info *mi = priv_sta; struct ieee80211_tx_rate *ar = info->status.rates; @@ -726,7 +726,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta) const struct rate_control_ops mac80211_minstrel = { .name = "minstrel", - .tx_status_noskb = minstrel_tx_status, + .tx_status_ext = minstrel_tx_status, .get_rate = minstrel_get_rate, .rate_init = minstrel_rate_init, .alloc = minstrel_alloc, diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 8e783e197e935cd8bcd0b7b7d5e88b8d2c9210cc..4a5bdad9f303034f0aee90ed3ac00cb044458a8d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -678,9 +678,9 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb) static void minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct ieee80211_tx_info *info) + void *priv_sta, struct ieee80211_tx_status *st) { + struct ieee80211_tx_info *info = st->info; struct minstrel_ht_sta_priv *msp = priv_sta; struct minstrel_ht_sta *mi = &msp->ht; struct ieee80211_tx_rate *ar = info->status.rates; @@ -690,8 +690,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, int i; if (!msp->is_ht) - return mac80211_minstrel.tx_status_noskb(priv, sband, sta, - &msp->legacy, info); + return mac80211_minstrel.tx_status_ext(priv, sband, + &msp->legacy, st); /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && @@ -1374,7 +1374,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", - .tx_status_noskb = minstrel_ht_tx_status, + .tx_status_ext = minstrel_ht_tx_status, .get_rate = minstrel_ht_get_rate, .rate_init = minstrel_ht_rate_init, .rate_update = minstrel_ht_rate_update, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e48724a6725e3266c1d5559d268339a7d2cd7f10..3674fe3d67dc74ba9042e91920f43376325c6b6a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -95,24 +95,13 @@ static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, * This function cleans up the SKB, i.e. it removes all the stuff * only useful for monitoring. */ -static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, - struct sk_buff *skb, - unsigned int rtap_vendor_space) +static void remove_monitor_info(struct sk_buff *skb, + unsigned int present_fcs_len, + unsigned int rtap_vendor_space) { - if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) { - if (likely(skb->len > FCS_LEN)) - __pskb_trim(skb, skb->len - FCS_LEN); - else { - /* driver bug */ - WARN_ON(1); - dev_kfree_skb(skb); - return NULL; - } - } - + if (present_fcs_len) + __pskb_trim(skb, skb->len - present_fcs_len); __pskb_pull(skb, rtap_vendor_space); - - return skb; } static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len, @@ -167,7 +156,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* padding for RX_FLAGS if necessary */ len = ALIGN(len, 2); - if (status->flag & RX_FLAG_HT) /* HT info */ + if (status->encoding == RX_ENC_HT) /* HT info */ len += 3; if (status->flag & RX_FLAG_AMPDU_DETAILS) { @@ -175,7 +164,7 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 8; } - if (status->flag & RX_FLAG_VHT) { + if (status->encoding == RX_ENC_VHT) { len = ALIGN(len, 2); len += 12; } @@ -208,6 +197,51 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, return len; } +static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + int rtap_vendor_space) +{ + struct { + struct ieee80211_hdr_3addr hdr; + u8 category; + u8 action_code; + } __packed action; + + if (!sdata) + return; + + BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1); + + if (skb->len < rtap_vendor_space + sizeof(action) + + VHT_MUMIMO_GROUPS_DATA_LEN) + return; + + if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr)) + return; + + skb_copy_bits(skb, rtap_vendor_space, &action, sizeof(action)); + + if (!ieee80211_is_action(action.hdr.frame_control)) + return; + + if (action.category != WLAN_CATEGORY_VHT) + return; + + if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT) + return; + + if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr)) + return; + + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) + return; + + skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); +} + /* * ieee80211_add_rx_radiotap_header - add radiotap header * @@ -295,12 +329,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos |= IEEE80211_RADIOTAP_F_FCS; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) *pos |= IEEE80211_RADIOTAP_F_BADFCS; - if (status->flag & RX_FLAG_SHORTPRE) + if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; pos++; /* IEEE80211_RADIOTAP_RATE */ - if (!rate || status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) { + if (!rate || status->encoding != RX_ENC_LEGACY) { /* * Without rate information don't add it. If we have, * MCS information is a separate field in radiotap, @@ -311,9 +345,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, } else { int shift = 0; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); - if (status->flag & RX_FLAG_10MHZ) + if (status->bw == RATE_INFO_BW_10) shift = 1; - else if (status->flag & RX_FLAG_5MHZ) + else if (status->bw == RATE_INFO_BW_5) shift = 2; *pos = DIV_ROUND_UP(rate->bitrate, 5 * (1 << shift)); } @@ -322,14 +356,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_CHANNEL */ put_unaligned_le16(status->freq, pos); pos += 2; - if (status->flag & RX_FLAG_10MHZ) + if (status->bw == RATE_INFO_BW_10) channel_flags |= IEEE80211_CHAN_HALF; - else if (status->flag & RX_FLAG_5MHZ) + else if (status->bw == RATE_INFO_BW_5) channel_flags |= IEEE80211_CHAN_QUARTER; if (status->band == NL80211_BAND_5GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; - else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) + else if (status->encoding != RX_ENC_LEGACY) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; else if (rate && rate->flags & IEEE80211_RATE_ERP_G) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; @@ -368,21 +402,21 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, put_unaligned_le16(rx_flags, pos); pos += 2; - if (status->flag & RX_FLAG_HT) { + if (status->encoding == RX_ENC_HT) { unsigned int stbc; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); *pos++ = local->hw.radiotap_mcs_details; *pos = 0; - if (status->flag & RX_FLAG_SHORT_GI) + if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) *pos |= IEEE80211_RADIOTAP_MCS_SGI; - if (status->flag & RX_FLAG_40MHZ) + if (status->bw == RATE_INFO_BW_40) *pos |= IEEE80211_RADIOTAP_MCS_BW_40; - if (status->flag & RX_FLAG_HT_GF) + if (status->enc_flags & RX_ENC_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; - if (status->flag & RX_FLAG_LDPC) + if (status->enc_flags & RX_ENC_FLAG_LDPC) *pos |= IEEE80211_RADIOTAP_MCS_FEC_LDPC; - stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT; + stbc = (status->enc_flags & RX_ENC_FLAG_STBC_MASK) >> RX_ENC_FLAG_STBC_SHIFT; *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT; pos++; *pos++ = status->rate_idx; @@ -415,35 +449,40 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos++ = 0; } - if (status->flag & RX_FLAG_VHT) { + if (status->encoding == RX_ENC_VHT) { u16 known = local->hw.radiotap_vht_details; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); put_unaligned_le16(known, pos); pos += 2; /* flags */ - if (status->flag & RX_FLAG_SHORT_GI) + if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; /* in VHT, STBC is binary */ - if (status->flag & RX_FLAG_STBC_MASK) + if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_STBC; - if (status->vht_flag & RX_VHT_FLAG_BF) + if (status->enc_flags & RX_ENC_FLAG_BF) *pos |= IEEE80211_RADIOTAP_VHT_FLAG_BEAMFORMED; pos++; /* bandwidth */ - if (status->vht_flag & RX_VHT_FLAG_80MHZ) + switch (status->bw) { + case RATE_INFO_BW_80: *pos++ = 4; - else if (status->vht_flag & RX_VHT_FLAG_160MHZ) + break; + case RATE_INFO_BW_160: *pos++ = 11; - else if (status->flag & RX_FLAG_40MHZ) + break; + case RATE_INFO_BW_40: *pos++ = 1; - else /* 20 MHz */ + break; + default: *pos++ = 0; + } /* MCS/NSS */ - *pos = (status->rate_idx << 4) | status->vht_nss; + *pos = (status->rate_idx << 4) | status->nss; pos += 4; /* coding field */ - if (status->flag & RX_FLAG_LDPC) + if (status->enc_flags & RX_ENC_FLAG_LDPC) *pos |= IEEE80211_RADIOTAP_CODING_LDPC_USER0; pos++; /* group ID */ @@ -499,6 +538,59 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, } } +static struct sk_buff * +ieee80211_make_monitor_skb(struct ieee80211_local *local, + struct sk_buff **origskb, + struct ieee80211_rate *rate, + int rtap_vendor_space, bool use_origskb) +{ + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(*origskb); + int rt_hdrlen, needed_headroom; + struct sk_buff *skb; + + /* room for the radiotap header based on driver features */ + rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, *origskb); + needed_headroom = rt_hdrlen - rtap_vendor_space; + + if (use_origskb) { + /* only need to expand headroom if necessary */ + skb = *origskb; + *origskb = NULL; + + /* + * This shouldn't trigger often because most devices have an + * RX header they pull before we get here, and that should + * be big enough for our radiotap information. We should + * probably export the length to drivers so that we can have + * them allocate enough headroom to start with. + */ + if (skb_headroom(skb) < needed_headroom && + pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return NULL; + } + } else { + /* + * Need to make a copy and possibly remove radiotap header + * and FCS from the original. + */ + skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC); + + if (!skb) + return NULL; + } + + /* prepend radiotap information */ + ieee80211_add_rx_radiotap_header(local, skb, rate, rt_hdrlen, true); + + skb_reset_mac_header(skb); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + + return skb; +} + /* * This function copies a received frame to all monitor interfaces and * returns a cleaned-up SKB that no longer includes the FCS nor the @@ -510,14 +602,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb); struct ieee80211_sub_if_data *sdata; - int rt_hdrlen, needed_headroom; - struct sk_buff *skb, *skb2; - struct net_device *prev_dev = NULL; + struct sk_buff *monskb = NULL; int present_fcs_len = 0; unsigned int rtap_vendor_space = 0; - struct ieee80211_mgmt *mgmt; struct ieee80211_sub_if_data *monitor_sdata = rcu_dereference(local->monitor_sdata); + bool only_monitor = false; if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data; @@ -534,8 +624,15 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, * the SKB because it has a bad FCS/PLCP checksum. */ - if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) + if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) { + if (unlikely(origskb->len <= FCS_LEN)) { + /* driver bug */ + WARN_ON(1); + dev_kfree_skb(origskb); + return NULL; + } present_fcs_len = FCS_LEN; + } /* ensure hdr->frame_control and vendor radiotap data are in skb head */ if (!pskb_may_pull(origskb, 2 + rtap_vendor_space)) { @@ -543,104 +640,62 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, return NULL; } + only_monitor = should_drop_frame(origskb, present_fcs_len, + rtap_vendor_space); + if (!local->monitors || (status->flag & RX_FLAG_SKIP_MONITOR)) { - if (should_drop_frame(origskb, present_fcs_len, - rtap_vendor_space)) { + if (only_monitor) { dev_kfree_skb(origskb); return NULL; } - return remove_monitor_info(local, origskb, rtap_vendor_space); - } - - /* room for the radiotap header based on driver features */ - rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb); - needed_headroom = rt_hdrlen - rtap_vendor_space; - - if (should_drop_frame(origskb, present_fcs_len, rtap_vendor_space)) { - /* only need to expand headroom if necessary */ - skb = origskb; - origskb = NULL; - - /* - * This shouldn't trigger often because most devices have an - * RX header they pull before we get here, and that should - * be big enough for our radiotap information. We should - * probably export the length to drivers so that we can have - * them allocate enough headroom to start with. - */ - if (skb_headroom(skb) < needed_headroom && - pskb_expand_head(skb, needed_headroom, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return NULL; - } - } else { - /* - * Need to make a copy and possibly remove radiotap header - * and FCS from the original. - */ - skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC); - - origskb = remove_monitor_info(local, origskb, - rtap_vendor_space); - - if (!skb) - return origskb; + remove_monitor_info(origskb, present_fcs_len, + rtap_vendor_space); + return origskb; } - /* prepend radiotap information */ - ieee80211_add_rx_radiotap_header(local, skb, rate, rt_hdrlen, true); + ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space); - skb_reset_mac_header(skb); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); + list_for_each_entry_rcu(sdata, &local->mon_list, u.mntr.list) { + bool last_monitor = list_is_last(&sdata->u.mntr.list, + &local->mon_list); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) - continue; + if (!monskb) + monskb = ieee80211_make_monitor_skb(local, &origskb, + rate, + rtap_vendor_space, + only_monitor && + last_monitor); - if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) - continue; + if (monskb) { + struct sk_buff *skb; - if (!ieee80211_sdata_running(sdata)) - continue; + if (last_monitor) { + skb = monskb; + monskb = NULL; + } else { + skb = skb_clone(monskb, GFP_ATOMIC); + } - if (prev_dev) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) { - skb2->dev = prev_dev; - netif_receive_skb(skb2); + if (skb) { + skb->dev = sdata->dev; + ieee80211_rx_stats(skb->dev, skb->len); + netif_receive_skb(skb); } } - prev_dev = sdata->dev; - ieee80211_rx_stats(sdata->dev, skb->len); + if (last_monitor) + break; } - mgmt = (void *)skb->data; - if (monitor_sdata && - skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN && - ieee80211_is_action(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_VHT && - mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT && - is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) && - ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) { - struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC); - - if (mu_skb) { - mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&monitor_sdata->skb_queue, mu_skb); - ieee80211_queue_work(&local->hw, &monitor_sdata->work); - } - } + /* this happens if last_monitor was erroneously false */ + dev_kfree_skb(monskb); - if (prev_dev) { - skb->dev = prev_dev; - netif_receive_skb(skb); - } else - dev_kfree_skb(skb); + /* ditto */ + if (!origskb) + return NULL; + remove_monitor_info(origskb, present_fcs_len, rtap_vendor_space); return origskb; } @@ -1558,12 +1613,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) */ if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) && !ieee80211_has_morefrags(hdr->frame_control) && + !ieee80211_is_back_req(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && - /* PM bit is only checked in frames where it isn't reserved, + /* + * PM bit is only checked in frames where it isn't reserved, * in AP mode it's reserved in non-bufferable management frames * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field) + * BAR frames should be ignored as specified in + * IEEE 802.11-2012 10.2.1.2. */ (!ieee80211_is_mgmt(hdr->frame_control) || ieee80211_is_bufferable_mmpdu(hdr->frame_control))) { @@ -2437,7 +2496,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) { mpp_addr = hdr->addr3; proxied_addr = mesh_hdr->eaddr1; - } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) { + } else if ((mesh_hdr->flags & MESH_FLAGS_AE) == + MESH_FLAGS_AE_A5_A6) { /* has_a4 already checked in ieee80211_rx_mesh_check */ mpp_addr = hdr->addr4; proxied_addr = mesh_hdr->eaddr2; @@ -3286,8 +3346,8 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, status = IEEE80211_SKB_RXCB((rx->skb)); sband = rx->local->hw.wiphy->bands[status->band]; - if (!(status->flag & RX_FLAG_HT) && - !(status->flag & RX_FLAG_VHT)) + if (!(status->encoding == RX_ENC_HT) && + !(status->encoding == RX_ENC_VHT)) rate = &sband->bitrates[status->rate_idx]; ieee80211_rx_cooked_monitor(rx, rate); @@ -3524,7 +3584,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); - int multicast = is_multicast_ether_addr(hdr->addr1); + bool multicast = is_multicast_ether_addr(hdr->addr1); switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -3548,7 +3608,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; if (!rx->sta) { int rate_idx; - if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT)) + if (status->encoding != RX_ENC_LEGACY) rate_idx = 0; /* TODO: HT/VHT rates */ else rate_idx = status->rate_idx; @@ -3568,7 +3628,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return false; if (!rx->sta) { int rate_idx; - if (status->flag & RX_FLAG_HT) + if (status->encoding != RX_ENC_LEGACY) rate_idx = 0; /* TODO: HT rates */ else rate_idx = status->rate_idx; @@ -3610,6 +3670,27 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) !ether_addr_equal(bssid, hdr->addr1)) return false; } + + /* + * 802.11-2016 Table 9-26 says that for data frames, A1 must be + * the BSSID - we've checked that already but may have accepted + * the wildcard (ff:ff:ff:ff:ff:ff). + * + * It also says: + * The BSSID of the Data frame is determined as follows: + * a) If the STA is contained within an AP or is associated + * with an AP, the BSSID is the address currently in use + * by the STA contained in the AP. + * + * So we should not accept data frames with an address that's + * multicast. + * + * Accepting it also opens a security problem because stations + * could encrypt it with the GTK and inject traffic that way. + */ + if (ieee80211_is_data(hdr->frame_control) && multicast) + return false; + return true; case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) @@ -4210,7 +4291,8 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, * we probably can't have a valid rate here anyway. */ - if (status->flag & RX_FLAG_HT) { + switch (status->encoding) { + case RX_ENC_HT: /* * rate_idx is MCS index, which can be [0-76] * as documented on: @@ -4228,14 +4310,19 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, status->rate_idx, status->rate_idx)) goto drop; - } else if (status->flag & RX_FLAG_VHT) { + break; + case RX_ENC_VHT: if (WARN_ONCE(status->rate_idx > 9 || - !status->vht_nss || - status->vht_nss > 8, + !status->nss || + status->nss > 8, "Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n", - status->rate_idx, status->vht_nss)) + status->rate_idx, status->nss)) goto drop; - } else { + break; + default: + WARN_ON_ONCE(1); + /* fall through */ + case RX_ENC_LEGACY: if (WARN_ON(status->rate_idx >= sband->n_bitrates)) goto drop; rate = &sband->bitrates[status->rate_idx]; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index faab3c490d2b755f32ee09638eaf0d45067780cd..47d2ed5704700f110aa6a0f57ecb5af29e0c5db9 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -79,9 +79,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->flag & RX_FLAG_5MHZ) + if (rx_status->bw == RATE_INFO_BW_5) bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5; - if (rx_status->flag & RX_FLAG_10MHZ) + else if (rx_status->bw == RATE_INFO_BW_10) bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10; bss_meta.chan = channel; @@ -174,8 +174,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (beacon) { struct ieee80211_supported_band *sband = local->hw.wiphy->bands[rx_status->band]; - if (!(rx_status->flag & RX_FLAG_HT) && - !(rx_status->flag & RX_FLAG_VHT)) + if (!(rx_status->encoding == RX_ENC_HT) && + !(rx_status->encoding == RX_ENC_VHT)) bss->beacon_rate = &sband->bitrates[rx_status->rate_idx]; } @@ -1219,7 +1219,7 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw) trace_api_sched_scan_results(local); - cfg80211_sched_scan_results(hw->wiphy); + cfg80211_sched_scan_results(hw->wiphy, 0); } EXPORT_SYMBOL(ieee80211_sched_scan_results); @@ -1239,7 +1239,7 @@ void ieee80211_sched_scan_end(struct ieee80211_local *local) mutex_unlock(&local->mtx); - cfg80211_sched_scan_stopped(local->hw.wiphy); + cfg80211_sched_scan_stopped(local->hw.wiphy, 0); } void ieee80211_sched_scan_stopped_work(struct work_struct *work) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 97f4c9d6b54ced5bf9f337c55d334cd67646abe9..0782e486fe89306c3cd73d36f1f285dee130ddba 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -132,9 +132,9 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee80211_vht_operation vht_oper = { .chan_width = wide_bw_chansw_ie->new_channel_width, - .center_freq_seg1_idx = + .center_freq_seg0_idx = wide_bw_chansw_ie->new_center_freq_seg0, - .center_freq_seg2_idx = + .center_freq_seg1_idx = wide_bw_chansw_ie->new_center_freq_seg1, /* .basic_mcs_set doesn't matter */ }; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3323a2fb289bd035a1280043bd3d64c2509cb9e2..403e3cc58b573dc511c8ba399ddefdc7e8e24ba0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2,7 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015 - 2016 Intel Deutschland GmbH + * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -395,10 +395,15 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->sta.smps_mode = IEEE80211_SMPS_OFF; if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { - struct ieee80211_supported_band *sband = - hw->wiphy->bands[ieee80211_get_sdata_band(sdata)]; - u8 smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> - IEEE80211_HT_CAP_SM_PS_SHIFT; + struct ieee80211_supported_band *sband; + u8 smps; + + sband = ieee80211_get_sband(sdata); + if (!sband) + goto free_txq; + + smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> + IEEE80211_HT_CAP_SM_PS_SHIFT; /* * Assume that hostapd advertises our caps in the beacon and * this is the known_smps_mode for a station that just assciated @@ -1957,24 +1962,32 @@ sta_get_last_rx_stats(struct sta_info *sta) static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, struct rate_info *rinfo) { - rinfo->bw = (rate & STA_STATS_RATE_BW_MASK) >> - STA_STATS_RATE_BW_SHIFT; + rinfo->bw = STA_STATS_GET(BW, rate); - if (rate & STA_STATS_RATE_VHT) { + switch (STA_STATS_GET(TYPE, rate)) { + case STA_STATS_RATE_TYPE_VHT: rinfo->flags = RATE_INFO_FLAGS_VHT_MCS; - rinfo->mcs = rate & 0xf; - rinfo->nss = (rate & 0xf0) >> 4; - } else if (rate & STA_STATS_RATE_HT) { + rinfo->mcs = STA_STATS_GET(VHT_MCS, rate); + rinfo->nss = STA_STATS_GET(VHT_NSS, rate); + if (STA_STATS_GET(SGI, rate)) + rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; + break; + case STA_STATS_RATE_TYPE_HT: rinfo->flags = RATE_INFO_FLAGS_MCS; - rinfo->mcs = rate & 0xff; - } else if (rate & STA_STATS_RATE_LEGACY) { + rinfo->mcs = STA_STATS_GET(HT_MCS, rate); + if (STA_STATS_GET(SGI, rate)) + rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; + break; + case STA_STATS_RATE_TYPE_LEGACY: { struct ieee80211_supported_band *sband; u16 brate; unsigned int shift; + int band = STA_STATS_GET(LEGACY_BAND, rate); + int rate_idx = STA_STATS_GET(LEGACY_IDX, rate); rinfo->flags = 0; - sband = local->hw.wiphy->bands[(rate >> 4) & 0xf]; - brate = sband->bitrates[rate & 0xf].bitrate; + sband = local->hw.wiphy->bands[band]; + brate = sband->bitrates[rate_idx].bitrate; if (rinfo->bw == RATE_INFO_BW_5) shift = 2; else if (rinfo->bw == RATE_INFO_BW_10) @@ -1982,10 +1995,9 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, else shift = 0; rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); + break; + } } - - if (rate & STA_STATS_RATE_SGI) - rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) @@ -2143,7 +2155,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu); - sinfo->rx_packets += cpurxs->dropped; + sinfo->rx_dropped_misc += cpurxs->dropped; } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e65cda34d2bc000fb7e3738a6235ba5d53b8fde6..ea0747d6a6da194fec9116dc14b58a543f1accf2 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -1,7 +1,7 @@ /* * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015-2016 Intel Deutschland GmbH + * Copyright(c) 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,8 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_STOPPING 3 #define HT_AGG_STATE_WANT_START 4 #define HT_AGG_STATE_WANT_STOP 5 +#define HT_AGG_STATE_START_CB 6 +#define HT_AGG_STATE_STOP_CB 7 enum ieee80211_agg_stop_reason { AGG_STOP_DECLINED, @@ -324,6 +327,9 @@ struct ieee80211_fast_rx { struct rcu_head rcu_head; }; +/* we use only values in the range 0-100, so pick a large precision */ +DECLARE_EWMA(mesh_fail_avg, 20, 8) + /** * struct mesh_sta - mesh STA information * @plink_lock: serialize access to plink fields @@ -369,7 +375,7 @@ struct mesh_sta { enum nl80211_mesh_power_mode nonpeer_pm; /* moving percentage of failed MSDUs */ - unsigned int fail_avg; + struct ewma_mesh_fail_avg fail_avg; }; DECLARE_EWMA(signal, 10, 8) @@ -724,40 +730,55 @@ void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); unsigned long ieee80211_sta_last_active(struct sta_info *sta); +enum sta_stats_type { + STA_STATS_RATE_TYPE_INVALID = 0, + STA_STATS_RATE_TYPE_LEGACY, + STA_STATS_RATE_TYPE_HT, + STA_STATS_RATE_TYPE_VHT, +}; + +#define STA_STATS_FIELD_HT_MCS GENMASK( 7, 0) +#define STA_STATS_FIELD_LEGACY_IDX GENMASK( 3, 0) +#define STA_STATS_FIELD_LEGACY_BAND GENMASK( 7, 4) +#define STA_STATS_FIELD_VHT_MCS GENMASK( 3, 0) +#define STA_STATS_FIELD_VHT_NSS GENMASK( 7, 4) +#define STA_STATS_FIELD_BW GENMASK(11, 8) +#define STA_STATS_FIELD_SGI GENMASK(12, 12) +#define STA_STATS_FIELD_TYPE GENMASK(15, 13) + +#define STA_STATS_FIELD(_n, _v) FIELD_PREP(STA_STATS_FIELD_ ## _n, _v) +#define STA_STATS_GET(_n, _v) FIELD_GET(STA_STATS_FIELD_ ## _n, _v) + #define STA_STATS_RATE_INVALID 0 -#define STA_STATS_RATE_VHT 0x8000 -#define STA_STATS_RATE_HT 0x4000 -#define STA_STATS_RATE_LEGACY 0x2000 -#define STA_STATS_RATE_SGI 0x1000 -#define STA_STATS_RATE_BW_SHIFT 9 -#define STA_STATS_RATE_BW_MASK (0x7 << STA_STATS_RATE_BW_SHIFT) - -static inline u16 sta_stats_encode_rate(struct ieee80211_rx_status *s) + +static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s) { - u16 r = s->rate_idx; - - if (s->vht_flag & RX_VHT_FLAG_80MHZ) - r |= RATE_INFO_BW_80 << STA_STATS_RATE_BW_SHIFT; - else if (s->vht_flag & RX_VHT_FLAG_160MHZ) - r |= RATE_INFO_BW_160 << STA_STATS_RATE_BW_SHIFT; - else if (s->flag & RX_FLAG_40MHZ) - r |= RATE_INFO_BW_40 << STA_STATS_RATE_BW_SHIFT; - else if (s->flag & RX_FLAG_10MHZ) - r |= RATE_INFO_BW_10 << STA_STATS_RATE_BW_SHIFT; - else if (s->flag & RX_FLAG_5MHZ) - r |= RATE_INFO_BW_5 << STA_STATS_RATE_BW_SHIFT; - else - r |= RATE_INFO_BW_20 << STA_STATS_RATE_BW_SHIFT; - - if (s->flag & RX_FLAG_SHORT_GI) - r |= STA_STATS_RATE_SGI; - - if (s->flag & RX_FLAG_VHT) - r |= STA_STATS_RATE_VHT | (s->vht_nss << 4); - else if (s->flag & RX_FLAG_HT) - r |= STA_STATS_RATE_HT; - else - r |= STA_STATS_RATE_LEGACY | (s->band << 4); + u16 r; + + r = STA_STATS_FIELD(BW, s->bw); + + if (s->enc_flags & RX_ENC_FLAG_SHORT_GI) + r |= STA_STATS_FIELD(SGI, 1); + + switch (s->encoding) { + case RX_ENC_VHT: + r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_VHT); + r |= STA_STATS_FIELD(VHT_NSS, s->nss); + r |= STA_STATS_FIELD(VHT_MCS, s->rate_idx); + break; + case RX_ENC_HT: + r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HT); + r |= STA_STATS_FIELD(HT_MCS, s->rate_idx); + break; + case RX_ENC_LEGACY: + r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_LEGACY); + r |= STA_STATS_FIELD(LEGACY_BAND, s->band); + r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx); + break; + default: + WARN_ON(1); + return STA_STATS_RATE_INVALID; + } return r; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 83b8b11f24ea1dadc0501bd8a4c15598195524a2..be47ac5cd8c8dd44a23247979f0d1742cf089026 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -200,6 +200,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) } if (ieee80211_is_action(mgmt->frame_control) && + !ieee80211_has_protected(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_HT && mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS && ieee80211_sdata_running(sdata)) { @@ -630,61 +631,6 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, return rates_idx; } -void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, - struct ieee80211_sta *pubsta, - struct ieee80211_tx_info *info) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_supported_band *sband; - int retry_count; - bool acked, noack_success; - - ieee80211_tx_get_rates(hw, info, &retry_count); - - sband = hw->wiphy->bands[info->band]; - - acked = !!(info->flags & IEEE80211_TX_STAT_ACK); - noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED); - - if (pubsta) { - struct sta_info *sta; - - sta = container_of(pubsta, struct sta_info, sta); - - if (!acked) - sta->status_stats.retry_failed++; - sta->status_stats.retry_count += retry_count; - - if (acked) { - sta->status_stats.last_ack = jiffies; - - if (sta->status_stats.lost_packets) - sta->status_stats.lost_packets = 0; - - /* Track when last TDLS packet was ACKed */ - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) - sta->status_stats.last_tdls_pkt_time = jiffies; - } else { - ieee80211_lost_packet(sta, info); - } - - rate_control_tx_status_noskb(local, sband, sta, info); - } - - if (acked || noack_success) { - I802_DEBUG_INC(local->dot11TransmittedFrameCount); - if (!pubsta) - I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); - if (retry_count > 0) - I802_DEBUG_INC(local->dot11RetryCount); - if (retry_count > 1) - I802_DEBUG_INC(local->dot11MultipleRetryCount); - } else { - I802_DEBUG_INC(local->dot11FailedCount); - } -} -EXPORT_SYMBOL(ieee80211_tx_status_noskb); - void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_supported_band *sband, int retry_count, int shift, bool send_to_cooked) @@ -742,15 +688,16 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, dev_kfree_skb(skb); } -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +static void __ieee80211_tx_status(struct ieee80211_hw *hw, + struct ieee80211_tx_status *status) { + struct sk_buff *skb = status->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info = status->info; + struct sta_info *sta; __le16 fc; struct ieee80211_supported_band *sband; - struct rhlist_head *tmp; - struct sta_info *sta; int retry_count; int rates_idx; bool send_to_cooked; @@ -761,16 +708,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); - rcu_read_lock(); - sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - for_each_sta_info(local, hdr->addr1, sta, tmp) { - /* skip wrong virtual interface */ - if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) - continue; - + if (status->sta) { + sta = container_of(status->sta, struct sta_info, sta); shift = ieee80211_vif_get_shift(&sta->sdata->vif); if (info->flags & IEEE80211_TX_STATUS_EOSP) @@ -790,7 +732,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * that this TX packet failed because of that. */ ieee80211_handle_filtered_frame(local, sta, skb); - rcu_read_unlock(); return; } @@ -840,7 +781,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { ieee80211_handle_filtered_frame(local, sta, skb); - rcu_read_unlock(); return; } else { if (!acked) @@ -856,7 +796,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } } - rate_control_tx_status(local, sband, sta, skb); + rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) ieee80211s_update_metric(local, sta, skb); @@ -883,8 +823,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } } - rcu_read_unlock(); - ieee80211_led_tx(local); /* SNMP counters @@ -949,8 +887,96 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) /* send to monitor interfaces */ ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked); } + +void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_status status = { + .skb = skb, + .info = IEEE80211_SKB_CB(skb), + }; + struct rhlist_head *tmp; + struct sta_info *sta; + + rcu_read_lock(); + + for_each_sta_info(local, hdr->addr1, sta, tmp) { + /* skip wrong virtual interface */ + if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) + continue; + + status.sta = &sta->sta; + break; + } + + __ieee80211_tx_status(hw, &status); + rcu_read_unlock(); +} EXPORT_SYMBOL(ieee80211_tx_status); +void ieee80211_tx_status_ext(struct ieee80211_hw *hw, + struct ieee80211_tx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_info *info = status->info; + struct ieee80211_sta *pubsta = status->sta; + struct ieee80211_supported_band *sband; + int retry_count; + bool acked, noack_success; + + if (status->skb) + return __ieee80211_tx_status(hw, status); + + if (!status->sta) + return; + + ieee80211_tx_get_rates(hw, info, &retry_count); + + sband = hw->wiphy->bands[info->band]; + + acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED); + + if (pubsta) { + struct sta_info *sta; + + sta = container_of(pubsta, struct sta_info, sta); + + if (!acked) + sta->status_stats.retry_failed++; + sta->status_stats.retry_count += retry_count; + + if (acked) { + sta->status_stats.last_ack = jiffies; + + if (sta->status_stats.lost_packets) + sta->status_stats.lost_packets = 0; + + /* Track when last TDLS packet was ACKed */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) + sta->status_stats.last_tdls_pkt_time = jiffies; + } else { + ieee80211_lost_packet(sta, info); + } + + rate_control_tx_status(local, sband, status); + } + + if (acked || noack_success) { + I802_DEBUG_INC(local->dot11TransmittedFrameCount); + if (!pubsta) + I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount); + if (retry_count > 0) + I802_DEBUG_INC(local->dot11RetryCount); + if (retry_count > 1) + I802_DEBUG_INC(local->dot11MultipleRetryCount); + } else { + I802_DEBUG_INC(local->dot11FailedCount); + } +} +EXPORT_SYMBOL(ieee80211_tx_status_ext); + void ieee80211_report_low_ack(struct ieee80211_sta *pubsta, u32 num_packets) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index afca7d103684bbc8c953ae13bb0345934d8f5f35..f20dcf1b1830a2412ed96ebcc9ce3be65ef5d7e8 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -47,8 +47,7 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, NL80211_FEATURE_TDLS_CHANNEL_SWITCH; bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && !ifmgd->tdls_wider_bw_prohibited; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; + struct ieee80211_supported_band *sband = ieee80211_get_sband(sdata); bool vht = sband && sband->vht_cap.vht_supported; u8 *pos = (void *)skb_put(skb, 10); @@ -180,11 +179,14 @@ static void ieee80211_tdls_add_bss_coex_ie(struct sk_buff *skb) static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata, u16 status_code) { + struct ieee80211_supported_band *sband; + /* The capability will be 0 when sending a failure code */ if (status_code != 0) return 0; - if (ieee80211_get_sdata_band(sdata) == NL80211_BAND_2GHZ) { + sband = ieee80211_get_sband(sdata); + if (sband && sband->band == NL80211_BAND_2GHZ) { return WLAN_CAPABILITY_SHORT_SLOT_TIME | WLAN_CAPABILITY_SHORT_PREAMBLE; } @@ -358,17 +360,20 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, u8 action_code, bool initiator, const u8 *extra_ies, size_t extra_ies_len) { - enum nl80211_band band = ieee80211_get_sdata_band(sdata); - struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; + struct ieee80211_local *local = sdata->local; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; struct sta_info *sta = NULL; size_t offset = 0, noffset; u8 *pos; - ieee80211_add_srates_ie(sdata, skb, false, band); - ieee80211_add_ext_srates_ie(sdata, skb, false, band); + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + + ieee80211_add_srates_ie(sdata, skb, false, sband->band); + ieee80211_add_ext_srates_ie(sdata, skb, false, sband->band); ieee80211_tdls_add_supp_channels(sdata, skb); /* add any custom IEs that go before Extended Capabilities */ @@ -439,7 +444,6 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata, * the same on all bands. The specification limits the setup to a * single HT-cap, so use the current band for now. */ - sband = local->hw.wiphy->bands[band]; memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap)); if ((action_code == WLAN_TDLS_SETUP_REQUEST || @@ -545,9 +549,13 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t offset = 0, noffset; struct sta_info *sta, *ap_sta; - enum nl80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband; u8 *pos; + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, peer); @@ -612,7 +620,8 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); /* only include VHT-operation if not on the 2.4GHz band */ - if (band != NL80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) { + if (sband->band != NL80211_BAND_2GHZ && + sta->sta.vht_cap.vht_supported) { /* * if both peers support WIDER_BW, we can expand the chandef to * a wider compatible one, up to 80MHz diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ba8d7db0a07165e7b33f71caa260f352771d434a..04b22f8982fe42b41364d8d7feee0d5bd036c64a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -682,10 +682,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.skb = tx->skb; txrc.reported_rate.idx = -1; txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; - if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) - txrc.max_rate_idx = -1; - else - txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; if (tx->sdata->rc_has_mcs_mask[info->band]) txrc.rate_idx_mcs_mask = @@ -4249,10 +4245,6 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, txrc.skb = skb; txrc.reported_rate.idx = -1; txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; - if (txrc.rate_idx_mask == (1 << txrc.sband->n_bitrates) - 1) - txrc.max_rate_idx = -1; - else - txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); @@ -4305,7 +4297,10 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, return bcn; shift = ieee80211_vif_get_shift(vif); - sband = hw->wiphy->bands[ieee80211_get_sdata_band(vif_to_sdata(vif))]; + sband = ieee80211_get_sband(vif_to_sdata(vif)); + if (!sband) + return bcn; + ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false); return bcn; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ac59fbd280dff8f712fb9dbc841147e4642ec607..ac9ac6c35594a4c67d570bb40cc4ad44f3d7bfa0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2015-2016 Intel Deutschland GmbH + * Copyright (C) 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -828,6 +828,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, case WLAN_EID_EXT_CAPABILITY: case WLAN_EID_CHAN_SWITCH_TIMING: case WLAN_EID_LINK_ID: + case WLAN_EID_BSS_MAX_IDLE_PERIOD: /* * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible * that if the content gets bigger it might be needed more than once @@ -1089,6 +1090,10 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, else elem_parse_failed = true; break; + case WLAN_EID_BSS_MAX_IDLE_PERIOD: + if (elen >= sizeof(*elems->max_idle_period_ie)) + elems->max_idle_period_ie = (void *)pos; + break; default: break; } @@ -1590,14 +1595,14 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, size_t num_rates; u32 supp_rates, rate_flags; int i, j, shift; + sband = sdata->local->hw.wiphy->bands[band]; + if (WARN_ON(!sband)) + return 1; rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); shift = ieee80211_vif_get_shift(&sdata->vif); - if (WARN_ON(!sband)) - return 1; - num_rates = sband->n_bitrates; supp_rates = 0; for (i = 0; i < elems->supp_rates_len + @@ -1983,6 +1988,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->u.mgd.have_beacon) changed |= BSS_CHANGED_BEACON_INFO; + if (sdata->vif.bss_conf.max_idle_period || + sdata->vif.bss_conf.protected_keep_alive) + changed |= BSS_CHANGED_KEEP_ALIVE; + sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); sdata_unlock(sdata); @@ -2103,7 +2112,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (sched_scan_stopped) - cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0); wake_up: if (local->in_reconfig) { @@ -2413,13 +2422,13 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, *pos++ = WLAN_EID_VHT_OPERATION; *pos++ = sizeof(struct ieee80211_vht_operation); vht_oper = (struct ieee80211_vht_operation *)pos; - vht_oper->center_freq_seg1_idx = ieee80211_frequency_to_channel( + vht_oper->center_freq_seg0_idx = ieee80211_frequency_to_channel( chandef->center_freq1); if (chandef->center_freq2) - vht_oper->center_freq_seg2_idx = + vht_oper->center_freq_seg1_idx = ieee80211_frequency_to_channel(chandef->center_freq2); else - vht_oper->center_freq_seg2_idx = 0x00; + vht_oper->center_freq_seg1_idx = 0x00; switch (chandef->width) { case NL80211_CHAN_WIDTH_160: @@ -2428,11 +2437,11 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, * workaround. */ vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; - vht_oper->center_freq_seg2_idx = vht_oper->center_freq_seg1_idx; + vht_oper->center_freq_seg1_idx = vht_oper->center_freq_seg0_idx; if (chandef->chan->center_freq < chandef->center_freq1) - vht_oper->center_freq_seg1_idx -= 8; + vht_oper->center_freq_seg0_idx -= 8; else - vht_oper->center_freq_seg1_idx += 8; + vht_oper->center_freq_seg0_idx += 8; break; case NL80211_CHAN_WIDTH_80P80: /* @@ -2491,9 +2500,9 @@ bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper, if (!oper) return false; - cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx, + cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg0_idx, chandef->chan->band); - cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg2_idx, + cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx, chandef->chan->band); switch (oper->chan_width) { @@ -2503,11 +2512,11 @@ bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper, new.width = NL80211_CHAN_WIDTH_80; new.center_freq1 = cf1; /* If needed, adjust based on the newer interop workaround. */ - if (oper->center_freq_seg2_idx) { + if (oper->center_freq_seg1_idx) { unsigned int diff; - diff = abs(oper->center_freq_seg2_idx - - oper->center_freq_seg1_idx); + diff = abs(oper->center_freq_seg1_idx - + oper->center_freq_seg0_idx); if (diff == 8) { new.width = NL80211_CHAN_WIDTH_160; new.center_freq1 = cf2; @@ -2715,42 +2724,39 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, memset(&ri, 0, sizeof(ri)); /* Fill cfg80211 rate info */ - if (status->flag & RX_FLAG_HT) { + switch (status->encoding) { + case RX_ENC_HT: ri.mcs = status->rate_idx; ri.flags |= RATE_INFO_FLAGS_MCS; - if (status->flag & RX_FLAG_40MHZ) - ri.bw = RATE_INFO_BW_40; - else - ri.bw = RATE_INFO_BW_20; - if (status->flag & RX_FLAG_SHORT_GI) + ri.bw = status->bw; + if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; - } else if (status->flag & RX_FLAG_VHT) { + break; + case RX_ENC_VHT: ri.flags |= RATE_INFO_FLAGS_VHT_MCS; ri.mcs = status->rate_idx; - ri.nss = status->vht_nss; - if (status->flag & RX_FLAG_40MHZ) - ri.bw = RATE_INFO_BW_40; - else if (status->vht_flag & RX_VHT_FLAG_80MHZ) - ri.bw = RATE_INFO_BW_80; - else if (status->vht_flag & RX_VHT_FLAG_160MHZ) - ri.bw = RATE_INFO_BW_160; - else - ri.bw = RATE_INFO_BW_20; - if (status->flag & RX_FLAG_SHORT_GI) + ri.nss = status->nss; + ri.bw = status->bw; + if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; - } else { + break; + default: + WARN_ON(1); + /* fall through */ + case RX_ENC_LEGACY: { struct ieee80211_supported_band *sband; int shift = 0; int bitrate; - if (status->flag & RX_FLAG_10MHZ) { + ri.bw = status->bw; + + switch (status->bw) { + case RATE_INFO_BW_10: shift = 1; - ri.bw = RATE_INFO_BW_10; - } else if (status->flag & RX_FLAG_5MHZ) { + break; + case RATE_INFO_BW_5: shift = 2; - ri.bw = RATE_INFO_BW_5; - } else { - ri.bw = RATE_INFO_BW_20; + break; } sband = local->hw.wiphy->bands[status->band]; @@ -2762,19 +2768,21 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, if (status->band == NL80211_BAND_5GHZ) { ts += 20 << shift; mpdu_offset += 2; - } else if (status->flag & RX_FLAG_SHORTPRE) { + } else if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) { ts += 96; } else { ts += 192; } } + break; + } } rate = cfg80211_calculate_bitrate(&ri); if (WARN_ONCE(!rate, "Invalid bitrate: flags=0x%llx, idx=%d, vht_nss=%d\n", (unsigned long long)status->flag, status->rate_idx, - status->vht_nss)) + status->nss)) return 0; /* rewind from end of MPDU */ @@ -2791,8 +2799,10 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; struct cfg80211_chan_def chandef; + /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */ + ASSERT_RTNL(); + mutex_lock(&local->mtx); - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { /* it might be waiting for the local->mtx, but then * by the time it gets it, sdata->wdev.cac_started @@ -2809,7 +2819,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) GFP_KERNEL); } } - mutex_unlock(&local->iflist_mtx); mutex_unlock(&local->mtx); } @@ -2831,7 +2840,9 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) } mutex_unlock(&local->chanctx_mtx); + rtnl_lock(); ieee80211_dfs_cac_cancel(local); + rtnl_unlock(); if (num_chanctx > 1) /* XXX: multi-channel is not supported yet */ @@ -2846,7 +2857,7 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw) trace_api_radar_detected(local); - ieee80211_queue_work(hw, &local->radar_detected_work); + schedule_work(&local->radar_detected_work); } EXPORT_SYMBOL(ieee80211_radar_detected); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c1ef22df865fe77bf7cf0a42d560f54db98a3edd..cc19614ff4e60bed9a351b8f8aa73792103665a0 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "ieee80211_i.h" #include "michael.h" @@ -153,7 +154,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); - if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0) + if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN)) goto mic_fail; /* remove Michael MIC from payload */ @@ -1048,7 +1049,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) bip_aad(skb, aad); ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mic); - if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_cmac.icverrors++; return RX_DROP_UNUSABLE; } @@ -1098,7 +1099,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) bip_aad(skb, aad); ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mic); - if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_cmac.icverrors++; return RX_DROP_UNUSABLE; } @@ -1202,7 +1203,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, skb->data + 24, skb->len - 24, mic) < 0 || - memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_gmac.icverrors++; return RX_DROP_UNUSABLE; } diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 56ccffa3f2bfc7731adfaabb1026ef7e8af68d32..62141dcec2d66020fb4eb7bb698e880809878028 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -19,6 +19,7 @@ #ifndef __IEEE802154_I_H #define __IEEE802154_I_H +#include #include #include #include diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 06019dba4b10e3e0d079ee617707ae5412269144..bd88a9b80773e20e5de1f5a66ae7a3471387135e 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -526,8 +526,6 @@ static void mac802154_wpan_free(struct net_device *dev) struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); mac802154_llsec_destroy(&sdata->sec); - - free_netdev(dev); } static void ieee802154_if_setup(struct net_device *dev) @@ -593,7 +591,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, sdata->dev->dev_addr); sdata->dev->header_ops = &mac802154_header_ops; - sdata->dev->destructor = mac802154_wpan_free; + sdata->dev->needs_free_netdev = true; + sdata->dev->priv_destructor = mac802154_wpan_free; sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; wpan_dev->promiscuous_mode = false; @@ -608,7 +607,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, break; case NL802154_IFTYPE_MONITOR: - sdata->dev->destructor = free_netdev; + sdata->dev->needs_free_netdev = true; sdata->dev->netdev_ops = &mac802154_monitor_ops; wpan_dev->promiscuous_mode = true; break; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 6414079aa7297eee8fbd6320fb4392c6d8865e34..7b05fd1497ceddea47c2c7917f5ee58f6ff2d560 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,6 +24,9 @@ #include #include "internal.h" +/* max memory we will use for mpls_route */ +#define MAX_MPLS_ROUTE_MEM 4096 + /* Maximum number of labels to look ahead at when selecting a path of * a multipath route */ @@ -32,7 +35,9 @@ #define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1) static int zero = 0; +static int one = 1; static int label_limit = (1 << 20) - 1; +static int ttl_max = 255; static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, @@ -58,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible); static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) { - u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); - int nh_index = nh - rt->rt_nh; - - return nh0_via + rt->rt_max_alen * nh_index; + return (u8 *)nh + rt->rt_via_offset; } static const u8 *mpls_nh_via(const struct mpls_route *rt, @@ -187,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) return hash; } +static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index) +{ + return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size); +} + +/* number of alive nexthops (rt->rt_nhn_alive) and the flags for + * a next hop (nh->nh_flags) are modified by netdev event handlers. + * Since those fields can change at any moment, use READ_ONCE to + * access both. + */ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, struct sk_buff *skb) { - int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; int nh_index = 0; int n = 0; + u8 alive; /* No need to look further into packet if there's only * one path */ if (rt->rt_nhn == 1) - goto out; + return rt->rt_nh; - if (alive <= 0) + alive = READ_ONCE(rt->rt_nhn_alive); + if (alive == 0) return NULL; hash = mpls_multipath_hash(rt, skb); @@ -209,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive == rt->rt_nhn) goto out; for_nexthops(rt) { - if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + unsigned int nh_flags = READ_ONCE(nh->nh_flags); + + if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) continue; if (n == nh_index) return nh; @@ -217,11 +232,11 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } endfor_nexthops(rt); out: - return &rt->rt_nh[nh_index]; + return mpls_get_nexthop(rt, nh_index); } -static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, - struct mpls_entry_decoded dec) +static bool mpls_egress(struct net *net, struct mpls_route *rt, + struct sk_buff *skb, struct mpls_entry_decoded dec) { enum mpls_payload_type payload_type; bool success = false; @@ -246,22 +261,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, switch (payload_type) { case MPT_IPV4: { struct iphdr *hdr4 = ip_hdr(skb); + u8 new_ttl; skb->protocol = htons(ETH_P_IP); + + /* If propagating TTL, take the decremented TTL from + * the incoming MPLS header, otherwise decrement the + * TTL, but only if not 0 to avoid underflow. + */ + if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED || + (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT && + net->mpls.ip_ttl_propagate)) + new_ttl = dec.ttl; + else + new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0; + csum_replace2(&hdr4->check, htons(hdr4->ttl << 8), - htons(dec.ttl << 8)); - hdr4->ttl = dec.ttl; + htons(new_ttl << 8)); + hdr4->ttl = new_ttl; success = true; break; } case MPT_IPV6: { struct ipv6hdr *hdr6 = ipv6_hdr(skb); skb->protocol = htons(ETH_P_IPV6); - hdr6->hop_limit = dec.ttl; + + /* If propagating TTL, take the decremented TTL from + * the incoming MPLS header, otherwise decrement the + * hop limit, but only if not 0 to avoid underflow. + */ + if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED || + (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT && + net->mpls.ip_ttl_propagate)) + hdr6->hop_limit = dec.ttl; + else if (hdr6->hop_limit) + hdr6->hop_limit = hdr6->hop_limit - 1; success = true; break; } case MPT_UNSPEC: + /* Should have decided which protocol it is by now */ break; } @@ -361,7 +400,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ - if (!mpls_egress(rt, skb, dec)) + if (!mpls_egress(dev_net(out_dev), rt, skb, dec)) goto err; } else { bool bos; @@ -412,6 +451,7 @@ static struct packet_type mpls_packet_type __read_mostly = { static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { [RTA_DST] = { .type = NLA_U32 }, [RTA_OIF] = { .type = NLA_U32 }, + [RTA_TTL_PROPAGATE] = { .type = NLA_U8 }, }; struct mpls_route_config { @@ -421,6 +461,7 @@ struct mpls_route_config { u8 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; + u8 rc_ttl_propagate; u8 rc_output_labels; u32 rc_output_label[MAX_NEW_LABELS]; u32 rc_nlflags; @@ -430,20 +471,27 @@ struct mpls_route_config { int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) +/* all nexthops within a route have the same size based on max + * number of labels and max via length for a hop + */ +static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels) { - u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); + u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen); struct mpls_route *rt; + size_t size; - rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), - VIA_ALEN_ALIGN) + - num_nh * max_alen_aligned, - GFP_KERNEL); - if (rt) { - rt->rt_nhn = num_nh; - rt->rt_nhn_alive = num_nh; - rt->rt_max_alen = max_alen_aligned; - } + size = sizeof(*rt) + num_nh * nh_size; + if (size > MAX_MPLS_ROUTE_MEM) + return ERR_PTR(-EINVAL); + + rt = kzalloc(size, GFP_KERNEL); + if (!rt) + return ERR_PTR(-ENOMEM); + + rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; + rt->rt_nh_size = nh_size; + rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels); return rt; } @@ -648,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, return -ENOMEM; err = -EINVAL; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; nh->nh_labels = cfg->rc_output_labels; for (i = 0; i < nh->nh_labels; i++) @@ -675,7 +720,7 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, static int mpls_nh_build(struct net *net, struct mpls_route *rt, struct mpls_nh *nh, int oif, struct nlattr *via, - struct nlattr *newdst) + struct nlattr *newdst, u8 max_labels) { int err = -ENOMEM; @@ -683,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, goto errout; if (newdst) { - err = nla_get_labels(newdst, MAX_NEW_LABELS, + err = nla_get_labels(newdst, max_labels, &nh->nh_labels, nh->nh_label); if (err) goto errout; @@ -708,22 +753,20 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, return err; } -static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, - u8 cfg_via_alen, u8 *max_via_alen) +static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen, + u8 *max_labels) { - int nhs = 0; int remaining = len; - - if (!rtnh) { - *max_via_alen = cfg_via_alen; - return 1; - } + u8 nhs = 0; *max_via_alen = 0; + *max_labels = 0; while (rtnh_ok(rtnh, remaining)) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); int attrlen; + u8 n_labels = 0; attrlen = rtnh_attrlen(rtnh); nla = nla_find(attrs, attrlen, RTA_VIA); @@ -737,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, via_alen); } + nla = nla_find(attrs, attrlen, RTA_NEWDST); + if (nla && + nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0) + return 0; + + *max_labels = max_t(u8, *max_labels, n_labels); + + /* number of nexthops is tracked by a u8. + * Check for overflow. + */ + if (nhs == 255) + return 0; nhs++; + rtnh = rtnh_next(rtnh, &remaining); } @@ -746,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, } static int mpls_nh_build_multi(struct mpls_route_config *cfg, - struct mpls_route *rt) + struct mpls_route *rt, u8 max_labels) { struct rtnexthop *rtnh = cfg->rc_mp; struct nlattr *nla_via, *nla_newdst; int remaining = cfg->rc_mp_len; - int nhs = 0; int err = 0; + u8 nhs = 0; change_nexthops(rt) { int attrlen; @@ -779,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, } err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst, + max_labels); if (err) goto errout; @@ -806,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg) int err = -EINVAL; u8 max_via_alen; unsigned index; - int nhs; + u8 max_labels; + u8 nhs; index = cfg->rc_label; @@ -844,21 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -EINVAL; - nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, - cfg->rc_via_alen, &max_via_alen); + if (cfg->rc_mp) { + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen, + &max_labels); + } else { + max_via_alen = cfg->rc_via_alen; + max_labels = cfg->rc_output_labels; + nhs = 1; + } + if (nhs == 0) goto errout; err = -ENOMEM; - rt = mpls_rt_alloc(nhs, max_via_alen); - if (!rt) + rt = mpls_rt_alloc(nhs, max_via_alen, max_labels); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); goto errout; + } rt->rt_protocol = cfg->rc_protocol; rt->rt_payload_type = cfg->rc_payload_type; + rt->rt_ttl_propagate = cfg->rc_ttl_propagate; if (cfg->rc_mp) - err = mpls_nh_build_multi(cfg, rt); + err = mpls_nh_build_multi(cfg, rt, max_labels); else err = mpls_nh_build_from_cfg(cfg, rt); if (err) @@ -1011,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type) return size; } -static void mpls_netconf_notify_devconf(struct net *net, int type, - struct mpls_dev *mdev) +static void mpls_netconf_notify_devconf(struct net *net, int event, + int type, struct mpls_dev *mdev) { struct sk_buff *skb; int err = -ENOBUFS; @@ -1021,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type, if (!skb) goto errout; - err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF, - 0, type); + err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -1042,7 +1110,8 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = { }; static int mpls_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX + 1]; @@ -1054,7 +1123,7 @@ static int mpls_netconf_get_devconf(struct sk_buff *in_skb, int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_mpls_policy); + devconf_mpls_policy, NULL); if (err < 0) goto errout; @@ -1155,9 +1224,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write, if (i == offsetof(struct mpls_dev, input_enabled) && val != oval) { - mpls_netconf_notify_devconf(net, - NETCONFA_INPUT, - mdev); + mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_INPUT, mdev); } } @@ -1198,10 +1266,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev, snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); - mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); + mdev->sysctl = register_net_sysctl(net, path, table); if (!mdev->sysctl) goto free; + mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev); return 0; free: @@ -1210,13 +1279,17 @@ static int mpls_dev_sysctl_register(struct net_device *dev, return -ENOBUFS; } -static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) +static void mpls_dev_sysctl_unregister(struct net_device *dev, + struct mpls_dev *mdev) { + struct net *net = dev_net(dev); struct ctl_table *table; table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); + + mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev); } static struct mpls_dev *mpls_add_dev(struct net_device *dev) @@ -1242,11 +1315,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) u64_stats_init(&mpls_stats->syncp); } + mdev->dev = dev; + err = mpls_dev_sysctl_register(dev, mdev); if (err) goto free; - mdev->dev = dev; rcu_assign_pointer(dev->mpls_ptr, mdev); return mdev; @@ -1269,8 +1343,7 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN; - unsigned int alive; + u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -1281,36 +1354,48 @@ static void mpls_ifdown(struct net_device *dev, int event) continue; alive = 0; + deleted = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; + if (rtnl_dereference(nh->nh_dev) != dev) goto next; switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nh->nh_flags |= RTNH_F_DEAD; + nh_flags |= RTNH_F_DEAD; /* fall through */ case NETDEV_CHANGE: - nh->nh_flags |= RTNH_F_LINKDOWN; + nh_flags |= RTNH_F_LINKDOWN; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); + + if (nh->nh_flags != nh_flags) + WRITE_ONCE(nh->nh_flags, nh_flags); next: - if (!(nh->nh_flags & nh_flags)) + if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; + if (!rtnl_dereference(nh->nh_dev)) + deleted++; } endfor_nexthops(rt); WRITE_ONCE(rt->rt_nhn_alive, alive); + + /* if there are no more nexthops, delete the route */ + if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) + mpls_route_update(net, index, NULL, NULL); } } -static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +static void mpls_ifup(struct net_device *dev, unsigned int flags) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); unsigned index; - int alive; + u8 alive; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { @@ -1321,20 +1406,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) alive = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; struct net_device *nh_dev = rtnl_dereference(nh->nh_dev); - if (!(nh->nh_flags & nh_flags)) { + if (!(nh_flags & flags)) { alive++; continue; } if (nh_dev != dev) continue; alive++; - nh->nh_flags &= ~nh_flags; + nh_flags &= ~flags; + WRITE_ONCE(nh->nh_flags, nh_flags); } endfor_nexthops(rt); - ACCESS_ONCE(rt->rt_nhn_alive) = alive; + WRITE_ONCE(rt->rt_nhn_alive, alive); } } @@ -1385,7 +1472,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, mpls_ifdown(dev, event); mdev = mpls_dev_get(dev); if (mdev) { - mpls_dev_sysctl_unregister(mdev); + mpls_dev_sysctl_unregister(dev, mdev); RCU_INIT_POINTER(dev->mpls_ptr, NULL); call_rcu(&mdev->rcu, mpls_dev_destroy_rcu); } @@ -1395,7 +1482,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (mdev) { int err; - mpls_dev_sysctl_unregister(mdev); + mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) return notifier_from_errno(err); @@ -1455,16 +1542,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u8 *labels, u32 label[]) + u8 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); - unsigned nla_labels; struct mpls_shim_hdr *nla_label; + u8 nla_labels; bool bos; int i; - /* len needs to be an even multiple of 4 (the label size) */ - if (len & 3) + /* len needs to be an even multiple of 4 (the label size). Number + * of labels is a u8 so check for overflow. + */ + if (len & 3 || len / 4 > 255) return -EINVAL; /* Limit the number of new labels allowed */ @@ -1472,6 +1561,10 @@ int nla_get_labels(const struct nlattr *nla, if (nla_labels > max_labels) return -EINVAL; + /* when label == NULL, caller wants number of labels */ + if (!label) + goto out; + nla_label = nla_data(nla); bos = true; for (i = nla_labels - 1; i >= 0; i--, bos = false) { @@ -1495,6 +1588,7 @@ int nla_get_labels(const struct nlattr *nla, label[i] = dec.label; } +out: *labels = nla_labels; return 0; } @@ -1550,13 +1644,13 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, int index; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy, + NULL); if (err < 0) goto errout; err = -EINVAL; rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); if (rtm->rtm_family != AF_MPLS) goto errout; @@ -1584,6 +1678,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_label = LABEL_NOT_SPECIFIED; cfg->rc_protocol = rtm->rtm_protocol; cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; + cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT; cfg->rc_nlflags = nlh->nlmsg_flags; cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->rc_nlinfo.nlh = nlh; @@ -1630,6 +1725,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->rc_mp_len = nla_len(nla); break; } + case RTA_TTL_PROPAGATE: + { + u8 ttl_propagate = nla_get_u8(nla); + + if (ttl_propagate > 1) + goto errout; + cfg->rc_ttl_propagate = ttl_propagate ? + MPLS_TTL_PROP_ENABLED : + MPLS_TTL_PROP_DISABLED; + break; + } default: /* Unsupported attribute */ goto errout; @@ -1641,29 +1747,47 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; + + err = mpls_route_del(cfg); +out: + kfree(cfg); - return mpls_route_del(&cfg); + return err; } -static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) +static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; + + err = mpls_route_add(cfg); +out: + kfree(cfg); - return mpls_route_add(&cfg); + return err; } static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, @@ -1690,6 +1814,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; + + if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) { + bool ttl_propagate = + rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED; + + if (nla_put_u8(skb, RTA_TTL_PROPAGATE, + ttl_propagate)) + goto nla_put_failure; + } if (rt->rt_nhn == 1) { const struct mpls_nh *nh = rt->rt_nh; @@ -1711,21 +1844,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, } else { struct rtnexthop *rtnh; struct nlattr *mp; - int dead = 0; - int linkdown = 0; + u8 linkdown = 0; + u8 dead = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; for_nexthops(rt) { + dev = rtnl_dereference(nh->nh_dev); + if (!dev) + continue; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; - dev = rtnl_dereference(nh->nh_dev); - if (dev) - rtnh->rtnh_ifindex = dev->ifindex; + rtnh->rtnh_ifindex = dev->ifindex; if (nh->nh_flags & RTNH_F_LINKDOWN) { rtnh->rtnh_flags |= RTNH_F_LINKDOWN; linkdown++; @@ -1800,7 +1935,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(4); /* RTA_DST */ + + nla_total_size(4) /* RTA_DST */ + + nla_total_size(1); /* RTA_TTL_PROPAGATE */ if (rt->rt_nhn == 1) { struct mpls_nh *nh = rt->rt_nh; @@ -1816,6 +1952,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) size_t nhsize = 0; for_nexthops(rt) { + if (!rtnl_dereference(nh->nh_dev)) + continue; nhsize += nla_total_size(sizeof(struct rtnexthop)); /* RTA_VIA */ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) @@ -1867,10 +2005,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) unsigned index; if (size) { - labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (!labels) - labels = vzalloc(size); - + labels = kvzalloc(size, GFP_KERNEL); if (!labels) goto nolabels; } @@ -1878,12 +2013,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(1, lo->addr_len); - if (!rt0) + rt0 = mpls_rt_alloc(1, lo->addr_len, 0); + if (IS_ERR(rt0)) goto nort0; RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; + rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt0->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, @@ -1891,12 +2027,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(1, lo->addr_len); - if (!rt2) + rt2 = mpls_rt_alloc(1, lo->addr_len, 0); + if (IS_ERR(rt2)) goto nort2; RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; + rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt2->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, @@ -1978,6 +2115,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write, return ret; } +#define MPLS_NS_SYSCTL_OFFSET(field) \ + (&((struct net *)0)->field) + static const struct ctl_table mpls_table[] = { { .procname = "platform_labels", @@ -1986,21 +2126,47 @@ static const struct ctl_table mpls_table[] = { .mode = 0644, .proc_handler = mpls_platform_labels, }, + { + .procname = "ip_ttl_propagate", + .data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "default_ttl", + .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl), + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &ttl_max, + }, { } }; static int mpls_net_init(struct net *net) { struct ctl_table *table; + int i; net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; + net->mpls.ip_ttl_propagate = 1; + net->mpls.default_ttl = 255; table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); if (table == NULL) return -ENOMEM; - table[0].data = net; + /* Table data contains only offsets relative to the base of + * the mdev at this point, so make them absolute. + */ + for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++) + table[i].data = (char *)net + (uintptr_t)table[i].data; + net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); if (net->mpls.ctl == NULL) { kfree(table); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 76360d8b95798e148c5d6a48ce3375eeadcad5a5..4db6a59713220dcefac8be970ab06b1ae25bcd5c 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -2,6 +2,11 @@ #define MPLS_INTERNAL_H #include +/* put a reasonable limit on the number of labels + * we will accept from userspace + */ +#define MAX_NEW_LABELS 30 + struct mpls_entry_decoded { u32 label; u8 ttl; @@ -64,7 +69,6 @@ struct mpls_dev { struct sk_buff; #define LABEL_NOT_SPECIFIED (1 << 20) -#define MAX_NEW_LABELS 2 /* This maximum ha length copied from the definition of struct neighbour */ #define VIA_ALEN_ALIGN sizeof(unsigned long) @@ -83,11 +87,35 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + + /* nh_flags is accessed under RCU in the packet path; it is + * modified handling netdev events with rtnl lock held + */ unsigned int nh_flags; - u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; u8 nh_via_table; + u8 nh_reserved1; + + u32 nh_label[0]; +}; + +/* offset of via from beginning of mpls_nh */ +#define MPLS_NH_VIA_OFF(num_labels) \ + ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \ + VIA_ALEN_ALIGN) + +/* all nexthops within a route have the same size based on the + * max number of labels and max via length across all nexthops + */ +#define MPLS_NH_SIZE(num_labels, max_via_alen) \ + (MPLS_NH_VIA_OFF((num_labels)) + \ + ALIGN((max_via_alen), VIA_ALEN_ALIGN)) + +enum mpls_ttl_propagation { + MPLS_TTL_PROP_DEFAULT, + MPLS_TTL_PROP_ENABLED, + MPLS_TTL_PROP_DISABLED, }; /* The route, nexthops and vias are stored together in the same memory @@ -98,16 +126,16 @@ struct mpls_nh { /* next hop label forwarding entry */ * +----------------------+ * | mpls_nh 0 | * +----------------------+ - * | ... | - * +----------------------+ - * | mpls_nh n-1 | - * +----------------------+ - * | alignment padding | + * | alignment padding | 4 bytes for odd number of labels * +----------------------+ * | via[rt_max_alen] 0 | * +----------------------+ + * | alignment padding | via's aligned on sizeof(unsigned long) + * +----------------------+ * | ... | * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ * | via[rt_max_alen] n-1 | * +----------------------+ */ @@ -116,22 +144,30 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; - unsigned int rt_nhn; - unsigned int rt_nhn_alive; + u8 rt_ttl_propagate; + u8 rt_nhn; + /* rt_nhn_alive is accessed under RCU in the packet path; it + * is modified handling netdev events with rtnl lock held + */ + u8 rt_nhn_alive; + u8 rt_nh_size; + u8 rt_via_offset; + u8 rt_reserved1; struct mpls_nh rt_nh[0]; }; #define for_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (rt)->rt_nh; \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define change_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \ + __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define endfor_nexthops(rt) } @@ -166,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, +int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, u32 label[]); int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, u8 via[]); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index e4e4424f9eb1f5531d22463687d74c2e2ca971a6..369c7a23c86c1407702917488c90e0f3e7f8ee1a 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -29,6 +29,7 @@ static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, + [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, }; static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) @@ -49,6 +50,7 @@ static int mpls_xmit(struct sk_buff *skb) struct rtable *rt = NULL; struct rt6_info *rt6 = NULL; struct mpls_dev *out_mdev; + struct net *net; int err = 0; bool bos; int i; @@ -56,17 +58,7 @@ static int mpls_xmit(struct sk_buff *skb) /* Find the output device */ out_dev = dst->dev; - - /* Obtain the ttl */ - if (dst->ops->family == AF_INET) { - ttl = ip_hdr(skb)->ttl; - rt = (struct rtable *)dst; - } else if (dst->ops->family == AF_INET6) { - ttl = ipv6_hdr(skb)->hop_limit; - rt6 = (struct rt6_info *)dst; - } else { - goto drop; - } + net = dev_net(out_dev); skb_orphan(skb); @@ -78,6 +70,38 @@ static int mpls_xmit(struct sk_buff *skb) tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate); + /* Obtain the ttl using the following set of rules. + * + * LWT ttl propagation setting: + * - disabled => use default TTL value from LWT + * - enabled => use TTL value from IPv4/IPv6 header + * - default => + * Global ttl propagation setting: + * - disabled => use default TTL value from global setting + * - enabled => use TTL value from IPv4/IPv6 header + */ + if (dst->ops->family == AF_INET) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && + !net->mpls.ip_ttl_propagate) + ttl = net->mpls.default_ttl; + else + ttl = ip_hdr(skb)->ttl; + rt = (struct rtable *)dst; + } else if (dst->ops->family == AF_INET6) { + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) + ttl = tun_encap_info->default_ttl; + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && + !net->mpls.ip_ttl_propagate) + ttl = net->mpls.default_ttl; + else + ttl = ipv6_hdr(skb)->hop_limit; + rt6 = (struct rt6_info *)dst; + } else { + goto drop; + } + /* Verify the destination can hold the packet */ new_header_size = mpls_encap_size(tun_encap_info); mtu = mpls_dev_mtu(out_dev); @@ -140,10 +164,11 @@ static int mpls_build_state(struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; + u8 n_labels; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, - mpls_iptunnel_policy); + mpls_iptunnel_policy, NULL); if (ret < 0) return ret; @@ -151,15 +176,32 @@ static int mpls_build_state(struct nlattr *nla, return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); + /* determine number of labels */ + if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], + MAX_NEW_LABELS, &n_labels, NULL)) + return -EINVAL; + + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + + n_labels * sizeof(u32)); if (!newts) return -ENOMEM; tun_encap_info = mpls_lwtunnel_encap(newts); - ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels, &tun_encap_info->labels, tun_encap_info->label); if (ret) goto errout; + + tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT; + + if (tb[MPLS_IPTUNNEL_TTL]) { + tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]); + /* TTL 0 implies propagate from IP header */ + tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ? + MPLS_TTL_PROP_DISABLED : + MPLS_TTL_PROP_ENABLED; + } + newts->type = LWTUNNEL_ENCAP_MPLS; newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; newts->headroom = mpls_encap_size(tun_encap_info); @@ -186,6 +228,10 @@ static int mpls_fill_encap_info(struct sk_buff *skb, tun_encap_info->label)) goto nla_put_failure; + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT && + nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -195,10 +241,16 @@ static int mpls_fill_encap_info(struct sk_buff *skb, static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) { struct mpls_iptunnel_encap *tun_encap_info; + int nlsize; tun_encap_info = mpls_lwtunnel_encap(lwtstate); - return nla_total_size(tun_encap_info->labels * 4); + nlsize = nla_total_size(tun_encap_info->labels * 4); + + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT) + nlsize += nla_total_size(1); + + return nlsize; } static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) @@ -207,10 +259,12 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b); int l; - if (a_hdr->labels != b_hdr->labels) + if (a_hdr->labels != b_hdr->labels || + a_hdr->ttl_propagate != b_hdr->ttl_propagate || + a_hdr->default_ttl != b_hdr->default_ttl) return 1; - for (l = 0; l < MAX_NEW_LABELS; l++) + for (l = 0; l < a_hdr->labels; l++) if (a_hdr->label[l] != b_hdr->label[l]) return 1; return 0; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a87a6f8a74d8c90b2e6801a9d391ced7084dd15f..552d606e57ca4aac6bfe5ad26c42c385a431ab68 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -126,14 +126,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) } EXPORT_SYMBOL(nf_register_net_hook); -void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +static struct nf_hook_entry * +__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { struct nf_hook_entry __rcu **pp; struct nf_hook_entry *p; pp = nf_hook_entry_head(net, reg); if (WARN_ON_ONCE(!pp)) - return; + return NULL; mutex_lock(&nf_hook_mutex); for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { @@ -145,7 +146,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) mutex_unlock(&nf_hook_mutex); if (!p) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); - return; + return NULL; } #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -154,10 +155,24 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif + + return p; +} + +void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +{ + struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg); + unsigned int nfq; + + if (!p) + return; + synchronize_net(); - nf_queue_nf_hook_drop(net, p); + /* other cpu might still process nfqueue verdict that used reg */ - synchronize_net(); + nfq = nf_queue_nf_hook_drop(net); + if (nfq) + synchronize_net(); kfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -183,10 +198,32 @@ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, EXPORT_SYMBOL(nf_register_net_hooks); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, - unsigned int n) + unsigned int hookcount) { - while (n-- > 0) - nf_unregister_net_hook(net, ®[n]); + struct nf_hook_entry *to_free[16]; + unsigned int i, n, nfq; + + do { + n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free)); + + for (i = 0; i < n; i++) + to_free[i] = __nf_unregister_net_hook(net, ®[i]); + + synchronize_net(); + + /* need 2nd synchronize_net() if nfqueue is used, skb + * can get reinjected right before nf_queue_hook_drop() + */ + nfq = nf_queue_nf_hook_drop(net); + if (nfq) + synchronize_net(); + + for (i = 0; i < n; i++) + kfree(to_free[i]); + + reg += n; + hookcount -= n; + } while (hookcount > 0); } EXPORT_SYMBOL(nf_unregister_net_hooks); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 6f09a99298cdf6aaf18c1ff947c76db2d4985865..8ad2b52a0b328249688656c1fa80a7135b932c02 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -232,7 +232,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_is_filled((const struct mtype_elem *)x) && + mtype_is_filled(x) && #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; @@ -248,8 +248,7 @@ mtype_list(const struct ip_set *set, } if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (ip_set_put_extensions(skb, set, x, - mtype_is_filled((const struct mtype_elem *)x))) + if (ip_set_put_extensions(skb, set, x, mtype_is_filled(x))) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c296f9b606d495d59c50ee46a04e7e17b21c1530..ba6a5516dc7c724b172ee13c0456b461c47f7281 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -295,7 +295,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -313,7 +314,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, + ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -500,14 +502,6 @@ __ip_set_put(struct ip_set *set) /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need * a separate reference counter */ -static inline void -__ip_set_get_netlink(struct ip_set *set) -{ - write_lock_bh(&ip_set_ref_lock); - set->ref_netlink++; - write_unlock_bh(&ip_set_ref_lock); -} - static inline void __ip_set_put_netlink(struct ip_set *set) { @@ -769,7 +763,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), sizeof(*nfmsg), flags); if (!nlh) return NULL; @@ -906,7 +900,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], - set->type->create_policy)) { + set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1257,8 +1251,8 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; /* Second pass, so parser can't fail */ - nla_parse(cda, IPSET_ATTR_CMD_MAX, - attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); + nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, + ip_set_setname_policy, NULL); if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; @@ -1305,7 +1299,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) * manually :-( */ if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret); + netlink_ack(cb->skb, nlh, ret, NULL); return ret; } } @@ -1501,9 +1495,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - nla_parse(cda, IPSET_ATTR_CMD_MAX, - cmdattr, nlh->nlmsg_len - min_len, - ip_set_adt_policy); + nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL); errline = nla_data(cda[IPSET_ATTR_LINENO]); @@ -1549,7 +1542,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (attr[IPSET_ATTR_DATA]) { if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1561,7 +1554,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1603,7 +1596,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (attr[IPSET_ATTR_DATA]) { if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1615,7 +1608,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1646,7 +1639,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy)) + set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); @@ -1915,7 +1908,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned int *)data; + op = data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ @@ -2013,7 +2006,7 @@ static struct nf_sockopt_ops so_set __read_mostly = { .pf = PF_INET, .get_optmin = SO_IP_SET, .get_optmax = SO_IP_SET + 1, - .get = &ip_set_sockfn_get, + .get = ip_set_sockfn_get, .owner = THIS_MODULE, }; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e6a2753dff9e91dac406e657ad0d49875f052503..3d2ac71a83ec411294361037e7b1d77b6d4bb7e2 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -181,7 +181,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) if (!(cp->flags & IP_VS_CONN_F_HASHED)) { cp->flags |= IP_VS_CONN_F_HASHED; - atomic_inc(&cp->refcnt); + refcount_inc(&cp->refcnt); hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { @@ -215,7 +215,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_HASHED) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; - atomic_dec(&cp->refcnt); + refcount_dec(&cp->refcnt); ret = 1; } else ret = 0; @@ -242,13 +242,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_HASHED) { ret = false; /* Decrease refcnt and unlink conn only if we are last user */ - if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { + if (refcount_dec_if_one(&cp->refcnt)) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } } else - ret = atomic_read(&cp->refcnt) ? false : true; + ret = refcount_read(&cp->refcnt) ? false : true; spin_unlock(&cp->lock); ct_write_unlock_bh(hash); @@ -475,7 +475,7 @@ static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp) void ip_vs_conn_put(struct ip_vs_conn *cp) { if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && - (atomic_read(&cp->refcnt) == 1) && + (refcount_read(&cp->refcnt) == 1) && !timer_pending(&cp->timer)) /* expire connection immediately */ __ip_vs_conn_put_notimer(cp); @@ -617,8 +617,8 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + cp->flags, refcount_read(&cp->refcnt), + refcount_read(&dest->refcnt)); /* Update the connection counters */ if (!(flags & IP_VS_CONN_F_TEMPLATE)) { @@ -714,8 +714,8 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + cp->flags, refcount_read(&cp->refcnt), + refcount_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -863,10 +863,10 @@ static void ip_vs_conn_expire(unsigned long data) expire_later: IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", - atomic_read(&cp->refcnt), + refcount_read(&cp->refcnt), atomic_read(&cp->n_control)); - atomic_inc(&cp->refcnt); + refcount_inc(&cp->refcnt); cp->timeout = 60*HZ; if (ipvs->sync_state & IP_VS_STATE_MASTER) @@ -941,7 +941,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, * it in the table, so that other thread run ip_vs_random_dropentry * but cannot drop this entry. */ - atomic_set(&cp->refcnt, 1); + refcount_set(&cp->refcnt, 1); cp->control = NULL; atomic_set(&cp->n_control, 0); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index db40050f8785eb9205a7bf493a71c9b956b93ab8..ad99c1ceea6f42bf3e52500a4452f7f74e730be5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -542,7 +542,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport), - cp->flags, atomic_read(&cp->refcnt)); + cp->flags, refcount_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; @@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, { unsigned int verdict = NF_DROP; - if (IP_VS_FWD_METHOD(cp) != 0) { - pr_err("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { @@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); + +ignore_cp: verdict = NF_ACCEPT; out: @@ -1193,7 +1193,7 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), - cp->flags, atomic_read(&cp->refcnt)); + cp->flags, refcount_read(&cp->refcnt)); LeaveFunction(12); return cp; } @@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in */ cp = pp->conn_out_get(ipvs, af, skb, &iph); - if (likely(cp)) + if (likely(cp)) { + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + goto ignore_cp; return handle_response(af, skb, pd, cp, &iph, hooknum); + } /* Check for real-server-started requests */ if (atomic_read(&ipvs->conn_out_counter)) { @@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in } } } + +out: IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; + +ignore_cp: + __ip_vs_conn_put(cp); + goto out; } /* @@ -2200,6 +2209,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { static int __net_init __ip_vs_init(struct net *net) { struct netns_ipvs *ipvs; + int ret; ipvs = net_generic(net, ip_vs_net_id); if (ipvs == NULL) @@ -2231,13 +2241,17 @@ static int __net_init __ip_vs_init(struct net *net) if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; - printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", - sizeof(struct netns_ipvs), ipvs->gen); + ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + if (ret < 0) + goto hook_fail; + return 0; /* * Error handling */ +hook_fail: + ip_vs_sync_net_cleanup(ipvs); sync_fail: ip_vs_conn_net_cleanup(ipvs); conn_fail: @@ -2257,6 +2271,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); @@ -2317,24 +2332,16 @@ static int __init ip_vs_init(void) if (ret < 0) goto cleanup_sub; - ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); - if (ret < 0) { - pr_err("can't register hooks.\n"); - goto cleanup_dev; - } - ret = ip_vs_register_nl_ioctl(); if (ret < 0) { pr_err("can't register netlink/ioctl.\n"); - goto cleanup_hooks; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); return ret; -cleanup_hooks: - nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: @@ -2351,7 +2358,6 @@ static int __init ip_vs_init(void) static void __exit ip_vs_cleanup(void) { ip_vs_unregister_nl_ioctl(); - nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_conn_cleanup(); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5aeb0dde6ccc5e525e740ca5fde3ba2c58c50070..1fa3c2307b6ea0173bbcf13c3d0b5cca8c68cba9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -699,7 +699,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, dest->vfwmark, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + refcount_read(&dest->refcnt)); if (dest->af == dest_af && ip_vs_addr_equal(dest_af, &dest->addr, daddr) && dest->port == dport && @@ -934,7 +934,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atomic_set(&dest->activeconns, 0); atomic_set(&dest->inactconns, 0); atomic_set(&dest->persistconns, 0); - atomic_set(&dest->refcnt, 1); + refcount_set(&dest->refcnt, 1); INIT_HLIST_NODE(&dest->d_list); spin_lock_init(&dest->dst_lock); @@ -998,7 +998,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " "dest->refcnt=%d, service %u/%s:%u\n", IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), - atomic_read(&dest->refcnt), + refcount_read(&dest->refcnt), dest->vfwmark, IP_VS_DBG_ADDR(svc->af, &dest->vaddr), ntohs(dest->vport)); @@ -1074,7 +1074,7 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, spin_lock_bh(&ipvs->dest_trash_lock); IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + refcount_read(&dest->refcnt)); if (list_empty(&ipvs->dest_trash) && !cleanup) mod_timer(&ipvs->dest_trash_timer, jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); @@ -1157,7 +1157,7 @@ static void ip_vs_dest_trash_expire(unsigned long data) spin_lock(&ipvs->dest_trash_lock); list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { - if (atomic_read(&dest->refcnt) > 1) + if (refcount_read(&dest->refcnt) > 1) continue; if (dest->idle_start) { if (time_before(now, dest->idle_start + @@ -1545,7 +1545,7 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) dev->name, IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + refcount_read(&dest->refcnt)); __ip_vs_dst_cache_reset(dest); } spin_unlock_bh(&dest->dst_lock); @@ -1774,13 +1774,13 @@ static struct ctl_table vs_vars[] = { .procname = "sync_version", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_sync_mode, + .proc_handler = proc_do_sync_mode, }, { .procname = "sync_ports", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_sync_ports, + .proc_handler = proc_do_sync_ports, }, { .procname = "sync_persist_mode", @@ -2130,8 +2130,8 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Total Incoming Outgoing Incoming Outgoing\n"); - seq_printf(seq, - " Conns Packets Packets Bytes Bytes\n"); + seq_puts(seq, + " Conns Packets Packets Bytes Bytes\n"); ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", @@ -2178,8 +2178,8 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Total Incoming Outgoing Incoming Outgoing\n"); - seq_printf(seq, - "CPU Conns Packets Packets Bytes Bytes\n"); + seq_puts(seq, + "CPU Conns Packets Packets Bytes Bytes\n"); for_each_possible_cpu(i) { struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); @@ -3078,6 +3078,17 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, return skb->len; } +static bool ip_vs_is_af_valid(int af) +{ + if (af == AF_INET) + return true; +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6 && ipv6_mod_enabled()) + return true; +#endif + return false; +} + static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, @@ -3089,7 +3100,8 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, /* Parse mandatory identifying service fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) + nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, + ip_vs_svc_policy, NULL)) return -EINVAL; nla_af = attrs[IPVS_SVC_ATTR_AF]; @@ -3104,11 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, memset(usvc, 0, sizeof(*usvc)); usvc->af = nla_get_u16(nla_af); -#ifdef CONFIG_IP_VS_IPV6 - if (usvc->af != AF_INET && usvc->af != AF_INET6) -#else - if (usvc->af != AF_INET) -#endif + if (!ip_vs_is_af_valid(usvc->af)) return -EAFNOSUPPORT; if (nla_fwmark) { @@ -3251,8 +3259,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); /* Try to find the service for which to dump destinations */ - if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, - IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) + if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, + ip_vs_cmd_policy, NULL)) goto out_err; @@ -3288,7 +3296,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* Parse mandatory identifying destination fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) + nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, + ip_vs_dest_policy, NULL)) return -EINVAL; nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; @@ -3530,7 +3539,7 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], - ip_vs_daemon_policy)) + ip_vs_daemon_policy, info->extack)) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) @@ -3610,6 +3619,11 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (udest.af == 0) udest.af = svc->af; + if (!ip_vs_is_af_valid(udest.af)) { + ret = -EAFNOSUPPORT; + goto out; + } + if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { /* The synchronization protocol is incompatible * with mixed family services diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index d30c327bb578981c0868086cfddb6c304419b2e8..fb780be76d15a05c0d66e8a7f5ea58ee32dacdb9 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -260,7 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) { + if (ct && (ct->status & IPS_NAT_MASK)) { + bool mangled; + /* If mangling fails this function will return 0 * which will cause the packet to be dropped. * Mangling can only fail under memory pressure, @@ -268,12 +270,13 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * packet. */ rcu_read_lock(); - ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - iph->ihl * 4, - start-data, end-start, - buf, buf_len); + mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + iph->ihl * 4, + start - data, + end - start, + buf, buf_len); rcu_read_unlock(); - if (ret) { + if (mangled) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -482,11 +485,8 @@ static struct pernet_operations ip_vs_ftp_ops = { static int __init ip_vs_ftp_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_ftp_ops); /* rcu_barrier() is called by netns on error */ - return rv; + return register_pernet_subsys(&ip_vs_ftp_ops); } /* diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 5824927cf8e02b7fa02f319177d96219c9427033..b6aa4a970c6e97678e8c88e8fe0e0e0b4e4d476d 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -448,7 +448,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 703f11877beece84cb56ec62d4bd13e87c0d67c3..c13ff575f9f73ab9fb53837ff1b01cd279156c9f 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -204,7 +204,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; } @@ -249,7 +249,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) __func__, IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port), atomic_read(&most->activeconns), - atomic_read(&most->refcnt), + refcount_read(&most->refcnt), atomic_read(&most->weight), moh); return most; } @@ -612,7 +612,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index fc230d99aa3b0143f198d7a9ec798344886058ad..6cf3fd81a5eca887ccefe628c2b9d627b40cf055 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -85,7 +85,7 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct nf_conntrack_tuple new_tuple; - if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || + if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) return; @@ -232,7 +232,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, { struct nf_conntrack_expect *exp; - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return; exp = nf_ct_expect_alloc(ct); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index a8b63401e7731e6c8fef37b62c43425e2f96b43c..7d9d4ac596ca5809a9322aa3c1276f6dc08f146c 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -110,7 +110,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 8ae480715ceae79ae07fd4dbfc9bacf424b429b3..ca880a3ad033aec3f55641c3cd485098983a8722 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -193,28 +193,6 @@ ip_vs_create_timeout_table(int *table, int size) } -/* - * Set timeout value for state specified by name - */ -int -ip_vs_set_state_timeout(int *table, int num, const char *const *names, - const char *name, int to) -{ - int i; - - if (!table || !name || !to) - return -EINVAL; - - for (i = 0; i < num; i++) { - if (strcmp(names[i], name)) - continue; - table[i] = to * HZ; - return 0; - } - return -ENOENT; -} - - const char * ip_vs_state_name(__u16 proto, int state) { struct ip_vs_protocol *pp = ip_vs_proto_get(proto); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index d952d67f904d1124ed0c5adfa20a51f82207181c..56f8e4b204ffcc4840a1042097a0f7d0f004df18 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -447,7 +447,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ntohs(cp->cport), sctp_state_name(cp->state), sctp_state_name(next_state), - atomic_read(&cp->refcnt)); + refcount_read(&cp->refcnt)); if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && (next_state != IP_VS_SCTP_S_ESTABLISHED)) { diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 5117bcb7d2f00604d5ab73f0610246a7d4fef755..12dc8d5bc37d7ea03ba8448514a6d3caf03a62b0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -557,7 +557,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ntohs(cp->cport), tcp_state_name(cp->state), tcp_state_name(new_state), - atomic_read(&cp->refcnt)); + refcount_read(&cp->refcnt)); if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 58bacfc461ee6a1d6df4e6e024032fb52a044e35..ee0530d14c5f9d468e199bcb7cd53aad022b748b 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -97,7 +97,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + refcount_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index f8e2d00f528b945e774564854fc66f53dbc61970..ab23cf203437772407f800861ea68687d78f8ff6 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -111,7 +111,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b03c28084f814a9f0b805357e5164ef7b00b0985..0e5b64a75da0521be40831f791eb8f3d7303f70e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -520,7 +520,7 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && pkts % sync_period != sysctl_sync_threshold(ipvs)) return 0; - } else if (sync_refresh_period <= 0 && + } else if (!sync_refresh_period && pkts != sysctl_sync_threshold(ipvs)) return 0; @@ -1849,7 +1849,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; - ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); + ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; ms = ipvs->ms; @@ -1862,7 +1862,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, ms->ipvs = ipvs; } } else { - array = kzalloc(count * sizeof(struct task_struct *), + array = kcalloc(count, sizeof(struct task_struct *), GFP_KERNEL); if (!array) goto out; diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6b366fd905542ff086a36da4111bd11646d274d8..6add39e0ec20d61d21883082e5079d16cab6942c 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -83,7 +83,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), - atomic_read(&least->refcnt), + refcount_read(&least->refcnt), atomic_read(&least->weight), loh); return least; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 17e6d4406ca7c32657eff5e103d0aa1b9317e813..62258dd457ac9825aa14f26bfebb08bb2f2f1755 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -218,7 +218,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), + refcount_read(&dest->refcnt), atomic_read(&dest->weight)); mark->cl = dest; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4e1a98fcc8c3faa3fd037cc0516fd8a3108ab2a9..2eab1e0400f48a5816239cbb95a6b192169137bf 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -775,7 +775,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); @@ -866,7 +866,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); @@ -1338,7 +1338,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); @@ -1429,7 +1429,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 45da11afa785b2779857b3786881158c821263b3..86691671290519809bf8b39b150c7b040a3c0fe1 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -55,7 +55,7 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) }; EXPORT_SYMBOL_GPL(seq_print_acct); -static struct nf_ct_ext_type acct_extend __read_mostly = { +static const struct nf_ct_ext_type acct_extend = { .len = sizeof(struct nf_conn_acct), .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 57a26cc90c9fada2be251d6718f8b814e059beea..03d2ccffa9fa3c1eecfbc07dcadc420a0ab5e315 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -207,6 +207,8 @@ static int __init nf_conntrack_amanda_init(void) { int ret, i; + NF_CT_HELPER_BUILD_BUG_ON(0); + for (i = 0; i < ARRAY_SIZE(search); i++) { search[i].ts = textsearch_prepare(ts_algo, search[i].string, search[i].len, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ffb78e5f7b70912a2bba608a7f32f0a2bc486adc..e847dbaa0c6b3aefc3d417421a2b529a10735e38 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 last_bucket; bool exiting; + bool early_drop; long next_gc_run; }; @@ -180,14 +181,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; - -/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used - * for the nfctinfo. We cheat by (ab)using the PER CPU cache line - * alignment to enforce this. - */ -DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); - static unsigned int nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, @@ -706,7 +699,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && - !nfct_nat(ct) && + ((ct->status & IPS_NAT_DONE_MASK) == 0) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { enum ip_conntrack_info oldinfo; @@ -918,7 +911,7 @@ static unsigned int early_drop_list(struct net *net, continue; /* kill only if still in same netns -- might have moved due to - * SLAB_DESTROY_BY_RCU rules. + * SLAB_TYPESAFE_BY_RCU rules. * * We steal the timer reference. If that fails timer has * already fired or someone else deleted it. Just drop ref @@ -959,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash) return false; } +static bool gc_worker_skip_ct(const struct nf_conn *ct) +{ + return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct); +} + +static bool gc_worker_can_early_drop(const struct nf_conn *ct) +{ + const struct nf_conntrack_l4proto *l4proto; + + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) + return true; + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) + return true; + + return false; +} + static void gc_worker(struct work_struct *work) { unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned int i, goal, buckets = 0, expired_count = 0; + unsigned int nf_conntrack_max95 = 0; struct conntrack_gc_work *gc_work; unsigned int ratio, scanned = 0; unsigned long next_run; @@ -971,6 +984,8 @@ static void gc_worker(struct work_struct *work) goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->last_bucket; + if (gc_work->early_drop) + nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; do { struct nf_conntrack_tuple_hash *h; @@ -987,6 +1002,8 @@ static void gc_worker(struct work_struct *work) i = 0; hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { + struct net *net; + tmp = nf_ct_tuplehash_to_ctrack(h); scanned++; @@ -995,6 +1012,27 @@ static void gc_worker(struct work_struct *work) expired_count++; continue; } + + if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) + continue; + + net = nf_ct_net(tmp); + if (atomic_read(&net->ct.count) < nf_conntrack_max95) + continue; + + /* need to take reference to avoid possible races */ + if (!atomic_inc_not_zero(&tmp->ct_general.use)) + continue; + + if (gc_worker_skip_ct(tmp)) { + nf_ct_put(tmp); + continue; + } + + if (gc_worker_can_early_drop(tmp)) + nf_ct_kill(tmp); + + nf_ct_put(tmp); } /* could check get_nulls_value() here and restart if ct @@ -1040,6 +1078,7 @@ static void gc_worker(struct work_struct *work) next_run = gc_work->next_gc_run; gc_work->last_bucket = i; + gc_work->early_drop = false; queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } @@ -1065,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { if (!early_drop(net, hash)) { + if (!conntrack_gc_work.early_drop) + conntrack_gc_work.early_drop = true; atomic_dec(&net->ct.count); net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); return ERR_PTR(-ENOMEM); @@ -1073,7 +1114,7 @@ __nf_conntrack_alloc(struct net *net, /* * Do not use kmem_cache_zalloc(), as this cache uses - * SLAB_DESTROY_BY_RCU. + * SLAB_TYPESAFE_BY_RCU. */ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) @@ -1118,7 +1159,7 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); /* A freed object has refcnt == 0, that's - * the golden rule for SLAB_DESTROY_BY_RCU + * the golden rule for SLAB_TYPESAFE_BY_RCU */ NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); @@ -1133,7 +1174,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ -static struct nf_conntrack_tuple_hash * +static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, @@ -1241,21 +1282,20 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } -/* On success, returns conntrack ptr, sets skb->_nfct | ctinfo */ -static inline struct nf_conn * +/* On success, returns 0, sets skb->_nfct | ctinfo */ +static int resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto, - int *set_reply, - enum ip_conntrack_info *ctinfo) + struct nf_conntrack_l4proto *l4proto) { const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; + enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; struct nf_conn *ct; u32 hash; @@ -1264,7 +1304,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("Can't get tuple\n"); - return NULL; + return 0; } /* look for tuple match */ @@ -1275,33 +1315,30 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff, hash); if (!h) - return NULL; + return 0; if (IS_ERR(h)) - return (void *)h; + return PTR_ERR(h); } ct = nf_ct_tuplehash_to_ctrack(h); /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED_REPLY; - /* Please set reply bit if this packet OK */ - *set_reply = 1; + ctinfo = IP_CT_ESTABLISHED_REPLY; } else { /* Once we've had two way comms, always ESTABLISHED. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { pr_debug("normal packet for %p\n", ct); - *ctinfo = IP_CT_ESTABLISHED; + ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { pr_debug("related packet for %p\n", ct); - *ctinfo = IP_CT_RELATED; + ctinfo = IP_CT_RELATED; } else { pr_debug("new packet for %p\n", ct); - *ctinfo = IP_CT_NEW; + ctinfo = IP_CT_NEW; } - *set_reply = 0; } - nf_ct_set(skb, ct, *ctinfo); - return ct; + nf_ct_set(skb, ct, ctinfo); + return 0; } unsigned int @@ -1315,13 +1352,13 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; - int set_reply = 0; int ret; tmpl = nf_ct_get(skb, &ctinfo); - if (tmpl) { + if (tmpl || ctinfo == IP_CT_UNTRACKED) { /* Previously seen (loopback or untracked)? Ignore. */ - if (!nf_ct_is_template(tmpl)) { + if ((tmpl && !nf_ct_is_template(tmpl)) || + ctinfo == IP_CT_UNTRACKED) { NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } @@ -1358,23 +1395,22 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } repeat: - ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, - l3proto, l4proto, &set_reply, &ctinfo); - if (!ct) { - /* Not valid part of a connection */ - NF_CT_STAT_INC_ATOMIC(net, invalid); - ret = NF_ACCEPT; - goto out; - } - - if (IS_ERR(ct)) { + ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, + l3proto, l4proto); + if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); ret = NF_DROP; goto out; } - NF_CT_ASSERT(skb_nfct(skb)); + ct = nf_ct_get(skb, &ctinfo); + if (!ct) { + /* Not valid part of a connection */ + NF_CT_STAT_INC_ATOMIC(net, invalid); + ret = NF_ACCEPT; + goto out; + } /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); @@ -1399,7 +1435,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } - if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) + if (ctinfo == IP_CT_ESTABLISHED_REPLY && + !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); out: if (tmpl) @@ -1634,18 +1671,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -static int untrack_refs(void) -{ - int cnt = 0, cpu; - - for_each_possible_cpu(cpu) { - struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); - - cnt += atomic_read(&ct->ct_general.use) - 1; - } - return cnt; -} - void nf_conntrack_cleanup_start(void) { conntrack_gc_work.exiting = true; @@ -1655,8 +1680,6 @@ void nf_conntrack_cleanup_start(void) void nf_conntrack_cleanup_end(void) { RCU_INIT_POINTER(nf_ct_destroy, NULL); - while (untrack_refs() > 0) - schedule(); cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); @@ -1830,20 +1853,44 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -void nf_ct_untracked_status_or(unsigned long bits) +static __always_inline unsigned int total_extension_size(void) { - int cpu; + /* remember to add new extensions below */ + BUILD_BUG_ON(NF_CT_EXT_NUM > 9); - for_each_possible_cpu(cpu) - per_cpu(nf_conntrack_untracked, cpu).status |= bits; -} -EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); + return sizeof(struct nf_ct_ext) + + sizeof(struct nf_conn_help) +#if IS_ENABLED(CONFIG_NF_NAT) + + sizeof(struct nf_conn_nat) +#endif + + sizeof(struct nf_conn_seqadj) + + sizeof(struct nf_conn_acct) +#ifdef CONFIG_NF_CONNTRACK_EVENTS + + sizeof(struct nf_conntrack_ecache) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + + sizeof(struct nf_conn_tstamp) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + + sizeof(struct nf_conn_timeout) +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + + sizeof(struct nf_conn_labels) +#endif +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + + sizeof(struct nf_conn_synproxy) +#endif + ; +}; int nf_conntrack_init_start(void) { int max_factor = 8; int ret = -ENOMEM; - int i, cpu; + int i; + + /* struct nf_ct_ext uses u8 to store offsets/size */ + BUILD_BUG_ON(total_extension_size() > 255u); seqcount_init(&nf_conntrack_generation); @@ -1882,7 +1929,7 @@ int nf_conntrack_init_start(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), NFCT_INFOMASK + 1, - SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); + SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; @@ -1926,15 +1973,6 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_proto; - /* Set up fake conntrack: to never be deleted, not in any hashes */ - for_each_possible_cpu(cpu) { - struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); - write_pnet(&ct->ct_net, &init_net); - atomic_set(&ct->ct_general.use, 1); - } - /* - and look it like as a confirmed connection */ - nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); - conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); @@ -1982,6 +2020,7 @@ int nf_conntrack_init_net(struct net *net) int ret = -ENOMEM; int cpu; + BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); atomic_set(&net->ct.count, 0); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 22fc32143e9c4ae17bc48edc68eb0decf11d931c..caac41ad9483ed36333884f8e584521778e76937 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -195,7 +195,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) events = xchg(&e->cache, 0); - if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) + if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) goto out_unlock; /* We make a copy of the missed event cache without taking @@ -212,7 +212,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) ret = notify->fcn(events | missed, &item); - if (likely(ret >= 0 && !missed)) + if (likely(ret == 0 && !missed)) goto out_unlock; spin_lock_bh(&ct->lock); @@ -347,7 +347,7 @@ static struct ctl_table event_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static struct nf_ct_ext_type event_extend __read_mostly = { +static const struct nf_ct_ext_type event_extend = { .len = sizeof(struct nf_conntrack_ecache), .align = __alignof__(struct nf_conntrack_ecache), .id = NF_CT_EXT_ECACHE, @@ -420,6 +420,9 @@ int nf_conntrack_ecache_init(void) int ret = nf_ct_extend_register(&event_extend); if (ret < 0) pr_err("nf_ct_event: Unable to register event extension.\n"); + + BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ + return ret; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index d80073037856db9081e57b7c88e482a61b94a45f..e03d16ed550d6bf07a537f4b150148a3cd967f59 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -103,6 +103,17 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, nf_ct_zone_equal_any(i->master, zone); } +bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) +{ + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(nf_ct_remove_expect); + struct nf_conntrack_expect * __nf_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, @@ -133,7 +144,7 @@ nf_ct_expect_find_get(struct net *net, rcu_read_lock(); i = __nf_ct_expect_find(net, zone, tuple); - if (i && !atomic_inc_not_zero(&i->use)) + if (i && !refcount_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -186,7 +197,7 @@ nf_ct_find_expectation(struct net *net, return NULL; if (exp->flags & NF_CT_EXPECT_PERMANENT) { - atomic_inc(&exp->use); + refcount_inc(&exp->use); return exp; } else if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -211,10 +222,7 @@ void nf_ct_remove_expectations(struct nf_conn *ct) spin_lock_bh(&nf_conntrack_expect_lock); hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } + nf_ct_remove_expect(exp); } spin_unlock_bh(&nf_conntrack_expect_lock); } @@ -255,10 +263,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { spin_lock_bh(&nf_conntrack_expect_lock); - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } + nf_ct_remove_expect(exp); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); @@ -275,7 +280,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) return NULL; new->master = me; - atomic_set(&new->use, 1); + refcount_set(&new->use, 1); return new; } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); @@ -348,7 +353,7 @@ static void nf_ct_expect_free_rcu(struct rcu_head *head) void nf_ct_expect_put(struct nf_conntrack_expect *exp) { - if (atomic_dec_and_test(&exp->use)) + if (refcount_dec_and_test(&exp->use)) call_rcu(&exp->rcu, nf_ct_expect_free_rcu); } EXPORT_SYMBOL_GPL(nf_ct_expect_put); @@ -361,7 +366,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple); /* two references : one for hash insert, one for the timer */ - atomic_add(2, &exp->use); + refcount_add(2, &exp->use); hlist_add_head_rcu(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; @@ -394,10 +399,8 @@ static void evict_oldest_expect(struct nf_conn *master, last = exp; } - if (last && del_timer(&last->timeout)) { - nf_ct_unlink_expect(last); - nf_ct_expect_put(last); - } + if (last) + nf_ct_remove_expect(last); } static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) @@ -419,11 +422,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { - if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_ct_expect_put(i); + if (nf_ct_remove_expect(expect)) break; - } } else if (expect_clash(i, expect)) { ret = -EBUSY; goto out; @@ -549,7 +549,7 @@ static int exp_seq_show(struct seq_file *s, void *v) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); else - seq_printf(s, "- "); + seq_puts(s, "- "); seq_printf(s, "l3proto = %u proto=%u ", expect->tuple.src.l3num, expect->tuple.dst.protonum); @@ -559,7 +559,7 @@ static int exp_seq_show(struct seq_file *s, void *v) expect->tuple.dst.protonum)); if (expect->flags & NF_CT_EXPECT_PERMANENT) { - seq_printf(s, "PERMANENT"); + seq_puts(s, "PERMANENT"); delim = ","; } if (expect->flags & NF_CT_EXPECT_INACTIVE) { diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 008299b7f78fe3754946cf0a58029090234ad905..6c605e88ebae238ce618b9e808a4da0dc697d95c 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -18,17 +18,14 @@ static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); +#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */ -void __nf_ct_ext_destroy(struct nf_conn *ct) +void nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; struct nf_ct_ext_type *t; - struct nf_ct_ext *ext = ct->ext; for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!__nf_ct_ext_exist(ext, i)) - continue; - rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); @@ -41,54 +38,26 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) rcu_read_unlock(); } } -EXPORT_SYMBOL(__nf_ct_ext_destroy); - -static void * -nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp) -{ - unsigned int off, len; - struct nf_ct_ext_type *t; - size_t alloc_size; - - rcu_read_lock(); - t = rcu_dereference(nf_ct_ext_types[id]); - if (!t) { - rcu_read_unlock(); - return NULL; - } - - off = ALIGN(sizeof(struct nf_ct_ext), t->align); - len = off + t->len + var_alloc_len; - alloc_size = t->alloc_size + var_alloc_len; - rcu_read_unlock(); - - *ext = kzalloc(alloc_size, gfp); - if (!*ext) - return NULL; - - (*ext)->offset[id] = off; - (*ext)->len = len; - - return (void *)(*ext) + off; -} +EXPORT_SYMBOL(nf_ct_ext_destroy); -void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp) +void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { + unsigned int newlen, newoff, oldlen, alloc; struct nf_ct_ext *old, *new; - int newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); old = ct->ext; - if (!old) - return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp); - if (__nf_ct_ext_exist(old, id)) - return NULL; + if (old) { + if (__nf_ct_ext_exist(old, id)) + return NULL; + oldlen = old->len; + } else { + oldlen = sizeof(*new); + } rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); @@ -97,15 +66,19 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, return NULL; } - newoff = ALIGN(old->len, t->align); - newlen = newoff + t->len + var_alloc_len; + newoff = ALIGN(oldlen, t->align); + newlen = newoff + t->len; rcu_read_unlock(); - new = __krealloc(old, newlen, gfp); + alloc = max(newlen, NF_CT_EXT_PREALLOC); + new = __krealloc(old, alloc, gfp); if (!new) return NULL; - if (new != old) { + if (!old) { + memset(new->offset, 0, sizeof(new->offset)); + ct->ext = new; + } else if (new != old) { kfree_rcu(old, rcu); rcu_assign_pointer(ct->ext, new); } @@ -115,45 +88,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, memset((void *)new + newoff, 0, newlen - newoff); return (void *)new + newoff; } -EXPORT_SYMBOL(__nf_ct_ext_add_length); - -static void update_alloc_size(struct nf_ct_ext_type *type) -{ - int i, j; - struct nf_ct_ext_type *t1, *t2; - enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1; - - /* unnecessary to update all types */ - if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) { - min = type->id; - max = type->id; - } - - /* This assumes that extended areas in conntrack for the types - whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ - for (i = min; i <= max; i++) { - t1 = rcu_dereference_protected(nf_ct_ext_types[i], - lockdep_is_held(&nf_ct_ext_type_mutex)); - if (!t1) - continue; - - t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + - t1->len; - for (j = 0; j < NF_CT_EXT_NUM; j++) { - t2 = rcu_dereference_protected(nf_ct_ext_types[j], - lockdep_is_held(&nf_ct_ext_type_mutex)); - if (t2 == NULL || t2 == t1 || - (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) - continue; - - t1->alloc_size = ALIGN(t1->alloc_size, t2->align) - + t2->len; - } - } -} +EXPORT_SYMBOL(nf_ct_ext_add); /* This MUST be called in process context. */ -int nf_ct_extend_register(struct nf_ct_ext_type *type) +int nf_ct_extend_register(const struct nf_ct_ext_type *type) { int ret = 0; @@ -163,12 +101,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type) goto out; } - /* This ensures that nf_ct_ext_create() can allocate enough area - before updating alloc_size */ - type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) - + type->len; rcu_assign_pointer(nf_ct_ext_types[type->id], type); - update_alloc_size(type); out: mutex_unlock(&nf_ct_ext_type_mutex); return ret; @@ -176,11 +109,10 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type) EXPORT_SYMBOL_GPL(nf_ct_extend_register); /* This MUST be called in process context. */ -void nf_ct_extend_unregister(struct nf_ct_ext_type *type) +void nf_ct_extend_unregister(const struct nf_ct_ext_type *type) { mutex_lock(&nf_ct_ext_type_mutex); RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); - update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); synchronize_rcu(); } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 4aecef4a89fb135e5b50ac382f19645aafaaff94..f0e9a7511e1ac4915ed9bc43492d214417b3c1bd 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -577,6 +577,8 @@ static int __init nf_conntrack_ftp_init(void) { int i, ret = 0; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master)); + ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) return -ENOMEM; @@ -589,12 +591,10 @@ static int __init nf_conntrack_ftp_init(void) for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp", FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, sizeof(struct nf_ct_ftp_master), help, - nf_ct_ftp_from_nlattr, THIS_MODULE); + 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp", FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, sizeof(struct nf_ct_ftp_master), help, - nf_ct_ftp_from_nlattr, THIS_MODULE); + 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); } ret = nf_conntrack_helpers_register(ftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index f65d93639d12595884081726808f4c1e17caec03..3bcdc718484e4c5352ab2de17c5a8aaf227af2f3 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -637,7 +637,6 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, @@ -1215,7 +1214,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { { .name = "Q.931", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -1800,7 +1798,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1810,7 +1807,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1836,6 +1832,8 @@ static int __init nf_conntrack_h323_init(void) { int ret; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master)); + h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4eeb3418366ad5473945395346b6e4e5fc213fbc..7f6100ca63be6dd4f37c24852fd16b9a71b8a823 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -174,6 +174,10 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) #endif if (h != NULL && !try_module_get(h->me)) h = NULL; + if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { + module_put(h->me); + h = NULL; + } rcu_read_unlock(); @@ -181,14 +185,20 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); +void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) +{ + refcount_dec(&helper->refcnt); + module_put(helper->me); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp) { struct nf_conn_help *help; - help = nf_ct_ext_add_length(ct, NF_CT_EXT_HELPER, - helper->data_len, gfp); + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else @@ -386,19 +396,39 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); struct nf_conntrack_helper *cur; - int ret = 0; + int ret = 0, i; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); + if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) + return -EINVAL; + mutex_lock(&nf_ct_helper_mutex); - hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { - if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { - ret = -EEXIST; - goto out; + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) { + if (!strcmp(cur->name, me->name) && + (cur->tuple.src.l3num == NFPROTO_UNSPEC || + cur->tuple.src.l3num == me->tuple.src.l3num) && + cur->tuple.dst.protonum == me->tuple.dst.protonum) { + ret = -EEXIST; + goto out; + } + } + } + + /* avoid unpredictable behaviour for auto_assign_helper */ + if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) { + hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { + if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, + &mask)) { + ret = -EEXIST; + goto out; + } } } + refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: @@ -455,11 +485,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) if ((rcu_dereference_protected( help->helper, lockdep_is_held(&nf_conntrack_expect_lock) - ) == me || exp->helper == me) && - del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } + ) == me || exp->helper == me)) + nf_ct_remove_expect(exp); } } spin_unlock_bh(&nf_conntrack_expect_lock); @@ -491,7 +518,7 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, - u32 expect_class_max, u32 data_len, + u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), @@ -504,7 +531,6 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, helper->tuple.src.u.all = htons(spec_port); helper->expect_policy = exp_pol; helper->expect_class_max = expect_class_max; - helper->data_len = data_len; helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; @@ -544,7 +570,7 @@ void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); -static struct nf_ct_ext_type helper_extend __read_mostly = { +static const struct nf_ct_ext_type helper_extend = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), .id = NF_CT_EXT_HELPER, diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1972a149f958350c6f6dd5f466d36fed152b76fc..5523acce9d6993dc71e467bbdf7b718ab2b25bf7 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -243,6 +243,12 @@ static int __init nf_conntrack_irc_init(void) return -EINVAL; } + if (max_dcc_channels > NF_CT_EXPECT_MAX_CNT) { + pr_err("max_dcc_channels must not be more than %u\n", + NF_CT_EXPECT_MAX_CNT); + return -EINVAL; + } + irc_exp_policy.max_expected = max_dcc_channels; irc_exp_policy.timeout = dcc_timeout; @@ -257,7 +263,7 @@ static int __init nf_conntrack_irc_init(void) for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc", IRC_PORT, ports[i], i, &irc_exp_policy, - 0, 0, help, NULL, THIS_MODULE); + 0, help, NULL, THIS_MODULE); } ret = nf_conntrack_helpers_register(&irc[0], ports_c); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index bcab8bde73128f9f9f8b4dba31a1c283b07f8eb1..adf2198599018ca6ec4377ffa4b92bb0a2dcd169 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -82,7 +82,7 @@ void nf_connlabels_put(struct net *net) } EXPORT_SYMBOL_GPL(nf_connlabels_put); -static struct nf_ct_ext_type labels_extend __read_mostly = { +static const struct nf_ct_ext_type labels_extend = { .len = sizeof(struct nf_conn_labels), .align = __alignof__(struct nf_conn_labels), .id = NF_CT_EXT_LABELS, diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 4c8f30a3d6d2762e69604c4b1f6cd6821663bc64..496ce173f0c1937c73fb360e402c9c3e3936e1e9 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -58,6 +58,8 @@ static struct nf_conntrack_helper helper __read_mostly = { static int __init nf_conntrack_netbios_ns_init(void) { + NF_CT_HELPER_BUILD_BUG_ON(0); + exp_policy.timeout = timeout; return nf_conntrack_helper_register(&helper); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index dc7dfd68fafe5d8db341488ac7d9ad5bf8f80b46..a8be9b72e6cd2ca34166bba49a532f4f92e86e9e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -417,8 +419,7 @@ dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type) return -1; } -static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, - const struct nf_conn *ct) +static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *seq; @@ -426,15 +427,20 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj) return 0; + spin_lock_bh(&ct->lock); seq = &seqadj->seq[IP_CT_DIR_ORIGINAL]; if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1) - return -1; + goto err; seq = &seqadj->seq[IP_CT_DIR_REPLY]; if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1) - return -1; + goto err; + spin_unlock_bh(&ct->lock); return 0; +err: + spin_unlock_bh(&ct->lock); + return -1; } static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) @@ -467,7 +473,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nlattr *nest_parms; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -627,10 +633,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) unsigned int flags = 0, group; int err; - /* ignore our fake conntrack entry */ - if (nf_ct_is_untracked(ct)) - return 0; - if (events & (1 << IPCT_DESTROY)) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; @@ -652,7 +654,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) if (skb == NULL) goto errout; - type |= NFNL_SUBSYS_CTNETLINK << 8; + type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -888,8 +890,13 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) } out: local_bh_enable(); - if (last) + if (last) { + /* nf ct hash resize happened, now clear the leftover. */ + if ((struct nf_conn *)cb->args[1] == last) + cb->args[1] = 0; + nf_ct_put(last); + } while (i) { i--; @@ -908,7 +915,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_l3proto *l3proto; int ret = 0; - ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL); if (ret < 0) return ret; @@ -917,7 +924,7 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, if (likely(l3proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_IP_MAX, - l3proto->nla_policy); + l3proto->nla_policy, NULL); if (ret == 0) ret = l3proto->nlattr_to_tuple(tb, tuple); } @@ -938,7 +945,8 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_l4proto *l4proto; int ret = 0; - ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy); + ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy, + NULL); if (ret < 0) return ret; @@ -951,7 +959,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_PROTO_MAX, - l4proto->nla_policy); + l4proto->nla_policy, NULL); if (ret == 0) ret = l4proto->nlattr_to_tuple(tb, tuple); } @@ -1006,16 +1014,16 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], - struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num, - struct nf_conntrack_zone *zone) + struct nf_conntrack_tuple *tuple, u32 type, + u_int8_t l3num, struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; memset(tuple, 0, sizeof(*tuple)); - err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy, + NULL); if (err < 0) return err; @@ -1065,7 +1073,7 @@ static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, int err; struct nlattr *tb[CTA_HELP_MAX+1]; - err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL); if (err < 0) return err; @@ -1419,6 +1427,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, } #endif +static void +__ctnetlink_change_status(struct nf_conn *ct, unsigned long on, + unsigned long off) +{ + unsigned int bit; + + /* Ignore these unchangable bits */ + on &= ~IPS_UNCHANGEABLE_MASK; + off &= ~IPS_UNCHANGEABLE_MASK; + + for (bit = 0; bit < __IPS_MAX_BIT; bit++) { + if (on & (1 << bit)) + set_bit(bit, &ct->status); + else if (off & (1 << bit)) + clear_bit(bit, &ct->status); + } +} + static int ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1438,10 +1464,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) /* ASSURED bit can only be set */ return -EBUSY; - /* Be careful here, modifying NAT bits can screw up things, - * so don't let users modify them directly if they don't pass - * nf_nat_range. */ - ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + __ctnetlink_change_status(ct, status, 0); return 0; } @@ -1510,23 +1533,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct, return 0; } + rcu_read_lock(); helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), nf_ct_protonum(ct)); if (helper == NULL) { -#ifdef CONFIG_MODULES - spin_unlock_bh(&nf_conntrack_expect_lock); - - if (request_module("nfct-helper-%s", helpname) < 0) { - spin_lock_bh(&nf_conntrack_expect_lock); - return -EOPNOTSUPP; - } - - spin_lock_bh(&nf_conntrack_expect_lock); - helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), - nf_ct_protonum(ct)); - if (helper) - return -EAGAIN; -#endif + rcu_read_unlock(); return -EOPNOTSUPP; } @@ -1535,13 +1546,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct, /* update private helper data if allowed. */ if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - return 0; + err = 0; } else - return -EBUSY; + err = -EBUSY; + } else { + /* we cannot set a helper for an existing conntrack */ + err = -EOPNOTSUPP; } - /* we cannot set a helper for an existing conntrack */ - return -EOPNOTSUPP; + rcu_read_unlock(); + return err; } static int ctnetlink_change_timeout(struct nf_conn *ct, @@ -1571,7 +1585,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, struct nf_conntrack_l4proto *l4proto; int err = 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, + NULL); if (err < 0) return err; @@ -1596,7 +1611,7 @@ static int change_seq_adj(struct nf_ct_seqadj *seq, int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; - err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy); + err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL); if (err < 0) return err; @@ -1631,25 +1646,30 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, if (!seqadj) return 0; + spin_lock_bh(&ct->lock); if (cda[CTA_SEQ_ADJ_ORIG]) { ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL], cda[CTA_SEQ_ADJ_ORIG]); if (ret < 0) - return ret; + goto err; - ct->status |= IPS_SEQ_ADJUST; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } if (cda[CTA_SEQ_ADJ_REPLY]) { ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY], cda[CTA_SEQ_ADJ_REPLY]); if (ret < 0) - return ret; + goto err; - ct->status |= IPS_SEQ_ADJUST; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); } + spin_unlock_bh(&ct->lock); return 0; +err: + spin_unlock_bh(&ct->lock); + return ret; } static int @@ -1814,6 +1834,8 @@ ctnetlink_create_conntrack(struct net *net, nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); nf_ct_labels_ext_add(ct); + nfct_seqadj_ext_add(ct); + nfct_synproxy_ext_add(ct); /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; @@ -1960,9 +1982,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, err = -EEXIST; ct = nf_ct_tuplehash_to_ctrack(h); if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { - spin_lock_bh(&nf_conntrack_expect_lock); err = ctnetlink_change_conntrack(ct, cda); - spin_unlock_bh(&nf_conntrack_expect_lock); if (err == 0) { nf_conntrack_eventmask_report((1 << IPCT_REPLY) | (1 << IPCT_ASSURED) | @@ -1988,7 +2008,8 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_CT_GET_STATS_CPU); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2071,7 +2092,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2177,13 +2198,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - struct nf_conn *ct; - - ct = nf_ct_get(skb, ctinfo); - if (ct && nf_ct_is_untracked(ct)) - ct = NULL; - - return ct; + return nf_ct_get(skb, ctinfo); } static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) @@ -2300,10 +2315,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) /* This check is less strict than ctnetlink_change_status() * because callers often flip IPS_EXPECTED bits when sending * an NFQA_CT attribute to the kernel. So ignore the - * unchangeable bits but do not error out. + * unchangeable bits but do not error out. Also user programs + * are allowed to clear the bits that they are allowed to change. */ - ct->status = (status & ~IPS_UNCHANGEABLE_MASK) | - (ct->status & IPS_UNCHANGEABLE_MASK); + __ctnetlink_change_status(ct, status, ~status); return 0; } @@ -2353,15 +2368,11 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL); if (ret < 0) return ret; - spin_lock_bh(&nf_conntrack_expect_lock); - ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); - spin_unlock_bh(&nf_conntrack_expect_lock); - - return ret; + return ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); } static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda, @@ -2390,7 +2401,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, struct nf_conntrack_expect *exp; int err; - err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy); + err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy, + NULL); if (err < 0) return err; @@ -2443,7 +2455,7 @@ static struct nfnl_ct_hook ctnetlink_glue_hook = { static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - enum ctattr_expect type) + u32 type) { struct nlattr *nest_parms; @@ -2581,7 +2593,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2632,7 +2644,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) if (skb == NULL) goto errout; - type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type); nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2698,7 +2710,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!atomic_inc_not_zero(&exp->use)) + if (!refcount_inc_not_zero(&exp->use)) continue; cb->args[1] = (unsigned long)exp; goto out; @@ -2744,7 +2756,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { - if (!atomic_inc_not_zero(&exp->use)) + if (!refcount_inc_not_zero(&exp->use)) continue; cb->args[1] = (unsigned long)exp; goto out; @@ -3015,7 +3027,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, + exp_nat_nla_policy, NULL); if (err < 0) return err; @@ -3049,6 +3062,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conn_help *help; int err; + help = nfct_help(ct); + if (!help) + return ERR_PTR(-EOPNOTSUPP); + if (cda[CTA_EXPECT_CLASS] && helper) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) @@ -3058,26 +3075,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, if (!exp) return ERR_PTR(-ENOMEM); - help = nfct_help(ct); - if (!help) { - if (!cda[CTA_EXPECT_TIMEOUT]) { - err = -EINVAL; - goto err_out; - } - exp->timeout.expires = - jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; - - exp->flags = NF_CT_EXPECT_USERSPACE; - if (cda[CTA_EXPECT_FLAGS]) { - exp->flags |= - ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); - } + if (cda[CTA_EXPECT_FLAGS]) { + exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); + exp->flags &= ~NF_CT_EXPECT_USERSPACE; } else { - if (cda[CTA_EXPECT_FLAGS]) { - exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); - exp->flags &= ~NF_CT_EXPECT_USERSPACE; - } else - exp->flags = 0; + exp->flags = 0; } if (cda[CTA_EXPECT_FN]) { const char *name = nla_data(cda[CTA_EXPECT_FN]); @@ -3240,7 +3242,8 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_EXP_GET_STATS_CPU); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index f60a4755d71e2b0763be35e07976e02169e1b01e..6959e93063d4c957017b97e08dfca27b775461c2 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -263,7 +263,7 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) goto out_put_both; } -static inline int +static int pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, @@ -391,7 +391,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; } -static inline int +static int pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, @@ -523,6 +523,14 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, int ret; u_int16_t msg; +#if IS_ENABLED(CONFIG_NF_NAT) + if (!nf_ct_is_confirmed(ct) && (ct->status & IPS_NAT_MASK)) { + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + + if (!nat && !nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC)) + return NF_DROP; + } +#endif /* don't do any tracking before tcp handshake complete */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; @@ -596,7 +604,6 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = { static struct nf_conntrack_helper pptp __read_mostly = { .name = "pptp", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_pptp_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -607,6 +614,8 @@ static struct nf_conntrack_helper pptp __read_mostly = { static int __init nf_conntrack_pptp_init(void) { + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_pptp_master)); + return nf_conntrack_helper_register(&pptp); } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 2d6ee18034159d12603e50c694541a9bdda147fc..2de6c1fe326149c5ab46f06b9ca7a3db992c4e7f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -202,7 +202,7 @@ static int kill_l3proto(struct nf_conn *i, void *data) static int kill_l4proto(struct nf_conn *i, void *data) { struct nf_conntrack_l4proto *l4proto; - l4proto = (struct nf_conntrack_l4proto *)data; + l4proto = data; return nf_ct_protonum(i) == l4proto->l4proto && nf_ct_l3num(i) == l4proto->l3proto; } @@ -441,9 +441,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); void nf_ct_l4proto_pernet_unregister_one(struct net *net, struct nf_conntrack_l4proto *l4proto) { - struct nf_proto_net *pn = NULL; + struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); - pn = nf_ct_l4proto_net(net, l4proto); if (pn == NULL) return; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 93dd1c5b7bff9e5285530a446bba6811ec26ead4..b553fdd68816b98a0f6f58cba3bd7363959479cf 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -609,6 +609,20 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } +static bool dccp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.dccp.state) { + case CT_DCCP_CLOSEREQ: + case CT_DCCP_CLOSING: + case CT_DCCP_TIMEWAIT: + return true; + default: + break; + } + + return false; +} + static void dccp_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { @@ -665,7 +679,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return 0; err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, - dccp_nla_policy); + dccp_nla_policy, NULL); if (err < 0) return err; @@ -868,6 +882,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .packet = dccp_packet, .get_timeouts = dccp_get_timeouts, .error = dccp_error, + .can_early_drop = dccp_can_early_drop, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -902,6 +917,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .packet = dccp_packet, .get_timeouts = dccp_get_timeouts, .error = dccp_error, + .can_early_drop = dccp_can_early_drop, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 33279aab583d5eac3016b4a58f6bf2ea8b457395..1c5b14a6cab369591bd13e22b284a0737ad75c2e 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -512,16 +512,19 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, u8 pf, unsigned int hooknum) { const struct sctphdr *sh; - struct sctphdr _sctph; const char *logmsg; - sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); - if (!sh) { + if (skb->len < dataoff + sizeof(struct sctphdr)) { logmsg = "nf_ct_sctp: short packet "; goto out_invalid; } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && skb->ip_summed == CHECKSUM_NONE) { + if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) { + logmsg = "nf_ct_sctp: failed to read header "; + goto out_invalid; + } + sh = (const struct sctphdr *)(skb->data + dataoff); if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { logmsg = "nf_ct_sctp: bad CRC "; goto out_invalid; @@ -535,6 +538,20 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, return -NF_ACCEPT; } +static bool sctp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.sctp.state) { + case SCTP_CONNTRACK_SHUTDOWN_SENT: + case SCTP_CONNTRACK_SHUTDOWN_RECD: + case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: + return true; + default: + break; + } + + return false; +} + #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include @@ -584,10 +601,8 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) if (!attr) return 0; - err = nla_parse_nested(tb, - CTA_PROTOINFO_SCTP_MAX, - attr, - sctp_nla_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, + sctp_nla_policy, NULL); if (err < 0) return err; @@ -783,6 +798,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .get_timeouts = sctp_get_timeouts, .new = sctp_new, .error = sctp_error, + .can_early_drop = sctp_can_early_drop, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, @@ -818,6 +834,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .get_timeouts = sctp_get_timeouts, .new = sctp_new, .error = sctp_error, + .can_early_drop = sctp_can_early_drop, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b122e9dacfed06e27aecab3fbc71203772d7b427..9758a7dfd83ef95c0bcab1257d63cb38d9faed88 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -419,10 +419,9 @@ static void tcp_options(const struct sk_buff *skb, && opsize == TCPOLEN_WINDOW) { state->td_scale = *(u_int8_t *)ptr; - if (state->td_scale > 14) { - /* See RFC1323 */ - state->td_scale = 14; - } + if (state->td_scale > TCP_MAX_WSCALE) + state->td_scale = TCP_MAX_WSCALE; + state->flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; } @@ -1172,6 +1171,22 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static bool tcp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.tcp.state) { + case TCP_CONNTRACK_FIN_WAIT: + case TCP_CONNTRACK_LAST_ACK: + case TCP_CONNTRACK_TIME_WAIT: + case TCP_CONNTRACK_CLOSE: + case TCP_CONNTRACK_CLOSE_WAIT: + return true; + default: + break; + } + + return false; +} + #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include @@ -1234,7 +1249,8 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (!pattr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, + tcp_nla_policy, NULL); if (err < 0) return err; @@ -1549,6 +1565,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, + .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .nlattr_size = tcp_nlattr_size, @@ -1586,6 +1603,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, + .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .nlattr_size = tcp_nlattr_size, diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 9dcb9ee9b97d53361c73df05aa5fe9923e933bda..ae457f39d5ceb804aa60120c7d897dd441e49d2a 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -184,6 +184,8 @@ static int __init nf_conntrack_sane_init(void) { int i, ret = 0; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master)); + sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) return -ENOMEM; @@ -196,13 +198,11 @@ static int __init nf_conntrack_sane_init(void) for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane", SANE_PORT, ports[i], ports[i], - &sane_exp_policy, 0, - sizeof(struct nf_ct_sane_master), help, NULL, + &sane_exp_policy, 0, help, NULL, THIS_MODULE); nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane", SANE_PORT, ports[i], ports[i], - &sane_exp_policy, 0, - sizeof(struct nf_ct_sane_master), help, NULL, + &sane_exp_policy, 0, help, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index ef7063eced7c490dd6813d3e54ee72fdad9a8565..a975efd6b8c3e3baac2a80334237be9201ace130 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -231,7 +231,7 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct, } EXPORT_SYMBOL_GPL(nf_ct_seq_offset); -static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = { +static const struct nf_ct_ext_type nf_ct_seqadj_extend = { .len = sizeof(struct nf_conn_seqadj), .align = __alignof__(struct nf_conn_seqadj), .id = NF_CT_EXT_SEQADJ, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0d17894798b5caea7540b3bf7d341a0a2f623fd6..d38af4274335b9bed6de1edb4b0c4ae6fdf96656 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -829,10 +829,8 @@ static void flush_expectations(struct nf_conn *ct, bool media) hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; - if (!del_timer(&exp->timeout)) + if (!nf_ct_remove_expect(exp)) continue; - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); if (!media) break; } @@ -1624,29 +1622,27 @@ static int __init nf_conntrack_sip_init(void) { int i, ret; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sip_master)); + if (ports_c == 0) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_udp, + SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_tcp, + SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_udp, + SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_tcp, + SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2256147dcaad80ea982868607b2e66b233c27c6f..ccb5cb9043e0e769ba463759a69f4d9f36e122e0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -250,7 +250,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - seq_printf(s, "[UNREPLIED] "); + seq_puts(s, "[UNREPLIED] "); print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto); @@ -261,7 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; if (test_bit(IPS_ASSURED_BIT, &ct->status)) - seq_printf(s, "[ASSURED] "); + seq_puts(s, "[ASSURED] "); if (seq_has_overflowed(s)) goto release; @@ -350,7 +350,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + seq_puts(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index b1227dc6f75e115c8cb2e88e488c9349686891a0..0ec6779fd5d944406eb67ff6e3256698ec40d20c 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -113,16 +113,18 @@ static int __init nf_conntrack_tftp_init(void) { int i, ret; + NF_CT_HELPER_BUILD_BUG_ON(0); + if (ports_c == 0) ports[ports_c++] = TFTP_PORT; for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp", TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, 0, tftp_help, NULL, THIS_MODULE); + 0, tftp_help, NULL, THIS_MODULE); nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp", TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, 0, tftp_help, NULL, THIS_MODULE); + 0, tftp_help, NULL, THIS_MODULE); } ret = nf_conntrack_helpers_register(tftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 26e742006c48c944ba16aea8d8a2ed541ad06ca1..46aee65f339bea3e39e6fcdd94d82d4646b991b4 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook); -static struct nf_ct_ext_type timeout_extend __read_mostly = { +static const struct nf_ct_ext_type timeout_extend = { .len = sizeof(struct nf_conn_timeout), .align = __alignof__(struct nf_conn_timeout), .id = NF_CT_EXT_TIMEOUT, diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 7a394df0deb7686fa7fe4da162ff77d3c03435c5..4c4734b78318962360bd07265dc8179f95fc5135 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -33,7 +33,7 @@ static struct ctl_table tstamp_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static struct nf_ct_ext_type tstamp_extend __read_mostly = { +static const struct nf_ct_ext_type tstamp_extend = { .len = sizeof(struct nf_conn_tstamp), .align = __alignof__(struct nf_conn_tstamp), .id = NF_CT_EXT_TSTAMP, diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index c46d214d532355b7294a543d7c68843a82fb489d..bfa742da83aff37dfc8b426e1d193f0de056fb6f 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,7 +14,7 @@ /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry **entryp, unsigned int verdict); -void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); +unsigned int nf_queue_nf_hook_drop(struct net *net); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8d85a0598b603c5f7d1f2f6f0c0ba68bb96b4d58..8bb152a7cca499d8e9b349dcd9fe0693d6599fbf 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -71,7 +71,6 @@ void nf_log_unset(struct net *net, const struct nf_logger *logger) RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); } mutex_unlock(&nf_log_mutex); - synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unset); @@ -376,13 +375,13 @@ static int seq_show(struct seq_file *s, void *v) logger = nft_log_dereference(loggers[*pos][i]); seq_printf(s, "%s", logger->name); if (i == 0 && loggers[*pos][i + 1] != NULL) - seq_printf(s, ","); + seq_puts(s, ","); if (seq_has_overflowed(s)) return -ENOSPC; } - seq_printf(s, ")\n"); + seq_puts(s, ")\n"); if (seq_has_overflowed(s)) return -ENOSPC; diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index eb772380a202f0037745a4357cf2c00658181bcd..e4d61a7a52589d85d23185bba1919dfd188e62c2 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -33,7 +33,6 @@ static unsigned int help(struct sk_buff *skb, { char buffer[sizeof("65535")]; u_int16_t port; - unsigned int ret; /* Connection comes from client. */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; @@ -63,14 +62,14 @@ static unsigned int help(struct sk_buff *skb, } sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, - protoff, matchoff, matchlen, - buffer, strlen(buffer)); - if (ret != NF_ACCEPT) { + if (!nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, + protoff, matchoff, matchlen, + buffer, strlen(buffer))) { nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + return NF_DROP; } - return ret; + return NF_ACCEPT; } static void __exit nf_nat_amanda_fini(void) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 82802e4a6640817e64eb3f3a6ffcb875ad14a747..6c72922d20caee83f498cb02cdce0c46899a1c22 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -71,11 +71,10 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) if (ct == NULL) return; - family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; - rcu_read_lock(); + family = nf_ct_l3num(ct); l3proto = __nf_nat_l3proto_find(family); if (l3proto == NULL) - goto out; + return; dir = CTINFO2DIR(ctinfo); if (dir == IP_CT_DIR_ORIGINAL) @@ -84,8 +83,6 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) statusbit = IPS_SRC_NAT; l3proto->decode_session(skb, ct, dir, statusbit, fl); -out: - rcu_read_unlock(); } int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) @@ -411,11 +408,9 @@ nf_nat_setup_info(struct nf_conn *ct, enum nf_nat_manip_type maniptype) { struct nf_conntrack_tuple curr_tuple, new_tuple; - struct nf_conn_nat *nat; - /* nat helper or nfctnetlink also setup binding */ - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) + /* Can't setup nat info for confirmed ct. */ + if (nf_ct_is_confirmed(ct)) return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || @@ -549,10 +544,6 @@ struct nf_nat_proto_clean { static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) @@ -563,12 +554,10 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) static int nf_nat_proto_clean(struct nf_conn *ct, void *data) { - struct nf_conn_nat *nat = nfct_nat(ct); - if (nf_nat_proto_remove(ct, data)) return 1; - if (!nat) + if ((ct->status & IPS_SRC_NAT_DONE) == 0) return 0; /* This netns is being destroyed, and conntrack has nat null binding. @@ -577,7 +566,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - ct->status &= ~IPS_NAT_DONE_MASK; + clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); @@ -716,13 +705,9 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); /* No one using conntrack by the time this called. */ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { - struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - - if (!nat) - return; - - rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + if (ct->status & IPS_SRC_NAT_DONE) + rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -730,7 +715,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .align = __alignof__(struct nf_conn_nat), .destroy = nf_nat_cleanup_conntrack, .id = NF_CT_EXT_NAT, - .flags = NF_CT_EXT_F_PREALLOC, }; #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -751,7 +735,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_nat_l4proto *l4proto; int err; - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, + protonat_nla_policy, NULL); if (err < 0) return err; @@ -780,7 +765,7 @@ nfnetlink_parse_nat(const struct nlattr *nat, memset(range, 0, sizeof(*range)); - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL); if (err < 0) return err; @@ -819,7 +804,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, /* No NAT information has been passed, allocate the null-binding */ if (attr == NULL) - return __nf_nat_alloc_null_binding(ct, manip); + return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0; err = nfnetlink_parse_nat(attr, ct, &range, l3proto); if (err < 0) @@ -874,9 +859,6 @@ static int __init nf_nat_init(void) nf_ct_helper_expectfn_register(&follow_master_nat); - /* Initialize fake conntrack so that NAT will skip it */ - nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 211661cb2c90a5d25a57d5ce41c9c5f9898c5302..607a373379b40cf5cef2d4bba1bfefa58a8dfa33 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -70,15 +70,15 @@ static void mangle_contents(struct sk_buff *skb, } /* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff *skb, unsigned int extra) +static bool enlarge_skb(struct sk_buff *skb, unsigned int extra) { if (skb->len + extra > 65535) - return 0; + return false; if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) - return 0; + return false; - return 1; + return true; } /* Generic function for mangling variable-length address changes inside @@ -89,26 +89,26 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * skb enlargement, ... * * */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len, bool adjust) +bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) { const struct nf_nat_l3proto *l3proto; struct tcphdr *tcph; int oldlen, datalen; if (!skb_make_writable(skb, skb->len)) - return 0; + return false; if (rep_len > match_len && rep_len - match_len > skb_tailroom(skb) && !enlarge_skb(skb, rep_len - match_len)) - return 0; + return false; SKB_LINEAR_ASSERT(skb); @@ -128,7 +128,7 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_ct_seqadj_set(ct, ctinfo, tcph->seq, (int)rep_len - (int)match_len); - return 1; + return true; } EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); @@ -142,7 +142,7 @@ EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); * XXX - This function could be merged with nf_nat_mangle_tcp_packet which * should be fairly easy to do. */ -int +bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -157,12 +157,12 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, int datalen, oldlen; if (!skb_make_writable(skb, skb->len)) - return 0; + return false; if (rep_len > match_len && rep_len - match_len > skb_tailroom(skb) && !enlarge_skb(skb, rep_len - match_len)) - return 0; + return false; udph = (void *)skb->data + protoff; @@ -176,13 +176,13 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, /* fix udp checksum if udp checksum was previously calculated */ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) - return 1; + return true; l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check, datalen, oldlen); - return 1; + return true; } EXPORT_SYMBOL(nf_nat_mangle_udp_packet); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 1fb2258c35357c8c6f5072ae6593de2d53ab9f03..0648cb096bd87b2df3b84db5da8ff8fdbffc7018 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -37,7 +37,6 @@ static unsigned int help(struct sk_buff *skb, struct nf_conn *ct = exp->master; union nf_inet_addr newaddr; u_int16_t port; - unsigned int ret; /* Reply comes from server. */ newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3; @@ -83,14 +82,14 @@ static unsigned int help(struct sk_buff *skb, pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", buffer, &newaddr.ip, port); - ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, - matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, + matchlen, buffer, strlen(buffer))) { nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); + return NF_DROP; } - return ret; + return NF_ACCEPT; } static void __exit nf_nat_irc_fini(void) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4a7662486f44bae527ed83e7e952c4d7195f3871..043850c9d154dcf128903d7dba6b17ea5ff20370 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,15 +96,18 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) +unsigned int nf_queue_nf_hook_drop(struct net *net) { const struct nf_queue_handler *qh; + unsigned int count = 0; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - qh->nf_hook_drop(net, entry); + count = qh->nf_hook_drop(net); rcu_read_unlock(); + + return count; } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 7c6d1fbe38b9aafc668ca39ee6e096d64f79bc7f..a504e87c6ddff1b1266a901549256f29dc1973d1 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -66,8 +66,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW) { opts->wscale = *ptr; - if (opts->wscale > 14) - opts->wscale = 14; + if (opts->wscale > TCP_MAX_WSCALE) + opts->wscale = TCP_MAX_WSCALE; opts->options |= XT_SYNPROXY_OPT_WSCALE; } break; @@ -287,9 +287,9 @@ static int synproxy_cpu_seq_show(struct seq_file *seq, void *v) struct synproxy_stats *stats = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries\t\tsyn_received\t" - "cookie_invalid\tcookie_valid\t" - "cookie_retrans\tconn_reopened\n"); + seq_puts(seq, "entries\t\tsyn_received\t" + "cookie_invalid\tcookie_valid\t" + "cookie_retrans\tconn_reopened\n"); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 434c739dfecaa8727dc193f8ad86e18133932660..da314be0c048720172bbd153cd2f730b486603ce 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -144,7 +144,7 @@ static int nf_tables_register_hooks(struct net *net, unsigned int hook_nops) { if (table->flags & NFT_TABLE_F_DORMANT || - !(chain->flags & NFT_BASE_CHAIN)) + !nft_is_base_chain(chain)) return 0; return nf_register_net_hooks(net, nft_base_chain(chain)->ops, @@ -157,7 +157,7 @@ static void nf_tables_unregister_hooks(struct net *net, unsigned int hook_nops) { if (table->flags & NFT_TABLE_F_DORMANT || - !(chain->flags & NFT_BASE_CHAIN)) + !nft_is_base_chain(chain)) return; nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); @@ -438,7 +438,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -587,7 +587,7 @@ static void _nf_tables_table_disable(struct net *net, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) continue; if (cnt && i++ == cnt) @@ -608,7 +608,7 @@ static int nf_tables_table_enable(struct net *net, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) continue; err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, @@ -989,7 +989,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1007,7 +1007,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) goto nla_put_failure; - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); const struct nf_hook_ops *ops = &basechain->ops[0]; struct nlattr *nest; @@ -1182,7 +1182,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) struct nft_stats *stats; int err; - err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); + err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy, + NULL); if (err < 0) return ERR_PTR(err); @@ -1226,7 +1227,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) { BUG_ON(chain->use > 0); - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); module_put(basechain->type->owner); @@ -1257,7 +1258,7 @@ static int nft_chain_parse_hook(struct net *net, int err; err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy); + nft_hook_policy, NULL); if (err < 0) return err; @@ -1364,8 +1365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, } if (nla[NFTA_CHAIN_POLICY]) { - if ((chain != NULL && - !(chain->flags & NFT_BASE_CHAIN))) + if (chain != NULL && + !nft_is_base_chain(chain)) return -EOPNOTSUPP; if (chain == NULL && @@ -1396,7 +1397,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_chain_hook hook; struct nf_hook_ops *ops; - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) return -EBUSY; err = nft_chain_parse_hook(net, nla, afi, &hook, @@ -1433,7 +1434,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, } if (nla[NFTA_CHAIN_COUNTERS]) { - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); @@ -1724,7 +1725,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy); + err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL); if (err < 0) return err; @@ -1734,7 +1735,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, if (tb[NFTA_EXPR_DATA]) { err = nla_parse_nested(info->tb, type->maxattr, - tb[NFTA_EXPR_DATA], type->policy); + tb[NFTA_EXPR_DATA], type->policy, NULL); if (err < 0) goto err1; } else @@ -1772,8 +1773,19 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, goto err1; } + if (ops->validate) { + const struct nft_data *data = NULL; + + err = ops->validate(ctx, expr, &data); + if (err < 0) + goto err2; + } + return 0; +err2: + if (ops->destroy) + ops->destroy(ctx, expr); err1: expr->ops = NULL; return err; @@ -1874,10 +1886,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, const struct nft_expr *expr, *next; struct nlattr *list; const struct nft_rule *prule; - int type = event | NFNL_SUBSYS_NFTABLES << 8; + u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), - flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1895,7 +1906,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { - prule = list_entry(rule->list.prev, struct nft_rule, list); + prule = list_prev_entry(rule, list); if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(prule->handle), NFTA_RULE_PAD)) @@ -2523,8 +2534,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, return 0; } -struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla, u8 genmask) +static struct nft_set *nf_tables_set_lookup(const struct nft_table *table, + const struct nlattr *nla, u8 genmask) { struct nft_set *set; @@ -2538,11 +2549,10 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, } return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nf_tables_set_lookup); -struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla, - u8 genmask) +static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla, + u8 genmask) { struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); @@ -2557,7 +2567,25 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net, } return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid); + +struct nft_set *nft_set_lookup(const struct net *net, + const struct nft_table *table, + const struct nlattr *nla_set_name, + const struct nlattr *nla_set_id, + u8 genmask) +{ + struct nft_set *set; + + set = nf_tables_set_lookup(table, nla_set_name, genmask); + if (IS_ERR(set)) { + if (!nla_set_id) + return set; + + set = nf_tables_set_lookup_byid(net, nla_set_id, genmask); + } + return set; +} +EXPORT_SYMBOL_GPL(nft_set_lookup); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) @@ -2617,7 +2645,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, u32 portid = ctx->portid; u32 seq = ctx->seq; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) @@ -2851,7 +2879,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx, struct nlattr *da[NFTA_SET_DESC_MAX + 1]; int err; - err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy); + err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, + nft_set_desc_policy, NULL); if (err < 0) return err; @@ -3338,37 +3367,52 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, return nf_tables_fill_setelem(args->skb, set, elem); } +struct nft_set_dump_ctx { + const struct nft_set *set; + struct nft_ctx ctx; +}; + static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { + struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); - u8 genmask = nft_genmask_cur(net); + struct nft_af_info *afi; + struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; - struct nft_ctx ctx; - struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1]; + bool set_found = false; struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; struct nlattr *nest; u32 portid, seq; - int event, err; + int event; - err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla, - NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy); - if (err < 0) - return err; + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (afi != dump_ctx->ctx.afi) + continue; - err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla, genmask); - if (err < 0) - return err; + list_for_each_entry_rcu(table, &afi->tables, list) { + if (table != dump_ctx->ctx.table) + continue; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); + list_for_each_entry_rcu(set, &table->sets, list) { + if (set == dump_ctx->set) { + set_found = true; + break; + } + } + break; + } + break; + } - event = NFT_MSG_NEWSETELEM; - event |= NFNL_SUBSYS_NFTABLES << 8; + if (!set_found) { + rcu_read_unlock(); + return -ENOENT; + } + + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM); portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; @@ -3378,11 +3422,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) goto nla_put_failure; nfmsg = nlmsg_data(nlh); - nfmsg->nfgen_family = ctx.afi->family; + nfmsg->nfgen_family = afi->family; nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff); + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name)) + if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name)) goto nla_put_failure; if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; @@ -3393,12 +3437,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.cb = cb; args.skb = skb; - args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - set->ops->walk(&ctx, set, &args.iter); + set->ops->walk(&dump_ctx->ctx, set, &args.iter); + rcu_read_unlock(); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); @@ -3412,9 +3457,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; nla_put_failure: + rcu_read_unlock(); return -ENOSPC; } +static int nf_tables_dump_set_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -3436,7 +3488,18 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; return netlink_dump_start(nlsk, skb, nlh, &c); } return -EOPNOTSUPP; @@ -3453,7 +3516,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, struct nlattr *nest; int err; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) @@ -3564,9 +3627,9 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); - nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); + nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) - nft_data_uninit(nft_set_ext_data(ext), set->dtype); + nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) @@ -3575,6 +3638,18 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); +/* Only called from commit path, nft_set_elem_deactivate() already deals with + * the refcounting from the preparation phase. + */ +static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) +{ + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); + kfree(elem); +} + static int nft_setelem_parse_flags(const struct nft_set *set, const struct nlattr *attr, u32 *flags) { @@ -3612,7 +3687,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy); + nft_set_elem_policy, NULL); if (err < 0) return err; @@ -3749,6 +3824,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || + nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) + return -EBUSY; if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), @@ -3781,9 +3861,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) - nft_data_uninit(&data, d2.type); + nft_data_release(&data, d2.type); err2: - nft_data_uninit(&elem.key.val, d1.type); + nft_data_release(&elem.key.val, d1.type); err1: return err; } @@ -3828,6 +3908,53 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } +/** + * nft_data_hold - hold a nft_data item + * + * @data: struct nft_data to release + * @type: type of data + * + * Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded, + * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and + * NFT_GOTO verdicts. This function must be called on active data objects + * from the second phase of the commit protocol. + */ +static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +{ + if (type == NFT_DATA_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + data->verdict.chain->use++; + break; + } + } +} + +static void nft_set_elem_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_hold(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use++; +} + +static void nft_set_elem_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) +{ + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) + nft_data_release(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; +} + static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr) { @@ -3842,7 +3969,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, int err; err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy); + nft_set_elem_policy, NULL); if (err < 0) goto err1; @@ -3893,6 +4020,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; + nft_set_elem_deactivate(ctx->net, set, &elem); + nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -3902,7 +4031,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, err3: kfree(elem.priv); err2: - nft_data_uninit(&elem.key.val, desc.type); + nft_data_release(&elem.key.val, desc.type); err1: return err; } @@ -4064,7 +4193,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, }; -static struct nft_object *nft_obj_init(const struct nft_object_type *type, +static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, + const struct nft_object_type *type, const struct nlattr *attr) { struct nlattr *tb[type->maxattr + 1]; @@ -4072,7 +4202,8 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type, int err; if (attr) { - err = nla_parse_nested(tb, type->maxattr, attr, type->policy); + err = nla_parse_nested(tb, type->maxattr, attr, type->policy, + NULL); if (err < 0) goto err1; } else { @@ -4084,7 +4215,7 @@ static struct nft_object *nft_obj_init(const struct nft_object_type *type, if (obj == NULL) goto err1; - err = type->init((const struct nlattr * const *)tb, obj); + err = type->init(ctx, (const struct nlattr * const *)tb, obj); if (err < 0) goto err2; @@ -4192,7 +4323,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (IS_ERR(type)) return PTR_ERR(type); - obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]); + obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err1; @@ -4224,7 +4355,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -4406,8 +4537,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, err: kfree_skb(skb2); return err; - - return 0; } static void nft_obj_destroy(struct nft_object *obj) @@ -4497,7 +4626,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN; + int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); if (nlh == NULL) @@ -4679,7 +4808,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) if (nft_trans_chain_name(trans)[0]) strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); - if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(trans->ctx.chain)) return; basechain = nft_base_chain(trans->ctx.chain); @@ -4709,8 +4838,8 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: - nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv, true); + nf_tables_set_elem_destroy(nft_trans_elem_set(trans), + nft_trans_elem(trans).priv); break; case NFT_MSG_DELOBJ: nft_obj_destroy(nft_trans_obj(trans)); @@ -4945,6 +5074,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; + nft_set_elem_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; @@ -4993,7 +5123,7 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, { const struct nft_base_chain *basechain; - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); if (basechain->type->type != type) return -EOPNOTSUPP; @@ -5007,7 +5137,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, { struct nft_base_chain *basechain; - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); if ((1 << basechain->ops[0].hooknum) & hook_flags) @@ -5285,7 +5415,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_chain *chain; int err; - err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy); + err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy, + NULL); if (err < 0) return err; @@ -5316,7 +5447,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, tb[NFTA_VERDICT_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_BASE_CHAIN) + if (nft_is_base_chain(chain)) return -EOPNOTSUPP; chain->use++; @@ -5415,7 +5546,7 @@ int nft_data_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy); + err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); if (err < 0) return err; @@ -5429,7 +5560,7 @@ int nft_data_init(const struct nft_ctx *ctx, EXPORT_SYMBOL_GPL(nft_data_init); /** - * nft_data_uninit - release a nft_data item + * nft_data_release - release a nft_data item * * @data: struct nft_data to release * @type: type of data @@ -5437,7 +5568,7 @@ EXPORT_SYMBOL_GPL(nft_data_init); * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, * all others need to be released by calling this function. */ -void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) +void nft_data_release(const struct nft_data *data, enum nft_data_types type) { if (type < NFT_DATA_VERDICT) return; @@ -5448,7 +5579,7 @@ void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) WARN_ON(1); } } -EXPORT_SYMBOL_GPL(nft_data_uninit); +EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) @@ -5489,7 +5620,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) { struct nft_rule *rule, *nr; - BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); + BUG_ON(!nft_is_base_chain(ctx->chain)); nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, ctx->afi->nops); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 9e2ae424b64005555d5a7132ea1aa469dc4d0fc1..403432988313567358871d5f85a5d71fc0385051 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -128,7 +128,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, list_for_each_entry(table, &afi->tables, list) { ctx.table = table; list_for_each_entry_safe(chain, nr, &table->chains, list) { - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) continue; ctx.chain = chain; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 12eb9041dca284a398a2c1ac64924ba773a18489..e1b15e7a5793f6b877f63a95dc20ce2626caf7f5 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -169,7 +169,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; - int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; @@ -198,6 +198,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (!skb) return; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE); nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); if (!nlh) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 68eda920160e12f3cb904dad08b7f6cc9422571e..80f5ecf2c3d7096f579361663da5d2e651eebd3e 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -148,7 +148,8 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ -static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; @@ -191,8 +192,8 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int attrlen = nlh->nlmsg_len - min_len; __u8 subsys_id = NFNL_SUBSYS_ID(type); - err = nla_parse(cda, ss->cb[cb_id].attr_count, - attr, attrlen, ss->cb[cb_id].policy); + err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, + ss->cb[cb_id].policy, extack); if (err < 0) { rcu_read_unlock(); return err; @@ -261,7 +262,7 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) { - netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL); nfnl_err_del(nfnl_err); } } @@ -284,13 +285,13 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, int err; if (subsys_id >= NFNL_SUBSYS_COUNT) - return netlink_ack(skb, nlh, -EINVAL); + return netlink_ack(skb, nlh, -EINVAL, NULL); replay: status = 0; skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) - return netlink_ack(oskb, nlh, -ENOMEM); + return netlink_ack(oskb, nlh, -ENOMEM, NULL); nfnl_lock(subsys_id); ss = nfnl_dereference_protected(subsys_id); @@ -304,20 +305,20 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, #endif { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return kfree_skb(skb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return kfree_skb(skb); } if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { nfnl_unlock(subsys_id); - netlink_ack(oskb, nlh, -ERESTART); + netlink_ack(oskb, nlh, -ERESTART, NULL); return kfree_skb(skb); } @@ -376,8 +377,8 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; - err = nla_parse(cda, ss->cb[cb_id].attr_count, - attr, attrlen, ss->cb[cb_id].policy); + err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, + attrlen, ss->cb[cb_id].policy, NULL); if (err < 0) goto ack; @@ -407,7 +408,8 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, * pointing to the batch header. */ nfnl_err_reset(&err_list); - netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM); + netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM, + NULL); status |= NFNL_BATCH_FAILURE; goto done; } @@ -465,9 +467,10 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; - err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy); + err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy, + NULL); if (err < 0) { - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, NULL); return; } if (cda[NFNL_BATCH_GENID]) @@ -493,14 +496,14 @@ static void nfnetlink_rcv(struct sk_buff *skb) return; if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { - netlink_ack(skb, nlh, -EPERM); + netlink_ack(skb, nlh, -EPERM, NULL); return; } if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) nfnetlink_rcv_skb_batch(skb, nlh); else - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + netlink_rcv_skb(skb, nfnetlink_rcv_msg); } #ifdef CONFIG_MODULES diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d44d89b561275e25bb31fe2b0ed198d13995533c..9898fb4d0512ce80d0bf970340e5da43db25ce15 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ struct nf_acct { atomic64_t bytes; unsigned long flags; struct list_head head; - atomic_t refcnt; + refcount_t refcnt; char name[NFACCT_NAME_MAX]; struct rcu_head rcu_head; char data[0]; @@ -123,7 +124,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, atomic64_set(&nfacct->pkts, be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } - atomic_set(&nfacct->refcnt, 1); + refcount_set(&nfacct->refcnt, 1); list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); return 0; } @@ -138,7 +139,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, u64 pkts, bytes; u32 old_flags; - event |= NFNL_SUBSYS_ACCT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -166,7 +167,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, NFACCT_PAD) || nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes), NFACCT_PAD) || - nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) + nla_put_be32(skb, NFACCT_USE, htonl(refcount_read(&acct->refcnt)))) goto nla_put_failure; if (acct->flags & NFACCT_F_QUOTA) { u64 *quota = (u64 *)acct->data; @@ -243,7 +244,8 @@ nfacct_filter_alloc(const struct nlattr * const attr) struct nlattr *tb[NFACCT_FILTER_MAX + 1]; int err; - err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy); + err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, + NULL); if (err < 0) return ERR_PTR(err); @@ -329,7 +331,7 @@ static int nfnl_acct_try_del(struct nf_acct *cur) /* We want to avoid races with nfnl_acct_put. So only when the current * refcnt is 1, we decrease it to 0. */ - if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) { + if (refcount_dec_if_one(&cur->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&cur->head); kfree_rcu(cur, rcu_head); @@ -413,7 +415,7 @@ struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) if (!try_module_get(THIS_MODULE)) goto err; - if (!atomic_inc_not_zero(&cur->refcnt)) { + if (!refcount_inc_not_zero(&cur->refcnt)) { module_put(THIS_MODULE); goto err; } @@ -429,7 +431,7 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get); void nfnl_acct_put(struct nf_acct *acct) { - if (atomic_dec_and_test(&acct->refcnt)) + if (refcount_dec_and_test(&acct->refcnt)) kfree_rcu(acct, rcu_head); module_put(THIS_MODULE); @@ -502,7 +504,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { list_del_rcu(&cur->head); - if (atomic_dec_and_test(&cur->refcnt)) + if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); } } diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d45558178da5b62a8ad7c896e096c1862c512091..be678a323598c3237a2cae09e4e3ed4bdea46614 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -77,7 +77,8 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, + nfnl_cthelper_tuple_pol, NULL); if (err < 0) return err; @@ -104,7 +105,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(help->data, nla_data(attr), help->helper->data_len); + nla_memcpy(help->data, nla_data(attr), sizeof(help->data)); return 0; } @@ -137,7 +138,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -150,6 +152,9 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) + return -EINVAL; + expect_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); @@ -171,7 +176,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + nfnl_cthelper_expect_policy_set, NULL); if (ret < 0) return ret; @@ -213,6 +218,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], { struct nf_conntrack_helper *helper; struct nfnl_cthelper *nfcth; + unsigned int size; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) @@ -228,7 +234,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); - helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + if (size > FIELD_SIZEOF(struct nf_conn_help, data)) { + ret = -ENOMEM; + goto err2; + } + helper->flags |= NF_CT_HELPER_F_USERSPACE; memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); @@ -276,7 +287,7 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, int err; err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, - nfnl_cthelper_expect_pol); + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -290,6 +301,9 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, new_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT) + return -EINVAL; + new_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); @@ -336,7 +350,7 @@ static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, int err; err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + nfnl_cthelper_expect_policy_set, NULL); if (err < 0) return err; @@ -501,7 +515,7 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, unsigned int flags = portid ? NLM_F_MULTI : 0; int status; - event |= NFNL_SUBSYS_CTHELPER << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -672,6 +686,7 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } + ret = -ENOENT; list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { cur = &nlcth->helper; j++; @@ -685,16 +700,20 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple.dst.protonum != cur->tuple.dst.protonum)) continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); + if (refcount_dec_if_one(&cur->refcnt)) { + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); - list_del(&nlcth->list); - kfree(nlcth); + list_del(&nlcth->list); + kfree(nlcth); + } else { + ret = -EBUSY; + } } /* Make sure we return success if we flush and there is no helpers */ - return (found || j == 0) ? 0 : -ENOENT; + return (found || j == 0) ? 0 : ret; } static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 47d6656c9119fd5c75fb1fb5f0bfd4bb49f1c38b..a3e7bb54d96acf2e7ac570077d57502d5b23625b 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -56,7 +56,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy); + attr, l4proto->ctnl_timeout.nla_policy, + NULL); if (ret < 0) return ret; @@ -138,7 +139,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->l3num = l3num; timeout->l4proto = l4proto; - atomic_set(&timeout->refcnt, 1); + refcount_set(&timeout->refcnt, 1); list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; @@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; - event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -172,7 +173,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) || nla_put_be32(skb, CTA_TIMEOUT_USE, - htonl(atomic_read(&timeout->refcnt)))) + htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { @@ -339,7 +340,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) /* We want to avoid races with ctnl_timeout_put. So only when the * current refcnt is 1, we decrease it to 0. */ - if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) { + if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); @@ -431,7 +432,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -536,7 +537,7 @@ ctnl_timeout_find_get(struct net *net, const char *name) if (!try_module_get(THIS_MODULE)) goto err; - if (!atomic_inc_not_zero(&timeout->refcnt)) { + if (!refcount_inc_not_zero(&timeout->refcnt)) { module_put(THIS_MODULE); goto err; } @@ -550,7 +551,7 @@ ctnl_timeout_find_get(struct net *net, const char *name) static void ctnl_timeout_put(struct ctnl_timeout *timeout) { - if (atomic_dec_and_test(&timeout->refcnt)) + if (refcount_dec_and_test(&timeout->refcnt)) kfree_rcu(timeout, rcu_head); module_put(THIS_MODULE); @@ -601,7 +602,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_del_rcu(&cur->head); nf_ct_l4proto_put(cur->l4proto); - if (atomic_dec_and_test(&cur->refcnt)) + if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); } } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 08247bf7d7b836828c8151ed07f438e05973c2c0..da9704971a83cd2f1f2808bd99061c880d9349f7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -40,6 +40,8 @@ #include #include +#include + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #include "../bridge/br_private.h" @@ -57,7 +59,7 @@ struct nfulnl_instance { struct hlist_node hlist; /* global list of instances */ spinlock_t lock; - atomic_t use; /* use count */ + refcount_t use; /* use count */ unsigned int qlen; /* number of nlmsgs in skb */ struct sk_buff *skb; /* pre-allocatd skb */ @@ -115,7 +117,7 @@ __instance_lookup(struct nfnl_log_net *log, u_int16_t group_num) static inline void instance_get(struct nfulnl_instance *inst) { - atomic_inc(&inst->use); + refcount_inc(&inst->use); } static struct nfulnl_instance * @@ -125,7 +127,7 @@ instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num) rcu_read_lock_bh(); inst = __instance_lookup(log, group_num); - if (inst && !atomic_inc_not_zero(&inst->use)) + if (inst && !refcount_inc_not_zero(&inst->use)) inst = NULL; rcu_read_unlock_bh(); @@ -145,7 +147,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head) static void instance_put(struct nfulnl_instance *inst) { - if (inst && atomic_dec_and_test(&inst->use)) + if (inst && refcount_dec_and_test(&inst->use)) call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); } @@ -180,7 +182,7 @@ instance_create(struct net *net, u_int16_t group_num, INIT_HLIST_NODE(&inst->hlist); spin_lock_init(&inst->lock); /* needs to be two, since we _put() after creation */ - atomic_set(&inst->use, 2); + refcount_set(&inst->use, 2); setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); @@ -409,7 +411,7 @@ __build_packet_message(struct nfnl_log_net *log, const unsigned char *hwhdrp; nlh = nlmsg_put(inst->skb, 0, 0, - NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, + nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), sizeof(struct nfgenmsg), 0); if (!nlh) return -1; @@ -801,7 +803,7 @@ static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", .type = NF_LOG_TYPE_ULOG, - .logfn = &nfulnl_log_packet, + .logfn = nfulnl_log_packet, .me = THIS_MODULE, }; @@ -1031,7 +1033,7 @@ static int seq_show(struct seq_file *s, void *v) inst->group_num, inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, - inst->flushtimeout, atomic_read(&inst->use)); + inst->flushtimeout, refcount_read(&inst->use)); return 0; } @@ -1138,10 +1140,10 @@ static int __init nfnetlink_log_init(void) static void __exit nfnetlink_log_fini(void) { - nf_log_unregister(&nfulnl_logger); nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_log_net_ops); + nf_log_unregister(&nfulnl_logger); } MODULE_DESCRIPTION("netfilter userspace logging"); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 933509ebf3d3e2e84aecd55fd7f19f21f69e46d4..8a0f218b79380bb66176b54a7bcd54e43e909a0a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -447,7 +447,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh = nlmsg_put(skb, 0, 0, - NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), sizeof(struct nfgenmsg), 0); if (!nlh) { skb_tx_error(entskb); @@ -922,16 +922,10 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) -{ - return rcu_access_pointer(entry->hook) == - (struct nf_hook_entry *)entry_ptr; -} - -static void nfqnl_nf_hook_drop(struct net *net, - const struct nf_hook_entry *hook) +static unsigned int nfqnl_nf_hook_drop(struct net *net) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); + unsigned int instances = 0; int i; rcu_read_lock(); @@ -939,10 +933,14 @@ static void nfqnl_nf_hook_drop(struct net *net, struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; - hlist_for_each_entry_rcu(inst, head, hlist) - nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook); + hlist_for_each_entry_rcu(inst, head, hlist) { + nfqnl_flush(inst, NULL, 0); + instances++; + } } rcu_read_unlock(); + + return instances; } static int @@ -1109,7 +1107,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, int err; err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN], - nfqa_vlan_policy); + nfqa_vlan_policy, NULL); if (err < 0) return err; @@ -1213,8 +1211,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { }; static const struct nf_queue_handler nfqh = { - .outfn = &nfqnl_enqueue_packet, - .nf_hook_drop = &nfqnl_nf_hook_drop, + .outfn = nfqnl_enqueue_packet, + .nf_hook_drop = nfqnl_nf_hook_drop, }; static int nfqnl_recv_config(struct net *net, struct sock *ctnl, diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 877d9acd91ef5c616c43d00f265facbfe4c2d334..fff8073e2a5692c14037a77c5d8151cf0c1bbcb0 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -83,17 +83,26 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (d1.len != priv->len) - return -EINVAL; + if (d1.len != priv->len) { + err = -EINVAL; + goto err1; + } err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, tb[NFTA_BITWISE_XOR]); if (err < 0) - return err; - if (d2.len != priv->len) - return -EINVAL; + goto err1; + if (d2.len != priv->len) { + err = -EINVAL; + goto err2; + } return 0; +err2: + nft_data_release(&priv->xor, d2.type); +err1: + nft_data_release(&priv->mask, d1.type); + return err; } static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2b96effeadc1bc708a1f16e89342c8d39c4c4da7..c2945eb3397c8991ae05ea84abb2ba15591cbefb 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -201,10 +201,18 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) if (err < 0) return ERR_PTR(err); + if (desc.type != NFT_DATA_VALUE) { + err = -EINVAL; + goto err1; + } + if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) return &nft_cmp_fast_ops; - else - return &nft_cmp_ops; + + return &nft_cmp_ops; +err1: + nft_data_release(&data, desc.type); + return ERR_PTR(-EINVAL); } struct nft_expr_type nft_cmp_type __read_mostly = { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index c21e7eb8dce02a6b73c5a466300ae793a2787b26..f753ec69f7902b80c9f448f764ecc1e48f6a2680 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -42,7 +42,8 @@ static int nft_compat_chain_validate_dependency(const char *tablename, { const struct nft_base_chain *basechain; - if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) + if (!tablename || + !nft_is_base_chain(chain)) return 0; basechain = nft_base_chain(chain); @@ -165,7 +166,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, par->entryinfo = entry; par->target = target; par->targinfo = info; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -200,7 +201,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) int err; err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, - nft_rule_compat_policy); + nft_rule_compat_policy, NULL); if (err < 0) return err; @@ -230,10 +231,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(target->table, ctx->chain); - if (ret < 0) - goto err; - target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -302,7 +299,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -383,7 +380,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, par->entryinfo = entry; par->match = match; par->matchinfo = info; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -419,10 +416,6 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; - ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); - if (ret < 0) - goto err; - match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -485,7 +478,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -511,7 +504,7 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - event |= NFNL_SUBSYS_NFT_COMPAT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 7f84222133414d4a65ed96508c83dea711295c9f..67a710ebde09da21464da4bb22a78a36c791274d 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -82,7 +82,8 @@ static int nft_counter_do_init(const struct nlattr * const tb[], return 0; } -static int nft_counter_obj_init(const struct nlattr * const tb[], +static int nft_counter_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], struct nft_object *obj) { struct nft_counter_percpu_priv *priv = nft_obj_data(obj); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 0264258c46feb5071a8eebcf9299b4b717fd0a32..1678e9e75e8ee7d22301d6083ddeb04dd39ab385 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -32,6 +32,12 @@ struct nft_ct { }; }; +struct nft_ct_helper_obj { + struct nf_conntrack_helper *helper4; + struct nf_conntrack_helper *helper6; + u8 l4proto; +}; + #ifdef CONFIG_NF_CONNTRACK_ZONES static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template); static unsigned int nft_ct_pcpu_template_refcnt __read_mostly; @@ -66,12 +72,12 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_STATE: - if (ct == NULL) - state = NF_CT_STATE_INVALID_BIT; - else if (nf_ct_is_untracked(ct)) + if (ct) + state = NF_CT_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) state = NF_CT_STATE_UNTRACKED_BIT; else - state = NF_CT_STATE_BIT(ctinfo); + state = NF_CT_STATE_INVALID_BIT; *dest = state; return; default: @@ -258,7 +264,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr, struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_template(ct)) return; switch (priv->key) { @@ -277,6 +283,22 @@ static void nft_ct_set_eval(const struct nft_expr *expr, ®s->data[priv->sreg], NF_CT_LABELS_MAX_SIZE / sizeof(u32)); break; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + case NFT_CT_EVENTMASK: { + struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct); + u32 ctmask = regs->data[priv->sreg]; + + if (e) { + if (e->ctmask != ctmask) + e->ctmask = ctmask; + break; + } + + if (ctmask && !nf_ct_is_confirmed(ct)) + nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC); + break; + } #endif default: break; @@ -532,6 +554,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, nft_ct_pcpu_template_refcnt++; len = sizeof(u16); break; +#endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + case NFT_CT_EVENTMASK: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); + break; #endif default: return -EOPNOTSUPP; @@ -696,7 +725,7 @@ nft_ct_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_ct_type __read_mostly = { .name = "ct", - .select_ops = &nft_ct_select_ops, + .select_ops = nft_ct_select_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, .owner = THIS_MODULE, @@ -712,12 +741,10 @@ static void nft_notrack_eval(const struct nft_expr *expr, ct = nf_ct_get(pkt->skb, &ctinfo); /* Previously seen (loopback or untracked)? Ignore. */ - if (ct) + if (ct || ctinfo == IP_CT_UNTRACKED) return; - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - nf_ct_set(skb, ct, IP_CT_NEW); + nf_ct_set(skb, ct, IP_CT_UNTRACKED); } static struct nft_expr_type nft_notrack_type; @@ -733,6 +760,162 @@ static struct nft_expr_type nft_notrack_type __read_mostly = { .owner = THIS_MODULE, }; +static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conntrack_helper *help4, *help6; + char name[NF_CT_HELPER_NAME_LEN]; + int family = ctx->afi->family; + + if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) + return -EINVAL; + + priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]); + if (!priv->l4proto) + return -ENOENT; + + nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name)); + + if (tb[NFTA_CT_HELPER_L3PROTO]) + family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO])); + + help4 = NULL; + help6 = NULL; + + switch (family) { + case NFPROTO_IPV4: + if (ctx->afi->family == NFPROTO_IPV6) + return -EINVAL; + + help4 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_IPV6: + if (ctx->afi->family == NFPROTO_IPV4) + return -EINVAL; + + help6 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_NETDEV: /* fallthrough */ + case NFPROTO_BRIDGE: /* same */ + case NFPROTO_INET: + help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4, + priv->l4proto); + help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6, + priv->l4proto); + break; + default: + return -EAFNOSUPPORT; + } + + /* && is intentional; only error if INET found neither ipv4 or ipv6 */ + if (!help4 && !help6) + return -ENOENT; + + priv->helper4 = help4; + priv->helper6 = help6; + + return 0; +} + +static void nft_ct_helper_obj_destroy(struct nft_object *obj) +{ + struct nft_ct_helper_obj *priv = nft_obj_data(obj); + + if (priv->helper4) + nf_conntrack_helper_put(priv->helper4); + if (priv->helper6) + nf_conntrack_helper_put(priv->helper6); +} + +static void nft_ct_helper_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb); + struct nf_conntrack_helper *to_assign = NULL; + struct nf_conn_help *help; + + if (!ct || + nf_ct_is_confirmed(ct) || + nf_ct_is_template(ct) || + priv->l4proto != nf_ct_protonum(ct)) + return; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + to_assign = priv->helper4; + break; + case NFPROTO_IPV6: + to_assign = priv->helper6; + break; + default: + WARN_ON_ONCE(1); + return; + } + + if (!to_assign) + return; + + if (test_bit(IPS_HELPER_BIT, &ct->status)) + return; + + help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC); + if (help) { + rcu_assign_pointer(help->helper, to_assign); + set_bit(IPS_HELPER_BIT, &ct->status); + } +} + +static int nft_ct_helper_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + const struct nft_ct_helper_obj *priv = nft_obj_data(obj); + const struct nf_conntrack_helper *helper = priv->helper4; + u16 family; + + if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name)) + return -1; + + if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto)) + return -1; + + if (priv->helper4 && priv->helper6) + family = NFPROTO_INET; + else if (priv->helper6) + family = NFPROTO_IPV6; + else + family = NFPROTO_IPV4; + + if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family))) + return -1; + + return 0; +} + +static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = { + [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING, + .len = NF_CT_HELPER_NAME_LEN - 1 }, + [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 }, + [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 }, +}; + +static struct nft_object_type nft_ct_helper_obj __read_mostly = { + .type = NFT_OBJECT_CT_HELPER, + .size = sizeof(struct nft_ct_helper_obj), + .maxattr = NFTA_CT_HELPER_MAX, + .policy = nft_ct_helper_policy, + .eval = nft_ct_helper_obj_eval, + .init = nft_ct_helper_obj_init, + .destroy = nft_ct_helper_obj_destroy, + .dump = nft_ct_helper_obj_dump, + .owner = THIS_MODULE, +}; + static int __init nft_ct_module_init(void) { int err; @@ -747,7 +930,14 @@ static int __init nft_ct_module_init(void) if (err < 0) goto err1; + err = nft_register_obj(&nft_ct_helper_obj); + if (err < 0) + goto err2; + return 0; + +err2: + nft_unregister_expr(&nft_notrack_type); err1: nft_unregister_expr(&nft_ct_type); return err; @@ -755,6 +945,7 @@ static int __init nft_ct_module_init(void) static void __exit nft_ct_module_exit(void) { + nft_unregister_obj(&nft_ct_helper_obj); nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } @@ -766,3 +957,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS_NFT_EXPR("ct"); MODULE_ALIAS_NFT_EXPR("notrack"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 049ad2d9ee66959367a051903563dca6ba654edb..66221ad891a9f281c1cf8e723c6a8c302e1f3741 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -82,8 +82,7 @@ static void nft_dynset_eval(const struct nft_expr *expr, nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { timeout = priv->timeout ? : set->timeout; *nft_set_ext_expiration(ext) = jiffies + timeout; - } else if (sexpr == NULL) - goto out; + } if (sexpr != NULL) sexpr->ops->eval(sexpr, regs, pkt); @@ -92,7 +91,7 @@ static void nft_dynset_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; return; } -out: + if (!priv->invert) regs->verdict.code = NFT_BREAK; } @@ -133,16 +132,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->invert = true; } - set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], - genmask); - if (IS_ERR(set)) { - if (tb[NFTA_DYNSET_SET_ID]) - set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_DYNSET_SET_ID], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME], + tb[NFTA_DYNSET_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (set->ops->update == NULL) return -EOPNOTSUPP; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index c308920b194cdbe5e3a2e9a09cfb8aab7267f588..1ec49fe5845f1ec8f289f411d456a5765303e244 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -98,14 +98,21 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, goto err; offset = i + priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; - memcpy(dest, opt + offset, priv->len); + if (priv->flags & NFT_EXTHDR_F_PRESENT) { + *dest = 1; + } else { + dest[priv->len / NFT_REG32_SIZE] = 0; + memcpy(dest, opt + offset, priv->len); + } return; } err: - regs->verdict.code = NFT_BREAK; + if (priv->flags & NFT_EXTHDR_F_PRESENT) + *dest = 0; + else + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { @@ -225,7 +232,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_exthdr_type __read_mostly = { .name = "exthdr", - .select_ops = &nft_exthdr_select_ops, + .select_ops = nft_exthdr_select_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 29a4906adc277cd3a2cf55242224e4c99fd070e7..21df8cccea6582e56d7bfbb6fba21f821b7c56d9 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -24,7 +24,8 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { EXPORT_SYMBOL(nft_fib_policy); #define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ - NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF | \ + NFTA_FIB_F_PRESENT) int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -112,7 +113,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - return nft_fib_validate(ctx, expr, NULL); + return 0; } EXPORT_SYMBOL_GPL(nft_fib_init); @@ -133,19 +134,22 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) } EXPORT_SYMBOL_GPL(nft_fib_dump); -void nft_fib_store_result(void *reg, enum nft_fib_result r, +void nft_fib_store_result(void *reg, const struct nft_fib *priv, const struct nft_pktinfo *pkt, int index) { struct net_device *dev; u32 *dreg = reg; - switch (r) { + switch (priv->result) { case NFT_FIB_RESULT_OIF: - *dreg = index; + *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; break; case NFT_FIB_RESULT_OIFNAME: dev = dev_get_by_index_rcu(nft_net(pkt), index); - strncpy(reg, dev ? dev->name : "", IFNAMSIZ); + if (priv->flags & NFTA_FIB_F_PRESENT) + *dreg = !!dev; + else + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); break; default: WARN_ON_ONCE(1); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c4dad1254ead01818fb5b2c0474b26f74762fee2..24f2f7567ddb779af44ee5050df36292f413d4d6 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -17,7 +17,7 @@ #include #include -struct nft_hash { +struct nft_jhash { enum nft_registers sreg:8; enum nft_registers dreg:8; u8 len; @@ -27,11 +27,11 @@ struct nft_hash { u32 offset; }; -static void nft_hash_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_jhash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_hash *priv = nft_expr_priv(expr); + struct nft_jhash *priv = nft_expr_priv(expr); const void *data = ®s->data[priv->sreg]; u32 h; @@ -39,6 +39,25 @@ static void nft_hash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } +struct nft_symhash { + enum nft_registers dreg:8; + u32 modulus; + u32 offset; +}; + +static void nft_symhash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + u32 h; + + h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + + regs->data[priv->dreg] = h + priv->offset; +} + static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, @@ -46,13 +65,14 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, + [NFTA_HASH_TYPE] = { .type = NLA_U32 }, }; -static int nft_hash_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_jhash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_expr_priv(expr); + struct nft_jhash *priv = nft_expr_priv(expr); u32 len; int err; @@ -95,10 +115,36 @@ static int nft_hash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_hash_dump(struct sk_buff *skb, - const struct nft_expr *expr) +static int nft_symhash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_symhash *priv = nft_expr_priv(expr); + + if (!tb[NFTA_HASH_DREG] || + !tb[NFTA_HASH_MODULUS]) + return -EINVAL; + + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); + + priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); + if (priv->modulus <= 1) + return -ERANGE; + + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_jhash_dump(struct sk_buff *skb, + const struct nft_expr *expr) { - const struct nft_hash *priv = nft_expr_priv(expr); + const struct nft_jhash *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg)) goto nla_put_failure; @@ -114,6 +160,28 @@ static int nft_hash_dump(struct sk_buff *skb, if (priv->offset != 0) if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_JENKINS))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static int nft_symhash_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_symhash *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) + goto nla_put_failure; + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_TYPE, htonl(NFT_HASH_SYM))) + goto nla_put_failure; return 0; nla_put_failure: @@ -121,17 +189,46 @@ static int nft_hash_dump(struct sk_buff *skb, } static struct nft_expr_type nft_hash_type; -static const struct nft_expr_ops nft_hash_ops = { +static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), - .eval = nft_hash_eval, - .init = nft_hash_init, - .dump = nft_hash_dump, + .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)), + .eval = nft_jhash_eval, + .init = nft_jhash_init, + .dump = nft_jhash_dump, }; +static const struct nft_expr_ops nft_symhash_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), + .eval = nft_symhash_eval, + .init = nft_symhash_init, + .dump = nft_symhash_dump, +}; + +static const struct nft_expr_ops * +nft_hash_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + u32 type; + + if (!tb[NFTA_HASH_TYPE]) + return &nft_jhash_ops; + + type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); + switch (type) { + case NFT_HASH_SYM: + return &nft_symhash_ops; + case NFT_HASH_JENKINS: + return &nft_jhash_ops; + default: + break; + } + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_hash_type __read_mostly = { .name = "hash", - .ops = &nft_hash_ops, + .select_ops = nft_hash_select_ops, .policy = nft_hash_policy, .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 728baf88295aab3d4f0e1272d551672ae5a2fb13..4717d77969271c324087ed7677df636b414e54ad 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -65,7 +65,7 @@ static int nft_immediate_init(const struct nft_ctx *ctx, return 0; err1: - nft_data_uninit(&priv->data, desc.type); + nft_data_release(&priv->data, desc.type); return err; } @@ -73,7 +73,8 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); + + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index c6baf412236d662b0d165fec3b4ff26579c9c6d8..18dd57a526513bd726944fa7ad7a9e41fcfb0251 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -17,9 +17,8 @@ #include #include -static DEFINE_SPINLOCK(limit_lock); - struct nft_limit { + spinlock_t lock; u64 last; u64 tokens; u64 tokens_max; @@ -34,7 +33,7 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) u64 now, tokens; s64 delta; - spin_lock_bh(&limit_lock); + spin_lock_bh(&limit->lock); now = ktime_get_ns(); tokens = limit->tokens + now - limit->last; if (tokens > limit->tokens_max) @@ -44,11 +43,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) delta = tokens - cost; if (delta >= 0) { limit->tokens = delta; - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&limit->lock); return limit->invert; } limit->tokens = tokens; - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&limit->lock); return !limit->invert; } @@ -86,6 +85,7 @@ static int nft_limit_init(struct nft_limit *limit, limit->invert = true; } limit->last = ktime_get_ns(); + spin_lock_init(&limit->lock); return 0; } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index e21aea7e5ec8f141ea3155d1da3c491484c00a73..475570e89ede710b323868792bb16fd5f09d1b09 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -71,16 +71,10 @@ static int nft_lookup_init(const struct nft_ctx *ctx, tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask); - if (IS_ERR(set)) { - if (tb[NFTA_LOOKUP_SET_ID]) { - set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_LOOKUP_SET_ID], - genmask); - } - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET], + tb[NFTA_LOOKUP_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (set->flags & NFT_SET_EVAL) return -EOPNOTSUPP; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 11ce016cd47948f3a2902402e0cbbda5e22d9438..6ac03d4266c9038cb4ffd3de412a216b769d07a1 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -46,10 +46,6 @@ int nft_masq_init(const struct nft_ctx *ctx, struct nft_masq *priv = nft_expr_priv(expr); int err; - err = nft_masq_validate(ctx, expr, NULL); - if (err) - return err; - if (tb[NFTA_MASQ_FLAGS]) { priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); if (priv->flags & ~NF_NAT_RANGE_MASK) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 7b60e01f38ff9f2f9fa7d28f6f99b4f889d190d7..5a60eb23a7ed8cc82cc2500c7f8c116ee6df2f30 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -372,10 +372,6 @@ int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_meta_set_validate(ctx, expr, NULL); - if (err < 0) - return err; - priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) @@ -471,7 +467,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", - .select_ops = &nft_meta_select_ops, + .select_ops = nft_meta_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 439e0bd152a004c98664a19ae6c920458fa6160a..ed548d06b6dda9a98888bb83f2baa6b45c965c15 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -138,10 +138,6 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - err = nft_nat_validate(ctx, expr, NULL); - if (err < 0) - return err; - if (tb[NFTA_NAT_FAMILY] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index a66b36097b8f4f3cc1d230d9943be852866fcfbd..5a3a52c71545ac15ae5c906dbf53d6d425c3c557 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -188,7 +188,7 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) static struct nft_expr_type nft_ng_type __read_mostly = { .name = "numgen", - .select_ops = &nft_ng_select_ops, + .select_ops = nft_ng_select_ops, .policy = nft_ng_policy, .maxattr = NFTA_NG_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 1ae8c49ca4a1fac06f69c41f68a36b7e85593adb..1dd428fbaaa3f836e7bcecb50355f60c14d8faad 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -116,16 +116,10 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, struct nft_set *set; int err; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask); - if (IS_ERR(set)) { - if (tb[NFTA_OBJREF_SET_ID]) { - set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_OBJREF_SET_ID], - genmask); - } - if (IS_ERR(set)) - return PTR_ERR(set); - } + set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME], + tb[NFTA_OBJREF_SET_ID], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index dbb6aaff67ec5c151f8c8c6333721421f8e85076..98613658d4ac5ce359e946efca4c93d9df216817 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -197,7 +197,7 @@ nft_queue_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_queue_type __read_mostly = { .name = "queue", - .select_ops = &nft_queue_select_ops, + .select_ops = nft_queue_select_ops, .policy = nft_queue_policy, .maxattr = NFTA_QUEUE_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 2d6fe3559912674385e7679557fc31ddeb901b38..25e33159be57882fcf9875725079188dfaa7d113 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -99,7 +99,8 @@ static int nft_quota_do_init(const struct nlattr * const tb[], return 0; } -static int nft_quota_obj_init(const struct nlattr * const tb[], +static int nft_quota_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], struct nft_object *obj) { struct nft_quota *priv = nft_obj_data(obj); diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9edc74eedc1021e836bc767defc02fac1a63333f..cedb96c3619fa991395602dff1363314d3de13ea 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -102,9 +102,9 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr priv->len = desc_from.len; return 0; err2: - nft_data_uninit(&priv->data_to, desc_to.type); + nft_data_release(&priv->data_to, desc_to.type); err1: - nft_data_uninit(&priv->data_from, desc_from.type); + nft_data_release(&priv->data_from, desc_from.type); return err; } diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 40dcd05146d5fb0f346e6e170d16de9813e92804..1e66538bf0ff24e3286ec6312e4d593c6197bd9b 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -47,10 +47,6 @@ int nft_redir_init(const struct nft_ctx *ctx, unsigned int plen; int err; - err = nft_redir_validate(ctx, expr, NULL); - if (err < 0) - return err; - plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { priv->sreg_proto_min = diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index c64de3f7379df551fa413a4af186f3c16886f112..29f5bd2377b0deaf7ede8ec0573bf71cfeef7478 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -42,11 +42,6 @@ int nft_reject_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int err; - - err = nft_reject_validate(ctx, expr, NULL); - if (err < 0) - return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 9e90a02cb104dad81daf84208902e90390a8f504..5a7fb5ff867d382f04633a2fab53997d0ca1f2b2 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -66,11 +66,7 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code, err; - - err = nft_reject_validate(ctx, expr, NULL); - if (err < 0) - return err; + int icmp_code; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 8ebbc2940f4c593d393c65bd5674d90feb585d98..b988162b5b15b9442b496abf2571a9cf7dbc66f3 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -257,6 +257,11 @@ static int nft_bitmap_init(const struct nft_set *set, static void nft_bitmap_destroy(const struct nft_set *set) { + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *be, *n; + + list_for_each_entry_safe(be, n, &priv->list, head) + nft_set_elem_destroy(set, be, true); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 5f652720fc78e6de437c100d31524588e729001c..3d3a6df4ce70ea0950a4f07cab75b3c54680e09a 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -222,7 +222,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem elem; int err; - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + err = rhashtable_walk_init(&priv->ht, &hti, GFP_ATOMIC); iter->err = err; if (err) return; @@ -352,7 +352,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); + nft_set_elem_destroy(arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 78dfbf9588b368107bdc385c7ef208a9abd3d297..fbdbaa00dd5fd751f6ab8f428de987017b5bdf79 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -18,9 +18,8 @@ #include #include -static DEFINE_SPINLOCK(nft_rbtree_lock); - struct nft_rbtree { + rwlock_t lock; struct rb_root root; }; @@ -44,14 +43,14 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { - const struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; const void *this; int d; - spin_lock_bh(&nft_rbtree_lock); + read_lock_bh(&priv->lock); parent = priv->root.rb_node; while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); @@ -75,7 +74,7 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } if (nft_rbtree_interval_end(rbe)) goto out; - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); *ext = &rbe->ext; return true; @@ -85,12 +84,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && !nft_rbtree_interval_end(interval)) { - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); *ext = &interval->ext; return true; } out: - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); return false; } @@ -117,17 +116,17 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, else if (d > 0) p = &parent->rb_right; else { - if (nft_set_elem_active(&rbe->ext, genmask)) { - if (nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(new)) - p = &parent->rb_left; - else if (!nft_rbtree_interval_end(rbe) && - nft_rbtree_interval_end(new)) - p = &parent->rb_right; - else { - *ext = &rbe->ext; - return -EEXIST; - } + if (nft_rbtree_interval_end(rbe) && + !nft_rbtree_interval_end(new)) { + p = &parent->rb_left; + } else if (!nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_end(new)) { + p = &parent->rb_right; + } else if (nft_set_elem_active(&rbe->ext, genmask)) { + *ext = &rbe->ext; + return -EEXIST; + } else { + p = &parent->rb_left; } } } @@ -140,12 +139,13 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) { + struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->priv; int err; - spin_lock_bh(&nft_rbtree_lock); + write_lock_bh(&priv->lock); err = __nft_rbtree_insert(net, set, rbe, ext); - spin_unlock_bh(&nft_rbtree_lock); + write_unlock_bh(&priv->lock); return err; } @@ -157,9 +157,9 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe = elem->priv; - spin_lock_bh(&nft_rbtree_lock); + write_lock_bh(&priv->lock); rb_erase(&rbe->node, &priv->root); - spin_unlock_bh(&nft_rbtree_lock); + write_unlock_bh(&priv->lock); } static void nft_rbtree_activate(const struct net *net, @@ -224,12 +224,12 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { - const struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - spin_lock_bh(&nft_rbtree_lock); + read_lock_bh(&priv->lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); @@ -242,13 +242,13 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, iter->err = iter->fn(ctx, set, iter, &elem); if (iter->err < 0) { - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); return; } cont: iter->count++; } - spin_unlock_bh(&nft_rbtree_lock); + read_unlock_bh(&priv->lock); } static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[]) @@ -262,6 +262,7 @@ static int nft_rbtree_init(const struct nft_set *set, { struct nft_rbtree *priv = nft_set_priv(set); + rwlock_init(&priv->lock); priv->root = RB_ROOT; return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 14857afc9937d30cae604fe839029593c1006944..1770c1d9b37fc14be9d9d8cf4721d21c745826ad 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -283,28 +283,30 @@ static int xt_obj_to_user(u16 __user *psize, u16 size, &U->u.user.revision, K->u.kernel.TYPE->revision) int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size) + int usersize, int size, int aligned_size) { usersize = usersize ? : size; if (copy_to_user(dst, src, usersize)) return -EFAULT; - if (usersize != size && clear_user(dst + usersize, size - usersize)) + if (usersize != aligned_size && + clear_user(dst + usersize, aligned_size - usersize)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(xt_data_to_user); -#define XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ +#define XT_DATA_TO_USER(U, K, TYPE) \ xt_data_to_user(U->data, K->data, \ K->u.kernel.TYPE->usersize, \ - C_SIZE ? : K->u.kernel.TYPE->TYPE##size) + K->u.kernel.TYPE->TYPE##size, \ + XT_ALIGN(K->u.kernel.TYPE->TYPE##size)) int xt_match_to_user(const struct xt_entry_match *m, struct xt_entry_match __user *u) { return XT_OBJ_TO_USER(u, m, match, 0) || - XT_DATA_TO_USER(u, m, match, 0); + XT_DATA_TO_USER(u, m, match); } EXPORT_SYMBOL_GPL(xt_match_to_user); @@ -312,7 +314,7 @@ int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u) { return XT_OBJ_TO_USER(u, t, target, 0) || - XT_DATA_TO_USER(u, t, target, 0); + XT_DATA_TO_USER(u, t, target); } EXPORT_SYMBOL_GPL(xt_target_to_user); @@ -611,6 +613,12 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); +#define COMPAT_XT_DATA_TO_USER(U, K, TYPE, C_SIZE) \ + xt_data_to_user(U->data, K->data, \ + K->u.kernel.TYPE->usersize, \ + C_SIZE, \ + COMPAT_XT_ALIGN(C_SIZE)) + int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size) { @@ -626,7 +634,7 @@ int xt_compat_match_to_user(const struct xt_entry_match *m, if (match->compat_to_user((void __user *)cm->data, m->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) + if (COMPAT_XT_DATA_TO_USER(cm, m, match, msize - sizeof(*cm))) return -EFAULT; } @@ -763,17 +771,8 @@ EXPORT_SYMBOL(xt_check_entry_offsets); */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { - unsigned int *off; - - off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN); + return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); - if (off) - return off; - - if (size < (SIZE_MAX / sizeof(unsigned int))) - off = vmalloc(size * sizeof(unsigned int)); - - return off; } EXPORT_SYMBOL(xt_alloc_entry_offsets); @@ -981,7 +980,7 @@ int xt_compat_target_to_user(const struct xt_entry_target *t, if (target->compat_to_user((void __user *)ct->data, t->data)) return -EFAULT; } else { - if (XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) + if (COMPAT_XT_DATA_TO_USER(ct, t, target, tsize - sizeof(*ct))) return -EFAULT; } @@ -1007,8 +1006,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!info) { - info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | - __GFP_NORETRY | __GFP_HIGHMEM, + info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL); if (!info) return NULL; @@ -1051,8 +1049,10 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, list_for_each_entry(t, &init_net.xt.tables[af], list) { if (strcmp(t->name, name)) continue; - if (!try_module_get(t->me)) + if (!try_module_get(t->me)) { + mutex_unlock(&xt[af].mutex); return NULL; + } mutex_unlock(&xt[af].mutex); if (t->table_init(net) != 0) { @@ -1114,7 +1114,7 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) - i->jumpstack = vzalloc(size); + i->jumpstack = kvzalloc(size, GFP_KERNEL); else i->jumpstack = kzalloc(size, GFP_KERNEL); if (i->jumpstack == NULL) @@ -1136,12 +1136,8 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) */ size = sizeof(void *) * i->stacksize * 2u; for_each_possible_cpu(cpu) { - if (size > PAGE_SIZE) - i->jumpstack[cpu] = vmalloc_node(size, - cpu_to_node(cpu)); - else - i->jumpstack[cpu] = kmalloc_node(size, - GFP_KERNEL, cpu_to_node(cpu)); + i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); if (i->jumpstack[cpu] == NULL) /* * Freeing will be done later on by the callers. The diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 19247a17e5114f1e278e30508085463d7396959e..c502419d63061c7aa5d3205187e7429294b3cc97 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -31,146 +31,76 @@ MODULE_ALIAS("ip6t_AUDIT"); MODULE_ALIAS("ebt_AUDIT"); MODULE_ALIAS("arpt_AUDIT"); -static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb, - unsigned int proto, unsigned int offset) -{ - switch (proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_UDPLITE: { - const __be16 *pptr; - __be16 _ports[2]; - - pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports); - if (pptr == NULL) { - audit_log_format(ab, " truncated=1"); - return; - } - - audit_log_format(ab, " sport=%hu dport=%hu", - ntohs(pptr[0]), ntohs(pptr[1])); - } - break; - - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: { - const u8 *iptr; - u8 _ih[2]; - - iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih); - if (iptr == NULL) { - audit_log_format(ab, " truncated=1"); - return; - } - - audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu", - iptr[0], iptr[1]); - - } - break; - } -} - -static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) +static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) { struct iphdr _iph; const struct iphdr *ih; - ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); - if (!ih) { - audit_log_format(ab, " truncated=1"); - return; - } + ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph); + if (!ih) + return false; - audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu", - &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol); + audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu", + &ih->saddr, &ih->daddr, ih->protocol); - if (ntohs(ih->frag_off) & IP_OFFSET) { - audit_log_format(ab, " frag=1"); - return; - } - - audit_proto(ab, skb, ih->protocol, ih->ihl * 4); + return true; } -static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) +static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) { struct ipv6hdr _ip6h; const struct ipv6hdr *ih; u8 nexthdr; __be16 frag_off; - int offset; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); - if (!ih) { - audit_log_format(ab, " truncated=1"); - return; - } + if (!ih) + return false; nexthdr = ih->nexthdr; - offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), - &nexthdr, &frag_off); + ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off); audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", &ih->saddr, &ih->daddr, nexthdr); - if (offset) - audit_proto(ab, skb, nexthdr, offset); + return true; } static unsigned int audit_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_audit_info *info = par->targinfo; struct audit_buffer *ab; + int fam = -1; if (audit_enabled == 0) goto errout; - ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (ab == NULL) goto errout; - audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", - info->type, xt_hooknum(par), skb->len, - xt_in(par) ? xt_inname(par) : "?", - xt_out(par) ? xt_outname(par) : "?"); - - if (skb->mark) - audit_log_format(ab, " mark=%#x", skb->mark); - - if (skb->dev && skb->dev->type == ARPHRD_ETHER) { - audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - - if (xt_family(par) == NFPROTO_BRIDGE) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - audit_ip4(ab, skb); - break; - - case htons(ETH_P_IPV6): - audit_ip6(ab, skb); - break; - } - } - } + audit_log_format(ab, "mark=%#x", skb->mark); switch (xt_family(par)) { + case NFPROTO_BRIDGE: + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; + break; + case htons(ETH_P_IPV6): + fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; + break; + } + break; case NFPROTO_IPV4: - audit_ip4(ab, skb); + fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; break; - case NFPROTO_IPV6: - audit_ip6(ab, skb); + fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; break; } -#ifdef CONFIG_NETWORK_SECMARK - if (skb->secmark) - audit_log_secctx(ab, skb->secmark); -#endif + if (fam == -1) + audit_log_format(ab, " saddr=? daddr=? proto=-1"); audit_log_end(ab); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index b008db0184b8a2a1679d5d4737427a672c36b560..623ef37de886fa22fd508a1261f0c1934f767e5f 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,11 +26,12 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->_nfct != 0) return XT_CONTINUE; - /* special case the untracked ct : we want the percpu object */ - if (!ct) - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - nf_ct_set(skb, ct, IP_CT_NEW); + if (ct) { + atomic_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); + } else { + nf_ct_set(skb, ct, IP_CT_UNTRACKED); + } return XT_CONTINUE; } @@ -95,7 +96,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); if (help == NULL) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -167,8 +168,10 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto err_put_timeout; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); - if (timeout_ext == NULL) + if (!timeout_ext) { ret = -ENOMEM; + goto err_put_timeout; + } rcu_read_unlock(); return ret; @@ -200,6 +203,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { struct nf_conntrack_zone zone; + struct nf_conn_help *help; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -248,7 +252,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (info->timeout[0]) { ret = xt_ct_set_timeout(ct, par, info->timeout); if (ret < 0) - goto err3; + goto err4; } __set_bit(IPS_CONFIRMED_BIT, &ct->status); nf_conntrack_get(&ct->ct_general); @@ -256,6 +260,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, info->ct = ct; return 0; +err4: + help = nfct_help(ct); + if (help) + nf_conntrack_helper_put(help->helper); err3: nf_ct_tmpl_free(ct); err2: @@ -335,10 +343,10 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { help = nfct_help(ct); if (help) - module_put(help->helper->me); + nf_conntrack_helper_put(help->helper); nf_ct_netns_put(par->net, par->family); @@ -412,8 +420,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) if (skb->_nfct != 0) return XT_CONTINUE; - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return XT_CONTINUE; } diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 02afaf48a7290b15e6f2d56b2ca2c5cb07455b4d..60e6dbe124605569f249f0e15c7304b231fe1500 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -84,7 +84,7 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, struct nf_conntrack_tuple *otuple; struct nf_conntrack_tuple *rtuple; - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return -1; otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 9a9884a39c0e9cc806a3a6b0b16bbd387a26d220..57ef175dfbfaa86db8368c10fbbd51ba3b2a691c 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -121,9 +121,6 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - if (nf_ct_is_untracked(ct)) - return false; - if (ct->master) hash = xt_cluster_hash(ct->master, info); else diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 7827128d5a95f5faf699d49c5dac796d03744a51..23372879e6e3004398454f6c96944422db8e1cc7 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -29,7 +29,7 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) bool invert = info->options & XT_CONNLABEL_OP_INVERT; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return invert; labels = nf_ct_labels_find(ct); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 9935d5029b0e52735b45bca017e52dd72f593c1e..ec377cc6a369c71025136fc4b8b3cf3fce177eea 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int32_t newmark; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return XT_CONTINUE; switch (info->mode) { @@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return false; return ((ct->mark & info->mask) == info->mark) ^ info->invert; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index c0fb217bc64969bd0da2c656d4418f8c80b29d14..39cf1d019240e61f504bc8ced96a63c41c9bd839 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -172,12 +172,11 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct) { - if (nf_ct_is_untracked(ct)) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - } else + if (ct) + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2a6dfe8b74d37f7d3d8ed0569650f6417b76d58a..762e1874f28b7b1faba5b48e94f743c0414689f3 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -119,7 +119,7 @@ static int cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) { if (revision == 1) { - struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; + struct hashlimit_cfg1 *cfg = from; to->mode = cfg->mode; to->avg = cfg->avg; @@ -895,7 +895,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; *pos = ++(*bucket); if (*pos >= htable->cfg.size) { @@ -909,7 +909,7 @@ static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; if (!IS_ERR(bucket)) kfree(bucket); @@ -980,7 +980,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show_v1(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; struct dsthash_ent *ent; if (!hlist_empty(&htable->hash[*bucket])) { @@ -994,7 +994,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) static int dl_seq_show(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; struct dsthash_ent *ent; if (!hlist_empty(&htable->hash[*bucket])) { diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 0fdc89064488050bf4e667b1c09a26169a326872..42540d26c2b8ec34ab62422fe5894588df2306b6 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -116,7 +116,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) { + if (ct == NULL) { match = false; goto out_put_cp; } diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index dab962df178795612580a1c8e22257213bdab07d..d27b5f1ea619f9696912b58bd5012358206725d7 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -18,6 +18,7 @@ #include struct xt_limit_priv { + spinlock_t lock; unsigned long prev; uint32_t credit; }; @@ -32,8 +33,6 @@ MODULE_ALIAS("ip6t_limit"); * see net/sched/sch_tbf.c in the linux source tree */ -static DEFINE_SPINLOCK(limit_lock); - /* Rusty: This is my (non-mathematically-inclined) understanding of this algorithm. The `average rate' in jiffies becomes your initial amount of credit `credit' and the most credit you can ever have @@ -72,7 +71,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) struct xt_limit_priv *priv = r->master; unsigned long now = jiffies; - spin_lock_bh(&limit_lock); + spin_lock_bh(&priv->lock); priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY; if (priv->credit > r->credit_cap) priv->credit = r->credit_cap; @@ -80,11 +79,11 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (priv->credit >= r->cost) { /* We're not limited. */ priv->credit -= r->cost; - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&priv->lock); return true; } - spin_unlock_bh(&limit_lock); + spin_unlock_bh(&priv->lock); return false; } @@ -126,6 +125,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par) r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } + spin_lock_init(&priv->lock); + return 0; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1d89a4eaf841a33e95247badfba064ce7dad0f63..3f6c4fa78bdb717c067b57b53a0900529dcc4000 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -388,10 +388,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, } sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; - if (sz <= PAGE_SIZE) - t = kzalloc(sz, GFP_KERNEL); - else - t = vzalloc(sz); + t = kvzalloc(sz, GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; @@ -532,7 +529,7 @@ static int recent_seq_show(struct seq_file *seq, void *v) &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); - seq_printf(seq, "\n"); + seq_putc(seq, '\n'); return 0; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 770bbec878f149f5688584982a552440ff5351fd..e75ef39669c5a9a5b72c9a1cec8b72020600eae1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -152,7 +152,7 @@ static int socket_mt_enable_defrag(struct net *net, int family) switch (family) { case NFPROTO_IPV4: return nf_defrag_ipv4_enable(net); -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: return nf_defrag_ipv6_enable(net); #endif diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5746a33789a50ced02746d57403970872f2cf29f..5fbd79194d21ee1f26fa669162b8de92d965f8c8 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -28,14 +28,13 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned int statebit; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (!ct) + if (ct) + statebit = XT_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) + statebit = XT_STATE_UNTRACKED; + else statebit = XT_STATE_INVALID; - else { - if (nf_ct_is_untracked(ct)) - statebit = XT_STATE_UNTRACKED; - else - statebit = XT_STATE_BIT(ctinfo); - } + return (sinfo->statemask & statebit); } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 4149d3e6358976f093dbcb25dee79d10f6275231..9aacf2da3d98feaf94319f7315b255323f4416f2 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -101,7 +101,7 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + netlbl_cipsov4_genl_policy, NULL) != 0) return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) @@ -148,7 +148,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + netlbl_cipsov4_genl_policy, NULL) != 0) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); @@ -170,10 +170,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { - if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) - goto add_std_failure; + if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) + goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { case NLBL_CIPSOV4_A_MLSLVLLOC: @@ -236,7 +236,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + netlbl_cipsov4_genl_policy, NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_a, @@ -244,8 +244,9 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy) != 0) + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 596eaff66649e5955d6c0f349f062b6d8360dc2d..7586d446d7dcafc5c44b43190398840b68107d1f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -78,14 +79,6 @@ struct listeners { /* state bits */ #define NETLINK_S_CONGESTED 0x0 -/* flags */ -#define NETLINK_F_KERNEL_SOCKET 0x1 -#define NETLINK_F_RECV_PKTINFO 0x2 -#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 -#define NETLINK_F_RECV_NO_ENOBUFS 0x8 -#define NETLINK_F_LISTEN_ALL_NSID 0x10 -#define NETLINK_F_CAP_ACK 0x20 - static inline int netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; @@ -1423,7 +1416,8 @@ static void do_one_broadcast(struct sock *sk, goto out; } NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); - NETLINK_CB(p->skb2).nsid_is_set = true; + if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) + NETLINK_CB(p->skb2).nsid_is_set = true; val = netlink_broadcast_deliver(sk, p->skb2); if (val < 0) { netlink_overrun(sk); @@ -1660,6 +1654,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, nlk->flags &= ~NETLINK_F_CAP_ACK; err = 0; break; + case NETLINK_EXT_ACK: + if (val) + nlk->flags |= NETLINK_F_EXT_ACK; + else + nlk->flags &= ~NETLINK_F_EXT_ACK; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1744,6 +1745,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_EXT_ACK: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0; + if (put_user(len, optlen) || put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2275,21 +2285,44 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, } EXPORT_SYMBOL(__netlink_dump_start); -void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) +void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack) { struct sk_buff *skb; struct nlmsghdr *rep; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg); + size_t tlvlen = 0; struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); + unsigned int flags = 0; /* Error messages get the original request appened, unless the user - * requests to cap the error message. + * requests to cap the error message, and get extra error data if + * requested. */ - if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) - payload += nlmsg_len(nlh); + if (err) { + if (!(nlk->flags & NETLINK_F_CAP_ACK)) + payload += nlmsg_len(nlh); + else + flags |= NLM_F_CAPPED; + if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); + } + } else { + flags |= NLM_F_CAPPED; + + if (nlk->flags & NETLINK_F_EXT_ACK && + extack && extack->cookie_len) + tlvlen += nla_total_size(extack->cookie_len); + } + + if (tlvlen) + flags |= NLM_F_ACK_TLVS; - skb = nlmsg_new(payload, GFP_KERNEL); + skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -2305,17 +2338,42 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) } rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - NLMSG_ERROR, payload, 0); + NLMSG_ERROR, payload, flags); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); + + if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (err) { + if (extack->_msg) + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, + extack->_msg)); + if (extack->bad_attr && + !WARN_ON((u8 *)extack->bad_attr < in_skb->data || + (u8 *)extack->bad_attr >= in_skb->data + + in_skb->len)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, + (u8 *)extack->bad_attr - + in_skb->data)); + } else { + if (extack->cookie_len) + WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, + extack->cookie_len, + extack->cookie)); + } + } + + nlmsg_end(skb, rep); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, - struct nlmsghdr *)) + struct nlmsghdr *, + struct netlink_ext_ack *)) { + struct netlink_ext_ack extack = {}; struct nlmsghdr *nlh; int err; @@ -2336,13 +2394,13 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; - err = cb(skb, nlh); + err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) - netlink_ack(skb, nlh, err); + netlink_ack(skb, nlh, err, &extack); skip: msglen = NLMSG_ALIGN(nlh->nlmsg_len); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 4fdb3831897775547f77c069a8018c0d2a253c8c..3490f2430532cf753118e14883b52c4af151b18c 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -6,6 +6,15 @@ #include #include +/* flags */ +#define NETLINK_F_KERNEL_SOCKET 0x1 +#define NETLINK_F_RECV_PKTINFO 0x2 +#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 +#define NETLINK_F_CAP_ACK 0x20 +#define NETLINK_F_EXT_ACK 0x40 + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index a5546249fb1022b52144a40717b8a4268755b972..8faa20b4d4573f3e2b510f03f3d4a85a70352559 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) nlk->groups); } +static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + u32 flags = 0; + + if (nlk->cb_running) + flags |= NDIAG_FLAG_CB_RUNNING; + if (nlk->flags & NETLINK_F_RECV_PKTINFO) + flags |= NDIAG_FLAG_PKTINFO; + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) + flags |= NDIAG_FLAG_BROADCAST_ERROR; + if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) + flags |= NDIAG_FLAG_NO_ENOBUFS; + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + flags |= NDIAG_FLAG_LISTEN_ALL_NSID; + if (nlk->flags & NETLINK_F_CAP_ACK) + flags |= NDIAG_FLAG_CAP_ACK; + + return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags); +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_diag_req *req, u32 portid, u32 seq, u32 flags, int sk_ino) @@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->ndiag_show & NDIAG_SHOW_FLAGS) && + sk_diag_put_flags(sk, skb)) + goto out_nlmsg_trim; + nlmsg_end(skb, nlh); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 92e0981f74040d7029b65863167b459322612024..10f8b4cff40accd99898ea4d0696a2ba5d90080c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -497,7 +497,8 @@ static int genl_lock_done(struct netlink_callback *cb) static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { const struct genl_ops *ops; struct net *net = sock_net(skb->sk); @@ -573,7 +574,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (attrbuf) { err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - ops->policy); + ops->policy, extack); if (err < 0) goto out; } @@ -584,6 +585,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = attrbuf; + info.extack = extack; genl_info_net_set(&info, net); memset(&info.user_ptr, 0, sizeof(info.user_ptr)); @@ -605,7 +607,8 @@ static int genl_family_rcv_msg(const struct genl_family *family, return err; } -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { const struct genl_family *family; int err; @@ -617,7 +620,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!family->parallel_ops) genl_lock(); - err = genl_family_rcv_msg(family, skb, nlh); + err = genl_family_rcv_msg(family, skb, nlh, extack); if (!family->parallel_ops) genl_unlock(); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 03f3d5c7beb8d173fae25456b278ee9cf3e04c5e..6b0850e63e09cf747a498f913f7eabe11e59c380 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -119,7 +119,8 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) u32 idx; rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - attrbuf, nfc_genl_family.maxattr, nfc_genl_policy); + attrbuf, nfc_genl_family.maxattr, nfc_genl_policy, + NULL); if (rc < 0) return ERR_PTR(rc); @@ -303,6 +304,17 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev) return -EMSGSIZE; } +static int nfc_genl_setup_device_added(struct nfc_dev *dev, struct sk_buff *msg) +{ + if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || + nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || + nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || + nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) + return -1; + return 0; +} + int nfc_genl_device_added(struct nfc_dev *dev) { struct sk_buff *msg; @@ -317,10 +329,7 @@ int nfc_genl_device_added(struct nfc_dev *dev) if (!hdr) goto free_msg; - if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || - nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || - nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || - nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up)) + if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -596,11 +605,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (cb) genl_dump_check_consistent(cb, hdr, &nfc_genl_family); - if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || - nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || - nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || - nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || - nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) + if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -918,7 +923,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) rc = nfc_activate_target(dev, target_idx, protocol); nfc_put_device(dev); - return 0; + return rc; } static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) @@ -1161,7 +1166,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, - nfc_sdp_genl_policy); + nfc_sdp_genl_policy, info->extack); if (rc != 0) { rc = -EINVAL; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c82301ce3fffb6caeb41a9882a53289ec7b63c8d..e4610676299bcdac626db1a30cd4da44ccc62c0b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -44,13 +44,10 @@ #include "conntrack.h" #include "vport.h" -static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, - const struct nlattr *attr, int len); - struct deferred_action { struct sk_buff *skb; const struct nlattr *actions; + int actions_len; /* Store pkt_key clone when creating deferred action. */ struct sw_flow_key pkt_key; @@ -82,14 +79,31 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; -struct recirc_keys { +struct action_flow_keys { struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; }; static struct action_fifo __percpu *action_fifos; -static struct recirc_keys __percpu *recirc_keys; +static struct action_flow_keys __percpu *flow_keys; static DEFINE_PER_CPU(int, exec_actions_level); +/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' + * space. Return NULL if out of key spaces. + */ +static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) +{ + struct action_flow_keys *keys = this_cpu_ptr(flow_keys); + int level = this_cpu_read(exec_actions_level); + struct sw_flow_key *key = NULL; + + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + key = &keys->key[level - 1]; + *key = *key_; + } + + return key; +} + static void action_fifo_init(struct action_fifo *fifo) { fifo->head = 0; @@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo) /* Return true if fifo is not full */ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, - const struct sw_flow_key *key, - const struct nlattr *attr) + const struct sw_flow_key *key, + const struct nlattr *actions, + const int actions_len) { struct action_fifo *fifo; struct deferred_action *da; @@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, da = action_fifo_put(fifo); if (da) { da->skb = skb; - da->actions = attr; + da->actions = actions; + da->actions_len = actions_len; da->pkt_key = *key; } @@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key) return !(key->mac_proto & SW_FLOW_KEY_INVALID); } +static int clone_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + u32 recirc_id, + const struct nlattr *actions, int len, + bool last, bool clone_flow_key); + static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { @@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +/* When 'last' is true, sample() should always consume the 'skb'. + * Otherwise, sample() should keep 'skb' intact regardless what + * actions are executed within sample(). + */ static int sample(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, - const struct nlattr *actions, int actions_len) + bool last) { - const struct nlattr *acts_list = NULL; - const struct nlattr *a; - int rem; - u32 cutlen = 0; - - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { - u32 probability; - - switch (nla_type(a)) { - case OVS_SAMPLE_ATTR_PROBABILITY: - probability = nla_get_u32(a); - if (!probability || prandom_u32() > probability) - return 0; - break; - - case OVS_SAMPLE_ATTR_ACTIONS: - acts_list = a; - break; - } - } - - rem = nla_len(acts_list); - a = nla_data(acts_list); - - /* Actions list is empty, do nothing */ - if (unlikely(!rem)) + struct nlattr *actions; + struct nlattr *sample_arg; + int rem = nla_len(attr); + const struct sample_arg *arg; + bool clone_flow_key; + + /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); + + if ((arg->probability != U32_MAX) && + (!arg->probability || prandom_u32() > arg->probability)) { + if (last) + consume_skb(skb); return 0; - - /* The only known usage of sample action is having a single user-space - * action, or having a truncate action followed by a single user-space - * action. Treat this usage as a special case. - * The output_userspace() should clone the skb to be sent to the - * user space. This skb will be consumed by its caller. - */ - if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) { - struct ovs_action_trunc *trunc = nla_data(a); - - if (skb->len > trunc->max_len) - cutlen = skb->len - trunc->max_len; - - a = nla_next(a, &rem); } - if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && - nla_is_last(a, rem))) - return output_userspace(dp, skb, key, a, actions, - actions_len, cutlen); - - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - /* Skip the sample action when out of memory. */ - return 0; - - if (!add_deferred_actions(skb, key, a)) { - if (net_ratelimit()) - pr_warn("%s: deferred actions limit reached, dropping sample action\n", - ovs_dp_name(dp)); - - kfree_skb(skb); - } - return 0; + clone_flow_key = !arg->exec; + return clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); } static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, @@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb, static int execute_recirc(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *a, int rem) + const struct nlattr *a, bool last) { - struct deferred_action *da; - int level; + u32 recirc_id; if (!is_flow_key_valid(key)) { int err; @@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, } BUG_ON(!is_flow_key_valid(key)); - if (!nla_is_last(a, rem)) { - /* Recirc action is the not the last action - * of the action list, need to clone the skb. - */ - skb = skb_clone(skb, GFP_ATOMIC); - - /* Skip the recirc action when out of memory, but - * continue on with the rest of the action list. - */ - if (!skb) - return 0; - } - - level = this_cpu_read(exec_actions_level); - if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { - struct recirc_keys *rks = this_cpu_ptr(recirc_keys); - struct sw_flow_key *recirc_key = &rks->key[level - 1]; - - *recirc_key = *key; - recirc_key->recirc_id = nla_get_u32(a); - ovs_dp_process_packet(skb, recirc_key); - - return 0; - } - - da = add_deferred_actions(skb, key, NULL); - if (da) { - da->pkt_key.recirc_id = nla_get_u32(a); - } else { - kfree_skb(skb); - - if (net_ratelimit()) - pr_warn("%s: deferred action limit reached, drop recirc action\n", - ovs_dp_name(dp)); - } - - return 0; + recirc_id = nla_get_u32(a); + return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true); } /* Execute a list of actions against 'skb'. */ @@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_vlan(skb, key); break; - case OVS_ACTION_ATTR_RECIRC: - err = execute_recirc(dp, skb, key, a, rem); - if (nla_is_last(a, rem)) { + case OVS_ACTION_ATTR_RECIRC: { + bool last = nla_is_last(a, rem); + + err = execute_recirc(dp, skb, key, a, last); + if (last) { /* If this is the last action, the skb has * been consumed or freed. * Return immediately. @@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return err; } break; + } case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, key, nla_data(a)); @@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_masked_set_action(skb, key, nla_data(a)); break; - case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, key, a, attr, len); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = sample(dp, skb, key, a, last); + if (last) + return err; + break; + } case OVS_ACTION_ATTR_CT: if (!is_flow_key_valid(key)) { @@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return 0; } +/* Execute the actions on the clone of the packet. The effect of the + * execution does not affect the original 'skb' nor the original 'key'. + * + * The execution may be deferred in case the actions can not be executed + * immediately. + */ +static int clone_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 recirc_id, + const struct nlattr *actions, int len, + bool last, bool clone_flow_key) +{ + struct deferred_action *da; + struct sw_flow_key *clone; + + skb = last ? skb : skb_clone(skb, GFP_ATOMIC); + if (!skb) { + /* Out of memory, skip this action. + */ + return 0; + } + + /* When clone_flow_key is false, the 'key' will not be change + * by the actions, then the 'key' can be used directly. + * Otherwise, try to clone key from the next recursion level of + * 'flow_keys'. If clone is successful, execute the actions + * without deferring. + */ + clone = clone_flow_key ? clone_key(key) : key; + if (clone) { + int err = 0; + + if (actions) { /* Sample action */ + if (clone_flow_key) + __this_cpu_inc(exec_actions_level); + + err = do_execute_actions(dp, skb, clone, + actions, len); + + if (clone_flow_key) + __this_cpu_dec(exec_actions_level); + } else { /* Recirc action */ + clone->recirc_id = recirc_id; + ovs_dp_process_packet(skb, clone); + } + return err; + } + + /* Out of 'flow_keys' space. Defer actions */ + da = add_deferred_actions(skb, key, actions, len); + if (da) { + if (!actions) { /* Recirc action */ + key = &da->pkt_key; + key->recirc_id = recirc_id; + } + } else { + /* Out of per CPU action FIFO space. Drop the 'skb' and + * log an error. + */ + kfree_skb(skb); + + if (net_ratelimit()) { + if (actions) { /* Sample action */ + pr_warn("%s: deferred action limit reached, drop sample action\n", + ovs_dp_name(dp)); + } else { /* Recirc action */ + pr_warn("%s: deferred action limit reached, drop recirc action\n", + ovs_dp_name(dp)); + } + } + } + return 0; +} + static void process_deferred_actions(struct datapath *dp) { struct action_fifo *fifo = this_cpu_ptr(action_fifos); @@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp) struct sk_buff *skb = da->skb; struct sw_flow_key *key = &da->pkt_key; const struct nlattr *actions = da->actions; + int actions_len = da->actions_len; if (actions) - do_execute_actions(dp, skb, key, actions, - nla_len(actions)); + do_execute_actions(dp, skb, key, actions, actions_len); else ovs_dp_process_packet(skb, key); } while (!action_fifo_is_empty(fifo)); @@ -1323,8 +1354,8 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; - recirc_keys = alloc_percpu(struct recirc_keys); - if (!recirc_keys) { + flow_keys = alloc_percpu(struct action_flow_keys); + if (!flow_keys) { free_percpu(action_fifos); return -ENOMEM; } @@ -1335,5 +1366,5 @@ int action_fifos_init(void) void action_fifos_exit(void) { free_percpu(action_fifos); - free_percpu(recirc_keys); + free_percpu(flow_keys); } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 7b2c2fce408a02d4251f03a2e3f0b4d9e7fccb80..08679ebb3068298a58a081926c6a7dd5a2a73d17 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -66,7 +66,9 @@ struct ovs_conntrack_info { u8 commit : 1; u8 nat : 3; /* enum ovs_ct_nat */ u8 force : 1; + u8 have_eventmask : 1; u16 family; + u32 eventmask; /* Mask of 1 << IPCT_*. */ struct md_mark mark; struct md_labels labels; #ifdef CONFIG_NF_NAT_NEEDED @@ -373,7 +375,7 @@ static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, } /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the - * IPCT_LABEL bit it set in the event cache. + * IPCT_LABEL bit is set in the event cache. */ nf_conntrack_event_cache(IPCT_LABEL, ct); @@ -514,10 +516,38 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, u16 proto, const struct sk_buff *skb) { struct nf_conntrack_tuple tuple; + struct nf_conntrack_expect *exp; if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) return NULL; - return __nf_ct_expect_find(net, zone, &tuple); + + exp = __nf_ct_expect_find(net, zone, &tuple); + if (exp) { + struct nf_conntrack_tuple_hash *h; + + /* Delete existing conntrack entry, if it clashes with the + * expectation. This can happen since conntrack ALGs do not + * check for clashes between (new) expectations and existing + * conntrack entries. nf_conntrack_in() will check the + * expectations only if a conntrack entry can not be found, + * which can lead to OVS finding the expectation (here) in the + * init direction, but which will not be removed by the + * nf_conntrack_in() call, if a matching conntrack entry is + * found instead. In this case all init direction packets + * would be reported as new related packets, while reply + * direction packets would be reported as un-related + * established packets. + */ + h = nf_conntrack_find_get(net, zone, &tuple); + if (h) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + nf_ct_delete(ct, 0, 0); + nf_conntrack_put(&ct->ct_general); + } + } + + return exp; } /* This replicates logic from nf_conntrack_core.c that is not exported. */ @@ -795,11 +825,6 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, enum nf_nat_manip_type maniptype; int err; - if (nf_ct_is_untracked(ct)) { - /* A NAT action may only be performed on tracked packets. */ - return NF_ACCEPT; - } - /* Add NAT extension if not confirmed yet. */ if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) return NF_ACCEPT; /* Can't NAT. */ @@ -1007,6 +1032,20 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, if (!ct) return 0; + /* Set the conntrack event mask if given. NEW and DELETE events have + * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener + * typically would receive many kinds of updates. Setting the event + * mask allows those events to be filtered. The set event mask will + * remain in effect for the lifetime of the connection unless changed + * by a further CT action with both the commit flag and the eventmask + * option. */ + if (info->have_eventmask) { + struct nf_conntrack_ecache *cache = nf_ct_ecache_find(ct); + + if (cache) + cache->ctmask = info->eventmask; + } + /* Apply changes before confirming the connection so that the initial * conntrack NEW netlink event carries the values given in the CT * action. @@ -1084,7 +1123,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); if (!help) { - module_put(helper->me); + nf_conntrack_helper_put(helper); return -ENOMEM; } @@ -1238,6 +1277,8 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { /* NAT length is checked when parsing the nested attributes. */ [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, #endif + [OVS_CT_ATTR_EVENTMASK] = { .minlen = sizeof(u32), + .maxlen = sizeof(u32) }, }; static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, @@ -1316,6 +1357,11 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, break; } #endif + case OVS_CT_ATTR_EVENTMASK: + info->have_eventmask = true; + info->eventmask = nla_get_u32(a); + break; + default: OVS_NLERR(log, "Unknown conntrack attr (%d)", type); @@ -1515,6 +1561,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, ct_info->helper->name)) return -EMSGSIZE; } + if (ct_info->have_eventmask && + nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask)) + return -EMSGSIZE; + #ifdef CONFIG_NF_NAT_NEEDED if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) return -EMSGSIZE; @@ -1534,7 +1584,7 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { if (ct_info->helper) - module_put(ct_info->helper->me); + nf_conntrack_helper_put(ct_info->helper); if (ct_info->ct) nf_ct_tmpl_free(ct_info->ct); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9c62b6325f7adc92420b605cf4cdc3b26da437fc..7b17da9a94a0c15ab8ec28205e025d9554c798fb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1353,7 +1353,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int err; err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, - OVS_FLOW_ATTR_MAX, flow_policy); + OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c6e9377436df1e93081c825c142b712a811277b..da931bdef8a7b5f25c189a5fe76e5fc2c4c1efdf 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -34,8 +34,6 @@ #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 -#define SAMPLE_ACTION_DEPTH 3 - /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given * datapath. diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 1105a838bab83f9fe647423060d08e4a415bab61..7e1d8a2afa63687aeb87121dbae614bad2ec34a3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -59,6 +59,39 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +static bool actions_may_change_flow(const struct nlattr *actions) +{ + struct nlattr *nla; + int rem; + + nla_for_each_nested(nla, actions, rem) { + u16 action = nla_type(nla); + + switch (action) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_RECIRC: + case OVS_ACTION_ATTR_TRUNC: + case OVS_ACTION_ATTR_USERSPACE: + break; + + case OVS_ACTION_ATTR_CT: + case OVS_ACTION_ATTR_HASH: + case OVS_ACTION_ATTR_POP_ETH: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_ETH: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SET_MASKED: + default: + return true; + } + } + return false; +} + static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { @@ -2023,18 +2056,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci, bool log) + __be16 eth_type, __be16 vlan_tci, + bool log, bool last) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem, start, err, st_acts; + int rem, start, err; + struct sample_arg arg; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -2058,20 +2093,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32), log); + + /* When both skb and flow may be changed, put the sample + * into a deferred fifo. On the other hand, if only skb + * may be modified, the actions can be executed in place. + * + * Do this analysis at the flow installation time. + * Set 'clone_action->exec' to true if the actions can be + * executed without being deferred. + * + * If the sample is the last action, it can always be excuted + * rather than deferred. + */ + arg.exec = last || !actions_may_change_flow(actions); + arg.probability = nla_get_u32(probability); + + err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg), + log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); - if (st_acts < 0) - return st_acts; - err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, log); + if (err) return err; - add_nested_action_end(*sfa, st_acts); add_nested_action_end(*sfa, start); return 0; @@ -2380,8 +2427,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, - attr, userspace_policy); + error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, + userspace_policy, NULL); if (error) return error; @@ -2408,16 +2455,13 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -2555,13 +2599,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; break; - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(net, a, key, depth, sfa, - eth_type, vlan_tci, log); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = validate_and_copy_sample(net, a, key, sfa, + eth_type, vlan_tci, + log, last); if (err) return err; skip_copy = true; break; + } case OVS_ACTION_ATTR_CT: err = ovs_ct_copy_action(net, a, key, sfa, log); @@ -2615,7 +2663,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return PTR_ERR(*sfa); (*sfa)->orig_len = nla_len(attr); - err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, + err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); @@ -2623,39 +2671,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; } -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +static int sample_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) { - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; + struct nlattr *start, *ac_start = NULL, *sample_arg; + int err = 0, rem = nla_len(attr); + const struct sample_arg *arg; + struct nlattr *actions; start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, - sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) { + err = -EMSGSIZE; + goto out; + } + + ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(actions, rem, skb); + +out: + if (err) { + nla_nest_cancel(skb, ac_start); + nla_nest_cancel(skb, start); + } else { + nla_nest_end(skb, ac_start); + nla_nest_end(skb, start); } - nla_nest_end(skb, start); return err; } diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 89193a634da45bb78498ecd28ca39c455e32f2e4..04a3128adcf0adcc2603e45c9054cdd2fd0e7a0b 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -94,7 +94,6 @@ static void internal_dev_destructor(struct net_device *dev) struct vport *vport = ovs_internal_dev_get_vport(dev); ovs_vport_free(vport); - free_netdev(dev); } static void @@ -156,7 +155,8 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_PHONY_HEADROOM | IFF_NO_QUEUE; - netdev->destructor = internal_dev_destructor; + netdev->needs_free_netdev = true; + netdev->priv_destructor = internal_dev_destructor; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 7eb955e453e6d657d13d0aa7b35ce7c8b7de2f15..869acb3b3d3f25a75ee4912359ee0fea31fa7c2d 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -70,7 +70,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, if (nla_len(attr) < sizeof(struct nlattr)) return -EINVAL; - err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy, + NULL); if (err < 0) return err; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8489beff5c25c971067f38833ed4a790a98dd86e..e3eeed19cc7a130e80e24a3d67776a5b5a3c2698 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1496,6 +1496,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, DEFINE_MUTEX(fanout_mutex); EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); +static u16 fanout_next_id; static void __fanout_link(struct sock *sk, struct packet_sock *po) { @@ -1629,6 +1630,36 @@ static void fanout_release_data(struct packet_fanout *f) }; } +static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id) +{ + struct packet_fanout *f; + + list_for_each_entry(f, &fanout_list, list) { + if (f->id == candidate_id && + read_pnet(&f->net) == sock_net(sk)) { + return false; + } + } + return true; +} + +static bool fanout_find_new_id(struct sock *sk, u16 *new_id) +{ + u16 id = fanout_next_id; + + do { + if (__fanout_id_is_free(sk, id)) { + *new_id = id; + fanout_next_id = id + 1; + return true; + } + + id++; + } while (id != fanout_next_id); + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_rollover *rollover = NULL; @@ -1676,6 +1707,19 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) po->rollover = rollover; } + if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { + if (id != 0) { + err = -EINVAL; + goto out; + } + if (!fanout_find_new_id(sk, &id)) { + err = -ENOMEM; + goto out; + } + /* ephemeral flag for the first socket in the group: drop it */ + flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8); + } + match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -2614,13 +2658,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } - sockc.tsflags = po->sk.sk_tsflags; - if (msg->msg_controllen) { - err = sock_cmsg_send(&po->sk, msg, &sockc); - if (unlikely(err)) - goto out; - } - err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2628,6 +2665,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + sockc.tsflags = po->sk.sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out_put; + } + if (po->sk.sk_socket->type == SOCK_RAW) reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size @@ -3836,6 +3880,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_HDRLEN: if (len > sizeof(int)) len = sizeof(int); + if (len < sizeof(int)) + return -EINVAL; if (copy_from_user(&val, optval, len)) return -EFAULT; switch (val) { diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 21c28b51be9439b20369b48077ac8392db7c3150..2c9337946e3038f130d198bb51d061a648ef1ed4 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -236,7 +236,7 @@ static void gprs_setup(struct net_device *dev) dev->tx_queue_len = 10; dev->netdev_ops = &gprs_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; } /* diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index bc5ee5fbe6ae3845b6ec67eaea2beaec2d12ba08..45b3af3080d8349f9e9c6e7f424f0548a79a42b0 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,8 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U8 }, }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -78,7 +79,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy, + extack); if (err < 0) return err; @@ -226,7 +228,8 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { [RTA_OIF] = { .type = NLA_U32 }, }; -static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; @@ -243,7 +246,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy); + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy, + extack); if (err < 0) return err; diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig index b83c6807a5ae5cedc63b073c7f928b1776f42524..326fd97444f5bed5c82af7d632d8a4424f9908d3 100644 --- a/net/qrtr/Kconfig +++ b/net/qrtr/Kconfig @@ -16,7 +16,7 @@ if QRTR config QRTR_SMD tristate "SMD IPC Router channels" - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG || (COMPILE_TEST && RPMSG=n) ---help--- Say Y here to support SMD based ipcrouter channels. SMD is the most common transport for IPC Router. diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index ae5ac175b2bef96ffa614bc799db5cd90a7bdc08..a9a8c7d5a4a983b9be12fe85a7f71f3e6a825f19 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -658,7 +658,9 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } if (plen != len) { - skb_pad(skb, plen - len); + rc = skb_pad(skb, plen - len); + if (rc) + goto out_node; skb_put(skb, plen - len); } @@ -943,7 +945,8 @@ static const struct nla_policy qrtr_policy[IFA_MAX + 1] = { [IFA_LOCAL] = { .type = NLA_U32 }, }; -static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) +static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct nlattr *tb[IFA_MAX + 1]; struct ifaddrmsg *ifm; @@ -957,7 +960,7 @@ static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) ASSERT_RTNL(); - rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy); + rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, extack); if (rc < 0) return rc; diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c index 0d11132b3370a4024be644dede74a77d188a4f64..50615d5efac1529a0fd617c2692b1e9419da7137 100644 --- a/net/qrtr/smd.c +++ b/net/qrtr/smd.c @@ -14,21 +14,21 @@ #include #include -#include +#include #include "qrtr.h" struct qrtr_smd_dev { struct qrtr_endpoint ep; - struct qcom_smd_channel *channel; + struct rpmsg_endpoint *channel; struct device *dev; }; /* from smd to qrtr */ -static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel, - const void *data, size_t len) +static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev, + void *data, int len, void *priv, u32 addr) { - struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel); + struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev); int rc; if (!qdev) @@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) if (rc) goto out; - rc = qcom_smd_send(qdev->channel, skb->data, skb->len); + rc = rpmsg_send(qdev->channel, skb->data, skb->len); out: if (rc) @@ -64,57 +64,55 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) return rc; } -static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) +static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev) { struct qrtr_smd_dev *qdev; int rc; - qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); + qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL); if (!qdev) return -ENOMEM; - qdev->channel = sdev->channel; - qdev->dev = &sdev->dev; + qdev->channel = rpdev->ept; + qdev->dev = &rpdev->dev; qdev->ep.xmit = qcom_smd_qrtr_send; rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); if (rc) return rc; - qcom_smd_set_drvdata(sdev->channel, qdev); - dev_set_drvdata(&sdev->dev, qdev); + dev_set_drvdata(&rpdev->dev, qdev); - dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); + dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n"); return 0; } -static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) +static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev) { - struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); + struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev); qrtr_endpoint_unregister(&qdev->ep); - dev_set_drvdata(&sdev->dev, NULL); + dev_set_drvdata(&rpdev->dev, NULL); } -static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { +static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = { { "IPCRTR" }, {} }; -static struct qcom_smd_driver qcom_smd_qrtr_driver = { +static struct rpmsg_driver qcom_smd_qrtr_driver = { .probe = qcom_smd_qrtr_probe, .remove = qcom_smd_qrtr_remove, .callback = qcom_smd_qrtr_callback, - .smd_match_table = qcom_smd_qrtr_smd_match, - .driver = { + .id_table = qcom_smd_qrtr_smd_match, + .drv = { .name = "qcom_smd_qrtr", - .owner = THIS_MODULE, }, }; -module_qcom_smd_driver(qcom_smd_qrtr_driver); +module_rpmsg_driver(qcom_smd_qrtr_driver); MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); MODULE_LICENSE("GPL v2"); diff --git a/net/rds/connection.c b/net/rds/connection.c index 1fa75ab7b733230585666abcb7279ba691365256..6a5ebdea7d2e9eb3b624a01a8a87ef601e3b2f13 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, + RDS_CONN_DOWN) && + !rds_conn_path_transition(cp, RDS_CONN_ERROR, RDS_CONN_DOWN)) { /* This can happen - eg when we're in the middle of tearing * down the connection, and someone unloads the rds module. - * Quite reproduceable with loopback connections. + * Quite reproducible with loopback connections. * Mostly harmless. + * + * Note that this also happens with rds-tcp because + * we could have triggered rds_conn_path_drop in irq + * mode from rds_tcp_state change on the receipt of + * a FIN, thus we need to recheck for RDS_CONN_ERROR + * here. */ rds_conn_path_error(cp, "%s: failed to transition " "to state DOWN, current state " diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 1c38d2c7caa8e955585b45f0c9218a0775013b4d..80fb6f63e768d3461c47533615c875526bb8bab9 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -702,9 +702,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, event->param.conn.initiator_depth); /* rdma_accept() calls rdma_reject() internally if it fails */ - err = rdma_accept(cm_id, &conn_param); - if (err) - rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); + if (rdma_accept(cm_id, &conn_param)) + rds_ib_conn_error(conn, "rdma_accept failed\n"); out: if (conn) diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 4fe8f4fec4eee66c826b5beb5b02ae61d19b483a..86ef907067bb084e01ac4f8d5f00d0c17f40ac55 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -78,17 +78,15 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) return ibmr; out_no_cigar: - if (ibmr) { - if (fmr->fmr) - ib_dealloc_fmr(fmr->fmr); - kfree(ibmr); - } + kfree(ibmr); atomic_dec(&pool->item_count); + return ERR_PTR(err); } -int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, - struct scatterlist *sg, unsigned int nents) +static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, + struct rds_ib_mr *ibmr, struct scatterlist *sg, + unsigned int nents) { struct ib_device *dev = rds_ibdev->dev; struct rds_ib_fmr *fmr = &ibmr->u.fmr; @@ -114,29 +112,39 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); if (dma_addr & ~PAGE_MASK) { - if (i > 0) + if (i > 0) { + ib_dma_unmap_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); return -EINVAL; - else + } else { ++page_cnt; + } } if ((dma_addr + dma_len) & ~PAGE_MASK) { - if (i < sg_dma_len - 1) + if (i < sg_dma_len - 1) { + ib_dma_unmap_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); return -EINVAL; - else + } else { ++page_cnt; + } } len += dma_len; } page_cnt += len >> PAGE_SHIFT; - if (page_cnt > ibmr->pool->fmr_attr.max_pages) + if (page_cnt > ibmr->pool->fmr_attr.max_pages) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -EINVAL; + } dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, rdsibdev_to_node(rds_ibdev)); - if (!dma_pages) + if (!dma_pages) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -ENOMEM; + } page_cnt = 0; for (i = 0; i < sg_dma_len; ++i) { @@ -149,8 +157,10 @@ int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, } ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr); - if (ret) + if (ret) { + ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); goto out; + } /* Success - we successfully remapped the MR, so we can * safely tear down the old mapping. diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 5d6e98a79a5e4b3de1f472c5fc513fce545bf6f9..0ea4ab017a8cc3f807931e1194cddb5048a82956 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -125,8 +125,6 @@ void rds_ib_mr_exit(void); void __rds_ib_teardown_mr(struct rds_ib_mr *); void rds_ib_teardown_mr(struct rds_ib_mr *); struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int); -int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *, - struct scatterlist *, unsigned int); struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *); int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **); struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *, diff --git a/net/rds/recv.c b/net/rds/recv.c index 8b7e7b7f2c2dbebee31dd7626540b797f1fa353f..c70c32cb05f5ff3a0f91dd0627b9525413c37318 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -594,7 +594,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, goto out; while (1) { - struct iov_iter save; /* If there are pending notifications, do those - and nothing else */ if (!list_empty(&rs->rs_notify_queue)) { ret = rds_notify_queue_get(rs, msg); @@ -630,7 +629,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, rdsdebug("copying inc %p from %pI4:%u to user\n", inc, &inc->i_conn->c_faddr, ntohs(inc->i_hdr.h_sport)); - save = msg->msg_iter; ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); if (ret < 0) break; @@ -644,7 +642,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, rds_inc_put(inc); inc = NULL; rds_stats_inc(s_recv_deliver_raced); - msg->msg_iter = save; + iov_iter_revert(&msg->msg_iter, ret); continue; } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 22569007677357ba40347ee46e6584c683de2597..431404dbdad1cebe5d80cdb25de8b60db75fa87b 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -84,13 +84,10 @@ static struct ctl_table rds_tcp_sysctl_table[] = { /* doing it this way avoids calling tcp_sk() */ void rds_tcp_nonagle(struct socket *sock) { - mm_segment_t oldfs = get_fs(); int val = 1; - set_fs(KERNEL_DS); - sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, (char __user *)&val, + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, sizeof(val)); - set_fs(oldfs); } u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index dcf4742083eab360ded7fcaddad8608b2526aa29..52d11d7725c839820f76410c9837b762f2b2205e 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -40,13 +40,7 @@ static void rds_tcp_cork(struct socket *sock, int val) { - mm_segment_t oldfs; - - oldfs = get_fs(); - set_fs(KERNEL_DS); - sock->ops->setsockopt(sock, SOL_TCP, TCP_CORK, (char __user *)&val, - sizeof(val)); - set_fs(oldfs); + kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val)); } void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) diff --git a/net/rds/threads.c b/net/rds/threads.c index e36e333a0aa0d7430c852ce419ac7ed85094bbec..3e447d056d092a405311265a06a8596b2ce8dc87 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work) struct rds_connection *conn = cp->cp_conn; int ret; - if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) + if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 26a7b1db1361e554733b0ff40a54d8e68e59af09..7486926e60a88a56a364b727800f3cd1a671d808 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -739,6 +739,25 @@ static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, return ret; } +/* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + /* * conn_client.c */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0ed181f53f32a0145c03b0006b92de5c7a0101aa..1752fcf8e8f1dd85866004a2ee38c8bbaa040138 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -413,11 +413,11 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, ECONNABORTED); + conn->remote_abort, -ECONNABORTED); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, ECONNABORTED); + conn->local_abort, -ECONNABORTED); break; default: BUG(); @@ -600,7 +600,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED); abort = true; /* fall through */ case RXRPC_CALL_COMPLETE: diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 97a17ada4431d58b7a0f9c07be3b13b0230a6390..7a77844aab16be5f32a16b7edd3a7f440006c221 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -386,7 +386,7 @@ void rxrpc_process_call(struct work_struct *work) now = ktime_get_real(); if (ktime_before(call->expire_at, now)) { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d79cd36987a95b86f2af9fac4688ab86e20f41d5..47f7f4205653aa0643c77f7381688df79b51cbbe 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -486,7 +486,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_put_call(call, rxrpc_call_put); } @@ -494,7 +494,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_got); - rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c3be03e8d098213e4956bb644827865206470e19..e8dea0d49e7fedf2951616c1aed8e93ec693a1dd 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -550,6 +550,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, call->cid = conn->proto.cid | channel; call->call_id = call_id; + trace_rxrpc_connect_call(call); _net("CONNECT call %08x:%08x as call %d on conn %d", call->cid, call->call_id, call->debug_id, conn->debug_id); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b099b64366f356c27dea0a4dd215cc1034e61b55..46babcf82ce8648190e018ee69ad16701e2e969a 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -168,7 +168,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, * generate a connection-level abort */ static int rxrpc_abort_connection(struct rxrpc_connection *conn, - u32 error, u32 abort_code) + int error, u32 abort_code) { struct rxrpc_wire_header whdr; struct msghdr msg; @@ -281,14 +281,17 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) < 0) + &wtmp, sizeof(wtmp)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_abort")); return -EPROTO; + } abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED); + abort_code, -ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -327,7 +330,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; default: - _leave(" = -EPROTO [%u]", sp->hdr.type); + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_conn_pkt")); return -EPROTO; } } @@ -370,7 +374,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) abort: _debug("abort %d, %d", ret, abort_code); - rxrpc_abort_connection(conn, -ret, abort_code); + rxrpc_abort_connection(conn, ret, abort_code); _leave(" [aborted]"); } @@ -419,9 +423,8 @@ void rxrpc_process_connection(struct work_struct *work) goto out; protocol_error: - if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) + if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - _leave(" [EPROTO]"); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 18b2ad8be8e2b57dd57ef846287add68b027b08e..45dba732a3b4743ba37c46d05b02125f16eac23d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -30,7 +30,7 @@ static void rxrpc_proto_abort(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) { set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -665,6 +665,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, + ntohl(ackinfo->rwind), wake); call->tx_winsize = rwind; } @@ -877,7 +879,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) } /* - * Process an ABORT packet. + * Process an ABORT packet directed at a call. */ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { @@ -892,10 +894,12 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) &wtmp, sizeof(wtmp)) >= 0) abort_code = ntohl(wtmp); + trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code); + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED)) + abort_code, -ECONNABORTED)) rxrpc_notify_socket(call); } @@ -958,7 +962,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) { + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) { set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -1017,8 +1021,11 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_hdr")); return -EBADMSG; + } memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 7d4375e557e6ee4a50961ad25d9e6f01a34290db..af276f173b10ebb26a8b68d2560ad958b8f3d09a 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -46,7 +46,10 @@ static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - *_abort_code = RX_PROTOCOL_ERROR; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("chall_none")); return -EPROTO; } @@ -54,7 +57,10 @@ static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - *_abort_code = RX_PROTOCOL_ERROR; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("resp_none")); return -EPROTO; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bf13b8470c9ad51783325d7f47c32414b07f0c40..1ed9c0c2e94f1c70ab0ca61fa16e87f76530f78a 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -296,7 +296,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work) hlist_del_init(&call->error_link); rxrpc_see_call(call); - if (rxrpc_set_call_completion(call, compl, 0, error)) + if (rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 3e2f1a8e9c5b51bf90ce8679c06b6ec8a8958ea9..f9caf3b775097f62e8cb27fe12f3e925a3f8c323 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -83,11 +83,11 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); break; case RXRPC_CALL_NETWORK_ERROR: - tmp = call->error; + tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); break; case RXRPC_CALL_LOCAL_ERROR: - tmp = call->error; + tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: @@ -682,14 +682,16 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, return ret; short_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); ret = -EBADMSG; goto out; excess_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); ret = -EMSGSIZE; goto out; call_complete: *_abort = call->abort_code; - ret = -call->error; + ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (size > 0) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 4374e7b9c7bff9fdd1d36a5a9fc8f135414ed360..1bb9b2ccc2673714367d2ec4356d39b5a3839895 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -148,15 +148,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, u32 data_size, void *sechdr) { - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxkad_level1_hdr hdr; struct rxrpc_crypt iv; struct scatterlist sg; u16 check; - sp = rxrpc_skb(skb); - _enter(""); check = sp->hdr.seq ^ call->call_id; @@ -323,6 +321,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_crypt iv; struct scatterlist sg[16]; struct sk_buff *trailer; + bool aborted; u32 data_size, buf; u16 check; int nsg; @@ -330,7 +329,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); if (len < 8) { - rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", + RXKADSEALEDINCON); goto protocol_error; } @@ -355,7 +355,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* Extract the decrypted packet length */ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { - rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", + RXKADDATALEN); goto protocol_error; } offset += sizeof(sechdr); @@ -368,12 +369,14 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", + RXKADSEALEDINCON); goto protocol_error; } if (data_size > len) { - rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", + RXKADDATALEN); goto protocol_error; } @@ -381,8 +384,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO"); + if (aborted) + rxrpc_send_abort_packet(call); return -EPROTO; nomem: @@ -403,6 +406,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; struct sk_buff *trailer; + bool aborted; u32 data_size, buf; u16 check; int nsg; @@ -410,7 +414,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, _enter(",{%d}", skb->len); if (len < 8) { - rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", + RXKADSEALEDINCON); goto protocol_error; } @@ -445,7 +450,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, /* Extract the decrypted packet length */ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { - rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", + RXKADDATALEN); goto protocol_error; } offset += sizeof(sechdr); @@ -458,12 +464,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", + RXKADSEALEDINCON); goto protocol_error; } if (data_size > len) { - rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", + RXKADDATALEN); goto protocol_error; } @@ -471,8 +479,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO"); + if (aborted) + rxrpc_send_abort_packet(call); return -EPROTO; nomem: @@ -491,6 +499,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg; + bool aborted; u16 cksum; u32 x, y; @@ -522,10 +531,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, cksum = 1; /* zero checksums are not permitted */ if (cksum != expected_cksum) { - rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO [csum failed]"); - return -EPROTO; + aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", + RXKADSEALEDINCON); + goto protocol_error; } switch (call->conn->params.security_level) { @@ -538,6 +546,11 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, default: return -ENOANO; } + +protocol_error: + if (aborted) + rxrpc_send_abort_packet(call); + return -EPROTO; } /* @@ -754,22 +767,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + const char *eproto; u32 version, nonce, min_level, abort_code; int ret; _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); - if (!conn->params.key) { - _leave(" = -EPROTO [no key]"); - return -EPROTO; - } + eproto = tracepoint_string("chall_no_key"); + abort_code = RX_PROTOCOL_ERROR; + if (!conn->params.key) + goto protocol_error; + abort_code = RXKADEXPIRED; ret = key_validate(conn->params.key); - if (ret < 0) { - *_abort_code = RXKADEXPIRED; - return ret; - } + if (ret < 0) + goto other_error; + eproto = tracepoint_string("chall_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &challenge, sizeof(challenge)) < 0) @@ -782,13 +796,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", sp->hdr.serial, version, nonce, min_level); + eproto = tracepoint_string("chall_ver"); abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) goto protocol_error; abort_code = RXKADLEVELFAIL; + ret = -EACCES; if (conn->params.security_level < min_level) - goto protocol_error; + goto other_error; token = conn->params.key->payload.data[0]; @@ -815,28 +831,34 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, return rxkad_send_response(conn, &sp->hdr, &resp, token->kad); protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + ret = -EPROTO; +other_error: *_abort_code = abort_code; - _leave(" = -EPROTO [%d]", abort_code); - return -EPROTO; + return ret; } /* * decrypt the kerberos IV ticket in the response */ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, + struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, time_t *_expiry, u32 *_abort_code) { struct skcipher_request *req; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv, key; struct scatterlist sg[1]; struct in_addr addr; unsigned int life; + const char *eproto; time_t issue, now; bool little_endian; int ret; + u32 abort_code; u8 *p, *q, *name, *end; _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key)); @@ -847,11 +869,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, if (ret < 0) { switch (ret) { case -EKEYEXPIRED: - *_abort_code = RXKADEXPIRED; - goto error; + abort_code = RXKADEXPIRED; + goto other_error; default: - *_abort_code = RXKADNOAUTH; - goto error; + abort_code = RXKADNOAUTH; + goto other_error; } } @@ -860,13 +882,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); + ret = -ENOMEM; req = skcipher_request_alloc(conn->server_key->payload.data[0], GFP_NOFS); - if (!req) { - *_abort_code = RXKADNOAUTH; - ret = -ENOMEM; - goto error; - } + if (!req) + goto temporary_error; sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); @@ -877,11 +897,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p = ticket; end = p + ticket_len; -#define Z(size) \ +#define Z(field) \ ({ \ u8 *__str = p; \ + eproto = tracepoint_string("rxkad_bad_"#field); \ q = memchr(p, 0, end - p); \ - if (!q || q - p > (size)) \ + if (!q || q - p > (field##_SZ)) \ goto bad_ticket; \ for (; p < q; p++) \ if (!isprint(*p)) \ @@ -896,17 +917,18 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p++; /* extract the authentication name */ - name = Z(ANAME_SZ); + name = Z(ANAME); _debug("KIV ANAME: %s", name); /* extract the principal's instance */ - name = Z(INST_SZ); + name = Z(INST); _debug("KIV INST : %s", name); /* extract the principal's authentication domain */ - name = Z(REALM_SZ); + name = Z(REALM); _debug("KIV REALM: %s", name); + eproto = tracepoint_string("rxkad_bad_len"); if (end - p < 4 + 8 + 4 + 2) goto bad_ticket; @@ -941,36 +963,37 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, /* check the ticket is in date */ if (issue > now) { - *_abort_code = RXKADNOAUTH; + abort_code = RXKADNOAUTH; ret = -EKEYREJECTED; - goto error; + goto other_error; } if (issue < now - life) { - *_abort_code = RXKADEXPIRED; + abort_code = RXKADEXPIRED; ret = -EKEYEXPIRED; - goto error; + goto other_error; } *_expiry = issue + life; /* get the service name */ - name = Z(SNAME_SZ); + name = Z(SNAME); _debug("KIV SNAME: %s", name); /* get the service instance name */ - name = Z(INST_SZ); + name = Z(INST); _debug("KIV SINST: %s", name); - - ret = 0; -error: - _leave(" = %d", ret); - return ret; + return 0; bad_ticket: - *_abort_code = RXKADBADTICKET; - ret = -EBADMSG; - goto error; + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + abort_code = RXKADBADTICKET; + ret = -EPROTO; +other_error: + *_abort_code = abort_code; + return ret; +temporary_error: + return ret; } /* @@ -1020,6 +1043,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, __attribute__((aligned(8))); /* must be aligned for crypto */ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; + const char *eproto; time_t expiry; void *ticket; u32 abort_code, version, kvno, ticket_len, level; @@ -1028,6 +1052,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + eproto = tracepoint_string("rxkad_rsp_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &response, sizeof(response)) < 0) @@ -1041,40 +1066,43 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); + eproto = tracepoint_string("rxkad_rsp_ver"); abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) goto protocol_error; + eproto = tracepoint_string("rxkad_rsp_tktlen"); abort_code = RXKADTICKETLEN; if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) goto protocol_error; + eproto = tracepoint_string("rxkad_rsp_unkkey"); abort_code = RXKADUNKNOWNKEY; if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) goto protocol_error; /* extract the kerberos ticket and decrypt and decode it */ + ret = -ENOMEM; ticket = kmalloc(ticket_len, GFP_NOFS); if (!ticket) - return -ENOMEM; + goto temporary_error; + eproto = tracepoint_string("rxkad_tkt_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), ticket, ticket_len) < 0) goto protocol_error_free; - ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, - &expiry, &abort_code); - if (ret < 0) { - *_abort_code = abort_code; - kfree(ticket); - return ret; - } + ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, + &expiry, _abort_code); + if (ret < 0) + goto temporary_error_free; /* use the session key from inside the ticket to decrypt the * response */ rxkad_decrypt_response(conn, &response, &session_key); + eproto = tracepoint_string("rxkad_rsp_param"); abort_code = RXKADSEALEDINCON; if (ntohl(response.encrypted.epoch) != conn->proto.epoch) goto protocol_error_free; @@ -1085,6 +1113,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, csum = response.encrypted.checksum; response.encrypted.checksum = 0; rxkad_calc_response_checksum(&response); + eproto = tracepoint_string("rxkad_rsp_csum"); if (response.encrypted.checksum != csum) goto protocol_error_free; @@ -1093,11 +1122,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_call *call; u32 call_id = ntohl(response.encrypted.call_id[i]); + eproto = tracepoint_string("rxkad_rsp_callid"); if (call_id > INT_MAX) goto protocol_error_unlock; + eproto = tracepoint_string("rxkad_rsp_callctr"); if (call_id < conn->channels[i].call_counter) goto protocol_error_unlock; + + eproto = tracepoint_string("rxkad_rsp_callst"); if (call_id > conn->channels[i].call_counter) { call = rcu_dereference_protected( conn->channels[i].call, @@ -1109,10 +1142,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, } spin_unlock(&conn->channel_lock); + eproto = tracepoint_string("rxkad_rsp_seq"); abort_code = RXKADOUTOFSEQUENCE; if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1) goto protocol_error_free; + eproto = tracepoint_string("rxkad_rsp_level"); abort_code = RXKADLEVELFAIL; level = ntohl(response.encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) @@ -1123,10 +1158,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * this the connection security can be handled in exactly the same way * as for a client connection */ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); - if (ret < 0) { - kfree(ticket); - return ret; - } + if (ret < 0) + goto temporary_error_free; kfree(ticket); _leave(" = 0"); @@ -1137,9 +1170,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, protocol_error_free: kfree(ticket); protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); *_abort_code = abort_code; - _leave(" = -EPROTO [%d]", abort_code); return -EPROTO; + +temporary_error_free: + kfree(ticket); +temporary_error: + /* Ignore the response packet if we got a temporary error such as + * ENOMEM. We just want to send the challenge again. Note that we + * also come out this way if the ticket decryption fails. + */ + return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 97ab214ca4118d7a451a4e56a916bd5809ae81f3..96ffa5d5733bd73249e32e2c10a420af9946e9b2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -556,7 +556,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { ret = 0; - if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED)) ret = rxrpc_send_abort_packet(call); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -623,7 +623,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, read_unlock_bh(&call->state_lock); break; default: - /* Request phase complete for this client call */ + /* Request phase complete for this client call */ + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); ret = -EPROTO; break; } @@ -642,20 +643,24 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @error: Local error value * @why: 3-char string indicating why. * - * Allow a kernel service to abort a call, if it's still in an abortable state. + * Allow a kernel service to abort a call, if it's still in an abortable state + * and return true if the call was aborted, false if it was already complete. */ -void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, +bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, u32 abort_code, int error, const char *why) { + bool aborted; + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); - if (rxrpc_abort_call(why, call, 0, abort_code, error)) + aborted = rxrpc_abort_call(why, call, 0, abort_code, error); + if (aborted) rxrpc_send_abort_packet(call); mutex_unlock(&call->user_mutex); - _leave(""); + return aborted; } EXPORT_SYMBOL(rxrpc_kernel_abort_call); diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 403790cce7d2324054aa48b49c70f7d4deff8a2a..9fb84f0de6af0c3a75f190ed66cab0dc771db5e4 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -352,6 +352,51 @@ config NET_SCH_PLUG To compile this code as a module, choose M here: the module will be called sch_plug. +menuconfig NET_SCH_DEFAULT + bool "Allow override default queue discipline" + ---help--- + Support for selection of default queuing discipline. + + Nearly all users can safely say no here, and the default + of pfifo_fast will be used. Many distributions already set + the default value via /proc/sys/net/core/default_qdisc. + + If unsure, say N. + +if NET_SCH_DEFAULT + +choice + prompt "Default queuing discipline" + default DEFAULT_PFIFO_FAST + help + Select the queueing discipline that will be used by default + for all network devices. + + config DEFAULT_FQ + bool "Fair Queue" if NET_SCH_FQ + + config DEFAULT_CODEL + bool "Controlled Delay" if NET_SCH_CODEL + + config DEFAULT_FQ_CODEL + bool "Fair Queue Controlled Delay" if NET_SCH_FQ_CODEL + + config DEFAULT_SFQ + bool "Stochastic Fair Queue" if NET_SCH_SFQ + + config DEFAULT_PFIFO_FAST + bool "Priority FIFO Fast" +endchoice + +config DEFAULT_NET_SCH + string + default "pfifo_fast" if DEFAULT_PFIFO_FAST + default "fq" if DEFAULT_FQ + default "fq_codel" if DEFAULT_FQ_CODEL + default "sfq" if DEFAULT_SFQ + default "pfifo_fast" +endif + comment "Classification" config NET_CLS diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b70aa57319ea3233395dc7ae349b8b7aab5dfd03..a90e8f355c00e9a39b4a17403b6c7459b6ea9c2f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -428,24 +428,49 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) return res; } +/*TCA_ACT_MAX_PRIO is 32, there count upto 32 */ +#define TCA_ACT_MAX_PRIO_MASK 0x1FF int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { int ret = -1, i; + u32 jmp_prgcnt = 0; + u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ if (skb_skip_tc_classify(skb)) return TC_ACT_OK; +restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; + if (jmp_prgcnt > 0) { + jmp_prgcnt -= 1; + continue; + } repeat: ret = a->ops->act(skb, a, res); if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ + + if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { + jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; + if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) { + /* faulty opcode, stop pipeline */ + return TC_ACT_OK; + } else { + jmp_ttl -= 1; + if (jmp_ttl > 0) + goto restart_act_graph; + else /* faulty graph, stop pipeline */ + return TC_ACT_OK; + } + } + if (ret != TC_ACT_PIPE) break; } + return ret; } EXPORT_SYMBOL(tcf_action_exec); @@ -529,20 +554,20 @@ int tcf_action_dump(struct sk_buff *skb, struct list_head *actions, return err; } -static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) +static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) { - a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL); - if (!a->act_cookie) - return -ENOMEM; + struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return NULL; - a->act_cookie->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); - if (!a->act_cookie->data) { - kfree(a->act_cookie); - return -ENOMEM; + c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); + if (!c->data) { + kfree(c); + return NULL; } - a->act_cookie->len = nla_len(tb[TCA_ACT_COOKIE]); + c->len = nla_len(tb[TCA_ACT_COOKIE]); - return 0; + return c; } struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, @@ -551,13 +576,14 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, { struct tc_action *a; struct tc_action_ops *a_o; + struct tc_cookie *cookie = NULL; char act_name[IFNAMSIZ]; struct nlattr *tb[TCA_ACT_MAX + 1]; struct nlattr *kind; int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); if (err < 0) goto err_out; err = -EINVAL; @@ -566,6 +592,18 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, goto err_out; if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) goto err_out; + if (tb[TCA_ACT_COOKIE]) { + int cklen = nla_len(tb[TCA_ACT_COOKIE]); + + if (cklen > TC_COOKIE_MAX_SIZE) + goto err_out; + + cookie = nla_memdup_cookie(tb); + if (!cookie) { + err = -ENOMEM; + goto err_out; + } + } } else { err = -EINVAL; if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) @@ -604,20 +642,12 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (err < 0) goto err_mod; - if (tb[TCA_ACT_COOKIE]) { - int cklen = nla_len(tb[TCA_ACT_COOKIE]); - - if (cklen > TC_COOKIE_MAX_SIZE) { - err = -EINVAL; - tcf_hash_release(a, bind); - goto err_mod; - } - - if (nla_memdup_cookie(a, tb) < 0) { - err = -ENOMEM; - tcf_hash_release(a, bind); - goto err_mod; + if (name == NULL && tb[TCA_ACT_COOKIE]) { + if (a->act_cookie) { + kfree(a->act_cookie->data); + kfree(a->act_cookie); } + a->act_cookie = cookie; } /* module count goes up only when brand new policy is created @@ -632,6 +662,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, err_mod: module_put(a_o->owner); err_out: + if (cookie) { + kfree(cookie->data); + kfree(cookie); + } return ERR_PTR(err); } @@ -654,7 +688,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); if (err < 0) return err; @@ -786,7 +820,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); if (err < 0) goto err_out; @@ -835,7 +869,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL); if (err < 0) goto err_out; @@ -921,7 +955,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, struct tc_action *act; LIST_HEAD(actions); - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); + ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL); if (ret < 0) return ret; @@ -993,7 +1027,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return tcf_add_notify(net, n, &actions, portid); } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; @@ -1004,7 +1039,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL, + extack); if (ret < 0) return ret; @@ -1051,19 +1087,20 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n) struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0) + if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, + NULL, NULL) < 0) return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), - NLMSG_ALIGN(nla_len(tb1)), NULL) < 0) + NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; - if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0) + if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 520baa41cba3a8d24a659b67c5a5ed8a44129f37..d33947d6e9d017a05b704d233ec97f73abb2ab74 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -283,7 +283,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f9bb43c25697e70d18fe9bbba90f6e98dfe05759..2155bc6c6a1e79049de9dc9c8e611b592e42a77f 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -109,7 +109,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy, + NULL); if (ret < 0) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e978ccd4402cbc68ba1c46e20909a047978df1c2..ab6fdbd34db774bdb9d8a958ead2c808a3405d13 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -59,7 +59,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy); + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); if (err < 0) return err; @@ -181,6 +181,9 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, struct tcphdr *tcph; const struct iphdr *iph; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + return 1; + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; @@ -202,6 +205,9 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, struct tcphdr *tcph; const struct ipv6hdr *ip6h; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + return 1; + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); if (tcph == NULL) return 0; @@ -225,6 +231,9 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -278,6 +287,9 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + return 1; + /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e6c874a2b283f6e265b61a39f1fcd053389b9051..99afe8b1f1fb015584f303e7352da8b6171774eb 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -73,7 +73,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy); + err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 71e7ff22f7c92a86cacad9a1b8d18d3d726f52fb..c5dec308b8b1eb29505fed1b9a71b3ef70f5e91a 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -443,7 +443,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; - err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy); + err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL); if (err < 0) return err; @@ -514,7 +514,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (tb[TCA_IFE_METALST]) { err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], - NULL); + NULL, NULL); if (err) { metadata_parse_err: if (exists) @@ -603,8 +603,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, return -1; } -int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, - u16 metaid, u16 mlen, void *mdata) +static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, + u16 metaid, u16 mlen, void *mdata) { struct tcf_meta_info *e; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 992ef8d624f11819e032411e72ea60aa6aa93ba1..36f0ced9e60c03297e195135ca5a8a53d1a3a27b 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -107,7 +107,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy); + err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index af49c7dca8608cebbaa0224976ae7df104e5526f..1b5549ababd4690617b5ff0c3842c6cbb4806a41 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -87,7 +87,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy); + ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL); if (ret < 0) return ret; if (tb[TCA_MIRRED_PARMS] == NULL) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9b6aec665495992fd6d63773890b5e897ba51819..9016ab8a0649780a02d8b29f76f98f19c0283e11 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -50,7 +50,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); + err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c1310472f620fd44b9ae8a6cfeefef5419cad54e..7dc5892671c818db55b024b10d56ec2252c2f826 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -72,7 +72,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, } err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, - pedit_key_ex_policy); + pedit_key_ex_policy, NULL); if (err) goto err_out; @@ -94,8 +94,10 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, k++; } - if (n) + if (n) { + err = -EINVAL; goto err_out; + } return keys_ex; @@ -147,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy); + err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0ba91d1ce99480c4817684dbe0b4fde61811e03e..b062bc80c7cb11b0ea473916c58957b54db21594 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -90,7 +90,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy); + err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL); if (err < 0) return err; @@ -132,21 +132,21 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, } } - spin_lock_bh(&police->tcf_lock); if (est) { err = gen_replace_estimator(&police->tcf_bstats, NULL, &police->tcf_rate_est, &police->tcf_lock, NULL, est); if (err) - goto failure_unlock; + goto failure; } else if (tb[TCA_POLICE_AVRATE] && (ret == ACT_P_CREATED || !gen_estimator_active(&police->tcf_rate_est))) { err = -EINVAL; - goto failure_unlock; + goto failure; } + spin_lock_bh(&police->tcf_lock); /* No failure allowed after this point */ police->tcfp_mtu = parm->mtu; if (police->tcfp_mtu == 0) { @@ -192,8 +192,6 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return ret; -failure_unlock: - spin_unlock_bh(&police->tcf_lock); failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 0b8217b4763f59d2d4a71684080536b080ab27e1..59d6645a4007818656376b02a9c5a458e028420b 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -50,7 +50,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy); + ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 823a73ad0c602b8e079be04934f8a57d53480da1..43605e7ce05107adb79bd1dbc562f4961eb20e51 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -94,7 +94,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy); + err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 06ccae3c12eecf85383105489320537d52c4d948..6b3e65d7de0c2e81240618e4500ee894819df833 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -82,7 +82,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy); + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index c736627f8f4a0e0ff86db535ec95459a417e4ada..a73c4bbcada293b2923a41572104d6f9362d37d9 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -103,7 +103,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index e3a58e02119877d237f9aa18c5f9d9fdfb44d0c9..b9a2f241a5b3adca04a5d162569f7908c01c2912 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -89,7 +89,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 19e0dba305ce8101d9db33ff0a1eaab849a730d0..13ba3a89f675d7d5ddaeab893a9d548be735e39e 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -121,7 +121,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy); + err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 732f7cae459d4656aa8d69020f44a66abb501f34..22f88b35a5463e1d896271f1e1b05675bcaa1955 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -178,14 +178,11 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, return ERR_PTR(err); } -static bool tcf_proto_destroy(struct tcf_proto *tp, bool force) +static void tcf_proto_destroy(struct tcf_proto *tp) { - if (tp->ops->destroy(tp, force)) { - module_put(tp->ops->owner); - kfree_rcu(tp, rcu); - return true; - } - return false; + tp->ops->destroy(tp); + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); } void tcf_destroy_chain(struct tcf_proto __rcu **fl) @@ -194,14 +191,15 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl) while ((tp = rtnl_dereference(*fl)) != NULL) { RCU_INIT_POINTER(*fl, tp->next); - tcf_proto_destroy(tp, true); + tcf_proto_destroy(tp); } } EXPORT_SYMBOL(tcf_destroy_chain); /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; @@ -229,7 +227,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); if (err < 0) return err; @@ -360,7 +358,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) RCU_INIT_POINTER(*back, next); tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER, false); - tcf_proto_destroy(tp, true); + tcf_proto_destroy(tp); err = 0; goto errout; } @@ -371,24 +369,28 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) goto errout; } } else { + bool last; + switch (n->nlmsg_type) { case RTM_NEWTFILTER: if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_proto_destroy(tp, true); + tcf_proto_destroy(tp); err = -EEXIST; goto errout; } break; case RTM_DELTFILTER: - err = tp->ops->delete(tp, fh); + err = tp->ops->delete(tp, fh, &last); if (err) goto errout; next = rtnl_dereference(tp->next); tfilter_notify(net, skb, n, tp, t->tcm_handle, RTM_DELTFILTER, false); - if (tcf_proto_destroy(tp, false)) + if (last) { RCU_INIT_POINTER(*back, next); + tcf_proto_destroy(tp); + } goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, @@ -410,7 +412,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_proto_destroy(tp, true); + tcf_proto_destroy(tp); } errout: diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 5877f6061b57589ce9ba9226281e85f47eafb523..c4fd63a068f98b1288bacebd5f352af6037362b9 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -93,30 +93,28 @@ static void basic_delete_filter(struct rcu_head *head) kfree(f); } -static bool basic_destroy(struct tcf_proto *tp, bool force) +static void basic_destroy(struct tcf_proto *tp) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; - if (!force && !list_empty(&head->flist)) - return false; - list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); } kfree_rcu(head, rcu); - return true; } -static int basic_delete(struct tcf_proto *tp, unsigned long arg) +static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { + struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = (struct basic_filter *) arg; list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, basic_delete_filter); + *last = list_empty(&head->flist); return 0; } @@ -174,7 +172,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], - basic_policy); + basic_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 80f688436dd70bae84f0c2fffd3d16c4b1d2c4da..5ebeae996e6327023bfce0307cd6b6a07c0860c2 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -274,25 +274,24 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); } -static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) +static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { + struct cls_bpf_head *head = rtnl_dereference(tp->root); + __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg); + *last = list_empty(&head->plist); return 0; } -static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) +static void cls_bpf_destroy(struct tcf_proto *tp) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; - if (!force && !list_empty(&head->plist)) - return false; - list_for_each_entry_safe(prog, tmp, &head->plist, link) __cls_bpf_delete(tp, prog); kfree_rcu(head, rcu); - return true; } static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) @@ -478,7 +477,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy); + ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy, + NULL); if (ret < 0) return ret; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index c1f20077837f06bb1998f5621c797e575e10ca21..12ce547eea04dfb57d3f47540b57e8150a8ebca4 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], - cgroup_policy); + cgroup_policy, NULL); if (err < 0) goto errout; @@ -131,20 +131,16 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, return err; } -static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force) +static void cls_cgroup_destroy(struct tcf_proto *tp) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); - if (!force) - return false; /* Head can still be NULL due to cls_cgroup_init(). */ if (head) call_rcu(&head->rcu, cls_cgroup_destroy_rcu); - - return true; } -static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) +static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { return -EOPNOTSUPP; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 3d6b9286c203f298b14b5254e5c12cb4781eb4b1..3065752b9cda59e4cb2555acfe2ae26722152ffc 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -400,7 +400,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy); + err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); if (err < 0) return err; @@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, get_random_bytes(&fnew->hashrnd, 4); } - fnew->perturb_timer.function = flow_perturbation; - fnew->perturb_timer.data = (unsigned long)fnew; - init_timer_deferrable(&fnew->perturb_timer); + setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, + (unsigned long)fnew); tcf_exts_change(tp, &fnew->exts, &e); tcf_em_tree_change(tp, &fnew->ematches, &t); @@ -563,12 +562,14 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return err; } -static int flow_delete(struct tcf_proto *tp, unsigned long arg) +static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { + struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = (struct flow_filter *)arg; list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); + *last = list_empty(&head->filters); return 0; } @@ -584,20 +585,16 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static bool flow_destroy(struct tcf_proto *tp, bool force) +static void flow_destroy(struct tcf_proto *tp) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; - if (!force && !list_empty(&head->filters)) - return false; - list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); call_rcu(&f->rcu, flow_destroy_filter); } kfree_rcu(head, rcu); - return true; } static unsigned long flow_get(struct tcf_proto *tp, u32 handle) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9d0c99d2e9fbcc35aee5a51274cf9fe7f542a14f..ca526c0881bd6b4bcff19835befb537f45c6d5ff 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv6_addrs enc_ipv6; }; struct flow_dissector_key_ports enc_tp; + struct flow_dissector_key_mpls mpls; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -328,21 +330,16 @@ static void fl_destroy_rcu(struct rcu_head *rcu) schedule_work(&head->work); } -static bool fl_destroy(struct tcf_proto *tp, bool force) +static void fl_destroy(struct tcf_proto *tp) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f, *next; - if (!force && !list_empty(&head->filters)) - return false; - list_for_each_entry_safe(f, next, &head->filters, list) __fl_delete(tp, f); __module_get(THIS_MODULE); call_rcu(&head->rcu, fl_destroy_rcu); - - return true; } static unsigned long fl_get(struct tcf_proto *tp, u32 handle) @@ -423,6 +420,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN }, [TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN }, [TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_MPLS_TTL] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -438,6 +439,41 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } +static int fl_set_key_mpls(struct nlattr **tb, + struct flow_dissector_key_mpls *key_val, + struct flow_dissector_key_mpls *key_mask) +{ + if (tb[TCA_FLOWER_KEY_MPLS_TTL]) { + key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]); + key_mask->mpls_ttl = MPLS_TTL_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_BOS]) { + u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]); + + if (bos & ~MPLS_BOS_MASK) + return -EINVAL; + key_val->mpls_bos = bos; + key_mask->mpls_bos = MPLS_BOS_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_TC]) { + u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]); + + if (tc & ~MPLS_TC_MASK) + return -EINVAL; + key_val->mpls_tc = tc; + key_mask->mpls_tc = MPLS_TC_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) { + u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]); + + if (label & ~MPLS_LABEL_MASK) + return -EINVAL; + key_val->mpls_label = label; + key_mask->mpls_label = MPLS_LABEL_MASK; + } + return 0; +} + static void fl_set_key_vlan(struct nlattr **tb, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) @@ -594,6 +630,11 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE_MASK, sizeof(key->icmp.code)); + } else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) || + key->basic.n_proto == htons(ETH_P_MPLS_MC)) { + ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls); + if (ret) + return ret; } else if (key->basic.n_proto == htons(ETH_P_ARP) || key->basic.n_proto == htons(ETH_P_RARP)) { fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP, @@ -729,6 +770,8 @@ static void fl_init_dissector(struct cls_fl_head *head, FLOW_DISSECTOR_KEY_ICMP, icmp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ARP, arp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_MPLS, mpls); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, @@ -848,7 +891,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tb) return -ENOBUFS; - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], + fl_policy, NULL); if (err < 0) goto errout_tb; @@ -946,7 +990,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, return err; } -static int fl_delete(struct tcf_proto *tp, unsigned long arg) +static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = (struct cls_fl_filter *) arg; @@ -955,6 +999,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); __fl_delete(tp, f); + *last = list_empty(&head->filters); return 0; } @@ -994,6 +1039,41 @@ static int fl_dump_key_val(struct sk_buff *skb, return 0; } +static int fl_dump_key_mpls(struct sk_buff *skb, + struct flow_dissector_key_mpls *mpls_key, + struct flow_dissector_key_mpls *mpls_mask) +{ + int err; + + if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask))) + return 0; + if (mpls_mask->mpls_ttl) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL, + mpls_key->mpls_ttl); + if (err) + return err; + } + if (mpls_mask->mpls_tc) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC, + mpls_key->mpls_tc); + if (err) + return err; + } + if (mpls_mask->mpls_label) { + err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL, + mpls_key->mpls_label); + if (err) + return err; + } + if (mpls_mask->mpls_bos) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS, + mpls_key->mpls_bos); + if (err) + return err; + } + return 0; +} + static int fl_dump_key_vlan(struct sk_buff *skb, struct flow_dissector_key_vlan *vlan_key, struct flow_dissector_key_vlan *vlan_mask) @@ -1099,6 +1179,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, sizeof(key->basic.n_proto))) goto nla_put_failure; + if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls)) + goto nla_put_failure; + if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan)) goto nla_put_failure; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 9dc63d54e1672e82cfae5f677a1ba811c72df337..d3885362e017e1b477c6c2e4d8abea4b1e021c53 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -127,20 +127,14 @@ static void fw_delete_filter(struct rcu_head *head) kfree(f); } -static bool fw_destroy(struct tcf_proto *tp, bool force) +static void fw_destroy(struct tcf_proto *tp) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; int h; if (head == NULL) - return true; - - if (!force) { - for (h = 0; h < HTSIZE; h++) - if (rcu_access_pointer(head->ht[h])) - return false; - } + return; for (h = 0; h < HTSIZE; h++) { while ((f = rtnl_dereference(head->ht[h])) != NULL) { @@ -150,17 +144,17 @@ static bool fw_destroy(struct tcf_proto *tp, bool force) call_rcu(&f->rcu, fw_delete_filter); } } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); - return true; } -static int fw_delete(struct tcf_proto *tp, unsigned long arg) +static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *)arg; struct fw_filter __rcu **fp; struct fw_filter *pfp; + int ret = -EINVAL; + int h; if (head == NULL || f == NULL) goto out; @@ -173,11 +167,21 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg) RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); call_rcu(&f->rcu, fw_delete_filter); - return 0; + ret = 0; + break; } } + + *last = true; + for (h = 0; h < HTSIZE; h++) { + if (rcu_access_pointer(head->ht[h])) { + *last = false; + break; + } + } + out: - return -EINVAL; + return ret; } static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { @@ -250,7 +254,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!opt) return handle ? -EINVAL : 0; /* Succeed if it is old method. */ - err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy); + err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 224eb2c143462a39f3a05e78203344e44e6eb578..51859b8edd7eff3845ca3d5b5b0d900583736d4a 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -90,19 +90,18 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, &offload); } -static bool mall_destroy(struct tcf_proto *tp, bool force) +static void mall_destroy(struct tcf_proto *tp) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct net_device *dev = tp->q->dev_queue->dev; if (!head) - return true; + return; if (tc_should_offload(dev, tp, head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); call_rcu(&head->rcu, mall_destroy_rcu); - return true; } static unsigned long mall_get(struct tcf_proto *tp, u32 handle) @@ -161,8 +160,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (head) return -EEXIST; - err = nla_parse_nested(tb, TCA_MATCHALL_MAX, - tca[TCA_OPTIONS], mall_policy); + err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], + mall_policy, NULL); if (err < 0) return err; @@ -204,8 +203,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); - if (head) - call_rcu(&head->rcu, mall_destroy_rcu); return 0; err_replace_hw_filter: @@ -216,7 +213,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, return err; } -static int mall_delete(struct tcf_proto *tp, unsigned long arg) +static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { return -EOPNOTSUPP; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 455fc8f83d0ae0ab48ba97fc2a11ce99e8e00784..d63d5502ee020350e72b818c5607000a21df915a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -140,8 +140,6 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, goto failure; id = dst->tclassid; - if (head == NULL) - goto old_method; iif = inet_iif(skb); @@ -194,15 +192,6 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, route4_set_fastmap(head, id, iif, ROUTE4_FAILURE); failure: return -1; - -old_method: - if (id && (TC_H_MAJ(id) == 0 || - !(TC_H_MAJ(id^tp->q->handle)))) { - res->classid = id; - res->class = 0; - return 0; - } - return -1; } static inline u32 to_hash(u32 id) @@ -234,9 +223,6 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) struct route4_filter *f; unsigned int h1, h2; - if (!head) - return 0; - h1 = to_hash(handle); if (h1 > 256) return 0; @@ -276,20 +262,13 @@ static void route4_delete_filter(struct rcu_head *head) kfree(f); } -static bool route4_destroy(struct tcf_proto *tp, bool force) +static void route4_destroy(struct tcf_proto *tp) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; if (head == NULL) - return true; - - if (!force) { - for (h1 = 0; h1 <= 256; h1++) { - if (rcu_access_pointer(head->table[h1])) - return false; - } - } + return; for (h1 = 0; h1 <= 256; h1++) { struct route4_bucket *b; @@ -312,12 +291,10 @@ static bool route4_destroy(struct tcf_proto *tp, bool force) kfree_rcu(b, rcu); } } - RCU_INIT_POINTER(tp->root, NULL); kfree_rcu(head, rcu); - return true; } -static int route4_delete(struct tcf_proto *tp, unsigned long arg) +static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = (struct route4_filter *)arg; @@ -325,7 +302,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) struct route4_filter *nf; struct route4_bucket *b; unsigned int h = 0; - int i; + int i, h1; if (!head || !f) return -EINVAL; @@ -356,16 +333,25 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) rt = rtnl_dereference(b->ht[i]); if (rt) - return 0; + goto out; } /* OK, session has no flows */ RCU_INIT_POINTER(head->table[to_hash(h)], NULL); kfree_rcu(b, rcu); + break; + } + } - return 0; +out: + *last = true; + for (h1 = 0; h1 <= 256; h1++) { + if (rcu_access_pointer(head->table[h1])) { + *last = false; + break; } } + return 0; } @@ -489,7 +475,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy); + err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 322438fb3ffcb426194be6c1dd05893b27cdf51c..0d9d077986992926a79af6236b8840181517679d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -152,8 +152,6 @@ static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; nhptr = ip_hdr(skb); #endif - if (unlikely(!head)) - return -1; restart: #if RSVP_DST_LEN == 4 @@ -302,22 +300,13 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) call_rcu(&f->rcu, rsvp_delete_filter_rcu); } -static bool rsvp_destroy(struct tcf_proto *tp, bool force) +static void rsvp_destroy(struct tcf_proto *tp) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; if (data == NULL) - return true; - - if (!force) { - for (h1 = 0; h1 < 256; h1++) { - if (rcu_access_pointer(data->ht[h1])) - return false; - } - } - - RCU_INIT_POINTER(tp->root, NULL); + return; for (h1 = 0; h1 < 256; h1++) { struct rsvp_session *s; @@ -337,10 +326,9 @@ static bool rsvp_destroy(struct tcf_proto *tp, bool force) } } kfree_rcu(data, rcu); - return true; } -static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) +static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg; @@ -348,7 +336,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) unsigned int h = f->handle; struct rsvp_session __rcu **sp; struct rsvp_session *nsp, *s = f->sess; - int i; + int i, h1; fp = &s->ht[(h >> 8) & 0xFF]; for (nfp = rtnl_dereference(*fp); nfp; @@ -361,7 +349,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) for (i = 0; i <= 16; i++) if (s->ht[i]) - return 0; + goto out; /* OK, session has no flows */ sp = &head->ht[h & 0xFF]; @@ -370,13 +358,23 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) if (nsp == s) { RCU_INIT_POINTER(*sp, s->next); kfree_rcu(s, rcu); - return 0; + goto out; } } - return 0; + break; } } + +out: + *last = true; + for (h1 = 0; h1 < 256; h1++) { + if (rcu_access_pointer(head->ht[h1])) { + *last = false; + break; + } + } + return 0; } @@ -484,7 +482,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy); + err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 0751245a6aced60163601b94ab1386e0c5e30565..8a8a58357c39b8e38c2d3740aaa93ed67866fe54 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -150,7 +150,7 @@ static void tcindex_destroy_fexts(struct rcu_head *head) kfree(f); } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; @@ -186,6 +186,8 @@ static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) call_rcu(&f->rcu, tcindex_destroy_fexts); else call_rcu(&r->rcu, tcindex_destroy_rexts); + + *last = false; return 0; } @@ -193,7 +195,9 @@ static int tcindex_destroy_element(struct tcf_proto *tp, unsigned long arg, struct tcf_walker *walker) { - return tcindex_delete(tp, arg); + bool last; + + return tcindex_delete(tp, arg, &last); } static void __tcindex_destroy(struct rcu_head *head) @@ -482,7 +486,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, if (!opt) return 0; - err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy); + err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL); if (err < 0) return err; @@ -529,14 +533,11 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static bool tcindex_destroy(struct tcf_proto *tp, bool force) +static void tcindex_destroy(struct tcf_proto *tp) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; - if (!force) - return false; - pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); walker.count = 0; walker.skip = 0; @@ -544,7 +545,6 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) tcindex_walk(tp, &walker); call_rcu(&p->rcu, __tcindex_destroy); - return true; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4dbe0c680fe6363a88ca47cb167dcae9327920d1..d20e72a095d578e65eda0dafc62217146aeea1c1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -585,37 +585,13 @@ static bool ht_empty(struct tc_u_hnode *ht) return true; } -static bool u32_destroy(struct tcf_proto *tp, bool force) +static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); - if (!force) { - if (root_ht) { - if (root_ht->refcnt > 1) - return false; - if (root_ht->refcnt == 1) { - if (!ht_empty(root_ht)) - return false; - } - } - - if (tp_c->refcnt > 1) - return false; - - if (tp_c->refcnt == 1) { - struct tc_u_hnode *ht; - - for (ht = rtnl_dereference(tp_c->hlist); - ht; - ht = rtnl_dereference(ht->next)) - if (!ht_empty(ht)) - return false; - } - } - if (root_ht && --root_ht->refcnt == 0) u32_destroy_hnode(tp, root_ht); @@ -640,20 +616,22 @@ static bool u32_destroy(struct tcf_proto *tp, bool force) } tp->data = NULL; - return true; } -static int u32_delete(struct tcf_proto *tp, unsigned long arg) +static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last) { struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); + struct tc_u_common *tp_c = tp->data; + int ret = 0; if (ht == NULL) - return 0; + goto out; if (TC_U32_KEY(ht->handle)) { u32_remove_hw_knode(tp, ht->handle); - return u32_delete_key(tp, (struct tc_u_knode *)ht); + ret = u32_delete_key(tp, (struct tc_u_knode *)ht); + goto out; } if (root_ht == ht) @@ -666,7 +644,40 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) return -EBUSY; } - return 0; +out: + *last = true; + if (root_ht) { + if (root_ht->refcnt > 1) { + *last = false; + goto ret; + } + if (root_ht->refcnt == 1) { + if (!ht_empty(root_ht)) { + *last = false; + goto ret; + } + } + } + + if (tp_c->refcnt > 1) { + *last = false; + goto ret; + } + + if (tp_c->refcnt == 1) { + struct tc_u_hnode *ht; + + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) + if (!ht_empty(ht)) { + *last = false; + break; + } + } + +ret: + return ret; } #define NR_U32_NODE (1<<12) @@ -860,7 +871,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy); + err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL); if (err < 0) return err; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index ae7e4f5b348b86ad352e0011c8e0d0227a44f1e3..eb0e9bab54c175d706b6f89f354f2dd5b8793a50 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -912,7 +912,7 @@ static int em_meta_change(struct net *net, void *data, int len, struct tcf_meta_hdr *hdr; struct meta_match *meta = NULL; - err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy); + err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index fbb7ebfc58c6761f6afb58e62646908b10e2bf09..03b677bc07005c7f864077edad6f22ca9dadbfdd 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -314,7 +314,7 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, if (!nla) return 0; - err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy); + err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bcf49cd2278670197f2a7e9d4e9a62ae8d117468..e88342fde1bc409aed6a3c86e7a628030eaac66f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -251,6 +251,15 @@ int qdisc_set_default(const char *name) return ops ? 0 : -ENOENT; } +#ifdef CONFIG_NET_SCH_DEFAULT +/* Set default value from kernel config */ +static int __init sch_default_qdisc(void) +{ + return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); +} +late_initcall(sch_default_qdisc); +#endif + /* We know handle. Find qdisc among all qdisc's attached to device * (root qdisc, all its children, children of children etc.) * Note: caller either uses rtnl or rcu_read_lock() @@ -274,7 +283,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) return NULL; } -void qdisc_hash_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; @@ -282,6 +291,8 @@ void qdisc_hash_add(struct Qdisc *q) WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); + if (invisible) + q->flags |= TCQ_F_INVISIBLE; } } EXPORT_SYMBOL(qdisc_hash_add); @@ -455,7 +466,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) u16 *tab = NULL; int err; - err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy); + err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) @@ -1003,7 +1014,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, goto err_out4; } - qdisc_hash_add(sch); + qdisc_hash_add(sch, false); return sch; } @@ -1114,7 +1125,8 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1129,7 +1141,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); if (err < 0) return err; @@ -1183,7 +1195,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) * Create/change qdisc. */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; @@ -1198,7 +1211,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); if (err < 0) return err; @@ -1401,9 +1414,14 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, return -1; } -static bool tc_qdisc_dump_ignore(struct Qdisc *q) +static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) { - return (q->flags & TCQ_F_BUILTIN) ? true : false; + if (q->flags & TCQ_F_BUILTIN) + return true; + if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) + return true; + + return false; } static int qdisc_notify(struct net *net, struct sk_buff *oskb, @@ -1417,12 +1435,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (old && !tc_qdisc_dump_ignore(old)) { + if (old && !tc_qdisc_dump_ignore(old, false)) { if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } - if (new && !tc_qdisc_dump_ignore(new)) { + if (new && !tc_qdisc_dump_ignore(new, false)) { if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; @@ -1439,7 +1457,8 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, - int *q_idx_p, int s_q_idx, bool recur) + int *q_idx_p, int s_q_idx, bool recur, + bool dump_invisible) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; @@ -1452,7 +1471,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, if (q_idx < s_q_idx) { q_idx++; } else { - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) @@ -1474,7 +1493,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; continue; } - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) @@ -1496,12 +1515,21 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; + const struct nlmsghdr *nlh = cb->nlh; + struct tcmsg *tcm = nlmsg_data(nlh); + struct nlattr *tca[TCA_MAX + 1]; + int err; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; idx = 0; ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + if (err < 0) + return err; + for_each_netdev(net, dev) { struct netdev_queue *dev_queue; @@ -1512,13 +1540,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) q_idx = 0; if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, - true) < 0) + true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx, false) < 0) + &q_idx, s_q_idx, false, + tca[TCA_DUMP_INVISIBLE]) < 0) goto done; cont: @@ -1540,7 +1569,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1559,7 +1589,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); if (err < 0) return err; @@ -1762,7 +1792,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, { struct qdisc_dump_args arg; - if (tc_qdisc_dump_ignore(q) || + if (tc_qdisc_dump_ignore(q, false) || *t_p < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { @@ -1801,6 +1831,12 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (!qdisc_dev(root)) return 0; + if (tcm->tcm_parent) { + q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); + if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + return 0; + } hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2209c2ddacbfb0b1f222cb593477891d7124d9e3..40cbceed4de82aa58d70c2c67782fc65088b25c5 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -214,7 +214,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, if (opt == NULL) return -EINVAL; - error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy); + error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL); if (error < 0) return error; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index d6ca18dc04c3e9e72efedd44088e95118a06b711..7415859fd4c3f65e6ff801b08d683b796d1eaa03 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1137,7 +1137,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) struct tc_ratespec *r; int err; - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy); + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1161,6 +1161,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) sch->handle); if (!q->link.q) q->link.q = &noop_qdisc; + else + qdisc_hash_add(q->link.q, true); q->link.priority = TC_CBQ_MAXPRIO - 1; q->link.priority2 = TC_CBQ_MAXPRIO - 1; @@ -1472,7 +1474,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy); + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1600,6 +1602,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!cl->q) cl->q = &noop_qdisc; + else + qdisc_hash_add(cl->q, true); + cl->common.classid = classid; cl->tparent = parent; cl->qdisc = sch; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3b86a97bc67c3e953cb181eddcb5c0c16bf3b27f..b30a2c70bd489b36a2295a11707b6a36d4eb9ac0 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -58,7 +58,6 @@ struct choke_sched_data { /* Variables */ struct red_vars vars; - struct tcf_proto __rcu *filter_list; struct { u32 prob_drop; /* Early probability drops */ u32 prob_mark; /* Early probability marks */ @@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid) choke_skb_cb(skb)->classid = classid; } -static u16 choke_get_classid(const struct sk_buff *skb) -{ - return choke_skb_cb(skb)->classid; -} - /* * Compare flow of two packets * Returns true only if source and destination address and port match. @@ -187,40 +181,6 @@ static bool choke_match_flow(struct sk_buff *skb1, sizeof(choke_skb_cb(skb1)->keys)); } -/* - * Classify flow using either: - * 1. pre-existing classification result in skb - * 2. fast internal classification - * 3. use TC filter based classification - */ -static bool choke_classify(struct sk_buff *skb, - struct Qdisc *sch, int *qerr) - -{ - struct choke_sched_data *q = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl; - int result; - - fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res, false); - if (result >= 0) { -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - return false; - } -#endif - choke_set_classid(skb, TC_H_MIN(res.classid)); - return true; - } - - return false; -} - /* * Select a packet at random from queue * HACK: since queue can have holes from previous deletion; retry several @@ -257,25 +217,15 @@ static bool choke_match_random(const struct choke_sched_data *q, return false; oskb = choke_peek_random(q, pidx); - if (rcu_access_pointer(q->filter_list)) - return choke_get_classid(nskb) == choke_get_classid(oskb); - return choke_match_flow(oskb, nskb); } static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; struct choke_sched_data *q = qdisc_priv(sch); const struct red_parms *p = &q->parms; - if (rcu_access_pointer(q->filter_list)) { - /* If using external classifiers, get result and record it. */ - if (!choke_classify(skb, sch, &ret)) - goto other_drop; /* Packet was eaten by filter */ - } - choke_skb_cb(skb)->keys_valid = 0; /* Compute average queue usage (see RED) */ q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen); @@ -339,12 +289,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, congestion_drop: qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; - -other_drop: - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; } static struct sk_buff *choke_dequeue(struct Qdisc *sch) @@ -413,7 +357,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); + err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL); if (err < 0) return err; @@ -432,10 +376,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) if (mask != q->tab_mask) { struct sk_buff **ntab; - ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), - GFP_KERNEL | __GFP_NOWARN); - if (!ntab) - ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); + ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO); if (!ntab) return -ENOMEM; @@ -538,7 +479,6 @@ static void choke_destroy(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); choke_free(q->tab); } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 5bfa79ee657cf3674d2b6204aee80f63207b84f9..c518a1efcb9d3bd98d1de0d921bd250ed46bc69c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -140,7 +140,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy); + err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index bb4cbdf7500482b170eef6e7923cf2f2259e52b5..58a8c32eab2394f90e5935ad661b6223afee106b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -76,7 +76,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy); + err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL); if (err < 0) return err; @@ -117,6 +117,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; + else + qdisc_hash_add(cl->qdisc, true); if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 5334e309f17f0ef4416dddcdc9278c14ecb5585d..1c0f877f673a73977d3191a78a2964e6c1d41259 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -129,7 +129,7 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy); + err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); if (err < 0) goto errout; @@ -342,7 +342,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy); + err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); if (err < 0) goto errout; @@ -374,6 +374,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; + else + qdisc_hash_add(p->q, true); pr_debug("%s: qdisc %p\n", __func__, p->q); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a4f738ac77283b19927aef0a0e6c9fc8dafcdd42..b488721a0059adb24aea47240afa0164a6e467a9 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -624,16 +624,6 @@ static void fq_rehash(struct fq_sched_data *q, q->stat_gc_flows += fcnt; } -static void *fq_alloc_node(size_t sz, int node) -{ - void *ptr; - - ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node); - if (!ptr) - ptr = vmalloc_node(sz, node); - return ptr; -} - static void fq_free(void *addr) { kvfree(addr); @@ -650,7 +640,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; /* If XPS was setup, we can allocate memory on right NUMA node */ - array = fq_alloc_node(sizeof(struct rb_root) << log, + array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT, netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; @@ -698,7 +688,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy); + err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9f3a884d15903fd9012c01b5eee802e02f9f709e..9201abce928cda39f099fc94d9fc96de1468a947 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -288,7 +288,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct fq_codel_flow *flow; struct list_head *head; u32 prev_drop_count, prev_ecn_mark; - unsigned int prev_backlog; begin: head = &q->new_flows; @@ -307,7 +306,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) prev_drop_count = q->cstats.drop_count; prev_ecn_mark = q->cstats.ecn_mark; - prev_backlog = sch->qstats.backlog; skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, &flow->cvars, &q->cstats, qdisc_pkt_len, @@ -385,7 +383,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy); + err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy, + NULL); if (err < 0) return err; if (tb[TCA_FQ_CODEL_FLOWS]) { @@ -447,27 +446,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static void *fq_codel_zalloc(size_t sz) -{ - void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vzalloc(sz); - return ptr; -} - -static void fq_codel_free(void *addr) -{ - kvfree(addr); -} - static void fq_codel_destroy(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); tcf_destroy_chain(&q->filter_list); - fq_codel_free(q->backlogs); - fq_codel_free(q->flows); + kvfree(q->backlogs); + kvfree(q->flows); } static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) @@ -494,13 +479,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) } if (!q->flows) { - q->flows = fq_codel_zalloc(q->flows_cnt * - sizeof(struct fq_codel_flow)); + q->flows = kvzalloc(q->flows_cnt * + sizeof(struct fq_codel_flow), GFP_KERNEL); if (!q->flows) return -ENOMEM; - q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32)); + q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); if (!q->backlogs) { - fq_codel_free(q->flows); + kvfree(q->flows); return -ENOMEM; } for (i = 0; i < q->flows_cnt; i++) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1a2f9e964330a5cd0c0b9a5cac91807221b0ffd9..52a2c55f6d9eb29ec1de3135596fe5a1529ebf23 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -795,7 +795,7 @@ static void attach_default_qdiscs(struct net_device *dev) } #ifdef CONFIG_NET_SCHED if (dev->qdisc != &noop_qdisc) - qdisc_hash_add(dev->qdisc); + qdisc_hash_add(dev->qdisc, false); #endif } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index c78a093c551ae84a0ba5ba13a7af9cc89e2ca383..17c7130454bd90e8af1d17e95f477ea558fb481d 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -401,7 +401,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy); + err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); if (err < 0) return err; @@ -470,7 +470,7 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy); + err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ffaa6fb0990f0aa31487a2f1829b1f1accf8b21..5cb82f6c1b0605d7a4373062dc4232e14347bc22 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -957,7 +957,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy); + err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); if (err < 0) return err; @@ -1066,6 +1066,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; + else + qdisc_hash_add(cl->qdisc, true); INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1425,6 +1427,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; + else + qdisc_hash_add(q->root.qdisc, true); INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 2fae8b5f1b80c017c4ae60df54c9143f82de4e9d..51d3ba682af9ba0f69a3f3c3036519bf527cdee0 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -467,29 +467,14 @@ static void hhf_reset(struct Qdisc *sch) rtnl_kfree_skbs(skb, skb); } -static void *hhf_zalloc(size_t sz) -{ - void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vzalloc(sz); - - return ptr; -} - -static void hhf_free(void *addr) -{ - kvfree(addr); -} - static void hhf_destroy(struct Qdisc *sch) { int i; struct hhf_sched_data *q = qdisc_priv(sch); for (i = 0; i < HHF_ARRAYS_CNT; i++) { - hhf_free(q->hhf_arrays[i]); - hhf_free(q->hhf_valid_bits[i]); + kvfree(q->hhf_arrays[i]); + kvfree(q->hhf_valid_bits[i]); } for (i = 0; i < HH_FLOWS_CNT; i++) { @@ -503,7 +488,7 @@ static void hhf_destroy(struct Qdisc *sch) kfree(flow); } } - hhf_free(q->hh_flows); + kvfree(q->hh_flows); } static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { @@ -529,7 +514,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy); + err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); if (err < 0) return err; @@ -609,8 +594,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) if (!q->hh_flows) { /* Initialize heavy-hitter flow table. */ - q->hh_flows = hhf_zalloc(HH_FLOWS_CNT * - sizeof(struct list_head)); + q->hh_flows = kvzalloc(HH_FLOWS_CNT * + sizeof(struct list_head), GFP_KERNEL); if (!q->hh_flows) return -ENOMEM; for (i = 0; i < HH_FLOWS_CNT; i++) @@ -624,8 +609,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) /* Initialize heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { - q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * - sizeof(u32)); + q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN * + sizeof(u32), GFP_KERNEL); if (!q->hhf_arrays[i]) { /* Note: hhf_destroy() will be called * by our caller. @@ -637,8 +622,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) /* Initialize valid bits of heavy-hitter filter arrays. */ for (i = 0; i < HHF_ARRAYS_CNT; i++) { - q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / - BITS_PER_BYTE); + q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN / + BITS_PER_BYTE, GFP_KERNEL); if (!q->hhf_valid_bits[i]) { /* Note: hhf_destroy() will be called * by our caller. diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4cd5fb134bc9e2dbcdd61b51fb951f94301ed54c..570ef3b0c09b7abe49da4787b668397d4413f0e8 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1017,7 +1017,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); if (err < 0) return err; @@ -1342,7 +1342,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!opt) goto failure; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); if (err < 0) goto failure; @@ -1460,6 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_class_hash_insert(&q->clhash, &cl->common); if (parent) parent->children++; + if (cl->un.leaf.q != &noop_qdisc) + qdisc_hash_add(cl->un.leaf.q, true); } else { if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 20b7f1646f69270e08d8b7588759a0146f262e89..cadfdd4f1e521b3d68b8fa62d5797f3ff604651d 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -84,7 +84,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc); + qdisc_hash_add(qdisc, false); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 922683418e53853cb71747d8d30ab0e4a989254b..0a4cf27ea54bd78768d4fa084f7b082460f5f266 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -21,14 +21,13 @@ struct mqprio_sched { struct Qdisc **qdiscs; - int hw_owned; + int hw_offload; }; static void mqprio_destroy(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO}; unsigned int ntx; if (priv->qdiscs) { @@ -39,10 +38,15 @@ static void mqprio_destroy(struct Qdisc *sch) kfree(priv->qdiscs); } - if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) + if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { + struct tc_mqprio_qopt offload = { 0 }; + struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, + { .mqprio = &offload } }; + dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); - else + } else { netdev_set_num_tc(dev, 0); + } } static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) @@ -59,15 +63,20 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) return -EINVAL; } - /* net_device does not support requested operation */ - if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) - return -EINVAL; + /* Limit qopt->hw to maximum supported offload value. Drivers have + * the option of overriding this later if they don't support the a + * given offload type. + */ + if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX) + qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX; - /* if hw owned qcount and qoffset are taken from LLD so - * no reason to verify them here + /* If hardware offload is requested we will leave it to the device + * to either populate the queue counts itself or to validate the + * provided queue counts. If ndo_setup_tc is not present then + * hardware doesn't support offload and we should return an error. */ if (qopt->hw) - return 0; + return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL; for (i = 0; i < qopt->num_tc; i++) { unsigned int last = qopt->offset[i] + qopt->count[i]; @@ -139,13 +148,15 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO, - { .tc = qopt->num_tc }}; + struct tc_mqprio_qopt offload = *qopt; + struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, + { .mqprio = &offload } }; - priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) return err; + + priv->hw_offload = offload.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -175,7 +186,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc); + qdisc_hash_add(qdisc, false); } kfree(priv->qdiscs); priv->qdiscs = NULL; @@ -243,7 +254,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); - opt.hw = priv->hw_owned; + opt.hw = priv->hw_offload; for (i = 0; i < netdev_get_num_tc(dev); i++) { opt.count[i] = dev->tc_to_txq[i].count; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index e7839a0d0eaa52572f675fdb1dfc590c2a70ac76..43a3a10b3c8118fc2e0deff98be2635d2ad81330 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -217,6 +217,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_lock(sch); old = q->queues[i]; q->queues[i] = child; + if (child != &noop_qdisc) + qdisc_hash_add(child, true); if (old != &noop_qdisc) { qdisc_tree_reduce_backlog(old, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c8bb62a1e7449344a0fd81241fe0102ea2f9c0f9..1b3dd6190e9386c6f8104153eb9fdc0255e03ac5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -462,7 +462,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If a delay is expected, orphan the skb. (orphaning usually takes * place at TX completion time, so _before_ the link transit delay) */ - if (q->latency || q->jitter) + if (q->latency || q->jitter || q->rate) skb_orphan_partial(skb); /* @@ -530,21 +530,31 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, now = psched_get_time(); if (q->rate) { - struct sk_buff *last; + struct netem_skb_cb *last = NULL; + + if (sch->q.tail) + last = netem_skb_cb(sch->q.tail); + if (q->t_root.rb_node) { + struct sk_buff *t_skb; + struct netem_skb_cb *t_last; + + t_skb = netem_rb_to_skb(rb_last(&q->t_root)); + t_last = netem_skb_cb(t_skb); + if (!last || + t_last->time_to_send > last->time_to_send) { + last = t_last; + } + } - if (sch->q.qlen) - last = sch->q.tail; - else - last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { /* * Last packet in queue is reference point (now), * calculate this time bonus and subtract * from delay. */ - delay -= netem_skb_cb(last)->time_to_send - now; + delay -= last->time_to_send - now; delay = max_t(psched_tdiff_t, 0, delay); - now = netem_skb_cb(last)->time_to_send; + now = last->time_to_send; } delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); @@ -692,15 +702,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) spinlock_t *root_lock; struct disttable *d; int i; - size_t s; if (n > NETEM_DIST_MAX) return -EINVAL; - s = sizeof(struct disttable) + n * sizeof(s16); - d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN); - if (!d) - d = vmalloc(s); + d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL); if (!d) return -ENOMEM; @@ -833,7 +839,7 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, if (nested_len >= nla_attr_size(0)) return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy); + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 5c3a99d6aa82ae0dcfd71ebceaae8de5d943e352..6c2791d6102dae67fe006b17723d6f50edc07885 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -190,7 +190,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy); + err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index d4d7db267b6edfa56582ca4a588590e0ded9fe66..92c2e6d448d7984af35d6beb2cb3aea717b76511 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -192,8 +192,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) qdisc_destroy(child); } - for (i = oldbands; i < q->bands; i++) + for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; + if (q->queues[i] != &noop_qdisc) + qdisc_hash_add(q->queues[i], true); + } sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f9e712ce2d15ce9280c31d2f75d62b84034ae51d..041eba3006ccdd84e1ea1419443f1d169550ceb5 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -418,7 +418,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); + err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy, + NULL); if (err < 0) return err; @@ -494,6 +495,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, goto destroy_class; } + if (cl->qdisc != &noop_qdisc) + qdisc_hash_add(cl->qdisc, true); sch_tree_lock(sch); qdisc_class_hash_insert(&q->clhash, &cl->common); sch_tree_unlock(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 249b2a18acbd99288eb0a2579a0f29c2ab0b3ded..11292adce4121022a86aff0aed537be3d9319490 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -173,7 +173,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy); + err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL); if (err < 0) return err; @@ -191,6 +191,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) return PTR_ERR(child); } + if (child != &noop_qdisc) + qdisc_hash_add(child, true); sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index fe6963d2151956c508b510edec680b89201173ce..0f777273ba293dcf2e62ae484dd0033bce942a2b 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -495,7 +495,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) int err; if (opt) { - err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); + err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL); if (err < 0) return -EINVAL; @@ -513,6 +513,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt) if (IS_ERR(child)) return PTR_ERR(child); + if (child != &noop_qdisc) + qdisc_hash_add(child, true); sch_tree_lock(sch); qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 42e8c8615e6563a2deabbb3c3437e3985d01ae14..332d94be6e1cb09386307b6c16b6954244a805c9 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -685,11 +685,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static void *sfq_alloc(size_t sz) { - void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN); - - if (!ptr) - ptr = vmalloc(sz); - return ptr; + return kvmalloc(sz, GFP_KERNEL); } static void sfq_free(void *addr) @@ -714,9 +710,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) struct sfq_sched_data *q = qdisc_priv(sch); int i; - q->perturb_timer.function = sfq_perturbation; - q->perturb_timer.data = (unsigned long)sch; - init_timer_deferrable(&q->perturb_timer); + setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, + (unsigned long)sch); for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { q->dep[i].next = i + SFQ_MAX_FLOWS; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 303355c449ab336227d9b115496e0882f2f2a079..b2e4b6ad241a8e538c654ef4e8417ab756740a45 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -315,7 +315,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; - err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); + err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); if (err < 0) return err; @@ -396,6 +396,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; + if (child != &noop_qdisc) + qdisc_hash_add(child, true); } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a9708da28eb53ff2987264c6c7d7ca6ec2ff09e9..95238284c422e23fbd834a3aedc54c9243743d0f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1176,7 +1176,9 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ctsn_ack_point = asoc->next_tsn - 1; asoc->adv_peer_ack_point = asoc->ctsn_ack_point; - if (!asoc->stream) { + + if (sctp_state(asoc, COOKIE_WAIT)) { + sctp_stream_free(asoc->stream); asoc->stream = new->stream; new->stream = NULL; } diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index e3621cb4827fadb5f5cb41ebe8455dfa3300a765..697721a7a3f1761373aa66b847bd744ea1b42d10 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { - if (chunk->sent_count) + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + + if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - else + streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/input.c b/net/sctp/input.c index 0e06a278d2a911e2360e75e983b623e453284b7b..ba9ad32fc44740b9ec45d95e265d3d895148d7a7 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, struct sctp_association **app, struct sctp_transport **tpp) { + struct sctp_init_chunk *chunkhdr, _chunkhdr; union sctp_addr saddr; union sctp_addr daddr; struct sctp_af *af; struct sock *sk = NULL; struct sctp_association *asoc; struct sctp_transport *transport = NULL; - struct sctp_init_chunk *chunkhdr; __u32 vtag = ntohl(sctphdr->vtag); - int len = skb->len - ((void *)sctphdr - (void *)skb->data); *app = NULL; *tpp = NULL; @@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); - if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) - + sizeof(__be32) || + /* chunk header + first 4 octects of init header */ + chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) + + sizeof(struct sctphdr), + sizeof(struct sctp_chunkhdr) + + sizeof(__be32), &_chunkhdr); + if (!chunkhdr || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || - ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) { + ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) goto out; - } + } else if (vtag != asoc->c.peer_vtag) { goto out; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 961ee59f696a0b0a8b6c2bade0031a073dff53ad..f5b45b8b8b16e6965d24cd79b150828a608c2121 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct sctp_bind_addr *bp; struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; - union sctp_addr *baddr = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; __u8 matchlen = 0; - __u8 bmatchlen; sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); @@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid) + struct dst_entry *bdst; + __u8 bmatchlen; + + if (!laddr->valid || + laddr->state != SCTP_ADDR_SRC || + laddr->a.sa.sa_family != AF_INET6 || + scope > sctp_scope(&laddr->a)) continue; - if ((laddr->state == SCTP_ADDR_SRC) && - (laddr->a.sa.sa_family == AF_INET6) && - (scope <= sctp_scope(&laddr->a))) { - bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); - if (!baddr || (matchlen < bmatchlen)) { - baddr = &laddr->a; - matchlen = bmatchlen; - } - } - } - if (baddr) { - fl6->saddr = baddr->v6.sin6_addr; - fl6->fl6_sport = baddr->v6.sin6_port; + + fl6->saddr = laddr->a.v6.sin6_addr; + fl6->fl6_sport = laddr->a.v6.sin6_port; final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); - dst = ip6_dst_lookup_flow(sk, fl6, final_p); + bdst = ip6_dst_lookup_flow(sk, fl6, final_p); + + if (!IS_ERR(bdst) && + ipv6_chk_addr(dev_net(bdst->dev), + &laddr->a.v6.sin6_addr, bdst->dev, 1)) { + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + break; + } + + bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); + if (matchlen > bmatchlen) + continue; + + if (!IS_ERR_OR_NULL(dst)) + dst_release(dst); + dst = bdst; + matchlen = bmatchlen; } rcu_read_unlock(); @@ -665,6 +677,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; rcu_read_lock(); opt = rcu_dereference(np->opt); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8081476ed313cca8ad8f9876ec5ef5dc8fd68207..fe4c3d462f6ebc48d11d2a587f2adb7a39fde2e5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, struct sctp_chunk *chk, *temp; list_for_each_entry_safe(chk, temp, queue, transmitted_list) { + struct sctp_stream_out *streamout; + if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; @@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); + streamout = &asoc->stream->out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -396,6 +400,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) { + struct sctp_stream_out *streamout = + &asoc->stream->out[chk->sinfo.sinfo_stream]; + + streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + } msg_len -= SCTP_DATA_SNDSIZE(chk) + sizeof(struct sk_buff) + diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 118faff6a332ee24caf3d772b6f00641128ef104..92e332e173914f16c10ba54038dbb78dee0d0c49 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1512,14 +1512,12 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, struct iov_iter *from) { void *target; - ssize_t copied; /* Make room in chunk for data. */ target = skb_put(chunk->skb, len); /* Copy data (whole iovec) into chunk */ - copied = copy_from_iter(target, len, from); - if (copied != len) + if (!copy_from_iter_full(target, len, from)) return -EFAULT; /* Adjust the chunk length field. */ @@ -2456,16 +2454,11 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * stream sequence number shall be set to 0. */ - /* Allocate storage for the negotiated streams if it is not a temporary - * association. - */ - if (!asoc->temp) { - if (sctp_stream_init(asoc, gfp)) - goto clean_up; + if (sctp_stream_init(asoc, gfp)) + goto clean_up; - if (sctp_assoc_set_id(asoc, gfp)) - goto clean_up; - } + if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) + goto clean_up; /* ADDIP Section 4.1 ASCONF Chunk Procedures * diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24c6ccce7539097728f3733ae5ecc037562cefcf..f863b5573e42d6f1c4908bf045af0079edc4b2d9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2088,6 +2088,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } } + /* Set temp so that it won't be added into hashtable */ + new_asoc->temp = 1; + /* Compare the tie_tag in cookie with the verification tag of * current association. */ @@ -3872,9 +3875,18 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net, else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST) reply = sctp_process_strreset_inreq( (struct sctp_association *)asoc, param, &ev); - /* More handles for other types will be added here, by now it - * just ignores other types. - */ + else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST) + reply = sctp_process_strreset_tsnreq( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) + reply = sctp_process_strreset_addstrm_out( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) + reply = sctp_process_strreset_addstrm_in( + (struct sctp_association *)asoc, param, &ev); + else if (param.p->type == SCTP_PARAM_RESET_RESPONSE) + reply = sctp_process_strreset_resp( + (struct sctp_association *)asoc, param, &ev); if (ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d9d4c92e06b312e6c300afd8f3d5db33161fd9f7..30aa0a529215ae54e43bdcb54a6e1870761996c3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3758,6 +3758,39 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, return retval; } +static int sctp_setsockopt_reconfig_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + asoc->reconf_enable = !!params.assoc_value; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + sp->ep->reconf_enable = !!params.assoc_value; + } else { + goto out; + } + + retval = 0; + +out: + return retval; +} + static int sctp_setsockopt_enable_strreset(struct sock *sk, char __user *optval, unsigned int optlen) @@ -4038,6 +4071,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_DEFAULT_PRINFO: retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); break; + case SCTP_RECONFIG_SUPPORTED: + retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen); + break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); break; @@ -4586,13 +4622,13 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize; hash++, head++) { - read_lock(&head->lock); + read_lock_bh(&head->lock); sctp_for_each_hentry(epb, &head->chain) { err = cb(sctp_ep(epb), p); if (err) break; } - read_unlock(&head->lock); + read_unlock_bh(&head->lock); } return err; @@ -6540,6 +6576,102 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len, return retval; } +static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_out *streamout; + struct sctp_association *asoc; + struct sctp_prstatus params; + int retval = -EINVAL; + int policy; + + if (len < sizeof(params)) + goto out; + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) { + retval = -EFAULT; + goto out; + } + + policy = params.sprstat_policy; + if (policy & ~SCTP_PR_SCTP_MASK) + goto out; + + asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); + if (!asoc || params.sprstat_sid >= asoc->stream->outcnt) + goto out; + + streamout = &asoc->stream->out[params.sprstat_sid]; + if (policy == SCTP_PR_SCTP_NONE) { + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { + params.sprstat_abandoned_unsent += + streamout->abandoned_unsent[policy]; + params.sprstat_abandoned_sent += + streamout->abandoned_sent[policy]; + } + } else { + params.sprstat_abandoned_unsent = + streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + params.sprstat_abandoned_sent = + streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + } + + if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->reconf_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->ep->reconf_enable; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -6748,6 +6880,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; + case SCTP_PR_STREAM_STATUS: + retval = sctp_getsockopt_pr_streamstatus(sk, len, optval, + optlen); + break; + case SCTP_RECONFIG_SUPPORTED: + retval = sctp_getsockopt_reconfig_supported(sk, len, optval, + optlen); + break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); @@ -7440,9 +7580,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk->sk_shutdown & RCV_SHUTDOWN) break; - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, noblock)) - continue; + if (sk_can_busy_loop(sk)) { + sk_busy_loop(sk, noblock); + + if (!skb_queue_empty(&sk->sk_receive_queue)) + continue; + } /* User doesn't want to wait. */ error = -EAGAIN; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index bbed997e1c5f01d4401adcd4664be4b6d16a93fa..dda53a2939866d5bb7e3233a1ba0a90160ecf488 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -294,18 +294,6 @@ int sctp_send_add_streams(struct sctp_association *asoc, stream->out = streamout; } - if (in) { - struct sctp_stream_in *streamin; - - streamin = krealloc(stream->in, incnt * sizeof(*streamin), - GFP_KERNEL); - if (!streamin) - goto out; - - memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); - stream->in = streamin; - } - chunk = sctp_make_strreset_addstrm(asoc, out, in); if (!chunk) goto out; @@ -330,13 +318,14 @@ int sctp_send_add_streams(struct sctp_association *asoc, } static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( - struct sctp_association *asoc, __u32 resp_seq) + struct sctp_association *asoc, __u32 resp_seq, + __be16 type) { struct sctp_chunk *chunk = asoc->strreset_chunk; struct sctp_reconf_chunk *hdr; union sctp_params param; - if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk) + if (!chunk) return NULL; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; @@ -347,13 +336,21 @@ static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param( */ struct sctp_strreset_tsnreq *req = param.v; - if (req->request_seq == resp_seq) + if ((!resp_seq || req->request_seq == resp_seq) && + (!type || type == req->param_hdr.type)) return param.v; } return NULL; } +static void sctp_update_strreset_result(struct sctp_association *asoc, + __u32 result) +{ + asoc->strreset_result[1] = asoc->strreset_result[0]; + asoc->strreset_result[0] = result; +} + struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_association *asoc, union sctp_params param, @@ -370,15 +367,19 @@ struct sctp_chunk *sctp_process_strreset_outreq( if (ntohl(outreq->send_reset_at_tsn) > sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map)) { result = SCTP_STRRESET_IN_PROGRESS; - goto out; + goto err; } - if (request_seq > asoc->strreset_inseq) { + if (TSN_lt(asoc->strreset_inseq, request_seq) || + TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; - goto out; - } else if (request_seq == asoc->strreset_inseq) { - asoc->strreset_inseq++; + goto err; + } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { + i = asoc->strreset_inseq - request_seq - 1; + result = asoc->strreset_result[i]; + goto err; } + asoc->strreset_inseq++; /* Check strreset_enable after inseq inc, as sender cannot tell * the peer doesn't enable strreset after receiving response with @@ -388,13 +389,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( goto out; if (asoc->strreset_chunk) { - sctp_paramhdr_t *param_hdr; - struct sctp_transport *t; - - param_hdr = sctp_chunk_lookup_strreset_param( - asoc, outreq->response_seq); - if (!param_hdr || param_hdr->type != - SCTP_PARAM_RESET_IN_REQUEST) { + if (!sctp_chunk_lookup_strreset_param( + asoc, outreq->response_seq, + SCTP_PARAM_RESET_IN_REQUEST)) { /* same process with outstanding isn't 0 */ result = SCTP_STRRESET_ERR_IN_PROGRESS; goto out; @@ -404,6 +401,8 @@ struct sctp_chunk *sctp_process_strreset_outreq( asoc->strreset_outseq++; if (!asoc->strreset_outstanding) { + struct sctp_transport *t; + t = asoc->strreset_chunk->transport; if (del_timer(&t->reconf_timer)) sctp_transport_put(t); @@ -439,6 +438,8 @@ struct sctp_chunk *sctp_process_strreset_outreq( GFP_ATOMIC); out: + sctp_update_strreset_result(asoc, result); +err: return sctp_make_strreset_resp(asoc, result, request_seq); } @@ -455,12 +456,18 @@ struct sctp_chunk *sctp_process_strreset_inreq( __u32 request_seq; request_seq = ntohl(inreq->request_seq); - if (request_seq > asoc->strreset_inseq) { + if (TSN_lt(asoc->strreset_inseq, request_seq) || + TSN_lt(request_seq, asoc->strreset_inseq - 2)) { result = SCTP_STRRESET_ERR_BAD_SEQNO; - goto out; - } else if (request_seq == asoc->strreset_inseq) { - asoc->strreset_inseq++; + goto err; + } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { + i = asoc->strreset_inseq - request_seq - 1; + result = asoc->strreset_result[i]; + if (result == SCTP_STRRESET_PERFORMED) + return NULL; + goto err; } + asoc->strreset_inseq++; if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) goto out; @@ -495,12 +502,407 @@ struct sctp_chunk *sctp_process_strreset_inreq( asoc->strreset_outstanding = 1; sctp_chunk_hold(asoc->strreset_chunk); + result = SCTP_STRRESET_PERFORMED; + *evp = sctp_ulpevent_make_stream_reset_event(asoc, SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); out: + sctp_update_strreset_result(asoc, result); +err: if (!chunk) chunk = sctp_make_strreset_resp(asoc, result, request_seq); return chunk; } + +struct sctp_chunk *sctp_process_strreset_tsnreq( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen; + struct sctp_strreset_tsnreq *tsnreq = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + __u32 request_seq; + __u16 i; + + request_seq = ntohl(tsnreq->request_seq); + if (TSN_lt(asoc->strreset_inseq, request_seq) || + TSN_lt(request_seq, asoc->strreset_inseq - 2)) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto err; + } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { + i = asoc->strreset_inseq - request_seq - 1; + result = asoc->strreset_result[i]; + if (result == SCTP_STRRESET_PERFORMED) { + next_tsn = asoc->next_tsn; + init_tsn = + sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1; + } + goto err; + } + asoc->strreset_inseq++; + + if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) + goto out; + + if (asoc->strreset_outstanding) { + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + /* G3: The same processing as though a SACK chunk with no gap report + * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were + * received MUST be performed. + */ + max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map); + sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen); + sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + + /* G1: Compute an appropriate value for the Receiver's Next TSN -- the + * TSN that the peer should use to send the next DATA chunk. The + * value SHOULD be the smallest TSN not acknowledged by the + * receiver of the request plus 2^31. + */ + init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31); + sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + init_tsn, GFP_ATOMIC); + + /* G4: The same processing as though a FWD-TSN chunk (as defined in + * [RFC3758]) with all streams affected and a new cumulative TSN + * ACK of the Receiver's Next TSN minus 1 were received MUST be + * performed. + */ + sctp_outq_free(&asoc->outqueue); + + /* G2: Compute an appropriate value for the local endpoint's next TSN, + * i.e., the next TSN assigned by the receiver of the SSN/TSN reset + * chunk. The value SHOULD be the highest TSN sent by the receiver + * of the request plus 1. + */ + next_tsn = asoc->next_tsn; + asoc->ctsn_ack_point = next_tsn - 1; + asoc->adv_peer_ack_point = asoc->ctsn_ack_point; + + /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all + * incoming and outgoing streams. + */ + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + for (i = 0; i < stream->incnt; i++) + stream->in[i].ssn = 0; + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn, + next_tsn, GFP_ATOMIC); + +out: + sctp_update_strreset_result(asoc, result); +err: + return sctp_make_strreset_tsnresp(asoc, result, request_seq, + next_tsn, init_tsn); +} + +struct sctp_chunk *sctp_process_strreset_addstrm_out( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_addstrm *addstrm = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + struct sctp_stream_in *streamin; + __u32 request_seq, incnt; + __u16 in, i; + + request_seq = ntohl(addstrm->request_seq); + if (TSN_lt(asoc->strreset_inseq, request_seq) || + TSN_lt(request_seq, asoc->strreset_inseq - 2)) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto err; + } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { + i = asoc->strreset_inseq - request_seq - 1; + result = asoc->strreset_result[i]; + goto err; + } + asoc->strreset_inseq++; + + if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) + goto out; + + if (asoc->strreset_chunk) { + if (!sctp_chunk_lookup_strreset_param( + asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) { + /* same process with outstanding isn't 0 */ + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + asoc->strreset_outstanding--; + asoc->strreset_outseq++; + + if (!asoc->strreset_outstanding) { + struct sctp_transport *t; + + t = asoc->strreset_chunk->transport; + if (del_timer(&t->reconf_timer)) + sctp_transport_put(t); + + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } + } + + in = ntohs(addstrm->number_of_streams); + incnt = stream->incnt + in; + if (!in || incnt > SCTP_MAX_STREAM) + goto out; + + streamin = krealloc(stream->in, incnt * sizeof(*streamin), + GFP_ATOMIC); + if (!streamin) + goto out; + + memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); + stream->in = streamin; + stream->incnt = incnt; + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_stream_change_event(asoc, + 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC); + +out: + sctp_update_strreset_result(asoc, result); +err: + return sctp_make_strreset_resp(asoc, result, request_seq); +} + +struct sctp_chunk *sctp_process_strreset_addstrm_in( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_addstrm *addstrm = param.v; + struct sctp_stream *stream = asoc->stream; + __u32 result = SCTP_STRRESET_DENIED; + struct sctp_stream_out *streamout; + struct sctp_chunk *chunk = NULL; + __u32 request_seq, outcnt; + __u16 out, i; + + request_seq = ntohl(addstrm->request_seq); + if (TSN_lt(asoc->strreset_inseq, request_seq) || + TSN_lt(request_seq, asoc->strreset_inseq - 2)) { + result = SCTP_STRRESET_ERR_BAD_SEQNO; + goto err; + } else if (TSN_lt(request_seq, asoc->strreset_inseq)) { + i = asoc->strreset_inseq - request_seq - 1; + result = asoc->strreset_result[i]; + if (result == SCTP_STRRESET_PERFORMED) + return NULL; + goto err; + } + asoc->strreset_inseq++; + + if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) + goto out; + + if (asoc->strreset_outstanding) { + result = SCTP_STRRESET_ERR_IN_PROGRESS; + goto out; + } + + out = ntohs(addstrm->number_of_streams); + outcnt = stream->outcnt + out; + if (!out || outcnt > SCTP_MAX_STREAM) + goto out; + + streamout = krealloc(stream->out, outcnt * sizeof(*streamout), + GFP_ATOMIC); + if (!streamout) + goto out; + + memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); + stream->out = streamout; + + chunk = sctp_make_strreset_addstrm(asoc, out, 0); + if (!chunk) + goto out; + + asoc->strreset_chunk = chunk; + asoc->strreset_outstanding = 1; + sctp_chunk_hold(asoc->strreset_chunk); + + stream->outcnt = outcnt; + + result = SCTP_STRRESET_PERFORMED; + + *evp = sctp_ulpevent_make_stream_change_event(asoc, + 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC); + +out: + sctp_update_strreset_result(asoc, result); +err: + if (!chunk) + chunk = sctp_make_strreset_resp(asoc, result, request_seq); + + return chunk; +} + +struct sctp_chunk *sctp_process_strreset_resp( + struct sctp_association *asoc, + union sctp_params param, + struct sctp_ulpevent **evp) +{ + struct sctp_strreset_resp *resp = param.v; + struct sctp_stream *stream = asoc->stream; + struct sctp_transport *t; + __u16 i, nums, flags = 0; + sctp_paramhdr_t *req; + __u32 result; + + req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0); + if (!req) + return NULL; + + result = ntohl(resp->result); + if (result != SCTP_STRRESET_PERFORMED) { + /* if in progress, do nothing but retransmit */ + if (result == SCTP_STRRESET_IN_PROGRESS) + return NULL; + else if (result == SCTP_STRRESET_DENIED) + flags = SCTP_STREAM_RESET_DENIED; + else + flags = SCTP_STREAM_RESET_FAILED; + } + + if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { + struct sctp_strreset_outreq *outreq; + __u16 *str_p; + + outreq = (struct sctp_strreset_outreq *)req; + str_p = outreq->list_of_streams; + nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2; + + if (result == SCTP_STRRESET_PERFORMED) { + if (nums) { + for (i = 0; i < nums; i++) + stream->out[ntohs(str_p[i])].ssn = 0; + } else { + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + } + + flags = SCTP_STREAM_RESET_OUTGOING_SSN; + } + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, + nums, str_p, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { + struct sctp_strreset_inreq *inreq; + __u16 *str_p; + + /* if the result is performed, it's impossible for inreq */ + if (result == SCTP_STRRESET_PERFORMED) + return NULL; + + inreq = (struct sctp_strreset_inreq *)req; + str_p = inreq->list_of_streams; + nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2; + + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, + nums, str_p, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) { + struct sctp_strreset_resptsn *resptsn; + __u32 stsn, rtsn; + + /* check for resptsn, as sctp_verify_reconf didn't do it*/ + if (ntohs(param.p->length) != sizeof(*resptsn)) + return NULL; + + resptsn = (struct sctp_strreset_resptsn *)resp; + stsn = ntohl(resptsn->senders_next_tsn); + rtsn = ntohl(resptsn->receivers_next_tsn); + + if (result == SCTP_STRRESET_PERFORMED) { + __u32 mtsn = sctp_tsnmap_get_max_tsn_seen( + &asoc->peer.tsn_map); + + sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn); + sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); + + sctp_tsnmap_init(&asoc->peer.tsn_map, + SCTP_TSN_MAP_INITIAL, + stsn, GFP_ATOMIC); + + sctp_outq_free(&asoc->outqueue); + + asoc->next_tsn = rtsn; + asoc->ctsn_ack_point = asoc->next_tsn - 1; + asoc->adv_peer_ack_point = asoc->ctsn_ack_point; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].ssn = 0; + for (i = 0; i < stream->incnt; i++) + stream->in[i].ssn = 0; + } + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags, + stsn, rtsn, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) { + struct sctp_strreset_addstrm *addstrm; + __u16 number; + + addstrm = (struct sctp_strreset_addstrm *)req; + nums = ntohs(addstrm->number_of_streams); + number = stream->outcnt - nums; + + if (result == SCTP_STRRESET_PERFORMED) + for (i = number; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + else + stream->outcnt = number; + + *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, + 0, nums, GFP_ATOMIC); + } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) { + struct sctp_strreset_addstrm *addstrm; + + /* if the result is performed, it's impossible for addstrm in + * request. + */ + if (result == SCTP_STRRESET_PERFORMED) + return NULL; + + addstrm = (struct sctp_strreset_addstrm *)req; + nums = ntohs(addstrm->number_of_streams); + + *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, + nums, 0, GFP_ATOMIC); + } + + asoc->strreset_outstanding--; + asoc->strreset_outseq++; + + /* remove everything for this reconf request */ + if (!asoc->strreset_outstanding) { + t = asoc->strreset_chunk->transport; + if (del_timer(&t->reconf_timer)) + sctp_transport_put(t); + + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + } + + return NULL; +} diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index daf8554fd42a5e537bb58572823b2028f74be930..0e732f68c2bfc3b791dade5a85c36628904ee490 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -274,6 +274,13 @@ static struct ctl_table sctp_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "reconf_enable", + .data = &init_net.sctp.reconf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "auth_enable", .data = &init_net.sctp.auth_enable, diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index c8881bc542a066e6f7f234beea3c7208394242c5..ec2b3e013c2f4ba48eabfc8d8ed20921cabc08a1 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -883,6 +883,62 @@ struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( return event; } +struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( + const struct sctp_association *asoc, __u16 flags, __u32 local_tsn, + __u32 remote_tsn, gfp_t gfp) +{ + struct sctp_assoc_reset_event *areset; + struct sctp_ulpevent *event; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_assoc_reset_event), + MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + areset = (struct sctp_assoc_reset_event *) + skb_put(skb, sizeof(struct sctp_assoc_reset_event)); + + areset->assocreset_type = SCTP_ASSOC_RESET_EVENT; + areset->assocreset_flags = flags; + areset->assocreset_length = sizeof(struct sctp_assoc_reset_event); + sctp_ulpevent_set_owner(event, asoc); + areset->assocreset_assoc_id = sctp_assoc2id(asoc); + areset->assocreset_local_tsn = local_tsn; + areset->assocreset_remote_tsn = remote_tsn; + + return event; +} + +struct sctp_ulpevent *sctp_ulpevent_make_stream_change_event( + const struct sctp_association *asoc, __u16 flags, + __u32 strchange_instrms, __u32 strchange_outstrms, gfp_t gfp) +{ + struct sctp_stream_change_event *schange; + struct sctp_ulpevent *event; + struct sk_buff *skb; + + event = sctp_ulpevent_new(sizeof(struct sctp_stream_change_event), + MSG_NOTIFICATION, gfp); + if (!event) + return NULL; + + skb = sctp_event2skb(event); + schange = (struct sctp_stream_change_event *) + skb_put(skb, sizeof(struct sctp_stream_change_event)); + + schange->strchange_type = SCTP_STREAM_CHANGE_EVENT; + schange->strchange_flags = flags; + schange->strchange_length = sizeof(struct sctp_stream_change_event); + sctp_ulpevent_set_owner(event, asoc); + schange->strchange_assoc_id = sctp_assoc2id(asoc); + schange->strchange_instrms = strchange_instrms; + schange->strchange_outstrms = strchange_outstrms; + + return event; +} + /* Return the notification type, assuming this is a notification * event. */ diff --git a/net/smc/Kconfig b/net/smc/Kconfig index c717ef0896aa2accaee05e3cf4c10d066c61eeb3..33954852f3f89b3bd05595a159e5c2ed56d074ad 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -8,6 +8,10 @@ config SMC The Linux implementation of the SMC-R solution is designed as a separate socket family SMC. + Warning: SMC will expose all memory for remote reads and writes + once a connection is established. Don't enable this option except + for tightly controlled lab environment. + Select this option if you want to run SMC socket applications config SMC_DIAG diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 093803786eacf3388dc470a04c02293f60e4102e..6793d7348cc811f41395c5ca3a9d1f1390813646 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -101,7 +101,7 @@ struct proto smc_proto = { .unhash = smc_unhash_sk, .obj_size = sizeof(struct smc_sock), .h.smc_hash = &smc_v4_hashinfo, - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, }; EXPORT_SYMBOL_GPL(smc_proto); @@ -147,7 +147,6 @@ static int smc_release(struct socket *sock) schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); } - sk->sk_prot->unhash(sk); release_sock(sk); sock_put(sk); @@ -451,6 +450,9 @@ static int smc_connect_rdma(struct smc_sock *smc) goto decline_rdma_unlock; } + smc_close_init(smc); + smc_rx_init(smc); + if (local_contact == SMC_FIRST_CONTACT) { rc = smc_ib_ready_link(link); if (rc) { @@ -477,7 +479,6 @@ static int smc_connect_rdma(struct smc_sock *smc) mutex_unlock(&smc_create_lgr_pending); smc_tx_init(smc); - smc_rx_init(smc); out_connected: smc_copy_sock_settings_to_clc(smc); @@ -637,7 +638,8 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { - /* tbd in follow-on patch: close this sock */ + new_sk->sk_prot->unhash(new_sk); + sock_put(new_sk); continue; } if (new_sock) @@ -657,8 +659,13 @@ void smc_close_non_accepted(struct sock *sk) if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; - if (!smc->use_fallback) + if (smc->use_fallback) { + sk->sk_state = SMC_CLOSED; + } else { smc_close_active(smc); + sock_set_flag(sk, SOCK_DEAD); + sk->sk_shutdown |= SHUTDOWN_MASK; + } if (smc->clcsock) { struct socket *tcp; @@ -666,11 +673,9 @@ void smc_close_non_accepted(struct sock *sk) smc->clcsock = NULL; sock_release(tcp); } - sock_set_flag(sk, SOCK_DEAD); - sk->sk_shutdown |= SHUTDOWN_MASK; if (smc->use_fallback) { schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else { + } else if (sk->sk_state == SMC_CLOSED) { smc_conn_free(&smc->conn); schedule_delayed_work(&smc->sock_put_work, SMC_CLOSE_SOCK_PUT_DELAY); @@ -800,6 +805,9 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } + smc_close_init(new_smc); + smc_rx_init(new_smc); + rc = smc_clc_send_accept(new_smc, local_contact); if (rc) goto out_err; @@ -839,7 +847,6 @@ static void smc_listen_work(struct work_struct *work) } smc_tx_init(new_smc); - smc_rx_init(new_smc); out_connected: sk_refcnt_debug_inc(newsmcsk); diff --git a/net/smc/smc.h b/net/smc/smc.h index ee5fbea24549d2df9f55bb0ad177d548c637bb0f..6e44313e4467d01fbdc77f07ee7d14c50ab92ce2 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -164,6 +164,7 @@ struct smc_connection { #ifndef KERNEL_HAS_ATOMIC64 spinlock_t acurs_lock; /* protect cursors */ #endif + struct work_struct close_work; /* peer sent some closing */ }; struct smc_sock { /* smc sock container */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 5a339493872efd123826c31a2771485b3322f67a..a7294edbc22177d2c5b5a21deffaacea1e0855e2 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -217,8 +217,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_err = ECONNRESET; conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; } - if (smc_cdc_rxed_any_close_or_senddone(conn)) - smc_close_passive_received(smc); + if (smc_cdc_rxed_any_close_or_senddone(conn)) { + smc->sk.sk_shutdown |= RCV_SHUTDOWN; + if (smc->clcsock && smc->clcsock->sk) + smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(&smc->sk, SOCK_DONE); + schedule_work(&conn->close_work); + } /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ @@ -228,8 +233,6 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc_close_wake_tx_prepared(smc); } - /* subsequent patch: trigger socket release if connection closed */ - /* socket connected but not accepted */ if (!smc->sk.sk_socket) return; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index e41f594a1e1d0c3d47706e4c80f9de587f953c9b..03ec058d18df642ef7e219000a62e51bc6c0e6fe 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); @@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 67a71d170bedb4be2658cfa5f7a654098da4962f..3c2e166b5d222f4c932179ae442cbd88969efc92 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -117,7 +117,6 @@ void smc_close_active_abort(struct smc_sock *smc) struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - bh_lock_sock(&smc->sk); smc->sk.sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { smc->clcsock->sk->sk_err = ECONNABORTED; @@ -125,6 +124,7 @@ void smc_close_active_abort(struct smc_sock *smc) } switch (smc->sk.sk_state) { case SMC_INIT: + case SMC_ACTIVE: smc->sk.sk_state = SMC_PEERABORTWAIT; break; case SMC_APPCLOSEWAIT1: @@ -161,10 +161,15 @@ void smc_close_active_abort(struct smc_sock *smc) } sock_set_flag(&smc->sk, SOCK_DEAD); - bh_unlock_sock(&smc->sk); smc->sk.sk_state_change(&smc->sk); } +static inline bool smc_close_sent_any_close(struct smc_connection *conn) +{ + return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || + conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; +} + int smc_close_active(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = @@ -185,8 +190,7 @@ int smc_close_active(struct smc_sock *smc) case SMC_INIT: sk->sk_state = SMC_CLOSED; if (smc->smc_listen_work.func) - flush_work(&smc->smc_listen_work); - sock_put(sk); + cancel_work_sync(&smc->smc_listen_work); break; case SMC_LISTEN: sk->sk_state = SMC_CLOSED; @@ -198,7 +202,7 @@ int smc_close_active(struct smc_sock *smc) } release_sock(sk); smc_close_cleanup_listen(sk); - flush_work(&smc->tcp_listen_work); + cancel_work_sync(&smc->smc_listen_work); lock_sock(sk); break; case SMC_ACTIVE: @@ -218,7 +222,7 @@ int smc_close_active(struct smc_sock *smc) case SMC_APPFINCLOSEWAIT: /* socket already shutdown wr or both (active close) */ if (txflags->peer_done_writing && - !txflags->peer_conn_closed) { + !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); } @@ -248,6 +252,13 @@ int smc_close_active(struct smc_sock *smc) break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: + if (txflags->peer_done_writing && + !smc_close_sent_any_close(conn)) { + /* just shutdown wr done, send close request */ + rc = smc_close_final(conn); + } + /* peer sending PeerConnectionClosed will cause transition */ + break; case SMC_PEERFINCLOSEWAIT: /* peer sending PeerConnectionClosed will cause transition */ break; @@ -285,7 +296,7 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (txflags->peer_done_writing && - !txflags->peer_conn_closed) { + !smc_close_sent_any_close(&smc->conn)) { /* just shutdown, but not yet closed locally */ smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; @@ -306,22 +317,27 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) /* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, * or peer_done_writing. - * Called under tasklet context. */ -void smc_close_passive_received(struct smc_sock *smc) +static void smc_close_passive_work(struct work_struct *work) { - struct smc_cdc_conn_state_flags *rxflags = - &smc->conn.local_rx_ctrl.conn_state_flags; + struct smc_connection *conn = container_of(work, + struct smc_connection, + close_work); + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + struct smc_cdc_conn_state_flags *rxflags; struct sock *sk = &smc->sk; int old_state; - sk->sk_shutdown |= RCV_SHUTDOWN; - if (smc->clcsock && smc->clcsock->sk) - smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; - sock_set_flag(&smc->sk, SOCK_DONE); - + lock_sock(&smc->sk); old_state = sk->sk_state; + if (!conn->alert_token_local) { + /* abnormal termination */ + smc_close_active_abort(smc); + goto wakeup; + } + + rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { smc_close_passive_abort_received(smc); goto wakeup; @@ -331,7 +347,7 @@ void smc_close_passive_received(struct smc_sock *smc) case SMC_INIT: if (atomic_read(&smc->conn.bytes_to_rcv) || (rxflags->peer_done_writing && - !rxflags->peer_conn_closed)) + !smc_cdc_rxed_any_close(conn))) sk->sk_state = SMC_APPCLOSEWAIT1; else sk->sk_state = SMC_CLOSED; @@ -348,7 +364,7 @@ void smc_close_passive_received(struct smc_sock *smc) if (!smc_cdc_rxed_any_close(&smc->conn)) break; if (sock_flag(sk, SOCK_DEAD) && - (sk->sk_shutdown == SHUTDOWN_MASK)) { + smc_close_sent_any_close(conn)) { /* smc_release has already been called locally */ sk->sk_state = SMC_CLOSED; } else { @@ -367,17 +383,19 @@ void smc_close_passive_received(struct smc_sock *smc) } wakeup: - if (old_state != sk->sk_state) - sk->sk_state_change(sk); sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ - if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); + if (old_state != sk->sk_state) { + sk->sk_state_change(sk); + if ((sk->sk_state == SMC_CLOSED) && + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { + smc_conn_free(&smc->conn); + schedule_delayed_work(&smc->sock_put_work, + SMC_CLOSE_SOCK_PUT_DELAY); + } } + release_sock(&smc->sk); } void smc_close_sock_put_work(struct work_struct *work) @@ -442,3 +460,9 @@ int smc_close_shutdown_write(struct smc_sock *smc) sk->sk_state_change(&smc->sk); return rc; } + +/* Initialize close properties on connection establishment. */ +void smc_close_init(struct smc_sock *smc) +{ + INIT_WORK(&smc->conn.close_work, smc_close_passive_work); +} diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index bc9a2df3633cee576b15c0a9d715d1bdf6753957..4a3d99a8d7cbf18598ef74dae61e2be2a0a17b48 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -21,8 +21,8 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); void smc_close_active_abort(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); -void smc_close_passive_received(struct smc_sock *smc); void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); +void smc_close_init(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 0eac633fb3549f6e715f25145b16e22b2bc49241..3ac09a629ea1a4c38c6bc21d996a69611c1633b7 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -316,7 +316,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) smc = container_of(conn, struct smc_sock, conn); sock_hold(&smc->sk); __smc_lgr_unregister_conn(conn); - smc_close_active_abort(smc); + schedule_work(&conn->close_work); sock_put(&smc->sk); node = rb_first(&lgr->conns_all); } @@ -613,19 +613,8 @@ int smc_rmb_create(struct smc_sock *smc) rmb_desc = NULL; continue; /* if mapping failed, try smaller one */ } - rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd, - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE, - &rmb_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) { - smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, rmb_desc, - DMA_FROM_DEVICE); - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); - rmb_desc = NULL; - continue; - } + rmb_desc->rkey[SMC_SINGLE_LINK] = + lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; rmb_desc->used = 1; write_lock_bh(&lgr->rmbs_lock); list_add(&rmb_desc->list, @@ -668,6 +657,7 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && + (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && test_bit(i, lgr->rtokens_used_mask)) { conn->rtoken_idx = i; return 0; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 27eb38056a27fb07e3ce72afbacaffaf4ae55f73..b013cb43a327ea81cd8bc9e4a5ffff733f87f6a7 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -93,7 +93,7 @@ struct smc_buf_desc { u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; /* mapped address of buffer */ void *cpu_addr; /* virtual address of buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + u32 rkey[SMC_LINKS_PER_LGR_MAX]; /* for rmb only: * rkey provided to peer */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e6743c008ac548259447daa0e0c207aa7f249c63..b31715505a358cd4e73d1af6c58a49ef7ef8ecbe 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -37,24 +37,6 @@ u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET; /* unique system * identifier */ -int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct ib_mr **mr) -{ - int rc; - - if (*mr) - return 0; /* already done */ - - /* obtain unique key - - * next invocation of get_dma_mr returns a different key! - */ - *mr = pd->device->get_dma_mr(pd, access_flags); - rc = PTR_ERR_OR_ZERO(*mr); - if (IS_ERR(*mr)) - *mr = NULL; - return rc; -} - static int smc_ib_modify_qp_init(struct smc_link *lnk) { struct ib_qp_attr qp_attr; @@ -80,12 +62,11 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk) memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IB_QPS_RTR; qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); - qp_attr.ah_attr.port_num = lnk->ibport; - qp_attr.ah_attr.ah_flags = IB_AH_GRH; - qp_attr.ah_attr.grh.hop_limit = 1; - memcpy(&qp_attr.ah_attr.grh.dgid, lnk->peer_gid, - sizeof(lnk->peer_gid)); - memcpy(&qp_attr.ah_attr.dmac, lnk->peer_mac, + qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; + rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); + rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0); + rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); + memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, sizeof(lnk->peer_mac)); qp_attr.dest_qp_num = lnk->peer_qpn; qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */ @@ -179,8 +160,6 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, u8 port_idx; smcibdev = container_of(handler, struct smc_ib_device, event_handler); - if (!smc_pnet_find_ib(smcibdev->ibdev->name)) - return; switch (ibevent->event) { case IB_EVENT_PORT_ERR: @@ -213,7 +192,8 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; - lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); + lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, + IB_PD_UNSAFE_GLOBAL_RKEY); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; @@ -259,7 +239,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = 1, - .max_inline_data = SMC_WR_TX_SIZE, }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index a95f74bb556915f92d0fa1bdbd2e34eb9814c3b1..b567152a526d48c86110a9574833e8610684af4b 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -11,6 +11,7 @@ #ifndef _SMC_IB_H #define _SMC_IB_H +#include #include #include @@ -60,8 +61,6 @@ void smc_ib_dealloc_protection_domain(struct smc_link *lnk); int smc_ib_create_protection_domain(struct smc_link *lnk); void smc_ib_destroy_queue_pair(struct smc_link *lnk); int smc_ib_create_queue_pair(struct smc_link *lnk); -int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct ib_mr **mr); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 9d3e7fb8348d3efae50515446591642815697b45..78f7af28ae4f25d71469d54d44f98ef4e2df94eb 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -219,7 +219,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) } /* Find an infiniband device by a given name. The device might not exist. */ -struct smc_ib_device *smc_pnet_find_ib(char *ib_name) +static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) { struct smc_ib_device *ibdev; @@ -523,8 +523,11 @@ void smc_pnet_find_roce_resource(struct sock *sk, read_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { if (dst->dev == pnetelem->ndev) { - *smcibdev = pnetelem->smcibdev; - *ibport = pnetelem->ib_port; + if (smc_ib_port_active(pnetelem->smcibdev, + pnetelem->ib_port)) { + *smcibdev = pnetelem->smcibdev; + *ibport = pnetelem->ib_port; + } break; } } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 32ab3df928ca183643fab96ffd8c09667a54fbde..c4f1bccd43589c0d2d2591925dfa3053b7fa4aef 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -16,7 +16,6 @@ struct smc_ib_device; int smc_pnet_init(void) __init; void smc_pnet_exit(void); int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); -struct smc_ib_device *smc_pnet_find_ib(char *ib_name); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_ib_device **smcibdev, u8 *ibport); diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index c4ef9a4ec56971e685d419a4a89f51e8181709c1..f0c8b089f770a229b7c805fe5b2d59da40a5c1e3 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -36,11 +36,10 @@ static void smc_rx_data_ready(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); - else - sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 69a0013dd25cecbee0658168da577d15ed8913d8..21ec1832ab517d647ac8942ddbd5484492eb4c94 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -431,9 +431,13 @@ static void smc_tx_work(struct work_struct *work) struct smc_connection, tx_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + int rc; lock_sock(&smc->sk); - smc_tx_sndbuf_nonempty(conn); + rc = smc_tx_sndbuf_nonempty(conn); + if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && + !atomic_read(&conn->bytes_to_rcv)) + conn->local_rx_ctrl.prod_flags.write_blocked = 0; release_sock(&smc->sk); } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index eadf157418dcd42c036685e5858bea6e5c559506..874ee9f9d79674f9576c28f9d1dd669e03e62422 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -447,7 +447,7 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_ibs[i].num_sge = 1; lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = - IB_SEND_SIGNALED | IB_SEND_SOLICITED | IB_SEND_INLINE; + IB_SEND_SIGNALED | IB_SEND_SOLICITED; } for (i = 0; i < lnk->wr_rx_cnt; i++) { lnk->wr_rx_sges[i].addr = diff --git a/net/socket.c b/net/socket.c index 985ef06792d6e54c69d296f3e15baf89be972f9c..c2564eb25c6b8faf5c99504ef1d3c90f9bb57c73 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); + +/* This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. Assumes that the caller has a lock on the socket. + */ +u32 kernel_sock_ip_overhead(struct sock *sk) +{ + struct inet_sock *inet; + struct ip_options_rcu *opt; + u32 overhead = 0; + bool owned_by_user; +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np; + struct ipv6_txoptions *optv6 = NULL; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + + if (!sk) + return overhead; + + owned_by_user = sock_owned_by_user(sk); + switch (sk->sk_family) { + case AF_INET: + inet = inet_sk(sk); + overhead += sizeof(struct iphdr); + opt = rcu_dereference_protected(inet->inet_opt, + owned_by_user); + if (opt) + overhead += opt->opt.optlen; + return overhead; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + np = inet6_sk(sk); + overhead += sizeof(struct ipv6hdr); + if (np) + optv6 = rcu_dereference_protected(np->opt, + owned_by_user); + if (optv6) + overhead += (optv6->opt_flen + optv6->opt_nflen); + return overhead; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */ + return overhead; + } +} +EXPORT_SYMBOL(kernel_sock_ip_overhead); diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 04ce2c0b660e0d381a22038bb463d46312befaf5..ac09ca8032965bfd4280fb3f6d3410c08cbda243 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -52,6 +52,7 @@ config SUNRPC_XPRT_RDMA tristate "RPC-over-RDMA transport" depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS default SUNRPC && INFINIBAND + select SG_POOL help This option allows the NFS client and server to use RDMA transports (InfiniBand, iWARP, or RoCE). diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 52da3ce54bb53c8434f0acc276c0003e385486e7..b5cb921775a0b20358f590b69b4cdfd0609a1acb 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1042,8 +1042,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) struct rpc_task *task; task = rpc_new_task(task_setup_data); - if (IS_ERR(task)) - goto out; rpc_task_set_client(task, task_setup_data->rpc_client); rpc_task_set_rpc_message(task, task_setup_data->rpc_message); @@ -1053,7 +1051,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) atomic_inc(&task->tk_count); rpc_execute(task); -out: return task; } EXPORT_SYMBOL_GPL(rpc_run_task); @@ -1140,10 +1137,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) * Create an rpc_task to send the data */ task = rpc_new_task(&task_setup_data); - if (IS_ERR(task)) { - xprt_free_bc_request(req); - goto out; - } task->tk_rqstp = req; /* @@ -1158,7 +1151,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); rpc_execute(task); -out: dprintk("RPC: rpc_run_bc_task: task= %p\n", task); return task; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5db68b371db2cac54e37cc0a883259dbc61125cb..0cc83839c13c3621a70a4b9e8ded773217a2edf3 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -965,11 +965,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) if (task == NULL) { task = rpc_alloc_task(); - if (task == NULL) { - rpc_release_calldata(setup_data->callback_ops, - setup_data->callback_data); - return ERR_PTR(-ENOMEM); - } flags = RPC_TASK_DYNAMIC; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a08aeb56b8e457d56285558048e4a41ab00b03c9..bc0f5a0ecbdce2a5203bf707ebfba8c99821d0f7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -702,59 +702,32 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) return task; } -/* - * Create or destroy enough new threads to make the number - * of threads the given number. If `pool' is non-NULL, applies - * only to threads in that pool, otherwise round-robins between - * all pools. Caller must ensure that mutual exclusion between this and - * server startup or shutdown. - * - * Destroying threads relies on the service threads filling in - * rqstp->rq_task, which only the nfs ones do. Assumes the serv - * has been created using svc_create_pooled(). - * - * Based on code that used to be in nfsd_svc() but tweaked - * to be pool-aware. - */ -int -svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +/* create new threads */ +static int +svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { struct svc_rqst *rqstp; struct task_struct *task; struct svc_pool *chosen_pool; - int error = 0; unsigned int state = serv->sv_nrthreads-1; int node; - if (pool == NULL) { - /* The -1 assumes caller has done a svc_get() */ - nrservs -= (serv->sv_nrthreads-1); - } else { - spin_lock_bh(&pool->sp_lock); - nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); - } - - /* create new threads */ - while (nrservs > 0) { + do { nrservs--; chosen_pool = choose_pool(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); rqstp = svc_prepare_thread(serv, chosen_pool, node); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); - break; - } + if (IS_ERR(rqstp)) + return PTR_ERR(rqstp); __module_get(serv->sv_ops->svo_module); task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { - error = PTR_ERR(task); module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); - break; + return PTR_ERR(task); } rqstp->rq_task = task; @@ -763,18 +736,103 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) svc_sock_update_bufs(serv); wake_up_process(task); - } + } while (nrservs > 0); + + return 0; +} + + +/* destroy old threads */ +static int +svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + /* destroy old threads */ - while (nrservs < 0 && - (task = choose_victim(serv, pool, &state)) != NULL) { + do { + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; send_sig(SIGINT, task, 1); nrservs++; + } while (nrservs < 0); + + return 0; +} + +/* + * Create or destroy enough new threads to make the number + * of threads the given number. If `pool' is non-NULL, applies + * only to threads in that pool, otherwise round-robins between + * all pools. Caller must ensure that mutual exclusion between this and + * server startup or shutdown. + * + * Destroying threads relies on the service threads filling in + * rqstp->rq_task, which only the nfs ones do. Assumes the serv + * has been created using svc_create_pooled(). + * + * Based on code that used to be in nfsd_svc() but tweaked + * to be pool-aware. + */ +int +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + if (pool == NULL) { + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); + } else { + spin_lock_bh(&pool->sp_lock); + nrservs -= pool->sp_nrthreads; + spin_unlock_bh(&pool->sp_lock); } - return error; + if (nrservs > 0) + return svc_start_kthreads(serv, pool, nrservs); + if (nrservs < 0) + return svc_signal_kthreads(serv, pool, nrservs); + return 0; } EXPORT_SYMBOL_GPL(svc_set_num_threads); +/* destroy old threads */ +static int +svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + + /* destroy old threads */ + do { + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; + kthread_stop(task); + nrservs++; + } while (nrservs < 0); + return 0; +} + +int +svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + if (pool == NULL) { + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); + } else { + spin_lock_bh(&pool->sp_lock); + nrservs -= pool->sp_nrthreads; + spin_unlock_bh(&pool->sp_lock); + } + + if (nrservs > 0) + return svc_start_kthreads(serv, pool, nrservs); + if (nrservs < 0) + return svc_stop_kthreads(serv, pool, nrservs); + return 0; +} +EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); + /* * Called from a server thread as it's exiting. Caller must hold the "service * mutex" for the service. diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1f7082144e0168070a9e8ed703ca044f9700b8dc..e34f4ee7f2b6cecdf404daab0d3aab363d94ba01 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) EXPORT_SYMBOL_GPL(xdr_init_decode); /** - * xdr_init_decode - Initialize an xdr_stream for decoding data. + * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages * @xdr: pointer to xdr_stream struct * @buf: pointer to XDR buffer from which to decode data * @pages: list of pages to decode into diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b530a2852ba87ac042baec67bf64a3e57e1232b5..3e63c5e97ebe67875f98836eae254f7d3c3460ba 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } +EXPORT_SYMBOL_GPL(xprt_force_disconnect); /** * xprt_conditional_disconnect - force a transport to disconnect diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index ef19fa42c50ff2e15ecdee7d87f1207ffc3c445c..c1ae8142ab734739c8631e537f0c1157e8e06a60 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -4,5 +4,5 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \ fmr_ops.o frwr_ops.o \ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ - module.o + svc_rdma_rw.o module.o rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 24fedd4b117e8f61cf500f629f2b47f2bc53e76f..03f6b5840764dcc2c486015edd8dc7de4cd84b26 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -119,11 +119,9 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) for (i = 0; i < (reqs << 1); i++) { rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); - if (!rqst) { - pr_err("RPC: %s: Failed to create bc rpc_rqst\n", - __func__); + if (!rqst) goto out_free; - } + dprintk("RPC: %s: new rqst %p\n", __func__, rqst); rqst->rq_xprt = &r_xprt->rx_xprt; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a044be2d6ad726eccfbd991038086aefc746df47..694e9b13ecf07722848d86345589ad4793474fb5 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, } sge->length = len; - ib_dma_sync_single_for_device(ia->ri_device, sge->addr, + ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, DMA_TO_DEVICE); req->rl_send_wr.num_sge++; return true; @@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, sge[sge_no].addr = rdmab_addr(rb); sge[sge_no].length = xdr->head[0].iov_len; sge[sge_no].lkey = rdmab_lkey(rb); - ib_dma_sync_single_for_device(device, sge[sge_no].addr, + ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr, sge[sge_no].length, DMA_TO_DEVICE); /* If there is a Read chunk, the page list is being handled @@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) return 0; out_err: - pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", - PTR_ERR(iptr)); - r_xprt->rx_stats.failed_marshal_count++; + if (PTR_ERR(iptr) != -ENOBUFS) { + pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", + PTR_ERR(iptr)); + r_xprt->rx_stats.failed_marshal_count++; + } return PTR_ERR(iptr); } diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index c846ca9f1ebaa661f1e4a93893752950d35a4b6a..a4a8f6989ee747a1a5046e1e5cff92a0ca167775 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -58,9 +58,9 @@ unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS; static unsigned int min_max_requests = 4; static unsigned int max_max_requests = 16384; -unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; -static unsigned int min_max_inline = 4096; -static unsigned int max_max_inline = 65536; +unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH; +static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH; +static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH; atomic_t rdma_stat_recv; atomic_t rdma_stat_read; @@ -247,8 +247,6 @@ int svc_rdma_init(void) dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); dprintk("\tmax_requests : %u\n", svcrdma_max_requests); - dprintk("\tsq_depth : %u\n", - svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index ff1df40f0d261bc956f1af3410d8780f4c582b83..c676ed0efb5af2cceb6ed59e238a0f7a0109f916 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -12,7 +12,17 @@ #undef SVCRDMA_BACKCHANNEL_DEBUG -int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, +/** + * svc_rdma_handle_bc_reply - Process incoming backchannel reply + * @xprt: controlling backchannel transport + * @rdma_resp: pointer to incoming transport header + * @rcvbuf: XDR buffer into which to decode the reply + * + * Returns: + * %0 if @rcvbuf is filled in, xprt_complete_rqst called, + * %-EAGAIN if server should call ->recvfrom again. + */ +int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, struct xdr_buf *rcvbuf) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); @@ -27,13 +37,13 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, p = (__be32 *)src->iov_base; len = src->iov_len; - xid = rmsgp->rm_xid; + xid = *rdma_resp; #ifdef SVCRDMA_BACKCHANNEL_DEBUG pr_info("%s: xid=%08x, length=%zu\n", __func__, be32_to_cpu(xid), len); pr_info("%s: RPC/RDMA: %*ph\n", - __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp); + __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp); pr_info("%s: RPC: %*ph\n", __func__, (int)len, p); #endif @@ -53,7 +63,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, goto out_unlock; memcpy(dst->iov_base, p, len); - credits = be32_to_cpu(rmsgp->rm_credit); + credits = be32_to_cpup(rdma_resp + 2); if (credits == 0) credits = 1; /* don't deadlock */ else if (credits > r_xprt->rx_buf.rb_bc_max_requests) @@ -90,9 +100,9 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, * Caller holds the connection's mutex and has already marshaled * the RPC/RDMA request. * - * This is similar to svc_rdma_reply, but takes an rpc_rqst - * instead, does not support chunks, and avoids blocking memory - * allocation. + * This is similar to svc_rdma_send_reply_msg, but takes a struct + * rpc_rqst instead, does not support chunks, and avoids blocking + * memory allocation. * * XXX: There is still an opportunity to block in svc_rdma_send() * if there are no SQ entries to post the Send. This may occur if @@ -101,59 +111,36 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { - struct xdr_buf *sndbuf = &rqst->rq_snd_buf; struct svc_rdma_op_ctxt *ctxt; - struct svc_rdma_req_map *vec; - struct ib_send_wr send_wr; int ret; - vec = svc_rdma_get_req_map(rdma); - ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false); - if (ret) + ctxt = svc_rdma_get_context(rdma); + + /* rpcrdma_bc_send_request builds the transport header and + * the backchannel RPC message in the same buffer. Thus only + * one SGE is needed to send both. + */ + ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer, + rqst->rq_snd_buf.len); + if (ret < 0) goto out_err; ret = svc_rdma_repost_recv(rdma, GFP_NOIO); if (ret) goto out_err; - ctxt = svc_rdma_get_context(rdma); - ctxt->pages[0] = virt_to_page(rqst->rq_buffer); - ctxt->count = 1; - - ctxt->direction = DMA_TO_DEVICE; - ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; - ctxt->sge[0].length = sndbuf->len; - ctxt->sge[0].addr = - ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, - sndbuf->len, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) { - ret = -EIO; - goto out_unmap; - } - svc_rdma_count_mappings(rdma, ctxt); - - memset(&send_wr, 0, sizeof(send_wr)); - ctxt->cqe.done = svc_rdma_wc_send; - send_wr.wr_cqe = &ctxt->cqe; - send_wr.sg_list = ctxt->sge; - send_wr.num_sge = 1; - send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; - - ret = svc_rdma_send(rdma, &send_wr); - if (ret) { - ret = -EIO; + ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); + if (ret) goto out_unmap; - } out_err: - svc_rdma_put_req_map(rdma, vec); dprintk("svcrdma: %s returns %d\n", __func__, ret); return ret; out_unmap: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); + ret = -EIO; goto out_err; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 1c4aabf0f65772c13265421262feb030ab4a58ca..bdcf7d85a3dc098e63ca3ff77309496262b39b76 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -166,92 +166,3 @@ int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) dprintk("svcrdma: failed to parse transport header\n"); return -EINVAL; } - -int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, - struct rpcrdma_msg *rmsgp, - enum rpcrdma_errcode err, __be32 *va) -{ - __be32 *startp = va; - - *va++ = rmsgp->rm_xid; - *va++ = rmsgp->rm_vers; - *va++ = xprt->sc_fc_credits; - *va++ = rdma_error; - *va++ = cpu_to_be32(err); - if (err == ERR_VERS) { - *va++ = rpcrdma_version; - *va++ = rpcrdma_version; - } - - return (int)((unsigned long)va - (unsigned long)startp); -} - -/** - * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header - * @rdma_resp: buffer containing Reply transport header - * - * Returns length of transport header, in bytes. - */ -unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp) -{ - unsigned int nsegs; - __be32 *p; - - p = rdma_resp; - - /* RPC-over-RDMA V1 replies never have a Read list. */ - p += rpcrdma_fixed_maxsz + 1; - - /* Skip Write list. */ - while (*p++ != xdr_zero) { - nsegs = be32_to_cpup(p++); - p += nsegs * rpcrdma_segment_maxsz; - } - - /* Skip Reply chunk. */ - if (*p++ != xdr_zero) { - nsegs = be32_to_cpup(p++); - p += nsegs * rpcrdma_segment_maxsz; - } - - return (unsigned long)p - (unsigned long)rdma_resp; -} - -void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) -{ - struct rpcrdma_write_array *ary; - - /* no read-list */ - rmsgp->rm_body.rm_chunks[0] = xdr_zero; - - /* write-array discrim */ - ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[1]; - ary->wc_discrim = xdr_one; - ary->wc_nchunks = cpu_to_be32(chunks); - - /* write-list terminator */ - ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; - - /* reply-array discriminator */ - ary->wc_array[chunks].wc_target.rs_length = xdr_zero; -} - -void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, - int chunks) -{ - ary->wc_discrim = xdr_one; - ary->wc_nchunks = cpu_to_be32(chunks); -} - -void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, - int chunk_no, - __be32 rs_handle, - __be64 rs_offset, - u32 write_len) -{ - struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; - seg->rs_handle = rs_handle; - seg->rs_offset = rs_offset; - seg->rs_length = cpu_to_be32(write_len); -} diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f7b2daf72a86582807798379ac3be336b061a958..27a99bf5b1a6f669be74f86347c7a16e3ce213fe 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -558,33 +558,85 @@ static void rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_arg.buflen = head->arg.buflen; } +static void svc_rdma_send_error(struct svcxprt_rdma *xprt, + __be32 *rdma_argp, int status) +{ + struct svc_rdma_op_ctxt *ctxt; + __be32 *p, *err_msgp; + unsigned int length; + struct page *page; + int ret; + + ret = svc_rdma_repost_recv(xprt, GFP_KERNEL); + if (ret) + return; + + page = alloc_page(GFP_KERNEL); + if (!page) + return; + err_msgp = page_address(page); + + p = err_msgp; + *p++ = *rdma_argp; + *p++ = *(rdma_argp + 1); + *p++ = xprt->sc_fc_credits; + *p++ = rdma_error; + if (status == -EPROTONOSUPPORT) { + *p++ = err_vers; + *p++ = rpcrdma_version; + *p++ = rpcrdma_version; + } else { + *p++ = err_chunk; + } + length = (unsigned long)p - (unsigned long)err_msgp; + + /* Map transport header; no RPC message payload */ + ctxt = svc_rdma_get_context(xprt); + ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length); + if (ret) { + dprintk("svcrdma: Error %d mapping send for protocol error\n", + ret); + return; + } + + ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0); + if (ret) { + dprintk("svcrdma: Error %d posting send for protocol error\n", + ret); + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 1); + } +} + /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes * the RPC/RDMA header small and fixed in size, so it is * straightforward to check the RPC header's direction field. */ -static bool -svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp) +static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, + __be32 *rdma_resp) { - __be32 *p = (__be32 *)rmsgp; + __be32 *p; if (!xprt->xpt_bc_xprt) return false; - if (rmsgp->rm_type != rdma_msg) + p = rdma_resp + 3; + if (*p++ != rdma_msg) return false; - if (rmsgp->rm_body.rm_chunks[0] != xdr_zero) + + if (*p++ != xdr_zero) return false; - if (rmsgp->rm_body.rm_chunks[1] != xdr_zero) + if (*p++ != xdr_zero) return false; - if (rmsgp->rm_body.rm_chunks[2] != xdr_zero) + if (*p++ != xdr_zero) return false; - /* sanity */ - if (p[7] != rmsgp->rm_xid) + /* XID sanity */ + if (*p++ != *rdma_resp) return false; /* call direction */ - if (p[8] == cpu_to_be32(RPC_CALL)) + if (*p == cpu_to_be32(RPC_CALL)) return false; return true; @@ -650,8 +702,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_drop; rqstp->rq_xprt_hlen = ret; - if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { - ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, + if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) { + ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, + &rmsgp->rm_xid, &rqstp->rq_arg); svc_rdma_put_context(ctxt, 0); if (ret) @@ -686,7 +739,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) return ret; out_err: - svc_rdma_send_error(rdma_xprt, rmsgp, ret); + svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret); svc_rdma_put_context(ctxt, 0); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c new file mode 100644 index 0000000000000000000000000000000000000000..0cf6202776933bdd227a2a332c77835efe2228bd --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2016 Oracle. All rights reserved. + * + * Use the core R/W API to move RPC-over-RDMA Read and Write chunks. + */ + +#include +#include +#include + +#include + +#define RPCDBG_FACILITY RPCDBG_SVCXPRT + +/* Each R/W context contains state for one chain of RDMA Read or + * Write Work Requests. + * + * Each WR chain handles a single contiguous server-side buffer, + * because scatterlist entries after the first have to start on + * page alignment. xdr_buf iovecs cannot guarantee alignment. + * + * Each WR chain handles only one R_key. Each RPC-over-RDMA segment + * from a client may contain a unique R_key, so each WR chain moves + * up to one segment at a time. + * + * The scatterlist makes this data structure over 4KB in size. To + * make it less likely to fail, and to handle the allocation for + * smaller I/O requests without disabling bottom-halves, these + * contexts are created on demand, but cached and reused until the + * controlling svcxprt_rdma is destroyed. + */ +struct svc_rdma_rw_ctxt { + struct list_head rw_list; + struct rdma_rw_ctx rw_ctx; + int rw_nents; + struct sg_table rw_sg_table; + struct scatterlist rw_first_sgl[0]; +}; + +static inline struct svc_rdma_rw_ctxt * +svc_rdma_next_ctxt(struct list_head *list) +{ + return list_first_entry_or_null(list, struct svc_rdma_rw_ctxt, + rw_list); +} + +static struct svc_rdma_rw_ctxt * +svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) +{ + struct svc_rdma_rw_ctxt *ctxt; + + spin_lock(&rdma->sc_rw_ctxt_lock); + + ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts); + if (ctxt) { + list_del(&ctxt->rw_list); + spin_unlock(&rdma->sc_rw_ctxt_lock); + } else { + spin_unlock(&rdma->sc_rw_ctxt_lock); + ctxt = kmalloc(sizeof(*ctxt) + + SG_CHUNK_SIZE * sizeof(struct scatterlist), + GFP_KERNEL); + if (!ctxt) + goto out; + INIT_LIST_HEAD(&ctxt->rw_list); + } + + ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; + if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, + ctxt->rw_sg_table.sgl)) { + kfree(ctxt); + ctxt = NULL; + } +out: + return ctxt; +} + +static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, + struct svc_rdma_rw_ctxt *ctxt) +{ + sg_free_table_chained(&ctxt->rw_sg_table, true); + + spin_lock(&rdma->sc_rw_ctxt_lock); + list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts); + spin_unlock(&rdma->sc_rw_ctxt_lock); +} + +/** + * svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts + * @rdma: transport about to be destroyed + * + */ +void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma) +{ + struct svc_rdma_rw_ctxt *ctxt; + + while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) { + list_del(&ctxt->rw_list); + kfree(ctxt); + } +} + +/* A chunk context tracks all I/O for moving one Read or Write + * chunk. This is a a set of rdma_rw's that handle data movement + * for all segments of one chunk. + * + * These are small, acquired with a single allocator call, and + * no more than one is needed per chunk. They are allocated on + * demand, and not cached. + */ +struct svc_rdma_chunk_ctxt { + struct ib_cqe cc_cqe; + struct svcxprt_rdma *cc_rdma; + struct list_head cc_rwctxts; + int cc_sqecount; + enum dma_data_direction cc_dir; +}; + +static void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir) +{ + cc->cc_rdma = rdma; + svc_xprt_get(&rdma->sc_xprt); + + INIT_LIST_HEAD(&cc->cc_rwctxts); + cc->cc_sqecount = 0; + cc->cc_dir = dir; +} + +static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc) +{ + struct svcxprt_rdma *rdma = cc->cc_rdma; + struct svc_rdma_rw_ctxt *ctxt; + + while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) { + list_del(&ctxt->rw_list); + + rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp, + rdma->sc_port_num, ctxt->rw_sg_table.sgl, + ctxt->rw_nents, cc->cc_dir); + svc_rdma_put_rw_ctxt(rdma, ctxt); + } + svc_xprt_put(&rdma->sc_xprt); +} + +/* State for sending a Write or Reply chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions + */ +struct svc_rdma_write_info { + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + unsigned int wi_nsegs; + __be32 *wi_segs; + + /* SGL constructor arguments */ + struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; +}; + +static struct svc_rdma_write_info * +svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk) +{ + struct svc_rdma_write_info *info; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return info; + + info->wi_seg_off = 0; + info->wi_seg_no = 0; + info->wi_nsegs = be32_to_cpup(++chunk); + info->wi_segs = ++chunk; + svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE); + return info; +} + +static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) +{ + svc_rdma_cc_release(&info->wi_cc); + kfree(info); +} + +/** + * svc_rdma_write_done - Write chunk completion + * @cq: controlling Completion Queue + * @wc: Work Completion + * + * Pages under I/O are freed by a subsequent Send completion. + */ +static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_cqe *cqe = wc->wr_cqe; + struct svc_rdma_chunk_ctxt *cc = + container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); + struct svcxprt_rdma *rdma = cc->cc_rdma; + struct svc_rdma_write_info *info = + container_of(cc, struct svc_rdma_write_info, wi_cc); + + atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); + wake_up(&rdma->sc_send_wait); + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("svcrdma: write ctx: %s (%u/0x%x)\n", + ib_wc_status_msg(wc->status), + wc->status, wc->vendor_err); + } + + svc_rdma_write_info_free(info); +} + +/* This function sleeps when the transport's Send Queue is congested. + * + * Assumptions: + * - If ib_post_send() succeeds, only one completion is expected, + * even if one or more WRs are flushed. This is true when posting + * an rdma_rw_ctx or when posting a single signaled WR. + */ +static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) +{ + struct svcxprt_rdma *rdma = cc->cc_rdma; + struct svc_xprt *xprt = &rdma->sc_xprt; + struct ib_send_wr *first_wr, *bad_wr; + struct list_head *tmp; + struct ib_cqe *cqe; + int ret; + + first_wr = NULL; + cqe = &cc->cc_cqe; + list_for_each(tmp, &cc->cc_rwctxts) { + struct svc_rdma_rw_ctxt *ctxt; + + ctxt = list_entry(tmp, struct svc_rdma_rw_ctxt, rw_list); + first_wr = rdma_rw_ctx_wrs(&ctxt->rw_ctx, rdma->sc_qp, + rdma->sc_port_num, cqe, first_wr); + cqe = NULL; + } + + do { + if (atomic_sub_return(cc->cc_sqecount, + &rdma->sc_sq_avail) > 0) { + ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); + if (ret) + break; + return 0; + } + + atomic_inc(&rdma_stat_sq_starve); + atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); + wait_event(rdma->sc_send_wait, + atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); + } while (1); + + pr_err("svcrdma: ib_post_send failed (%d)\n", ret); + set_bit(XPT_CLOSE, &xprt->xpt_flags); + + /* If even one was posted, there will be a completion. */ + if (bad_wr != first_wr) + return 0; + + atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); + wake_up(&rdma->sc_send_wait); + return -ENOTCONN; +} + +/* Build and DMA-map an SGL that covers one kvec in an xdr_buf + */ +static void svc_rdma_vec_to_sg(struct svc_rdma_write_info *info, + unsigned int len, + struct svc_rdma_rw_ctxt *ctxt) +{ + struct scatterlist *sg = ctxt->rw_sg_table.sgl; + + sg_set_buf(&sg[0], info->wi_base, len); + info->wi_base += len; + + ctxt->rw_nents = 1; +} + +/* Build and DMA-map an SGL that covers part of an xdr_buf's pagelist. + */ +static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info, + unsigned int remaining, + struct svc_rdma_rw_ctxt *ctxt) +{ + unsigned int sge_no, sge_bytes, page_off, page_no; + struct xdr_buf *xdr = info->wi_xdr; + struct scatterlist *sg; + struct page **page; + + page_off = (info->wi_next_off + xdr->page_base) & ~PAGE_MASK; + page_no = (info->wi_next_off + xdr->page_base) >> PAGE_SHIFT; + page = xdr->pages + page_no; + info->wi_next_off += remaining; + sg = ctxt->rw_sg_table.sgl; + sge_no = 0; + do { + sge_bytes = min_t(unsigned int, remaining, + PAGE_SIZE - page_off); + sg_set_page(sg, *page, sge_bytes, page_off); + + remaining -= sge_bytes; + sg = sg_next(sg); + page_off = 0; + sge_no++; + page++; + } while (remaining); + + ctxt->rw_nents = sge_no; +} + +/* Construct RDMA Write WRs to send a portion of an xdr_buf containing + * an RPC Reply. + */ +static int +svc_rdma_build_writes(struct svc_rdma_write_info *info, + void (*constructor)(struct svc_rdma_write_info *info, + unsigned int len, + struct svc_rdma_rw_ctxt *ctxt), + unsigned int remaining) +{ + struct svc_rdma_chunk_ctxt *cc = &info->wi_cc; + struct svcxprt_rdma *rdma = cc->cc_rdma; + struct svc_rdma_rw_ctxt *ctxt; + __be32 *seg; + int ret; + + cc->cc_cqe.done = svc_rdma_write_done; + seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz; + do { + unsigned int write_len; + u32 seg_length, seg_handle; + u64 seg_offset; + + if (info->wi_seg_no >= info->wi_nsegs) + goto out_overflow; + + seg_handle = be32_to_cpup(seg); + seg_length = be32_to_cpup(seg + 1); + xdr_decode_hyper(seg + 2, &seg_offset); + seg_offset += info->wi_seg_off; + + write_len = min(remaining, seg_length - info->wi_seg_off); + ctxt = svc_rdma_get_rw_ctxt(rdma, + (write_len >> PAGE_SHIFT) + 2); + if (!ctxt) + goto out_noctx; + + constructor(info, write_len, ctxt); + ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, + rdma->sc_port_num, ctxt->rw_sg_table.sgl, + ctxt->rw_nents, 0, seg_offset, + seg_handle, DMA_TO_DEVICE); + if (ret < 0) + goto out_initerr; + + list_add(&ctxt->rw_list, &cc->cc_rwctxts); + cc->cc_sqecount += ret; + if (write_len == seg_length - info->wi_seg_off) { + seg += 4; + info->wi_seg_no++; + info->wi_seg_off = 0; + } else { + info->wi_seg_off += write_len; + } + remaining -= write_len; + } while (remaining); + + return 0; + +out_overflow: + dprintk("svcrdma: inadequate space in Write chunk (%u)\n", + info->wi_nsegs); + return -E2BIG; + +out_noctx: + dprintk("svcrdma: no R/W ctxs available\n"); + return -ENOMEM; + +out_initerr: + svc_rdma_put_rw_ctxt(rdma, ctxt); + pr_err("svcrdma: failed to map pagelist (%d)\n", ret); + return -EIO; +} + +/* Send one of an xdr_buf's kvecs by itself. To send a Reply + * chunk, the whole RPC Reply is written back to the client. + * This function writes either the head or tail of the xdr_buf + * containing the Reply. + */ +static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info, + struct kvec *vec) +{ + info->wi_base = vec->iov_base; + return svc_rdma_build_writes(info, svc_rdma_vec_to_sg, + vec->iov_len); +} + +/* Send an xdr_buf's page list by itself. A Write chunk is + * just the page list. a Reply chunk is the head, page list, + * and tail. This function is shared between the two types + * of chunk. + */ +static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info, + struct xdr_buf *xdr) +{ + info->wi_xdr = xdr; + info->wi_next_off = 0; + return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg, + xdr->page_len); +} + +/** + * svc_rdma_send_write_chunk - Write all segments in a Write chunk + * @rdma: controlling RDMA transport + * @wr_ch: Write chunk provided by client + * @xdr: xdr_buf containing the data payload + * + * Returns a non-negative number of bytes the chunk consumed, or + * %-E2BIG if the payload was larger than the Write chunk, + * %-ENOMEM if rdma_rw context pool was exhausted, + * %-ENOTCONN if posting failed (connection is lost), + * %-EIO if rdma_rw initialization failed (DMA mapping, etc). + */ +int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, + struct xdr_buf *xdr) +{ + struct svc_rdma_write_info *info; + int ret; + + if (!xdr->page_len) + return 0; + + info = svc_rdma_write_info_alloc(rdma, wr_ch); + if (!info) + return -ENOMEM; + + ret = svc_rdma_send_xdr_pagelist(info, xdr); + if (ret < 0) + goto out_err; + + ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); + if (ret < 0) + goto out_err; + return xdr->page_len; + +out_err: + svc_rdma_write_info_free(info); + return ret; +} + +/** + * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk + * @rdma: controlling RDMA transport + * @rp_ch: Reply chunk provided by client + * @writelist: true if client provided a Write list + * @xdr: xdr_buf containing an RPC Reply + * + * Returns a non-negative number of bytes the chunk consumed, or + * %-E2BIG if the payload was larger than the Reply chunk, + * %-ENOMEM if rdma_rw context pool was exhausted, + * %-ENOTCONN if posting failed (connection is lost), + * %-EIO if rdma_rw initialization failed (DMA mapping, etc). + */ +int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch, + bool writelist, struct xdr_buf *xdr) +{ + struct svc_rdma_write_info *info; + int consumed, ret; + + info = svc_rdma_write_info_alloc(rdma, rp_ch); + if (!info) + return -ENOMEM; + + ret = svc_rdma_send_xdr_kvec(info, &xdr->head[0]); + if (ret < 0) + goto out_err; + consumed = xdr->head[0].iov_len; + + /* Send the page list in the Reply chunk only if the + * client did not provide Write chunks. + */ + if (!writelist && xdr->page_len) { + ret = svc_rdma_send_xdr_pagelist(info, xdr); + if (ret < 0) + goto out_err; + consumed += xdr->page_len; + } + + if (xdr->tail[0].iov_len) { + ret = svc_rdma_send_xdr_kvec(info, &xdr->tail[0]); + if (ret < 0) + goto out_err; + consumed += xdr->tail[0].iov_len; + } + + ret = svc_rdma_post_chunk_ctxt(&info->wi_cc); + if (ret < 0) + goto out_err; + return consumed; + +out_err: + svc_rdma_write_info_free(info); + return ret; +} diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 515221b16d0956ea027e91985c89606c403d5109..1736337f3a557a68399f5d1b103822cc08d19562 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2016 Oracle. All rights reserved. * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * @@ -40,6 +41,63 @@ * Author: Tom Tucker */ +/* Operation + * + * The main entry point is svc_rdma_sendto. This is called by the + * RPC server when an RPC Reply is ready to be transmitted to a client. + * + * The passed-in svc_rqst contains a struct xdr_buf which holds an + * XDR-encoded RPC Reply message. sendto must construct the RPC-over-RDMA + * transport header, post all Write WRs needed for this Reply, then post + * a Send WR conveying the transport header and the RPC message itself to + * the client. + * + * svc_rdma_sendto must fully transmit the Reply before returning, as + * the svc_rqst will be recycled as soon as sendto returns. Remaining + * resources referred to by the svc_rqst are also recycled at that time. + * Therefore any resources that must remain longer must be detached + * from the svc_rqst and released later. + * + * Page Management + * + * The I/O that performs Reply transmission is asynchronous, and may + * complete well after sendto returns. Thus pages under I/O must be + * removed from the svc_rqst before sendto returns. + * + * The logic here depends on Send Queue and completion ordering. Since + * the Send WR is always posted last, it will always complete last. Thus + * when it completes, it is guaranteed that all previous Write WRs have + * also completed. + * + * Write WRs are constructed and posted. Each Write segment gets its own + * svc_rdma_rw_ctxt, allowing the Write completion handler to find and + * DMA-unmap the pages under I/O for that Write segment. The Write + * completion handler does not release any pages. + * + * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt. + * The ownership of all of the Reply's pages are transferred into that + * ctxt, the Send WR is posted, and sendto returns. + * + * The svc_rdma_op_ctxt is presented when the Send WR completes. The + * Send completion handler finally releases the Reply's pages. + * + * This mechanism also assumes that completions on the transport's Send + * Completion Queue do not run in parallel. Otherwise a Write completion + * and Send completion running at the same time could release pages that + * are still DMA-mapped. + * + * Error Handling + * + * - If the Send WR is posted successfully, it will either complete + * successfully, or get flushed. Either way, the Send completion + * handler releases the Reply's pages. + * - If the Send WR cannot be not posted, the forward path releases + * the Reply's pages. + * + * This handles the case, without the use of page reference counting, + * where two different Write segments send portions of the same page. + */ + #include #include #include @@ -55,113 +113,141 @@ static u32 xdr_padsize(u32 len) return (len & 3) ? (4 - (len & 3)) : 0; } -int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec, - bool write_chunk_present) +/* Returns length of transport header, in bytes. + */ +static unsigned int svc_rdma_reply_hdr_len(__be32 *rdma_resp) { - int sge_no; - u32 sge_bytes; - u32 page_bytes; - u32 page_off; - int page_no; - - if (xdr->len != - (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { - pr_err("svcrdma: %s: XDR buffer length error\n", __func__); - return -EIO; - } + unsigned int nsegs; + __be32 *p; - /* Skip the first sge, this is for the RPCRDMA header */ - sge_no = 1; + p = rdma_resp; + + /* RPC-over-RDMA V1 replies never have a Read list. */ + p += rpcrdma_fixed_maxsz + 1; - /* Head SGE */ - vec->sge[sge_no].iov_base = xdr->head[0].iov_base; - vec->sge[sge_no].iov_len = xdr->head[0].iov_len; - sge_no++; - - /* pages SGE */ - page_no = 0; - page_bytes = xdr->page_len; - page_off = xdr->page_base; - while (page_bytes) { - vec->sge[sge_no].iov_base = - page_address(xdr->pages[page_no]) + page_off; - sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); - page_bytes -= sge_bytes; - vec->sge[sge_no].iov_len = sge_bytes; - - sge_no++; - page_no++; - page_off = 0; /* reset for next time through loop */ + /* Skip Write list. */ + while (*p++ != xdr_zero) { + nsegs = be32_to_cpup(p++); + p += nsegs * rpcrdma_segment_maxsz; } - /* Tail SGE */ - if (xdr->tail[0].iov_len) { - unsigned char *base = xdr->tail[0].iov_base; - size_t len = xdr->tail[0].iov_len; - u32 xdr_pad = xdr_padsize(xdr->page_len); + /* Skip Reply chunk. */ + if (*p++ != xdr_zero) { + nsegs = be32_to_cpup(p++); + p += nsegs * rpcrdma_segment_maxsz; + } - if (write_chunk_present && xdr_pad) { - base += xdr_pad; - len -= xdr_pad; - } + return (unsigned long)p - (unsigned long)rdma_resp; +} - if (len) { - vec->sge[sge_no].iov_base = base; - vec->sge[sge_no].iov_len = len; - sge_no++; +/* One Write chunk is copied from Call transport header to Reply + * transport header. Each segment's length field is updated to + * reflect number of bytes consumed in the segment. + * + * Returns number of segments in this chunk. + */ +static unsigned int xdr_encode_write_chunk(__be32 *dst, __be32 *src, + unsigned int remaining) +{ + unsigned int i, nsegs; + u32 seg_len; + + /* Write list discriminator */ + *dst++ = *src++; + + /* number of segments in this chunk */ + nsegs = be32_to_cpup(src); + *dst++ = *src++; + + for (i = nsegs; i; i--) { + /* segment's RDMA handle */ + *dst++ = *src++; + + /* bytes returned in this segment */ + seg_len = be32_to_cpu(*src); + if (remaining >= seg_len) { + /* entire segment was consumed */ + *dst = *src; + remaining -= seg_len; + } else { + /* segment only partly filled */ + *dst = cpu_to_be32(remaining); + remaining = 0; } - } + dst++; src++; - dprintk("svcrdma: %s: sge_no %d page_no %d " - "page_base %u page_len %u head_len %zu tail_len %zu\n", - __func__, sge_no, page_no, xdr->page_base, xdr->page_len, - xdr->head[0].iov_len, xdr->tail[0].iov_len); + /* segment's RDMA offset */ + *dst++ = *src++; + *dst++ = *src++; + } - vec->count = sge_no; - return 0; + return nsegs; } -static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - u32 xdr_off, size_t len, int dir) +/* The client provided a Write list in the Call message. Fill in + * the segments in the first Write chunk in the Reply's transport + * header with the number of bytes consumed in each segment. + * Remaining chunks are returned unused. + * + * Assumptions: + * - Client has provided only one Write chunk + */ +static void svc_rdma_xdr_encode_write_list(__be32 *rdma_resp, __be32 *wr_ch, + unsigned int consumed) { - struct page *page; - dma_addr_t dma_addr; - if (xdr_off < xdr->head[0].iov_len) { - /* This offset is in the head */ - xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; - page = virt_to_page(xdr->head[0].iov_base); - } else { - xdr_off -= xdr->head[0].iov_len; - if (xdr_off < xdr->page_len) { - /* This offset is in the page list */ - xdr_off += xdr->page_base; - page = xdr->pages[xdr_off >> PAGE_SHIFT]; - xdr_off &= ~PAGE_MASK; - } else { - /* This offset is in the tail */ - xdr_off -= xdr->page_len; - xdr_off += (unsigned long) - xdr->tail[0].iov_base & ~PAGE_MASK; - page = virt_to_page(xdr->tail[0].iov_base); - } + unsigned int nsegs; + __be32 *p, *q; + + /* RPC-over-RDMA V1 replies never have a Read list. */ + p = rdma_resp + rpcrdma_fixed_maxsz + 1; + + q = wr_ch; + while (*q != xdr_zero) { + nsegs = xdr_encode_write_chunk(p, q, consumed); + q += 2 + nsegs * rpcrdma_segment_maxsz; + p += 2 + nsegs * rpcrdma_segment_maxsz; + consumed = 0; } - dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off, - min_t(size_t, PAGE_SIZE, len), dir); - return dma_addr; + + /* Terminate Write list */ + *p++ = xdr_zero; + + /* Reply chunk discriminator; may be replaced later */ + *p = xdr_zero; +} + +/* The client provided a Reply chunk in the Call message. Fill in + * the segments in the Reply chunk in the Reply message with the + * number of bytes consumed in each segment. + * + * Assumptions: + * - Reply can always fit in the provided Reply chunk + */ +static void svc_rdma_xdr_encode_reply_chunk(__be32 *rdma_resp, __be32 *rp_ch, + unsigned int consumed) +{ + __be32 *p; + + /* Find the Reply chunk in the Reply's xprt header. + * RPC-over-RDMA V1 replies never have a Read list. + */ + p = rdma_resp + rpcrdma_fixed_maxsz + 1; + + /* Skip past Write list */ + while (*p++ != xdr_zero) + p += 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; + + xdr_encode_write_chunk(p, rp_ch, consumed); } /* Parse the RPC Call's transport header. */ -static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp, - struct rpcrdma_write_array **write, - struct rpcrdma_write_array **reply) +static void svc_rdma_get_write_arrays(__be32 *rdma_argp, + __be32 **write, __be32 **reply) { __be32 *p; - p = (__be32 *)&rmsgp->rm_body.rm_chunks[0]; + p = rdma_argp + rpcrdma_fixed_maxsz; /* Read list */ while (*p++ != xdr_zero) @@ -169,7 +255,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp, /* Write list */ if (*p != xdr_zero) { - *write = (struct rpcrdma_write_array *)p; + *write = p; while (*p++ != xdr_zero) p += 1 + be32_to_cpu(*p) * 4; } else { @@ -179,7 +265,7 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp, /* Reply chunk */ if (*p != xdr_zero) - *reply = (struct rpcrdma_write_array *)p; + *reply = p; else *reply = NULL; } @@ -189,360 +275,321 @@ static void svc_rdma_get_write_arrays(struct rpcrdma_msg *rmsgp, * Invalidate, and responder chooses one rkey to invalidate. * * Find a candidate rkey to invalidate when sending a reply. Picks the - * first rkey it finds in the chunks lists. + * first R_key it finds in the chunk lists. * * Returns zero if RPC's chunk lists are empty. */ -static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp, - struct rpcrdma_write_array *wr_ary, - struct rpcrdma_write_array *rp_ary) +static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp, + __be32 *wr_lst, __be32 *rp_ch) { - struct rpcrdma_read_chunk *rd_ary; - struct rpcrdma_segment *arg_ch; + __be32 *p; - rd_ary = (struct rpcrdma_read_chunk *)&rdma_argp->rm_body.rm_chunks[0]; - if (rd_ary->rc_discrim != xdr_zero) - return be32_to_cpu(rd_ary->rc_target.rs_handle); + p = rdma_argp + rpcrdma_fixed_maxsz; + if (*p != xdr_zero) + p += 2; + else if (wr_lst && be32_to_cpup(wr_lst + 1)) + p = wr_lst + 2; + else if (rp_ch && be32_to_cpup(rp_ch + 1)) + p = rp_ch + 2; + else + return 0; + return be32_to_cpup(p); +} - if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { - arg_ch = &wr_ary->wc_array[0].wc_target; - return be32_to_cpu(arg_ch->rs_handle); - } +/* ib_dma_map_page() is used here because svc_rdma_dma_unmap() + * is used during completion to DMA-unmap this memory, and + * it uses ib_dma_unmap_page() exclusively. + */ +static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + unsigned int sge_no, + unsigned char *base, + unsigned int len) +{ + unsigned long offset = (unsigned long)base & ~PAGE_MASK; + struct ib_device *dev = rdma->sc_cm_id->device; + dma_addr_t dma_addr; - if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { - arg_ch = &rp_ary->wc_array[0].wc_target; - return be32_to_cpu(arg_ch->rs_handle); - } + dma_addr = ib_dma_map_page(dev, virt_to_page(base), + offset, len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(dev, dma_addr)) + return -EIO; + ctxt->sge[sge_no].addr = dma_addr; + ctxt->sge[sge_no].length = len; + ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; + svc_rdma_count_mappings(rdma, ctxt); return 0; } -/* Assumptions: - * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE - */ -static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, - u32 rmr, u64 to, - u32 xdr_off, int write_len, - struct svc_rdma_req_map *vec) +static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + unsigned int sge_no, + struct page *page, + unsigned int offset, + unsigned int len) { - struct ib_rdma_wr write_wr; - struct ib_sge *sge; - int xdr_sge_no; - int sge_no; - int sge_bytes; - int sge_off; - int bc; - struct svc_rdma_op_ctxt *ctxt; + struct ib_device *dev = rdma->sc_cm_id->device; + dma_addr_t dma_addr; - if (vec->count > RPCSVC_MAXPAGES) { - pr_err("svcrdma: Too many pages (%lu)\n", vec->count); + dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE); + if (ib_dma_mapping_error(dev, dma_addr)) return -EIO; - } - dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " - "write_len=%d, vec->sge=%p, vec->count=%lu\n", - rmr, (unsigned long long)to, xdr_off, - write_len, vec->sge, vec->count); + ctxt->sge[sge_no].addr = dma_addr; + ctxt->sge[sge_no].length = len; + ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; + svc_rdma_count_mappings(rdma, ctxt); + return 0; +} - ctxt = svc_rdma_get_context(xprt); +/** + * svc_rdma_map_reply_hdr - DMA map the transport header buffer + * @rdma: controlling transport + * @ctxt: op_ctxt for the Send WR + * @rdma_resp: buffer containing transport header + * @len: length of transport header + * + * Returns: + * %0 if the header is DMA mapped, + * %-EIO if DMA mapping failed. + */ +int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + __be32 *rdma_resp, + unsigned int len) +{ ctxt->direction = DMA_TO_DEVICE; - sge = ctxt->sge; - - /* Find the SGE associated with xdr_off */ - for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < vec->count; - xdr_sge_no++) { - if (vec->sge[xdr_sge_no].iov_len > bc) - break; - bc -= vec->sge[xdr_sge_no].iov_len; - } - - sge_off = bc; - bc = write_len; - sge_no = 0; - - /* Copy the remaining SGE */ - while (bc != 0) { - sge_bytes = min_t(size_t, - bc, vec->sge[xdr_sge_no].iov_len-sge_off); - sge[sge_no].length = sge_bytes; - sge[sge_no].addr = - dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(xprt->sc_cm_id->device, - sge[sge_no].addr)) - goto err; - svc_rdma_count_mappings(xprt, ctxt); - sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; - ctxt->count++; - sge_off = 0; - sge_no++; - xdr_sge_no++; - if (xdr_sge_no > vec->count) { - pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no); - goto err; - } - bc -= sge_bytes; - if (sge_no == xprt->sc_max_sge) - break; - } - - /* Prepare WRITE WR */ - memset(&write_wr, 0, sizeof write_wr); - ctxt->cqe.done = svc_rdma_wc_write; - write_wr.wr.wr_cqe = &ctxt->cqe; - write_wr.wr.sg_list = &sge[0]; - write_wr.wr.num_sge = sge_no; - write_wr.wr.opcode = IB_WR_RDMA_WRITE; - write_wr.wr.send_flags = IB_SEND_SIGNALED; - write_wr.rkey = rmr; - write_wr.remote_addr = to; - - /* Post It */ - atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr.wr)) - goto err; - return write_len - bc; - err: - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); - return -EIO; + ctxt->pages[0] = virt_to_page(rdma_resp); + ctxt->count = 1; + return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len); } -noinline -static int send_write_chunks(struct svcxprt_rdma *xprt, - struct rpcrdma_write_array *wr_ary, - struct rpcrdma_msg *rdma_resp, - struct svc_rqst *rqstp, - struct svc_rdma_req_map *vec) +/* Load the xdr_buf into the ctxt's sge array, and DMA map each + * element as it is added. + * + * Returns the number of sge elements loaded on success, or + * a negative errno on failure. + */ +static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + struct xdr_buf *xdr, __be32 *wr_lst) { - u32 xfer_len = rqstp->rq_res.page_len; - int write_len; - u32 xdr_off; - int chunk_off; - int chunk_no; - int nchunks; - struct rpcrdma_write_array *res_ary; + unsigned int len, sge_no, remaining, page_off; + struct page **ppages; + unsigned char *base; + u32 xdr_pad; int ret; - res_ary = (struct rpcrdma_write_array *) - &rdma_resp->rm_body.rm_chunks[1]; - - /* Write chunks start at the pagelist */ - nchunks = be32_to_cpu(wr_ary->wc_nchunks); - for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; - xfer_len && chunk_no < nchunks; - chunk_no++) { - struct rpcrdma_segment *arg_ch; - u64 rs_offset; - - arg_ch = &wr_ary->wc_array[chunk_no].wc_target; - write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length)); - - /* Prepare the response chunk given the length actually - * written */ - xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset); - svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, - arg_ch->rs_handle, - arg_ch->rs_offset, - write_len); - chunk_off = 0; - while (write_len) { - ret = send_write(xprt, rqstp, - be32_to_cpu(arg_ch->rs_handle), - rs_offset + chunk_off, - xdr_off, - write_len, - vec); - if (ret <= 0) - goto out_err; - chunk_off += ret; - xdr_off += ret; - xfer_len -= ret; - write_len -= ret; + sge_no = 1; + + ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, + xdr->head[0].iov_base, + xdr->head[0].iov_len); + if (ret < 0) + return ret; + + /* If a Write chunk is present, the xdr_buf's page list + * is not included inline. However the Upper Layer may + * have added XDR padding in the tail buffer, and that + * should not be included inline. + */ + if (wr_lst) { + base = xdr->tail[0].iov_base; + len = xdr->tail[0].iov_len; + xdr_pad = xdr_padsize(xdr->page_len); + + if (len && xdr_pad) { + base += xdr_pad; + len -= xdr_pad; } + + goto tail; + } + + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + page_off = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - page_off, remaining); + + ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++, + *ppages++, page_off, len); + if (ret < 0) + return ret; + + remaining -= len; + page_off = 0; } - /* Update the req with the number of chunks actually used */ - svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); - return rqstp->rq_res.page_len; + base = xdr->tail[0].iov_base; + len = xdr->tail[0].iov_len; +tail: + if (len) { + ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len); + if (ret < 0) + return ret; + } -out_err: - pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret); - return -EIO; + return sge_no - 1; } -noinline -static int send_reply_chunks(struct svcxprt_rdma *xprt, - struct rpcrdma_write_array *rp_ary, - struct rpcrdma_msg *rdma_resp, - struct svc_rqst *rqstp, - struct svc_rdma_req_map *vec) +/* The svc_rqst and all resources it owns are released as soon as + * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt + * so they are released by the Send completion handler. + */ +static void svc_rdma_save_io_pages(struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *ctxt) { - u32 xfer_len = rqstp->rq_res.len; - int write_len; - u32 xdr_off; - int chunk_no; - int chunk_off; - int nchunks; - struct rpcrdma_segment *ch; - struct rpcrdma_write_array *res_ary; - int ret; + int i, pages = rqstp->rq_next_page - rqstp->rq_respages; - /* XXX: need to fix when reply lists occur with read-list and or - * write-list */ - res_ary = (struct rpcrdma_write_array *) - &rdma_resp->rm_body.rm_chunks[2]; - - /* xdr offset starts at RPC message */ - nchunks = be32_to_cpu(rp_ary->wc_nchunks); - for (xdr_off = 0, chunk_no = 0; - xfer_len && chunk_no < nchunks; - chunk_no++) { - u64 rs_offset; - ch = &rp_ary->wc_array[chunk_no].wc_target; - write_len = min(xfer_len, be32_to_cpu(ch->rs_length)); - - /* Prepare the reply chunk given the length actually - * written */ - xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset); - svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, - ch->rs_handle, ch->rs_offset, - write_len); - chunk_off = 0; - while (write_len) { - ret = send_write(xprt, rqstp, - be32_to_cpu(ch->rs_handle), - rs_offset + chunk_off, - xdr_off, - write_len, - vec); - if (ret <= 0) - goto out_err; - chunk_off += ret; - xdr_off += ret; - xfer_len -= ret; - write_len -= ret; - } + ctxt->count += pages; + for (i = 0; i < pages; i++) { + ctxt->pages[i + 1] = rqstp->rq_respages[i]; + rqstp->rq_respages[i] = NULL; } - /* Update the req with the number of chunks actually used */ - svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); + rqstp->rq_next_page = rqstp->rq_respages + 1; +} - return rqstp->rq_res.len; +/** + * svc_rdma_post_send_wr - Set up and post one Send Work Request + * @rdma: controlling transport + * @ctxt: op_ctxt for transmitting the Send WR + * @num_sge: number of SGEs to send + * @inv_rkey: R_key argument to Send With Invalidate, or zero + * + * Returns: + * %0 if the Send* was posted successfully, + * %-ENOTCONN if the connection was lost or dropped, + * %-EINVAL if there was a problem with the Send we built, + * %-ENOMEM if ib_post_send failed. + */ +int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, int num_sge, + u32 inv_rkey) +{ + struct ib_send_wr *send_wr = &ctxt->send_wr; -out_err: - pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret); - return -EIO; + dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge); + + send_wr->next = NULL; + ctxt->cqe.done = svc_rdma_wc_send; + send_wr->wr_cqe = &ctxt->cqe; + send_wr->sg_list = ctxt->sge; + send_wr->num_sge = num_sge; + send_wr->send_flags = IB_SEND_SIGNALED; + if (inv_rkey) { + send_wr->opcode = IB_WR_SEND_WITH_INV; + send_wr->ex.invalidate_rkey = inv_rkey; + } else { + send_wr->opcode = IB_WR_SEND; + } + + return svc_rdma_send(rdma, send_wr); } -/* This function prepares the portion of the RPCRDMA message to be - * sent in the RDMA_SEND. This function is called after data sent via - * RDMA has already been transmitted. There are three cases: - * - The RPCRDMA header, RPC header, and payload are all sent in a - * single RDMA_SEND. This is the "inline" case. - * - The RPCRDMA header and some portion of the RPC header and data - * are sent via this RDMA_SEND and another portion of the data is - * sent via RDMA. - * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC - * header and data are all transmitted via RDMA. - * In all three cases, this function prepares the RPCRDMA header in - * sge[0], the 'type' parameter indicates the type to place in the - * RPCRDMA header, and the 'byte_count' field indicates how much of - * the XDR to include in this RDMA_SEND. NB: The offset of the payload - * to send is zero in the XDR. +/* Prepare the portion of the RPC Reply that will be transmitted + * via RDMA Send. The RPC-over-RDMA transport header is prepared + * in sge[0], and the RPC xdr_buf is prepared in following sges. + * + * Depending on whether a Write list or Reply chunk is present, + * the server may send all, a portion of, or none of the xdr_buf. + * In the latter case, only the transport header (sge[0]) is + * transmitted. + * + * RDMA Send is the last step of transmitting an RPC reply. Pages + * involved in the earlier RDMA Writes are here transferred out + * of the rqstp and into the ctxt's page array. These pages are + * DMA unmapped by each Write completion, but the subsequent Send + * completion finally releases these pages. + * + * Assumptions: + * - The Reply's transport header will never be larger than a page. */ -static int send_reply(struct svcxprt_rdma *rdma, - struct svc_rqst *rqstp, - struct page *page, - struct rpcrdma_msg *rdma_resp, - struct svc_rdma_req_map *vec, - int byte_count, - u32 inv_rkey) +static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma, + __be32 *rdma_argp, __be32 *rdma_resp, + struct svc_rqst *rqstp, + __be32 *wr_lst, __be32 *rp_ch) { struct svc_rdma_op_ctxt *ctxt; - struct ib_send_wr send_wr; - u32 xdr_off; - int sge_no; - int sge_bytes; - int page_no; - int pages; - int ret = -EIO; - - /* Prepare the context */ + u32 inv_rkey; + int ret; + + dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n", + (rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"), + rqstp->rq_res.head[0].iov_len, + rqstp->rq_res.page_len, + rqstp->rq_res.tail[0].iov_len); + ctxt = svc_rdma_get_context(rdma); - ctxt->direction = DMA_TO_DEVICE; - ctxt->pages[0] = page; - ctxt->count = 1; - /* Prepare the SGE for the RPCRDMA Header */ - ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; - ctxt->sge[0].length = - svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp); - ctxt->sge[0].addr = - ib_dma_map_page(rdma->sc_cm_id->device, page, 0, - ctxt->sge[0].length, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) + ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, + svc_rdma_reply_hdr_len(rdma_resp)); + if (ret < 0) goto err; - svc_rdma_count_mappings(rdma, ctxt); - - ctxt->direction = DMA_TO_DEVICE; - /* Map the payload indicated by 'byte_count' */ - xdr_off = 0; - for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { - sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); - byte_count -= sge_bytes; - ctxt->sge[sge_no].addr = - dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, - sge_bytes, DMA_TO_DEVICE); - xdr_off += sge_bytes; - if (ib_dma_mapping_error(rdma->sc_cm_id->device, - ctxt->sge[sge_no].addr)) + if (!rp_ch) { + ret = svc_rdma_map_reply_msg(rdma, ctxt, + &rqstp->rq_res, wr_lst); + if (ret < 0) goto err; - svc_rdma_count_mappings(rdma, ctxt); - ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; - ctxt->sge[sge_no].length = sge_bytes; } - if (byte_count != 0) { - pr_err("svcrdma: Could not map %d bytes\n", byte_count); + + svc_rdma_save_io_pages(rqstp, ctxt); + + inv_rkey = 0; + if (rdma->sc_snd_w_inv) + inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch); + ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey); + if (ret) goto err; - } - /* Save all respages in the ctxt and remove them from the - * respages array. They are our pages until the I/O - * completes. + return 0; + +err: + pr_err("svcrdma: failed to post Send WR (%d)\n", ret); + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 1); + return ret; +} + +/* Given the client-provided Write and Reply chunks, the server was not + * able to form a complete reply. Return an RDMA_ERROR message so the + * client can retire this RPC transaction. As above, the Send completion + * routine releases payload pages that were part of a previous RDMA Write. + * + * Remote Invalidation is skipped for simplicity. + */ +static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, + __be32 *rdma_resp, struct svc_rqst *rqstp) +{ + struct svc_rdma_op_ctxt *ctxt; + __be32 *p; + int ret; + + ctxt = svc_rdma_get_context(rdma); + + /* Replace the original transport header with an + * RDMA_ERROR response. XID etc are preserved. */ - pages = rqstp->rq_next_page - rqstp->rq_respages; - for (page_no = 0; page_no < pages; page_no++) { - ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; - ctxt->count++; - rqstp->rq_respages[page_no] = NULL; - } - rqstp->rq_next_page = rqstp->rq_respages + 1; + p = rdma_resp + 3; + *p++ = rdma_error; + *p = err_chunk; - if (sge_no > rdma->sc_max_sge) { - pr_err("svcrdma: Too many sges (%d)\n", sge_no); + ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20); + if (ret < 0) goto err; - } - memset(&send_wr, 0, sizeof send_wr); - ctxt->cqe.done = svc_rdma_wc_send; - send_wr.wr_cqe = &ctxt->cqe; - send_wr.sg_list = ctxt->sge; - send_wr.num_sge = sge_no; - if (inv_rkey) { - send_wr.opcode = IB_WR_SEND_WITH_INV; - send_wr.ex.invalidate_rkey = inv_rkey; - } else - send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; - ret = svc_rdma_send(rdma, &send_wr); + svc_rdma_save_io_pages(rqstp, ctxt); + + ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0); if (ret) goto err; return 0; - err: +err: + pr_err("svcrdma: failed to post Send WR (%d)\n", ret); svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); return ret; @@ -552,39 +599,36 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) { } +/** + * svc_rdma_sendto - Transmit an RPC reply + * @rqstp: processed RPC request, reply XDR already in ::rq_res + * + * Any resources still associated with @rqstp are released upon return. + * If no reply message was possible, the connection is closed. + * + * Returns: + * %0 if an RPC reply has been successfully posted, + * %-ENOMEM if a resource shortage occurred (connection is lost), + * %-ENOTCONN if posting failed (connection is lost). + */ int svc_rdma_sendto(struct svc_rqst *rqstp) { struct svc_xprt *xprt = rqstp->rq_xprt; struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); - struct rpcrdma_msg *rdma_argp; - struct rpcrdma_msg *rdma_resp; - struct rpcrdma_write_array *wr_ary, *rp_ary; - int ret; - int inline_bytes; + __be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch; + struct xdr_buf *xdr = &rqstp->rq_res; struct page *res_page; - struct svc_rdma_req_map *vec; - u32 inv_rkey; - __be32 *p; - - dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); + int ret; - /* Get the RDMA request header. The receive logic always - * places this at the start of page 0. + /* Find the call's chunk lists to decide how to send the reply. + * Receive places the Call's xprt header at the start of page 0. */ rdma_argp = page_address(rqstp->rq_pages[0]); - svc_rdma_get_write_arrays(rdma_argp, &wr_ary, &rp_ary); - - inv_rkey = 0; - if (rdma->sc_snd_w_inv) - inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary); + svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch); - /* Build an req vec for the XDR */ - vec = svc_rdma_get_req_map(rdma); - ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); - if (ret) - goto err0; - inline_bytes = rqstp->rq_res.len; + dprintk("svcrdma: preparing response for XID 0x%08x\n", + be32_to_cpup(rdma_argp)); /* Create the RDMA response header. xprt->xpt_mutex, * acquired in svc_send(), serializes RPC replies. The @@ -598,115 +642,57 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err0; rdma_resp = page_address(res_page); - p = &rdma_resp->rm_xid; - *p++ = rdma_argp->rm_xid; - *p++ = rdma_argp->rm_vers; + p = rdma_resp; + *p++ = *rdma_argp; + *p++ = *(rdma_argp + 1); *p++ = rdma->sc_fc_credits; - *p++ = rp_ary ? rdma_nomsg : rdma_msg; + *p++ = rp_ch ? rdma_nomsg : rdma_msg; /* Start with empty chunks */ *p++ = xdr_zero; *p++ = xdr_zero; *p = xdr_zero; - /* Send any write-chunk data and build resp write-list */ - if (wr_ary) { - ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec); + if (wr_lst) { + /* XXX: Presume the client sent only one Write chunk */ + ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr); if (ret < 0) - goto err1; - inline_bytes -= ret + xdr_padsize(ret); + goto err2; + svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret); } - - /* Send any reply-list data and update resp reply-list */ - if (rp_ary) { - ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec); + if (rp_ch) { + ret = svc_rdma_send_reply_chunk(rdma, rp_ch, wr_lst, xdr); if (ret < 0) - goto err1; - inline_bytes -= ret; + goto err2; + svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret); } - /* Post a fresh Receive buffer _before_ sending the reply */ ret = svc_rdma_post_recv(rdma, GFP_KERNEL); if (ret) goto err1; - - ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec, - inline_bytes, inv_rkey); + ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp, + wr_lst, rp_ch); if (ret < 0) goto err0; + return 0; - svc_rdma_put_req_map(rdma, vec); - dprintk("svcrdma: send_reply returns %d\n", ret); - return ret; + err2: + if (ret != -E2BIG) + goto err1; + + ret = svc_rdma_post_recv(rdma, GFP_KERNEL); + if (ret) + goto err1; + ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp); + if (ret < 0) + goto err0; + return 0; err1: put_page(res_page); err0: - svc_rdma_put_req_map(rdma, vec); pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n", ret); - set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + set_bit(XPT_CLOSE, &xprt->xpt_flags); return -ENOTCONN; } - -void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, - int status) -{ - struct ib_send_wr err_wr; - struct page *p; - struct svc_rdma_op_ctxt *ctxt; - enum rpcrdma_errcode err; - __be32 *va; - int length; - int ret; - - ret = svc_rdma_repost_recv(xprt, GFP_KERNEL); - if (ret) - return; - - p = alloc_page(GFP_KERNEL); - if (!p) - return; - va = page_address(p); - - /* XDR encode an error reply */ - err = ERR_CHUNK; - if (status == -EPROTONOSUPPORT) - err = ERR_VERS; - length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); - - ctxt = svc_rdma_get_context(xprt); - ctxt->direction = DMA_TO_DEVICE; - ctxt->count = 1; - ctxt->pages[0] = p; - - /* Prepare SGE for local address */ - ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey; - ctxt->sge[0].length = length; - ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, length, DMA_TO_DEVICE); - if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { - dprintk("svcrdma: Error mapping buffer for protocol error\n"); - svc_rdma_put_context(ctxt, 1); - return; - } - svc_rdma_count_mappings(xprt, ctxt); - - /* Prepare SEND WR */ - memset(&err_wr, 0, sizeof(err_wr)); - ctxt->cqe.done = svc_rdma_wc_send; - err_wr.wr_cqe = &ctxt->cqe; - err_wr.sg_list = ctxt->sge; - err_wr.num_sge = 1; - err_wr.opcode = IB_WR_SEND; - err_wr.send_flags = IB_SEND_SIGNALED; - - /* Post It */ - ret = svc_rdma_send(xprt, &err_wr); - if (ret) { - dprintk("svcrdma: Error %d posting send for protocol error\n", - ret); - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 1); - } -} diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fc8f14c7bfec60dc5828340861a747e49f06193e..a9d9cb1ba4c6068b1a80c225cb147d1993a75d76 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -272,85 +272,6 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) } } -static struct svc_rdma_req_map *alloc_req_map(gfp_t flags) -{ - struct svc_rdma_req_map *map; - - map = kmalloc(sizeof(*map), flags); - if (map) - INIT_LIST_HEAD(&map->free); - return map; -} - -static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt) -{ - unsigned int i; - - /* One for each receive buffer on this connection. */ - i = xprt->sc_max_requests; - - while (i--) { - struct svc_rdma_req_map *map; - - map = alloc_req_map(GFP_KERNEL); - if (!map) { - dprintk("svcrdma: No memory for request map\n"); - return false; - } - list_add(&map->free, &xprt->sc_maps); - } - return true; -} - -struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt) -{ - struct svc_rdma_req_map *map = NULL; - - spin_lock(&xprt->sc_map_lock); - if (list_empty(&xprt->sc_maps)) - goto out_empty; - - map = list_first_entry(&xprt->sc_maps, - struct svc_rdma_req_map, free); - list_del_init(&map->free); - spin_unlock(&xprt->sc_map_lock); - -out: - map->count = 0; - return map; - -out_empty: - spin_unlock(&xprt->sc_map_lock); - - /* Pre-allocation amount was incorrect */ - map = alloc_req_map(GFP_NOIO); - if (map) - goto out; - - WARN_ONCE(1, "svcrdma: empty request map list?\n"); - return NULL; -} - -void svc_rdma_put_req_map(struct svcxprt_rdma *xprt, - struct svc_rdma_req_map *map) -{ - spin_lock(&xprt->sc_map_lock); - list_add(&map->free, &xprt->sc_maps); - spin_unlock(&xprt->sc_map_lock); -} - -static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt) -{ - while (!list_empty(&xprt->sc_maps)) { - struct svc_rdma_req_map *map; - - map = list_first_entry(&xprt->sc_maps, - struct svc_rdma_req_map, free); - list_del(&map->free); - kfree(map); - } -} - /* QP event handler */ static void qp_event_handler(struct ib_event *event, void *context) { @@ -473,24 +394,6 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) svc_rdma_put_context(ctxt, 1); } -/** - * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC - * @cq: completion queue - * @wc: completed WR - * - */ -void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_cqe *cqe = wc->wr_cqe; - struct svc_rdma_op_ctxt *ctxt; - - svc_rdma_send_wc_common_put(cq, wc, "write"); - - ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe); - svc_rdma_unmap_dma(ctxt); - svc_rdma_put_context(ctxt, 0); -} - /** * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC * @cq: completion queue @@ -561,14 +464,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); INIT_LIST_HEAD(&cma_xprt->sc_ctxts); - INIT_LIST_HEAD(&cma_xprt->sc_maps); + INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); spin_lock_init(&cma_xprt->sc_frmr_q_lock); spin_lock_init(&cma_xprt->sc_ctxt_lock); - spin_lock_init(&cma_xprt->sc_map_lock); + spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); /* * Note that this implies that the underlying transport support @@ -999,6 +902,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt, newxprt->sc_cm_id); dev = newxprt->sc_cm_id->device; + newxprt->sc_port_num = newxprt->sc_cm_id->port_num; /* Qualify the transport resource defaults with the * capabilities of this particular device */ @@ -1014,13 +918,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) svcrdma_max_bc_requests); newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; - newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth; + newxprt->sc_sq_depth = newxprt->sc_rq_depth; atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); if (!svc_rdma_prealloc_ctxts(newxprt)) goto errout; - if (!svc_rdma_prealloc_maps(newxprt)) - goto errout; /* * Limit ORD based on client limit, local device limit, and @@ -1050,6 +952,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = &newxprt->sc_xprt; + qp_attr.port_num = newxprt->sc_cm_id->port_num; + qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests; qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; qp_attr.cap.max_send_sge = newxprt->sc_max_sge; @@ -1248,8 +1152,8 @@ static void __svc_rdma_free(struct work_struct *work) } rdma_dealloc_frmr_q(rdma); + svc_rdma_destroy_rw_ctxts(rdma); svc_rdma_destroy_ctxts(rdma); - svc_rdma_destroy_maps(rdma); /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index c717f54107768902db7c571ce65f4019624c7bcc..62ecbccd9748e54874059209070ebe4a6b9591e7 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 0; +unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; +int xprt_rdma_pad_optimize; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args) new_xprt = rpcx_to_rdmax(xprt); - rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy); + rc = rpcrdma_ia_open(new_xprt, sap); if (rc) goto out1; @@ -457,19 +457,33 @@ xprt_setup_rdma(struct xprt_create *args) return ERR_PTR(rc); } -/* - * Close a connection, during shutdown or timeout/reconnect +/** + * xprt_rdma_close - Close down RDMA connection + * @xprt: generic transport to be closed + * + * Called during transport shutdown reconnect, or device + * removal. Caller holds the transport's write lock. */ static void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + + dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); - dprintk("RPC: %s: closing\n", __func__); - if (r_xprt->rx_ep.rep_connected > 0) + if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { + xprt_clear_connected(xprt); + rpcrdma_ia_remove(ia); + return; + } + if (ep->rep_connected == -ENODEV) + return; + if (ep->rep_connected > 0) xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); - rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); + rpcrdma_ep_disconnect(ep, ia); } static void @@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) dprintk("RPC: %s: %u\n", __func__, port); } +/** + * xprt_rdma_timer - invoked when an RPC times out + * @xprt: controlling RPC transport + * @task: RPC task that timed out + * + * Invoked when the transport is still connected, but an RPC + * retransmit timeout occurs. + * + * Since RDMA connections don't have a keep-alive, forcibly + * disconnect and retry to connect. This drives full + * detection of the network path, and retransmissions of + * all pending RPCs. + */ +static void +xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) +{ + dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt); + + xprt_force_disconnect(xprt); +} + static void xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task) * xprt_rdma_send_request - marshal and send an RPC request * @task: RPC task with an RPC message in rq_snd_buf * + * Caller holds the transport's write lock. + * * Return values: * 0: The request has been sent * ENOTCONN: Caller needs to invoke connect logic then call again @@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int rc = 0; + if (!xprt_connected(xprt)) + goto drop_connection; + /* On retransmit, remove any previously registered chunks */ if (unlikely(!list_empty(&req->rl_registered))) r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); @@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .alloc_slot = xprt_alloc_slot, .release_request = xprt_release_rqst_cong, /* ditto */ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ + .timer = xprt_rdma_timer, .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ .set_port = xprt_rdma_set_port, .connect = xprt_rdma_connect, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3b332b395045b5b0ad07bc13a30db1420d7f7082..3dbce9ac4327a89ea6d84f348041609c5d653b4a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -53,7 +53,7 @@ #include #include #include -#include /* try_module_get()/module_put() */ + #include #include "xprt_rdma.h" @@ -69,8 +69,11 @@ /* * internal functions */ +static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); +static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); -static struct workqueue_struct *rpcrdma_receive_wq; +static struct workqueue_struct *rpcrdma_receive_wq __read_mostly; int rpcrdma_alloc_wq(void) @@ -180,7 +183,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) rep->rr_wc_flags = wc->wc_flags; rep->rr_inv_rkey = wc->ex.invalidate_rkey; - ib_dma_sync_single_for_cpu(rep->rr_device, + ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf), rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); @@ -262,6 +265,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) __func__, ep); complete(&ia->ri_done); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + pr_info("rpcrdma: removing device for %pIS:%u\n", + sap, rpc_get_port(sap)); +#endif + set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); + ep->rep_connected = -ENODEV; + xprt_force_disconnect(&xprt->rx_xprt); + wait_for_completion(&ia->ri_remove_done); + + ia->ri_id = NULL; + ia->ri_pd = NULL; + ia->ri_device = NULL; + /* Return 1 to ensure the core destroys the id. */ + return 1; case RDMA_CM_EVENT_ESTABLISHED: connstate = 1; ib_query_qp(ia->ri_id->qp, attr, @@ -291,9 +309,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) goto connected; case RDMA_CM_EVENT_DISCONNECTED: connstate = -ECONNABORTED; - goto connected; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - connstate = -ENODEV; connected: dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); @@ -329,14 +344,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) return 0; } -static void rpcrdma_destroy_id(struct rdma_cm_id *id) -{ - if (id) { - module_put(id->device->owner); - rdma_destroy_id(id); - } -} - static struct rdma_cm_id * rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia, struct sockaddr *addr) @@ -346,6 +353,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, int rc; init_completion(&ia->ri_done); + init_completion(&ia->ri_remove_done); id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); @@ -370,16 +378,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, goto out; } - /* FIXME: - * Until xprtrdma supports DEVICE_REMOVAL, the provider must - * be pinned while there are active NFS/RDMA mounts to prevent - * hangs and crashes at umount time. - */ - if (!ia->ri_async_rc && !try_module_get(id->device->owner)) { - dprintk("RPC: %s: Failed to get device module\n", - __func__); - ia->ri_async_rc = -ENODEV; - } rc = ia->ri_async_rc; if (rc) goto out; @@ -389,21 +387,20 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, if (rc) { dprintk("RPC: %s: rdma_resolve_route() failed %i\n", __func__, rc); - goto put; + goto out; } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { dprintk("RPC: %s: wait() exited: %i\n", __func__, rc); - goto put; + goto out; } rc = ia->ri_async_rc; if (rc) - goto put; + goto out; return id; -put: - module_put(id->device->owner); + out: rdma_destroy_id(id); return ERR_PTR(rc); @@ -413,13 +410,16 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, * Exported functions. */ -/* - * Open and initialize an Interface Adapter. - * o initializes fields of struct rpcrdma_ia, including - * interface and provider attributes and protection zone. +/** + * rpcrdma_ia_open - Open and initialize an Interface Adapter. + * @xprt: controlling transport + * @addr: IP address of remote peer + * + * Returns 0 on success, negative errno if an appropriate + * Interface Adapter could not be found and opened. */ int -rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) +rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) { struct rpcrdma_ia *ia = &xprt->rx_ia; int rc; @@ -427,7 +427,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ia->ri_id = rpcrdma_create_id(xprt, ia, addr); if (IS_ERR(ia->ri_id)) { rc = PTR_ERR(ia->ri_id); - goto out1; + goto out_err; } ia->ri_device = ia->ri_id->device; @@ -435,10 +435,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); - goto out2; + goto out_err; } - switch (memreg) { + switch (xprt_rdma_memreg_strategy) { case RPCRDMA_FRMR: if (frwr_is_supported(ia)) { ia->ri_ops = &rpcrdma_frwr_memreg_ops; @@ -452,28 +452,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } /*FALLTHROUGH*/ default: - pr_err("rpcrdma: Unsupported memory registration mode: %d\n", - memreg); + pr_err("rpcrdma: Device %s does not support memreg mode %d\n", + ia->ri_device->name, xprt_rdma_memreg_strategy); rc = -EINVAL; - goto out3; + goto out_err; } return 0; -out3: - ib_dealloc_pd(ia->ri_pd); - ia->ri_pd = NULL; -out2: - rpcrdma_destroy_id(ia->ri_id); - ia->ri_id = NULL; -out1: +out_err: + rpcrdma_ia_close(ia); return rc; } -/* - * Clean up/close an IA. - * o if event handles and PD have been initialized, free them. - * o close the IA +/** + * rpcrdma_ia_remove - Handle device driver unload + * @ia: interface adapter being removed + * + * Divest transport H/W resources associated with this adapter, + * but allow it to be restored later. + */ +void +rpcrdma_ia_remove(struct rpcrdma_ia *ia) +{ + struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, + rx_ia); + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_req *req; + struct rpcrdma_rep *rep; + + cancel_delayed_work_sync(&buf->rb_refresh_worker); + + /* This is similar to rpcrdma_ep_destroy, but: + * - Don't cancel the connect worker. + * - Don't call rpcrdma_ep_disconnect, which waits + * for another conn upcall, which will deadlock. + * - rdma_disconnect is unneeded, the underlying + * connection is already gone. + */ + if (ia->ri_id->qp) { + ib_drain_qp(ia->ri_id->qp); + rdma_destroy_qp(ia->ri_id); + ia->ri_id->qp = NULL; + } + ib_free_cq(ep->rep_attr.recv_cq); + ib_free_cq(ep->rep_attr.send_cq); + + /* The ULP is responsible for ensuring all DMA + * mappings and MRs are gone. + */ + list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list) + rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf); + list_for_each_entry(req, &buf->rb_allreqs, rl_all) { + rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf); + rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); + rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); + } + rpcrdma_destroy_mrs(buf); + + /* Allow waiters to continue */ + complete(&ia->ri_remove_done); +} + +/** + * rpcrdma_ia_close - Clean up/close an IA. + * @ia: interface adapter to close + * */ void rpcrdma_ia_close(struct rpcrdma_ia *ia) @@ -482,13 +527,15 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { if (ia->ri_id->qp) rdma_destroy_qp(ia->ri_id); - rpcrdma_destroy_id(ia->ri_id); - ia->ri_id = NULL; + rdma_destroy_id(ia->ri_id); } + ia->ri_id = NULL; + ia->ri_device = NULL; /* If the pd is still busy, xprtrdma missed freeing a resource */ if (ia->ri_pd && !IS_ERR(ia->ri_pd)) ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; } /* @@ -646,6 +693,99 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ib_free_cq(ep->rep_attr.send_cq); } +/* Re-establish a connection after a device removal event. + * Unlike a normal reconnection, a fresh PD and a new set + * of MRs and buffers is needed. + */ +static int +rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +{ + struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; + int rc, err; + + pr_info("%s: r_xprt = %p\n", __func__, r_xprt); + + rc = -EHOSTUNREACH; + if (rpcrdma_ia_open(r_xprt, sap)) + goto out1; + + rc = -ENOMEM; + err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data); + if (err) { + pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); + goto out2; + } + + rc = -ENETUNREACH; + err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); + if (err) { + pr_err("rpcrdma: rdma_create_qp returned %d\n", err); + goto out3; + } + + rpcrdma_create_mrs(r_xprt); + return 0; + +out3: + rpcrdma_ep_destroy(ep, ia); +out2: + rpcrdma_ia_close(ia); +out1: + return rc; +} + +static int +rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, + struct rpcrdma_ia *ia) +{ + struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr; + struct rdma_cm_id *id, *old; + int err, rc; + + dprintk("RPC: %s: reconnecting...\n", __func__); + + rpcrdma_ep_disconnect(ep, ia); + + rc = -EHOSTUNREACH; + id = rpcrdma_create_id(r_xprt, ia, sap); + if (IS_ERR(id)) + goto out; + + /* As long as the new ID points to the same device as the + * old ID, we can reuse the transport's existing PD and all + * previously allocated MRs. Also, the same device means + * the transport's previous DMA mappings are still valid. + * + * This is a sanity check only. There should be no way these + * point to two different devices here. + */ + old = id; + rc = -ENETUNREACH; + if (ia->ri_device != id->device) { + pr_err("rpcrdma: can't reconnect on different device!\n"); + goto out_destroy; + } + + err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); + if (err) { + dprintk("RPC: %s: rdma_create_qp returned %d\n", + __func__, err); + goto out_destroy; + } + + /* Atomically replace the transport's ID and QP. */ + rc = 0; + old = ia->ri_id; + ia->ri_id = id; + rdma_destroy_qp(old); + +out_destroy: + rdma_destroy_id(old); +out: + return rc; +} + /* * Connect unconnected endpoint. */ @@ -654,61 +794,30 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - struct rdma_cm_id *id, *old; - struct sockaddr *sap; unsigned int extras; - int rc = 0; + int rc; - if (ep->rep_connected != 0) { retry: - dprintk("RPC: %s: reconnecting...\n", __func__); - - rpcrdma_ep_disconnect(ep, ia); - - sap = (struct sockaddr *)&r_xprt->rx_data.addr; - id = rpcrdma_create_id(r_xprt, ia, sap); - if (IS_ERR(id)) { - rc = -EHOSTUNREACH; - goto out; - } - /* TEMP TEMP TEMP - fail if new device: - * Deregister/remarshal *all* requests! - * Close and recreate adapter, pd, etc! - * Re-determine all attributes still sane! - * More stuff I haven't thought of! - * Rrrgh! - */ - if (ia->ri_device != id->device) { - printk("RPC: %s: can't reconnect on " - "different device!\n", __func__); - rpcrdma_destroy_id(id); - rc = -ENETUNREACH; - goto out; - } - /* END TEMP */ - rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); - if (rc) { - dprintk("RPC: %s: rdma_create_qp failed %i\n", - __func__, rc); - rpcrdma_destroy_id(id); - rc = -ENETUNREACH; - goto out; - } - - old = ia->ri_id; - ia->ri_id = id; - - rdma_destroy_qp(old); - rpcrdma_destroy_id(old); - } else { + switch (ep->rep_connected) { + case 0: dprintk("RPC: %s: connecting...\n", __func__); rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); if (rc) { dprintk("RPC: %s: rdma_create_qp failed %i\n", __func__, rc); - /* do not update ep->rep_connected */ - return -ENETUNREACH; + rc = -ENETUNREACH; + goto out_noupdate; } + break; + case -ENODEV: + rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia); + if (rc) + goto out_noupdate; + break; + default: + rc = rpcrdma_ep_reconnect(r_xprt, ep, ia); + if (rc) + goto out; } ep->rep_connected = 0; @@ -736,6 +845,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) out: if (rc) ep->rep_connected = rc; + +out_noupdate: return rc; } @@ -878,7 +989,6 @@ struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_rep *rep; int rc; @@ -894,7 +1004,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) goto out_free; } - rep->rr_device = ia->ri_device; rep->rr_cqe.done = rpcrdma_wc_receive; rep->rr_rxprt = r_xprt; INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); @@ -1037,6 +1146,7 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { cancel_delayed_work_sync(&buf->rb_recovery_worker); + cancel_delayed_work_sync(&buf->rb_refresh_worker); while (!list_empty(&buf->rb_recv_bufs)) { struct rpcrdma_rep *rep; @@ -1081,7 +1191,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) out_nomws: dprintk("RPC: %s: no MWs available\n", __func__); - schedule_delayed_work(&buf->rb_refresh_worker, 0); + if (r_xprt->rx_ep.rep_connected != -ENODEV) + schedule_delayed_work(&buf->rb_refresh_worker, 0); /* Allow the reply handler and refresh worker to run */ cond_resched(); @@ -1231,17 +1342,19 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) { + struct ib_device *device = ia->ri_device; + if (rb->rg_direction == DMA_NONE) return false; - rb->rg_iov.addr = ib_dma_map_single(ia->ri_device, + rb->rg_iov.addr = ib_dma_map_single(device, (void *)rb->rg_base, rdmab_length(rb), rb->rg_direction); - if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb))) + if (ib_dma_mapping_error(device, rdmab_addr(rb))) return false; - rb->rg_device = ia->ri_device; + rb->rg_device = device; rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; return true; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 171a35116de911878ce88402c3bac2979740af63..1d66acf1a723e51efb0b2e91065122cd66856af7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -69,6 +69,7 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct completion ri_done; + struct completion ri_remove_done; int ri_async_rc; unsigned int ri_max_segs; unsigned int ri_max_frmr_depth; @@ -78,10 +79,15 @@ struct rpcrdma_ia { bool ri_reminv_expected; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; + unsigned long ri_flags; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; +enum { + RPCRDMA_IAF_REMOVING = 0, +}; + /* * RDMA Endpoint -- one per transport instance */ @@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) return (struct rpcrdma_msg *)rb->rg_base; } +static inline struct ib_device * +rdmab_device(struct rpcrdma_regbuf *rb) +{ + return rb->rg_device; +} + #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) /* To ensure a transport can always make forward progress, @@ -209,7 +221,6 @@ struct rpcrdma_rep { unsigned int rr_len; int rr_wc_flags; u32 rr_inv_rkey; - struct ib_device *rr_device; struct rpcrdma_xprt *rr_rxprt; struct work_struct rr_work; struct list_head rr_list; @@ -380,7 +391,6 @@ struct rpcrdma_buffer { spinlock_t rb_mwlock; /* protect rb_mws list */ struct list_head rb_mws; struct list_head rb_all; - char *rb_pool; spinlock_t rb_lock; /* protect buf lists */ int rb_send_count, rb_recv_count; @@ -497,10 +507,16 @@ struct rpcrdma_xprt { * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ extern int xprt_rdma_pad_optimize; +/* This setting controls the hunt for a supported memory + * registration strategy. + */ +extern unsigned int xprt_rdma_memreg_strategy; + /* * Interface Adapter calls - xprtrdma/verbs.c */ -int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); +int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); +void rpcrdma_ia_remove(struct rpcrdma_ia *ia); void rpcrdma_ia_close(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *); bool fmr_is_supported(struct rpcrdma_ia *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 16aff8ddc16f8f3e66e31a86ce227b3ac49857bf..d5b54c020decdc2665d671f34d74dd809aa6682a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2432,7 +2432,12 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ENETUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* retry with existing socket, after a delay */ + /* + * xs_tcp_force_close() wakes tasks with -EIO. + * We need to wake them first to ensure the + * correct error code. + */ + xprt_wake_pending_tasks(xprt, status); xs_tcp_force_close(xprt); goto out; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 017801f9dbaae28ff3bd411af3fce3a84da6c90d..8d40a7d31c9908c22b757fc0ab92bce436211c90 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -826,7 +826,7 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, int err; err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, - switchdev_port_bridge_policy); + switchdev_port_bridge_policy, NULL); if (err) return err; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 919981324171bd2028ed3a8662d16bf77f520c1a..9aed6fe1bf1ad67057d5ec001806729cd5988a94 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -106,7 +106,6 @@ __init int net_sysctl_init(void) ret = register_pernet_subsys(&sysctl_pernet_ops); if (ret) goto out1; - register_sysctl_root(&net_sysctl_root); out: return ret; out1: diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 33a5bdfbef76c00578921198bfbbe9c735f643d2..d174ee3254eecb523dbc86061d80a1e812dcc305 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -802,7 +802,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -851,7 +851,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -891,7 +891,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -939,7 +939,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -982,7 +982,7 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -1104,7 +1104,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy); + tipc_nl_media_policy, info->extack); if (err) return err; @@ -1152,7 +1152,7 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy); + tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; diff --git a/net/tipc/link.c b/net/tipc/link.c index ddd2dd6f77aae1a5cd77c6bd86fac293db66e145..60820dc35a08ade9805080de4c15ff9819e150f5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1827,7 +1827,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) int err; err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, - tipc_nl_prop_policy); + tipc_nl_prop_policy, NULL); if (err) return err; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 312ef7de57d7ba27c58533df9edb2f4dd61cc864..ab3087687a32446ffa3bbfaccf206028886e6945 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -508,7 +508,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) } if (skb_cloned(_skb) && - pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL)) + pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) goto exit; /* Now reverse the concerned fields */ diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9be6592e4a6fa20c78995396ffa3dfcd1f19537a..bd0aac87b41ac627e5c897256a79150226926514 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); if (!sseq) @@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); + tipc_subscrp_put(s); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); diff --git a/net/tipc/net.c b/net/tipc/net.c index ab8a2d5d1e3245d31f97375e5a27deda9a15ad58..719c5924b6383e6bf548baf57fdb713283012d87 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -211,8 +211,8 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], - tipc_nl_net_policy); + info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, + info->extack); if (err) return err; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 26ca8dd64ded64407db8ef885f8dbb6edd30e4b8..b76f13f6fea10a53d00ed14a38cdf5cdf7afa44c 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -268,7 +268,8 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) if (!*attr) return -EOPNOTSUPP; - return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); + return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy, + NULL); } int __init tipc_netlink_start(void) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e1ae8a8a2b8eacf93224cc332fff4a537d8d1ab0..9bfe886ab33080f653ec1c39e0255b15709d0b76 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -296,7 +296,7 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, err = nla_parse(attrbuf, tipc_genl_family.maxattr, (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL); + trans_buf->len, NULL, NULL); if (err) goto parse_out; @@ -352,7 +352,7 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], NULL); + attrs[TIPC_NLA_BEARER], NULL, NULL); if (err) return err; @@ -472,7 +472,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL); + NULL, NULL); if (err) return err; @@ -480,7 +480,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, - link[TIPC_NLA_LINK_PROP], NULL); + link[TIPC_NLA_LINK_PROP], NULL, NULL); if (err) return err; @@ -488,7 +488,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, - link[TIPC_NLA_LINK_STATS], NULL); + link[TIPC_NLA_LINK_STATS], NULL, NULL); if (err) return err; @@ -598,7 +598,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL); + NULL, NULL); if (err) return err; @@ -795,7 +795,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL); + attrs[TIPC_NLA_NAME_TABLE], NULL, NULL); if (err) return err; @@ -803,7 +803,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, - nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL); if (err) return err; @@ -863,7 +863,7 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], - NULL); + NULL, NULL); if (err) return err; @@ -929,7 +929,7 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - NULL); + NULL, NULL); if (err) return err; @@ -940,8 +940,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - nla_parse_nested(con, TIPC_NLA_CON_MAX, sock[TIPC_NLA_SOCK_CON], - NULL); + nla_parse_nested(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], NULL, NULL); node = nla_get_u32(con[TIPC_NLA_CON_NODE]); tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", @@ -977,8 +977,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], - NULL); + err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); if (err) return err; @@ -998,7 +998,7 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], - NULL); + NULL, NULL); if (err) return err; @@ -1045,7 +1045,7 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, return -EINVAL; err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], - NULL); + NULL, NULL); if (err) return err; diff --git a/net/tipc/node.c b/net/tipc/node.c index 4512e83652b16da259db9327fe0958c3642e70f1..aeef8011ac7d82d828289f4085efe3acaa8a3945 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1607,8 +1607,8 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) return -EINVAL; err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], - tipc_nl_net_policy); + info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, + info->extack); if (err) return err; @@ -1774,7 +1774,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); + tipc_nl_link_policy, info->extack); if (err) return err; @@ -1902,7 +1902,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy); + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2042,7 +2042,7 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, info->attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy); + tipc_nl_monitor_policy, info->extack); if (err) return err; @@ -2098,6 +2098,8 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) int err; msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; msg.portid = info->snd_portid; msg.seq = info->snd_seq; @@ -2163,7 +2165,7 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy); + tipc_nl_monitor_policy, NULL); if (err) return err; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c21758e88b24b875da4bd97b3c4d2..1b92b72e812f942fc9826d8542f29bf1bd7c26c5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -51,6 +51,7 @@ #define TIPC_FWD_MSG 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 +#define TIPC_ACK_RATE 4 /* ACK at 1/4 of of rcv window size */ enum { TIPC_LISTEN = TCP_LISTEN, @@ -361,25 +362,25 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) return 0; } -#define tipc_wait_for_cond(sock_, timeout_, condition_) \ -({ \ - int rc_ = 0; \ - int done_ = 0; \ - \ - while (!(condition_) && !done_) { \ - struct sock *sk_ = sock->sk; \ - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ - \ - rc_ = tipc_sk_sock_err(sock_, timeout_); \ - if (rc_) \ - break; \ - prepare_to_wait(sk_sleep(sk_), &wait_, \ - TASK_INTERRUPTIBLE); \ - done_ = sk_wait_event(sk_, timeout_, \ - (condition_), &wait_); \ - remove_wait_queue(sk_sleep(sk_), &wait_); \ - } \ - rc_; \ +#define tipc_wait_for_cond(sock_, timeo_, condition_) \ +({ \ + struct sock *sk_; \ + int rc_; \ + \ + while ((rc_ = !(condition_))) { \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + sk_ = (sock_)->sk; \ + rc_ = tipc_sk_sock_err((sock_), timeo_); \ + if (rc_) \ + break; \ + prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + release_sock(sk_); \ + *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ + sched_annotate_sleep(); \ + lock_sock(sk_); \ + remove_wait_queue(sk_sleep(sk_), &wait_); \ + } \ + rc_; \ }) /** @@ -866,6 +867,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + goto exit; + } + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { @@ -1083,7 +1092,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) } } while (sent < dlen && !rc); - return rc ? rc : sent; + return sent ? sent : rc; } /** @@ -1259,7 +1268,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) struct sock *sk = sock->sk; DEFINE_WAIT(wait); long timeo = *timeop; - int err; + int err = sock_error(sk); + + if (err) + return err; for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -1281,6 +1293,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = sock_intr_errno(timeo); if (signal_pending(current)) break; + + err = sock_error(sk); + if (err) + break; } finish_wait(sk_sleep(sk), &wait); *timeop = timeo; @@ -1290,7 +1306,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) /** * tipc_recvmsg - receive packet-oriented message * @m: descriptor for message info - * @buf_len: total size of user buffer area + * @buflen: length of user buffer area * @flags: receive flags * * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. @@ -1298,95 +1314,85 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * * Returns size of returned message data, errno otherwise */ -static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, - int flags) +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, + size_t buflen, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *buf; - struct tipc_msg *msg; - bool is_connectionless = tipc_sk_type_connectionless(sk); - long timeo; - unsigned int sz; - u32 err; - int res, hlen; + struct sk_buff *skb; + struct tipc_msg *hdr; + bool connected = !tipc_sk_type_connectionless(sk); + int rc, err, hlen, dlen, copy; + long timeout; /* Catch invalid receive requests */ - if (unlikely(!buf_len)) + if (unlikely(!buflen)) return -EINVAL; lock_sock(sk); - - if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) { - res = -ENOTCONN; + if (unlikely(connected && sk->sk_state == TIPC_OPEN)) { + rc = -ENOTCONN; goto exit; } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); -restart: - - /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, &timeo); - if (res) - goto exit; - - /* Look at first message in receive queue */ - buf = skb_peek(&sk->sk_receive_queue); - msg = buf_msg(buf); - sz = msg_data_sz(msg); - hlen = msg_hdr_sz(msg); - err = msg_errcode(msg); - - /* Discard an empty non-errored message & try again */ - if ((!sz) && (!err)) { + do { + /* Look at first msg in receive queue; wait if necessary */ + rc = tipc_wait_for_rcvmsg(sock, &timeout); + if (unlikely(rc)) + goto exit; + skb = skb_peek(&sk->sk_receive_queue); + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + hlen = msg_hdr_sz(hdr); + err = msg_errcode(hdr); + if (likely(dlen || err)) + break; tsk_advance_rx_queue(sk); - goto restart; - } - - /* Capture sender's address (optional) */ - set_orig_addr(m, msg); + } while (1); - /* Capture ancillary data (optional) */ - res = tipc_sk_anc_data_recv(m, msg, tsk); - if (res) + /* Collect msg meta data, including error code and rejected data */ + set_orig_addr(m, hdr); + rc = tipc_sk_anc_data_recv(m, hdr, tsk); + if (unlikely(rc)) goto exit; - /* Capture message data (if valid) & compute return value (always) */ - if (!err) { - if (unlikely(buf_len < sz)) { - sz = buf_len; + /* Capture data if non-error msg, otherwise just set return value */ + if (likely(!err)) { + copy = min_t(int, dlen, buflen); + if (unlikely(copy != dlen)) m->msg_flags |= MSG_TRUNC; - } - res = skb_copy_datagram_msg(buf, hlen, m, sz); - if (res) - goto exit; - res = sz; + rc = skb_copy_datagram_msg(skb, hlen, m, copy); } else { - if (is_connectionless || err == TIPC_CONN_SHUTDOWN || - m->msg_control) - res = 0; - else - res = -ECONNRESET; + copy = 0; + rc = 0; + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + rc = -ECONNRESET; } + if (unlikely(rc)) + goto exit; + /* Caption of data or error code/rejected data was successful */ if (unlikely(flags & MSG_PEEK)) goto exit; - if (likely(!is_connectionless)) { - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); - if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) - tipc_sk_send_ack(tsk); - } tsk_advance_rx_queue(sk); + if (likely(!connected)) + goto exit; + + /* Send connection flow control ack when applicable */ + tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) + tipc_sk_send_ack(tsk); exit: release_sock(sk); - return res; + return rc ? rc : copy; } /** - * tipc_recv_stream - receive stream-oriented data + * tipc_recvstream - receive stream-oriented data * @m: descriptor for message info - * @buf_len: total size of user buffer area + * @buflen: total size of user buffer area * @flags: receive flags * * Used for SOCK_STREAM messages only. If not enough data is available @@ -1394,111 +1400,98 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, * * Returns size of returned message data, errno otherwise */ -static int tipc_recv_stream(struct socket *sock, struct msghdr *m, - size_t buf_len, int flags) +static int tipc_recvstream(struct socket *sock, struct msghdr *m, + size_t buflen, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *buf; - struct tipc_msg *msg; - long timeo; - unsigned int sz; - int target; - int sz_copied = 0; - u32 err; - int res = 0, hlen; + struct sk_buff *skb; + struct tipc_msg *hdr; + struct tipc_skb_cb *skb_cb; + bool peek = flags & MSG_PEEK; + int offset, required, copy, copied = 0; + int hlen, dlen, err, rc; + long timeout; /* Catch invalid receive attempts */ - if (unlikely(!buf_len)) + if (unlikely(!buflen)) return -EINVAL; lock_sock(sk); if (unlikely(sk->sk_state == TIPC_OPEN)) { - res = -ENOTCONN; - goto exit; - } - - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - -restart: - /* Look for a message in receive queue; wait if necessary */ - res = tipc_wait_for_rcvmsg(sock, &timeo); - if (res) + rc = -ENOTCONN; goto exit; - - /* Look at first message in receive queue */ - buf = skb_peek(&sk->sk_receive_queue); - msg = buf_msg(buf); - sz = msg_data_sz(msg); - hlen = msg_hdr_sz(msg); - err = msg_errcode(msg); - - /* Discard an empty non-errored message & try again */ - if ((!sz) && (!err)) { - tsk_advance_rx_queue(sk); - goto restart; } + required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen); + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - /* Optionally capture sender's address & ancillary data of first msg */ - if (sz_copied == 0) { - set_orig_addr(m, msg); - res = tipc_sk_anc_data_recv(m, msg, tsk); - if (res) - goto exit; - } - - /* Capture message data (if valid) & compute return value (always) */ - if (!err) { - u32 offset = TIPC_SKB_CB(buf)->bytes_read; - u32 needed; - int sz_to_copy; - - sz -= offset; - needed = (buf_len - sz_copied); - sz_to_copy = min(sz, needed); - - res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); - if (res) - goto exit; + do { + /* Look at first msg in receive queue; wait if necessary */ + rc = tipc_wait_for_rcvmsg(sock, &timeout); + if (unlikely(rc)) + break; + skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + hlen = msg_hdr_sz(hdr); + err = msg_errcode(hdr); + + /* Discard any empty non-errored (SYN-) message */ + if (unlikely(!dlen && !err)) { + tsk_advance_rx_queue(sk); + continue; + } - sz_copied += sz_to_copy; + /* Collect msg meta data, incl. error code and rejected data */ + if (!copied) { + set_orig_addr(m, hdr); + rc = tipc_sk_anc_data_recv(m, hdr, tsk); + if (rc) + break; + } - if (sz_to_copy < sz) { - if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->bytes_read = - offset + sz_to_copy; - goto exit; + /* Copy data if msg ok, otherwise return error/partial data */ + if (likely(!err)) { + offset = skb_cb->bytes_read; + copy = min_t(int, dlen - offset, buflen - copied); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + break; + copied += copy; + offset += copy; + if (unlikely(offset < dlen)) { + if (!peek) + skb_cb->bytes_read = offset; + break; + } + } else { + rc = 0; + if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control) + rc = -ECONNRESET; + if (copied || rc) + break; } - } else { - if (sz_copied != 0) - goto exit; /* can't add error msg to valid data */ - if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control) - res = 0; - else - res = -ECONNRESET; - } + if (unlikely(peek)) + break; - if (unlikely(flags & MSG_PEEK)) - goto exit; + tsk_advance_rx_queue(sk); - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); - if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) - tipc_sk_send_ack(tsk); - tsk_advance_rx_queue(sk); + /* Send connection flow control advertisement when applicable */ + tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); + if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)) + tipc_sk_send_ack(tsk); - /* Loop around if more data is required */ - if ((sz_copied < buf_len) && /* didn't get all requested data */ - (!skb_queue_empty(&sk->sk_receive_queue) || - (sz_copied < target)) && /* and more is ready or required */ - (!err)) /* and haven't reached a FIN */ - goto restart; + /* Exit if all requested data or FIN/error received */ + if (copied == buflen || err) + break; + } while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required); exit: release_sock(sk); - return sz_copied ? sz_copied : res; + return copied ? copied : rc; } /** @@ -1551,6 +1544,8 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); + u32 pport = msg_origport(hdr); + u32 pnode = msg_orignode(hdr); if (unlikely(msg_mcast(hdr))) return false; @@ -1558,18 +1553,28 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) switch (sk->sk_state) { case TIPC_CONNECTING: /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(hdr))) - return false; + if (unlikely(!msg_connected(hdr))) { + if (pport != tsk_peer_port(tsk) || + pnode != tsk_peer_node(tsk)) + return false; + + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } if (unlikely(msg_errcode(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); return true; } if (unlikely(!msg_isdata(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; + sk->sk_state_change(sk); return true; } @@ -1581,8 +1586,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + sk->sk_data_ready(sk); /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); @@ -2511,6 +2515,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2519,7 +2545,7 @@ static const struct proto_ops msg_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2540,7 +2566,7 @@ static const struct proto_ops packet_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2561,7 +2587,7 @@ static const struct proto_ops stream_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2571,7 +2597,7 @@ static const struct proto_ops stream_ops = { .setsockopt = tipc_setsockopt, .getsockopt = tipc_getsockopt, .sendmsg = tipc_sendstream, - .recvmsg = tipc_recv_stream, + .recvmsg = tipc_recvstream, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage }; @@ -2844,7 +2870,7 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - tipc_nl_sock_policy); + tipc_nl_sock_policy, NULL); if (err) return err; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 271cd66e4b3b66534d8686bec94132bc9737314e..0bf91cd3733cb37ecc8ba4ccf7ae5a26cb6e966d 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,8 +54,6 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); -static void tipc_subscrp_put(struct tipc_subscription *subscription); -static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; - tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); - tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) @@ -145,6 +141,7 @@ static void tipc_subscrp_timeout(unsigned long data) spin_lock_bh(&subscriber->lock); tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ @@ -177,20 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; - spin_lock_bh(&subscriber->lock); - list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); - spin_unlock_bh(&subscriber->lock); kfree(sub); tipc_subscrb_put(subscriber); } -static void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_subscrp_put(struct tipc_subscription *subscription) { kref_put(&subscription->kref, tipc_subscrp_kref_release); } -static void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_subscrp_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } @@ -210,11 +204,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, continue; tipc_nametbl_unsubscribe(sub); - tipc_subscrp_get(sub); - spin_unlock_bh(&subscriber->lock); + list_del(&sub->subscrp_list); tipc_subscrp_delete(sub); - tipc_subscrp_put(sub); - spin_lock_bh(&subscriber->lock); if (s) break; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ffdc214c117a924f34b416fde415fcd18201ebc0..ee52957dc9524a76ac371aa19d950dc90bfe4035 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); +void tipc_subscrp_put(struct tipc_subscription *subscription); +void tipc_subscrp_get(struct tipc_subscription *subscription); + #endif diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 46061cf48cd13506a12cb9b8fb53f64d9a56aa06..ecca64fc6a6f223bf8c0e09e3a299fe4fd62509d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -457,7 +457,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy); + tipc_nl_bearer_policy, NULL); if (err) return err; @@ -609,7 +609,8 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; struct udp_media_addr *dst; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, + tipc_nl_udp_policy, NULL)) return -EINVAL; if (!opts[TIPC_NLA_UDP_REMOTE]) @@ -662,7 +663,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy)) + tipc_nl_udp_policy, NULL)) goto err; if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 928691c434087e8ff72b84fc6515539115c0509b..1a0c961f4ffeef40abc5b980e004b01120e3c536 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -996,10 +996,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; - struct path path = { NULL, NULL }; + struct path path = { }; err = -EINVAL; - if (sunaddr->sun_family != AF_UNIX) + if (addr_len < offsetofend(struct sockaddr_un, sun_family) || + sunaddr->sun_family != AF_UNIX) goto out; if (addr_len == sizeof(short)) { @@ -1110,6 +1111,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, unsigned int hash; int err; + err = -EINVAL; + if (alen < offsetofend(struct sockaddr, sa_family)) + goto out; + if (addr->sa_family != AF_UNSPEC) { err = unix_mkname(sunaddr, alen, &hash); if (err < 0) diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index bc27c70e0e59f76a487af92455ac6542a035f762..09fc2eb29dc88898f631bfdf3e747bf16fbe4462 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o -vsock-y += af_vsock.o vsock_addr.o +vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ vmci_transport_notify_qstate.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 6f7f6757ceefb500551fafbf40c462835c4baf88..dfc8c51e4d74ec378a338ab9bb2560b3811f393b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1540,8 +1540,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, long timeout; int err; struct vsock_transport_send_notify_data send_data; - - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); @@ -1584,11 +1583,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; - while (total_written < len) { ssize_t written; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1597,33 +1595,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); if (err < 0) { - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } release_sock(sk); - timeout = schedule_timeout(timeout); + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } else if (timeout == 0) { err = -EAGAIN; - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); goto out_err; } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after diff --git a/net/vmw_vsock/af_vsock_tap.c b/net/vmw_vsock/af_vsock_tap.c new file mode 100644 index 0000000000000000000000000000000000000000..98f09b5393668e1e8ac6bfd5b03e4cf5ffd903b7 --- /dev/null +++ b/net/vmw_vsock/af_vsock_tap.c @@ -0,0 +1,114 @@ +/* + * Tap functions for AF_VSOCK sockets. + * + * Code based on net/netlink/af_netlink.c tap functions. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +static DEFINE_SPINLOCK(vsock_tap_lock); +static struct list_head vsock_tap_all __read_mostly = + LIST_HEAD_INIT(vsock_tap_all); + +int vsock_add_tap(struct vsock_tap *vt) +{ + if (unlikely(vt->dev->type != ARPHRD_VSOCKMON)) + return -EINVAL; + + __module_get(vt->module); + + spin_lock(&vsock_tap_lock); + list_add_rcu(&vt->list, &vsock_tap_all); + spin_unlock(&vsock_tap_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_add_tap); + +int vsock_remove_tap(struct vsock_tap *vt) +{ + struct vsock_tap *tmp; + bool found = false; + + spin_lock(&vsock_tap_lock); + + list_for_each_entry(tmp, &vsock_tap_all, list) { + if (vt == tmp) { + list_del_rcu(&vt->list); + found = true; + goto out; + } + } + + pr_warn("vsock_remove_tap: %p not found\n", vt); +out: + spin_unlock(&vsock_tap_lock); + + synchronize_net(); + + if (found) + module_put(vt->module); + + return found ? 0 : -ENODEV; +} +EXPORT_SYMBOL_GPL(vsock_remove_tap); + +static int __vsock_deliver_tap_skb(struct sk_buff *skb, + struct net_device *dev) +{ + int ret = 0; + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) { + dev_hold(dev); + + nskb->dev = dev; + ret = dev_queue_xmit(nskb); + if (unlikely(ret > 0)) + ret = net_xmit_errno(ret); + + dev_put(dev); + } + + return ret; +} + +static void __vsock_deliver_tap(struct sk_buff *skb) +{ + int ret; + struct vsock_tap *tmp; + + list_for_each_entry_rcu(tmp, &vsock_tap_all, list) { + ret = __vsock_deliver_tap_skb(skb, tmp->dev); + if (unlikely(ret)) + break; + } +} + +void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque) +{ + struct sk_buff *skb; + + rcu_read_lock(); + + if (likely(list_empty(&vsock_tap_all))) + goto out; + + skb = build_skb(opaque); + if (skb) { + __vsock_deliver_tap(skb); + consume_skb(skb); + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(vsock_deliver_tap); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 68675a151f22b8b63c02b25a67b833d9a6046d84..403d86e80162e7796fd75249b1ae876d1eee1e6a 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -144,6 +144,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) list_del_init(&pkt->list); spin_unlock_bh(&vsock->send_pkt_list_lock); + virtio_transport_deliver_tap_pkt(pkt); + reply = pkt->reply; sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); @@ -370,6 +372,7 @@ static void virtio_transport_rx_work(struct work_struct *work) } pkt->len = len - sizeof(pkt->hdr); + virtio_transport_deliver_tap_pkt(pkt); virtio_transport_recv_pkt(pkt); } } while (!virtqueue_enable_cb(vq)); @@ -573,9 +576,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names, - NULL); + ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX, + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index af087b44ceea2311e53060e2442b4af2024bb037..18e24793659f928221297208ff4c82c8e385d14b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -85,6 +86,69 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, return NULL; } +/* Packet capture */ +static struct sk_buff *virtio_transport_build_skb(void *opaque) +{ + struct virtio_vsock_pkt *pkt = opaque; + unsigned char *t_hdr, *payload; + struct af_vsockmon_hdr *hdr; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + GFP_ATOMIC); + if (!skb) + return NULL; + + hdr = (struct af_vsockmon_hdr *)skb_put(skb, sizeof(*hdr)); + + /* pkt->hdr is little-endian so no need to byteswap here */ + hdr->src_cid = pkt->hdr.src_cid; + hdr->src_port = pkt->hdr.src_port; + hdr->dst_cid = pkt->hdr.dst_cid; + hdr->dst_port = pkt->hdr.dst_port; + + hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); + hdr->len = cpu_to_le16(sizeof(pkt->hdr)); + memset(hdr->reserved, 0, sizeof(hdr->reserved)); + + switch (le16_to_cpu(pkt->hdr.op)) { + case VIRTIO_VSOCK_OP_REQUEST: + case VIRTIO_VSOCK_OP_RESPONSE: + hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); + break; + case VIRTIO_VSOCK_OP_RST: + case VIRTIO_VSOCK_OP_SHUTDOWN: + hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT); + break; + case VIRTIO_VSOCK_OP_RW: + hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD); + break; + case VIRTIO_VSOCK_OP_CREDIT_UPDATE: + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); + break; + default: + hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN); + break; + } + + t_hdr = skb_put(skb, sizeof(pkt->hdr)); + memcpy(t_hdr, &pkt->hdr, sizeof(pkt->hdr)); + + if (pkt->len) { + payload = skb_put(skb, pkt->len); + memcpy(payload, pkt->buf, pkt->len); + } + + return skb; +} + +void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) +{ + vsock_deliver_tap(virtio_transport_build_skb, pkt); +} +EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); + static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 4be4fbbc0b5035662b1cd756bd4e99dd3351309e..10ae7823a19def7bde20d669e3913a40178e7da2 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1; static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) { - int err; - switch (vmci_error) { case VMCI_ERROR_NO_MEM: - err = ENOMEM; - break; + return -ENOMEM; case VMCI_ERROR_DUPLICATE_ENTRY: case VMCI_ERROR_ALREADY_EXISTS: - err = EADDRINUSE; - break; + return -EADDRINUSE; case VMCI_ERROR_NO_ACCESS: - err = EPERM; - break; + return -EPERM; case VMCI_ERROR_NO_RESOURCES: - err = ENOBUFS; - break; + return -ENOBUFS; case VMCI_ERROR_INVALID_RESOURCE: - err = EHOSTUNREACH; - break; + return -EHOSTUNREACH; case VMCI_ERROR_INVALID_ARGS: default: - err = EINVAL; + break; } - - return err > 0 ? -err : err; + return -EINVAL; } static u32 vmci_transport_peer_rid(u32 peer_cid) diff --git a/net/wireless/ap.c b/net/wireless/ap.c index bdad1f951561b3d8b7c75d8992e1c3c1a623f146..25666d3009be8be07410f7b0b53c81f62566111a 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -32,6 +32,11 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, rdev_set_qos_map(rdev, dev, NULL); if (notify) nl80211_send_ap_stopped(wdev); + + /* Should we apply the grace period during beaconing interface + * shutdown also? + */ + cfg80211_sched_dfs_chan_update(rdev); } return err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 5497d022fadabf17512c0f4c0b77ce50cce345dc..b8aa5a7d5c77ae8453062de794fee7329e429f75 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -456,6 +456,123 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, return (r1 + r2 > 0); } +/* + * Checks if center frequency of chan falls with in the bandwidth + * range of chandef. + */ +bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan) +{ + int width; + u32 cf_offset, freq; + + if (chandef->chan->center_freq == chan->center_freq) + return true; + + width = cfg80211_chandef_get_width(chandef); + if (width <= 20) + return false; + + cf_offset = width / 2 - 10; + + for (freq = chandef->center_freq1 - width / 2 + 10; + freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { + if (chan->center_freq == freq) + return true; + } + + if (!chandef->center_freq2) + return false; + + for (freq = chandef->center_freq2 - width / 2 + 10; + freq <= chandef->center_freq2 + width / 2 - 10; freq += 20) { + if (chan->center_freq == freq) + return true; + } + + return false; +} + +bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) +{ + bool active = false; + + ASSERT_WDEV_LOCK(wdev); + + if (!wdev->chandef.chan) + return false; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + active = wdev->beacon_interval != 0; + break; + case NL80211_IFTYPE_ADHOC: + active = wdev->ssid_len != 0; + break; + case NL80211_IFTYPE_MESH_POINT: + active = wdev->mesh_id_len != 0; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_P2P_DEVICE: + /* Can NAN type be considered as beaconing interface? */ + case NL80211_IFTYPE_NAN: + break; + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + WARN_ON(1); + } + + return active; +} + +static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan) +{ + struct wireless_dev *wdev; + + list_for_each_entry(wdev, &wiphy->wdev_list, list) { + wdev_lock(wdev); + if (!cfg80211_beaconing_iface_active(wdev)) { + wdev_unlock(wdev); + continue; + } + + if (cfg80211_is_sub_chan(&wdev->chandef, chan)) { + wdev_unlock(wdev); + return true; + } + wdev_unlock(wdev); + } + + return false; +} + +bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan) +{ + struct cfg80211_registered_device *rdev; + + ASSERT_RTNL(); + + if (!(chan->flags & IEEE80211_CHAN_RADAR)) + return false; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) + continue; + + if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan)) + return true; + } + + return false; +} static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, u32 center_freq, diff --git a/net/wireless/core.c b/net/wireless/core.c index e55e05bc48053f5542696a4c2d53170dfd02577c..83ea164f16b3e018546260d8e809ae6b244d85e3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -305,30 +305,14 @@ static void cfg80211_event_work(struct work_struct *work) void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) { - struct cfg80211_iface_destroy *item; + struct wireless_dev *wdev, *tmp; ASSERT_RTNL(); - spin_lock_irq(&rdev->destroy_list_lock); - while ((item = list_first_entry_or_null(&rdev->destroy_list, - struct cfg80211_iface_destroy, - list))) { - struct wireless_dev *wdev, *tmp; - u32 nlportid = item->nlportid; - - list_del(&item->list); - kfree(item); - spin_unlock_irq(&rdev->destroy_list_lock); - - list_for_each_entry_safe(wdev, tmp, - &rdev->wiphy.wdev_list, list) { - if (nlportid == wdev->owner_nlportid) - rdev_del_virtual_intf(rdev, wdev); - } - - spin_lock_irq(&rdev->destroy_list_lock); + list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) { + if (wdev->nl_owner_dead) + rdev_del_virtual_intf(rdev, wdev); } - spin_unlock_irq(&rdev->destroy_list_lock); } static void cfg80211_destroy_iface_wk(struct work_struct *work) @@ -346,13 +330,47 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) static void cfg80211_sched_scan_stop_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *req, *tmp; rdev = container_of(work, struct cfg80211_registered_device, sched_scan_stop_wk); rtnl_lock(); + list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { + if (req->nl_owner_dead) + cfg80211_stop_sched_scan_req(rdev, req, false); + } + rtnl_unlock(); +} - __cfg80211_stop_sched_scan(rdev, false); +static void cfg80211_propagate_radar_detect_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + propagate_radar_detect_wk); + + rtnl_lock(); + + regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->radar_chandef, + NL80211_DFS_UNAVAILABLE, + NL80211_RADAR_DETECTED); + + rtnl_unlock(); +} + +static void cfg80211_propagate_cac_done_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + propagate_cac_done_wk); + + rtnl_lock(); + + regulatory_propagate_dfs_state(&rdev->wiphy, &rdev->cac_done_chandef, + NL80211_DFS_AVAILABLE, + NL80211_RADAR_CAC_FINISHED); rtnl_unlock(); } @@ -436,8 +454,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, spin_lock_init(&rdev->beacon_registrations_lock); spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); + INIT_LIST_HEAD(&rdev->sched_scan_req_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); - INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results); INIT_LIST_HEAD(&rdev->mlme_unreg); spin_lock_init(&rdev->mlme_unreg_lock); INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk); @@ -452,10 +470,12 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, rdev->wiphy.dev.platform_data = rdev; device_enable_async_suspend(&rdev->wiphy.dev); - INIT_LIST_HEAD(&rdev->destroy_list); - spin_lock_init(&rdev->destroy_list_lock); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); + INIT_WORK(&rdev->sched_scan_res_wk, cfg80211_sched_scan_results_wk); + INIT_WORK(&rdev->propagate_radar_detect_wk, + cfg80211_propagate_radar_detect_wk); + INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -915,6 +935,8 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->destroy_work); flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->mlme_unreg_wk); + flush_work(&rdev->propagate_radar_detect_wk); + flush_work(&rdev->propagate_cac_done_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -954,6 +976,12 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); +void cfg80211_cqm_config_free(struct wireless_dev *wdev) +{ + kfree(wdev->cqm_config); + wdev->cqm_config = NULL; +} + void cfg80211_unregister_wdev(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -980,6 +1008,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) WARN_ON_ONCE(1); break; } + + cfg80211_cqm_config_free(wdev); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -1001,7 +1031,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request *pos, *tmp; ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); @@ -1012,9 +1042,11 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - sched_scan_req = rtnl_dereference(rdev->sched_scan_req); - if (sched_scan_req && dev == sched_scan_req->dev) - __cfg80211_stop_sched_scan(rdev, false); + list_for_each_entry_safe(pos, tmp, &rdev->sched_scan_req_list, + list) { + if (dev == pos->dev) + cfg80211_stop_sched_scan_req(rdev, pos, false); + } #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); @@ -1089,7 +1121,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request *pos, *tmp; if (!wdev) return NOTIFY_DONE; @@ -1114,7 +1146,15 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - wdev->identifier = ++rdev->wdev_id; + /* + * We get here also when the interface changes network namespaces, + * as it's registered into the new one, but we don't want it to + * change ID in that case. Checking if the ID is already assigned + * works, because 0 isn't considered a valid ID and the memory is + * 0-initialized. + */ + if (!wdev->identifier) + wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list); rdev->devlist_generation++; /* can only change netns with wiphy */ @@ -1158,10 +1198,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ___cfg80211_scan_done(rdev, false); } - sched_scan_req = rtnl_dereference(rdev->sched_scan_req); - if (WARN_ON(sched_scan_req && - sched_scan_req->dev == wdev->netdev)) { - __cfg80211_stop_sched_scan(rdev, false); + list_for_each_entry_safe(pos, tmp, + &rdev->sched_scan_req_list, list) { + if (WARN_ON(pos && pos->dev == wdev->netdev)) + cfg80211_stop_sched_scan_req(rdev, pos, false); } rdev->opencount--; @@ -1208,12 +1248,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, */ if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && - rdev->ops->set_power_mgmt) - if (rdev_set_power_mgmt(rdev, dev, wdev->ps, - wdev->ps_timeout)) { - /* assume this means it's off */ - wdev->ps = false; - } + rdev->ops->set_power_mgmt && + rdev_set_power_mgmt(rdev, dev, wdev->ps, + wdev->ps_timeout)) { + /* assume this means it's off */ + wdev->ps = false; + } break; case NETDEV_UNREGISTER: /* @@ -1234,6 +1274,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, kzfree(wdev->wext.keys); #endif flush_work(&wdev->disconnect_wk); + cfg80211_cqm_config_free(wdev); } /* * synchronise (so that we won't find this netdev diff --git a/net/wireless/core.h b/net/wireless/core.h index 58ca206982feafa7ddfe2c90d4483791e5057cf0..6e809325af3bf090f09fb8fef4356bada3a1beed 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -74,10 +74,9 @@ struct cfg80211_registered_device { u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; - struct cfg80211_sched_scan_request __rcu *sched_scan_req; + struct list_head sched_scan_req_list; unsigned long suspend_at; struct work_struct scan_done_wk; - struct work_struct sched_scan_results_wk; struct genl_info *cur_cmd_info; @@ -91,11 +90,15 @@ struct cfg80211_registered_device { struct cfg80211_coalesce *coalesce; - spinlock_t destroy_list_lock; - struct list_head destroy_list; struct work_struct destroy_work; - struct work_struct sched_scan_stop_wk; + struct work_struct sched_scan_res_wk; + + struct cfg80211_chan_def radar_chandef; + struct work_struct propagate_radar_detect_wk; + + struct cfg80211_chan_def cac_done_chandef; + struct work_struct propagate_cac_done_wk; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ @@ -220,23 +223,8 @@ struct cfg80211_event { enum cfg80211_event_type type; union { - struct { - u8 bssid[ETH_ALEN]; - const u8 *req_ie; - const u8 *resp_ie; - size_t req_ie_len; - size_t resp_ie_len; - struct cfg80211_bss *bss; - int status; /* -1 = failed; 0..65535 = status code */ - enum nl80211_timeout_reason timeout_reason; - } cr; - struct { - const u8 *req_ie; - const u8 *resp_ie; - size_t req_ie_len; - size_t resp_ie_len; - struct cfg80211_bss *bss; - } rm; + struct cfg80211_connect_resp_params cr; + struct cfg80211_roam_info rm; struct { const u8 *ie; size_t ie_len; @@ -267,9 +255,11 @@ struct cfg80211_beacon_registration { u32 nlportid; }; -struct cfg80211_iface_destroy { - struct list_head list; - u32 nlportid; +struct cfg80211_cqm_config { + u32 rssi_hyst; + s32 last_rssi_event_value; + int n_rssi_thresholds; + s32 rssi_thresholds[0]; }; void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); @@ -385,21 +375,16 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct cfg80211_connect_params *connect, struct cfg80211_cached_keys *connkeys, const u8 *prev_bssid); -void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - int status, bool wextev, - struct cfg80211_bss *bss, - enum nl80211_timeout_reason timeout_reason); +void __cfg80211_connect_result(struct net_device *dev, + struct cfg80211_connect_resp_params *params, + bool wextev); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, - struct cfg80211_bss *bss, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len); + struct cfg80211_roam_info *info); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); @@ -423,13 +408,20 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message); -void __cfg80211_sched_scan_results(struct work_struct *wk); +void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req); +int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, + bool want_multi); +void cfg80211_sched_scan_results_wk(struct work_struct *work); +int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req, + bool driver_initiated); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, - bool driver_initiated); + u64 reqid, bool driver_initiated); void cfg80211_upload_connect_keys(struct wireless_dev *wdev); int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, - u32 *flags, struct vif_params *params); + struct vif_params *params); void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); void cfg80211_process_wdev_events(struct wireless_dev *wdev); @@ -459,6 +451,16 @@ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); +void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); + +bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, + struct ieee80211_channel *chan); + +bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); + +bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan); + static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { unsigned long end = jiffies; @@ -512,4 +514,6 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, #define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; }) #endif +void cfg80211_cqm_config_free(struct wireless_dev *wdev); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 364f900a3dc4d2810c90fa3f613d1b9a08cb5e71..10bf040a0982d2f7859f98399c24de26a7fd5383 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -190,6 +190,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) if (!nowext) wdev->wext.ibss.ssid_len = 0; #endif + cfg80211_sched_dfs_chan_update(rdev); } void cfg80211_clear_ibss(struct net_device *dev, bool nowext) diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2d8518a37eabc4795e97f7f600f14cdf0f63d7ff..ec0b1c20ac9920106efbc8798b53d1077a646e36 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -262,6 +262,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, wdev->beacon_interval = 0; memset(&wdev->chandef, 0, sizeof(wdev->chandef)); rdev_set_qos_map(rdev, dev, NULL); + cfg80211_sched_dfs_chan_update(rdev); } return err; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 22b3d999006559d7572287c2b8e6f4fad286766d..d8df7a5180a0473b56e74e8b4eb93b380965ea49 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -26,9 +26,16 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; - u8 *ie = mgmt->u.assoc_resp.variable; - int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); - u16 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); + struct cfg80211_connect_resp_params cr; + + memset(&cr, 0, sizeof(cr)); + cr.status = (int)le16_to_cpu(mgmt->u.assoc_resp.status_code); + cr.bssid = mgmt->bssid; + cr.bss = bss; + cr.resp_ie = mgmt->u.assoc_resp.variable; + cr.resp_ie_len = + len - offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED; trace_cfg80211_send_rx_assoc(dev, bss); @@ -38,7 +45,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, * and got a reject -- we only try again with an assoc * frame instead of reassoc. */ - if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) { + if (cfg80211_sme_rx_assoc_resp(wdev, cr.status)) { cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); return; @@ -46,10 +53,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues); /* update current_bss etc., consumes the bss reference */ - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, - status_code, - status_code == WLAN_STATUS_SUCCESS, bss, - NL80211_TIMEOUT_UNSPECIFIED); + __cfg80211_connect_result(dev, &cr, cr.status == WLAN_STATUS_SUCCESS); } EXPORT_SYMBOL(cfg80211_rx_assoc_resp); @@ -745,6 +749,12 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, } EXPORT_SYMBOL(cfg80211_rx_mgmt); +void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev) +{ + cancel_delayed_work(&rdev->dfs_update_channels_wk); + queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, 0); +} + void cfg80211_dfs_channels_update_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); @@ -755,6 +765,8 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) struct wiphy *wiphy; bool check_again = false; unsigned long timeout, next_time = 0; + unsigned long time_dfs_update; + enum nl80211_radar_event radar_event; int bandid, i; rdev = container_of(delayed_work, struct cfg80211_registered_device, @@ -770,11 +782,27 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) for (i = 0; i < sband->n_channels; i++) { c = &sband->channels[i]; - if (c->dfs_state != NL80211_DFS_UNAVAILABLE) + if (!(c->flags & IEEE80211_CHAN_RADAR)) + continue; + + if (c->dfs_state != NL80211_DFS_UNAVAILABLE && + c->dfs_state != NL80211_DFS_AVAILABLE) continue; - timeout = c->dfs_state_entered + msecs_to_jiffies( - IEEE80211_DFS_MIN_NOP_TIME_MS); + if (c->dfs_state == NL80211_DFS_UNAVAILABLE) { + time_dfs_update = IEEE80211_DFS_MIN_NOP_TIME_MS; + radar_event = NL80211_RADAR_NOP_FINISHED; + } else { + if (regulatory_pre_cac_allowed(wiphy) || + cfg80211_any_wiphy_oper_chan(wiphy, c)) + continue; + + time_dfs_update = REG_PRE_CAC_EXPIRY_GRACE_MS; + radar_event = NL80211_RADAR_PRE_CAC_EXPIRED; + } + + timeout = c->dfs_state_entered + + msecs_to_jiffies(time_dfs_update); if (time_after_eq(jiffies, timeout)) { c->dfs_state = NL80211_DFS_USABLE; @@ -784,8 +812,12 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) NL80211_CHAN_NO_HT); nl80211_radar_notify(rdev, &chandef, - NL80211_RADAR_NOP_FINISHED, - NULL, GFP_ATOMIC); + radar_event, NULL, + GFP_ATOMIC); + + regulatory_propagate_dfs_state(wiphy, &chandef, + c->dfs_state, + radar_event); continue; } @@ -810,7 +842,6 @@ void cfg80211_radar_event(struct wiphy *wiphy, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - unsigned long timeout; trace_cfg80211_radar_event(wiphy, chandef); @@ -820,11 +851,12 @@ void cfg80211_radar_event(struct wiphy *wiphy, */ cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); - timeout = msecs_to_jiffies(IEEE80211_DFS_MIN_NOP_TIME_MS); - queue_delayed_work(cfg80211_wq, &rdev->dfs_update_channels_wk, - timeout); + cfg80211_sched_dfs_chan_update(rdev); nl80211_radar_notify(rdev, chandef, NL80211_RADAR_DETECTED, NULL, gfp); + + memcpy(&rdev->radar_chandef, chandef, sizeof(struct cfg80211_chan_def)); + queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk); } EXPORT_SYMBOL(cfg80211_radar_event); @@ -851,6 +883,10 @@ void cfg80211_cac_event(struct net_device *netdev, msecs_to_jiffies(wdev->cac_time_ms); WARN_ON(!time_after_eq(jiffies, timeout)); cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); + memcpy(&rdev->cac_done_chandef, chandef, + sizeof(struct cfg80211_chan_def)); + queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); + cfg80211_sched_dfs_chan_update(rdev); break; case NL80211_RADAR_CAC_ABORTED: break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2312dc2ffdb98b37b2909274c57eed68935267d7..c3bc9da30cff997970dc2cf8aebd05c4795c3cf9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -410,6 +410,16 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = sizeof(struct nl80211_bss_select_rssi_adjust) }, [NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 }, + [NL80211_ATTR_FILS_ERP_USERNAME] = { .type = NLA_BINARY, + .len = FILS_ERP_MAX_USERNAME_LEN }, + [NL80211_ATTR_FILS_ERP_REALM] = { .type = NLA_BINARY, + .len = FILS_ERP_MAX_REALM_LEN }, + [NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 }, + [NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY, + .len = FILS_ERP_MAX_RRK_LEN }, + [NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 }, + [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN }, + [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -487,6 +497,7 @@ static const struct nla_policy nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, + [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = { .len = ETH_ALEN }, [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, }; @@ -548,7 +559,7 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, genl_family_attrbuf(&nl80211_fam), - nl80211_fam.maxattr, nl80211_policy); + nl80211_fam.maxattr, nl80211_policy, NULL); if (err) return err; @@ -719,7 +730,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) { struct nlattr *tb[NL80211_KEY_MAX + 1]; int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, - nl80211_key_policy); + nl80211_key_policy, NULL); if (err) return err; @@ -760,7 +771,7 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, tb[NL80211_KEY_DEFAULT_TYPES], - nl80211_key_default_policy); + nl80211_key_default_policy, NULL); if (err) return err; @@ -807,10 +818,11 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; - int err = nla_parse_nested( - kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, - info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], - nl80211_key_default_policy); + int err = nla_parse_nested(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -1366,7 +1378,7 @@ static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } - if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + if (rdev->wiphy.max_sched_scan_reqs) CMD(sched_scan_start, START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); @@ -1805,6 +1817,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; + if (rdev->wiphy.max_sched_scan_reqs && + nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_MAX_REQS, + rdev->wiphy.max_sched_scan_reqs)) + goto nla_put_failure; + if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, sizeof(rdev->wiphy.ext_features), rdev->wiphy.ext_features)) @@ -1892,8 +1909,8 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct nl80211_dump_wiphy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - tb, nl80211_fam.maxattr, nl80211_policy); + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, + nl80211_fam.maxattr, nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ if (ret) return 0; @@ -2308,7 +2325,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rem_txq_params) { result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX, nl_txq_params, - txq_params_policy); + txq_params_policy, + info->extack); if (result) return result; result = parse_txq_params(tb, &txq_params); @@ -2695,17 +2713,82 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (!nla) return -EINVAL; - if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, - nla, mntr_flags_policy)) + if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla, + mntr_flags_policy, NULL)) return -EINVAL; for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++) if (flags[flag]) *mntrflags |= (1<attrs[NL80211_ATTR_MNTR_FLAGS]) { + if (type != NL80211_IFTYPE_MONITOR) + return -EINVAL; + + err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], + ¶ms->flags); + if (err) + return err; + + change = true; + } + + if (params->flags & MONITOR_FLAG_ACTIVE && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) { + const u8 *mumimo_groups; + u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; + + if (type != NL80211_IFTYPE_MONITOR) + return -EINVAL; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) + return -EOPNOTSUPP; + + mumimo_groups = + nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]); + + /* bits 0 and 63 are reserved and must be zero */ + if ((mumimo_groups[0] & BIT(0)) || + (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(7))) + return -EINVAL; + + params->vht_mumimo_groups = mumimo_groups; + change = true; + } + + if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) { + u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; + + if (type != NL80211_IFTYPE_MONITOR) + return -EINVAL; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) + return -EOPNOTSUPP; + + params->vht_mumimo_follow_addr = + nla_data(info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]); + change = true; + } + + return change ? 1 : 0; +} + static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, struct net_device *netdev, u8 use_4addr, enum nl80211_iftype iftype) @@ -2739,7 +2822,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) int err; enum nl80211_iftype otype, ntype; struct net_device *dev = info->user_ptr[1]; - u32 _flags, *flags = NULL; bool change = false; memset(¶ms, 0, sizeof(params)); @@ -2782,56 +2864,14 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) params.use_4addr = -1; } - if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { - if (ntype != NL80211_IFTYPE_MONITOR) - return -EINVAL; - err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], - &_flags); - if (err) - return err; - - flags = &_flags; - change = true; - } - - if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) { - const u8 *mumimo_groups; - u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; - - if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) - return -EOPNOTSUPP; - - mumimo_groups = - nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]); - - /* bits 0 and 63 are reserved and must be zero */ - if ((mumimo_groups[0] & BIT(7)) || - (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(0))) - return -EINVAL; - - memcpy(params.vht_mumimo_groups, mumimo_groups, - VHT_MUMIMO_GROUPS_DATA_LEN); - change = true; - } - - if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) { - u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; - - if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) - return -EOPNOTSUPP; - - nla_memcpy(params.macaddr, - info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR], - ETH_ALEN); + err = nl80211_parse_mon_options(rdev, ntype, info, ¶ms); + if (err < 0) + return err; + if (err > 0) change = true; - } - - if (flags && (*flags & MONITOR_FLAG_ACTIVE) && - !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) - return -EOPNOTSUPP; if (change) - err = cfg80211_change_iface(rdev, dev, ntype, flags, ¶ms); + err = cfg80211_change_iface(rdev, dev, ntype, ¶ms); else err = 0; @@ -2849,7 +2889,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; - u32 flags; /* to avoid failing a new interface creation due to pending removal */ cfg80211_destroy_ifaces(rdev); @@ -2885,13 +2924,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return err; } - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? - info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, - &flags); - - if (!err && (flags & MONITOR_FLAG_ACTIVE) && - !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) - return -EOPNOTSUPP; + err = nl80211_parse_mon_options(rdev, type, info, ¶ms); + if (err < 0) + return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -2899,8 +2934,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), - NET_NAME_USER, type, err ? NULL : &flags, - ¶ms); + NET_NAME_USER, type, ¶ms); if (WARN_ON(!wdev)) { nlmsg_free(msg); return -EPROTO; @@ -3561,7 +3595,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, if (sband == NULL) return -EINVAL; err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates, - nl80211_txattr_policy); + nl80211_txattr_policy, info->extack); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { @@ -3818,6 +3852,19 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, return false; return true; case NL80211_CMD_CONNECT: + /* SAE not supported yet */ + if (auth_type == NL80211_AUTHTYPE_SAE) + return false; + /* FILS with SK PFS or PK not supported yet */ + if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) + return false; + if (!wiphy_ext_feature_isset( + &rdev->wiphy, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) && + auth_type == NL80211_AUTHTYPE_FILS_SK) + return false; + return true; case NL80211_CMD_START_AP: /* SAE not supported yet */ if (auth_type == NL80211_AUTHTYPE_SAE) @@ -4100,8 +4147,8 @@ static int parse_station_flags(struct genl_info *info, if (!nla) return 0; - if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, - nla, sta_flags_policy)) + if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla, + sta_flags_policy, info->extack)) return -EINVAL; /* @@ -4151,7 +4198,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, struct nlattr *rate; u32 bitrate; u16 bitrate_compat; - enum nl80211_attrs rate_flg; + enum nl80211_rate_info rate_flg; rate = nla_nest_start(msg, attr); if (!rate) @@ -4728,7 +4775,7 @@ static int nl80211_parse_sta_wme(struct genl_info *info, nla = info->attrs[NL80211_ATTR_STA_WME]; err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, - nl80211_sta_wme_policy); + nl80211_sta_wme_policy, info->extack); if (err) return err; @@ -5703,7 +5750,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, cur_params.dot11MeshGateAnnouncementProtocol) || nla_put_u8(msg, NL80211_MESHCONF_FORWARDING, cur_params.dot11MeshForwarding) || - nla_put_u32(msg, NL80211_MESHCONF_RSSI_THRESHOLD, + nla_put_s32(msg, NL80211_MESHCONF_RSSI_THRESHOLD, cur_params.rssi_threshold) || nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE, cur_params.ht_opmode) || @@ -5853,7 +5900,7 @@ do { \ return -EINVAL; if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], - nl80211_meshconf_params_policy)) + nl80211_meshconf_params_policy, info->extack)) return -EINVAL; /* This makes sure that there aren't more than 32 mesh config @@ -6002,7 +6049,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, return -EINVAL; if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], - nl80211_mesh_setup_params_policy)) + nl80211_mesh_setup_params_policy, info->extack)) return -EINVAL; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC]) @@ -6393,7 +6440,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX, - nl_reg_rule, reg_rule_policy); + nl_reg_rule, reg_rule_policy, + info->extack); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); @@ -6461,7 +6509,7 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, return -EINVAL; err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, - nl80211_bss_select_policy); + nl80211_bss_select_policy, NULL); if (err) return err; @@ -6545,6 +6593,19 @@ static int nl80211_parse_random_mac(struct nlattr **attrs, return 0; } +static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) +{ + ASSERT_WDEV_LOCK(wdev); + + if (!cfg80211_beaconing_iface_active(wdev)) + return true; + + if (!(wdev->chandef.chan->flags & IEEE80211_CHAN_RADAR)) + return true; + + return regulatory_pre_cac_allowed(wdev->wiphy); +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6670,6 +6731,25 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; + wdev_lock(wdev); + if (!cfg80211_off_channel_oper_allowed(wdev)) { + struct ieee80211_channel *chan; + + if (request->n_channels != 1) { + wdev_unlock(wdev); + err = -EBUSY; + goto out_free; + } + + chan = request->channels[0]; + if (chan->center_freq != wdev->chandef.chan->center_freq) { + wdev_unlock(wdev); + err = -EBUSY; + goto out_free; + } + } + wdev_unlock(wdev); + i = 0; if (n_ssids) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { @@ -6862,7 +6942,7 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, return -EINVAL; err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX, - attr, nl80211_plan_policy); + attr, nl80211_plan_policy, NULL); if (err) return err; @@ -6953,11 +7033,19 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, err = nla_parse_nested(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy); + attr, nl80211_match_policy, + NULL); if (err) return ERR_PTR(err); + + /* SSID and BSSID are mutually exclusive */ + if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] && + tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) + return ERR_PTR(-EINVAL); + /* add other standalone attributes here */ - if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]) { + if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] || + tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) { n_match_sets++; continue; } @@ -7128,15 +7216,17 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_MATCH], tmp) { - struct nlattr *ssid, *rssi; + struct nlattr *ssid, *bssid, *rssi; err = nla_parse_nested(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy); + attr, nl80211_match_policy, + NULL); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; - if (ssid) { + bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]; + if (ssid || bssid) { if (WARN_ON(i >= n_match_sets)) { /* this indicates a programming error, * the loop above should have verified @@ -7146,14 +7236,25 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_free; } - if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { - err = -EINVAL; - goto out_free; + if (ssid) { + if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->match_sets[i].ssid.ssid, + nla_data(ssid), nla_len(ssid)); + request->match_sets[i].ssid.ssid_len = + nla_len(ssid); } - memcpy(request->match_sets[i].ssid.ssid, - nla_data(ssid), nla_len(ssid)); - request->match_sets[i].ssid.ssid_len = - nla_len(ssid); + if (bssid) { + if (nla_len(bssid) != ETH_ALEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->match_sets[i].bssid, + nla_data(bssid), ETH_ALEN); + } + /* special attribute - old implementation w/a */ request->match_sets[i].rssi_thold = default_match_rssi; @@ -7261,14 +7362,16 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_sched_scan_request *sched_scan_req; + bool want_multi; int err; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || - !rdev->ops->sched_scan_start) + if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_start) return -EOPNOTSUPP; - if (rdev->sched_scan_req) - return -EINPROGRESS; + want_multi = info->attrs[NL80211_ATTR_SCHED_SCAN_MULTI]; + err = cfg80211_sched_scan_req_possible(rdev, want_multi); + if (err) + return err; sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, info->attrs, @@ -7278,6 +7381,14 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (err) goto out_err; + /* leave request id zero for legacy request + * or if driver does not support multi-scheduled scan + */ + if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) { + while (!sched_scan_req->reqid) + sched_scan_req->reqid = rdev->wiphy.cookie_counter++; + } + err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; @@ -7288,10 +7399,9 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) sched_scan_req->owner_nlportid = info->snd_portid; - rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); + cfg80211_add_sched_scan_req(rdev, sched_scan_req); - nl80211_send_sched_scan(rdev, dev, - NL80211_CMD_START_SCHED_SCAN); + nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_START_SCHED_SCAN); return 0; out_free: @@ -7303,13 +7413,27 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { + struct cfg80211_sched_scan_request *req; struct cfg80211_registered_device *rdev = info->user_ptr[0]; + u64 cookie; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || - !rdev->ops->sched_scan_stop) + if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; - return __cfg80211_stop_sched_scan(rdev, false); + if (info->attrs[NL80211_ATTR_COOKIE]) { + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + return __cfg80211_stop_sched_scan(rdev, cookie, false); + } + + req = list_first_or_null_rcu(&rdev->sched_scan_req_list, + struct cfg80211_sched_scan_request, + list); + if (!req || req->reqid || + (req->owner_nlportid && + req->owner_nlportid != info->snd_portid)) + return -ENOENT; + + return cfg80211_stop_sched_scan_req(rdev, req, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, @@ -7433,7 +7557,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], - nl80211_policy); + nl80211_policy, info->extack); if (err) return err; @@ -8639,7 +8763,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - attrbuf, nl80211_fam.maxattr, nl80211_policy); + attrbuf, nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) goto out_err; @@ -8867,6 +8992,35 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } } + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) && + info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] && + info->attrs[NL80211_ATTR_FILS_ERP_REALM] && + info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] && + info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { + connect.fils_erp_username = + nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); + connect.fils_erp_username_len = + nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); + connect.fils_erp_realm = + nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); + connect.fils_erp_realm_len = + nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); + connect.fils_erp_next_seq_num = + nla_get_u16( + info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]); + connect.fils_erp_rrk = + nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); + connect.fils_erp_rrk_len = + nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); + } else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] || + info->attrs[NL80211_ATTR_FILS_ERP_REALM] || + info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] || + info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { + kzfree(connkeys); + return -EINVAL; + } + wdev_lock(dev->ieee80211_ptr); err = cfg80211_connect(rdev, dev, &connect, connkeys, @@ -8986,14 +9140,28 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); - if (!info->attrs[NL80211_ATTR_MAC]) - return -EINVAL; - if (!info->attrs[NL80211_ATTR_PMKID]) return -EINVAL; pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); - pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_MAC]) { + pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + } else if (info->attrs[NL80211_ATTR_SSID] && + info->attrs[NL80211_ATTR_FILS_CACHE_ID] && + (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA || + info->attrs[NL80211_ATTR_PMK])) { + pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + pmksa.cache_id = + nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); + } else { + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_PMK]) { + pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); + pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); + } if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) @@ -9096,6 +9264,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_chan_def chandef; + const struct cfg80211_chan_def *compat_chandef; struct sk_buff *msg; void *hdr; u64 cookie; @@ -9124,6 +9293,18 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (err) return err; + wdev_lock(wdev); + if (!cfg80211_off_channel_oper_allowed(wdev) && + !cfg80211_chandef_identical(&wdev->chandef, &chandef)) { + compat_chandef = cfg80211_chandef_compatible(&wdev->chandef, + &chandef); + if (compat_chandef != &chandef) { + wdev_unlock(wdev); + return -EBUSY; + } + } + wdev_unlock(wdev); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -9299,6 +9480,13 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; + wdev_lock(wdev); + if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev)) { + wdev_unlock(wdev); + return -EBUSY; + } + wdev_unlock(wdev); + params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); @@ -9466,7 +9654,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) static const struct nla_policy nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = { - [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_BINARY }, [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, @@ -9495,28 +9683,123 @@ static int nl80211_set_cqm_txe(struct genl_info *info, return rdev_set_cqm_txe_config(rdev, dev, rate, pkts, intvl); } +static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + s32 last, low, high; + u32 hyst; + int i, n; + int err; + + /* RSSI reporting disabled? */ + if (!wdev->cqm_config) + return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + + /* + * Obtain current RSSI value if possible, if not and no RSSI threshold + * event has been received yet, we should receive an event after a + * connection is established and enough beacons received to calculate + * the average. + */ + if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && + rdev->ops->get_station) { + struct station_info sinfo; + u8 *mac_addr; + + mac_addr = wdev->current_bss->pub.bssid; + + err = rdev_get_station(rdev, dev, mac_addr, &sinfo); + if (err) + return err; + + if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) + wdev->cqm_config->last_rssi_event_value = + (s8) sinfo.rx_beacon_signal_avg; + } + + last = wdev->cqm_config->last_rssi_event_value; + hyst = wdev->cqm_config->rssi_hyst; + n = wdev->cqm_config->n_rssi_thresholds; + + for (i = 0; i < n; i++) + if (last < wdev->cqm_config->rssi_thresholds[i]) + break; + + low = i > 0 ? + (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; + high = i < n ? + (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + + return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); +} + static int nl80211_set_cqm_rssi(struct genl_info *info, - s32 threshold, u32 hysteresis) + const s32 *thresholds, int n_thresholds, + u32 hysteresis) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; + int i, err; + s32 prev = S32_MIN; - if (threshold > 0) - return -EINVAL; - - /* disabling - hysteresis should also be zero then */ - if (threshold == 0) - hysteresis = 0; + /* Check all values negative and sorted */ + for (i = 0; i < n_thresholds; i++) { + if (thresholds[i] > 0 || thresholds[i] <= prev) + return -EINVAL; - if (!rdev->ops->set_cqm_rssi_config) - return -EOPNOTSUPP; + prev = thresholds[i]; + } if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - return rdev_set_cqm_rssi_config(rdev, dev, threshold, hysteresis); + wdev_lock(wdev); + cfg80211_cqm_config_free(wdev); + wdev_unlock(wdev); + + if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { + if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ + return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); + + return rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], hysteresis); + } + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST)) + return -EOPNOTSUPP; + + if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ + n_thresholds = 0; + + wdev_lock(wdev); + if (n_thresholds) { + struct cfg80211_cqm_config *cqm_config; + + cqm_config = kzalloc(sizeof(struct cfg80211_cqm_config) + + n_thresholds * sizeof(s32), GFP_KERNEL); + if (!cqm_config) { + err = -ENOMEM; + goto unlock; + } + + cqm_config->rssi_hyst = hysteresis; + cqm_config->n_rssi_thresholds = n_thresholds; + memcpy(cqm_config->rssi_thresholds, thresholds, + n_thresholds * sizeof(s32)); + + wdev->cqm_config = cqm_config; + } + + err = cfg80211_cqm_rssi_update(rdev, dev); + +unlock: + wdev_unlock(wdev); + + return err; } static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) @@ -9530,16 +9813,22 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) return -EINVAL; err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm, - nl80211_attr_cqm_policy); + nl80211_attr_cqm_policy, info->extack); if (err) return err; if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] && attrs[NL80211_ATTR_CQM_RSSI_HYST]) { - s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); + const s32 *thresholds = + nla_data(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); + int len = nla_len(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]); - return nl80211_set_cqm_rssi(info, threshold, hysteresis); + if (len % 4) + return -EINVAL; + + return nl80211_set_cqm_rssi(info, thresholds, len / 4, + hysteresis); } if (attrs[NL80211_ATTR_CQM_TXE_RATE] && @@ -9940,7 +10229,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return -EINVAL; err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr, - nl80211_wowlan_tcp_policy); + nl80211_wowlan_tcp_policy, NULL); if (err) return err; @@ -10085,7 +10374,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, goto out; } - err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy); + err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy, + NULL); if (err) goto out; @@ -10122,7 +10412,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG, info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], - nl80211_wowlan_policy); + nl80211_wowlan_policy, info->extack); if (err) return err; @@ -10205,7 +10495,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - NULL); + NULL, info->extack); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10416,7 +10706,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, - nl80211_coalesce_policy); + nl80211_coalesce_policy, NULL); if (err) return err; @@ -10454,7 +10744,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; @@ -10575,7 +10865,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA, info->attrs[NL80211_ATTR_REKEY_DATA], - nl80211_rekey_policy); + nl80211_rekey_policy, info->extack); if (err) return err; @@ -10892,7 +11182,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, info->attrs[NL80211_ATTR_NAN_FUNC], - nl80211_nan_func_policy); + nl80211_nan_func_policy, info->extack); if (err) return err; @@ -10989,7 +11279,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX, tb[NL80211_NAN_FUNC_SRF], - nl80211_nan_srf_policy); + nl80211_nan_srf_policy, info->extack); if (err) goto out; @@ -11524,8 +11814,8 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, return 0; } - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - attrbuf, nl80211_fam.maxattr, nl80211_policy); + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, + nl80211_fam.maxattr, nl80211_policy, NULL); if (err) return err; @@ -12970,18 +13260,19 @@ static int nl80211_prep_scan_msg(struct sk_buff *msg, static int nl80211_prep_sched_scan_msg(struct sk_buff *msg, - struct cfg80211_registered_device *rdev, - struct net_device *netdev, - u32 portid, u32 seq, int flags, u32 cmd) + struct cfg80211_sched_scan_request *req, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) return -1; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, + wiphy_to_rdev(req->wiphy)->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, req->dev->ifindex) || + nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->reqid, + NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13041,8 +13332,7 @@ void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev, NL80211_MCGRP_SCAN, GFP_KERNEL); } -void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 cmd) +void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd) { struct sk_buff *msg; @@ -13050,12 +13340,12 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_prep_sched_scan_msg(msg, rdev, netdev, 0, 0, 0, cmd) < 0) { + if (nl80211_prep_sched_scan_msg(msg, req, cmd) < 0) { nlmsg_free(msg); return; } - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(req->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } @@ -13296,17 +13586,16 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, } void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - int status, - enum nl80211_timeout_reason timeout_reason, + struct net_device *netdev, + struct cfg80211_connect_resp_params *cr, gfp_t gfp) { struct sk_buff *msg; void *hdr; - msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp); + msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len + + cr->fils_kek_len + cr->pmk_len + + (cr->pmkid ? WLAN_PMKID_LEN : 0), gfp); if (!msg) return; @@ -13318,17 +13607,31 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - (bssid && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) || + (cr->bssid && + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, cr->bssid)) || nla_put_u16(msg, NL80211_ATTR_STATUS_CODE, - status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE : - status) || - (status < 0 && + cr->status < 0 ? WLAN_STATUS_UNSPECIFIED_FAILURE : + cr->status) || + (cr->status < 0 && (nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) || - nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON, timeout_reason))) || - (req_ie && - nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) || - (resp_ie && - nla_put(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie))) + nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON, + cr->timeout_reason))) || + (cr->req_ie && + nla_put(msg, NL80211_ATTR_REQ_IE, cr->req_ie_len, cr->req_ie)) || + (cr->resp_ie && + nla_put(msg, NL80211_ATTR_RESP_IE, cr->resp_ie_len, + cr->resp_ie)) || + (cr->update_erp_next_seq_num && + nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, + cr->fils_erp_next_seq_num)) || + (cr->status == WLAN_STATUS_SUCCESS && + ((cr->fils_kek && + nla_put(msg, NL80211_ATTR_FILS_KEK, cr->fils_kek_len, + cr->fils_kek)) || + (cr->pmk && + nla_put(msg, NL80211_ATTR_PMK, cr->pmk_len, cr->pmk)) || + (cr->pmkid && + nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->pmkid))))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13343,14 +13646,14 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, } void nl80211_send_roamed(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) + struct net_device *netdev, + struct cfg80211_roam_info *info, gfp_t gfp) { struct sk_buff *msg; void *hdr; + const u8 *bssid = info->bss ? info->bss->bssid : info->bssid; - msg = nlmsg_new(100 + req_ie_len + resp_ie_len, gfp); + msg = nlmsg_new(100 + info->req_ie_len + info->resp_ie_len, gfp); if (!msg) return; @@ -13363,10 +13666,12 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid) || - (req_ie && - nla_put(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie)) || - (resp_ie && - nla_put(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie))) + (info->req_ie && + nla_put(msg, NL80211_ATTR_REQ_IE, info->req_ie_len, + info->req_ie)) || + (info->resp_ie && + nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, + info->resp_ie))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13968,6 +14273,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, s32 rssi_level, gfp_t gfp) { struct sk_buff *msg; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level); @@ -13975,6 +14282,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) return; + if (wdev->cqm_config) { + wdev->cqm_config->last_rssi_event_value = rssi_level; + + cfg80211_cqm_rssi_update(rdev, dev); + + if (rssi_level == 0) + rssi_level = wdev->cqm_config->last_rssi_event_value; + } + msg = cfg80211_prepare_cqm(dev, NULL, gfp); if (!msg) return; @@ -14619,26 +14935,26 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - bool schedule_destroy_work = false; - struct cfg80211_sched_scan_request *sched_scan_req = - rcu_dereference(rdev->sched_scan_req); - - if (sched_scan_req && notify->portid && - sched_scan_req->owner_nlportid == notify->portid) { - sched_scan_req->owner_nlportid = 0; + struct cfg80211_sched_scan_request *sched_scan_req; - if (rdev->ops->sched_scan_stop && - rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + list_for_each_entry_rcu(sched_scan_req, + &rdev->sched_scan_req_list, + list) { + if (sched_scan_req->owner_nlportid == notify->portid) { + sched_scan_req->nl_owner_dead = true; schedule_work(&rdev->sched_scan_stop_wk); + } } list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); - if (wdev->owner_nlportid == notify->portid) - schedule_destroy_work = true; - else if (wdev->conn_owner_nlportid == notify->portid) + if (wdev->owner_nlportid == notify->portid) { + wdev->nl_owner_dead = true; + schedule_work(&rdev->destroy_work); + } else if (wdev->conn_owner_nlportid == notify->portid) { schedule_work(&wdev->disconnect_wk); + } } spin_lock_bh(&rdev->beacon_registrations_lock); @@ -14651,19 +14967,6 @@ static int nl80211_netlink_notify(struct notifier_block * nb, } } spin_unlock_bh(&rdev->beacon_registrations_lock); - - if (schedule_destroy_work) { - struct cfg80211_iface_destroy *destroy; - - destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC); - if (destroy) { - destroy->nlportid = notify->portid; - spin_lock(&rdev->destroy_list_lock); - list_add(&destroy->list, &rdev->destroy_list); - spin_unlock(&rdev->destroy_list_lock); - schedule_work(&rdev->destroy_work); - } - } } rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index e488dca87423eb7c85fc80d408c3d45f91e9a97a..b96933322077c6238dd39c4d990941e85154e757 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -16,8 +16,7 @@ struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, bool aborted); void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev, struct sk_buff *msg); -void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 cmd); +void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd); void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, struct regulatory_request *request); @@ -53,16 +52,12 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - int status, - enum nl80211_timeout_reason timeout_reason, + struct net_device *netdev, + struct cfg80211_connect_resp_params *params, gfp_t gfp); void nl80211_send_roamed(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); + struct net_device *netdev, + struct cfg80211_roam_info *info, gfp_t gfp); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2f425075ada8eae81c8ecb04d975d0c9f0d36578..0598c1e5d0adbba6375beec2dcfb667fb4883192 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -36,13 +36,13 @@ static inline void rdev_set_wakeup(struct cfg80211_registered_device *rdev, static inline struct wireless_dev *rdev_add_virtual_intf(struct cfg80211_registered_device *rdev, char *name, unsigned char name_assign_type, - enum nl80211_iftype type, u32 *flags, + enum nl80211_iftype type, struct vif_params *params) { struct wireless_dev *ret; trace_rdev_add_virtual_intf(&rdev->wiphy, name, type); ret = rdev->ops->add_virtual_intf(&rdev->wiphy, name, name_assign_type, - type, flags, params); + type, params); trace_rdev_return_wdev(&rdev->wiphy, ret); return ret; } @@ -61,12 +61,11 @@ rdev_del_virtual_intf(struct cfg80211_registered_device *rdev, static inline int rdev_change_virtual_intf(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype type, - u32 *flags, struct vif_params *params) + struct vif_params *params) { int ret; trace_rdev_change_virtual_intf(&rdev->wiphy, dev, type); - ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, flags, - params); + ret = rdev->ops->change_virtual_intf(&rdev->wiphy, dev, type, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -749,6 +748,18 @@ rdev_set_cqm_rssi_config(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_cqm_rssi_range_config(struct cfg80211_registered_device *rdev, + struct net_device *dev, s32 low, s32 high) +{ + int ret; + trace_rdev_set_cqm_rssi_range_config(&rdev->wiphy, dev, low, high); + ret = rdev->ops->set_cqm_rssi_range_config(&rdev->wiphy, dev, + low, high); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev, struct net_device *dev, u32 rate, u32 pkts, u32 intvl) @@ -802,18 +813,18 @@ rdev_sched_scan_start(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *request) { int ret; - trace_rdev_sched_scan_start(&rdev->wiphy, dev, request); + trace_rdev_sched_scan_start(&rdev->wiphy, dev, request->reqid); ret = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_sched_scan_stop(struct cfg80211_registered_device *rdev, - struct net_device *dev) + struct net_device *dev, u64 reqid) { int ret; - trace_rdev_sched_scan_stop(&rdev->wiphy, dev); - ret = rdev->ops->sched_scan_stop(&rdev->wiphy, dev); + trace_rdev_sched_scan_stop(&rdev->wiphy, dev, reqid); + ret = rdev->ops->sched_scan_stop(&rdev->wiphy, dev, reqid); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 753efcd51fa3495c66e7063d3ce6c1aca7fd9d14..5fae296a6a583256f9874319dfaa9ee87e977fff 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2067,6 +2067,88 @@ reg_process_hint_country_ie(struct wiphy *wiphy, return REG_REQ_IGNORE; } +bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2) +{ + const struct ieee80211_regdomain *wiphy1_regd = NULL; + const struct ieee80211_regdomain *wiphy2_regd = NULL; + const struct ieee80211_regdomain *cfg80211_regd = NULL; + bool dfs_domain_same; + + rcu_read_lock(); + + cfg80211_regd = rcu_dereference(cfg80211_regdomain); + wiphy1_regd = rcu_dereference(wiphy1->regd); + if (!wiphy1_regd) + wiphy1_regd = cfg80211_regd; + + wiphy2_regd = rcu_dereference(wiphy2->regd); + if (!wiphy2_regd) + wiphy2_regd = cfg80211_regd; + + dfs_domain_same = wiphy1_regd->dfs_region == wiphy2_regd->dfs_region; + + rcu_read_unlock(); + + return dfs_domain_same; +} + +static void reg_copy_dfs_chan_state(struct ieee80211_channel *dst_chan, + struct ieee80211_channel *src_chan) +{ + if (!(dst_chan->flags & IEEE80211_CHAN_RADAR) || + !(src_chan->flags & IEEE80211_CHAN_RADAR)) + return; + + if (dst_chan->flags & IEEE80211_CHAN_DISABLED || + src_chan->flags & IEEE80211_CHAN_DISABLED) + return; + + if (src_chan->center_freq == dst_chan->center_freq && + dst_chan->dfs_state == NL80211_DFS_USABLE) { + dst_chan->dfs_state = src_chan->dfs_state; + dst_chan->dfs_state_entered = src_chan->dfs_state_entered; + } +} + +static void wiphy_share_dfs_chan_state(struct wiphy *dst_wiphy, + struct wiphy *src_wiphy) +{ + struct ieee80211_supported_band *src_sband, *dst_sband; + struct ieee80211_channel *src_chan, *dst_chan; + int i, j, band; + + if (!reg_dfs_domain_same(dst_wiphy, src_wiphy)) + return; + + for (band = 0; band < NUM_NL80211_BANDS; band++) { + dst_sband = dst_wiphy->bands[band]; + src_sband = src_wiphy->bands[band]; + if (!dst_sband || !src_sband) + continue; + + for (i = 0; i < dst_sband->n_channels; i++) { + dst_chan = &dst_sband->channels[i]; + for (j = 0; j < src_sband->n_channels; j++) { + src_chan = &src_sband->channels[j]; + reg_copy_dfs_chan_state(dst_chan, src_chan); + } + } + } +} + +static void wiphy_all_share_dfs_chan_state(struct wiphy *wiphy) +{ + struct cfg80211_registered_device *rdev; + + ASSERT_RTNL(); + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (wiphy == &rdev->wiphy) + continue; + wiphy_share_dfs_chan_state(wiphy, &rdev->wiphy); + } +} + /* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request) { @@ -2110,6 +2192,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) if (treatment == REG_REQ_ALREADY_SET && wiphy && wiphy->regulatory_flags & REGULATORY_STRICT_REG) { wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_all_share_dfs_chan_state(wiphy); reg_check_channels(); } @@ -3061,6 +3144,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy) lr = get_last_request(); wiphy_update_regulatory(wiphy, lr->initiator); + wiphy_all_share_dfs_chan_state(wiphy); } void wiphy_regulatory_deregister(struct wiphy *wiphy) @@ -3120,6 +3204,67 @@ bool regulatory_indoor_allowed(void) return reg_is_indoor; } +bool regulatory_pre_cac_allowed(struct wiphy *wiphy) +{ + const struct ieee80211_regdomain *regd = NULL; + const struct ieee80211_regdomain *wiphy_regd = NULL; + bool pre_cac_allowed = false; + + rcu_read_lock(); + + regd = rcu_dereference(cfg80211_regdomain); + wiphy_regd = rcu_dereference(wiphy->regd); + if (!wiphy_regd) { + if (regd->dfs_region == NL80211_DFS_ETSI) + pre_cac_allowed = true; + + rcu_read_unlock(); + + return pre_cac_allowed; + } + + if (regd->dfs_region == wiphy_regd->dfs_region && + wiphy_regd->dfs_region == NL80211_DFS_ETSI) + pre_cac_allowed = true; + + rcu_read_unlock(); + + return pre_cac_allowed; +} + +void regulatory_propagate_dfs_state(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state, + enum nl80211_radar_event event) +{ + struct cfg80211_registered_device *rdev; + + ASSERT_RTNL(); + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (wiphy == &rdev->wiphy) + continue; + + if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) + continue; + + if (!ieee80211_get_channel(&rdev->wiphy, + chandef->chan->center_freq)) + continue; + + cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state); + + if (event == NL80211_RADAR_DETECTED || + event == NL80211_RADAR_CAC_FINISHED) + cfg80211_sched_dfs_chan_update(rdev); + + nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL); + } +} + int __init regulatory_init(void) { int err = 0; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index f6ced316b5a49e204632ef42675309614237cc54..ca7fedf2e7a16e2a39ec7b0d3c988154d0e76d79 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -143,4 +143,40 @@ int cfg80211_get_unii(int freq); */ bool regulatory_indoor_allowed(void); +/* + * Grace period to timeout pre-CAC results on the dfs channels. This timeout + * value is used for Non-ETSI domain. + * TODO: May be make this timeout available through regdb? + */ +#define REG_PRE_CAC_EXPIRY_GRACE_MS 2000 + +/** + * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain + * @wiphy: wiphy for which pre-CAC capability is checked. + + * Pre-CAC is allowed only in ETSI domain. + */ +bool regulatory_pre_cac_allowed(struct wiphy *wiphy); + +/** + * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys + * @wiphy - wiphy on which radar is detected and the event will be propagated + * to other available wiphys having the same DFS domain + * @chandef - Channel definition of radar detected channel + * @dfs_state - DFS channel state to be set + * @event - Type of radar event which triggered this DFS state change + * + * This function should be called with rtnl lock held. + */ +void regulatory_propagate_dfs_state(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_dfs_state dfs_state, + enum nl80211_radar_event event); + +/** + * reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured + * @wiphy1 - wiphy it's dfs_region to be checked against that of wiphy2 + * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1 + */ +bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 21be56b3128ee74c4119ee67d5f0b85fecfe0b2e..9f0901f3e42b60db2c6aee7927b2b0ddc0ab7759 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -300,93 +300,168 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, } EXPORT_SYMBOL(cfg80211_scan_done); -void __cfg80211_sched_scan_results(struct work_struct *wk) +void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req) { - struct cfg80211_registered_device *rdev; - struct cfg80211_sched_scan_request *request; + ASSERT_RTNL(); - rdev = container_of(wk, struct cfg80211_registered_device, - sched_scan_results_wk); + list_add_rcu(&req->list, &rdev->sched_scan_req_list); +} - rtnl_lock(); +static void cfg80211_del_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req) +{ + ASSERT_RTNL(); - request = rtnl_dereference(rdev->sched_scan_req); + list_del_rcu(&req->list); + kfree_rcu(req, rcu_head); +} - /* we don't have sched_scan_req anymore if the scan is stopping */ - if (request) { - if (request->flags & NL80211_SCAN_FLAG_FLUSH) { - /* flush entries from previous scans */ - spin_lock_bh(&rdev->bss_lock); - __cfg80211_bss_expire(rdev, request->scan_start); - spin_unlock_bh(&rdev->bss_lock); - request->scan_start = jiffies; - } - nl80211_send_sched_scan(rdev, request->dev, - NL80211_CMD_SCHED_SCAN_RESULTS); +static struct cfg80211_sched_scan_request * +cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid) +{ + struct cfg80211_sched_scan_request *pos; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + + list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list) { + if (pos->reqid == reqid) + return pos; } + return NULL; +} + +/* + * Determines if a scheduled scan request can be handled. When a legacy + * scheduled scan is running no other scheduled scan is allowed regardless + * whether the request is for legacy or multi-support scan. When a multi-support + * scheduled scan is running a request for legacy scan is not allowed. In this + * case a request for multi-support scan can be handled if resources are + * available, ie. struct wiphy::max_sched_scan_reqs limit is not yet reached. + */ +int cfg80211_sched_scan_req_possible(struct cfg80211_registered_device *rdev, + bool want_multi) +{ + struct cfg80211_sched_scan_request *pos; + int i = 0; + + list_for_each_entry(pos, &rdev->sched_scan_req_list, list) { + /* request id zero means legacy in progress */ + if (!i && !pos->reqid) + return -EINPROGRESS; + i++; + } + + if (i) { + /* no legacy allowed when multi request(s) are active */ + if (!want_multi) + return -EINPROGRESS; + + /* resource limit reached */ + if (i == rdev->wiphy.max_sched_scan_reqs) + return -ENOSPC; + } + return 0; +} + +void cfg80211_sched_scan_results_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *req, *tmp; + rdev = container_of(work, struct cfg80211_registered_device, + sched_scan_res_wk); + + rtnl_lock(); + list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { + if (req->report_results) { + req->report_results = false; + if (req->flags & NL80211_SCAN_FLAG_FLUSH) { + /* flush entries from previous scans */ + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, req->scan_start); + spin_unlock_bh(&rdev->bss_lock); + req->scan_start = jiffies; + } + nl80211_send_sched_scan(req, + NL80211_CMD_SCHED_SCAN_RESULTS); + } + } rtnl_unlock(); } -void cfg80211_sched_scan_results(struct wiphy *wiphy) +void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) { - trace_cfg80211_sched_scan_results(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_sched_scan_request *request; + + trace_cfg80211_sched_scan_results(wiphy, reqid); /* ignore if we're not scanning */ - if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req)) - queue_work(cfg80211_wq, - &wiphy_to_rdev(wiphy)->sched_scan_results_wk); + rcu_read_lock(); + request = cfg80211_find_sched_scan_req(rdev, reqid); + if (request) { + request->report_results = true; + queue_work(cfg80211_wq, &rdev->sched_scan_res_wk); + } + rcu_read_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_RTNL(); - trace_cfg80211_sched_scan_stopped(wiphy); + trace_cfg80211_sched_scan_stopped(wiphy, reqid); - __cfg80211_stop_sched_scan(rdev, true); + __cfg80211_stop_sched_scan(rdev, reqid, true); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); -void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid) { rtnl_lock(); - cfg80211_sched_scan_stopped_rtnl(wiphy); + cfg80211_sched_scan_stopped_rtnl(wiphy, reqid); rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); -int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, - bool driver_initiated) +int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, + struct cfg80211_sched_scan_request *req, + bool driver_initiated) { - struct cfg80211_sched_scan_request *sched_scan_req; - struct net_device *dev; - ASSERT_RTNL(); - if (!rdev->sched_scan_req) - return -ENOENT; - - sched_scan_req = rtnl_dereference(rdev->sched_scan_req); - dev = sched_scan_req->dev; - if (!driver_initiated) { - int err = rdev_sched_scan_stop(rdev, dev); + int err = rdev_sched_scan_stop(rdev, req->dev, req->reqid); if (err) return err; } - nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); + nl80211_send_sched_scan(req, NL80211_CMD_SCHED_SCAN_STOPPED); - RCU_INIT_POINTER(rdev->sched_scan_req, NULL); - kfree_rcu(sched_scan_req, rcu_head); + cfg80211_del_sched_scan_req(rdev, req); return 0; } +int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, + u64 reqid, bool driver_initiated) +{ + struct cfg80211_sched_scan_request *sched_scan_req; + + ASSERT_RTNL(); + + sched_scan_req = cfg80211_find_sched_scan_req(rdev, reqid); + if (!sched_scan_req) + return -ENOENT; + + return cfg80211_stop_sched_scan_req(rdev, sched_scan_req, + driver_initiated); +} + void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs) { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index b347e63d7aaa6814f524d3b080a005a88966d65d..532a0007ce82a7380536e32d5be3749e58bb4fc7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,6 +5,7 @@ * * Copyright 2009 Johannes Berg * Copyright (C) 2009 Intel Corporation. All rights reserved. + * Copyright 2017 Intel Deutschland GmbH */ #include @@ -253,10 +254,13 @@ void cfg80211_conn_work(struct work_struct *work) } treason = NL80211_TIMEOUT_UNSPECIFIED; if (cfg80211_conn_do_work(wdev, &treason)) { - __cfg80211_connect_result( - wdev->netdev, bssid, - NULL, 0, NULL, 0, -1, false, NULL, - treason); + struct cfg80211_connect_resp_params cr; + + memset(&cr, 0, sizeof(cr)); + cr.status = -1; + cr.bssid = bssid; + cr.timeout_reason = treason; + __cfg80211_connect_result(wdev->netdev, &cr, false); } wdev_unlock(wdev); } @@ -359,10 +363,13 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; schedule_work(&rdev->conn_work); } else if (status_code != WLAN_STATUS_SUCCESS) { - __cfg80211_connect_result(wdev->netdev, mgmt->bssid, - NULL, 0, NULL, 0, - status_code, false, NULL, - NL80211_TIMEOUT_UNSPECIFIED); + struct cfg80211_connect_resp_params cr; + + memset(&cr, 0, sizeof(cr)); + cr.status = status_code; + cr.bssid = mgmt->bssid; + cr.timeout_reason = NL80211_TIMEOUT_UNSPECIFIED; + __cfg80211_connect_result(wdev->netdev, &cr, false); } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); @@ -669,12 +676,9 @@ static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); */ /* This method must consume bss one way or another */ -void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - int status, bool wextev, - struct cfg80211_bss *bss, - enum nl80211_timeout_reason timeout_reason) +void __cfg80211_connect_result(struct net_device *dev, + struct cfg80211_connect_resp_params *cr, + bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *country_ie; @@ -686,48 +690,48 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) { - cfg80211_put_bss(wdev->wiphy, bss); + cfg80211_put_bss(wdev->wiphy, cr->bss); return; } - nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, - bssid, req_ie, req_ie_len, - resp_ie, resp_ie_len, - status, timeout_reason, GFP_KERNEL); + nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr, + GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT if (wextev) { - if (req_ie && status == WLAN_STATUS_SUCCESS) { + if (cr->req_ie && cr->status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = req_ie_len; - wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, req_ie); + wrqu.data.length = cr->req_ie_len; + wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, + cr->req_ie); } - if (resp_ie && status == WLAN_STATUS_SUCCESS) { + if (cr->resp_ie && cr->status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = resp_ie_len; - wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + wrqu.data.length = cr->resp_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, + cr->resp_ie); } memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; - if (bssid && status == WLAN_STATUS_SUCCESS) { - memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); - memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN); + if (cr->bssid && cr->status == WLAN_STATUS_SUCCESS) { + memcpy(wrqu.ap_addr.sa_data, cr->bssid, ETH_ALEN); + memcpy(wdev->wext.prev_bssid, cr->bssid, ETH_ALEN); wdev->wext.prev_bssid_valid = true; } wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } #endif - if (!bss && (status == WLAN_STATUS_SUCCESS)) { + if (!cr->bss && (cr->status == WLAN_STATUS_SUCCESS)) { WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - wdev->conn_bss_type, - IEEE80211_PRIVACY_ANY); - if (bss) - cfg80211_hold_bss(bss_from_pub(bss)); + cr->bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->bssid, + wdev->ssid, wdev->ssid_len, + wdev->conn_bss_type, + IEEE80211_PRIVACY_ANY); + if (cr->bss) + cfg80211_hold_bss(bss_from_pub(cr->bss)); } if (wdev->current_bss) { @@ -736,29 +740,29 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = NULL; } - if (status != WLAN_STATUS_SUCCESS) { + if (cr->status != WLAN_STATUS_SUCCESS) { kzfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; wdev->conn_owner_nlportid = 0; - if (bss) { - cfg80211_unhold_bss(bss_from_pub(bss)); - cfg80211_put_bss(wdev->wiphy, bss); + if (cr->bss) { + cfg80211_unhold_bss(bss_from_pub(cr->bss)); + cfg80211_put_bss(wdev->wiphy, cr->bss); } cfg80211_sme_free(wdev); return; } - if (WARN_ON(!bss)) + if (WARN_ON(!cr->bss)) return; - wdev->current_bss = bss_from_pub(bss); + wdev->current_bss = bss_from_pub(cr->bss); if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) cfg80211_upload_connect_keys(wdev); rcu_read_lock(); - country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY); + country_ie = ieee80211_bss_get_ie(cr->bss, WLAN_EID_COUNTRY); if (!country_ie) { rcu_read_unlock(); return; @@ -775,70 +779,99 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, * - country_ie + 2, the start of the country ie data, and * - and country_ie[1] which is the IE length */ - regulatory_hint_country_ie(wdev->wiphy, bss->channel->band, + regulatory_hint_country_ie(wdev->wiphy, cr->bss->channel->band, country_ie + 2, country_ie[1]); kfree(country_ie); } /* Consumes bss object one way or another */ -void cfg80211_connect_bss(struct net_device *dev, const u8 *bssid, - struct cfg80211_bss *bss, const u8 *req_ie, - size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, int status, gfp_t gfp, - enum nl80211_timeout_reason timeout_reason) +void cfg80211_connect_done(struct net_device *dev, + struct cfg80211_connect_resp_params *params, + gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; + u8 *next; - if (bss) { + if (params->bss) { /* Make sure the bss entry provided by the driver is valid. */ - struct cfg80211_internal_bss *ibss = bss_from_pub(bss); + struct cfg80211_internal_bss *ibss = bss_from_pub(params->bss); if (WARN_ON(list_empty(&ibss->list))) { - cfg80211_put_bss(wdev->wiphy, bss); + cfg80211_put_bss(wdev->wiphy, params->bss); return; } } - ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); + ev = kzalloc(sizeof(*ev) + (params->bssid ? ETH_ALEN : 0) + + params->req_ie_len + params->resp_ie_len + + params->fils_kek_len + params->pmk_len + + (params->pmkid ? WLAN_PMKID_LEN : 0), gfp); if (!ev) { - cfg80211_put_bss(wdev->wiphy, bss); + cfg80211_put_bss(wdev->wiphy, params->bss); return; } ev->type = EVENT_CONNECT_RESULT; - if (bssid) - memcpy(ev->cr.bssid, bssid, ETH_ALEN); - if (req_ie_len) { - ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev); - ev->cr.req_ie_len = req_ie_len; - memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len); + next = ((u8 *)ev) + sizeof(*ev); + if (params->bssid) { + ev->cr.bssid = next; + memcpy((void *)ev->cr.bssid, params->bssid, ETH_ALEN); + next += ETH_ALEN; } - if (resp_ie_len) { - ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; - ev->cr.resp_ie_len = resp_ie_len; - memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); + if (params->req_ie_len) { + ev->cr.req_ie = next; + ev->cr.req_ie_len = params->req_ie_len; + memcpy((void *)ev->cr.req_ie, params->req_ie, + params->req_ie_len); + next += params->req_ie_len; } - if (bss) - cfg80211_hold_bss(bss_from_pub(bss)); - ev->cr.bss = bss; - ev->cr.status = status; - ev->cr.timeout_reason = timeout_reason; + if (params->resp_ie_len) { + ev->cr.resp_ie = next; + ev->cr.resp_ie_len = params->resp_ie_len; + memcpy((void *)ev->cr.resp_ie, params->resp_ie, + params->resp_ie_len); + next += params->resp_ie_len; + } + if (params->fils_kek_len) { + ev->cr.fils_kek = next; + ev->cr.fils_kek_len = params->fils_kek_len; + memcpy((void *)ev->cr.fils_kek, params->fils_kek, + params->fils_kek_len); + next += params->fils_kek_len; + } + if (params->pmk_len) { + ev->cr.pmk = next; + ev->cr.pmk_len = params->pmk_len; + memcpy((void *)ev->cr.pmk, params->pmk, params->pmk_len); + next += params->pmk_len; + } + if (params->pmkid) { + ev->cr.pmkid = next; + memcpy((void *)ev->cr.pmkid, params->pmkid, WLAN_PMKID_LEN); + next += WLAN_PMKID_LEN; + } + ev->cr.update_erp_next_seq_num = params->update_erp_next_seq_num; + if (params->update_erp_next_seq_num) + ev->cr.fils_erp_next_seq_num = params->fils_erp_next_seq_num; + if (params->bss) + cfg80211_hold_bss(bss_from_pub(params->bss)); + ev->cr.bss = params->bss; + ev->cr.status = params->status; + ev->cr.timeout_reason = params->timeout_reason; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } -EXPORT_SYMBOL(cfg80211_connect_bss); +EXPORT_SYMBOL(cfg80211_connect_done); /* Consumes bss object one way or another */ void __cfg80211_roamed(struct wireless_dev *wdev, - struct cfg80211_bss *bss, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len) + struct cfg80211_roam_info *info) { #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; @@ -856,97 +889,84 @@ void __cfg80211_roamed(struct wireless_dev *wdev, cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; - cfg80211_hold_bss(bss_from_pub(bss)); - wdev->current_bss = bss_from_pub(bss); + if (WARN_ON(!info->bss)) + return; + + cfg80211_hold_bss(bss_from_pub(info->bss)); + wdev->current_bss = bss_from_pub(info->bss); nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), - wdev->netdev, bss->bssid, - req_ie, req_ie_len, resp_ie, resp_ie_len, - GFP_KERNEL); + wdev->netdev, info, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT - if (req_ie) { + if (info->req_ie) { memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = req_ie_len; + wrqu.data.length = info->req_ie_len; wireless_send_event(wdev->netdev, IWEVASSOCREQIE, - &wrqu, req_ie); + &wrqu, info->req_ie); } - if (resp_ie) { + if (info->resp_ie) { memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = resp_ie_len; + wrqu.data.length = info->resp_ie_len; wireless_send_event(wdev->netdev, IWEVASSOCRESPIE, - &wrqu, resp_ie); + &wrqu, info->resp_ie); } memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); - memcpy(wdev->wext.prev_bssid, bss->bssid, ETH_ALEN); + memcpy(wrqu.ap_addr.sa_data, info->bss->bssid, ETH_ALEN); + memcpy(wdev->wext.prev_bssid, info->bss->bssid, ETH_ALEN); wdev->wext.prev_bssid_valid = true; wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL); #endif return; out: - cfg80211_put_bss(wdev->wiphy, bss); -} - -void cfg80211_roamed(struct net_device *dev, - struct ieee80211_channel *channel, - const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_bss *bss; - - bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, - wdev->ssid_len, - wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); - if (WARN_ON(!bss)) - return; - - cfg80211_roamed_bss(dev, bss, req_ie, req_ie_len, resp_ie, - resp_ie_len, gfp); + cfg80211_put_bss(wdev->wiphy, info->bss); } -EXPORT_SYMBOL(cfg80211_roamed); -/* Consumes bss object one way or another */ -void cfg80211_roamed_bss(struct net_device *dev, - struct cfg80211_bss *bss, const u8 *req_ie, - size_t req_ie_len, const u8 *resp_ie, - size_t resp_ie_len, gfp_t gfp) +/* Consumes info->bss object one way or another */ +void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, + gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; - if (WARN_ON(!bss)) + if (!info->bss) { + info->bss = cfg80211_get_bss(wdev->wiphy, info->channel, + info->bssid, wdev->ssid, + wdev->ssid_len, + wdev->conn_bss_type, + IEEE80211_PRIVACY_ANY); + } + + if (WARN_ON(!info->bss)) return; - ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); + ev = kzalloc(sizeof(*ev) + info->req_ie_len + info->resp_ie_len, gfp); if (!ev) { - cfg80211_put_bss(wdev->wiphy, bss); + cfg80211_put_bss(wdev->wiphy, info->bss); return; } ev->type = EVENT_ROAMED; ev->rm.req_ie = ((u8 *)ev) + sizeof(*ev); - ev->rm.req_ie_len = req_ie_len; - memcpy((void *)ev->rm.req_ie, req_ie, req_ie_len); - ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; - ev->rm.resp_ie_len = resp_ie_len; - memcpy((void *)ev->rm.resp_ie, resp_ie, resp_ie_len); - ev->rm.bss = bss; + ev->rm.req_ie_len = info->req_ie_len; + memcpy((void *)ev->rm.req_ie, info->req_ie, info->req_ie_len); + ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + info->req_ie_len; + ev->rm.resp_ie_len = info->resp_ie_len; + memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); + ev->rm.bss = info->bss; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); queue_work(cfg80211_wq, &rdev->event_work); } -EXPORT_SYMBOL(cfg80211_roamed_bss); +EXPORT_SYMBOL(cfg80211_roamed); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 776e80cef9b4ee2761681f3884427a8343e6050a..ca8b2059f92c43c5d4d3bac24f39fd141cd31141 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -576,11 +576,6 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap, TP_ARGS(wiphy, netdev) ); -DEFINE_EVENT(wiphy_netdev_evt, rdev_sched_scan_stop, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), - TP_ARGS(wiphy, netdev) -); - DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) @@ -1322,6 +1317,28 @@ TRACE_EVENT(rdev_set_cqm_rssi_config, __entry->rssi_thold, __entry->rssi_hyst) ); +TRACE_EVENT(rdev_set_cqm_rssi_range_config, + TP_PROTO(struct wiphy *wiphy, + struct net_device *netdev, s32 low, s32 high), + TP_ARGS(wiphy, netdev, low, high), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(s32, rssi_low) + __field(s32, rssi_high) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->rssi_low = low; + __entry->rssi_high = high; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT + ", range: %d - %d ", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->rssi_low, __entry->rssi_high) +); + TRACE_EVENT(rdev_set_cqm_txe_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u32 rate, u32 pkts, u32 intvl), @@ -1588,20 +1605,31 @@ DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_ARGS(wiphy, rx, tx) ); -TRACE_EVENT(rdev_sched_scan_start, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_sched_scan_request *request), - TP_ARGS(wiphy, netdev, request), +DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), + TP_ARGS(wiphy, netdev, id), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(u64, id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->id = id; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", id: %llu", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->id) +); + +DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_start, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), + TP_ARGS(wiphy, netdev, id) +); + +DEFINE_EVENT(wiphy_netdev_id_evt, rdev_sched_scan_stop, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u64 id), + TP_ARGS(wiphy, netdev, id) ); TRACE_EVENT(rdev_tdls_mgmt, @@ -2792,14 +2820,28 @@ TRACE_EVENT(cfg80211_scan_done, MAC_PR_ARG(tsf_bssid)) ); -DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_results, - TP_PROTO(struct wiphy *wiphy), - TP_ARGS(wiphy) +DECLARE_EVENT_CLASS(wiphy_id_evt, + TP_PROTO(struct wiphy *wiphy, u64 id), + TP_ARGS(wiphy, id), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(u64, id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->id = id; + ), + TP_printk(WIPHY_PR_FMT ", id: %llu", WIPHY_PR_ARG, __entry->id) ); -DEFINE_EVENT(wiphy_only_evt, cfg80211_sched_scan_stopped, - TP_PROTO(struct wiphy *wiphy), - TP_ARGS(wiphy) +DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_stopped, + TP_PROTO(struct wiphy *wiphy, u64 id), + TP_ARGS(wiphy, id) +); + +DEFINE_EVENT(wiphy_id_evt, cfg80211_sched_scan_results, + TP_PROTO(struct wiphy *wiphy, u64 id), + TP_ARGS(wiphy, id) ); TRACE_EVENT(cfg80211_get_bss, diff --git a/net/wireless/util.c b/net/wireless/util.c index 68e5f2ecee1aa22f17ab9a55eb566124e585740b..4992f1025c9d3749bddd5f22948d2dad791aa9a9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, if (iftype == NL80211_IFTYPE_MESH_POINT) skb_copy_bits(skb, hdrlen, &mesh_flags, 1); + mesh_flags &= MESH_FLAGS_AE; + switch (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { case cpu_to_le16(IEEE80211_FCTL_TODS): @@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, iftype != NL80211_IFTYPE_STATION)) return -1; if (iftype == NL80211_IFTYPE_MESH_POINT) { - if (mesh_flags & MESH_FLAGS_AE_A4) + if (mesh_flags == MESH_FLAGS_AE_A4) return -1; - if (mesh_flags & MESH_FLAGS_AE_A5_A6) { + if (mesh_flags == MESH_FLAGS_AE_A5_A6) { skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), tmp.h_dest, 2 * ETH_ALEN); @@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, ether_addr_equal(tmp.h_source, addr))) return -1; if (iftype == NL80211_IFTYPE_MESH_POINT) { - if (mesh_flags & MESH_FLAGS_AE_A5_A6) + if (mesh_flags == MESH_FLAGS_AE_A5_A6) return -1; - if (mesh_flags & MESH_FLAGS_AE_A4) + if (mesh_flags == MESH_FLAGS_AE_A4) skb_copy_bits(skb, hdrlen + offsetof(struct ieee80211s_hdr, eaddr1), tmp.h_source, ETH_ALEN); @@ -659,7 +661,7 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame, int offset, int len) { struct skb_shared_info *sh = skb_shinfo(skb); - const skb_frag_t *frag = &sh->frags[-1]; + const skb_frag_t *frag = &sh->frags[0]; struct page *frag_page; void *frag_ptr; int frag_len, frag_size; @@ -672,10 +674,10 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame, while (offset >= frag_size) { offset -= frag_size; - frag++; frag_page = skb_frag_page(frag); frag_ptr = skb_frag_address(frag); frag_size = skb_frag_size(frag); + frag++; } frag_ptr += offset; @@ -687,12 +689,12 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame, len -= cur_len; while (len > 0) { - frag++; frag_len = skb_frag_size(frag); cur_len = min(len, frag_len); __frame_add_frag(frame, skb_frag_page(frag), skb_frag_address(frag), cur_len, frag_len); len -= cur_len; + frag++; } } @@ -914,11 +916,11 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) netdev_err(dev, "failed to set key %d\n", i); continue; } - if (wdev->connect_keys->def == i) - if (rdev_set_default_key(rdev, dev, i, true, true)) { - netdev_err(dev, "failed to set defkey %d\n", i); - continue; - } + if (wdev->connect_keys->def == i && + rdev_set_default_key(rdev, dev, i, true, true)) { + netdev_err(dev, "failed to set defkey %d\n", i); + continue; + } } kzfree(wdev->connect_keys); @@ -929,7 +931,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) { struct cfg80211_event *ev; unsigned long flags; - const u8 *bssid = NULL; spin_lock_irqsave(&wdev->event_lock, flags); while (!list_empty(&wdev->event_list)) { @@ -941,20 +942,13 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) wdev_lock(wdev); switch (ev->type) { case EVENT_CONNECT_RESULT: - if (!is_zero_ether_addr(ev->cr.bssid)) - bssid = ev->cr.bssid; __cfg80211_connect_result( - wdev->netdev, bssid, - ev->cr.req_ie, ev->cr.req_ie_len, - ev->cr.resp_ie, ev->cr.resp_ie_len, - ev->cr.status, - ev->cr.status == WLAN_STATUS_SUCCESS, - ev->cr.bss, ev->cr.timeout_reason); + wdev->netdev, + &ev->cr, + ev->cr.status == WLAN_STATUS_SUCCESS); break; case EVENT_ROAMED: - __cfg80211_roamed(wdev, ev->rm.bss, ev->rm.req_ie, - ev->rm.req_ie_len, ev->rm.resp_ie, - ev->rm.resp_ie_len); + __cfg80211_roamed(wdev, &ev->rm); break; case EVENT_DISCONNECTED: __cfg80211_disconnected(wdev->netdev, @@ -991,7 +985,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) int cfg80211_change_iface(struct cfg80211_registered_device *rdev, struct net_device *dev, enum nl80211_iftype ntype, - u32 *flags, struct vif_params *params) + struct vif_params *params) { int err; enum nl80211_iftype otype = dev->ieee80211_ptr->iftype; @@ -1049,7 +1043,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, cfg80211_process_rdev_events(rdev); } - err = rdev_change_virtual_intf(rdev, dev, ntype, flags, params); + err = rdev_change_virtual_intf(rdev, dev, ntype, params); WARN_ON(!err && dev->ieee80211_ptr->iftype != ntype); @@ -1097,6 +1091,35 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return err; } +static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate) +{ + int modulation, streams, bitrate; + + /* the formula below does only work for MCS values smaller than 32 */ + if (WARN_ON_ONCE(rate->mcs >= 32)) + return 0; + + modulation = rate->mcs & 7; + streams = (rate->mcs >> 3) + 1; + + bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000; + + if (modulation < 4) + bitrate *= (modulation + 1); + else if (modulation == 4) + bitrate *= (modulation + 2); + else + bitrate *= (modulation + 3); + + bitrate *= streams; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +} + static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) { static const u32 __mcs2bitrate[] = { @@ -1230,39 +1253,14 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) u32 cfg80211_calculate_bitrate(struct rate_info *rate) { - int modulation, streams, bitrate; - - if (!(rate->flags & RATE_INFO_FLAGS_MCS) && - !(rate->flags & RATE_INFO_FLAGS_VHT_MCS)) - return rate->legacy; + if (rate->flags & RATE_INFO_FLAGS_MCS) + return cfg80211_calculate_bitrate_ht(rate); if (rate->flags & RATE_INFO_FLAGS_60G) return cfg80211_calculate_bitrate_60g(rate); if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) return cfg80211_calculate_bitrate_vht(rate); - /* the formula below does only work for MCS values smaller than 32 */ - if (WARN_ON_ONCE(rate->mcs >= 32)) - return 0; - - modulation = rate->mcs & 7; - streams = (rate->mcs >> 3) + 1; - - bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000; - - if (modulation < 4) - bitrate *= (modulation + 1); - else if (modulation == 4) - bitrate *= (modulation + 2); - else - bitrate *= (modulation + 3); - - bitrate *= streams; - - if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) - bitrate = (bitrate / 9) * 10; - - /* do NOT round down here */ - return (bitrate + 50000) / 100000; + return rate->legacy; } EXPORT_SYMBOL(cfg80211_calculate_bitrate); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index a220156cf2175a459727fcf67e1291036dca21ae..5d4a02c7979b0d7478ff2b39f82e24bed97a6e78 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -62,7 +62,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); + return cfg80211_change_iface(rdev, dev, type, &vifparams); } EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8b911c29860e79f21b0ac8e1d3a80ed373fd537e..5a1a98df3499841172f47b71418f95d9558158d0 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1791,32 +1791,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb) static int __init x25_init(void) { - int rc = proto_register(&x25_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&x25_proto, 0); + if (rc) goto out; rc = sock_register(&x25_family_ops); - if (rc != 0) + if (rc) goto out_proto; dev_add_pack(&x25_packet_type); rc = register_netdevice_notifier(&x25_dev_notifier); - if (rc != 0) + if (rc) goto out_sock; - pr_info("Linux Version 0.2\n"); + rc = x25_register_sysctl(); + if (rc) + goto out_dev; - x25_register_sysctl(); rc = x25_proc_init(); - if (rc != 0) - goto out_dev; + if (rc) + goto out_sysctl; + + pr_info("Linux Version 0.2\n"); + out: return rc; +out_sysctl: + x25_unregister_sysctl(); out_dev: unregister_netdevice_notifier(&x25_dev_notifier); out_sock: + dev_remove_pack(&x25_packet_type); sock_unregister(AF_X25); out_proto: proto_unregister(&x25_proto); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index a06dfe143c675039b57cdc5a83164228730d3e5e..ba078c85f0a1533ffbbadef1ab5789399dd1528a 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = { { }, }; -void __init x25_register_sysctl(void) +int __init x25_register_sysctl(void) { x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table); + if (!x25_table_header) + return -ENOMEM; + return 0; } void x25_unregister_sysctl(void) diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index c0e961983f17a077bde9bab18d946ecef0cba0ae..abf81b329dc1f3276e1cf5631ebae4ec31a229de 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ xfrm_sysctl.o xfrm_replay.o +obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c new file mode 100644 index 0000000000000000000000000000000000000000..574e6f32f94f29a496ef20f1a673a0e718a1a31b --- /dev/null +++ b/net/xfrm/xfrm_device.c @@ -0,0 +1,208 @@ +/* + * xfrm_device.c - IPsec device offloading code. + * + * Copyright (c) 2015 secunet Security Networks AG + * + * Author: + * Steffen Klassert + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +{ + int err; + struct xfrm_state *x; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (skb_is_gso(skb)) + return 0; + + if (xo) { + x = skb->sp->xvec[skb->sp->len - 1]; + if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) + return 0; + + x->outer_mode->xmit(x, skb); + + err = x->type_offload->xmit(x, skb, features); + if (err) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + return err; + } + + skb_push(skb, skb->data - skb_mac_header(skb)); + } + + return 0; +} +EXPORT_SYMBOL_GPL(validate_xmit_xfrm); + +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo) +{ + int err; + struct dst_entry *dst; + struct net_device *dev; + struct xfrm_state_offload *xso = &x->xso; + xfrm_address_t *saddr; + xfrm_address_t *daddr; + + if (!x->type_offload) + return 0; + + /* We don't yet support UDP encapsulation, TFC padding and ESN. */ + if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) + return 0; + + dev = dev_get_by_index(net, xuo->ifindex); + if (!dev) { + if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { + saddr = &x->props.saddr; + daddr = &x->id.daddr; + } else { + saddr = &x->id.daddr; + daddr = &x->props.saddr; + } + + dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family); + if (IS_ERR(dst)) + return 0; + + dev = dst->dev; + + dev_hold(dev); + dst_release(dst); + } + + if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + dev_put(dev); + return 0; + } + + xso->dev = dev; + xso->num_exthdrs = 1; + xso->flags = xuo->flags; + + err = dev->xfrmdev_ops->xdo_dev_state_add(x); + if (err) { + dev_put(dev); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xfrm_dev_state_add); + +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + int mtu; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct net_device *dev = x->xso.dev; + + if (!x->type_offload || x->encap) + return false; + + if ((x->xso.offload_handle && (dev == dst->path->dev)) && + !dst->child->xfrm && x->type->get_mtu) { + mtu = x->type->get_mtu(x, xdst->child_mtu_cached); + + if (skb->len <= mtu) + goto ok; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + goto ok; + } + + return false; + +ok: + if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) + return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); + + return true; +} +EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); + +int xfrm_dev_register(struct net_device *dev) +{ + if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) + return NOTIFY_BAD; + if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && + !(dev->features & NETIF_F_HW_ESP)) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + +static int xfrm_dev_unregister(struct net_device *dev) +{ + return NOTIFY_DONE; +} + +static int xfrm_dev_feat_change(struct net_device *dev) +{ + if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) + return NOTIFY_BAD; + else if (!(dev->features & NETIF_F_HW_ESP)) + dev->xfrmdev_ops = NULL; + + if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && + !(dev->features & NETIF_F_HW_ESP)) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + +static int xfrm_dev_down(struct net_device *dev) +{ + if (dev->features & NETIF_F_HW_ESP) + xfrm_dev_state_flush(dev_net(dev), dev, true); + + xfrm_garbage_collect(dev_net(dev)); + + return NOTIFY_DONE; +} + +static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_REGISTER: + return xfrm_dev_register(dev); + + case NETDEV_UNREGISTER: + return xfrm_dev_unregister(dev); + + case NETDEV_FEAT_CHANGE: + return xfrm_dev_feat_change(dev); + + case NETDEV_DOWN: + return xfrm_dev_down(dev); + } + return NOTIFY_DONE; +} + +static struct notifier_block xfrm_dev_notifier = { + .notifier_call = xfrm_dev_event, +}; + +void __net_init xfrm_dev_init(void) +{ + register_netdevice_notifier(&xfrm_dev_notifier); +} diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 666c5ffe929dca388b218d99c2d71946a231872f..eaea9c4fb3b0e6ec8903cfc5456519e8d4e7dcd1 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -54,8 +54,8 @@ static inline unsigned int __xfrm4_dpref_spref_hash(const xfrm_address_t *daddr, static inline unsigned int __xfrm6_pref_hash(const xfrm_address_t *addr, __u8 prefixlen) { - int pdw; - int pbi; + unsigned int pdw; + unsigned int pbi; u32 initval = 0; pdw = prefixlen >> 5; /* num of whole u32 in prefix */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 46bdb4fbed0bb34a5d6ae40991b3fda6e5dff82c..9de4b1dbc0aea3f768a8d98eeab456c0c8e0fa61 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -107,6 +107,8 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; + memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + if (src) { int i; @@ -207,8 +209,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) unsigned int family; int decaps = 0; int async = 0; - struct xfrm_offload *xo; bool xfrm_gro = false; + bool crypto_done = false; + struct xfrm_offload *xo = xfrm_offload(skb); if (encap_type < 0) { x = xfrm_input_state(skb); @@ -220,9 +223,40 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } + /* encap_type < -1 indicates a GRO call. */ encap_type = 0; seq = XFRM_SPI_SKB_CB(skb)->seq; + + if (xo && (xo->flags & CRYPTO_DONE)) { + crypto_done = true; + x = xfrm_input_state(skb); + family = XFRM_SPI_SKB_CB(skb)->family; + + if (!(xo->status & CRYPTO_SUCCESS)) { + if (xo->status & + (CRYPTO_TRANSPORT_AH_AUTH_FAILED | + CRYPTO_TRANSPORT_ESP_AUTH_FAILED | + CRYPTO_TUNNEL_AH_AUTH_FAILED | + CRYPTO_TUNNEL_ESP_AUTH_FAILED)) { + + xfrm_audit_state_icvfail(x, skb, + x->type->proto); + x->stats.integrity_failed++; + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop; + } + + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto drop; + } + + if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto drop; + } + } + goto lock; } @@ -311,7 +345,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb_dst_force(skb); dev_hold(skb->dev); - nexthdr = x->type->input(x, skb); + if (crypto_done) + nexthdr = x->type_offload->input_tail(x, skb); + else + nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; @@ -395,7 +432,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (xo) xfrm_gro = xo->flags & XFRM_GRO; - err = x->inner_mode->afinfo->transport_finish(skb, async); + err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8ba29fe58352abcc887235d36d42a06797d94b28..8c0b6722aaa87c6d346d0ba57c2e541a6703c79d 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,12 +99,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); - /* Inner headers are invalid now. */ - skb->encapsulation = 0; - - err = x->type->output(x, skb); - if (err == -EINPROGRESS) - goto out; + if (xfrm_offload(skb)) { + x->type_offload->encap(x, skb); + } else { + err = x->type->output(x, skb); + if (err == -EINPROGRESS) + goto out; + } resume: if (err) { @@ -200,8 +201,40 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); + struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + secpath_reset(skb); + skb->encapsulation = 0; + + if (xfrm_dev_offload_ok(skb, x)) { + struct sec_path *sp; + + sp = secpath_dup(skb->sp); + if (!sp) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + return -ENOMEM; + } + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + skb->encapsulation = 1; + + sp->olen++; + sp->xvec[skb->sp->len++] = x; + xfrm_state_hold(x); + + if (skb_is_gso(skb)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + + return xfrm_output2(net, sk, skb); + } + + if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM) + goto out; + } + if (skb_is_gso(skb)) return xfrm_output_gso(net, sk, skb); @@ -214,6 +247,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) } } +out: return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 236cbbc0ab9cfff05cd027ffb0dc56aa15e61033..ed4e52d95172e2d8dcca603cdf62cfb55517f3c9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -116,11 +116,10 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa return afinfo; } -static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family) +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -135,6 +134,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, return dst; } +EXPORT_SYMBOL(__xfrm_dst_lookup); static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, int oif, @@ -1006,6 +1006,10 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + + if (cnt) + xfrm_garbage_collect(net); + return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -1793,43 +1797,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto out; } -#ifdef CONFIG_XFRM_SUB_POLICY -static int xfrm_dst_alloc_copy(void **target, const void *src, int size) -{ - if (!*target) { - *target = kmalloc(size, GFP_ATOMIC); - if (!*target) - return -ENOMEM; - } - - memcpy(*target, src, size); - return 0; -} -#endif - -static int xfrm_dst_update_parent(struct dst_entry *dst, - const struct xfrm_selector *sel) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->partner), - sel, sizeof(*sel)); -#else - return 0; -#endif -} - -static int xfrm_dst_update_origin(struct dst_entry *dst, - const struct flowi *fl) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); -#else - return 0; -#endif -} - static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) @@ -1901,16 +1868,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, xdst = (struct xfrm_dst *)dst; xdst->num_xfrms = err; - if (num_pols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - dst_free(dst); - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - return ERR_PTR(err); - } - xdst->num_pols = num_pols; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); @@ -2929,21 +2886,6 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - switch (event) { - case NETDEV_DOWN: - xfrm_garbage_collect(dev_net(dev)); - } - return NOTIFY_DONE; -} - -static struct notifier_block xfrm_dev_notifier = { - .notifier_call = xfrm_dev_event, -}; - #ifdef CONFIG_XFRM_STATISTICS static int __net_init xfrm_statistics_init(struct net *net) { @@ -3020,7 +2962,7 @@ static int __net_init xfrm_policy_init(struct net *net) INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); if (net_eq(net, &init_net)) - register_netdevice_notifier(&xfrm_dev_notifier); + xfrm_dev_init(); return 0; out_bydst: diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index cdc2e2e71bffa2d871d051870f0daf27067815fb..8b23c5bcf8e88bcf2141dd75fac6eeec54a1b143 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -45,7 +45,8 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) return seq_hi; } - +EXPORT_SYMBOL(xfrm_replay_seqhi); +; static void xfrm_replay_notify(struct xfrm_state *x, int event) { struct km_event c; @@ -558,6 +559,158 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) x->repl->notify(x, XFRM_REPLAY_UPDATE); } +#ifdef CONFIG_XFRM_OFFLOAD +static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 oseq = x->replay.oseq; + + if (!xo) + return xfrm_replay_overflow(x, skb); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + if (!skb_is_gso(skb)) { + XFRM_SKB_CB(skb)->seq.output.low = ++oseq; + xo->seq.low = oseq; + } else { + XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; + xo->seq.low = oseq + 1; + oseq += skb_shinfo(skb)->gso_segs; + } + + XFRM_SKB_CB(skb)->seq.output.hi = 0; + xo->seq.hi = 0; + if (unlikely(oseq < x->replay.oseq)) { + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + + x->replay.oseq = oseq; + + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + __u32 oseq = replay_esn->oseq; + + if (!xo) + return xfrm_replay_overflow_bmp(x, skb); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + if (!skb_is_gso(skb)) { + XFRM_SKB_CB(skb)->seq.output.low = ++oseq; + xo->seq.low = oseq; + } else { + XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; + xo->seq.low = oseq + 1; + oseq += skb_shinfo(skb)->gso_segs; + } + + XFRM_SKB_CB(skb)->seq.output.hi = 0; + xo->seq.hi = 0; + if (unlikely(oseq < replay_esn->oseq)) { + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } else { + replay_esn->oseq = oseq; + } + + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + __u32 oseq = replay_esn->oseq; + __u32 oseq_hi = replay_esn->oseq_hi; + + if (!xo) + return xfrm_replay_overflow_esn(x, skb); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + if (!skb_is_gso(skb)) { + XFRM_SKB_CB(skb)->seq.output.low = ++oseq; + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.low = oseq; + xo->seq.hi = oseq_hi; + } else { + XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.low = oseq = oseq + 1; + xo->seq.hi = oseq_hi; + oseq += skb_shinfo(skb)->gso_segs; + } + + if (unlikely(oseq < replay_esn->oseq)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; + xo->seq.hi = oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + + replay_esn->oseq = oseq; + replay_esn->oseq_hi = oseq_hi; + + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static const struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .recheck = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow_offload, +}; + +static const struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_offload_bmp, +}; + +static const struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, + .notify = xfrm_replay_notify_esn, + .overflow = xfrm_replay_overflow_offload_esn, +}; +#else static const struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, @@ -581,6 +734,7 @@ static const struct xfrm_replay xfrm_replay_esn = { .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; +#endif int xfrm_init_replay(struct xfrm_state *x) { @@ -595,10 +749,12 @@ int xfrm_init_replay(struct xfrm_state *x) if (replay_esn->replay_window == 0) return -EINVAL; x->repl = &xfrm_replay_esn; - } else + } else { x->repl = &xfrm_replay_bmp; - } else + } + } else { x->repl = &xfrm_replay_legacy; + } return 0; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5a597dbbe564f09ad6b77e1f529127a0410c7fd4..2e291bc5f1fc1003ca62e40f67da8a6a46875c02 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -251,6 +251,75 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } +static DEFINE_SPINLOCK(xfrm_type_offload_lock); +int xfrm_register_type_offload(const struct xfrm_type_offload *type, + unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + const struct xfrm_type_offload **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_offload_map; + spin_lock_bh(&xfrm_type_offload_lock); + + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; + else + err = -EEXIST; + spin_unlock_bh(&xfrm_type_offload_lock); + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(xfrm_register_type_offload); + +int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, + unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + const struct xfrm_type_offload **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_offload_map; + spin_lock_bh(&xfrm_type_offload_lock); + + if (unlikely(typemap[type->proto] != type)) + err = -ENOENT; + else + typemap[type->proto] = NULL; + spin_unlock_bh(&xfrm_type_offload_lock); + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type_offload); + +static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + const struct xfrm_type_offload **typemap; + const struct xfrm_type_offload *type; + + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_offload_map; + + type = typemap[proto]; + if ((type && !try_module_get(type->owner))) + type = NULL; + + rcu_read_unlock(); + return type; +} + +static void xfrm_put_type_offload(const struct xfrm_type_offload *type) +{ + module_put(type->owner); +} + static DEFINE_SPINLOCK(xfrm_mode_lock); int xfrm_register_mode(struct xfrm_mode *mode, int family) { @@ -365,10 +434,13 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) xfrm_put_mode(x->inner_mode_iaf); if (x->outer_mode) xfrm_put_mode(x->outer_mode); + if (x->type_offload) + xfrm_put_type_offload(x->type_offload); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); } + xfrm_dev_state_free(x); security_xfrm_state_free(x); kfree(x); } @@ -538,6 +610,8 @@ int __xfrm_state_delete(struct xfrm_state *x) net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); + xfrm_dev_state_delete(x); + /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. @@ -582,12 +656,41 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) return err; } + +static inline int +xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) +{ + int i, err = 0; + + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + struct xfrm_state_offload *xso; + + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + xso = &x->xso; + + if (xso->dev == dev && + (err = security_xfrm_state_delete(x)) != 0) { + xfrm_audit_state_delete(x, 0, task_valid); + return err; + } + } + } + + return err; +} #else static inline int xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { return 0; } + +static inline int +xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) +{ + return 0; +} #endif int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) @@ -630,6 +733,48 @@ int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) } EXPORT_SYMBOL(xfrm_state_flush); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid) +{ + int i, err = 0, cnt = 0; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid); + if (err) + goto out; + + err = -ESRCH; + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + struct xfrm_state_offload *xso; +restart: + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + xso = &x->xso; + + if (!xfrm_state_kern(x) && xso->dev == dev) { + xfrm_state_hold(x); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + err = xfrm_state_delete(x); + xfrm_audit_state_delete(x, err ? 0 : 1, + task_valid); + xfrm_state_put(x); + if (!err) + cnt++; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + goto restart; + } + } + } + if (cnt) + err = 0; + +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return err; +} +EXPORT_SYMBOL(xfrm_dev_state_flush); + void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&net->xfrm.xfrm_state_lock); @@ -1238,6 +1383,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; + x->replay = orig->replay; + x->preplay = orig->preplay; return x; @@ -2077,6 +2224,8 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) if (x->type == NULL) goto error; + x->type_offload = xfrm_get_type_offload(x->id.proto, family); + err = x->type->init_state(x); if (err) goto error; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 40a8aa39220d67d349c36f80ab1628d0e90c3fa7..38614df33ec8d5751bd88a48d0068926b9032556 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -55,7 +55,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return -EINVAL; } - algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } @@ -71,7 +71,7 @@ static int verify_auth_trunc(struct nlattr **attrs) if (nla_len(rt) < xfrm_alg_auth_len(algp)) return -EINVAL; - algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } @@ -87,7 +87,7 @@ static int verify_aead(struct nlattr **attrs) if (nla_len(rt) < aead_len(algp)) return -EINVAL; - algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } @@ -595,6 +595,13 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (attrs[XFRMA_OFFLOAD_DEV]) { + err = xfrm_dev_state_add(net, x, + nla_data(attrs[XFRMA_OFFLOAD_DEV])); + if (err) + goto error; + } + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; @@ -779,6 +786,23 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } +static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb) +{ + struct xfrm_user_offload *xuo; + struct nlattr *attr; + + attr = nla_reserve(skb, XFRMA_OFFLOAD_DEV, sizeof(*xuo)); + if (attr == NULL) + return -EMSGSIZE; + + xuo = nla_data(attr); + + xuo->ifindex = xso->dev->ifindex; + xuo->flags = xso->flags; + + return 0; +} + static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) { struct xfrm_algo *algo; @@ -869,6 +893,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, &x->replay); if (ret) goto out; + if(x->xso.dev) + ret = copy_user_offload(&x->xso, skb); + if (ret) + goto out; if (x->security) ret = copy_sec_ctx(x->security, skb); out: @@ -932,8 +960,8 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) u8 proto = 0; int err; - err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, - xfrma_policy); + err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy, + NULL); if (err < 0) return err; @@ -2406,6 +2434,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, + [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { @@ -2448,7 +2477,8 @@ static const struct xfrm_link { [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, }; -static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; @@ -2488,7 +2518,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, link->nla_max ? : XFRMA_MAX, - link->nla_pol ? : xfrma_policy); + link->nla_pol ? : xfrma_policy, extack); if (err < 0) return err; @@ -2622,6 +2652,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->coaddr)); if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); + if (x->xso.dev) + l += nla_total_size(sizeof(x->xso)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); @@ -3108,7 +3140,6 @@ static bool xfrm_is_alive(const struct km_event *c) } static struct xfrm_mgr netlink_mgr = { - .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 09e9d535bd7487d81574cf8572a41b6e697566fd..6c7468eb3684494b46a248fcef39a1e451fd6856 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -34,6 +34,8 @@ hostprogs-y += sampleip hostprogs-y += tc_l2_redirect hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel +hostprogs-y += test_map_in_map +hostprogs-y += per_socket_stats_example # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -72,6 +74,8 @@ sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o +test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o +per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -105,6 +109,8 @@ always += trace_event_kern.o always += sampleip_kern.o always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o +always += test_map_in_map_kern.o +always += cookie_uid_helper_example.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -139,6 +145,7 @@ HOSTLOADLIBES_sampleip += -lelf HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf +HOSTLOADLIBES_test_map_in_map += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang @@ -182,4 +189,5 @@ $(obj)/%.o: $(src)/%.c -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ + -Wno-unknown-warning-option \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index faaffe2e139a989de6f90835121a725091d4a289..9a9c95f2c9fb3a7de27eb70659429418ada04301 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -80,6 +80,7 @@ struct bpf_map_def { unsigned int value_size; unsigned int max_entries; unsigned int map_flags; + unsigned int inner_map_idx; }; static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = @@ -145,11 +146,30 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) +#elif defined(__sparc__) + +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) +#if defined(__arch64__) +#define PT_REGS_IP(x) ((x)->tpc) +#else +#define PT_REGS_IP(x) ((x)->pc) +#endif + #endif #ifdef __powerpc__ #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#elif defined(__sparc__) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index b86ee54da2d14d6ba0de18481d2c55ed1a70a67b..74456b3eb89acb4671fcebaafc2ce7bd102b3ed3 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" @@ -37,13 +39,8 @@ int event_fd[MAX_PROGS]; int prog_cnt; int prog_array_fd = -1; -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; +struct bpf_map_data map_data[MAX_MAPS]; +int map_data_count = 0; static int populate_prog_array(const char *event, int prog_fd) { @@ -192,24 +189,45 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return 0; } -static int load_maps(struct bpf_map_def *maps, int len) +static int load_maps(struct bpf_map_data *maps, int nr_maps, + fixup_map_cb fixup_map) { int i; - for (i = 0; i < len / sizeof(struct bpf_map_def); i++) { + for (i = 0; i < nr_maps; i++) { + if (fixup_map) { + fixup_map(&maps[i], i); + /* Allow userspace to assign map FD prior to creation */ + if (maps[i].fd != -1) { + map_fd[i] = maps[i].fd; + continue; + } + } - map_fd[i] = bpf_create_map(maps[i].type, - maps[i].key_size, - maps[i].value_size, - maps[i].max_entries, - maps[i].map_flags); + if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { + int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; + + map_fd[i] = bpf_create_map_in_map(maps[i].def.type, + maps[i].def.key_size, + inner_map_fd, + maps[i].def.max_entries, + maps[i].def.map_flags); + } else { + map_fd[i] = bpf_create_map(maps[i].def.type, + maps[i].def.key_size, + maps[i].def.value_size, + maps[i].def.max_entries, + maps[i].def.map_flags); + } if (map_fd[i] < 0) { printf("failed to create a map: %d %s\n", errno, strerror(errno)); return 1; } + maps[i].fd = map_fd[i]; - if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) prog_array_fd = map_fd[i]; } return 0; @@ -239,7 +257,8 @@ static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, } static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, - GElf_Shdr *shdr, struct bpf_insn *insn) + GElf_Shdr *shdr, struct bpf_insn *insn, + struct bpf_map_data *maps, int nr_maps) { int i, nrels; @@ -249,6 +268,8 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, GElf_Sym sym; GElf_Rel rel; unsigned int insn_idx; + bool match = false; + int j, map_idx; gelf_getrel(data, i, &rel); @@ -262,20 +283,157 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, return 1; } insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; + + /* Match FD relocation against recorded map_data[] offset */ + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].elf_offset == sym.st_value) { + match = true; + break; + } + } + if (match) { + insn[insn_idx].imm = maps[map_idx].fd; + } else { + printf("invalid relo for insn[%d] no map_data match\n", + insn_idx); + return 1; + } } return 0; } -int load_bpf_file(char *path) +static int cmp_symbols(const void *l, const void *r) { - int fd, i; + const GElf_Sym *lsym = (const GElf_Sym *)l; + const GElf_Sym *rsym = (const GElf_Sym *)r; + + if (lsym->st_value < rsym->st_value) + return -1; + else if (lsym->st_value > rsym->st_value) + return 1; + else + return 0; +} + +static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, + Elf *elf, Elf_Data *symbols, int strtabidx) +{ + int map_sz_elf, map_sz_copy; + bool validate_zero = false; + Elf_Data *data_maps; + int i, nr_maps; + GElf_Sym *sym; + Elf_Scn *scn; + int copy_sz; + + if (maps_shndx < 0) + return -EINVAL; + if (!symbols) + return -EINVAL; + + /* Get data for maps section via elf index */ + scn = elf_getscn(elf, maps_shndx); + if (scn) + data_maps = elf_getdata(scn, NULL); + if (!scn || !data_maps) { + printf("Failed to get Elf_Data from maps section %d\n", + maps_shndx); + return -EINVAL; + } + + /* For each map get corrosponding symbol table entry */ + sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); + for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + assert(nr_maps < MAX_MAPS+1); + if (!gelf_getsym(symbols, i, &sym[nr_maps])) + continue; + if (sym[nr_maps].st_shndx != maps_shndx) + continue; + /* Only increment iif maps section */ + nr_maps++; + } + + /* Align to map_fd[] order, via sort on offset in sym.st_value */ + qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); + + /* Keeping compatible with ELF maps section changes + * ------------------------------------------------ + * The program size of struct bpf_map_def is known by loader + * code, but struct stored in ELF file can be different. + * + * Unfortunately sym[i].st_size is zero. To calculate the + * struct size stored in the ELF file, assume all struct have + * the same size, and simply divide with number of map + * symbols. + */ + map_sz_elf = data_maps->d_size / nr_maps; + map_sz_copy = sizeof(struct bpf_map_def); + if (map_sz_elf < map_sz_copy) { + /* + * Backward compat, loading older ELF file with + * smaller struct, keeping remaining bytes zero. + */ + map_sz_copy = map_sz_elf; + } else if (map_sz_elf > map_sz_copy) { + /* + * Forward compat, loading newer ELF file with larger + * struct with unknown features. Assume zero means + * feature not used. Thus, validate rest of struct + * data is zero. + */ + validate_zero = true; + } + + /* Memcpy relevant part of ELF maps data to loader maps */ + for (i = 0; i < nr_maps; i++) { + unsigned char *addr, *end; + struct bpf_map_def *def; + const char *map_name; + size_t offset; + + map_name = elf_strptr(elf, strtabidx, sym[i].st_name); + maps[i].name = strdup(map_name); + if (!maps[i].name) { + printf("strdup(%s): %s(%d)\n", map_name, + strerror(errno), errno); + free(sym); + return -errno; + } + + /* Symbol value is offset into ELF maps section data area */ + offset = sym[i].st_value; + def = (struct bpf_map_def *)(data_maps->d_buf + offset); + maps[i].elf_offset = offset; + memset(&maps[i].def, 0, sizeof(struct bpf_map_def)); + memcpy(&maps[i].def, def, map_sz_copy); + + /* Verify no newer features were requested */ + if (validate_zero) { + addr = (unsigned char*) def + map_sz_copy; + end = (unsigned char*) def + map_sz_elf; + for (; addr < end; addr++) { + if (*addr != 0) { + free(sym); + return -EFBIG; + } + } + } + } + + free(sym); + return nr_maps; +} + +static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) +{ + int fd, i, ret, maps_shndx = -1, strtabidx = -1; Elf *elf; GElf_Ehdr ehdr; GElf_Shdr shdr, shdr_prog; - Elf_Data *data, *data_prog, *symbols = NULL; + Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; char *shname, *shname_prog; + int nr_maps = 0; /* reset global variables */ kern_version = 0; @@ -323,14 +481,41 @@ int load_bpf_file(char *path) } memcpy(&kern_version, data->d_buf, sizeof(int)); } else if (strcmp(shname, "maps") == 0) { - processed_sec[i] = true; - if (load_maps(data->d_buf, data->d_size)) - return 1; + int j; + + maps_shndx = i; + data_maps = data; + for (j = 0; j < MAX_MAPS; j++) + map_data[j].fd = -1; } else if (shdr.sh_type == SHT_SYMTAB) { + strtabidx = shdr.sh_link; symbols = data; } } + ret = 1; + + if (!symbols) { + printf("missing SHT_SYMTAB section\n"); + goto done; + } + + if (data_maps) { + nr_maps = load_elf_maps_section(map_data, maps_shndx, + elf, symbols, strtabidx); + if (nr_maps < 0) { + printf("Error: Failed loading ELF maps (errno:%d):%s\n", + nr_maps, strerror(-nr_maps)); + ret = 1; + goto done; + } + if (load_maps(map_data, nr_maps, fixup_map)) + goto done; + map_data_count = nr_maps; + + processed_sec[maps_shndx] = true; + } + /* load programs that need map fixup (relocations) */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) @@ -354,7 +539,8 @@ int load_bpf_file(char *path) processed_sec[shdr.sh_info] = true; processed_sec[i] = true; - if (parse_relo_and_apply(data, symbols, &shdr, insns)) + if (parse_relo_and_apply(data, symbols, &shdr, insns, + map_data, nr_maps)) continue; if (memcmp(shname_prog, "kprobe/", 7) == 0 || @@ -387,8 +573,20 @@ int load_bpf_file(char *path) load_and_attach(shname, data->d_buf, data->d_size); } + ret = 0; +done: close(fd); - return 0; + return ret; +} + +int load_bpf_file(char *path) +{ + return do_load_bpf_file(path, NULL); +} + +int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) +{ + return do_load_bpf_file(path, fixup_map); } void read_trace_pipe(void) @@ -473,7 +671,7 @@ struct ksym *ksym_search(long key) return &syms[0]; } -int set_link_xdp_fd(int ifindex, int fd) +int set_link_xdp_fd(int ifindex, int fd, __u32 flags) { struct sockaddr_nl sa; int sock, seq = 0, len, ret = -1; @@ -509,15 +707,28 @@ int set_link_xdp_fd(int ifindex, int fd) req.nh.nlmsg_seq = ++seq; req.ifinfo.ifi_family = AF_UNSPEC; req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ nla = (struct nlattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + nla->nla_len = NLA_HDRLEN; - nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index c827827299b3b77831650e762376b061b6c93021..ca0563d04744fe31a472b53824a5a1135b53be48 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -6,12 +6,36 @@ #define MAX_MAPS 32 #define MAX_PROGS 32 -extern int map_fd[MAX_MAPS]; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; +}; + +struct bpf_map_data { + int fd; + char *name; + size_t elf_offset; + struct bpf_map_def def; +}; + +typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); + extern int prog_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS]; extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; extern int prog_cnt; +/* There is a one-to-one mapping between map_fd[] and map_data[]. + * The map_data[] just contains more rich info on the given map. + */ +extern int map_fd[MAX_MAPS]; +extern struct bpf_map_data map_data[MAX_MAPS]; +extern int map_data_count; + /* parses elf file compiled by llvm .c->.o * . parses 'maps' section and creates maps via BPF syscall * . parses 'license' section and passes it to syscall @@ -25,6 +49,7 @@ extern int prog_cnt; * returns zero on success */ int load_bpf_file(char *path); +int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); void read_trace_pipe(void); struct ksym { @@ -34,5 +59,5 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); -int set_link_xdp_fd(int ifindex, int fd); +int set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c new file mode 100644 index 0000000000000000000000000000000000000000..9d751e209f313ca8f20a07e6d578cfb98c23defd --- /dev/null +++ b/samples/bpf/cookie_uid_helper_example.c @@ -0,0 +1,323 @@ +/* This test is a demo of using get_socket_uid and get_socket_cookie + * helper function to do per socket based network traffic monitoring. + * It requires iptables version higher then 1.6.1. to load pinned eBPF + * program into the xt_bpf match. + * + * TEST: + * ./run_cookie_uid_helper_example.sh -option + * option: + * -t: do traffic monitoring test, the program will continuously + * print out network traffic happens after program started A sample + * output is shown below: + * + * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 + * cookie: 132, uid: 0x0, Pakcet Count: 2, Bytes Count: 286 + * cookie: 812, uid: 0x3e8, Pakcet Count: 3, Bytes Count: 1726 + * cookie: 802, uid: 0x3e8, Pakcet Count: 2, Bytes Count: 104 + * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 + * cookie: 831, uid: 0x3e8, Pakcet Count: 2, Bytes Count: 104 + * cookie: 0, uid: 0x0, Pakcet Count: 6, Bytes Count: 712 + * cookie: 880, uid: 0xfffe, Pakcet Count: 1, Bytes Count: 70 + * + * -s: do getsockopt SO_COOKIE test, the program will set up a pair of + * UDP sockets and send packets between them. And read out the traffic data + * directly from the ebpf map based on the socket cookie. + * + * Clean up: if using shell script, the script file will delete the iptables + * rule and unmount the bpf program when exit. Else the iptables rule need + * to be deleted by hand, see run_cookie_uid_helper_example.sh for detail. + */ + +#define _GNU_SOURCE + +#define offsetof(type, member) __builtin_offsetof(type, member) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" + +#define PORT 8888 + +struct stats { + uint32_t uid; + uint64_t packets; + uint64_t bytes; +}; + +static int map_fd, prog_fd; + +static bool test_finish; + +static void maps_create(void) +{ + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), + sizeof(struct stats), 100, 0); + if (map_fd < 0) + error(1, errno, "map create failed!\n"); +} + +static void prog_load(void) +{ + static char log_buf[1 << 16]; + + struct bpf_insn prog[] = { + /* + * Save sk_buff for future usage. value stored in R6 to R10 will + * not be reset after a bpf helper function call. + */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* + * pc1: BPF_FUNC_get_socket_cookie takes one parameter, + * R1: sk_buff + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + /* pc2-4: save &socketCookie to r7 for future usage*/ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* + * pc5-8: set up the registers for BPF_FUNC_map_lookup_elem, + * it takes two parameters (R1: map_fd, R2: &socket_cookie) + */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* + * pc9. if r0 != 0x0, go to pc+14, since we have the cookie + * stored already + * Otherwise do pc10-22 to setup a new data entry. + */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 14), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_uid), + /* + * Place a struct stats in the R10 stack and sequentially + * place the member value into the memory. Packets value + * is set by directly place a IMM value 1 into the stack. + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, + -32 + (__s16)offsetof(struct stats, uid)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, + -32 + (__s16)offsetof(struct stats, packets), 1), + /* + * __sk_buff is a special struct used for eBPF program to + * directly access some sk_buff field. + */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, + -32 + (__s16)offsetof(struct stats, bytes)), + /* + * add new map entry using BPF_FUNC_map_update_elem, it takes + * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats, + * R4: flags) + */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_update_elem), + BPF_JMP_IMM(BPF_JA, 0, 0, 5), + /* + * pc24-30 update the packet info to a exist data entry, it can + * be done by directly write to pointers instead of using + * BPF_FUNC_map_update_elem helper function + */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, packets)), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, bytes)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }; + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + ARRAY_SIZE(prog), "GPL", 0, + log_buf, sizeof(log_buf)); + if (prog_fd < 0) + error(1, errno, "failed to load prog\n%s\n", log_buf); +} + +static void prog_attach_iptables(char *file) +{ + int ret; + char rules[100]; + + if (bpf_obj_pin(prog_fd, file)) + error(1, errno, "bpf_obj_pin"); + if (strlen(file) > 50) { + printf("file path too long: %s\n", file); + exit(1); + } + sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", + file); + ret = system(rules); + if (ret < 0) { + printf("iptables rule update failed: %d/n", WEXITSTATUS(ret)); + exit(1); + } +} + +static void print_table(void) +{ + struct stats curEntry; + uint32_t curN = UINT32_MAX; + uint32_t nextN; + int res; + + while (bpf_map_get_next_key(map_fd, &curN, &nextN) > -1) { + curN = nextN; + res = bpf_map_lookup_elem(map_fd, &curN, &curEntry); + if (res < 0) { + error(1, errno, "fail to get entry value of Key: %u\n", + curN); + } else { + printf("cookie: %u, uid: 0x%x, Packet Count: %lu," + " Bytes Count: %lu\n", curN, curEntry.uid, + curEntry.packets, curEntry.bytes); + } + } +} + +static void udp_client(void) +{ + struct sockaddr_in si_other = {0}; + struct sockaddr_in si_me = {0}; + struct stats dataEntry; + int s_rcv, s_send, i, recv_len; + char message = 'a'; + char buf; + uint64_t cookie; + int res; + socklen_t cookie_len = sizeof(cookie); + socklen_t slen = sizeof(si_other); + + s_rcv = socket(PF_INET, SOCK_DGRAM, 0); + if (s_rcv < 0) + error(1, errno, "rcv socket creat failed!\n"); + si_other.sin_family = AF_INET; + si_other.sin_port = htons(PORT); + if (inet_aton("127.0.0.1", &si_other.sin_addr) == 0) + error(1, errno, "inet_aton\n"); + if (bind(s_rcv, (struct sockaddr *)&si_other, sizeof(si_other)) == -1) + error(1, errno, "bind\n"); + s_send = socket(PF_INET, SOCK_DGRAM, 0); + if (s_send < 0) + error(1, errno, "send socket creat failed!\n"); + res = getsockopt(s_send, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len); + if (res < 0) + printf("get cookie failed: %s\n", strerror(errno)); + res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry); + if (res != -1) + error(1, errno, "socket stat found while flow not active\n"); + for (i = 0; i < 10; i++) { + res = sendto(s_send, &message, sizeof(message), 0, + (struct sockaddr *)&si_other, slen); + if (res == -1) + error(1, errno, "send\n"); + if (res != sizeof(message)) + error(1, 0, "%uB != %luB\n", res, sizeof(message)); + recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0, + (struct sockaddr *)&si_me, &slen); + if (recv_len < 0) + error(1, errno, "revieve\n"); + res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr), + sizeof(si_me.sin_addr)); + if (res != 0) + error(1, EFAULT, "sender addr error: %d\n", res); + printf("Message received: %c\n", buf); + res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry); + if (res < 0) + error(1, errno, "lookup sk stat failed, cookie: %lu\n", + cookie); + printf("cookie: %lu, uid: 0x%x, Packet Count: %lu," + " Bytes Count: %lu\n\n", cookie, dataEntry.uid, + dataEntry.packets, dataEntry.bytes); + } + close(s_send); + close(s_rcv); +} + +static int usage(void) +{ + printf("Usage: ./run_cookie_uid_helper_example.sh" + " bpfObjName -option\n" + " -t traffic monitor test\n" + " -s getsockopt cookie test\n"); + return 1; +} + +static void finish(int ret) +{ + test_finish = true; +} + +int main(int argc, char *argv[]) +{ + int opt; + bool cfg_test_traffic = false; + bool cfg_test_cookie = false; + + if (argc != 3) + return usage(); + while ((opt = getopt(argc, argv, "ts")) != -1) { + switch (opt) { + case 't': + cfg_test_traffic = true; + break; + case 's': + cfg_test_cookie = true; + break; + + default: + printf("unknown option %c\n", opt); + usage(); + return -1; + } + } + maps_create(); + prog_load(); + prog_attach_iptables(argv[2]); + if (cfg_test_traffic) { + if (signal(SIGINT, finish) == SIG_ERR) + error(1, errno, "register SIGINT handler failed"); + if (signal(SIGTERM, finish) == SIG_ERR) + error(1, errno, "register SIGTERM handler failed"); + while (!test_finish) { + print_table(); + printf("\n"); + sleep(1); + }; + } else if (cfg_test_cookie) { + udp_client(); + } + close(prog_fd); + close(map_fd); + return 0; +} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 3705fba453a005fb32f5dfb51dd5763f5f364ccf..8ab36a04c174a9c154594b7ef4fe2ef2c5c5dc5e 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -135,6 +135,16 @@ struct bpf_insn; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index a91872a97742a6413c316548c66ab8349ba1aff0..245165817fbe63bd53ea9cf7f3a3ab6cf5e7a2fa 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -11,6 +11,7 @@ #include "bpf_helpers.h" #define MAX_ENTRIES 1000 +#define MAX_NR_CPUS 1024 struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, @@ -26,7 +27,7 @@ struct bpf_map_def SEC("maps") lru_hash_map = { .max_entries = 10000, }; -struct bpf_map_def SEC("maps") percpu_lru_hash_map = { +struct bpf_map_def SEC("maps") nocommon_lru_hash_map = { .type = BPF_MAP_TYPE_LRU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -34,6 +35,19 @@ struct bpf_map_def SEC("maps") percpu_lru_hash_map = { .map_flags = BPF_F_NO_COMMON_LRU, }; +struct bpf_map_def SEC("maps") inner_lru_hash_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + +struct bpf_map_def SEC("maps") array_of_lru_hashs = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(u32), + .max_entries = MAX_NR_CPUS, +}; + struct bpf_map_def SEC("maps") percpu_hash_map = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(u32), @@ -65,6 +79,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = { .map_flags = BPF_F_NO_PREALLOC, }; +struct bpf_map_def SEC("maps") array_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + SEC("kprobe/sys_getuid") int stress_hmap(struct pt_regs *ctx) { @@ -93,6 +114,7 @@ int stress_percpu_hmap(struct pt_regs *ctx) bpf_map_delete_elem(&percpu_hash_map, &key); return 0; } + SEC("kprobe/sys_getgid") int stress_hmap_alloc(struct pt_regs *ctx) { @@ -121,24 +143,56 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getpid") +SEC("kprobe/sys_connect") int stress_lru_hmap_alloc(struct pt_regs *ctx) { - u32 key = bpf_get_prandom_u32(); + struct sockaddr_in6 *in6; + u16 test_case, dst6[8]; + int addrlen, ret; + char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n"; long val = 1; + u32 key = bpf_get_prandom_u32(); - bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); + addrlen = (int)PT_REGS_PARM3(ctx); - return 0; -} + if (addrlen != sizeof(*in6)) + return 0; -SEC("kprobe/sys_getppid") -int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx) -{ - u32 key = bpf_get_prandom_u32(); - long val = 1; + ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); + if (ret) + goto done; - bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY); + if (dst6[0] != 0xdead || dst6[1] != 0xbeef) + return 0; + + test_case = dst6[7]; + + if (test_case == 0) { + ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); + } else if (test_case == 1) { + ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val, + BPF_ANY); + } else if (test_case == 2) { + void *nolocal_lru_map; + int cpu = bpf_get_smp_processor_id(); + + nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs, + &cpu); + if (!nolocal_lru_map) { + ret = -ENOENT; + goto done; + } + + ret = bpf_map_update_elem(nolocal_lru_map, &key, &val, + BPF_ANY); + } else { + ret = -EINVAL; + } + +done: + if (ret) + bpf_trace_printk(fmt, sizeof(fmt), ret); return 0; } @@ -165,5 +219,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } +SEC("kprobe/sys_getpgid") +int stress_hash_map_lookup(struct pt_regs *ctx) +{ + u32 key = 1, i; + long *value; + +#pragma clang loop unroll(full) + for (i = 0; i < 64; ++i) + value = bpf_map_lookup_elem(&hash_map, &key); + + return 0; +} + +SEC("kprobe/sys_getpgrp") +int stress_array_map_lookup(struct pt_regs *ctx) +{ + u32 key = 1, i; + long *value; + +#pragma clang loop unroll(full) + for (i = 0; i < 64; ++i) + value = bpf_map_lookup_elem(&array_map, &key); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 680260a91f50c893dd26a1b968cc220a299c530f..1a8894b5ac5147c36833ffae500ddfbeb00277d1 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -18,10 +18,14 @@ #include #include #include +#include +#include + #include "libbpf.h" #include "bpf_load.h" -#define MAX_CNT 1000000 +#define TEST_BIT(t) (1U << (t)) +#define MAX_NR_CPUS 1024 static __u64 time_get_ns(void) { @@ -31,15 +35,44 @@ static __u64 time_get_ns(void) return ts.tv_sec * 1000000000ull + ts.tv_nsec; } -#define HASH_PREALLOC (1 << 0) -#define PERCPU_HASH_PREALLOC (1 << 1) -#define HASH_KMALLOC (1 << 2) -#define PERCPU_HASH_KMALLOC (1 << 3) -#define LRU_HASH_PREALLOC (1 << 4) -#define PERCPU_LRU_HASH_PREALLOC (1 << 5) -#define LPM_KMALLOC (1 << 6) +enum test_type { + HASH_PREALLOC, + PERCPU_HASH_PREALLOC, + HASH_KMALLOC, + PERCPU_HASH_KMALLOC, + LRU_HASH_PREALLOC, + NOCOMMON_LRU_HASH_PREALLOC, + LPM_KMALLOC, + HASH_LOOKUP, + ARRAY_LOOKUP, + INNER_LRU_HASH_PREALLOC, + NR_TESTS, +}; + +const char *test_map_names[NR_TESTS] = { + [HASH_PREALLOC] = "hash_map", + [PERCPU_HASH_PREALLOC] = "percpu_hash_map", + [HASH_KMALLOC] = "hash_map_alloc", + [PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc", + [LRU_HASH_PREALLOC] = "lru_hash_map", + [NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map", + [LPM_KMALLOC] = "lpm_trie_map_alloc", + [HASH_LOOKUP] = "hash_map", + [ARRAY_LOOKUP] = "array_map", + [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", +}; static int test_flags = ~0; +static uint32_t num_map_entries; +static uint32_t inner_lru_hash_size; +static int inner_lru_hash_idx = -1; +static int array_of_lru_hashs_idx = -1; +static uint32_t max_cnt = 1000000; + +static int check_test_flags(enum test_type t) +{ + return test_flags & TEST_BIT(t); +} static void test_hash_prealloc(int cpu) { @@ -47,34 +80,89 @@ static void test_hash_prealloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_getuid); printf("%d:hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } -static void test_lru_hash_prealloc(int cpu) +static void do_test_lru(enum test_type test, int cpu) { + static int inner_lru_map_fds[MAX_NR_CPUS]; + + struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 }; + const char *test_name; __u64 start_time; - int i; + int i, ret; + + if (test == INNER_LRU_HASH_PREALLOC) { + int outer_fd = map_fd[array_of_lru_hashs_idx]; + + assert(cpu < MAX_NR_CPUS); + + if (cpu) { + inner_lru_map_fds[cpu] = + bpf_create_map(BPF_MAP_TYPE_LRU_HASH, + sizeof(uint32_t), sizeof(long), + inner_lru_hash_size, 0); + if (inner_lru_map_fds[cpu] == -1) { + printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", + strerror(errno), errno); + exit(1); + } + } else { + inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx]; + } + + ret = bpf_map_update_elem(outer_fd, &cpu, + &inner_lru_map_fds[cpu], + BPF_ANY); + if (ret) { + printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n", + cpu, strerror(errno), errno); + exit(1); + } + } + + in6.sin6_addr.s6_addr16[0] = 0xdead; + in6.sin6_addr.s6_addr16[1] = 0xbeef; + + if (test == LRU_HASH_PREALLOC) { + test_name = "lru_hash_map_perf"; + in6.sin6_addr.s6_addr16[7] = 0; + } else if (test == NOCOMMON_LRU_HASH_PREALLOC) { + test_name = "nocommon_lru_hash_map_perf"; + in6.sin6_addr.s6_addr16[7] = 1; + } else if (test == INNER_LRU_HASH_PREALLOC) { + test_name = "inner_lru_hash_map_perf"; + in6.sin6_addr.s6_addr16[7] = 2; + } else { + assert(0); + } start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - syscall(__NR_getpid); - printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + for (i = 0; i < max_cnt; i++) { + ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); + assert(ret == -1 && errno == EBADF); + } + printf("%d:%s pre-alloc %lld events per sec\n", + cpu, test_name, + max_cnt * 1000000000ll / (time_get_ns() - start_time)); +} + +static void test_lru_hash_prealloc(int cpu) +{ + do_test_lru(LRU_HASH_PREALLOC, cpu); } -static void test_percpu_lru_hash_prealloc(int cpu) +static void test_nocommon_lru_hash_prealloc(int cpu) { - __u64 start_time; - int i; + do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu); +} - start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - syscall(__NR_getppid); - printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); +static void test_inner_lru_hash_prealloc(int cpu) +{ + do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); } static void test_percpu_hash_prealloc(int cpu) @@ -83,10 +171,10 @@ static void test_percpu_hash_prealloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_geteuid); printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } static void test_hash_kmalloc(int cpu) @@ -95,10 +183,10 @@ static void test_hash_kmalloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_getgid); printf("%d:hash_map_perf kmalloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } static void test_percpu_hash_kmalloc(int cpu) @@ -107,10 +195,10 @@ static void test_percpu_hash_kmalloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_getegid); printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } static void test_lpm_kmalloc(int cpu) @@ -119,40 +207,63 @@ static void test_lpm_kmalloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_gettid); printf("%d:lpm_perf kmalloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } -static void loop(int cpu) +static void test_hash_lookup(int cpu) { - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + __u64 start_time; + int i; - if (test_flags & HASH_PREALLOC) - test_hash_prealloc(cpu); + start_time = time_get_ns(); + for (i = 0; i < max_cnt; i++) + syscall(__NR_getpgid, 0); + printf("%d:hash_lookup %lld lookups per sec\n", + cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); +} - if (test_flags & PERCPU_HASH_PREALLOC) - test_percpu_hash_prealloc(cpu); +static void test_array_lookup(int cpu) +{ + __u64 start_time; + int i; - if (test_flags & HASH_KMALLOC) - test_hash_kmalloc(cpu); + start_time = time_get_ns(); + for (i = 0; i < max_cnt; i++) + syscall(__NR_getpgrp, 0); + printf("%d:array_lookup %lld lookups per sec\n", + cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); +} - if (test_flags & PERCPU_HASH_KMALLOC) - test_percpu_hash_kmalloc(cpu); +typedef void (*test_func)(int cpu); +const test_func test_funcs[] = { + [HASH_PREALLOC] = test_hash_prealloc, + [PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc, + [HASH_KMALLOC] = test_hash_kmalloc, + [PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc, + [LRU_HASH_PREALLOC] = test_lru_hash_prealloc, + [NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc, + [LPM_KMALLOC] = test_lpm_kmalloc, + [HASH_LOOKUP] = test_hash_lookup, + [ARRAY_LOOKUP] = test_array_lookup, + [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, +}; - if (test_flags & LRU_HASH_PREALLOC) - test_lru_hash_prealloc(cpu); +static void loop(int cpu) +{ + cpu_set_t cpuset; + int i; - if (test_flags & PERCPU_LRU_HASH_PREALLOC) - test_percpu_lru_hash_prealloc(cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); - if (test_flags & LPM_KMALLOC) - test_lpm_kmalloc(cpu); + for (i = 0; i < NR_TESTS; i++) { + if (check_test_flags(i)) + test_funcs[i](cpu); + } } static void run_perf_test(int tasks) @@ -209,6 +320,38 @@ static void fill_lpm_trie(void) assert(!r); } +static void fixup_map(struct bpf_map_data *map, int idx) +{ + int i; + + if (!strcmp("inner_lru_hash_map", map->name)) { + inner_lru_hash_idx = idx; + inner_lru_hash_size = map->def.max_entries; + } + + if (!strcmp("array_of_lru_hashs", map->name)) { + if (inner_lru_hash_idx == -1) { + printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n"); + exit(1); + } + map->def.inner_map_idx = inner_lru_hash_idx; + array_of_lru_hashs_idx = idx; + } + + if (num_map_entries <= 0) + return; + + inner_lru_hash_size = num_map_entries; + + /* Only change the max_entries for the enabled test(s) */ + for (i = 0; i < NR_TESTS; i++) { + if (!strcmp(test_map_names[i], map->name) && + (check_test_flags(i))) { + map->def.max_entries = num_map_entries; + } + } +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -224,7 +367,13 @@ int main(int argc, char **argv) if (argc > 2) num_cpu = atoi(argv[2]) ? : num_cpu; - if (load_bpf_file(filename)) { + if (argc > 3) + num_map_entries = atoi(argv[3]); + + if (argc > 4) + max_cnt = atoi(argv[4]); + + if (load_bpf_file_fixup_map(filename, fixup_map)) { printf("%s", bpf_log_buf); return 1; } diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 9cce2a66bd664c40668b3c8ca0dd12bd148bb21b..512f87a5fd20e08d0b97893b17e20e971425830e 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -100,6 +100,7 @@ int main(int argc, char **argv) setrlimit(RLIMIT_MEMLOCK, &r); signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); diff --git a/samples/bpf/run_cookie_uid_helper_example.sh b/samples/bpf/run_cookie_uid_helper_example.sh new file mode 100755 index 0000000000000000000000000000000000000000..f898cfa2b1aa52f71a65db469450d4d001b08b2c --- /dev/null +++ b/samples/bpf/run_cookie_uid_helper_example.sh @@ -0,0 +1,14 @@ +#!/bin/bash +local_dir="$(pwd)" +root_dir=$local_dir/../.. +mnt_dir=$(mktemp -d --tmp) + +on_exit() { + iptables -D OUTPUT -m bpf --object-pinned ${mnt_dir}/bpf_prog -j ACCEPT + umount ${mnt_dir} + rm -r ${mnt_dir} +} + +trap on_exit EXIT +mount -t bpf bpf ${mnt_dir} +./per_socket_stats_example ${mnt_dir}/bpf_prog $1 diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index be59d7dcbdde3664c573a78d6d27adee03234a16..4ed690b907ff844961499d492350746065e423fb 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -180,6 +180,7 @@ int main(int argc, char **argv) return 1; } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); /* do sampling */ printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n", diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index d96dc88d3b04067135fe80c95628fca37a29c022..73c35714226842b248027b214989e56570e4378f 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -25,7 +25,9 @@ #include "bpf_util.h" #define min(a, b) ((a) < (b) ? (a) : (b)) -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#ifndef offsetof +# define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c new file mode 100644 index 0000000000000000000000000000000000000000..42c44d091dd176b87bb92d1aeb849b1f8ea8cdd6 --- /dev/null +++ b/samples/bpf/test_map_in_map_kern.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define MAX_NR_PORTS 65536 + +/* map #0 */ +struct bpf_map_def SEC("maps") port_a = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = MAX_NR_PORTS, +}; + +/* map #1 */ +struct bpf_map_def SEC("maps") port_h = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* map #2 */ +struct bpf_map_def SEC("maps") reg_result_h = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* map #3 */ +struct bpf_map_def SEC("maps") inline_result_h = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* map #4 */ /* Test case #0 */ +struct bpf_map_def SEC("maps") a_of_port_a = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(u32), + .inner_map_idx = 0, /* map_fd[0] is port_a */ + .max_entries = MAX_NR_PORTS, +}; + +/* map #5 */ /* Test case #1 */ +struct bpf_map_def SEC("maps") h_of_port_a = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(u32), + .inner_map_idx = 0, /* map_fd[0] is port_a */ + .max_entries = 1, +}; + +/* map #6 */ /* Test case #2 */ +struct bpf_map_def SEC("maps") h_of_port_h = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(u32), + .inner_map_idx = 1, /* map_fd[1] is port_h */ + .max_entries = 1, +}; + +static __always_inline int do_reg_lookup(void *inner_map, u32 port) +{ + int *result; + + result = bpf_map_lookup_elem(inner_map, &port); + return result ? *result : -ENOENT; +} + +static __always_inline int do_inline_array_lookup(void *inner_map, u32 port) +{ + int *result; + + if (inner_map != &port_a) + return -EINVAL; + + result = bpf_map_lookup_elem(&port_a, &port); + return result ? *result : -ENOENT; +} + +static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port) +{ + int *result; + + if (inner_map != &port_h) + return -EINVAL; + + result = bpf_map_lookup_elem(&port_h, &port); + return result ? *result : -ENOENT; +} + +SEC("kprobe/sys_connect") +int trace_sys_connect(struct pt_regs *ctx) +{ + struct sockaddr_in6 *in6; + u16 test_case, port, dst6[8]; + int addrlen, ret, inline_ret, ret_key = 0; + u32 port_key; + void *outer_map, *inner_map; + bool inline_hash = false; + + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); + addrlen = (int)PT_REGS_PARM3(ctx); + + if (addrlen != sizeof(*in6)) + return 0; + + ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); + if (ret) { + inline_ret = ret; + goto done; + } + + if (dst6[0] != 0xdead || dst6[1] != 0xbeef) + return 0; + + test_case = dst6[7]; + + ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port); + if (ret) { + inline_ret = ret; + goto done; + } + + port_key = port; + + ret = -ENOENT; + if (test_case == 0) { + outer_map = &a_of_port_a; + } else if (test_case == 1) { + outer_map = &h_of_port_a; + } else if (test_case == 2) { + outer_map = &h_of_port_h; + } else { + ret = __LINE__; + inline_ret = ret; + goto done; + } + + inner_map = bpf_map_lookup_elem(outer_map, &port_key); + if (!inner_map) { + ret = __LINE__; + inline_ret = ret; + goto done; + } + + ret = do_reg_lookup(inner_map, port_key); + + if (test_case == 0 || test_case == 1) + inline_ret = do_inline_array_lookup(inner_map, port_key); + else + inline_ret = do_inline_hash_lookup(inner_map, port_key); + +done: + bpf_map_update_elem(®_result_h, &ret_key, &ret, BPF_ANY); + bpf_map_update_elem(&inline_result_h, &ret_key, &inline_ret, BPF_ANY); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c new file mode 100644 index 0000000000000000000000000000000000000000..f62fdc2bd4281005f959cdb85cf1aafe2dc08193 --- /dev/null +++ b/samples/bpf/test_map_in_map_user.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +#define PORT_A (map_fd[0]) +#define PORT_H (map_fd[1]) +#define REG_RESULT_H (map_fd[2]) +#define INLINE_RESULT_H (map_fd[3]) +#define A_OF_PORT_A (map_fd[4]) /* Test case #0 */ +#define H_OF_PORT_A (map_fd[5]) /* Test case #1 */ +#define H_OF_PORT_H (map_fd[6]) /* Test case #2 */ + +static const char * const test_names[] = { + "Array of Array", + "Hash of Array", + "Hash of Hash", +}; + +#define NR_TESTS (sizeof(test_names) / sizeof(*test_names)) + +static void populate_map(uint32_t port_key, int magic_result) +{ + int ret; + + ret = bpf_map_update_elem(PORT_A, &port_key, &magic_result, BPF_ANY); + assert(!ret); + + ret = bpf_map_update_elem(PORT_H, &port_key, &magic_result, + BPF_NOEXIST); + assert(!ret); + + ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY); + assert(!ret); + + ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST); + assert(!ret); + + ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST); + assert(!ret); +} + +static void test_map_in_map(void) +{ + struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 }; + uint32_t result_key = 0, port_key; + int result, inline_result; + int magic_result = 0xfaceb00c; + int ret; + int i; + + port_key = rand() & 0x00FF; + populate_map(port_key, magic_result); + + in6.sin6_addr.s6_addr16[0] = 0xdead; + in6.sin6_addr.s6_addr16[1] = 0xbeef; + in6.sin6_port = port_key; + + for (i = 0; i < NR_TESTS; i++) { + printf("%s: ", test_names[i]); + + in6.sin6_addr.s6_addr16[7] = i; + ret = connect(-1, (struct sockaddr *)&in6, sizeof(in6)); + assert(ret == -1 && errno == EBADF); + + ret = bpf_map_lookup_elem(REG_RESULT_H, &result_key, &result); + assert(!ret); + + ret = bpf_map_lookup_elem(INLINE_RESULT_H, &result_key, + &inline_result); + assert(!ret); + + if (result != magic_result || inline_result != magic_result) { + printf("Error. result:%d inline_result:%d\n", + result, inline_result); + exit(1); + } + + bpf_map_delete_elem(REG_RESULT_H, &result_key); + bpf_map_delete_elem(INLINE_RESULT_H, &result_key); + + printf("Pass\n"); + } +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + + assert(!setrlimit(RLIMIT_MEMLOCK, &r)); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + test_map_in_map(); + + return 0; +} diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 0c5561d193a487f2257ccece359530b97b06a170..fa4336423da569f31fc432b9adc61f0835bb3f69 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -192,6 +192,7 @@ int main(int argc, char **argv) setrlimit(RLIMIT_MEMLOCK, &r); signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index ded9804c503418c9ed46b1ffa677455720e33a7b..7321a3f253c991f88e96f2a53e6ffa3e7c5f1719 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" @@ -112,6 +113,7 @@ static void int_exit(int sig) int main(int ac, char **argv) { + struct rlimit r = {1024*1024, RLIM_INFINITY}; char filename[256]; long key, next_key, value; FILE *f; @@ -119,7 +121,13 @@ int main(int ac, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); /* start 'ping' in the background to have some kfree_skb events */ f = popen("ping -c5 localhost", "r"); diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 8f7d199d59459cf16520b62a0ef12dcb7e6f6c04..fe372239d5054c44a7dcb70f05530aa19eb3cfc9 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" @@ -112,11 +113,17 @@ static void print_hist(int fd) int main(int ac, char **argv) { + struct rlimit r = {1024*1024, RLIM_INFINITY}; char filename[256]; int i; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index 03449f773cb1f4da6b1e42c272cc1c83da36074f..22c644f1f4c378db2877e94977dcfdd5df87118a 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include "libbpf.h" #include "bpf_load.h" @@ -50,11 +52,17 @@ static void print_old_objects(int fd) int main(int ac, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; int i; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); + return 1; + } + if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index d2be65d1fd86ed199909bc6c68327f7d31e6b35a..2431c0321b712ce54d15414836b148d483a8a632 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -5,6 +5,7 @@ * License as published by the Free Software Foundation. */ #include +#include #include #include #include @@ -12,16 +13,18 @@ #include #include #include +#include #include "bpf_load.h" #include "bpf_util.h" #include "libbpf.h" static int ifindex; +static __u32 xdp_flags; static void int_exit(int sig) { - set_link_xdp_fd(ifindex, -1); + set_link_xdp_fd(ifindex, -1, xdp_flags); exit(0); } @@ -54,18 +57,43 @@ static void poll_stats(int interval) } } -int main(int ac, char **argv) +static void usage(const char *prog) { - char filename[256]; + fprintf(stderr, + "usage: %s [OPTS] IFINDEX\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n", + prog); +} - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); +int main(int argc, char **argv) +{ + const char *optstr = "SN"; + char filename[256]; + int opt; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } - if (ac != 2) { - printf("usage: %s IFINDEX\n", argv[0]); + if (optind == argc) { + usage(basename(argv[0])); return 1; } + ifindex = strtoul(argv[optind], NULL, 0); - ifindex = strtoul(argv[1], NULL, 0); + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); @@ -78,8 +106,9 @@ int main(int ac, char **argv) } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); - if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 70e192fc61aa4b9a317a1e8b11df0d36034cc877..715cd12eaca5c0729d11e0918e564db9e2fa980f 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -5,6 +5,7 @@ * License as published by the Free Software Foundation. */ #include +#include #include #include #include @@ -24,11 +25,12 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; +static __u32 xdp_flags = 0; static void int_exit(int sig) { if (ifindex > -1) - set_link_xdp_fd(ifindex, -1); + set_link_xdp_fd(ifindex, -1, xdp_flags); exit(0); } @@ -77,6 +79,8 @@ static void usage(const char *cmd) printf(" -m Used in sending the IP Tunneled pkt\n"); printf(" -T Default: 0 (forever)\n"); printf(" -P Default is TCP\n"); + printf(" -S use skb-mode\n"); + printf(" -N enforce native mode\n"); printf(" -h Display this help\n"); } @@ -136,7 +140,7 @@ int main(int argc, char **argv) { unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:h"; + const char *optstr = "i:a:p:s:d:m:T:P:SNh"; int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -201,6 +205,12 @@ int main(int argc, char **argv) case 'T': kill_after_s = atoi(optarg); break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; default: usage(argv[0]); return 1; @@ -234,6 +244,7 @@ int main(int argc, char **argv) } signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); while (min_port <= max_port) { vip.dport = htons(min_port++); @@ -243,14 +254,14 @@ int main(int argc, char **argv) } } - if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } poll_stats(kill_after_s); - set_link_xdp_fd(ifindex, -1); + set_link_xdp_fd(ifindex, -1, xdp_flags); return 0; } diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index e34f871e69b1ae60ebd8718dd1f0063a4db00e52..84795223f15f7e9611293f2812f68990c0cf29d0 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -17,6 +17,8 @@ * along with this program; if not, see . */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -69,6 +71,21 @@ static int livepatch_init(void) { int ret; + if (!klp_have_reliable_stack() && !patch.immediate) { + /* + * WARNING: Be very careful when using 'patch.immediate' in + * your patches. It's ok to use it for simple patches like + * this, but for more complex patches which change function + * semantics, locking semantics, or data structures, it may not + * be safe. Use of this option will also prevent removal of + * the patch. + * + * See Documentation/livepatch/livepatch.txt for more details. + */ + patch.immediate = true; + pr_notice("The consistency model isn't supported for your architecture. Bypassing safety mechanisms and applying the patch immediately.\n"); + } + ret = klp_register_patch(&patch); if (ret) return ret; @@ -82,7 +99,6 @@ static int livepatch_init(void) static void livepatch_exit(void) { - WARN_ON(klp_disable_patch(&patch)); WARN_ON(klp_unregister_patch(&patch)); } diff --git a/samples/mei/TODO b/samples/mei/TODO deleted file mode 100644 index 6b3625d3058cc16fbf341528f65a4e209a79100b..0000000000000000000000000000000000000000 --- a/samples/mei/TODO +++ /dev/null @@ -1,2 +0,0 @@ -TODO: - - Cleanup and split the timer function diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index afe3fd3af1e40616857b3e6c425be632c1fa2667..61f87a99bf0a1c512e572d3cbdcf4b4b5d7ae785 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -116,12 +116,12 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) cc-option = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) # cc-option-yn # Usage: flag := $(call cc-option-yn,-march=winchip-c6) cc-option-yn = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) # cc-option-align # Prefix align with either -falign or -malign @@ -131,7 +131,7 @@ cc-option-align = $(subst -functions=0,,\ # cc-disable-warning # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) cc-disable-warning = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) # cc-name # Expands to either gcc or clang diff --git a/scripts/Makefile.build b/scripts/Makefile.build index d883116ebaa452d9c2f6c657de53121ebd9d50bd..733e044fff8b37bae101788ac82be43fafa1a942 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -177,6 +177,14 @@ cmd_cc_symtypes_c = \ $(obj)/%.symtypes : $(src)/%.c FORCE $(call cmd,cc_symtypes_c) +# LLVM assembly +# Generate .ll files from .c +quiet_cmd_cc_ll_c = CC $(quiet_modtag) $@ + cmd_cc_ll_c = $(CC) $(c_flags) -emit-llvm -S -o $@ $< + +$(obj)/%.ll: $(src)/%.c FORCE + $(call if_changed_dep,cc_ll_c) + # C (.c) files # The C file is compiled and updated dependency information is generated. # (See cmd_cc_o_c + relevant part of rule_cc_o_c) @@ -272,14 +280,14 @@ define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ $(cmd_modversions_c) \ - $(cmd_objtool) \ + $(call echo-cmd,objtool) $(cmd_objtool) \ $(call echo-cmd,record_mcount) $(cmd_record_mcount) endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ $(cmd_modversions_S) \ - $(cmd_objtool) + $(call echo-cmd,objtool) $(cmd_objtool) endef # List module undefined symbols (or empty line if not enabled) diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index a1be75d0a5fd3fbf4742e555046896ea6fa6fe65..34614a48b717eafa2c15f418e6e6769905e32ec1 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -20,12 +20,6 @@ include include/config/auto.conf include scripts/Kbuild.include include $(src)/Makefile -PHONY += __dtbs_install_prep -__dtbs_install_prep: -ifeq ("$(dtbinst-root)", "$(obj)") - $(Q)mkdir -p $(INSTALL_DTBS_PATH) -endif - dtbinst-files := $(dtb-y) dtbinst-dirs := $(dts-dirs) @@ -35,8 +29,6 @@ quiet_cmd_dtb_install = INSTALL $< install-dir = $(patsubst $(dtbinst-root)%,$(INSTALL_DTBS_PATH)%,$(obj)) -$(dtbinst-files) $(dtbinst-dirs): | __dtbs_install_prep - $(dtbinst-files): %.dtb: $(obj)/%.dtb $(call cmd,dtb_install,$(install-dir)) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 7c321a603b079d355bd127aebb04adc0294b4ba5..fb3522fd87029f8e841469d2bb300ef6eb7fe1cb 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -64,7 +64,6 @@ ifeq ($(cc-name),clang) KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides) KBUILD_CFLAGS += $(call cc-disable-warning, unused-value) KBUILD_CFLAGS += $(call cc-disable-warning, format) -KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option) KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare) KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length) KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized) diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index 1106d6ca3a384baa81b077b599d83b8e9941108e..ce753a408c56823dbd1c4b5d4fb5cfe88e7054c4 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -1,33 +1,56 @@ # ========================================================================== # Installing headers # -# header-y - list files to be installed. They are preprocessed -# to remove __KERNEL__ section of the file -# genhdr-y - Same as header-y but in a generated/ directory +# All headers under include/uapi, include/generated/uapi, +# arch//include/uapi and arch//include/generated/uapi are +# exported. +# They are preprocessed to remove __KERNEL__ section of the file. # # ========================================================================== +PHONY := __headers +__headers: + +include scripts/Kbuild.include + +srcdir := $(srctree)/$(obj) +subdirs := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.)) +# caller may set destination dir (when installing to asm/) +_dst := $(if $(dst),$(dst),$(obj)) + +# Recursion +__headers: $(subdirs) + +.PHONY: $(subdirs) +$(subdirs): + $(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@ + +# Skip header install/check for include/uapi and arch/$(hdr-arch)/include/uapi. +# We have only sub-directories there. +skip-inst := $(if $(filter %/uapi,$(obj)),1) + +ifeq ($(skip-inst),) + # generated header directory gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj))) +# Kbuild file is optional kbuild-file := $(srctree)/$(obj)/Kbuild -include $(kbuild-file) - -# called may set destination dir (when installing to asm/) -_dst := $(if $(destination-y),$(destination-y),$(if $(dst),$(dst),$(obj))) +-include $(kbuild-file) old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild ifneq ($(wildcard $(old-kbuild-file)),) include $(old-kbuild-file) endif -include scripts/Kbuild.include - installdir := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst)) -header-y := $(sort $(header-y)) -subdirs := $(patsubst %/,%,$(filter %/, $(header-y))) -header-y := $(filter-out %/, $(header-y)) +gendir := $(objtree)/$(gen) +header-files := $(notdir $(wildcard $(srcdir)/*.h)) +header-files += $(notdir $(wildcard $(srcdir)/*.agh)) +header-files := $(filter-out $(no-export-headers), $(header-files)) +genhdr-files := $(notdir $(wildcard $(gendir)/*.h)) +genhdr-files := $(filter-out $(header-files), $(genhdr-files)) # files used to track state of install/check install-file := $(installdir)/.install @@ -35,36 +58,20 @@ check-file := $(installdir)/.check # generic-y list all files an architecture uses from asm-generic # Use this to build a list of headers which require a wrapper -wrapper-files := $(filter $(header-y), $(generic-y)) - -srcdir := $(srctree)/$(obj) -gendir := $(objtree)/$(gen) - -oldsrcdir := $(srctree)/$(subst /uapi,,$(obj)) +generic-files := $(notdir $(wildcard $(srctree)/include/uapi/asm-generic/*.h)) +wrapper-files := $(filter $(generic-files), $(generic-y)) +wrapper-files := $(filter-out $(header-files), $(wrapper-files)) # all headers files for this dir -header-y := $(filter-out $(generic-y), $(header-y)) -all-files := $(header-y) $(genhdr-y) $(wrapper-files) +all-files := $(header-files) $(genhdr-files) $(wrapper-files) output-files := $(addprefix $(installdir)/, $(all-files)) -input-files1 := $(foreach hdr, $(header-y), \ - $(if $(wildcard $(srcdir)/$(hdr)), \ - $(wildcard $(srcdir)/$(hdr))) \ - ) -input-files1-name := $(notdir $(input-files1)) -input-files2 := $(foreach hdr, $(header-y), \ - $(if $(wildcard $(srcdir)/$(hdr)),, \ - $(if $(wildcard $(oldsrcdir)/$(hdr)), \ - $(wildcard $(oldsrcdir)/$(hdr)), \ - $(error Missing UAPI file $(srcdir)/$(hdr))) \ - )) -input-files2-name := $(notdir $(input-files2)) -input-files3 := $(foreach hdr, $(genhdr-y), \ - $(if $(wildcard $(gendir)/$(hdr)), \ - $(wildcard $(gendir)/$(hdr)), \ - $(error Missing generated UAPI file $(gendir)/$(hdr)) \ - )) -input-files3-name := $(notdir $(input-files3)) +ifneq ($(mandatory-y),) +missing := $(filter-out $(all-files),$(mandatory-y)) +ifneq ($(missing),) +$(error Some mandatory headers ($(missing)) are missing in $(obj)) +endif +endif # Work out what needs to be removed oldheaders := $(patsubst $(installdir)/%,%,$(wildcard $(installdir)/*.h)) @@ -78,9 +85,8 @@ printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@)) quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\ file$(if $(word 2, $(all-files)),s)) cmd_install = \ - $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(input-files1-name); \ - $(CONFIG_SHELL) $< $(installdir) $(oldsrcdir) $(input-files2-name); \ - $(CONFIG_SHELL) $< $(installdir) $(gendir) $(input-files3-name); \ + $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-files); \ + $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-files); \ for F in $(wrapper-files); do \ echo "\#include " > $(installdir)/$$F; \ done; \ @@ -98,21 +104,21 @@ quiet_cmd_check = CHECK $(printdir) ($(words $(all-files)) files) $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \ touch $@ -PHONY += __headersinst __headerscheck - ifndef HDRCHECK # Rules for installing headers -__headersinst: $(subdirs) $(install-file) +__headers: $(install-file) @: targets += $(install-file) -$(install-file): scripts/headers_install.sh $(input-files1) $(input-files2) $(input-files3) FORCE +$(install-file): scripts/headers_install.sh \ + $(addprefix $(srcdir)/,$(header-files)) \ + $(addprefix $(gendir)/,$(genhdr-files)) FORCE $(if $(unwanted),$(call cmd,remove),) $(if $(wildcard $(dir $@)),,$(shell mkdir -p $(dir $@))) $(call if_changed,install) else -__headerscheck: $(subdirs) $(check-file) +__headers: $(check-file) @: targets += $(check-file) @@ -121,11 +127,6 @@ $(check-file): scripts/headers_check.pl $(output-files) FORCE endif -# Recursion -.PHONY: $(subdirs) -$(subdirs): - $(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@ - targets := $(wildcard $(sort $(targets))) cmd_files := $(wildcard \ $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) @@ -134,6 +135,8 @@ ifneq ($(cmd_files),) include $(cmd_files) endif +endif # skip-inst + .PHONY: $(PHONY) PHONY += FORCE FORCE: ; diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 7234e61e7ce370a775ec6981b391b6d102a01770..58c05e5d9870b6c18a72da7dc44ff3112994946d 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -175,7 +175,7 @@ ld_flags = $(LDFLAGS) $(ldflags-y) dtc_cpp_flags = -Wp,-MD,$(depfile).pre.tmp -nostdinc \ -I$(srctree)/arch/$(SRCARCH)/boot/dts \ - -I$(srctree)/arch/$(SRCARCH)/boot/dts/include \ + -I$(srctree)/scripts/dtc/include-prefixes \ -I$(srctree)/drivers/of/testcase-data \ -undef -D__DTS__ @@ -280,9 +280,21 @@ DTC ?= $(objtree)/scripts/dtc/dtc # Disable noisy checks by default ifeq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),) -DTC_FLAGS += -Wno-unit_address_vs_reg +DTC_FLAGS += -Wno-unit_address_vs_reg \ + -Wno-simple_bus_reg \ + -Wno-unit_address_format \ + -Wno-pci_bridge \ + -Wno-pci_device_bus_num \ + -Wno-pci_device_reg endif +ifeq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),2) +DTC_FLAGS += -Wnode_name_chars_strict \ + -Wproperty_name_chars_strict +endif + +DTC_FLAGS += $(DTC_FLAGS_$(basetarget)) + # Generate an assembly file to wrap the output of the device tree compiler quiet_cmd_dt_S_dtb= DTB $@ cmd_dt_S_dtb= \ @@ -408,3 +420,34 @@ quiet_cmd_xzmisc = XZMISC $@ cmd_xzmisc = (cat $(filter-out FORCE,$^) | \ xz --check=crc32 --lzma2=dict=1MiB) > $@ || \ (rm -f $@ ; false) + +# ASM offsets +# --------------------------------------------------------------------------- + +# Default sed regexp - multiline due to syntax constraints +# +# Use [:space:] because LLVM's integrated assembler inserts around +# the .ascii directive whereas GCC keeps the as-is. +define sed-offsets + 's:^[[:space:]]*\.ascii[[:space:]]*"\(.*\)".*:\1:; \ + /^->/{s:->#\(.*\):/* \1 */:; \ + s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ + s:->::; p;}' +endef + +# Use filechk to avoid rebuilds when a header changes, but the resulting file +# does not +define filechk_offsets + (set -e; \ + echo "#ifndef $2"; \ + echo "#define $2"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-offsets); \ + echo ""; \ + echo "#endif" ) +endef diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index baa3c7be04ad8897761f92abd2b5d428d24f8471..4b9569fa931b90200a2c809598fe3765b157fbb7 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -55,6 +55,7 @@ my $spelling_file = "$D/spelling.txt"; my $codespell = 0; my $codespellfile = "/usr/share/codespell/dictionary.txt"; my $conststructsfile = "$D/const_structs.checkpatch"; +my $typedefsfile = ""; my $color = 1; my $allow_c99_comments = 1; @@ -113,6 +114,7 @@ Options: --codespell Use the codespell dictionary for spelling/typos (default:/usr/share/codespell/dictionary.txt) --codespellfile Use this codespell dictionary + --typedefsfile Read additional types from this file --color Use colors when output is STDOUT (default: on) -h, --help, --version display this help and exit @@ -208,6 +210,7 @@ GetOptions( 'test-only=s' => \$tst_only, 'codespell!' => \$codespell, 'codespellfile=s' => \$codespellfile, + 'typedefsfile=s' => \$typedefsfile, 'color!' => \$color, 'h|help' => \$help, 'version' => \$help @@ -629,29 +632,44 @@ if ($codespell) { $misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix; -my $const_structs = ""; -if (open(my $conststructs, '<', $conststructsfile)) { - while (<$conststructs>) { - my $line = $_; +sub read_words { + my ($wordsRef, $file) = @_; - $line =~ s/\s*\n?$//g; - $line =~ s/^\s*//g; + if (open(my $words, '<', $file)) { + while (<$words>) { + my $line = $_; - next if ($line =~ m/^\s*#/); - next if ($line =~ m/^\s*$/); - if ($line =~ /\s/) { - print("$conststructsfile: '$line' invalid - ignored\n"); - next; - } + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; - $const_structs .= '|' if ($const_structs ne ""); - $const_structs .= $line; + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + if ($line =~ /\s/) { + print("$file: '$line' invalid - ignored\n"); + next; + } + + $$wordsRef .= '|' if ($$wordsRef ne ""); + $$wordsRef .= $line; + } + close($file); + return 1; } - close($conststructsfile); -} else { - warn "No structs that should be const will be found - file '$conststructsfile': $!\n"; + + return 0; } +my $const_structs = ""; +read_words(\$const_structs, $conststructsfile) + or warn "No structs that should be const will be found - file '$conststructsfile': $!\n"; + +my $typeOtherTypedefs = ""; +if (length($typedefsfile)) { + read_words(\$typeOtherTypedefs, $typedefsfile) + or warn "No additional types will be considered - file '$typedefsfile': $!\n"; +} +$typeTypedefs .= '|' . $typeOtherTypedefs if ($typeOtherTypedefs ne ""); + sub build_types { my $mods = "(?x: \n" . join("|\n ", (@modifierList, @modifierListFile)) . "\n)"; my $all = "(?x: \n" . join("|\n ", (@typeList, @typeListFile)) . "\n)"; @@ -2195,8 +2213,7 @@ sub process { } #next; } - if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { - my $context = $4; + if ($rawline =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { $realline=$1-1; if (defined $2) { $realcnt=$3+1; @@ -2205,12 +2222,6 @@ sub process { } $in_comment = 0; - if ($context =~ /\b(\w+)\s*\(/) { - $context_function = $1; - } else { - undef $context_function; - } - # Guestimate if this is a continuing comment. Run # the context looking for a comment "edge". If this # edge is a close comment then we must be in a comment @@ -2281,7 +2292,8 @@ sub process { #extract the line range in the file after the patch is applied if (!$in_commit_log && - $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { + $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@(.*)/) { + my $context = $4; $is_patch = 1; $first_line = $linenr + 1; $realline=$1-1; @@ -2297,6 +2309,11 @@ sub process { %suppress_whiletrailers = (); %suppress_export = (); $suppress_statement = 0; + if ($context =~ /\b(\w+)\s*\(/) { + $context_function = $1; + } else { + undef $context_function; + } next; # track the line number as we move through the hunk, note that @@ -2539,6 +2556,7 @@ sub process { # Check for git id commit length and improperly formed commit descriptions if ($in_commit_log && !$commit_log_possible_stack_dump && $line !~ /^\s*(?:Link|Patchwork|http|https|BugLink):/i && + $line !~ /^This reverts commit [0-9a-f]{7,40}/ && ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i || ($line =~ /(?:\s|^)[0-9a-f]{12,40}(?:[\s"'\(\[]|$)/i && $line !~ /[\<\[][0-9a-f]{12,40}[\>\]]/i && @@ -2628,8 +2646,8 @@ sub process { # Check if it's the start of a commit log # (not a header line and we haven't seen the patch filename) if ($in_header_lines && $realfile =~ /^$/ && - !($rawline =~ /^\s+\S/ || - $rawline =~ /^(commit\b|from\b|[\w-]+:).*$/i)) { + !($rawline =~ /^\s+(?:\S|$)/ || + $rawline =~ /^(?:commit\b|from\b|[\w-]+:)/i)) { $in_header_lines = 0; $in_commit_log = 1; $has_commit_log = 1; @@ -2757,13 +2775,6 @@ sub process { #print "is_start<$is_start> is_end<$is_end> length<$length>\n"; } -# discourage the addition of CONFIG_EXPERIMENTAL in Kconfig. - if ($realfile =~ /Kconfig/ && - $line =~ /.\s*depends on\s+.*\bEXPERIMENTAL\b/) { - WARN("CONFIG_EXPERIMENTAL", - "Use of CONFIG_EXPERIMENTAL is deprecated. For alternatives, see https://lkml.org/lkml/2012/10/23/580\n"); - } - # discourage the use of boolean for type definition attributes of Kconfig options if ($realfile =~ /Kconfig/ && $line =~ /^\+\s*\bboolean\b/) { @@ -3133,6 +3144,17 @@ sub process { # check we are in a valid C source file if not then ignore this hunk next if ($realfile !~ /\.(h|c)$/); +# check if this appears to be the start function declaration, save the name + if ($sline =~ /^\+\{\s*$/ && + $prevline =~ /^\+(?:(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*)?($Ident)\(/) { + $context_function = $1; + } + +# check if this appears to be the end of function declaration + if ($sline =~ /^\+\}\s*$/) { + undef $context_function; + } + # check indentation of any line with a bare else # (but not if it is a multiple line "if (foo) return bar; else return baz;") # if the previous line is a break or return and is indented 1 tab more... @@ -3157,12 +3179,6 @@ sub process { } } -# discourage the addition of CONFIG_EXPERIMENTAL in #if(def). - if ($line =~ /^\+\s*\#\s*if.*\bCONFIG_EXPERIMENTAL\b/) { - WARN("CONFIG_EXPERIMENTAL", - "Use of CONFIG_EXPERIMENTAL is deprecated. For alternatives, see https://lkml.org/lkml/2012/10/23/580\n"); - } - # check for RCS/CVS revision markers if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) { WARN("CVS_KEYWORD", @@ -3338,7 +3354,7 @@ sub process { } # Check relative indent for conditionals and blocks. - if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { + if ($line =~ /\b(?:(?:if|while|for|(?:[a-z_]+|)for_each[a-z_]+)\s*\(|(?:do|else)\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) { ($stat, $cond, $line_nr_next, $remain_next, $off_next) = ctx_statement_block($linenr, $realcnt, 0) if (!defined $stat); @@ -3430,6 +3446,8 @@ sub process { if ($check && $s ne '' && (($sindent % 8) != 0 || ($sindent < $indent) || + ($sindent == $indent && + ($s !~ /^\s*(?:\}|\{|else\b)/)) || ($sindent > $indent + 8))) { WARN("SUSPECT_CODE_INDENT", "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); @@ -4851,8 +4869,10 @@ sub process { $dstat !~ /^\(\{/ && # ({... $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/) { - - if ($dstat =~ /;/) { + if ($dstat =~ /^\s*if\b/) { + ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", + "Macros starting with if should be enclosed by a do - while loop to avoid possible if/else logic defects\n" . "$herectx"); + } elsif ($dstat =~ /;/) { ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE", "Macros with multiple statements should be enclosed in a do - while loop\n" . "$herectx"); } else { @@ -5174,14 +5194,16 @@ sub process { "break quoted strings at a space character\n" . $hereprev); } -#check for an embedded function name in a string when the function is known -# as part of a diff. This does not work for -f --file checking as it -#depends on patch context providing the function name +# check for an embedded function name in a string when the function is known +# This does not work very well for -f --file checking as it depends on patch +# context providing the function name or a single line form for in-file +# function declarations if ($line =~ /^\+.*$String/ && defined($context_function) && - get_quoted_string($line, $rawline) =~ /\b$context_function\b/) { + get_quoted_string($line, $rawline) =~ /\b$context_function\b/ && + length(get_quoted_string($line, $rawline)) != (length($context_function) + 2)) { WARN("EMBEDDED_FUNCTION_NAME", - "Prefer using \"%s\", __func__ to embedded function names\n" . $herecurr); + "Prefer using '\"%s...\", __func__' to using '$context_function', this function's name, in a string\n" . $herecurr); } # check for spaces before a quoted newline @@ -5676,6 +5698,32 @@ sub process { } } + # check for vsprintf extension %p misuses + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s && + $1 !~ /^_*volatile_*$/) { + my $bad_extension = ""; + my $lc = $stat =~ tr@\n@@; + $lc = $lc + $linenr; + for (my $count = $linenr; $count <= $lc; $count++) { + my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0)); + $fmt =~ s/%%//g; + if ($fmt =~ /(\%[\*\d\.]*p(?![\WFfSsBKRraEhMmIiUDdgVCbGN]).)/) { + $bad_extension = $1; + last; + } + } + if ($bad_extension ne "") { + my $stat_real = raw_line($linenr, 0); + for (my $count = $linenr + 1; $count <= $lc; $count++) { + $stat_real = $stat_real . "\n" . raw_line($count, 0); + } + WARN("VSPRINTF_POINTER_EXTENSION", + "Invalid vsprintf pointer extension '$bad_extension'\n" . "$here\n$stat_real\n"); + } + } + # Check for misused memsets if ($^V && $^V ge 5.10.0 && defined $stat && @@ -5893,7 +5941,8 @@ sub process { # check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc if ($^V && $^V ge 5.10.0 && - $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) { + defined $stat && + $stat =~ /^\+\s*($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) { my $oldfunc = $3; my $a1 = $4; my $a2 = $10; @@ -5907,11 +5956,17 @@ sub process { } if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ && !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) { + my $ctx = ''; + my $herectx = $here . "\n"; + my $cnt = statement_rawlines($stat); + for (my $n = 0; $n < $cnt; $n++) { + $herectx .= raw_line($linenr, $n) . "\n"; + } if (WARN("ALLOC_WITH_MULTIPLY", - "Prefer $newfunc over $oldfunc with multiply\n" . $herecurr) && + "Prefer $newfunc over $oldfunc with multiply\n" . $herectx) && + $cnt == 1 && $fix) { $fixed[$fixlinenr] =~ s/\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)/$1 . ' = ' . "$newfunc(" . trim($r1) . ', ' . trim($r2)/e; - } } } @@ -6066,11 +6121,11 @@ sub process { } # check for various structs that are normally const (ops, kgdb, device_tree) +# and avoid what seem like struct definitions 'struct foo {' if ($line !~ /\bconst\b/ && - $line =~ /\bstruct\s+($const_structs)\b/) { + $line =~ /\bstruct\s+($const_structs)\b(?!\s*\{)/) { WARN("CONST_STRUCT", - "struct $1 should normally be const\n" . - $herecurr); + "struct $1 should normally be const\n" . $herecurr); } # use of NR_CPUS is usually wrong diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 3033be701e9a24c5c8c090ce6f453df070e8cb23..9d37aa4faf5ce8ce2719c71921ee7ab0eabc22a1 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -12,7 +12,6 @@ # sh64 port by Paul Mundt # Random bits by Matt Mackall # M68k port by Geert Uytterhoeven and Andreas Schwab -# AVR32 port by Haavard Skinnemoen (Atmel) # AArch64, PARISC ports by Kyle McMartin # sparc port by Martin Habets # @@ -51,10 +50,6 @@ my (@stack, $re, $dre, $x, $xs, $funcre); } elsif ($arch eq 'arm') { #c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64 $re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o; - } elsif ($arch eq 'avr32') { - #8000008a: 20 1d sub sp,4 - #80000ca8: fa cd 05 b0 sub sp,sp,1456 - $re = qr/^.*sub.*sp.*,([0-9]{1,8})/o; } elsif ($arch =~ /^x86(_64)?$/ || $arch =~ /^i[3456]86$/) { #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp # or diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index 2c9082ba61376fd7fb5dd375ca2afac1bb44d53a..116b7735ee9f7aad88b748de929be5b9b83d2859 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -148,6 +148,7 @@ cat << EOF #define __IGNORE_sysfs #define __IGNORE_uselib #define __IGNORE__sysctl +#define __IGNORE_arch_prctl /* ... including the "new" 32-bit uid syscalls */ #define __IGNORE_lchown32 diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c index 3d18e45374c83cc764d0481c1ecf305f596fc4e7..4b72b530c84f1f4b4e98d8342d9d62fefe8d34d6 100644 --- a/scripts/dtc/checks.c +++ b/scripts/dtc/checks.c @@ -72,17 +72,16 @@ struct check { #define CHECK(_nm, _fn, _d, ...) \ CHECK_ENTRY(_nm, _fn, _d, false, false, __VA_ARGS__) -#ifdef __GNUC__ -static inline void check_msg(struct check *c, const char *fmt, ...) __attribute__((format (printf, 2, 3))); -#endif -static inline void check_msg(struct check *c, const char *fmt, ...) +static inline void PRINTF(3, 4) check_msg(struct check *c, struct dt_info *dti, + const char *fmt, ...) { va_list ap; va_start(ap, fmt); if ((c->warn && (quiet < 1)) || (c->error && (quiet < 2))) { - fprintf(stderr, "%s (%s): ", + fprintf(stderr, "%s: %s (%s): ", + strcmp(dti->outname, "-") ? dti->outname : "", (c->error) ? "ERROR" : "Warning", c->name); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); @@ -90,11 +89,11 @@ static inline void check_msg(struct check *c, const char *fmt, ...) va_end(ap); } -#define FAIL(c, ...) \ - do { \ - TRACE((c), "\t\tFAILED at %s:%d", __FILE__, __LINE__); \ - (c)->status = FAILED; \ - check_msg((c), __VA_ARGS__); \ +#define FAIL(c, dti, ...) \ + do { \ + TRACE((c), "\t\tFAILED at %s:%d", __FILE__, __LINE__); \ + (c)->status = FAILED; \ + check_msg((c), dti, __VA_ARGS__); \ } while (0) static void check_nodes_props(struct check *c, struct dt_info *dti, struct node *node) @@ -127,7 +126,7 @@ static bool run_check(struct check *c, struct dt_info *dti) error = error || run_check(prq, dti); if (prq->status != PASSED) { c->status = PREREQ; - check_msg(c, "Failed prerequisite '%s'", + check_msg(c, dti, "Failed prerequisite '%s'", c->prereq[i]->name); } } @@ -157,7 +156,7 @@ static bool run_check(struct check *c, struct dt_info *dti) static inline void check_always_fail(struct check *c, struct dt_info *dti, struct node *node) { - FAIL(c, "always_fail check"); + FAIL(c, dti, "always_fail check"); } CHECK(always_fail, check_always_fail, NULL); @@ -172,7 +171,7 @@ static void check_is_string(struct check *c, struct dt_info *dti, return; /* Not present, assumed ok */ if (!data_is_one_string(prop->val)) - FAIL(c, "\"%s\" property in %s is not a string", + FAIL(c, dti, "\"%s\" property in %s is not a string", propname, node->fullpath); } #define WARNING_IF_NOT_STRING(nm, propname) \ @@ -191,7 +190,7 @@ static void check_is_cell(struct check *c, struct dt_info *dti, return; /* Not present, assumed ok */ if (prop->val.len != sizeof(cell_t)) - FAIL(c, "\"%s\" property in %s is not a single cell", + FAIL(c, dti, "\"%s\" property in %s is not a single cell", propname, node->fullpath); } #define WARNING_IF_NOT_CELL(nm, propname) \ @@ -213,7 +212,7 @@ static void check_duplicate_node_names(struct check *c, struct dt_info *dti, child2; child2 = child2->next_sibling) if (streq(child->name, child2->name)) - FAIL(c, "Duplicate node name %s", + FAIL(c, dti, "Duplicate node name %s", child->fullpath); } ERROR(duplicate_node_names, check_duplicate_node_names, NULL); @@ -228,7 +227,7 @@ static void check_duplicate_property_names(struct check *c, struct dt_info *dti, if (prop2->deleted) continue; if (streq(prop->name, prop2->name)) - FAIL(c, "Duplicate property name %s in %s", + FAIL(c, dti, "Duplicate property name %s in %s", prop->name, node->fullpath); } } @@ -239,6 +238,7 @@ ERROR(duplicate_property_names, check_duplicate_property_names, NULL); #define UPPERCASE "ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define DIGITS "0123456789" #define PROPNODECHARS LOWERCASE UPPERCASE DIGITS ",._+*#?-" +#define PROPNODECHARSSTRICT LOWERCASE UPPERCASE DIGITS ",-" static void check_node_name_chars(struct check *c, struct dt_info *dti, struct node *node) @@ -246,16 +246,27 @@ static void check_node_name_chars(struct check *c, struct dt_info *dti, int n = strspn(node->name, c->data); if (n < strlen(node->name)) - FAIL(c, "Bad character '%c' in node %s", + FAIL(c, dti, "Bad character '%c' in node %s", node->name[n], node->fullpath); } ERROR(node_name_chars, check_node_name_chars, PROPNODECHARS "@"); +static void check_node_name_chars_strict(struct check *c, struct dt_info *dti, + struct node *node) +{ + int n = strspn(node->name, c->data); + + if (n < node->basenamelen) + FAIL(c, dti, "Character '%c' not recommended in node %s", + node->name[n], node->fullpath); +} +CHECK(node_name_chars_strict, check_node_name_chars_strict, PROPNODECHARSSTRICT); + static void check_node_name_format(struct check *c, struct dt_info *dti, struct node *node) { if (strchr(get_unitname(node), '@')) - FAIL(c, "Node %s has multiple '@' characters in name", + FAIL(c, dti, "Node %s has multiple '@' characters in name", node->fullpath); } ERROR(node_name_format, check_node_name_format, NULL, &node_name_chars); @@ -274,11 +285,11 @@ static void check_unit_address_vs_reg(struct check *c, struct dt_info *dti, if (prop) { if (!unitname[0]) - FAIL(c, "Node %s has a reg or ranges property, but no unit name", + FAIL(c, dti, "Node %s has a reg or ranges property, but no unit name", node->fullpath); } else { if (unitname[0]) - FAIL(c, "Node %s has a unit name, but no reg property", + FAIL(c, dti, "Node %s has a unit name, but no reg property", node->fullpath); } } @@ -293,12 +304,44 @@ static void check_property_name_chars(struct check *c, struct dt_info *dti, int n = strspn(prop->name, c->data); if (n < strlen(prop->name)) - FAIL(c, "Bad character '%c' in property name \"%s\", node %s", + FAIL(c, dti, "Bad character '%c' in property name \"%s\", node %s", prop->name[n], prop->name, node->fullpath); } } ERROR(property_name_chars, check_property_name_chars, PROPNODECHARS); +static void check_property_name_chars_strict(struct check *c, + struct dt_info *dti, + struct node *node) +{ + struct property *prop; + + for_each_property(node, prop) { + const char *name = prop->name; + int n = strspn(name, c->data); + + if (n == strlen(prop->name)) + continue; + + /* Certain names are whitelisted */ + if (streq(name, "device_type")) + continue; + + /* + * # is only allowed at the beginning of property names not counting + * the vendor prefix. + */ + if (name[n] == '#' && ((n == 0) || (name[n-1] == ','))) { + name += n + 1; + n = strspn(name, c->data); + } + if (n < strlen(name)) + FAIL(c, dti, "Character '%c' not recommended in property name \"%s\", node %s", + name[n], prop->name, node->fullpath); + } +} +CHECK(property_name_chars_strict, check_property_name_chars_strict, PROPNODECHARSSTRICT); + #define DESCLABEL_FMT "%s%s%s%s%s" #define DESCLABEL_ARGS(node,prop,mark) \ ((mark) ? "value of " : ""), \ @@ -327,7 +370,7 @@ static void check_duplicate_label(struct check *c, struct dt_info *dti, return; if ((othernode != node) || (otherprop != prop) || (othermark != mark)) - FAIL(c, "Duplicate label '%s' on " DESCLABEL_FMT + FAIL(c, dti, "Duplicate label '%s' on " DESCLABEL_FMT " and " DESCLABEL_FMT, label, DESCLABEL_ARGS(node, prop, mark), DESCLABEL_ARGS(othernode, otherprop, othermark)); @@ -367,7 +410,7 @@ static cell_t check_phandle_prop(struct check *c, struct dt_info *dti, return 0; if (prop->val.len != sizeof(cell_t)) { - FAIL(c, "%s has bad length (%d) %s property", + FAIL(c, dti, "%s has bad length (%d) %s property", node->fullpath, prop->val.len, prop->name); return 0; } @@ -379,7 +422,7 @@ static cell_t check_phandle_prop(struct check *c, struct dt_info *dti, /* "Set this node's phandle equal to some * other node's phandle". That's nonsensical * by construction. */ { - FAIL(c, "%s in %s is a reference to another node", + FAIL(c, dti, "%s in %s is a reference to another node", prop->name, node->fullpath); } /* But setting this node's phandle equal to its own @@ -393,7 +436,7 @@ static cell_t check_phandle_prop(struct check *c, struct dt_info *dti, phandle = propval_cell(prop); if ((phandle == 0) || (phandle == -1)) { - FAIL(c, "%s has bad value (0x%x) in %s property", + FAIL(c, dti, "%s has bad value (0x%x) in %s property", node->fullpath, phandle, prop->name); return 0; } @@ -420,7 +463,7 @@ static void check_explicit_phandles(struct check *c, struct dt_info *dti, return; if (linux_phandle && phandle && (phandle != linux_phandle)) - FAIL(c, "%s has mismatching 'phandle' and 'linux,phandle'" + FAIL(c, dti, "%s has mismatching 'phandle' and 'linux,phandle'" " properties", node->fullpath); if (linux_phandle && !phandle) @@ -428,7 +471,7 @@ static void check_explicit_phandles(struct check *c, struct dt_info *dti, other = get_node_by_phandle(root, phandle); if (other && (other != node)) { - FAIL(c, "%s has duplicated phandle 0x%x (seen before at %s)", + FAIL(c, dti, "%s has duplicated phandle 0x%x (seen before at %s)", node->fullpath, phandle, other->fullpath); return; } @@ -453,7 +496,7 @@ static void check_name_properties(struct check *c, struct dt_info *dti, if ((prop->val.len != node->basenamelen+1) || (memcmp(prop->val.val, node->name, node->basenamelen) != 0)) { - FAIL(c, "\"name\" property in %s is incorrect (\"%s\" instead" + FAIL(c, dti, "\"name\" property in %s is incorrect (\"%s\" instead" " of base node name)", node->fullpath, prop->val.val); } else { /* The name property is correct, and therefore redundant. @@ -488,16 +531,16 @@ static void fixup_phandle_references(struct check *c, struct dt_info *dti, refnode = get_node_by_ref(dt, m->ref); if (! refnode) { if (!(dti->dtsflags & DTSF_PLUGIN)) - FAIL(c, "Reference to non-existent node or " + FAIL(c, dti, "Reference to non-existent node or " "label \"%s\"\n", m->ref); else /* mark the entry as unresolved */ - *((cell_t *)(prop->val.val + m->offset)) = + *((fdt32_t *)(prop->val.val + m->offset)) = cpu_to_fdt32(0xffffffff); continue; } phandle = get_node_phandle(dt, refnode); - *((cell_t *)(prop->val.val + m->offset)) = cpu_to_fdt32(phandle); + *((fdt32_t *)(prop->val.val + m->offset)) = cpu_to_fdt32(phandle); } } } @@ -520,7 +563,7 @@ static void fixup_path_references(struct check *c, struct dt_info *dti, refnode = get_node_by_ref(dt, m->ref); if (!refnode) { - FAIL(c, "Reference to non-existent node or label \"%s\"\n", + FAIL(c, dti, "Reference to non-existent node or label \"%s\"\n", m->ref); continue; } @@ -579,19 +622,19 @@ static void check_reg_format(struct check *c, struct dt_info *dti, return; /* No "reg", that's fine */ if (!node->parent) { - FAIL(c, "Root node has a \"reg\" property"); + FAIL(c, dti, "Root node has a \"reg\" property"); return; } if (prop->val.len == 0) - FAIL(c, "\"reg\" property in %s is empty", node->fullpath); + FAIL(c, dti, "\"reg\" property in %s is empty", node->fullpath); addr_cells = node_addr_cells(node->parent); size_cells = node_size_cells(node->parent); entrylen = (addr_cells + size_cells) * sizeof(cell_t); if (!entrylen || (prop->val.len % entrylen) != 0) - FAIL(c, "\"reg\" property in %s has invalid length (%d bytes) " + FAIL(c, dti, "\"reg\" property in %s has invalid length (%d bytes) " "(#address-cells == %d, #size-cells == %d)", node->fullpath, prop->val.len, addr_cells, size_cells); } @@ -608,7 +651,7 @@ static void check_ranges_format(struct check *c, struct dt_info *dti, return; if (!node->parent) { - FAIL(c, "Root node has a \"ranges\" property"); + FAIL(c, dti, "Root node has a \"ranges\" property"); return; } @@ -620,17 +663,17 @@ static void check_ranges_format(struct check *c, struct dt_info *dti, if (prop->val.len == 0) { if (p_addr_cells != c_addr_cells) - FAIL(c, "%s has empty \"ranges\" property but its " + FAIL(c, dti, "%s has empty \"ranges\" property but its " "#address-cells (%d) differs from %s (%d)", node->fullpath, c_addr_cells, node->parent->fullpath, p_addr_cells); if (p_size_cells != c_size_cells) - FAIL(c, "%s has empty \"ranges\" property but its " + FAIL(c, dti, "%s has empty \"ranges\" property but its " "#size-cells (%d) differs from %s (%d)", node->fullpath, c_size_cells, node->parent->fullpath, p_size_cells); } else if ((prop->val.len % entrylen) != 0) { - FAIL(c, "\"ranges\" property in %s has invalid length (%d bytes) " + FAIL(c, dti, "\"ranges\" property in %s has invalid length (%d bytes) " "(parent #address-cells == %d, child #address-cells == %d, " "#size-cells == %d)", node->fullpath, prop->val.len, p_addr_cells, c_addr_cells, c_size_cells); @@ -638,6 +681,229 @@ static void check_ranges_format(struct check *c, struct dt_info *dti, } WARNING(ranges_format, check_ranges_format, NULL, &addr_size_cells); +static const struct bus_type pci_bus = { + .name = "PCI", +}; + +static void check_pci_bridge(struct check *c, struct dt_info *dti, struct node *node) +{ + struct property *prop; + cell_t *cells; + + prop = get_property(node, "device_type"); + if (!prop || !streq(prop->val.val, "pci")) + return; + + node->bus = &pci_bus; + + if (!strneq(node->name, "pci", node->basenamelen) && + !strneq(node->name, "pcie", node->basenamelen)) + FAIL(c, dti, "Node %s node name is not \"pci\" or \"pcie\"", + node->fullpath); + + prop = get_property(node, "ranges"); + if (!prop) + FAIL(c, dti, "Node %s missing ranges for PCI bridge (or not a bridge)", + node->fullpath); + + if (node_addr_cells(node) != 3) + FAIL(c, dti, "Node %s incorrect #address-cells for PCI bridge", + node->fullpath); + if (node_size_cells(node) != 2) + FAIL(c, dti, "Node %s incorrect #size-cells for PCI bridge", + node->fullpath); + + prop = get_property(node, "bus-range"); + if (!prop) { + FAIL(c, dti, "Node %s missing bus-range for PCI bridge", + node->fullpath); + return; + } + if (prop->val.len != (sizeof(cell_t) * 2)) { + FAIL(c, dti, "Node %s bus-range must be 2 cells", + node->fullpath); + return; + } + cells = (cell_t *)prop->val.val; + if (fdt32_to_cpu(cells[0]) > fdt32_to_cpu(cells[1])) + FAIL(c, dti, "Node %s bus-range 1st cell must be less than or equal to 2nd cell", + node->fullpath); + if (fdt32_to_cpu(cells[1]) > 0xff) + FAIL(c, dti, "Node %s bus-range maximum bus number must be less than 256", + node->fullpath); +} +WARNING(pci_bridge, check_pci_bridge, NULL, + &device_type_is_string, &addr_size_cells); + +static void check_pci_device_bus_num(struct check *c, struct dt_info *dti, struct node *node) +{ + struct property *prop; + unsigned int bus_num, min_bus, max_bus; + cell_t *cells; + + if (!node->parent || (node->parent->bus != &pci_bus)) + return; + + prop = get_property(node, "reg"); + if (!prop) + return; + + cells = (cell_t *)prop->val.val; + bus_num = (fdt32_to_cpu(cells[0]) & 0x00ff0000) >> 16; + + prop = get_property(node->parent, "bus-range"); + if (!prop) { + min_bus = max_bus = 0; + } else { + cells = (cell_t *)prop->val.val; + min_bus = fdt32_to_cpu(cells[0]); + max_bus = fdt32_to_cpu(cells[0]); + } + if ((bus_num < min_bus) || (bus_num > max_bus)) + FAIL(c, dti, "Node %s PCI bus number %d out of range, expected (%d - %d)", + node->fullpath, bus_num, min_bus, max_bus); +} +WARNING(pci_device_bus_num, check_pci_device_bus_num, NULL, ®_format, &pci_bridge); + +static void check_pci_device_reg(struct check *c, struct dt_info *dti, struct node *node) +{ + struct property *prop; + const char *unitname = get_unitname(node); + char unit_addr[5]; + unsigned int dev, func, reg; + cell_t *cells; + + if (!node->parent || (node->parent->bus != &pci_bus)) + return; + + prop = get_property(node, "reg"); + if (!prop) { + FAIL(c, dti, "Node %s missing PCI reg property", node->fullpath); + return; + } + + cells = (cell_t *)prop->val.val; + if (cells[1] || cells[2]) + FAIL(c, dti, "Node %s PCI reg config space address cells 2 and 3 must be 0", + node->fullpath); + + reg = fdt32_to_cpu(cells[0]); + dev = (reg & 0xf800) >> 11; + func = (reg & 0x700) >> 8; + + if (reg & 0xff000000) + FAIL(c, dti, "Node %s PCI reg address is not configuration space", + node->fullpath); + if (reg & 0x000000ff) + FAIL(c, dti, "Node %s PCI reg config space address register number must be 0", + node->fullpath); + + if (func == 0) { + snprintf(unit_addr, sizeof(unit_addr), "%x", dev); + if (streq(unitname, unit_addr)) + return; + } + + snprintf(unit_addr, sizeof(unit_addr), "%x,%x", dev, func); + if (streq(unitname, unit_addr)) + return; + + FAIL(c, dti, "Node %s PCI unit address format error, expected \"%s\"", + node->fullpath, unit_addr); +} +WARNING(pci_device_reg, check_pci_device_reg, NULL, ®_format, &pci_bridge); + +static const struct bus_type simple_bus = { + .name = "simple-bus", +}; + +static bool node_is_compatible(struct node *node, const char *compat) +{ + struct property *prop; + const char *str, *end; + + prop = get_property(node, "compatible"); + if (!prop) + return false; + + for (str = prop->val.val, end = str + prop->val.len; str < end; + str += strnlen(str, end - str) + 1) { + if (strneq(str, compat, end - str)) + return true; + } + return false; +} + +static void check_simple_bus_bridge(struct check *c, struct dt_info *dti, struct node *node) +{ + if (node_is_compatible(node, "simple-bus")) + node->bus = &simple_bus; +} +WARNING(simple_bus_bridge, check_simple_bus_bridge, NULL, &addr_size_cells); + +static void check_simple_bus_reg(struct check *c, struct dt_info *dti, struct node *node) +{ + struct property *prop; + const char *unitname = get_unitname(node); + char unit_addr[17]; + unsigned int size; + uint64_t reg = 0; + cell_t *cells = NULL; + + if (!node->parent || (node->parent->bus != &simple_bus)) + return; + + prop = get_property(node, "reg"); + if (prop) + cells = (cell_t *)prop->val.val; + else { + prop = get_property(node, "ranges"); + if (prop && prop->val.len) + /* skip of child address */ + cells = ((cell_t *)prop->val.val) + node_addr_cells(node); + } + + if (!cells) { + if (node->parent->parent && !(node->bus == &simple_bus)) + FAIL(c, dti, "Node %s missing or empty reg/ranges property", node->fullpath); + return; + } + + size = node_addr_cells(node->parent); + while (size--) + reg = (reg << 32) | fdt32_to_cpu(*(cells++)); + + snprintf(unit_addr, sizeof(unit_addr), "%zx", reg); + if (!streq(unitname, unit_addr)) + FAIL(c, dti, "Node %s simple-bus unit address format error, expected \"%s\"", + node->fullpath, unit_addr); +} +WARNING(simple_bus_reg, check_simple_bus_reg, NULL, ®_format, &simple_bus_bridge); + +static void check_unit_address_format(struct check *c, struct dt_info *dti, + struct node *node) +{ + const char *unitname = get_unitname(node); + + if (node->parent && node->parent->bus) + return; + + if (!unitname[0]) + return; + + if (!strncmp(unitname, "0x", 2)) { + FAIL(c, dti, "Node %s unit name should not have leading \"0x\"", + node->fullpath); + /* skip over 0x for next test */ + unitname += 2; + } + if (unitname[0] == '0' && isxdigit(unitname[1])) + FAIL(c, dti, "Node %s unit name should not have leading 0s", + node->fullpath); +} +WARNING(unit_address_format, check_unit_address_format, NULL, + &node_name_format, &pci_bridge, &simple_bus_bridge); + /* * Style checks */ @@ -656,11 +922,11 @@ static void check_avoid_default_addr_size(struct check *c, struct dt_info *dti, return; if (node->parent->addr_cells == -1) - FAIL(c, "Relying on default #address-cells value for %s", + FAIL(c, dti, "Relying on default #address-cells value for %s", node->fullpath); if (node->parent->size_cells == -1) - FAIL(c, "Relying on default #size-cells value for %s", + FAIL(c, dti, "Relying on default #size-cells value for %s", node->fullpath); } WARNING(avoid_default_addr_size, check_avoid_default_addr_size, NULL, @@ -684,7 +950,7 @@ static void check_obsolete_chosen_interrupt_controller(struct check *c, prop = get_property(chosen, "interrupt-controller"); if (prop) - FAIL(c, "/chosen has obsolete \"interrupt-controller\" " + FAIL(c, dti, "/chosen has obsolete \"interrupt-controller\" " "property"); } WARNING(obsolete_chosen_interrupt_controller, @@ -703,9 +969,20 @@ static struct check *check_table[] = { &address_cells_is_cell, &size_cells_is_cell, &interrupt_cells_is_cell, &device_type_is_string, &model_is_string, &status_is_string, + &property_name_chars_strict, + &node_name_chars_strict, + &addr_size_cells, ®_format, &ranges_format, &unit_address_vs_reg, + &unit_address_format, + + &pci_bridge, + &pci_device_reg, + &pci_device_bus_num, + + &simple_bus_bridge, + &simple_bus_reg, &avoid_default_addr_size, &obsolete_chosen_interrupt_controller, diff --git a/scripts/dtc/data.c b/scripts/dtc/data.c index 8cae23746882e4747ea5d13e300484452fc48eee..aa37a16c889186aff62274ec708e89dea7dbd971 100644 --- a/scripts/dtc/data.c +++ b/scripts/dtc/data.c @@ -171,9 +171,9 @@ struct data data_merge(struct data d1, struct data d2) struct data data_append_integer(struct data d, uint64_t value, int bits) { uint8_t value_8; - uint16_t value_16; - uint32_t value_32; - uint64_t value_64; + fdt16_t value_16; + fdt32_t value_32; + fdt64_t value_64; switch (bits) { case 8: @@ -197,14 +197,14 @@ struct data data_append_integer(struct data d, uint64_t value, int bits) } } -struct data data_append_re(struct data d, const struct fdt_reserve_entry *re) +struct data data_append_re(struct data d, uint64_t address, uint64_t size) { - struct fdt_reserve_entry bere; + struct fdt_reserve_entry re; - bere.address = cpu_to_fdt64(re->address); - bere.size = cpu_to_fdt64(re->size); + re.address = cpu_to_fdt64(address); + re.size = cpu_to_fdt64(size); - return data_append_data(d, &bere, sizeof(bere)); + return data_append_data(d, &re, sizeof(re)); } struct data data_append_cell(struct data d, cell_t word) diff --git a/scripts/dtc/dtc-lexer.l b/scripts/dtc/dtc-lexer.l index c600603044f398e7f07ee0ebdc8c025712fd735f..fd825ebba69cb25e160e057f53bef994970093fc 100644 --- a/scripts/dtc/dtc-lexer.l +++ b/scripts/dtc/dtc-lexer.l @@ -62,7 +62,8 @@ static int dts_version = 1; static void push_input_file(const char *filename); static bool pop_input_file(void); -static void lexical_error(const char *fmt, ...); +static void PRINTF(1, 2) lexical_error(const char *fmt, ...); + %} %% diff --git a/scripts/dtc/dtc-lexer.lex.c_shipped b/scripts/dtc/dtc-lexer.lex.c_shipped index 2c862bc86ad05d4610fb6668802b62ff23d6dcaa..64c243772398d614d7cd0a6ec9cfd0d6baeb8da3 100644 --- a/scripts/dtc/dtc-lexer.lex.c_shipped +++ b/scripts/dtc/dtc-lexer.lex.c_shipped @@ -655,8 +655,9 @@ static int dts_version = 1; static void push_input_file(const char *filename); static bool pop_input_file(void); -static void lexical_error(const char *fmt, ...); -#line 660 "dtc-lexer.lex.c" +static void PRINTF(1, 2) lexical_error(const char *fmt, ...); + +#line 661 "dtc-lexer.lex.c" #define INITIAL 0 #define BYTESTRING 1 @@ -878,9 +879,9 @@ YY_DECL } { -#line 68 "dtc-lexer.l" +#line 69 "dtc-lexer.l" -#line 884 "dtc-lexer.lex.c" +#line 885 "dtc-lexer.lex.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -937,7 +938,7 @@ do_action: /* This label is used only to access EOF actions. */ case 1: /* rule 1 can match eol */ YY_RULE_SETUP -#line 69 "dtc-lexer.l" +#line 70 "dtc-lexer.l" { char *name = strchr(yytext, '\"') + 1; yytext[yyleng-1] = '\0'; @@ -947,7 +948,7 @@ YY_RULE_SETUP case 2: /* rule 2 can match eol */ YY_RULE_SETUP -#line 75 "dtc-lexer.l" +#line 76 "dtc-lexer.l" { char *line, *fnstart, *fnend; struct data fn; @@ -981,7 +982,7 @@ case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(BYTESTRING): case YY_STATE_EOF(PROPNODENAME): case YY_STATE_EOF(V1): -#line 104 "dtc-lexer.l" +#line 105 "dtc-lexer.l" { if (!pop_input_file()) { yyterminate(); @@ -991,7 +992,7 @@ case YY_STATE_EOF(V1): case 3: /* rule 3 can match eol */ YY_RULE_SETUP -#line 110 "dtc-lexer.l" +#line 111 "dtc-lexer.l" { DPRINT("String: %s\n", yytext); yylval.data = data_copy_escape_string(yytext+1, @@ -1001,7 +1002,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 117 "dtc-lexer.l" +#line 118 "dtc-lexer.l" { DPRINT("Keyword: /dts-v1/\n"); dts_version = 1; @@ -1011,7 +1012,7 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 124 "dtc-lexer.l" +#line 125 "dtc-lexer.l" { DPRINT("Keyword: /plugin/\n"); return DT_PLUGIN; @@ -1019,7 +1020,7 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 129 "dtc-lexer.l" +#line 130 "dtc-lexer.l" { DPRINT("Keyword: /memreserve/\n"); BEGIN_DEFAULT(); @@ -1028,7 +1029,7 @@ YY_RULE_SETUP YY_BREAK case 7: YY_RULE_SETUP -#line 135 "dtc-lexer.l" +#line 136 "dtc-lexer.l" { DPRINT("Keyword: /bits/\n"); BEGIN_DEFAULT(); @@ -1037,7 +1038,7 @@ YY_RULE_SETUP YY_BREAK case 8: YY_RULE_SETUP -#line 141 "dtc-lexer.l" +#line 142 "dtc-lexer.l" { DPRINT("Keyword: /delete-property/\n"); DPRINT("\n"); @@ -1047,7 +1048,7 @@ YY_RULE_SETUP YY_BREAK case 9: YY_RULE_SETUP -#line 148 "dtc-lexer.l" +#line 149 "dtc-lexer.l" { DPRINT("Keyword: /delete-node/\n"); DPRINT("\n"); @@ -1057,7 +1058,7 @@ YY_RULE_SETUP YY_BREAK case 10: YY_RULE_SETUP -#line 155 "dtc-lexer.l" +#line 156 "dtc-lexer.l" { DPRINT("Label: %s\n", yytext); yylval.labelref = xstrdup(yytext); @@ -1067,7 +1068,7 @@ YY_RULE_SETUP YY_BREAK case 11: YY_RULE_SETUP -#line 162 "dtc-lexer.l" +#line 163 "dtc-lexer.l" { char *e; DPRINT("Integer Literal: '%s'\n", yytext); @@ -1093,7 +1094,7 @@ YY_RULE_SETUP case 12: /* rule 12 can match eol */ YY_RULE_SETUP -#line 184 "dtc-lexer.l" +#line 185 "dtc-lexer.l" { struct data d; DPRINT("Character literal: %s\n", yytext); @@ -1117,7 +1118,7 @@ YY_RULE_SETUP YY_BREAK case 13: YY_RULE_SETUP -#line 205 "dtc-lexer.l" +#line 206 "dtc-lexer.l" { /* label reference */ DPRINT("Ref: %s\n", yytext+1); yylval.labelref = xstrdup(yytext+1); @@ -1126,7 +1127,7 @@ YY_RULE_SETUP YY_BREAK case 14: YY_RULE_SETUP -#line 211 "dtc-lexer.l" +#line 212 "dtc-lexer.l" { /* new-style path reference */ yytext[yyleng-1] = '\0'; DPRINT("Ref: %s\n", yytext+2); @@ -1136,7 +1137,7 @@ YY_RULE_SETUP YY_BREAK case 15: YY_RULE_SETUP -#line 218 "dtc-lexer.l" +#line 219 "dtc-lexer.l" { yylval.byte = strtol(yytext, NULL, 16); DPRINT("Byte: %02x\n", (int)yylval.byte); @@ -1145,7 +1146,7 @@ YY_RULE_SETUP YY_BREAK case 16: YY_RULE_SETUP -#line 224 "dtc-lexer.l" +#line 225 "dtc-lexer.l" { DPRINT("/BYTESTRING\n"); BEGIN_DEFAULT(); @@ -1154,7 +1155,7 @@ YY_RULE_SETUP YY_BREAK case 17: YY_RULE_SETUP -#line 230 "dtc-lexer.l" +#line 231 "dtc-lexer.l" { DPRINT("PropNodeName: %s\n", yytext); yylval.propnodename = xstrdup((yytext[0] == '\\') ? @@ -1165,7 +1166,7 @@ YY_RULE_SETUP YY_BREAK case 18: YY_RULE_SETUP -#line 238 "dtc-lexer.l" +#line 239 "dtc-lexer.l" { DPRINT("Binary Include\n"); return DT_INCBIN; @@ -1174,64 +1175,64 @@ YY_RULE_SETUP case 19: /* rule 19 can match eol */ YY_RULE_SETUP -#line 243 "dtc-lexer.l" +#line 244 "dtc-lexer.l" /* eat whitespace */ YY_BREAK case 20: /* rule 20 can match eol */ YY_RULE_SETUP -#line 244 "dtc-lexer.l" +#line 245 "dtc-lexer.l" /* eat C-style comments */ YY_BREAK case 21: /* rule 21 can match eol */ YY_RULE_SETUP -#line 245 "dtc-lexer.l" +#line 246 "dtc-lexer.l" /* eat C++-style comments */ YY_BREAK case 22: YY_RULE_SETUP -#line 247 "dtc-lexer.l" +#line 248 "dtc-lexer.l" { return DT_LSHIFT; }; YY_BREAK case 23: YY_RULE_SETUP -#line 248 "dtc-lexer.l" +#line 249 "dtc-lexer.l" { return DT_RSHIFT; }; YY_BREAK case 24: YY_RULE_SETUP -#line 249 "dtc-lexer.l" +#line 250 "dtc-lexer.l" { return DT_LE; }; YY_BREAK case 25: YY_RULE_SETUP -#line 250 "dtc-lexer.l" +#line 251 "dtc-lexer.l" { return DT_GE; }; YY_BREAK case 26: YY_RULE_SETUP -#line 251 "dtc-lexer.l" +#line 252 "dtc-lexer.l" { return DT_EQ; }; YY_BREAK case 27: YY_RULE_SETUP -#line 252 "dtc-lexer.l" +#line 253 "dtc-lexer.l" { return DT_NE; }; YY_BREAK case 28: YY_RULE_SETUP -#line 253 "dtc-lexer.l" +#line 254 "dtc-lexer.l" { return DT_AND; }; YY_BREAK case 29: YY_RULE_SETUP -#line 254 "dtc-lexer.l" +#line 255 "dtc-lexer.l" { return DT_OR; }; YY_BREAK case 30: YY_RULE_SETUP -#line 256 "dtc-lexer.l" +#line 257 "dtc-lexer.l" { DPRINT("Char: %c (\\x%02x)\n", yytext[0], (unsigned)yytext[0]); @@ -1249,10 +1250,10 @@ YY_RULE_SETUP YY_BREAK case 31: YY_RULE_SETUP -#line 271 "dtc-lexer.l" +#line 272 "dtc-lexer.l" ECHO; YY_BREAK -#line 1256 "dtc-lexer.lex.c" +#line 1257 "dtc-lexer.lex.c" case YY_END_OF_BUFFER: { @@ -2218,7 +2219,7 @@ void yyfree (void * ptr ) #define YYTABLES_NAME "yytables" -#line 271 "dtc-lexer.l" +#line 272 "dtc-lexer.l" diff --git a/scripts/dtc/dtc-parser.tab.c_shipped b/scripts/dtc/dtc-parser.tab.c_shipped index 2965227a1b4a576de158a6f1f2629630e4b3dee2..0a7a5ed86f042f52a8811c11f14125319df8a712 100644 --- a/scripts/dtc/dtc-parser.tab.c_shipped +++ b/scripts/dtc/dtc-parser.tab.c_shipped @@ -1557,10 +1557,10 @@ yyreduce: { struct node *target = get_node_by_ref((yyvsp[-3].node), (yyvsp[-1].labelref)); - add_label(&target->labels, (yyvsp[-2].labelref)); - if (target) + if (target) { + add_label(&target->labels, (yyvsp[-2].labelref)); merge_nodes(target, (yyvsp[0].node)); - else + } else ERROR(&(yylsp[-1]), "Label or path %s not found", (yyvsp[-1].labelref)); (yyval.node) = (yyvsp[-3].node); } diff --git a/scripts/dtc/dtc-parser.y b/scripts/dtc/dtc-parser.y index b2fd4d155839c24a80b0222b33a560b42c2ae161..ca3f5003427cb5e280eb6ec4343547f7f37bd428 100644 --- a/scripts/dtc/dtc-parser.y +++ b/scripts/dtc/dtc-parser.y @@ -171,10 +171,10 @@ devicetree: { struct node *target = get_node_by_ref($1, $3); - add_label(&target->labels, $2); - if (target) + if (target) { + add_label(&target->labels, $2); merge_nodes(target, $4); - else + } else ERROR(&@3, "Label or path %s not found", $3); $$ = $1; } diff --git a/scripts/dtc/dtc.c b/scripts/dtc/dtc.c index a4edf4c7aebfb368319041d2bf960a66490780e9..f5eed9d72c0241bbe36c90aebe7dd7f60cc430fb 100644 --- a/scripts/dtc/dtc.c +++ b/scripts/dtc/dtc.c @@ -138,7 +138,7 @@ static const char *guess_type_by_name(const char *fname, const char *fallback) static const char *guess_input_format(const char *fname, const char *fallback) { struct stat statbuf; - uint32_t magic; + fdt32_t magic; FILE *f; if (stat(fname, &statbuf) != 0) @@ -159,8 +159,7 @@ static const char *guess_input_format(const char *fname, const char *fallback) } fclose(f); - magic = fdt32_to_cpu(magic); - if (magic == FDT_MAGIC) + if (fdt32_to_cpu(magic) == FDT_MAGIC) return "dtb"; return guess_type_by_name(fname, fallback); @@ -216,7 +215,7 @@ int main(int argc, char *argv[]) alignsize = strtol(optarg, NULL, 0); if (!is_power_of_2(alignsize)) die("Invalid argument \"%d\" to -a option\n", - optarg); + alignsize); break; case 'f': force = true; @@ -309,6 +308,8 @@ int main(int argc, char *argv[]) else die("Unknown input format \"%s\"\n", inform); + dti->outname = outname; + if (depfile) { fputc('\n', depfile); fclose(depfile); diff --git a/scripts/dtc/dtc.h b/scripts/dtc/dtc.h index c6f125c68ba8d100e7f1250f18462d5affd67e1d..fc24e17510fde472ebf10d88c7fb008b4eb1aab6 100644 --- a/scripts/dtc/dtc.h +++ b/scripts/dtc/dtc.h @@ -43,7 +43,6 @@ #define debug(...) #endif - #define DEFAULT_FDT_VERSION 17 /* @@ -114,7 +113,7 @@ struct data data_insert_at_marker(struct data d, struct marker *m, struct data data_merge(struct data d1, struct data d2); struct data data_append_cell(struct data d, cell_t word); struct data data_append_integer(struct data d, uint64_t word, int bits); -struct data data_append_re(struct data d, const struct fdt_reserve_entry *re); +struct data data_append_re(struct data d, uint64_t address, uint64_t size); struct data data_append_addr(struct data d, uint64_t addr); struct data data_append_byte(struct data d, uint8_t byte); struct data data_append_zeroes(struct data d, int len); @@ -136,6 +135,10 @@ struct label { struct label *next; }; +struct bus_type { + const char *name; +}; + struct property { bool deleted; char *name; @@ -162,6 +165,7 @@ struct node { int addr_cells, size_cells; struct label *labels; + const struct bus_type *bus; }; #define for_each_label_withdel(l0, l) \ @@ -227,7 +231,7 @@ uint32_t guess_boot_cpuid(struct node *tree); /* Boot info (tree plus memreserve information */ struct reserve_info { - struct fdt_reserve_entry re; + uint64_t address, size; struct reserve_info *next; @@ -246,6 +250,7 @@ struct dt_info { struct reserve_info *reservelist; uint32_t boot_cpuid_phys; struct node *dt; /* the device tree */ + const char *outname; /* filename being written to, "-" for stdout */ }; /* DTS version flags definitions */ diff --git a/scripts/dtc/flattree.c b/scripts/dtc/flattree.c index ebac548b3fa81a1658b0d60dcb369c2c50f1cab0..fcf71541d8a7e543766f45077ebfc1184c7d6cab 100644 --- a/scripts/dtc/flattree.c +++ b/scripts/dtc/flattree.c @@ -49,7 +49,7 @@ static struct version_info { struct emitter { void (*cell)(void *, cell_t); - void (*string)(void *, char *, int); + void (*string)(void *, const char *, int); void (*align)(void *, int); void (*data)(void *, struct data); void (*beginnode)(void *, struct label *labels); @@ -64,7 +64,7 @@ static void bin_emit_cell(void *e, cell_t val) *dtbuf = data_append_cell(*dtbuf, val); } -static void bin_emit_string(void *e, char *str, int len) +static void bin_emit_string(void *e, const char *str, int len) { struct data *dtbuf = e; @@ -144,22 +144,14 @@ static void asm_emit_cell(void *e, cell_t val) (val >> 8) & 0xff, val & 0xff); } -static void asm_emit_string(void *e, char *str, int len) +static void asm_emit_string(void *e, const char *str, int len) { FILE *f = e; - char c = 0; - if (len != 0) { - /* XXX: ewww */ - c = str[len]; - str[len] = '\0'; - } - - fprintf(f, "\t.string\t\"%s\"\n", str); - - if (len != 0) { - str[len] = c; - } + if (len != 0) + fprintf(f, "\t.string\t\"%.*s\"\n", len, str); + else + fprintf(f, "\t.string\t\"%s\"\n", str); } static void asm_emit_align(void *e, int a) @@ -179,7 +171,7 @@ static void asm_emit_data(void *e, struct data d) emit_offset_label(f, m->ref, m->offset); while ((d.len - off) >= sizeof(uint32_t)) { - asm_emit_cell(e, fdt32_to_cpu(*((uint32_t *)(d.val+off)))); + asm_emit_cell(e, fdt32_to_cpu(*((fdt32_t *)(d.val+off)))); off += sizeof(uint32_t); } @@ -318,17 +310,16 @@ static struct data flatten_reserve_list(struct reserve_info *reservelist, { struct reserve_info *re; struct data d = empty_data; - static struct fdt_reserve_entry null_re = {0,0}; int j; for (re = reservelist; re; re = re->next) { - d = data_append_re(d, &re->re); + d = data_append_re(d, re->address, re->size); } /* * Add additional reserved slots if the user asked for them. */ for (j = 0; j < reservenum; j++) { - d = data_append_re(d, &null_re); + d = data_append_re(d, 0, 0); } return d; @@ -544,11 +535,11 @@ void dt_to_asm(FILE *f, struct dt_info *dti, int version) fprintf(f, "\t.globl\t%s\n", l->label); fprintf(f, "%s:\n", l->label); } - ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->re.address >> 32)); + ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->address >> 32)); ASM_EMIT_BELONG(f, "0x%08x", - (unsigned int)(re->re.address & 0xffffffff)); - ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->re.size >> 32)); - ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->re.size & 0xffffffff)); + (unsigned int)(re->address & 0xffffffff)); + ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->size >> 32)); + ASM_EMIT_BELONG(f, "0x%08x", (unsigned int)(re->size & 0xffffffff)); } for (i = 0; i < reservenum; i++) { fprintf(f, "\t.long\t0, 0\n\t.long\t0, 0\n"); @@ -609,7 +600,7 @@ static void flat_read_chunk(struct inbuf *inb, void *p, int len) static uint32_t flat_read_word(struct inbuf *inb) { - uint32_t val; + fdt32_t val; assert(((inb->ptr - inb->base) % sizeof(val)) == 0); @@ -718,13 +709,15 @@ static struct reserve_info *flat_read_mem_reserve(struct inbuf *inb) * First pass, count entries. */ while (1) { + uint64_t address, size; + flat_read_chunk(inb, &re, sizeof(re)); - re.address = fdt64_to_cpu(re.address); - re.size = fdt64_to_cpu(re.size); - if (re.size == 0) + address = fdt64_to_cpu(re.address); + size = fdt64_to_cpu(re.size); + if (size == 0) break; - new = build_reserve_entry(re.address, re.size); + new = build_reserve_entry(address, size); reservelist = add_reserve_entry(reservelist, new); } @@ -817,6 +810,7 @@ static struct node *unflatten_tree(struct inbuf *dtbuf, struct dt_info *dt_from_blob(const char *fname) { FILE *f; + fdt32_t magic_buf, totalsize_buf; uint32_t magic, totalsize, version, size_dt, boot_cpuid_phys; uint32_t off_dt, off_str, off_mem_rsvmap; int rc; @@ -833,7 +827,7 @@ struct dt_info *dt_from_blob(const char *fname) f = srcfile_relative_open(fname, NULL); - rc = fread(&magic, sizeof(magic), 1, f); + rc = fread(&magic_buf, sizeof(magic_buf), 1, f); if (ferror(f)) die("Error reading DT blob magic number: %s\n", strerror(errno)); @@ -844,11 +838,11 @@ struct dt_info *dt_from_blob(const char *fname) die("Mysterious short read reading magic number\n"); } - magic = fdt32_to_cpu(magic); + magic = fdt32_to_cpu(magic_buf); if (magic != FDT_MAGIC) die("Blob has incorrect magic number\n"); - rc = fread(&totalsize, sizeof(totalsize), 1, f); + rc = fread(&totalsize_buf, sizeof(totalsize_buf), 1, f); if (ferror(f)) die("Error reading DT blob size: %s\n", strerror(errno)); if (rc < 1) { @@ -858,7 +852,7 @@ struct dt_info *dt_from_blob(const char *fname) die("Mysterious short read reading blob size\n"); } - totalsize = fdt32_to_cpu(totalsize); + totalsize = fdt32_to_cpu(totalsize_buf); if (totalsize < FDT_V1_SIZE) die("DT blob size (%d) is too small\n", totalsize); diff --git a/scripts/dtc/include-prefixes/arc b/scripts/dtc/include-prefixes/arc new file mode 120000 index 0000000000000000000000000000000000000000..5d21b5a69a112a34750e919a9222a8f99c696036 --- /dev/null +++ b/scripts/dtc/include-prefixes/arc @@ -0,0 +1 @@ +../../../arch/arc/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/arm b/scripts/dtc/include-prefixes/arm new file mode 120000 index 0000000000000000000000000000000000000000..eb14d4515a57e9fba995487c1f3c9235086a9596 --- /dev/null +++ b/scripts/dtc/include-prefixes/arm @@ -0,0 +1 @@ +../../../arch/arm/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/arm64 b/scripts/dtc/include-prefixes/arm64 new file mode 120000 index 0000000000000000000000000000000000000000..275c42c21d71c4cf2fd64df687c85a221129f059 --- /dev/null +++ b/scripts/dtc/include-prefixes/arm64 @@ -0,0 +1 @@ +../../../arch/arm64/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x new file mode 120000 index 0000000000000000000000000000000000000000..49ded4cae2be96e26b5184b7200019058a929dff --- /dev/null +++ b/scripts/dtc/include-prefixes/c6x @@ -0,0 +1 @@ +../../../arch/c6x/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris new file mode 120000 index 0000000000000000000000000000000000000000..736d998ba506ff257b3cfebef246c96ba75ea0cc --- /dev/null +++ b/scripts/dtc/include-prefixes/cris @@ -0,0 +1 @@ +../../../arch/cris/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/dt-bindings b/scripts/dtc/include-prefixes/dt-bindings new file mode 120000 index 0000000000000000000000000000000000000000..04fdbb3af01630bbbc1931323981ef9a2482710b --- /dev/null +++ b/scripts/dtc/include-prefixes/dt-bindings @@ -0,0 +1 @@ +../../../include/dt-bindings \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/h8300 b/scripts/dtc/include-prefixes/h8300 new file mode 120000 index 0000000000000000000000000000000000000000..3bdaa332c54c7af4cb03841978822f5675e451d6 --- /dev/null +++ b/scripts/dtc/include-prefixes/h8300 @@ -0,0 +1 @@ +../../../arch/h8300/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag new file mode 120000 index 0000000000000000000000000000000000000000..87a3c847db8fb56e81462a62f732d7f25027de63 --- /dev/null +++ b/scripts/dtc/include-prefixes/metag @@ -0,0 +1 @@ +../../../arch/metag/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/microblaze b/scripts/dtc/include-prefixes/microblaze new file mode 120000 index 0000000000000000000000000000000000000000..d9830330a21d0916fb0df4315406abe137e59d24 --- /dev/null +++ b/scripts/dtc/include-prefixes/microblaze @@ -0,0 +1 @@ +../../../arch/microblaze/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/mips b/scripts/dtc/include-prefixes/mips new file mode 120000 index 0000000000000000000000000000000000000000..ae8d4948dc8d5a098cf70b1d07182a04b84dbe05 --- /dev/null +++ b/scripts/dtc/include-prefixes/mips @@ -0,0 +1 @@ +../../../arch/mips/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/nios2 b/scripts/dtc/include-prefixes/nios2 new file mode 120000 index 0000000000000000000000000000000000000000..51772336d13fb3606011db9fd8b608aa016bf56f --- /dev/null +++ b/scripts/dtc/include-prefixes/nios2 @@ -0,0 +1 @@ +../../../arch/nios2/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/openrisc b/scripts/dtc/include-prefixes/openrisc new file mode 120000 index 0000000000000000000000000000000000000000..71c3bc75c560ac4697ee26e85f8b01994366112f --- /dev/null +++ b/scripts/dtc/include-prefixes/openrisc @@ -0,0 +1 @@ +../../../arch/openrisc/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/powerpc b/scripts/dtc/include-prefixes/powerpc new file mode 120000 index 0000000000000000000000000000000000000000..7cd6ec16e899044c9fdbb991c8ef2d4ec594fc91 --- /dev/null +++ b/scripts/dtc/include-prefixes/powerpc @@ -0,0 +1 @@ +../../../arch/powerpc/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/sh b/scripts/dtc/include-prefixes/sh new file mode 120000 index 0000000000000000000000000000000000000000..67d37808c599dba0314822acc47f9ae2160adf5f --- /dev/null +++ b/scripts/dtc/include-prefixes/sh @@ -0,0 +1 @@ +../../../arch/sh/boot/dts \ No newline at end of file diff --git a/scripts/dtc/include-prefixes/xtensa b/scripts/dtc/include-prefixes/xtensa new file mode 120000 index 0000000000000000000000000000000000000000..d1eaf6ec7a2b9691d34ff2a01a3d589f945623bd --- /dev/null +++ b/scripts/dtc/include-prefixes/xtensa @@ -0,0 +1 @@ +../../../arch/xtensa/boot/dts \ No newline at end of file diff --git a/scripts/dtc/libfdt/fdt_rw.c b/scripts/dtc/libfdt/fdt_rw.c index 2eed4f58387c2eaafcd4344903fc85f515b17018..3fd5847377c9b5be3d1cfc4e9485b25b5cdf6cf9 100644 --- a/scripts/dtc/libfdt/fdt_rw.c +++ b/scripts/dtc/libfdt/fdt_rw.c @@ -283,7 +283,8 @@ int fdt_setprop(void *fdt, int nodeoffset, const char *name, if (err) return err; - memcpy(prop->data, val, len); + if (len) + memcpy(prop->data, val, len); return 0; } diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h index b842b156fa17b57081fa2ee1cd09a0974297b4ef..ba86caa73d0168f97038cbac839c5572c8574f60 100644 --- a/scripts/dtc/libfdt/libfdt.h +++ b/scripts/dtc/libfdt/libfdt.h @@ -143,7 +143,9 @@ /* Low-level functions (you probably don't need these) */ /**********************************************************************/ +#ifndef SWIG /* This function is not useful in Python */ const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int checklen); +#endif static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen) { return (void *)(uintptr_t)fdt_offset_ptr(fdt, offset, checklen); @@ -210,7 +212,6 @@ int fdt_next_subnode(const void *fdt, int offset); /**********************************************************************/ /* General functions */ /**********************************************************************/ - #define fdt_get_header(fdt, field) \ (fdt32_to_cpu(((const struct fdt_header *)(fdt))->field)) #define fdt_magic(fdt) (fdt_get_header(fdt, magic)) @@ -354,8 +355,10 @@ int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size); * useful for finding subnodes based on a portion of a larger string, * such as a full path. */ +#ifndef SWIG /* Not available in Python */ int fdt_subnode_offset_namelen(const void *fdt, int parentoffset, const char *name, int namelen); +#endif /** * fdt_subnode_offset - find a subnode of a given node * @fdt: pointer to the device tree blob @@ -391,7 +394,9 @@ int fdt_subnode_offset(const void *fdt, int parentoffset, const char *name); * Identical to fdt_path_offset(), but only consider the first namelen * characters of path as the path name. */ +#ifndef SWIG /* Not available in Python */ int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen); +#endif /** * fdt_path_offset - find a tree node by its full path @@ -550,10 +555,12 @@ const struct fdt_property *fdt_get_property_by_offset(const void *fdt, * Identical to fdt_get_property(), but only examine the first namelen * characters of name for matching the property name. */ +#ifndef SWIG /* Not available in Python */ const struct fdt_property *fdt_get_property_namelen(const void *fdt, int nodeoffset, const char *name, int namelen, int *lenp); +#endif /** * fdt_get_property - find a given property in a given node @@ -624,8 +631,10 @@ static inline struct fdt_property *fdt_get_property_w(void *fdt, int nodeoffset, * -FDT_ERR_BADSTRUCTURE, * -FDT_ERR_TRUNCATED, standard meanings */ +#ifndef SWIG /* This function is not useful in Python */ const void *fdt_getprop_by_offset(const void *fdt, int offset, const char **namep, int *lenp); +#endif /** * fdt_getprop_namelen - get property value based on substring @@ -638,6 +647,7 @@ const void *fdt_getprop_by_offset(const void *fdt, int offset, * Identical to fdt_getprop(), but only examine the first namelen * characters of name for matching the property name. */ +#ifndef SWIG /* Not available in Python */ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, const char *name, int namelen, int *lenp); static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset, @@ -647,6 +657,7 @@ static inline void *fdt_getprop_namelen_w(void *fdt, int nodeoffset, return (void *)(uintptr_t)fdt_getprop_namelen(fdt, nodeoffset, name, namelen, lenp); } +#endif /** * fdt_getprop - retrieve the value of a given property @@ -707,8 +718,10 @@ uint32_t fdt_get_phandle(const void *fdt, int nodeoffset); * Identical to fdt_get_alias(), but only examine the first namelen * characters of name for matching the alias name. */ +#ifndef SWIG /* Not available in Python */ const char *fdt_get_alias_namelen(const void *fdt, const char *name, int namelen); +#endif /** * fdt_get_alias - retrieve the path referenced by a given alias @@ -1106,10 +1119,12 @@ int fdt_size_cells(const void *fdt, int nodeoffset); * of the name. It is useful when you want to manipulate only one value of * an array and you have a string that doesn't end with \0. */ +#ifndef SWIG /* Not available in Python */ int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, const char *name, int namelen, uint32_t idx, const void *val, int len); +#endif /** * fdt_setprop_inplace - change a property's value, but not its size @@ -1139,8 +1154,10 @@ int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, * -FDT_ERR_BADSTRUCTURE, * -FDT_ERR_TRUNCATED, standard meanings */ +#ifndef SWIG /* Not available in Python */ int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, const void *val, int len); +#endif /** * fdt_setprop_inplace_u32 - change the value of a 32-bit integer property @@ -1527,6 +1544,36 @@ static inline int fdt_setprop_cell(void *fdt, int nodeoffset, const char *name, #define fdt_setprop_string(fdt, nodeoffset, name, str) \ fdt_setprop((fdt), (nodeoffset), (name), (str), strlen(str)+1) + +/** + * fdt_setprop_empty - set a property to an empty value + * @fdt: pointer to the device tree blob + * @nodeoffset: offset of the node whose property to change + * @name: name of the property to change + * + * fdt_setprop_empty() sets the value of the named property in the + * given node to an empty (zero length) value, or creates a new empty + * property if it does not already exist. + * + * This function may insert or delete data from the blob, and will + * therefore change the offsets of some existing nodes. + * + * returns: + * 0, on success + * -FDT_ERR_NOSPACE, there is insufficient free space in the blob to + * contain the new property value + * -FDT_ERR_BADOFFSET, nodeoffset did not point to FDT_BEGIN_NODE tag + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_BADMAGIC, + * -FDT_ERR_BADVERSION, + * -FDT_ERR_BADSTATE, + * -FDT_ERR_BADSTRUCTURE, + * -FDT_ERR_BADLAYOUT, + * -FDT_ERR_TRUNCATED, standard meanings + */ +#define fdt_setprop_empty(fdt, nodeoffset, name) \ + fdt_setprop((fdt), (nodeoffset), (name), NULL, 0) + /** * fdt_appendprop - append to or create a property * @fdt: pointer to the device tree blob @@ -1704,8 +1751,10 @@ int fdt_delprop(void *fdt, int nodeoffset, const char *name); * creating subnodes based on a portion of a larger string, such as a * full path. */ +#ifndef SWIG /* Not available in Python */ int fdt_add_subnode_namelen(void *fdt, int parentoffset, const char *name, int namelen); +#endif /** * fdt_add_subnode - creates a new node diff --git a/scripts/dtc/libfdt/libfdt_env.h b/scripts/dtc/libfdt/libfdt_env.h index 99f936dacc60a661ecc76cd2373771ec94d976d4..952056cddf09a8f7beffec104a9507961b3d85e1 100644 --- a/scripts/dtc/libfdt/libfdt_env.h +++ b/scripts/dtc/libfdt/libfdt_env.h @@ -58,16 +58,16 @@ #include #ifdef __CHECKER__ -#define __force __attribute__((force)) -#define __bitwise __attribute__((bitwise)) +#define FDT_FORCE __attribute__((force)) +#define FDT_BITWISE __attribute__((bitwise)) #else -#define __force -#define __bitwise +#define FDT_FORCE +#define FDT_BITWISE #endif -typedef uint16_t __bitwise fdt16_t; -typedef uint32_t __bitwise fdt32_t; -typedef uint64_t __bitwise fdt64_t; +typedef uint16_t FDT_BITWISE fdt16_t; +typedef uint32_t FDT_BITWISE fdt32_t; +typedef uint64_t FDT_BITWISE fdt64_t; #define EXTRACT_BYTE(x, n) ((unsigned long long)((uint8_t *)&x)[n]) #define CPU_TO_FDT16(x) ((EXTRACT_BYTE(x, 0) << 8) | EXTRACT_BYTE(x, 1)) @@ -80,29 +80,29 @@ typedef uint64_t __bitwise fdt64_t; static inline uint16_t fdt16_to_cpu(fdt16_t x) { - return (__force uint16_t)CPU_TO_FDT16(x); + return (FDT_FORCE uint16_t)CPU_TO_FDT16(x); } static inline fdt16_t cpu_to_fdt16(uint16_t x) { - return (__force fdt16_t)CPU_TO_FDT16(x); + return (FDT_FORCE fdt16_t)CPU_TO_FDT16(x); } static inline uint32_t fdt32_to_cpu(fdt32_t x) { - return (__force uint32_t)CPU_TO_FDT32(x); + return (FDT_FORCE uint32_t)CPU_TO_FDT32(x); } static inline fdt32_t cpu_to_fdt32(uint32_t x) { - return (__force fdt32_t)CPU_TO_FDT32(x); + return (FDT_FORCE fdt32_t)CPU_TO_FDT32(x); } static inline uint64_t fdt64_to_cpu(fdt64_t x) { - return (__force uint64_t)CPU_TO_FDT64(x); + return (FDT_FORCE uint64_t)CPU_TO_FDT64(x); } static inline fdt64_t cpu_to_fdt64(uint64_t x) { - return (__force fdt64_t)CPU_TO_FDT64(x); + return (FDT_FORCE fdt64_t)CPU_TO_FDT64(x); } #undef CPU_TO_FDT64 #undef CPU_TO_FDT32 diff --git a/scripts/dtc/livetree.c b/scripts/dtc/livetree.c index afa2f67b142aa115c623cebe21a5265c1b89415a..3673de07e4e58c1d1e86619a4be6f479f1429c08 100644 --- a/scripts/dtc/livetree.c +++ b/scripts/dtc/livetree.c @@ -242,7 +242,7 @@ void delete_property_by_name(struct node *node, char *name) struct property *prop = node->proplist; while (prop) { - if (!strcmp(prop->name, name)) { + if (streq(prop->name, name)) { delete_property(prop); return; } @@ -275,7 +275,7 @@ void delete_node_by_name(struct node *parent, char *name) struct node *node = parent->children; while (node) { - if (!strcmp(node->name, name)) { + if (streq(node->name, name)) { delete_node(node); return; } @@ -319,8 +319,8 @@ struct reserve_info *build_reserve_entry(uint64_t address, uint64_t size) memset(new, 0, sizeof(*new)); - new->re.address = address; - new->re.size = size; + new->address = address; + new->size = size; return new; } @@ -393,7 +393,7 @@ struct property *get_property(struct node *node, const char *propname) cell_t propval_cell(struct property *prop) { assert(prop->val.len == sizeof(cell_t)); - return fdt32_to_cpu(*((cell_t *)prop->val.val)); + return fdt32_to_cpu(*((fdt32_t *)prop->val.val)); } struct property *get_property_by_label(struct node *tree, const char *label, @@ -599,13 +599,13 @@ static int cmp_reserve_info(const void *ax, const void *bx) a = *((const struct reserve_info * const *)ax); b = *((const struct reserve_info * const *)bx); - if (a->re.address < b->re.address) + if (a->address < b->address) return -1; - else if (a->re.address > b->re.address) + else if (a->address > b->address) return 1; - else if (a->re.size < b->re.size) + else if (a->size < b->size) return -1; - else if (a->re.size > b->re.size) + else if (a->size > b->size) return 1; else return 0; @@ -847,6 +847,8 @@ static void add_fixup_entry(struct dt_info *dti, struct node *fn, xasprintf(&entry, "%s:%s:%u", node->fullpath, prop->name, m->offset); append_to_property(fn, m->ref, entry, strlen(entry) + 1); + + free(entry); } static void generate_fixups_tree_internal(struct dt_info *dti, @@ -900,7 +902,7 @@ static void add_local_fixup_entry(struct dt_info *dti, struct node *refnode) { struct node *wn, *nwn; /* local fixup node, walk node, new */ - uint32_t value_32; + fdt32_t value_32; char **compp; int i, depth; diff --git a/scripts/dtc/srcpos.c b/scripts/dtc/srcpos.c index aa3aad04cec47029a2c3a24624efffd2b98d150f..9d38459902f3c3c1fbb4ca5d3151a10e92fbf73d 100644 --- a/scripts/dtc/srcpos.c +++ b/scripts/dtc/srcpos.c @@ -252,7 +252,7 @@ srcpos_string(struct srcpos *pos) const char *fname = ""; char *pos_str; - if (pos) + if (pos->file && pos->file->name) fname = pos->file->name; diff --git a/scripts/dtc/srcpos.h b/scripts/dtc/srcpos.h index 2cdfcd82e95e09044827277fa66b83864c9e80bf..7caca8257c6d192e2b35c4c2fc9b317b1e74a870 100644 --- a/scripts/dtc/srcpos.h +++ b/scripts/dtc/srcpos.h @@ -22,6 +22,7 @@ #include #include +#include "util.h" struct srcfile_state { FILE *f; @@ -106,12 +107,10 @@ extern void srcpos_update(struct srcpos *pos, const char *text, int len); extern struct srcpos *srcpos_copy(struct srcpos *pos); extern char *srcpos_string(struct srcpos *pos); -extern void srcpos_verror(struct srcpos *pos, const char *prefix, - const char *fmt, va_list va) - __attribute__((format(printf, 3, 0))); -extern void srcpos_error(struct srcpos *pos, const char *prefix, - const char *fmt, ...) - __attribute__((format(printf, 3, 4))); +extern void PRINTF(3, 0) srcpos_verror(struct srcpos *pos, const char *prefix, + const char *fmt, va_list va); +extern void PRINTF(3, 4) srcpos_error(struct srcpos *pos, const char *prefix, + const char *fmt, ...); extern void srcpos_set_line(char *f, int l); diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c index c9d8967969f9ec03b264a9d769a18bc2df8529d2..2461a3d068a0d1d73169dafa7598e455dda8738b 100644 --- a/scripts/dtc/treesource.c +++ b/scripts/dtc/treesource.c @@ -137,7 +137,7 @@ static void write_propval_string(FILE *f, struct data val) static void write_propval_cells(FILE *f, struct data val) { void *propend = val.val + val.len; - cell_t *cp = (cell_t *)val.val; + fdt32_t *cp = (fdt32_t *)val.val; struct marker *m = val.markers; fprintf(f, "<"); @@ -275,8 +275,8 @@ void dt_to_source(FILE *f, struct dt_info *dti) for_each_label(re->labels, l) fprintf(f, "%s: ", l->label); fprintf(f, "/memreserve/\t0x%016llx 0x%016llx;\n", - (unsigned long long)re->re.address, - (unsigned long long)re->re.size); + (unsigned long long)re->address, + (unsigned long long)re->size); } write_tree_source_node(f, dti->dt, 0); diff --git a/scripts/dtc/update-dtc-source.sh b/scripts/dtc/update-dtc-source.sh index f5cde799db03f3154cf4fe93f0c7a715e5a617f4..b8ebcc6722d2c9fdc56cbbfb101a4ea22093b7e9 100755 --- a/scripts/dtc/update-dtc-source.sh +++ b/scripts/dtc/update-dtc-source.sh @@ -36,10 +36,19 @@ DTC_SOURCE="checks.c data.c dtc.c dtc.h flattree.c fstree.c livetree.c srcpos.c DTC_GENERATED="dtc-lexer.lex.c dtc-parser.tab.c dtc-parser.tab.h" LIBFDT_SOURCE="Makefile.libfdt fdt.c fdt.h fdt_empty_tree.c fdt_ro.c fdt_rw.c fdt_strerror.c fdt_sw.c fdt_wip.c libfdt.h libfdt_env.h libfdt_internal.h" +get_last_dtc_version() { + git log --oneline scripts/dtc/ | grep 'upstream' | head -1 | sed -e 's/^.* \(.*\)/\1/' +} + +last_dtc_ver=$(get_last_dtc_version) + # Build DTC cd $DTC_UPSTREAM_PATH make clean make check +dtc_version=$(git describe HEAD) +dtc_log=$(git log --oneline ${last_dtc_ver}..) + # Copy the files into the Linux tree cd $DTC_LINUX_PATH @@ -60,4 +69,13 @@ sed -i -- 's/#include /#include "libfdt_env.h"/g' ./libfdt/libfdt. sed -i -- 's/#include /#include "fdt.h"/g' ./libfdt/libfdt.h git add ./libfdt/libfdt.h -git commit -e -v -m "scripts/dtc: Update to upstream version [CHANGEME]" +commit_msg=$(cat << EOF +scripts/dtc: Update to upstream version ${dtc_version} + +This adds the following commits from upstream: + +${dtc_log} +EOF +) + +git commit -e -v -s -m "${commit_msg}" diff --git a/scripts/dtc/util.c b/scripts/dtc/util.c index 3550f86bd6df459e3143de9ecd3ffb2309a4441f..9953c32a024473c60d5b9db39b3ed6fee5ba8532 100644 --- a/scripts/dtc/util.c +++ b/scripts/dtc/util.c @@ -396,7 +396,7 @@ void utilfdt_print_data(const char *data, int len) } while (s < data + len); } else if ((len % 4) == 0) { - const uint32_t *cell = (const uint32_t *)data; + const fdt32_t *cell = (const fdt32_t *)data; printf(" = <"); for (i = 0, len /= 4; i < len; i++) @@ -412,15 +412,16 @@ void utilfdt_print_data(const char *data, int len) } } -void util_version(void) +void NORETURN util_version(void) { printf("Version: %s\n", DTC_VERSION); exit(0); } -void util_usage(const char *errmsg, const char *synopsis, - const char *short_opts, struct option const long_opts[], - const char * const opts_help[]) +void NORETURN util_usage(const char *errmsg, const char *synopsis, + const char *short_opts, + struct option const long_opts[], + const char * const opts_help[]) { FILE *fp = errmsg ? stderr : stdout; const char a_arg[] = ""; diff --git a/scripts/dtc/util.h b/scripts/dtc/util.h index f5c4f1b50d3036d645a15fb0f05f2db4474eaace..ad5f41199edbb74f3bdb97bdfc908caab6145571 100644 --- a/scripts/dtc/util.h +++ b/scripts/dtc/util.h @@ -25,9 +25,17 @@ * USA */ +#ifdef __GNUC__ +#define PRINTF(i, j) __attribute__((format (printf, i, j))) +#define NORETURN __attribute__((noreturn)) +#else +#define PRINTF(i, j) +#define NORETURN +#endif + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -static inline void __attribute__((noreturn)) die(const char *str, ...) +static inline void NORETURN PRINTF(1, 2) die(const char *str, ...) { va_list ap; @@ -53,13 +61,14 @@ static inline void *xrealloc(void *p, size_t len) void *new = realloc(p, len); if (!new) - die("realloc() failed (len=%d)\n", len); + die("realloc() failed (len=%zd)\n", len); return new; } extern char *xstrdup(const char *s); -extern int xasprintf(char **strp, const char *fmt, ...); + +extern int PRINTF(2, 3) xasprintf(char **strp, const char *fmt, ...); extern char *join_path(const char *path, const char *name); /** @@ -188,7 +197,7 @@ void utilfdt_print_data(const char *data, int len); /** * Show source version and exit */ -void util_version(void) __attribute__((noreturn)); +void NORETURN util_version(void); /** * Show usage and exit @@ -202,9 +211,10 @@ void util_version(void) __attribute__((noreturn)); * @param long_opts The structure of long options * @param opts_help An array of help strings (should align with long_opts) */ -void util_usage(const char *errmsg, const char *synopsis, - const char *short_opts, struct option const long_opts[], - const char * const opts_help[]) __attribute__((noreturn)); +void NORETURN util_usage(const char *errmsg, const char *synopsis, + const char *short_opts, + struct option const long_opts[], + const char * const opts_help[]); /** * Show usage and exit diff --git a/scripts/dtc/version_gen.h b/scripts/dtc/version_gen.h index 16c2e53a85e37302e7d11dc86dc1a5ffd8297bb7..1229e07b4912bd477ce7839257f0fb36105d8dde 100644 --- a/scripts/dtc/version_gen.h +++ b/scripts/dtc/version_gen.h @@ -1 +1 @@ -#define DTC_VERSION "DTC 1.4.2-g0931cea3" +#define DTC_VERSION "DTC 1.4.4-g756ffc4f" diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index f9b92ece78343a463a37d33e3f73fa8621b84444..5afd1098e33a173a18b2310aa24d205534df711c 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -23,10 +23,11 @@ class LxDmesg(gdb.Command): super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): - log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16) - log_first_idx = int(gdb.parse_and_eval("log_first_idx")) - log_next_idx = int(gdb.parse_and_eval("log_next_idx")) - log_buf_len = int(gdb.parse_and_eval("log_buf_len")) + log_buf_addr = int(str(gdb.parse_and_eval( + "'printk.c'::log_buf")).split()[0], 16) + log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) + log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) + log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) inf = gdb.inferiors()[0] start = log_buf_addr + log_first_idx diff --git a/scripts/genksyms/parse.tab.c_shipped b/scripts/genksyms/parse.tab.c_shipped index 69148d30ca3f8737142409ed16608220ccc46e44..d02258bafe7b9881fccd224bfd37a2833506c0ea 100644 --- a/scripts/genksyms/parse.tab.c_shipped +++ b/scripts/genksyms/parse.tab.c_shipped @@ -440,16 +440,16 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 4 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 524 +#define YYLAST 522 /* YYNTOKENS -- Number of terminals. */ #define YYNTOKENS 55 /* YYNNTS -- Number of nonterminals. */ #define YYNNTS 49 /* YYNRULES -- Number of rules. */ -#define YYNRULES 134 +#define YYNRULES 133 /* YYNRULES -- Number of states. */ -#define YYNSTATES 189 +#define YYNSTATES 187 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ #define YYUNDEFTOK 2 @@ -506,13 +506,13 @@ static const yytype_uint16 yyprhs[] = 97, 101, 105, 109, 112, 115, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 145, 146, 148, 150, 153, 155, 157, 159, 161, 164, 166, - 168, 170, 175, 180, 183, 187, 191, 194, 196, 198, - 200, 205, 210, 213, 217, 221, 224, 226, 230, 231, - 233, 235, 239, 242, 245, 247, 248, 250, 252, 257, - 262, 265, 269, 273, 277, 278, 280, 283, 287, 291, - 292, 294, 296, 299, 303, 306, 307, 309, 311, 315, - 318, 321, 323, 326, 327, 330, 334, 339, 341, 345, - 347, 351, 354, 355, 357 + 168, 170, 175, 180, 183, 187, 190, 192, 194, 196, + 201, 206, 209, 213, 217, 220, 222, 226, 227, 229, + 231, 235, 238, 241, 243, 244, 246, 248, 253, 258, + 261, 265, 269, 273, 274, 276, 279, 283, 287, 288, + 290, 292, 295, 299, 302, 303, 305, 307, 311, 314, + 317, 319, 322, 323, 326, 330, 335, 337, 341, 343, + 347, 350, 351, 353 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. */ @@ -536,25 +536,24 @@ static const yytype_int8 yyrhs[] = 74, 75, -1, 8, -1, 27, -1, 32, -1, 18, -1, 72, 76, -1, 77, -1, 39, -1, 43, -1, 77, 49, 80, 50, -1, 77, 49, 1, 50, -1, - 77, 35, -1, 49, 76, 50, -1, 49, 1, 50, - -1, 72, 78, -1, 79, -1, 39, -1, 43, -1, - 79, 49, 80, 50, -1, 79, 49, 1, 50, -1, - 79, 35, -1, 49, 78, 50, -1, 49, 1, 50, - -1, 81, 38, -1, 81, -1, 82, 48, 38, -1, - -1, 82, -1, 83, -1, 82, 48, 83, -1, 67, - 84, -1, 72, 84, -1, 85, -1, -1, 39, -1, - 43, -1, 85, 49, 80, 50, -1, 85, 49, 1, - 50, -1, 85, 35, -1, 49, 84, 50, -1, 49, - 1, 50, -1, 66, 76, 34, -1, -1, 88, -1, - 52, 36, -1, 53, 90, 47, -1, 53, 1, 47, - -1, -1, 91, -1, 92, -1, 91, 92, -1, 66, - 93, 46, -1, 1, 46, -1, -1, 94, -1, 95, - -1, 94, 48, 95, -1, 78, 97, -1, 39, 96, - -1, 96, -1, 54, 36, -1, -1, 97, 32, -1, - 53, 99, 47, -1, 53, 99, 48, 47, -1, 100, - -1, 99, 48, 100, -1, 39, -1, 39, 52, 36, - -1, 31, 46, -1, -1, 31, -1, 30, 49, 39, - 50, 46, -1 + 77, 35, -1, 49, 76, 50, -1, 72, 78, -1, + 79, -1, 39, -1, 43, -1, 79, 49, 80, 50, + -1, 79, 49, 1, 50, -1, 79, 35, -1, 49, + 78, 50, -1, 49, 1, 50, -1, 81, 38, -1, + 81, -1, 82, 48, 38, -1, -1, 82, -1, 83, + -1, 82, 48, 83, -1, 67, 84, -1, 72, 84, + -1, 85, -1, -1, 39, -1, 43, -1, 85, 49, + 80, 50, -1, 85, 49, 1, 50, -1, 85, 35, + -1, 49, 84, 50, -1, 49, 1, 50, -1, 66, + 76, 34, -1, -1, 88, -1, 52, 36, -1, 53, + 90, 47, -1, 53, 1, 47, -1, -1, 91, -1, + 92, -1, 91, 92, -1, 66, 93, 46, -1, 1, + 46, -1, -1, 94, -1, 95, -1, 94, 48, 95, + -1, 78, 97, -1, 39, 96, -1, 96, -1, 54, + 36, -1, -1, 97, 32, -1, 53, 99, 47, -1, + 53, 99, 48, 47, -1, 100, -1, 99, 48, 100, + -1, 39, -1, 39, 52, 36, -1, 31, 46, -1, + -1, 31, -1, 30, 49, 39, 50, 46, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ @@ -567,13 +566,13 @@ static const yytype_uint16 yyrline[] = 238, 240, 242, 247, 250, 251, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 270, 275, 276, 280, 281, 285, 285, 285, 286, 294, 295, 299, - 308, 317, 319, 321, 323, 325, 332, 333, 337, 338, - 339, 341, 343, 345, 347, 352, 353, 354, 358, 359, - 363, 364, 369, 374, 376, 380, 381, 389, 393, 395, - 397, 399, 401, 406, 415, 416, 421, 426, 427, 431, - 432, 436, 437, 441, 443, 448, 449, 453, 454, 458, - 459, 460, 464, 468, 469, 473, 474, 478, 479, 482, - 487, 495, 499, 500, 504 + 308, 317, 319, 321, 323, 330, 331, 335, 336, 337, + 339, 341, 343, 345, 350, 351, 352, 356, 357, 361, + 362, 367, 372, 374, 378, 379, 387, 391, 393, 395, + 397, 399, 404, 413, 414, 419, 424, 425, 429, 430, + 434, 435, 439, 441, 446, 447, 451, 452, 456, 457, + 458, 462, 466, 467, 471, 472, 476, 477, 480, 485, + 493, 497, 498, 502 }; #endif @@ -636,13 +635,13 @@ static const yytype_uint8 yyr1[] = 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 72, 73, 73, 74, 74, 75, 75, 75, 75, 76, 76, 77, - 77, 77, 77, 77, 77, 77, 78, 78, 79, 79, - 79, 79, 79, 79, 79, 80, 80, 80, 81, 81, - 82, 82, 83, 84, 84, 85, 85, 85, 85, 85, - 85, 85, 85, 86, 87, 87, 88, 89, 89, 90, - 90, 91, 91, 92, 92, 93, 93, 94, 94, 95, - 95, 95, 96, 97, 97, 98, 98, 99, 99, 100, - 100, 101, 102, 102, 103 + 77, 77, 77, 77, 77, 78, 78, 79, 79, 79, + 79, 79, 79, 79, 80, 80, 80, 81, 81, 82, + 82, 83, 84, 84, 85, 85, 85, 85, 85, 85, + 85, 85, 86, 87, 87, 88, 89, 89, 90, 90, + 91, 91, 92, 92, 93, 93, 94, 94, 95, 95, + 95, 96, 97, 97, 98, 98, 99, 99, 100, 100, + 101, 102, 102, 103 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ @@ -655,13 +654,13 @@ static const yytype_uint8 yyr2[] = 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, - 1, 4, 4, 2, 3, 3, 2, 1, 1, 1, - 4, 4, 2, 3, 3, 2, 1, 3, 0, 1, - 1, 3, 2, 2, 1, 0, 1, 1, 4, 4, - 2, 3, 3, 3, 0, 1, 2, 3, 3, 0, - 1, 1, 2, 3, 2, 0, 1, 1, 3, 2, - 2, 1, 2, 0, 2, 3, 4, 1, 3, 1, - 3, 2, 0, 1, 5 + 1, 4, 4, 2, 3, 2, 1, 1, 1, 4, + 4, 2, 3, 3, 2, 1, 3, 0, 1, 1, + 3, 2, 2, 1, 0, 1, 1, 4, 4, 2, + 3, 3, 3, 0, 1, 2, 3, 3, 0, 1, + 1, 2, 3, 2, 0, 1, 1, 3, 2, 2, + 1, 2, 0, 2, 3, 4, 1, 3, 1, 3, + 2, 0, 1, 5 }; /* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM. @@ -675,189 +674,189 @@ static const yytype_uint8 yydefact[] = 0, 56, 0, 0, 65, 36, 57, 5, 10, 17, 23, 24, 26, 27, 33, 34, 11, 12, 13, 14, 15, 39, 0, 43, 6, 37, 0, 44, 22, 38, - 45, 0, 0, 131, 69, 70, 0, 59, 0, 18, - 19, 0, 132, 68, 25, 42, 129, 0, 127, 22, - 40, 0, 115, 0, 0, 111, 9, 17, 41, 95, - 0, 0, 0, 0, 58, 60, 61, 16, 0, 67, - 133, 103, 123, 73, 0, 0, 125, 0, 7, 114, - 108, 78, 79, 0, 0, 0, 123, 77, 0, 116, - 117, 121, 107, 0, 112, 132, 96, 57, 0, 95, - 92, 94, 35, 0, 75, 74, 62, 20, 104, 0, - 0, 86, 89, 90, 130, 126, 128, 120, 0, 78, - 0, 122, 76, 119, 82, 0, 113, 0, 0, 97, - 0, 93, 100, 0, 134, 124, 0, 21, 105, 72, - 71, 85, 0, 84, 83, 0, 0, 118, 102, 101, - 0, 0, 106, 87, 91, 81, 80, 99, 98 + 45, 0, 0, 130, 69, 70, 0, 59, 0, 18, + 19, 0, 131, 68, 25, 42, 128, 0, 126, 22, + 40, 0, 114, 0, 0, 110, 9, 17, 41, 94, + 0, 0, 0, 58, 60, 61, 16, 0, 67, 132, + 102, 122, 73, 0, 0, 124, 0, 7, 113, 107, + 77, 78, 0, 0, 0, 122, 76, 0, 115, 116, + 120, 106, 0, 111, 131, 95, 57, 0, 94, 91, + 93, 35, 0, 74, 62, 20, 103, 0, 0, 85, + 88, 89, 129, 125, 127, 119, 0, 77, 0, 121, + 75, 118, 81, 0, 112, 0, 0, 96, 0, 92, + 99, 0, 133, 123, 0, 21, 104, 72, 71, 84, + 0, 83, 82, 0, 0, 117, 101, 100, 0, 0, + 105, 86, 90, 80, 79, 98, 97 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int16 yydefgoto[] = { -1, 1, 2, 3, 37, 79, 58, 38, 68, 69, - 70, 82, 40, 41, 42, 43, 44, 71, 94, 95, - 45, 125, 73, 116, 117, 140, 141, 142, 143, 130, - 131, 46, 167, 168, 57, 83, 84, 85, 118, 119, - 120, 121, 138, 53, 77, 78, 47, 102, 48 + 70, 82, 40, 41, 42, 43, 44, 71, 93, 94, + 45, 124, 73, 115, 116, 138, 139, 140, 141, 129, + 130, 46, 165, 166, 57, 83, 84, 85, 117, 118, + 119, 120, 136, 53, 77, 78, 47, 101, 48 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. */ -#define YYPACT_NINF -111 +#define YYPACT_NINF -94 static const yytype_int16 yypact[] = { - -111, 13, -111, 210, -111, -111, 28, -111, -111, -111, - -111, -111, -27, -111, 44, -111, -111, -111, -111, -111, - -111, -111, -111, -111, -24, -111, -20, -111, -111, -111, - 31, -111, 32, 42, -111, -111, -111, -111, -111, 34, - 481, -111, -111, -111, -111, -111, -111, -111, -111, -111, - -111, 51, 56, -111, -111, 52, 108, -111, 481, 52, - -111, 481, 58, -111, -111, -111, 19, 0, 54, 55, - -111, 34, 30, -18, -111, -111, 68, -25, -111, 481, - -111, 45, 33, 59, 159, -111, -111, 34, -111, 395, - 57, 60, 81, 88, -111, 0, -111, -111, 34, -111, - -111, -111, -111, -111, 257, 72, -111, -23, -111, -111, - -111, 85, -111, 20, 106, 47, -111, -10, 97, 96, - -111, -111, -111, 99, -111, 115, -111, -111, 5, 50, - -111, 11, -111, 102, -111, -111, -111, -111, -22, 100, - 103, 111, 104, -111, -111, -111, -111, -111, 113, -111, - 121, -111, -111, 124, -111, 303, -111, 33, 132, -111, - 139, -111, -111, 349, -111, -111, 122, -111, -111, -111, - -111, -111, 442, -111, -111, 140, 143, -111, -111, -111, - 144, 145, -111, -111, -111, -111, -111, -111, -111 + -94, 15, -94, 208, -94, -94, 34, -94, -94, -94, + -94, -94, -27, -94, -5, -94, -94, -94, -94, -94, + -94, -94, -94, -94, -25, -94, -16, -94, -94, -94, + -4, -94, 19, -24, -94, -94, -94, -94, -94, 24, + 479, -94, -94, -94, -94, -94, -94, -94, -94, -94, + -94, 29, 48, -94, -94, 37, 106, -94, 479, 37, + -94, 479, 54, -94, -94, -94, 24, -2, 49, 53, + -94, 24, -14, -11, -94, -94, 47, 38, -94, 479, + -94, 51, 23, 55, 157, -94, -94, 24, -94, 393, + 56, 58, 68, -94, -2, -94, -94, 24, -94, -94, + -94, -94, -94, 255, 67, -94, 5, -94, -94, -94, + 50, -94, 7, 69, 40, -94, -8, 83, 88, -94, + -94, -94, 91, -94, 109, -94, -94, 4, 45, -94, + 16, -94, 95, -94, -94, -94, -23, 92, 93, 108, + 96, -94, -94, -94, -94, -94, 97, -94, 98, -94, + -94, 118, -94, 301, -94, 23, 101, -94, 104, -94, + -94, 347, -94, -94, 120, -94, -94, -94, -94, -94, + 440, -94, -94, 111, 119, -94, -94, -94, 130, 137, + -94, -94, -94, -94, -94, -94, -94 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int16 yypgoto[] = { - -111, -111, 160, -111, -111, -111, -111, -51, -111, -111, - 98, -1, -61, -37, -111, -111, -111, -78, -111, -111, - -53, -30, -111, -66, -111, -110, -111, -111, -60, -63, - -111, -111, -111, -111, -21, -111, -111, 116, -111, -111, - 40, 90, 83, 152, -111, 105, -111, -111, -111 + -94, -94, 158, -94, -94, -94, -94, -45, -94, -94, + 94, -1, -61, -29, -94, -94, -94, -79, -94, -94, + -63, -7, -94, -93, -94, -92, -94, -94, -60, -57, + -94, -94, -94, -94, -19, -94, -94, 110, -94, -94, + 33, 82, 78, 144, -94, 99, -94, -94, -94 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If positive, shift that token. If negative, reduce the rule which number is the opposite. If YYTABLE_NINF, syntax error. */ -#define YYTABLE_NINF -111 +#define YYTABLE_NINF -110 static const yytype_int16 yytable[] = { - 89, 90, 39, 74, 115, 60, 158, 86, 10, 72, - 165, 129, 51, 4, 96, 55, 76, 103, 20, 59, - 92, 148, 106, 107, 145, 154, 52, 29, 108, 56, - 166, 104, 34, 56, 80, 115, 93, 115, 88, 155, - -95, 99, 136, 89, 126, 176, 162, 150, 159, 152, - 129, 129, 74, 181, 128, -95, 67, 87, 64, 149, - 163, 100, 65, 112, 101, 160, 161, 54, 66, 113, - 67, 67, 111, 64, 49, 50, 112, 65, 87, 115, - 61, 62, 113, 66, 67, 67, 149, 114, 63, 126, - 112, 109, 110, 159, 89, 76, 113, 91, 67, 128, - 97, 67, 89, 98, 52, 56, 122, 132, 144, 81, - 133, 89, 184, 7, 8, 9, 10, 11, 12, 13, - 105, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 134, 26, 27, 28, 29, 30, 31, 135, 114, - 34, 35, 151, 156, 157, 109, 100, -22, 164, 171, - 169, 36, 172, 170, -22, -109, 165, -22, 182, -22, - 123, 5, -22, 173, 7, 8, 9, 10, 11, 12, - 13, 174, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 178, 26, 27, 28, 29, 30, 31, 179, - 185, 34, 35, 186, 187, 188, 137, 177, -22, 153, - 124, 147, 36, 75, 0, -22, -110, 0, -22, 0, - -22, 6, 146, -22, 0, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 0, 0, 0, 0, 0, -22, - 0, 0, 0, 36, 0, 0, -22, 0, 139, -22, - 0, -22, 7, 8, 9, 10, 11, 12, 13, 0, + 89, 90, 39, 114, 95, 156, 10, 60, 146, 163, + 128, 74, 51, 86, 55, 4, 20, 99, 54, 148, + 100, 150, 63, 59, 102, 29, 52, 152, 56, 164, + 34, 134, 72, 114, 107, 114, 80, 56, 103, -94, + 88, 153, 89, 125, 76, 61, 147, 157, 128, 128, + 111, 160, 143, 127, -94, 67, 112, 87, 67, 92, + 74, 174, 110, 64, 98, 161, 111, 65, 62, 179, + 158, 159, 112, 66, 67, 67, 114, 113, 87, 147, + 49, 50, 52, 111, 125, 105, 106, 76, 157, 112, + 56, 67, 89, 91, 127, 96, 67, 108, 109, 104, + 89, 97, 121, 142, 113, 149, 131, 81, 132, 89, + 182, 7, 8, 9, 10, 11, 12, 13, 133, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 154, + 26, 27, 28, 29, 30, 31, 155, 108, 34, 35, + 99, 162, 167, 168, 170, -22, 169, 171, 172, 36, + 163, 176, -22, -108, 177, -22, 180, -22, 122, 5, + -22, 183, 7, 8, 9, 10, 11, 12, 13, 184, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 185, 26, 27, 28, 29, 30, 31, 186, 175, 34, + 35, 135, 145, 151, 123, 75, -22, 0, 0, 0, + 36, 0, 0, -22, -109, 144, -22, 0, -22, 6, + 0, -22, 0, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 0, 0, 0, 0, 0, -22, 0, 0, + 0, 36, 0, 0, -22, 0, 137, -22, 0, -22, + 7, 8, 9, 10, 11, 12, 13, 0, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 0, 26, + 27, 28, 29, 30, 31, 0, 0, 34, 35, 0, + 0, 0, 0, -87, 0, 0, 0, 0, 36, 0, + 0, 0, 173, 0, 0, -87, 7, 8, 9, 10, + 11, 12, 13, 0, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 0, 26, 27, 28, 29, 30, + 31, 0, 0, 34, 35, 0, 0, 0, 0, -87, + 0, 0, 0, 0, 36, 0, 0, 0, 178, 0, + 0, -87, 7, 8, 9, 10, 11, 12, 13, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 26, 27, 28, 29, 30, 31, 0, 0, 34, - 35, 0, 0, 0, 0, -88, 0, 0, 0, 0, - 36, 0, 0, 0, 175, 0, 0, -88, 7, 8, + 35, 0, 0, 0, 0, -87, 0, 0, 0, 0, + 36, 0, 0, 0, 0, 0, 0, -87, 7, 8, 9, 10, 11, 12, 13, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 26, 27, 28, 29, 30, 31, 0, 0, 34, 35, 0, 0, 0, - 0, -88, 0, 0, 0, 0, 36, 0, 0, 0, - 180, 0, 0, -88, 7, 8, 9, 10, 11, 12, + 0, 0, 125, 0, 0, 0, 126, 0, 0, 0, + 0, 0, 127, 0, 67, 7, 8, 9, 10, 11, + 12, 13, 0, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 0, 26, 27, 28, 29, 30, 31, + 0, 0, 34, 35, 0, 0, 0, 0, 181, 0, + 0, 0, 0, 36, 7, 8, 9, 10, 11, 12, 13, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 26, 27, 28, 29, 30, 31, 0, - 0, 34, 35, 0, 0, 0, 0, -88, 0, 0, - 0, 0, 36, 0, 0, 0, 0, 0, 0, -88, - 7, 8, 9, 10, 11, 12, 13, 0, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 0, 26, - 27, 28, 29, 30, 31, 0, 0, 34, 35, 0, - 0, 0, 0, 0, 126, 0, 0, 0, 127, 0, - 0, 0, 0, 0, 128, 0, 67, 7, 8, 9, - 10, 11, 12, 13, 0, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 0, 26, 27, 28, 29, - 30, 31, 0, 0, 34, 35, 0, 0, 0, 0, - 183, 0, 0, 0, 0, 36, 7, 8, 9, 10, - 11, 12, 13, 0, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 0, 26, 27, 28, 29, 30, - 31, 0, 0, 34, 35, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 36 + 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 36 }; #define yypact_value_is_default(Yystate) \ - (!!((Yystate) == (-111))) + (!!((Yystate) == (-94))) #define yytable_value_is_error(Yytable_value) \ YYID (0) static const yytype_int16 yycheck[] = { - 61, 61, 3, 40, 82, 26, 1, 58, 8, 39, - 32, 89, 39, 0, 67, 39, 39, 35, 18, 39, - 1, 1, 47, 48, 47, 35, 53, 27, 79, 53, - 52, 49, 32, 53, 55, 113, 66, 115, 59, 49, - 35, 71, 95, 104, 39, 155, 35, 113, 43, 115, - 128, 129, 89, 163, 49, 50, 51, 58, 39, 39, - 49, 31, 43, 43, 34, 128, 129, 23, 49, 49, - 51, 51, 39, 39, 46, 47, 43, 43, 79, 157, - 49, 49, 49, 49, 51, 51, 39, 54, 46, 39, - 43, 46, 47, 43, 155, 39, 49, 39, 51, 49, - 46, 51, 163, 48, 53, 53, 47, 50, 36, 1, - 50, 172, 172, 5, 6, 7, 8, 9, 10, 11, - 52, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 50, 24, 25, 26, 27, 28, 29, 50, 54, - 32, 33, 36, 46, 48, 46, 31, 39, 46, 38, - 50, 43, 48, 50, 46, 47, 32, 49, 36, 51, - 1, 1, 54, 50, 5, 6, 7, 8, 9, 10, - 11, 50, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 50, 24, 25, 26, 27, 28, 29, 50, - 50, 32, 33, 50, 50, 50, 98, 157, 39, 116, - 84, 111, 43, 51, -1, 46, 47, -1, 49, -1, - 51, 1, 107, 54, -1, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, -1, -1, -1, -1, -1, 39, - -1, -1, -1, 43, -1, -1, 46, -1, 1, 49, - -1, 51, 5, 6, 7, 8, 9, 10, 11, -1, + 61, 61, 3, 82, 67, 1, 8, 26, 1, 32, + 89, 40, 39, 58, 39, 0, 18, 31, 23, 112, + 34, 114, 46, 39, 35, 27, 53, 35, 53, 52, + 32, 94, 39, 112, 79, 114, 55, 53, 49, 35, + 59, 49, 103, 39, 39, 49, 39, 43, 127, 128, + 43, 35, 47, 49, 50, 51, 49, 58, 51, 66, + 89, 153, 39, 39, 71, 49, 43, 43, 49, 161, + 127, 128, 49, 49, 51, 51, 155, 54, 79, 39, + 46, 47, 53, 43, 39, 47, 48, 39, 43, 49, + 53, 51, 153, 39, 49, 46, 51, 46, 47, 52, + 161, 48, 47, 36, 54, 36, 50, 1, 50, 170, + 170, 5, 6, 7, 8, 9, 10, 11, 50, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 46, + 24, 25, 26, 27, 28, 29, 48, 46, 32, 33, + 31, 46, 50, 50, 48, 39, 38, 50, 50, 43, + 32, 50, 46, 47, 50, 49, 36, 51, 1, 1, + 54, 50, 5, 6, 7, 8, 9, 10, 11, 50, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 50, 24, 25, 26, 27, 28, 29, 50, 155, 32, + 33, 97, 110, 115, 84, 51, 39, -1, -1, -1, + 43, -1, -1, 46, 47, 106, 49, -1, 51, 1, + -1, 54, -1, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, -1, -1, -1, -1, -1, 39, -1, -1, + -1, 43, -1, -1, 46, -1, 1, 49, -1, 51, + 5, 6, 7, 8, 9, 10, 11, -1, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, -1, 24, + 25, 26, 27, 28, 29, -1, -1, 32, 33, -1, + -1, -1, -1, 38, -1, -1, -1, -1, 43, -1, + -1, -1, 1, -1, -1, 50, 5, 6, 7, 8, + 9, 10, 11, -1, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, -1, 24, 25, 26, 27, 28, + 29, -1, -1, 32, 33, -1, -1, -1, -1, 38, + -1, -1, -1, -1, 43, -1, -1, -1, 1, -1, + -1, 50, 5, 6, 7, 8, 9, 10, 11, -1, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, -1, 24, 25, 26, 27, 28, 29, -1, -1, 32, 33, -1, -1, -1, -1, 38, -1, -1, -1, -1, - 43, -1, -1, -1, 1, -1, -1, 50, 5, 6, + 43, -1, -1, -1, -1, -1, -1, 50, 5, 6, 7, 8, 9, 10, 11, -1, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, -1, 24, 25, 26, 27, 28, 29, -1, -1, 32, 33, -1, -1, -1, - -1, 38, -1, -1, -1, -1, 43, -1, -1, -1, - 1, -1, -1, 50, 5, 6, 7, 8, 9, 10, + -1, -1, 39, -1, -1, -1, 43, -1, -1, -1, + -1, -1, 49, -1, 51, 5, 6, 7, 8, 9, + 10, 11, -1, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, -1, 24, 25, 26, 27, 28, 29, + -1, -1, 32, 33, -1, -1, -1, -1, 38, -1, + -1, -1, -1, 43, 5, 6, 7, 8, 9, 10, 11, -1, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, -1, 24, 25, 26, 27, 28, 29, -1, - -1, 32, 33, -1, -1, -1, -1, 38, -1, -1, - -1, -1, 43, -1, -1, -1, -1, -1, -1, 50, - 5, 6, 7, 8, 9, 10, 11, -1, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, -1, 24, - 25, 26, 27, 28, 29, -1, -1, 32, 33, -1, - -1, -1, -1, -1, 39, -1, -1, -1, 43, -1, - -1, -1, -1, -1, 49, -1, 51, 5, 6, 7, - 8, 9, 10, 11, -1, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, -1, 24, 25, 26, 27, - 28, 29, -1, -1, 32, 33, -1, -1, -1, -1, - 38, -1, -1, -1, -1, 43, 5, 6, 7, 8, - 9, 10, 11, -1, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, -1, 24, 25, 26, 27, 28, - 29, -1, -1, 32, 33, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 43 + -1, 32, 33, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 43 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing @@ -873,16 +872,16 @@ static const yytype_uint8 yystos[] = 89, 49, 49, 46, 39, 43, 49, 51, 63, 64, 65, 72, 76, 77, 68, 98, 39, 99, 100, 60, 89, 1, 66, 90, 91, 92, 62, 66, 89, 67, - 83, 39, 1, 76, 73, 74, 75, 46, 48, 76, - 31, 34, 102, 35, 49, 52, 47, 48, 62, 46, - 47, 39, 43, 49, 54, 72, 78, 79, 93, 94, - 95, 96, 47, 1, 92, 76, 39, 43, 49, 72, - 84, 85, 50, 50, 50, 50, 75, 65, 97, 1, - 80, 81, 82, 83, 36, 47, 100, 96, 1, 39, - 78, 36, 78, 97, 35, 49, 46, 48, 1, 43, - 84, 84, 35, 49, 46, 32, 52, 87, 88, 50, - 50, 38, 48, 50, 50, 1, 80, 95, 50, 50, - 1, 80, 36, 38, 83, 50, 50, 50, 50 + 83, 39, 76, 73, 74, 75, 46, 48, 76, 31, + 34, 102, 35, 49, 52, 47, 48, 62, 46, 47, + 39, 43, 49, 54, 72, 78, 79, 93, 94, 95, + 96, 47, 1, 92, 76, 39, 43, 49, 72, 84, + 85, 50, 50, 50, 75, 65, 97, 1, 80, 81, + 82, 83, 36, 47, 100, 96, 1, 39, 78, 36, + 78, 97, 35, 49, 46, 48, 1, 43, 84, 84, + 35, 49, 46, 32, 52, 87, 88, 50, 50, 38, + 48, 50, 50, 1, 80, 95, 50, 50, 1, 80, + 36, 38, 83, 50, 50, 50, 50 }; #define yyerrok (yyerrstatus = 0) @@ -1928,12 +1927,12 @@ yyreduce: case 75: - { (yyval) = (yyvsp[(3) - (3)]); } + { (yyval) = (yyvsp[(2) - (2)]); } break; - case 76: + case 79: - { (yyval) = (yyvsp[(2) - (2)]); } + { (yyval) = (yyvsp[(4) - (4)]); } break; case 80: @@ -1943,12 +1942,12 @@ yyreduce: case 81: - { (yyval) = (yyvsp[(4) - (4)]); } + { (yyval) = (yyvsp[(2) - (2)]); } break; case 82: - { (yyval) = (yyvsp[(2) - (2)]); } + { (yyval) = (yyvsp[(3) - (3)]); } break; case 83: @@ -1958,45 +1957,40 @@ yyreduce: case 84: - { (yyval) = (yyvsp[(3) - (3)]); } - break; - - case 85: - { (yyval) = (yyvsp[(2) - (2)]); } break; - case 87: + case 86: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 88: + case 87: { (yyval) = NULL; } break; - case 91: + case 90: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 92: + case 91: { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); } break; - case 93: + case 92: { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); } break; - case 95: + case 94: { (yyval) = NULL; } break; - case 96: + case 95: { /* For version 2 checksums, we don't want to remember private parameter names. */ @@ -2005,39 +1999,39 @@ yyreduce: } break; - case 97: + case 96: { remove_node((yyvsp[(1) - (1)])); (yyval) = (yyvsp[(1) - (1)]); } break; - case 98: + case 97: { (yyval) = (yyvsp[(4) - (4)]); } break; - case 99: + case 98: { (yyval) = (yyvsp[(4) - (4)]); } break; - case 100: + case 99: { (yyval) = (yyvsp[(2) - (2)]); } break; - case 101: + case 100: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 102: + case 101: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 103: + case 102: { struct string_list *decl = *(yyvsp[(2) - (3)]); *(yyvsp[(2) - (3)]) = NULL; @@ -2046,87 +2040,87 @@ yyreduce: } break; - case 104: + case 103: { (yyval) = NULL; } break; - case 106: + case 105: { remove_list((yyvsp[(2) - (2)]), &(*(yyvsp[(1) - (2)]))->next); (yyval) = (yyvsp[(2) - (2)]); } break; - case 107: + case 106: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 108: + case 107: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 109: + case 108: { (yyval) = NULL; } break; - case 112: + case 111: { (yyval) = (yyvsp[(2) - (2)]); } break; - case 113: + case 112: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 114: + case 113: { (yyval) = (yyvsp[(2) - (2)]); } break; - case 115: + case 114: { (yyval) = NULL; } break; - case 118: + case 117: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 119: + case 118: { (yyval) = (yyvsp[(2) - (2)]) ? (yyvsp[(2) - (2)]) : (yyvsp[(1) - (2)]); } break; - case 120: + case 119: { (yyval) = (yyvsp[(2) - (2)]); } break; - case 122: + case 121: { (yyval) = (yyvsp[(2) - (2)]); } break; - case 123: + case 122: { (yyval) = NULL; } break; - case 125: + case 124: { (yyval) = (yyvsp[(3) - (3)]); } break; - case 126: + case 125: { (yyval) = (yyvsp[(4) - (4)]); } break; - case 129: + case 128: { const char *name = strdup((*(yyvsp[(1) - (1)]))->string); @@ -2134,7 +2128,7 @@ yyreduce: } break; - case 130: + case 129: { const char *name = strdup((*(yyvsp[(1) - (3)]))->string); @@ -2143,17 +2137,17 @@ yyreduce: } break; - case 131: + case 130: { (yyval) = (yyvsp[(2) - (2)]); } break; - case 132: + case 131: { (yyval) = NULL; } break; - case 134: + case 133: { export_symbol((*(yyvsp[(3) - (5)]))->string); (yyval) = (yyvsp[(5) - (5)]); } break; diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index 4fba255e54ae49ed0fcf196b4e72695624eab58b..00a6d7e5497126147dc0d9a564c4a6525fff6079 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -322,8 +322,6 @@ direct_declarator: { $$ = $2; } | '(' declarator ')' { $$ = $3; } - | '(' error ')' - { $$ = $3; } ; /* Nested declarators differ from regular declarators in that they do diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 33c85dfdfce9b3b6f3509c71d4fd91ef2bf43e2e..a26a5f2dce39431497bd430ab605dd061c98009c 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -202,6 +202,7 @@ EOF # '&struct_name.member' - name of a structure member # '@parameter' - name of a parameter # '%CONST' - name of a constant. +# '``LITERAL``' - literal string without any spaces on it. ## init lots of data @@ -210,7 +211,8 @@ my $warnings = 0; my $anon_struct_union = 0; # match expressions used to find embedded type information -my $type_constant = '\%([-_\w]+)'; +my $type_constant = '\b``([^\`]+)``\b'; +my $type_constant2 = '\%([-_\w]+)'; my $type_func = '(\w+)\(\)'; my $type_param = '\@(\w+(\.\.\.)?)'; my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params @@ -235,6 +237,7 @@ my $type_member_func = $type_member . '\(\)'; # these work fairly well my @highlights_html = ( [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_func, "\$1"], [$type_enum_xml, "\$1"], [$type_struct_xml, "\$1"], @@ -252,6 +255,7 @@ my $blankline_html = $local_lt . "p" . $local_gt; # was "

    " # html version 5 my @highlights_html5 = ( [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_func, "\$1"], [$type_enum_xml, "\$1"], [$type_struct_xml, "\$1"], @@ -268,6 +272,7 @@ my $blankline_html5 = $local_lt . "br /" . $local_gt; my @highlights_xml = ( ["([^=])\\\"([^\\\"<]+)\\\"", "\$1\$2"], [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_enum_xml, "\$1"], [$type_struct_xml, "\$1"], [$type_typedef_xml, "\$1"], @@ -283,6 +288,7 @@ my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $loca # gnome, docbook format my @highlights_gnome = ( [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_func, "\$1"], [$type_enum, "\$1"], [$type_struct, "\$1"], @@ -298,6 +304,7 @@ my $blankline_gnome = "\n"; # these are pretty rough my @highlights_man = ( [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_func, "\\\\fB\$1\\\\fP"], [$type_enum, "\\\\fI\$1\\\\fP"], [$type_struct, "\\\\fI\$1\\\\fP"], @@ -312,6 +319,7 @@ my $blankline_man = ""; # text-mode my @highlights_text = ( [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_func, "\$1"], [$type_enum, "\$1"], [$type_struct, "\$1"], @@ -326,6 +334,7 @@ my $blankline_text = ""; # rst-mode my @highlights_rst = ( [$type_constant, "``\$1``"], + [$type_constant2, "``\$1``"], # Note: need to escape () to avoid func matching later [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], @@ -344,6 +353,7 @@ my $blankline_rst = "\n"; # list mode my @highlights_list = ( [$type_constant, "\$1"], + [$type_constant2, "\$1"], [$type_func, "\$1"], [$type_enum, "\$1"], [$type_struct, "\$1"], @@ -2392,8 +2402,7 @@ sub push_parameter($$$) { } $anon_struct_union = 0; - my $param_name = $param; - $param_name =~ s/\[.*//; + $param =~ s/[\[\)].*//; if ($type eq "" && $param =~ /\.\.\.$/) { @@ -2424,9 +2433,9 @@ sub push_parameter($$$) { # but inline preprocessor statements); # also ignore unnamed structs/unions; if (!$anon_struct_union) { - if (!defined $parameterdescs{$param_name} && $param_name !~ /^#/) { + if (!defined $parameterdescs{$param} && $param !~ /^#/) { - $parameterdescs{$param_name} = $undescribed; + $parameterdescs{$param} = $undescribed; if (($type eq 'function') || ($type eq 'enum')) { print STDERR "${file}:$.: warning: Function parameter ". diff --git a/scripts/ksymoops/README b/scripts/ksymoops/README index f6cb06e3f30e96e79dfb1dec5e2eb33810a648f8..4130439801278816f027695a07447975ff513bd0 100644 --- a/scripts/ksymoops/README +++ b/scripts/ksymoops/README @@ -1,8 +1,7 @@ ksymoops has been removed from the kernel. It was always meant to be a free standing utility, not linked to any particular kernel version. The latest version can be found in -ftp://ftp..kernel.org/pub/linux/utils/kernel/ksymoops together -with patches to other utilities in order to give more accurate Oops -debugging. +https://www.kernel.org/pub/linux/utils/kernel/ksymoops together with patches to +other utilities in order to give more accurate Oops debugging. Keith Owens Sat Jun 19 10:30:34 EST 1999 diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 19d9bcadc0ccd448fd4cff3e687ec61e3f95001a..b497d9764dcf02e0dd1abab386faef3af8b65499 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -7,32 +7,8 @@ modpost-objs := modpost.o file2alias.o sumversion.o devicetable-offsets-file := devicetable-offsets.h -define sed-y - "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" -endef - -quiet_cmd_offsets = GEN $@ -define cmd_offsets - (set -e; \ - echo "#ifndef __DEVICETABLE_OFFSETS_H__"; \ - echo "#define __DEVICETABLE_OFFSETS_H__"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y) $<; \ - echo ""; \ - echo "#endif" ) > $@ -endef - -$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s - $(call if_changed,offsets) +$(obj)/$(devicetable-offsets-file): $(obj)/devicetable-offsets.s FORCE + $(call filechk,offsets,__DEVICETABLE_OFFSETS_H__) targets += $(devicetable-offsets-file) devicetable-offsets.s diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 9b6e246a45d09f530b3527b81946a30b1256697f..d61b9e8678e8c0fd17220dae2e3d49fa08d23070 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -20,8 +20,7 @@ SECTIONS { __kcrctab_unused_gpl 0 : { *(SORT(___kcrctab_unused_gpl+*)) } __kcrctab_gpl_future 0 : { *(SORT(___kcrctab_gpl_future+*)) } - . = ALIGN(8); - .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + .init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) } __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } diff --git a/scripts/objdiff b/scripts/objdiff index 62e51dae2138db450555f3d15dbd14cc53c53652..4fb5d67968932bbf83fbdc63ee84c374d552fa41 100755 --- a/scripts/objdiff +++ b/scripts/objdiff @@ -57,13 +57,15 @@ get_output_dir() { do_objdump() { dir=$(get_output_dir $1) base=${1##*/} + stripped=$dir/${base%.o}.stripped dis=$dir/${base%.o}.dis [ ! -d "$dir" ] && mkdir -p $dir # remove addresses for a cleaner diff # http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and - $OBJDUMP -D $1 | sed "s/^[[:space:]]\+[0-9a-f]\+//" > $dis + $STRIP -g $1 -R __bug_table -R .note -R .comment -o $stripped + $OBJDUMP -D $stripped | sed -e "s/^[[:space:]]\+[0-9a-f]\+//" -e "s:^$stripped:$1:" > $dis } dorecord() { @@ -73,6 +75,7 @@ dorecord() { CMT="`git rev-parse --short HEAD`" + STRIP="${CROSS_COMPILE}strip" OBJDUMP="${CROSS_COMPILE}objdump" for d in $FILES; do diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 3c575cd07888807d14693ad8475b6b5a953a1ca4..aad67000e4dd76796894cbd1975d087c559ecc5c 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -69,7 +69,7 @@ set_debarch() { echo "" >&2 echo "** ** ** WARNING ** ** **" >&2 echo "" >&2 - echo "Your architecture doesn't have it's equivalent" >&2 + echo "Your architecture doesn't have its equivalent" >&2 echo "Debian userspace architecture defined!" >&2 echo "Falling back to using your current userspace instead!" >&2 echo "Please add support for $UTS_MACHINE to ${0} ..." >&2 @@ -143,12 +143,7 @@ else cp System.map "$tmpdir/boot/System.map-$version" cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version" fi -# Not all arches include the boot path in KBUILD_IMAGE -if [ -e $KBUILD_IMAGE ]; then - cp $KBUILD_IMAGE "$tmpdir/$installed_image_path" -else - cp arch/$ARCH/boot/$KBUILD_IMAGE "$tmpdir/$installed_image_path" -fi +cp "$($MAKE -s image_name)" "$tmpdir/$installed_image_path" if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then # Only some architectures with OF support have this target @@ -262,10 +257,10 @@ EOF cat < debian/copyright This is a packacked upstream version of the Linux kernel. -The sources may be found at most Linux ftp sites, including: -ftp://ftp.kernel.org/pub/linux/kernel +The sources may be found at most Linux archive sites, including: +https://www.kernel.org/pub/linux/kernel -Copyright: 1991 - 2015 Linus Torvalds and others. +Copyright: 1991 - 2017 Linus Torvalds and others. The git repository for mainline kernel development is at: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git @@ -288,7 +283,6 @@ Section: kernel Priority: optional Maintainer: $maintainer Build-Depends: $build_depends -Standards-Version: 3.8.4 Homepage: http://www.kernel.org/ EOF @@ -296,7 +290,6 @@ if [ "$ARCH" = "um" ]; then cat <> debian/control Package: $packagename -Provides: linux-image, linux-image-2.6, linux-modules-$version Architecture: any Description: User Mode Linux kernel, version $version User-mode Linux is a port of the Linux kernel to its own system call @@ -313,7 +306,6 @@ else cat <> debian/control Package: $packagename -Provides: linux-image, linux-image-2.6, linux-modules-$version Suggests: $fwpackagename Architecture: any Description: Linux kernel, version $version @@ -346,7 +338,6 @@ rm -f "$objtree/debian/hdrsrcfiles" "$objtree/debian/hdrobjfiles" cat <> debian/control Package: $kernel_headers_packagename -Provides: linux-headers, linux-headers-2.6 Architecture: any Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch} This package provides kernel header files for $KERNELRELEASE on \${kernel:debarch} @@ -404,7 +395,6 @@ if [ -n "$BUILD_DEBUG" ] ; then Package: $dbg_packagename Section: debug -Provides: linux-debug, linux-debug-$version Architecture: any Description: Linux kernel debugging symbols for $version This package will come in handy if you need to debug the kernel. It provides diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index aeb34223167c1a5ac9a2e46a7e2c346360cf93f7..16e086dcc56762dd7bc8e6ccd78324e43320d829 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -412,6 +412,7 @@ static int is_mcounted_section_name(char const *const txtname) { return strcmp(".text", txtname) == 0 || + strcmp(".init.text", txtname) == 0 || strcmp(".ref.text", txtname) == 0 || strcmp(".sched.text", txtname) == 0 || strcmp(".spinlock.text", txtname) == 0 || diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 0b6002b36f204f89fa0b0f1c23390fb54307fd21..1633c3e6c0b95af1933339b195b2f68527c639d8 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -130,6 +130,7 @@ if ($inputfile =~ m,kernel/trace/ftrace\.o$,) { # Acceptable sections to record. my %text_sections = ( ".text" => 1, + ".init.text" => 1, ".ref.text" => 1, ".sched.text" => 1, ".spinlock.text" => 1, diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index f4dd41f900d5ce8a672479139938feb8b96105f9..6a24569c3578af93673cdd40372fdd6224638de9 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -8,6 +8,7 @@ #include #include #include +#include struct security_class_mapping { const char *name; diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index c29fa4a6228d6f59f9346721d4569cb15002b3c6..ffe8179f5d41b38e43c475037c5ad0ab49c3a00d 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -32,6 +32,7 @@ #include #include #include +#include static void usage(char *name) { diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 0545f5a8cabed76cb2c49cfd8c2d08f567bc4980..eb38f49d4b755a75787611ff533940cb5787da60 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -46,6 +46,8 @@ ackowledge||acknowledge ackowledged||acknowledged acording||according activete||activate +actived||activated +actualy||actually acumulating||accumulating acumulator||accumulator adapater||adapter @@ -76,6 +78,8 @@ algorritm||algorithm aligment||alignment alignement||alignment allign||align +alligned||aligned +allocatote||allocate allocatrd||allocated allocte||allocate allpication||application @@ -141,6 +145,7 @@ asycronous||asynchronous asynchnous||asynchronous atomatically||automatically atomicly||atomically +atempt||attempt attachement||attachment attched||attached attemps||attempts @@ -174,6 +179,7 @@ bakup||backup baloon||balloon baloons||balloons bandwith||bandwidth +banlance||balance batery||battery beacuse||because becasue||because @@ -270,6 +276,7 @@ comunication||communication conbination||combination conditionaly||conditionally conected||connected +connecetd||connected configuartion||configuration configuratoin||configuration configuraton||configuration @@ -291,11 +298,14 @@ continous||continuous continously||continuously continueing||continuing contraints||constraints +contol||control +contoller||controller controled||controlled controler||controller controll||control contruction||construction contry||country +conuntry||country convertion||conversion convertor||converter convienient||convenient @@ -310,6 +320,7 @@ coutner||counter cryptocraphic||cryptographic cunter||counter curently||currently +cylic||cyclic dafault||default deafult||default deamon||daemon @@ -365,6 +376,8 @@ dictionnary||dictionary didnt||didn't diferent||different differrence||difference +diffrent||different +diffrentiate||differentiate difinition||definition diplay||display direectly||directly @@ -398,6 +411,7 @@ efective||effective efficently||efficiently ehther||ether eigth||eight +elementry||elementary eletronic||electronic embeded||embedded enabledi||enabled @@ -443,6 +457,7 @@ extened||extended extensability||extensibility extention||extension extracter||extractor +falied||failed faild||failed faill||fail failied||failed @@ -492,6 +507,7 @@ futhermore||furthermore futrue||future gaurenteed||guaranteed generiously||generously +genereate||generate genric||generic globel||global grabing||grabbing @@ -513,8 +529,10 @@ hierachy||hierarchy hierarchie||hierarchy howver||however hsould||should +hypervior||hypervisor hypter||hyper identidier||identifier +iligal||illegal illigal||illegal imblance||imbalance immeadiately||immediately @@ -590,6 +608,9 @@ interruptted||interrupted interupted||interrupted interupt||interrupt intial||initial +intialisation||initialisation +intialised||initialised +intialise||initialise intialization||initialization intialized||initialized intialize||initialize @@ -600,6 +621,7 @@ intuative||intuitive invaid||invalid invalde||invalid invalide||invalid +invalud||invalid invididual||individual invokation||invocation invokations||invocations @@ -663,15 +685,19 @@ messsages||messages microprocesspr||microprocessor milliseonds||milliseconds minium||minimum +minimam||minimum minumum||minimum +misalinged||misaligned miscelleneous||miscellaneous misformed||malformed mispelled||misspelled mispelt||misspelt +mising||missing miximum||maximum mmnemonic||mnemonic mnay||many modulues||modules +momery||memory monochorome||monochrome monochromo||monochrome monocrome||monochrome @@ -871,6 +897,7 @@ registerd||registered registeresd||registered registes||registers registraration||registration +regsiter||register regster||register regualar||regular reguator||regulator @@ -888,6 +915,7 @@ replys||replies reponse||response representaion||representation reqeust||request +requestied||requested requiere||require requirment||requirement requred||required @@ -981,6 +1009,7 @@ spinlcok||spinlock spinock||spinlock splitted||split spreaded||spread +spurrious||spurious sructure||structure stablilization||stabilization staically||statically @@ -1013,6 +1042,7 @@ superseeded||superseded suplied||supplied suported||supported suport||support +supportet||supported suppored||supported supportin||supporting suppoted||supported @@ -1056,6 +1086,7 @@ throught||through thses||these tiggered||triggered tipically||typically +timout||timeout tmis||this torerable||tolerable tramsmitted||transmitted @@ -1081,6 +1112,7 @@ unconditionaly||unconditionally underun||underrun unecessary||unnecessary unexecpted||unexpected +unexepected||unexpected unexpcted||unexpected unexpectd||unexpected unexpeted||unexpected @@ -1096,6 +1128,7 @@ unneded||unneeded unneedingly||unnecessarily unnsupported||unsupported unmached||unmatched +unregester||unregister unresgister||unregister unrgesiter||unregister unsinged||unsigned diff --git a/scripts/ver_linux b/scripts/ver_linux index 430b201f3e254ad852d5b712529d62bf24b1e8e1..b51de8a7e2a314c95d601894fb17116a9c3170ca 100755 --- a/scripts/ver_linux +++ b/scripts/ver_linux @@ -1,4 +1,4 @@ -#!/bin/awk -f +#!/usr/bin/awk -f # Before running this script please ensure that your PATH is # typical as you use for compilation/installation. I use # /bin /sbin /usr/bin /usr/sbin /usr/local/bin, but it may diff --git a/security/Kconfig b/security/Kconfig index d900f47eaa68526704a3bba7058bdc0044c97987..93027fdf47d1c7b610a4c7e15996bc288ceaba48 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -31,6 +31,11 @@ config SECURITY If you are unsure how to answer this question, answer N. +config SECURITY_WRITABLE_HOOKS + depends on SECURITY + bool + default n + config SECURITYFS bool "Enable the securityfs filesystem" help @@ -125,17 +130,8 @@ config HAVE_HARDENED_USERCOPY_ALLOCATOR validating memory ranges against heap object sizes in support of CONFIG_HARDENED_USERCOPY. -config HAVE_ARCH_HARDENED_USERCOPY - bool - help - The architecture supports CONFIG_HARDENED_USERCOPY by - calling check_object_size() just before performing the - userspace copies in the low level implementation of - copy_to_user() and copy_from_user(). - config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" - depends on HAVE_ARCH_HARDENED_USERCOPY depends on HAVE_HARDENED_USERCOPY_ALLOCATOR select BUG help diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 41073f70eb41db135b8b68defa7523c161624991..4f6ac9dbc65ddcda1cfc0bd188c705505e2d368c 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -98,7 +98,7 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, return ERR_PTR(-ESPIPE); /* freed by caller to simple_write_to_buffer */ - data = kvmalloc(sizeof(*data) + alloc_size); + data = kvmalloc(sizeof(*data) + alloc_size, GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); kref_init(&data->count); @@ -1357,7 +1357,7 @@ static int aa_mk_null_file(struct dentry *parent) inode->i_ino = get_next_ino(); inode->i_mode = S_IFCHR | S_IRUGO | S_IWUGO; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_instantiate(dentry, inode); diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c index de8dc78b61442c4a2c07adcdf2740a685dc08e70..136f2a04783670d5ea8ad0c93d063d5a866fc297 100644 --- a/security/apparmor/crypto.c +++ b/security/apparmor/crypto.c @@ -31,10 +31,7 @@ unsigned int aa_hash_size(void) char *aa_calc_hash(void *data, size_t len) { - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(apparmor_tfm)]; - } desc; + SHASH_DESC_ON_STACK(desc, apparmor_tfm); char *hash = NULL; int error = -ENOMEM; @@ -45,16 +42,16 @@ char *aa_calc_hash(void *data, size_t len) if (!hash) goto fail; - desc.shash.tfm = apparmor_tfm; - desc.shash.flags = 0; + desc->tfm = apparmor_tfm; + desc->flags = 0; - error = crypto_shash_init(&desc.shash); + error = crypto_shash_init(desc); if (error) goto fail; - error = crypto_shash_update(&desc.shash, (u8 *) data, len); + error = crypto_shash_update(desc, (u8 *) data, len); if (error) goto fail; - error = crypto_shash_final(&desc.shash, hash); + error = crypto_shash_final(desc, hash); if (error) goto fail; @@ -69,10 +66,7 @@ char *aa_calc_hash(void *data, size_t len) int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, size_t len) { - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(apparmor_tfm)]; - } desc; + SHASH_DESC_ON_STACK(desc, apparmor_tfm); int error = -ENOMEM; __le32 le32_version = cpu_to_le32(version); @@ -86,19 +80,19 @@ int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start, if (!profile->hash) goto fail; - desc.shash.tfm = apparmor_tfm; - desc.shash.flags = 0; + desc->tfm = apparmor_tfm; + desc->flags = 0; - error = crypto_shash_init(&desc.shash); + error = crypto_shash_init(desc); if (error) goto fail; - error = crypto_shash_update(&desc.shash, (u8 *) &le32_version, 4); + error = crypto_shash_update(desc, (u8 *) &le32_version, 4); if (error) goto fail; - error = crypto_shash_update(&desc.shash, (u8 *) start, len); + error = crypto_shash_update(desc, (u8 *) start, len); if (error) goto fail; - error = crypto_shash_final(&desc.shash, profile->hash); + error = crypto_shash_final(desc, profile->hash); if (error) goto fail; diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 65ff492a98079254b53cf7cf4630d1990d78e03e..550a700563b43ecb6546509bd976998e3e2ac578 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -57,24 +57,13 @@ pr_err_ratelimited("AppArmor: " fmt, ##args) /* Flag indicating whether initialization completed */ -extern int apparmor_initialized __initdata; +extern int apparmor_initialized; /* fn's in lib */ char *aa_split_fqname(char *args, char **ns_name); const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len); void aa_info_message(const char *str); -void *__aa_kvmalloc(size_t size, gfp_t flags); - -static inline void *kvmalloc(size_t size) -{ - return __aa_kvmalloc(size, 0); -} - -static inline void *kvzalloc(size_t size) -{ - return __aa_kvmalloc(size, __GFP_ZERO); -} /** * aa_strneq - compare null terminated @str to a non null terminated substring diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 66475bda6f72212c330e7c6f8febfc930beb4e74..7cd788a9445be4c033e5e7f099ca26de5c678858 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -128,36 +128,6 @@ void aa_info_message(const char *str) printk(KERN_INFO "AppArmor: %s\n", str); } -/** - * __aa_kvmalloc - do allocation preferring kmalloc but falling back to vmalloc - * @size: how many bytes of memory are required - * @flags: the type of memory to allocate (see kmalloc). - * - * Return: allocated buffer or NULL if failed - * - * It is possible that policy being loaded from the user is larger than - * what can be allocated by kmalloc, in those cases fall back to vmalloc. - */ -void *__aa_kvmalloc(size_t size, gfp_t flags) -{ - void *buffer = NULL; - - if (size == 0) - return NULL; - - /* do not attempt kmalloc if we need more than 16 pages at once */ - if (size <= (16*PAGE_SIZE)) - buffer = kmalloc(size, flags | GFP_KERNEL | __GFP_NORETRY | - __GFP_NOWARN); - if (!buffer) { - if (flags & __GFP_ZERO) - buffer = vzalloc(size); - else - buffer = vmalloc(size); - } - return buffer; -} - /** * aa_policy_init - initialize a policy structure * @policy: policy to initialize (NOT NULL) @@ -180,13 +150,13 @@ bool aa_policy_init(struct aa_policy *policy, const char *prefix, } else policy->hname = kstrdup(name, gfp); if (!policy->hname) - return 0; + return false; /* base.name is a substring of fqname */ policy->name = basename(policy->hname); INIT_LIST_HEAD(&policy->list); INIT_LIST_HEAD(&policy->profiles); - return 1; + return true; } /** diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 709eacd23909acfa5438e9173085d91eb1780991..8f3c0f7aca5a21edd2ca68fd5815a847a7de1863 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -39,7 +39,7 @@ #include "include/procattr.h" /* Flag indicating whether initialization completed */ -int apparmor_initialized __initdata; +int apparmor_initialized; DEFINE_PER_CPU(struct aa_buffers, aa_buffers); @@ -587,7 +587,7 @@ static int apparmor_task_setrlimit(struct task_struct *task, return error; } -static struct security_hook_list apparmor_hooks[] = { +static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), LSM_HOOK_INIT(capget, apparmor_capget), @@ -681,7 +681,7 @@ module_param_named(hash_policy, aa_g_hash_policy, aabool, S_IRUSR | S_IWUSR); #endif /* Debug mode */ -bool aa_g_debug = IS_ENABLED(CONFIG_SECURITY_DEBUG_MESSAGES); +bool aa_g_debug = IS_ENABLED(CONFIG_SECURITY_APPARMOR_DEBUG_MESSAGES); module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR); /* Audit mode */ @@ -710,7 +710,7 @@ module_param_named(logsyscall, aa_g_logsyscall, aabool, S_IRUSR | S_IWUSR); /* Maximum pathname length before accesses will start getting rejected */ unsigned int aa_g_path_max = 2 * PATH_MAX; -module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR | S_IWUSR); +module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR); /* Determines how paranoid loading of policy is and how much verification * on the loaded policy is done. @@ -738,78 +738,77 @@ __setup("apparmor=", apparmor_enabled_setup); /* set global flag turning off the ability to load policy */ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp) { - if (!policy_admin_capable(NULL)) + if (!apparmor_enabled) + return -EINVAL; + if (apparmor_initialized && !policy_admin_capable(NULL)) return -EPERM; return param_set_bool(val, kp); } static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp) { - if (!policy_view_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; + if (apparmor_initialized && !policy_view_capable(NULL)) + return -EPERM; return param_get_bool(buffer, kp); } static int param_set_aabool(const char *val, const struct kernel_param *kp) { - if (!policy_admin_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; + if (apparmor_initialized && !policy_admin_capable(NULL)) + return -EPERM; return param_set_bool(val, kp); } static int param_get_aabool(char *buffer, const struct kernel_param *kp) { - if (!policy_view_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; + if (apparmor_initialized && !policy_view_capable(NULL)) + return -EPERM; return param_get_bool(buffer, kp); } static int param_set_aauint(const char *val, const struct kernel_param *kp) { - if (!policy_admin_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; + if (apparmor_initialized && !policy_admin_capable(NULL)) + return -EPERM; return param_set_uint(val, kp); } static int param_get_aauint(char *buffer, const struct kernel_param *kp) { - if (!policy_view_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; + if (apparmor_initialized && !policy_view_capable(NULL)) + return -EPERM; return param_get_uint(buffer, kp); } static int param_get_audit(char *buffer, struct kernel_param *kp) { - if (!policy_view_capable(NULL)) - return -EPERM; - if (!apparmor_enabled) return -EINVAL; - + if (apparmor_initialized && !policy_view_capable(NULL)) + return -EPERM; return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]); } static int param_set_audit(const char *val, struct kernel_param *kp) { int i; - if (!policy_admin_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; - if (!val) return -EINVAL; + if (apparmor_initialized && !policy_admin_capable(NULL)) + return -EPERM; for (i = 0; i < AUDIT_MAX_INDEX; i++) { if (strcmp(val, audit_mode_names[i]) == 0) { @@ -823,11 +822,10 @@ static int param_set_audit(const char *val, struct kernel_param *kp) static int param_get_mode(char *buffer, struct kernel_param *kp) { - if (!policy_view_capable(NULL)) - return -EPERM; - if (!apparmor_enabled) return -EINVAL; + if (apparmor_initialized && !policy_view_capable(NULL)) + return -EPERM; return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]); } @@ -835,14 +833,13 @@ static int param_get_mode(char *buffer, struct kernel_param *kp) static int param_set_mode(const char *val, struct kernel_param *kp) { int i; - if (!policy_admin_capable(NULL)) - return -EPERM; if (!apparmor_enabled) return -EINVAL; - if (!val) return -EINVAL; + if (apparmor_initialized && !policy_admin_capable(NULL)) + return -EPERM; for (i = 0; i < APPARMOR_MODE_NAMES_MAX_INDEX; i++) { if (strcmp(val, aa_profile_mode_names[i]) == 0) { diff --git a/security/apparmor/match.c b/security/apparmor/match.c index eb0efef746f56ff193e4b507c97c22c486ffbce9..960c913381e2b61780d1cd9640199538286c13b7 100644 --- a/security/apparmor/match.c +++ b/security/apparmor/match.c @@ -88,7 +88,7 @@ static struct table_header *unpack_table(char *blob, size_t bsize) if (bsize < tsize) goto out; - table = kvzalloc(tsize); + table = kvzalloc(tsize, GFP_KERNEL); if (table) { table->td_id = th.td_id; table->td_flags = th.td_flags; diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index def1fbd6bdfd8185a8ec1c4ebcf102e5699acf09..cf9d670dca945ea01245df03468d0f99a22262fe 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -876,9 +876,11 @@ ssize_t aa_replace_profiles(struct aa_ns *view, struct aa_profile *profile, if (ns_name) { ns = aa_prepare_ns(view, ns_name); if (IS_ERR(ns)) { + op = OP_PROF_LOAD; info = "failed to prepare namespace"; error = PTR_ERR(ns); ns = NULL; + ent = NULL; goto fail; } } else @@ -1013,7 +1015,7 @@ ssize_t aa_replace_profiles(struct aa_ns *view, struct aa_profile *profile, /* audit cause of failure */ op = (!ent->old) ? OP_PROF_LOAD : OP_PROF_REPL; fail: - audit_policy(profile, op, ns_name, ent->new->base.hname, + audit_policy(profile, op, ns_name, ent ? ent->new->base.hname : NULL, info, error); /* audit status that rest of profiles in the atomic set failed too */ info = "valid profile in failed atomic policy load"; @@ -1023,7 +1025,7 @@ ssize_t aa_replace_profiles(struct aa_ns *view, struct aa_profile *profile, /* skip entry that caused failure */ continue; } - op = (!ent->old) ? OP_PROF_LOAD : OP_PROF_REPL; + op = (!tmp->old) ? OP_PROF_LOAD : OP_PROF_REPL; audit_policy(profile, op, ns_name, tmp->new->base.hname, info, error); } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 2e37c9c26bbd17e0affa3155384979f99953ffa7..f3422a91353c459b3f0a95448aaaff9b6164e1bd 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -487,7 +487,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) static void *kvmemdup(const void *src, size_t len) { - void *p = kvmalloc(len); + void *p = kvmalloc(len, GFP_KERNEL); if (p) memcpy(p, src, len); diff --git a/security/commoncap.c b/security/commoncap.c index 78b37838a2d3e24302578581d32c0a4b0978a946..7abebd782d5e02d0c024ca0b2b1b189b1f4f83d1 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1071,7 +1071,7 @@ int cap_mmap_file(struct file *file, unsigned long reqprot, #ifdef CONFIG_SECURITY -struct security_hook_list capability_hooks[] = { +struct security_hook_list capability_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(capable, cap_capable), LSM_HOOK_INIT(settime, cap_settime), LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check), diff --git a/security/inode.c b/security/inode.c index 2cb14162ff8d5fa2f2ebfdd58061c07f4569f34c..eccd58ef2ae8456af7523414c5405024332e4731 100644 --- a/security/inode.c +++ b/security/inode.c @@ -28,7 +28,7 @@ static int mount_count; static int fill_super(struct super_block *sb, void *data, int silent) { - static struct tree_descr files[] = {{""}}; + static const struct tree_descr files[] = {{""}}; return simple_fill_super(sb, SECURITYFS_MAGIC, files); } diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 106e855e2d9d16933c6a5d6f39a1ead65143c40b..06554c448dce886d3bd7a01a745a78ab5fd734e8 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -81,18 +81,25 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, int __init integrity_init_keyring(const unsigned int id) { const struct cred *cred = current_cred(); + struct key_restriction *restriction; int err = 0; if (!init_keyring) return 0; + restriction = kzalloc(sizeof(struct key_restriction), GFP_KERNEL); + if (!restriction) + return -ENOMEM; + + restriction->check = restrict_link_to_ima; + keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0), KGIDT_INIT(0), cred, ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH), KEY_ALLOC_NOT_IN_QUOTA, - restrict_link_to_ima, NULL); + restriction, NULL); if (IS_ERR(keyring[id])) { err = PTR_ERR(keyring[id]); pr_info("Can't allocate %s keyring (%d)\n", diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 1fd9539a969dce20d292c70885758945be84acdd..5d0785cfe06387f65a53f84f2832007d77a7932c 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -207,10 +207,11 @@ int ima_appraise_measurement(enum ima_hooks func, cause = "missing-hash"; status = INTEGRITY_NOLABEL; - if (opened & FILE_CREATED) { + if (opened & FILE_CREATED) iint->flags |= IMA_NEW_FILE; + if ((iint->flags & IMA_NEW_FILE) && + !(iint->flags & IMA_DIGSIG_REQUIRED)) status = INTEGRITY_PASS; - } goto out; } diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c index 74a2799574642efb845b0eb60ac39c8f19f20b2b..073ddc9bce5ba14ad9af18f2cc4f569043016b98 100644 --- a/security/integrity/ima/ima_mok.c +++ b/security/integrity/ima/ima_mok.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -27,15 +28,23 @@ struct key *ima_blacklist_keyring; */ __init int ima_mok_init(void) { + struct key_restriction *restriction; + pr_notice("Allocating IMA blacklist keyring.\n"); + restriction = kzalloc(sizeof(struct key_restriction), GFP_KERNEL); + if (!restriction) + panic("Can't allocate IMA blacklist restriction."); + + restriction->check = restrict_link_by_builtin_trusted; + ima_blacklist_keyring = keyring_alloc(".ima_blacklist", KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, - restrict_link_by_builtin_trusted, NULL); + restriction, NULL); if (IS_ERR(ima_blacklist_keyring)) panic("Can't allocate IMA blacklist keyring."); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index aed47b777a57fbf495f14fa9038dbfb1759dd33a..3ab1067db624d860f9303a5e3ea8587ebbed699b 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -64,6 +64,8 @@ struct ima_rule_entry { u8 fsuuid[16]; kuid_t uid; kuid_t fowner; + bool (*uid_op)(kuid_t, kuid_t); /* Handlers for operators */ + bool (*fowner_op)(kuid_t, kuid_t); /* uid_eq(), uid_gt(), uid_lt() */ int pcr; struct { void *rule; /* LSM file metadata specific */ @@ -83,7 +85,7 @@ struct ima_rule_entry { * normal users can easily run the machine out of memory simply building * and running executables. */ -static struct ima_rule_entry dont_measure_rules[] = { +static struct ima_rule_entry dont_measure_rules[] __ro_after_init = { {.action = DONT_MEASURE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, @@ -97,32 +99,35 @@ static struct ima_rule_entry dont_measure_rules[] = { {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC} }; -static struct ima_rule_entry original_measurement_rules[] = { +static struct ima_rule_entry original_measurement_rules[] __ro_after_init = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, - .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + .uid = GLOBAL_ROOT_UID, .uid_op = &uid_eq, + .flags = IMA_FUNC | IMA_MASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, }; -static struct ima_rule_entry default_measurement_rules[] = { +static struct ima_rule_entry default_measurement_rules[] __ro_after_init = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, - .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_EUID}, + .uid = GLOBAL_ROOT_UID, .uid_op = &uid_eq, + .flags = IMA_FUNC | IMA_INMASK | IMA_EUID}, {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, - .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_UID}, + .uid = GLOBAL_ROOT_UID, .uid_op = &uid_eq, + .flags = IMA_FUNC | IMA_INMASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = POLICY_CHECK, .flags = IMA_FUNC}, }; -static struct ima_rule_entry default_appraise_rules[] = { +static struct ima_rule_entry default_appraise_rules[] __ro_after_init = { {.action = DONT_APPRAISE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, @@ -139,10 +144,11 @@ static struct ima_rule_entry default_appraise_rules[] = { .flags = IMA_FUNC | IMA_DIGSIG_REQUIRED}, #endif #ifndef CONFIG_IMA_APPRAISE_SIGNED_INIT - {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .flags = IMA_FOWNER}, + {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .fowner_op = &uid_eq, + .flags = IMA_FOWNER}, #else /* force signature */ - {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, + {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .fowner_op = &uid_eq, .flags = IMA_FOWNER | IMA_DIGSIG_REQUIRED}, #endif }; @@ -240,19 +246,20 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode, if ((rule->flags & IMA_FSUUID) && memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid))) return false; - if ((rule->flags & IMA_UID) && !uid_eq(rule->uid, cred->uid)) + if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid)) return false; if (rule->flags & IMA_EUID) { if (has_capability_noaudit(current, CAP_SETUID)) { - if (!uid_eq(rule->uid, cred->euid) - && !uid_eq(rule->uid, cred->suid) - && !uid_eq(rule->uid, cred->uid)) + if (!rule->uid_op(cred->euid, rule->uid) + && !rule->uid_op(cred->suid, rule->uid) + && !rule->uid_op(cred->uid, rule->uid)) return false; - } else if (!uid_eq(rule->uid, cred->euid)) + } else if (!rule->uid_op(cred->euid, rule->uid)) return false; } - if ((rule->flags & IMA_FOWNER) && !uid_eq(rule->fowner, inode->i_uid)) + if ((rule->flags & IMA_FOWNER) && + !rule->fowner_op(inode->i_uid, rule->fowner)) return false; for (i = 0; i < MAX_LSM_RULES; i++) { int rc = 0; @@ -486,7 +493,9 @@ enum { Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, Opt_func, Opt_mask, Opt_fsmagic, - Opt_fsuuid, Opt_uid, Opt_euid, Opt_fowner, + Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq, + Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt, + Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt, Opt_appraise_type, Opt_permit_directio, Opt_pcr }; @@ -507,9 +516,15 @@ static match_table_t policy_tokens = { {Opt_mask, "mask=%s"}, {Opt_fsmagic, "fsmagic=%s"}, {Opt_fsuuid, "fsuuid=%s"}, - {Opt_uid, "uid=%s"}, - {Opt_euid, "euid=%s"}, - {Opt_fowner, "fowner=%s"}, + {Opt_uid_eq, "uid=%s"}, + {Opt_euid_eq, "euid=%s"}, + {Opt_fowner_eq, "fowner=%s"}, + {Opt_uid_gt, "uid>%s"}, + {Opt_euid_gt, "euid>%s"}, + {Opt_fowner_gt, "fowner>%s"}, + {Opt_uid_lt, "uid<%s"}, + {Opt_euid_lt, "euid<%s"}, + {Opt_fowner_lt, "fowner<%s"}, {Opt_appraise_type, "appraise_type=%s"}, {Opt_permit_directio, "permit_directio"}, {Opt_pcr, "pcr=%s"}, @@ -541,24 +556,37 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, return result; } -static void ima_log_string(struct audit_buffer *ab, char *key, char *value) +static void ima_log_string_op(struct audit_buffer *ab, char *key, char *value, + bool (*rule_operator)(kuid_t, kuid_t)) { - audit_log_format(ab, "%s=", key); + if (rule_operator == &uid_gt) + audit_log_format(ab, "%s>", key); + else if (rule_operator == &uid_lt) + audit_log_format(ab, "%s<", key); + else + audit_log_format(ab, "%s=", key); audit_log_untrustedstring(ab, value); audit_log_format(ab, " "); } +static void ima_log_string(struct audit_buffer *ab, char *key, char *value) +{ + ima_log_string_op(ab, key, value, NULL); +} static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) { struct audit_buffer *ab; char *from; char *p; + bool uid_token; int result = 0; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_INTEGRITY_RULE); entry->uid = INVALID_UID; entry->fowner = INVALID_UID; + entry->uid_op = &uid_eq; + entry->fowner_op = &uid_eq; entry->action = UNKNOWN; while ((p = strsep(&rule, " \t")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -694,11 +722,21 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) if (!result) entry->flags |= IMA_FSUUID; break; - case Opt_uid: - ima_log_string(ab, "uid", args[0].from); - case Opt_euid: - if (token == Opt_euid) - ima_log_string(ab, "euid", args[0].from); + case Opt_uid_gt: + case Opt_euid_gt: + entry->uid_op = &uid_gt; + case Opt_uid_lt: + case Opt_euid_lt: + if ((token == Opt_uid_lt) || (token == Opt_euid_lt)) + entry->uid_op = &uid_lt; + case Opt_uid_eq: + case Opt_euid_eq: + uid_token = (token == Opt_uid_eq) || + (token == Opt_uid_gt) || + (token == Opt_uid_lt); + + ima_log_string_op(ab, uid_token ? "uid" : "euid", + args[0].from, entry->uid_op); if (uid_valid(entry->uid)) { result = -EINVAL; @@ -713,12 +751,18 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) (uid_t)lnum != lnum) result = -EINVAL; else - entry->flags |= (token == Opt_uid) + entry->flags |= uid_token ? IMA_UID : IMA_EUID; } break; - case Opt_fowner: - ima_log_string(ab, "fowner", args[0].from); + case Opt_fowner_gt: + entry->fowner_op = &uid_gt; + case Opt_fowner_lt: + if (token == Opt_fowner_lt) + entry->fowner_op = &uid_lt; + case Opt_fowner_eq: + ima_log_string_op(ab, "fowner", args[0].from, + entry->fowner_op); if (uid_valid(entry->fowner)) { result = -EINVAL; @@ -1049,19 +1093,34 @@ int ima_policy_show(struct seq_file *m, void *v) if (entry->flags & IMA_UID) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->uid)); - seq_printf(m, pt(Opt_uid), tbuf); + if (entry->uid_op == &uid_gt) + seq_printf(m, pt(Opt_uid_gt), tbuf); + else if (entry->uid_op == &uid_lt) + seq_printf(m, pt(Opt_uid_lt), tbuf); + else + seq_printf(m, pt(Opt_uid_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_EUID) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->uid)); - seq_printf(m, pt(Opt_euid), tbuf); + if (entry->uid_op == &uid_gt) + seq_printf(m, pt(Opt_euid_gt), tbuf); + else if (entry->uid_op == &uid_lt) + seq_printf(m, pt(Opt_euid_lt), tbuf); + else + seq_printf(m, pt(Opt_euid_eq), tbuf); seq_puts(m, " "); } if (entry->flags & IMA_FOWNER) { snprintf(tbuf, sizeof(tbuf), "%d", __kuid_val(entry->fowner)); - seq_printf(m, pt(Opt_fowner), tbuf); + if (entry->fowner_op == &uid_gt) + seq_printf(m, pt(Opt_fowner_gt), tbuf); + else if (entry->fowner_op == &uid_lt) + seq_printf(m, pt(Opt_fowner_lt), tbuf); + else + seq_printf(m, pt(Opt_fowner_eq), tbuf); seq_puts(m, " "); } diff --git a/security/keys/Kconfig b/security/keys/Kconfig index d942c7c2bc0aa0ee471de663d3f15587f12a1732..a7a23b5541f85a4994e0cc83d7d132e8a5681938 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -20,6 +20,10 @@ config KEYS If you are unsure as to whether this is required, answer N. +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + config PERSISTENT_KEYRINGS bool "Enable register of persistent per-UID keyrings" depends on KEYS @@ -89,7 +93,9 @@ config ENCRYPTED_KEYS config KEY_DH_OPERATIONS bool "Diffie-Hellman operations on retained keys" depends on KEYS - select MPILIB + select CRYPTO + select CRYPTO_HASH + select CRYPTO_DH help This option provides support for calculating Diffie-Hellman public keys and shared secrets using values stored as keys diff --git a/security/keys/Makefile b/security/keys/Makefile index 1fd4a16e6dafb6779f770690319918ad4341b9c7..57dff0c15809081c8ffd292af73e8b47a26d7fd7 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -15,7 +15,8 @@ obj-y := \ request_key.o \ request_key_auth.o \ user_defined.o -obj-$(CONFIG_KEYS_COMPAT) += compat.o +compat-obj-$(CONFIG_KEY_DH_OPERATIONS) += compat_dh.o +obj-$(CONFIG_KEYS_COMPAT) += compat.o $(compat-obj-y) obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o diff --git a/security/keys/compat.c b/security/keys/compat.c index 36c80bf5b89c6f28a7c6010aa6b72f373bf4d1e1..e87c89c0177c1559e9cc5e8874b4231de1dc3437 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -133,8 +133,13 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, return keyctl_get_persistent(arg2, arg3); case KEYCTL_DH_COMPUTE: - return keyctl_dh_compute(compat_ptr(arg2), compat_ptr(arg3), - arg4, compat_ptr(arg5)); + return compat_keyctl_dh_compute(compat_ptr(arg2), + compat_ptr(arg3), + arg4, compat_ptr(arg5)); + + case KEYCTL_RESTRICT_KEYRING: + return keyctl_restrict_keyring(arg2, compat_ptr(arg3), + compat_ptr(arg4)); default: return -EOPNOTSUPP; diff --git a/security/keys/compat_dh.c b/security/keys/compat_dh.c new file mode 100644 index 0000000000000000000000000000000000000000..a6a659b6bcb6d1d92daa0ddb5c207a3c399857db --- /dev/null +++ b/security/keys/compat_dh.c @@ -0,0 +1,38 @@ +/* 32-bit compatibility syscall for 64-bit systems for DH operations + * + * Copyright (C) 2016 Stephan Mueller + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +#include "internal.h" + +/* + * Perform the DH computation or DH based key derivation. + * + * If successful, 0 will be returned. + */ +long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct compat_keyctl_kdf_params __user *kdf) +{ + struct keyctl_kdf_params kdfcopy; + struct compat_keyctl_kdf_params compat_kdfcopy; + + if (!kdf) + return __keyctl_dh_compute(params, buffer, buflen, NULL); + + if (copy_from_user(&compat_kdfcopy, kdf, sizeof(compat_kdfcopy)) != 0) + return -EFAULT; + + kdfcopy.hashname = compat_ptr(compat_kdfcopy.hashname); + kdfcopy.otherinfo = compat_ptr(compat_kdfcopy.otherinfo); + kdfcopy.otherinfolen = compat_kdfcopy.otherinfolen; + + return __keyctl_dh_compute(params, buffer, buflen, &kdfcopy); +} diff --git a/security/keys/dh.c b/security/keys/dh.c index 893af4c450382ae45085d9efbfb1f3134a1b9479..4755d4b4f94544236cd2b8cca1b2169b895579b7 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -8,32 +8,17 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include +#include +#include +#include +#include +#include #include #include "internal.h" -/* - * Public key or shared secret generation function [RFC2631 sec 2.1.1] - * - * ya = g^xa mod p; - * or - * ZZ = yb^xa mod p; - * - * where xa is the local private key, ya is the local public key, g is - * the generator, p is the prime, yb is the remote public key, and ZZ - * is the shared secret. - * - * Both are the same calculation, so g or yb are the "base" and ya or - * ZZ are the "result". - */ -static int do_dh(MPI result, MPI base, MPI xa, MPI p) -{ - return mpi_powm(result, base, xa, p); -} - -static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi) +static ssize_t dh_data_from_key(key_serial_t keyid, void **data) { struct key *key; key_ref_t key_ref; @@ -54,19 +39,17 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi) status = key_validate(key); if (status == 0) { const struct user_key_payload *payload; + uint8_t *duplicate; payload = user_key_payload_locked(key); - if (maxlen == 0) { - *mpi = NULL; + duplicate = kmemdup(payload->data, payload->datalen, + GFP_KERNEL); + if (duplicate) { + *data = duplicate; ret = payload->datalen; - } else if (payload->datalen <= maxlen) { - *mpi = mpi_read_raw_data(payload->data, - payload->datalen); - if (*mpi) - ret = payload->datalen; } else { - ret = -EINVAL; + ret = -ENOMEM; } } up_read(&key->sem); @@ -77,90 +60,371 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi) return ret; } -long keyctl_dh_compute(struct keyctl_dh_params __user *params, - char __user *buffer, size_t buflen, - void __user *reserved) +static void dh_free_data(struct dh *dh) +{ + kzfree(dh->key); + kzfree(dh->p); + kzfree(dh->g); +} + +struct dh_completion { + struct completion completion; + int err; +}; + +static void dh_crypto_done(struct crypto_async_request *req, int err) +{ + struct dh_completion *compl = req->data; + + if (err == -EINPROGRESS) + return; + + compl->err = err; + complete(&compl->completion); +} + +struct kdf_sdesc { + struct shash_desc shash; + char ctx[]; +}; + +static int kdf_alloc(struct kdf_sdesc **sdesc_ret, char *hashname) +{ + struct crypto_shash *tfm; + struct kdf_sdesc *sdesc; + int size; + int err; + + /* allocate synchronous hash */ + tfm = crypto_alloc_shash(hashname, 0, 0); + if (IS_ERR(tfm)) { + pr_info("could not allocate digest TFM handle %s\n", hashname); + return PTR_ERR(tfm); + } + + err = -EINVAL; + if (crypto_shash_digestsize(tfm) == 0) + goto out_free_tfm; + + err = -ENOMEM; + size = sizeof(struct shash_desc) + crypto_shash_descsize(tfm); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + goto out_free_tfm; + sdesc->shash.tfm = tfm; + sdesc->shash.flags = 0x0; + + *sdesc_ret = sdesc; + + return 0; + +out_free_tfm: + crypto_free_shash(tfm); + return err; +} + +static void kdf_dealloc(struct kdf_sdesc *sdesc) +{ + if (!sdesc) + return; + + if (sdesc->shash.tfm) + crypto_free_shash(sdesc->shash.tfm); + + kzfree(sdesc); +} + +/* + * Implementation of the KDF in counter mode according to SP800-108 section 5.1 + * as well as SP800-56A section 5.8.1 (Single-step KDF). + * + * SP800-56A: + * The src pointer is defined as Z || other info where Z is the shared secret + * from DH and other info is an arbitrary string (see SP800-56A section + * 5.8.1.2). + */ +static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen, unsigned int zlen) +{ + struct shash_desc *desc = &sdesc->shash; + unsigned int h = crypto_shash_digestsize(desc->tfm); + int err = 0; + u8 *dst_orig = dst; + __be32 counter = cpu_to_be32(1); + + while (dlen) { + err = crypto_shash_init(desc); + if (err) + goto err; + + err = crypto_shash_update(desc, (u8 *)&counter, sizeof(__be32)); + if (err) + goto err; + + if (zlen && h) { + u8 tmpbuffer[h]; + size_t chunk = min_t(size_t, zlen, h); + memset(tmpbuffer, 0, chunk); + + do { + err = crypto_shash_update(desc, tmpbuffer, + chunk); + if (err) + goto err; + + zlen -= chunk; + chunk = min_t(size_t, zlen, h); + } while (zlen); + } + + if (src && slen) { + err = crypto_shash_update(desc, src, slen); + if (err) + goto err; + } + + if (dlen < h) { + u8 tmpbuffer[h]; + + err = crypto_shash_final(desc, tmpbuffer); + if (err) + goto err; + memcpy(dst, tmpbuffer, dlen); + memzero_explicit(tmpbuffer, h); + return 0; + } else { + err = crypto_shash_final(desc, dst); + if (err) + goto err; + + dlen -= h; + dst += h; + counter = cpu_to_be32(be32_to_cpu(counter) + 1); + } + } + + return 0; + +err: + memzero_explicit(dst_orig, dlen); + return err; +} + +static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc, + char __user *buffer, size_t buflen, + uint8_t *kbuf, size_t kbuflen, size_t lzero) +{ + uint8_t *outbuf = NULL; + int ret; + + outbuf = kmalloc(buflen, GFP_KERNEL); + if (!outbuf) { + ret = -ENOMEM; + goto err; + } + + ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen, lzero); + if (ret) + goto err; + + ret = buflen; + if (copy_to_user(buffer, outbuf, buflen) != 0) + ret = -EFAULT; + +err: + kzfree(outbuf); + return ret; +} + +long __keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct keyctl_kdf_params *kdfcopy) { long ret; - MPI base, private, prime, result; - unsigned nbytes; + ssize_t dlen; + int secretlen; + int outlen; struct keyctl_dh_params pcopy; - uint8_t *kbuf; - ssize_t keylen; - size_t resultlen; + struct dh dh_inputs; + struct scatterlist outsg; + struct dh_completion compl; + struct crypto_kpp *tfm; + struct kpp_request *req; + uint8_t *secret; + uint8_t *outbuf; + struct kdf_sdesc *sdesc = NULL; if (!params || (!buffer && buflen)) { ret = -EINVAL; - goto out; + goto out1; } if (copy_from_user(&pcopy, params, sizeof(pcopy)) != 0) { ret = -EFAULT; - goto out; + goto out1; } - if (reserved) { - ret = -EINVAL; - goto out; + if (kdfcopy) { + char *hashname; + + if (buflen > KEYCTL_KDF_MAX_OUTPUT_LEN || + kdfcopy->otherinfolen > KEYCTL_KDF_MAX_OI_LEN) { + ret = -EMSGSIZE; + goto out1; + } + + /* get KDF name string */ + hashname = strndup_user(kdfcopy->hashname, CRYPTO_MAX_ALG_NAME); + if (IS_ERR(hashname)) { + ret = PTR_ERR(hashname); + goto out1; + } + + /* allocate KDF from the kernel crypto API */ + ret = kdf_alloc(&sdesc, hashname); + kfree(hashname); + if (ret) + goto out1; } - keylen = mpi_from_key(pcopy.prime, buflen, &prime); - if (keylen < 0 || !prime) { - /* buflen == 0 may be used to query the required buffer size, - * which is the prime key length. - */ - ret = keylen; - goto out; + memset(&dh_inputs, 0, sizeof(dh_inputs)); + + dlen = dh_data_from_key(pcopy.prime, &dh_inputs.p); + if (dlen < 0) { + ret = dlen; + goto out1; } + dh_inputs.p_size = dlen; - /* The result is never longer than the prime */ - resultlen = keylen; + dlen = dh_data_from_key(pcopy.base, &dh_inputs.g); + if (dlen < 0) { + ret = dlen; + goto out2; + } + dh_inputs.g_size = dlen; - keylen = mpi_from_key(pcopy.base, SIZE_MAX, &base); - if (keylen < 0 || !base) { - ret = keylen; - goto error1; + dlen = dh_data_from_key(pcopy.private, &dh_inputs.key); + if (dlen < 0) { + ret = dlen; + goto out2; } + dh_inputs.key_size = dlen; - keylen = mpi_from_key(pcopy.private, SIZE_MAX, &private); - if (keylen < 0 || !private) { - ret = keylen; - goto error2; + secretlen = crypto_dh_key_len(&dh_inputs); + secret = kmalloc(secretlen, GFP_KERNEL); + if (!secret) { + ret = -ENOMEM; + goto out2; + } + ret = crypto_dh_encode_key(secret, secretlen, &dh_inputs); + if (ret) + goto out3; + + tfm = crypto_alloc_kpp("dh", CRYPTO_ALG_TYPE_KPP, 0); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto out3; } - result = mpi_alloc(0); - if (!result) { + ret = crypto_kpp_set_secret(tfm, secret, secretlen); + if (ret) + goto out4; + + outlen = crypto_kpp_maxsize(tfm); + + if (!kdfcopy) { + /* + * When not using a KDF, buflen 0 is used to read the + * required buffer length + */ + if (buflen == 0) { + ret = outlen; + goto out4; + } else if (outlen > buflen) { + ret = -EOVERFLOW; + goto out4; + } + } + + outbuf = kzalloc(kdfcopy ? (outlen + kdfcopy->otherinfolen) : outlen, + GFP_KERNEL); + if (!outbuf) { ret = -ENOMEM; - goto error3; + goto out4; } - kbuf = kmalloc(resultlen, GFP_KERNEL); - if (!kbuf) { + sg_init_one(&outsg, outbuf, outlen); + + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) { ret = -ENOMEM; - goto error4; + goto out5; } - ret = do_dh(result, base, private, prime); - if (ret) - goto error5; + kpp_request_set_input(req, NULL, 0); + kpp_request_set_output(req, &outsg, outlen); + init_completion(&compl.completion); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP, + dh_crypto_done, &compl); - ret = mpi_read_buffer(result, kbuf, resultlen, &nbytes, NULL); - if (ret != 0) - goto error5; + /* + * For DH, generate_public_key and generate_shared_secret are + * the same calculation + */ + ret = crypto_kpp_generate_public_key(req); + if (ret == -EINPROGRESS) { + wait_for_completion(&compl.completion); + ret = compl.err; + if (ret) + goto out6; + } - ret = nbytes; - if (copy_to_user(buffer, kbuf, nbytes) != 0) + if (kdfcopy) { + /* + * Concatenate SP800-56A otherinfo past DH shared secret -- the + * input to the KDF is (DH shared secret || otherinfo) + */ + if (copy_from_user(outbuf + req->dst_len, kdfcopy->otherinfo, + kdfcopy->otherinfolen) != 0) { + ret = -EFAULT; + goto out6; + } + + ret = keyctl_dh_compute_kdf(sdesc, buffer, buflen, outbuf, + req->dst_len + kdfcopy->otherinfolen, + outlen - req->dst_len); + } else if (copy_to_user(buffer, outbuf, req->dst_len) == 0) { + ret = req->dst_len; + } else { ret = -EFAULT; + } -error5: - kfree(kbuf); -error4: - mpi_free(result); -error3: - mpi_free(private); -error2: - mpi_free(base); -error1: - mpi_free(prime); -out: +out6: + kpp_request_free(req); +out5: + kzfree(outbuf); +out4: + crypto_free_kpp(tfm); +out3: + kzfree(secret); +out2: + dh_free_data(&dh_inputs); +out1: + kdf_dealloc(sdesc); return ret; } + +long keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct keyctl_kdf_params __user *kdf) +{ + struct keyctl_kdf_params kdfcopy; + + if (!kdf) + return __keyctl_dh_compute(params, buffer, buflen, NULL); + + if (copy_from_user(&kdfcopy, kdf, sizeof(kdfcopy)) != 0) + return -EFAULT; + + return __keyctl_dh_compute(params, buffer, buflen, &kdfcopy); +} diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 0010955d7876c2302704020af8b2ffb6010f2c0a..bb6324d1ccec32f6dde05f520d8d2ed2e089c785 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -54,13 +55,7 @@ static int blksize; #define MAX_DATA_SIZE 4096 #define MIN_DATA_SIZE 20 -struct sdesc { - struct shash_desc shash; - char ctx[]; -}; - -static struct crypto_shash *hashalg; -static struct crypto_shash *hmacalg; +static struct crypto_shash *hash_tfm; enum { Opt_err = -1, Opt_new, Opt_load, Opt_update @@ -141,23 +136,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc) */ static int valid_master_desc(const char *new_desc, const char *orig_desc) { - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN)) - goto out; - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { - if (strlen(new_desc) == KEY_USER_PREFIX_LEN) - goto out; - if (orig_desc) - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN)) - goto out; - } else - goto out; + int prefix_len; + + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) + prefix_len = KEY_TRUSTED_PREFIX_LEN; + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) + prefix_len = KEY_USER_PREFIX_LEN; + else + return -EINVAL; + + if (!new_desc[prefix_len]) + return -EINVAL; + + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) + return -EINVAL; + return 0; -out: - return -EINVAL; } /* @@ -321,53 +315,38 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k return ukey; } -static struct sdesc *alloc_sdesc(struct crypto_shash *alg) -{ - struct sdesc *sdesc; - int size; - - size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); - sdesc = kmalloc(size, GFP_KERNEL); - if (!sdesc) - return ERR_PTR(-ENOMEM); - sdesc->shash.tfm = alg; - sdesc->shash.flags = 0x0; - return sdesc; -} - -static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen, +static int calc_hash(struct crypto_shash *tfm, u8 *digest, const u8 *buf, unsigned int buflen) { - struct sdesc *sdesc; - int ret; + SHASH_DESC_ON_STACK(desc, tfm); + int err; - sdesc = alloc_sdesc(hmacalg); - if (IS_ERR(sdesc)) { - pr_info("encrypted_key: can't alloc %s\n", hmac_alg); - return PTR_ERR(sdesc); - } + desc->tfm = tfm; + desc->flags = 0; - ret = crypto_shash_setkey(hmacalg, key, keylen); - if (!ret) - ret = crypto_shash_digest(&sdesc->shash, buf, buflen, digest); - kfree(sdesc); - return ret; + err = crypto_shash_digest(desc, buf, buflen, digest); + shash_desc_zero(desc); + return err; } -static int calc_hash(u8 *digest, const u8 *buf, unsigned int buflen) +static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen, + const u8 *buf, unsigned int buflen) { - struct sdesc *sdesc; - int ret; + struct crypto_shash *tfm; + int err; - sdesc = alloc_sdesc(hashalg); - if (IS_ERR(sdesc)) { - pr_info("encrypted_key: can't alloc %s\n", hash_alg); - return PTR_ERR(sdesc); + tfm = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + pr_err("encrypted_key: can't alloc %s transform: %ld\n", + hmac_alg, PTR_ERR(tfm)); + return PTR_ERR(tfm); } - ret = crypto_shash_digest(&sdesc->shash, buf, buflen, digest); - kfree(sdesc); - return ret; + err = crypto_shash_setkey(tfm, key, keylen); + if (!err) + err = calc_hash(tfm, digest, buf, buflen); + crypto_free_shash(tfm); + return err; } enum derived_key_type { ENC_KEY, AUTH_KEY }; @@ -385,10 +364,9 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type, derived_buf_len = HASH_SIZE; derived_buf = kzalloc(derived_buf_len, GFP_KERNEL); - if (!derived_buf) { - pr_err("encrypted_key: out of memory\n"); + if (!derived_buf) return -ENOMEM; - } + if (key_type) strcpy(derived_buf, "AUTH_KEY"); else @@ -396,8 +374,8 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type, memcpy(derived_buf + strlen(derived_buf) + 1, master_key, master_keylen); - ret = calc_hash(derived_key, derived_buf, derived_buf_len); - kfree(derived_buf); + ret = calc_hash(hash_tfm, derived_key, derived_buf, derived_buf_len); + kzfree(derived_buf); return ret; } @@ -480,12 +458,9 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload, struct skcipher_request *req; unsigned int encrypted_datalen; u8 iv[AES_BLOCK_SIZE]; - unsigned int padlen; - char pad[16]; int ret; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); - padlen = encrypted_datalen - epayload->decrypted_datalen; req = init_skcipher_req(derived_key, derived_keylen); ret = PTR_ERR(req); @@ -493,11 +468,10 @@ static int derived_key_encrypt(struct encrypted_key_payload *epayload, goto out; dump_decrypted_data(epayload); - memset(pad, 0, sizeof pad); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], epayload->decrypted_data, epayload->decrypted_datalen); - sg_set_buf(&sg_in[1], pad, padlen); + sg_set_page(&sg_in[1], ZERO_PAGE(0), AES_BLOCK_SIZE, 0); sg_init_table(sg_out, 1); sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen); @@ -533,6 +507,7 @@ static int datablob_hmac_append(struct encrypted_key_payload *epayload, if (!ret) dump_hmac(NULL, digest, HASH_SIZE); out: + memzero_explicit(derived_key, sizeof(derived_key)); return ret; } @@ -561,8 +536,8 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload, ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len); if (ret < 0) goto out; - ret = memcmp(digest, epayload->format + epayload->datablob_len, - sizeof digest); + ret = crypto_memneq(digest, epayload->format + epayload->datablob_len, + sizeof(digest)); if (ret) { ret = -EINVAL; dump_hmac("datablob", @@ -571,6 +546,7 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload, dump_hmac("calc", digest, HASH_SIZE); } out: + memzero_explicit(derived_key, sizeof(derived_key)); return ret; } @@ -584,9 +560,14 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, struct skcipher_request *req; unsigned int encrypted_datalen; u8 iv[AES_BLOCK_SIZE]; - char pad[16]; + u8 *pad; int ret; + /* Throwaway buffer to hold the unused zero padding at the end */ + pad = kmalloc(AES_BLOCK_SIZE, GFP_KERNEL); + if (!pad) + return -ENOMEM; + encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); req = init_skcipher_req(derived_key, derived_keylen); ret = PTR_ERR(req); @@ -594,13 +575,12 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, goto out; dump_encrypted_data(epayload, encrypted_datalen); - memset(pad, 0, sizeof pad); sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); sg_set_buf(sg_in, epayload->encrypted_data, encrypted_datalen); sg_set_buf(&sg_out[0], epayload->decrypted_data, epayload->decrypted_datalen); - sg_set_buf(&sg_out[1], pad, sizeof pad); + sg_set_buf(&sg_out[1], pad, AES_BLOCK_SIZE); memcpy(iv, epayload->iv, sizeof(iv)); skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); @@ -612,6 +592,7 @@ static int derived_key_decrypt(struct encrypted_key_payload *epayload, goto out; dump_decrypted_data(epayload); out: + kfree(pad); return ret; } @@ -722,6 +703,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, out: up_read(&mkey->sem); key_put(mkey); + memzero_explicit(derived_key, sizeof(derived_key)); return ret; } @@ -828,13 +810,13 @@ static int encrypted_instantiate(struct key *key, ret = encrypted_init(epayload, key->description, format, master_desc, decrypted_datalen, hex_encoded_iv); if (ret < 0) { - kfree(epayload); + kzfree(epayload); goto out; } rcu_assign_keypointer(key, epayload); out: - kfree(datablob); + kzfree(datablob); return ret; } @@ -843,8 +825,7 @@ static void encrypted_rcu_free(struct rcu_head *rcu) struct encrypted_key_payload *epayload; epayload = container_of(rcu, struct encrypted_key_payload, rcu); - memset(epayload->decrypted_data, 0, epayload->decrypted_datalen); - kfree(epayload); + kzfree(epayload); } /* @@ -902,7 +883,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) rcu_assign_keypointer(key, new_epayload); call_rcu(&epayload->rcu, encrypted_rcu_free); out: - kfree(buf); + kzfree(buf); return ret; } @@ -960,33 +941,26 @@ static long encrypted_read(const struct key *key, char __user *buffer, up_read(&mkey->sem); key_put(mkey); + memzero_explicit(derived_key, sizeof(derived_key)); if (copy_to_user(buffer, ascii_buf, asciiblob_len) != 0) ret = -EFAULT; - kfree(ascii_buf); + kzfree(ascii_buf); return asciiblob_len; out: up_read(&mkey->sem); key_put(mkey); + memzero_explicit(derived_key, sizeof(derived_key)); return ret; } /* - * encrypted_destroy - before freeing the key, clear the decrypted data - * - * Before freeing the key, clear the memory containing the decrypted - * key data. + * encrypted_destroy - clear and free the key's payload */ static void encrypted_destroy(struct key *key) { - struct encrypted_key_payload *epayload = key->payload.data[0]; - - if (!epayload) - return; - - memzero_explicit(epayload->decrypted_data, epayload->decrypted_datalen); - kfree(key->payload.data[0]); + kzfree(key->payload.data[0]); } struct key_type key_type_encrypted = { @@ -999,47 +973,17 @@ struct key_type key_type_encrypted = { }; EXPORT_SYMBOL_GPL(key_type_encrypted); -static void encrypted_shash_release(void) -{ - if (hashalg) - crypto_free_shash(hashalg); - if (hmacalg) - crypto_free_shash(hmacalg); -} - -static int __init encrypted_shash_alloc(void) +static int __init init_encrypted(void) { int ret; - hmacalg = crypto_alloc_shash(hmac_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hmacalg)) { - pr_info("encrypted_key: could not allocate crypto %s\n", - hmac_alg); - return PTR_ERR(hmacalg); - } - - hashalg = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hashalg)) { - pr_info("encrypted_key: could not allocate crypto %s\n", - hash_alg); - ret = PTR_ERR(hashalg); - goto hashalg_fail; + hash_tfm = crypto_alloc_shash(hash_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hash_tfm)) { + pr_err("encrypted_key: can't allocate %s transform: %ld\n", + hash_alg, PTR_ERR(hash_tfm)); + return PTR_ERR(hash_tfm); } - return 0; - -hashalg_fail: - crypto_free_shash(hmacalg); - return ret; -} - -static int __init init_encrypted(void) -{ - int ret; - - ret = encrypted_shash_alloc(); - if (ret < 0) - return ret; ret = aes_get_sizes(); if (ret < 0) goto out; @@ -1048,14 +992,14 @@ static int __init init_encrypted(void) goto out; return 0; out: - encrypted_shash_release(); + crypto_free_shash(hash_tfm); return ret; } static void __exit cleanup_encrypted(void) { - encrypted_shash_release(); + crypto_free_shash(hash_tfm); unregister_key_type(&key_type_encrypted); } diff --git a/security/keys/gc.c b/security/keys/gc.c index addf060399e09547307d9c023f36d8dbf869a931..87cb260e4890f3ac464e8d3f3244077653510b74 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -46,7 +46,7 @@ static unsigned long key_gc_flags; * immediately unlinked. */ struct key_type key_type_dead = { - .name = "dead", + .name = ".dead", }; /* @@ -158,9 +158,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kfree(key->description); -#ifdef KEY_DEBUGGING - key->magic = KEY_DEBUG_MAGIC_X; -#endif + memzero_explicit(key, sizeof(*key)); kmem_cache_free(key_jar, key); } } @@ -220,7 +218,7 @@ static void key_garbage_collector(struct work_struct *work) key = rb_entry(cursor, struct key, serial_node); cursor = rb_next(cursor); - if (atomic_read(&key->usage) == 0) + if (refcount_read(&key->usage) == 0) goto found_unreferenced_key; if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) { @@ -229,6 +227,9 @@ static void key_garbage_collector(struct work_struct *work) set_bit(KEY_FLAG_DEAD, &key->flags); key->perm = 0; goto skip_dead_key; + } else if (key->type == &key_type_keyring && + key->restrict_link) { + goto found_restricted_keyring; } } @@ -334,6 +335,14 @@ static void key_garbage_collector(struct work_struct *work) gc_state |= KEY_GC_REAP_AGAIN; goto maybe_resched; + /* We found a restricted keyring and need to update the restriction if + * it is associated with the dead key type. + */ +found_restricted_keyring: + spin_unlock(&key_serial_lock); + keyring_restriction_gc(key, key_gc_dead_keytype); + goto maybe_resched; + /* We found a keyring and we need to check the payload for links to * dead or expired keys. We don't flag another reap immediately as we * have to wait for the old payload to be destroyed by RCU before we diff --git a/security/keys/internal.h b/security/keys/internal.h index a2f4c0abb8d847325465131e7a4e219489b441e6..c0f8682eba69a5d89af203f4979df65102b95909 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -17,6 +17,8 @@ #include #include #include +#include +#include struct iovec; @@ -53,7 +55,7 @@ struct key_user { struct rb_node node; struct mutex cons_lock; /* construction initiation lock */ spinlock_t lock; - atomic_t usage; /* for accessing qnkeys & qnbytes */ + refcount_t usage; /* for accessing qnkeys & qnbytes */ atomic_t nkeys; /* number of keys */ atomic_t nikeys; /* number of instantiated keys */ kuid_t uid; @@ -167,6 +169,8 @@ extern void key_change_session_keyring(struct callback_head *twork); extern struct work_struct key_gc_work; extern unsigned key_gc_delay; extern void keyring_gc(struct key *keyring, time_t limit); +extern void keyring_restriction_gc(struct key *keyring, + struct key_type *dead_type); extern void key_schedule_gc(time_t gc_at); extern void key_schedule_gc_links(void); extern void key_gc_keytype(struct key_type *ktype); @@ -249,6 +253,9 @@ struct iov_iter; extern long keyctl_instantiate_key_common(key_serial_t, struct iov_iter *, key_serial_t); +extern long keyctl_restrict_keyring(key_serial_t id, + const char __user *_type, + const char __user *_restriction); #ifdef CONFIG_PERSISTENT_KEYRINGS extern long keyctl_get_persistent(uid_t, key_serial_t); extern unsigned persistent_keyring_expiry; @@ -261,15 +268,34 @@ static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring) #ifdef CONFIG_KEY_DH_OPERATIONS extern long keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *, - size_t, void __user *); + size_t, struct keyctl_kdf_params __user *); +extern long __keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *, + size_t, struct keyctl_kdf_params *); +#ifdef CONFIG_KEYS_COMPAT +extern long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct compat_keyctl_kdf_params __user *kdf); +#endif +#define KEYCTL_KDF_MAX_OUTPUT_LEN 1024 /* max length of KDF output */ +#define KEYCTL_KDF_MAX_OI_LEN 64 /* max length of otherinfo */ #else static inline long keyctl_dh_compute(struct keyctl_dh_params __user *params, char __user *buffer, size_t buflen, - void __user *reserved) + struct keyctl_kdf_params __user *kdf) +{ + return -EOPNOTSUPP; +} + +#ifdef CONFIG_KEYS_COMPAT +static inline long compat_keyctl_dh_compute( + struct keyctl_dh_params __user *params, + char __user *buffer, size_t buflen, + struct keyctl_kdf_params __user *kdf) { return -EOPNOTSUPP; } #endif +#endif /* * Debugging key validation diff --git a/security/keys/key.c b/security/keys/key.c index 346fbf201c22e37e67dc58b5684c868e79ad9af5..83da68d98b40b452a1c8b37121a6ca270387d4f6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -93,7 +93,7 @@ struct key_user *key_user_lookup(kuid_t uid) /* if we get here, then the user record still hadn't appeared on the * second pass - so we use the candidate record */ - atomic_set(&candidate->usage, 1); + refcount_set(&candidate->usage, 1); atomic_set(&candidate->nkeys, 0); atomic_set(&candidate->nikeys, 0); candidate->uid = uid; @@ -110,7 +110,7 @@ struct key_user *key_user_lookup(kuid_t uid) /* okay - we found a user record for this UID */ found: - atomic_inc(&user->usage); + refcount_inc(&user->usage); spin_unlock(&key_user_lock); kfree(candidate); out: @@ -122,7 +122,7 @@ struct key_user *key_user_lookup(kuid_t uid) */ void key_user_put(struct key_user *user) { - if (atomic_dec_and_lock(&user->usage, &key_user_lock)) { + if (refcount_dec_and_lock(&user->usage, &key_user_lock)) { rb_erase(&user->node, &key_user_tree); spin_unlock(&key_user_lock); @@ -201,12 +201,15 @@ static inline void key_alloc_serial(struct key *key) * @cred: The credentials specifying UID namespace. * @perm: The permissions mask of the new key. * @flags: Flags specifying quota properties. - * @restrict_link: Optional link restriction method for new keyrings. + * @restrict_link: Optional link restriction for new keyrings. * * Allocate a key of the specified type with the attributes given. The key is * returned in an uninstantiated state and the caller needs to instantiate the * key before returning. * + * The restrict_link structure (if not NULL) will be freed when the + * keyring is destroyed, so it must be dynamically allocated. + * * The user's key count quota is updated to reflect the creation of the key and * the user's key data quota has the default for the key type reserved. The * instantiation function should amend this as necessary. If insufficient @@ -225,9 +228,7 @@ static inline void key_alloc_serial(struct key *key) struct key *key_alloc(struct key_type *type, const char *desc, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *)) + struct key_restriction *restrict_link) { struct key_user *user = NULL; struct key *key; @@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, if (!key->index_key.description) goto no_memory_3; - atomic_set(&key->usage, 1); + refcount_set(&key->usage, 1); init_rwsem(&key->sem); lockdep_set_class(&key->sem, &type->lock_class); key->index_key.type = type; @@ -499,19 +500,23 @@ int key_instantiate_and_link(struct key *key, } if (keyring) { - if (keyring->restrict_link) { - ret = keyring->restrict_link(keyring, key->type, - &prep.payload); - if (ret < 0) - goto error; - } ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret < 0) goto error; + + if (keyring->restrict_link && keyring->restrict_link->check) { + struct key_restriction *keyres = keyring->restrict_link; + + ret = keyres->check(keyring, key->type, &prep.payload, + keyres->key); + if (ret < 0) + goto error_link_end; + } } ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit); +error_link_end: if (keyring) __key_link_end(keyring, &key->index_key, edit); @@ -621,7 +626,7 @@ void key_put(struct key *key) if (key) { key_check(key); - if (atomic_dec_and_test(&key->usage)) + if (refcount_dec_and_test(&key->usage)) schedule_work(&key_gc_work); } } @@ -655,14 +660,11 @@ struct key *key_lookup(key_serial_t id) goto error; found: - /* pretend it doesn't exist if it is awaiting deletion */ - if (atomic_read(&key->usage) == 0) - goto not_found; - - /* this races with key_put(), but that doesn't matter since key_put() - * doesn't actually change the key + /* A key is allowed to be looked up only if someone still owns a + * reference to it - otherwise it's awaiting the gc. */ - __key_get(key); + if (!refcount_inc_not_zero(&key->usage)) + goto not_found; error: spin_unlock(&key_serial_lock); @@ -806,9 +808,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, struct key *keyring, *key = NULL; key_ref_t key_ref; int ret; - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *) = NULL; + struct key_restriction *restrict_link = NULL; /* look up the key type to see if it's one of the registered kernel * types */ @@ -854,20 +854,21 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } index_key.desc_len = strlen(index_key.description); - if (restrict_link) { - ret = restrict_link(keyring, index_key.type, &prep.payload); - if (ret < 0) { - key_ref = ERR_PTR(ret); - goto error_free_prep; - } - } - ret = __key_link_begin(keyring, &index_key, &edit); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_free_prep; } + if (restrict_link && restrict_link->check) { + ret = restrict_link->check(keyring, index_key.type, + &prep.payload, restrict_link->key); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_link_end; + } + } + /* if we're going to allocate a new key, we're going to have * to modify the keyring */ ret = key_permission(keyring_ref, KEY_NEED_WRITE); @@ -962,12 +963,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) /* the key must be writable */ ret = key_permission(key_ref, KEY_NEED_WRITE); if (ret < 0) - goto error; + return ret; /* attempt to update it if supported */ - ret = -EOPNOTSUPP; if (!key->type->update) - goto error; + return -EOPNOTSUPP; memset(&prep, 0, sizeof(prep)); prep.data = payload; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 52c34532c78562643fce84832a5b536baf84988b..ab0b337c84b4c02e4856719398edb94d1caf101a 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -99,16 +99,11 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, /* pull the payload in if one was supplied */ payload = NULL; - if (_payload) { + if (plen) { ret = -ENOMEM; - payload = kmalloc(plen, GFP_KERNEL | __GFP_NOWARN); - if (!payload) { - if (plen <= PAGE_SIZE) - goto error2; - payload = vmalloc(plen); - if (!payload) - goto error2; - } + payload = kvmalloc(plen, GFP_KERNEL); + if (!payload) + goto error2; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) @@ -137,7 +132,10 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, key_ref_put(keyring_ref); error3: - kvfree(payload); + if (payload) { + memzero_explicit(payload, plen); + kvfree(payload); + } error2: kfree(description); error: @@ -273,7 +271,8 @@ long keyctl_get_keyring_ID(key_serial_t id, int create) * Create and join an anonymous session keyring or join a named session * keyring, creating it if necessary. A named session keyring must have Search * permission for it to be joined. Session keyrings without this permit will - * be skipped over. + * be skipped over. It is not permitted for userspace to create or join + * keyrings whose name begin with a dot. * * If successful, the ID of the joined session keyring will be returned. */ @@ -290,12 +289,16 @@ long keyctl_join_session_keyring(const char __user *_name) ret = PTR_ERR(name); goto error; } + + ret = -EPERM; + if (name[0] == '.') + goto error_name; } /* join the session */ ret = join_session_keyring(name); +error_name: kfree(name); - error: return ret; } @@ -324,7 +327,7 @@ long keyctl_update_key(key_serial_t id, /* pull the payload in if one was supplied */ payload = NULL; - if (_payload) { + if (plen) { ret = -ENOMEM; payload = kmalloc(plen, GFP_KERNEL); if (!payload) @@ -347,7 +350,7 @@ long keyctl_update_key(key_serial_t id, key_ref_put(key_ref); error2: - kfree(payload); + kzfree(payload); error: return ret; } @@ -1066,14 +1069,9 @@ long keyctl_instantiate_key_common(key_serial_t id, if (from) { ret = -ENOMEM; - payload = kmalloc(plen, GFP_KERNEL); - if (!payload) { - if (plen <= PAGE_SIZE) - goto error; - payload = vmalloc(plen); - if (!payload) - goto error; - } + payload = kvmalloc(plen, GFP_KERNEL); + if (!payload) + goto error; ret = -EFAULT; if (!copy_from_iter_full(payload, plen, from)) @@ -1098,7 +1096,10 @@ long keyctl_instantiate_key_common(key_serial_t id, keyctl_change_reqkey_auth(NULL); error2: - kvfree(payload); + if (payload) { + memzero_explicit(payload, plen); + kvfree(payload); + } error: return ret; } @@ -1253,8 +1254,8 @@ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, * Read or set the default keyring in which request_key() will cache keys and * return the old setting. * - * If a process keyring is specified then this will be created if it doesn't - * yet exist. The old setting will be returned if successful. + * If a thread or process keyring is specified then it will be created if it + * doesn't yet exist. The old setting will be returned if successful. */ long keyctl_set_reqkey_keyring(int reqkey_defl) { @@ -1279,11 +1280,8 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_PROCESS_KEYRING: ret = install_process_keyring_to_cred(new); - if (ret < 0) { - if (ret != -EEXIST) - goto error; - ret = 0; - } + if (ret < 0) + goto error; goto set; case KEY_REQKEY_DEFL_DEFAULT: @@ -1582,6 +1580,59 @@ long keyctl_session_to_parent(void) return ret; } +/* + * Apply a restriction to a given keyring. + * + * The caller must have Setattr permission to change keyring restrictions. + * + * The requested type name may be a NULL pointer to reject all attempts + * to link to the keyring. If _type is non-NULL, _restriction can be + * NULL or a pointer to a string describing the restriction. If _type is + * NULL, _restriction must also be NULL. + * + * Returns 0 if successful. + */ +long keyctl_restrict_keyring(key_serial_t id, const char __user *_type, + const char __user *_restriction) +{ + key_ref_t key_ref; + bool link_reject = !_type; + char type[32]; + char *restriction = NULL; + long ret; + + key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + + if (_type) { + ret = key_get_type_from_user(type, _type, sizeof(type)); + if (ret < 0) + goto error; + } + + if (_restriction) { + if (!_type) { + ret = -EINVAL; + goto error; + } + + restriction = strndup_user(_restriction, PAGE_SIZE); + if (IS_ERR(restriction)) { + ret = PTR_ERR(restriction); + goto error; + } + } + + ret = keyring_restrict(key_ref, link_reject ? NULL : type, restriction); + kfree(restriction); + +error: + key_ref_put(key_ref); + + return ret; +} + /* * The key control system call */ @@ -1691,7 +1742,12 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, case KEYCTL_DH_COMPUTE: return keyctl_dh_compute((struct keyctl_dh_params __user *) arg2, (char __user *) arg3, (size_t) arg4, - (void __user *) arg5); + (struct keyctl_kdf_params __user *) arg5); + + case KEYCTL_RESTRICT_KEYRING: + return keyctl_restrict_keyring((key_serial_t) arg2, + (const char __user *) arg3, + (const char __user *) arg4); default: return -EOPNOTSUPP; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index c91e4e0cea08b94c38fcde47da0b5d9d9823dd5e..de81793f9920787101dec77eca28ddfbe91ebbc7 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -394,6 +394,13 @@ static void keyring_destroy(struct key *keyring) write_unlock(&keyring_name_lock); } + if (keyring->restrict_link) { + struct key_restriction *keyres = keyring->restrict_link; + + key_put(keyres->key); + kfree(keyres); + } + assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); } @@ -492,9 +499,7 @@ static long keyring_read(const struct key *keyring, struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags, - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *), + struct key_restriction *restrict_link, struct key *dest) { struct key *keyring; @@ -519,17 +524,19 @@ EXPORT_SYMBOL(keyring_alloc); * @keyring: The keyring being added to. * @type: The type of key being added. * @payload: The payload of the key intended to be added. + * @data: Additional data for evaluating restriction. * * Reject the addition of any links to a keyring. It can be overridden by * passing KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when * adding a key to a keyring. * - * This is meant to be passed as the restrict_link parameter to - * keyring_alloc(). + * This is meant to be stored in a key_restriction structure which is passed + * in the restrict_link parameter to keyring_alloc(). */ int restrict_link_reject(struct key *keyring, const struct key_type *type, - const union key_payload *payload) + const union key_payload *payload, + struct key *restriction_key) { return -EPERM; } @@ -699,7 +706,7 @@ static bool search_nested_keyrings(struct key *keyring, * Non-keyrings avoid the leftmost branch of the root entirely (root * slots 1-15). */ - ptr = ACCESS_ONCE(keyring->keys.root); + ptr = READ_ONCE(keyring->keys.root); if (!ptr) goto not_this_keyring; @@ -713,7 +720,7 @@ static bool search_nested_keyrings(struct key *keyring, if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) goto not_this_keyring; - ptr = ACCESS_ONCE(shortcut->next_node); + ptr = READ_ONCE(shortcut->next_node); node = assoc_array_ptr_to_node(ptr); goto begin_node; } @@ -733,7 +740,7 @@ static bool search_nested_keyrings(struct key *keyring, if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); smp_read_barrier_depends(); - ptr = ACCESS_ONCE(shortcut->next_node); + ptr = READ_ONCE(shortcut->next_node); BUG_ON(!assoc_array_ptr_is_node(ptr)); } node = assoc_array_ptr_to_node(ptr); @@ -745,7 +752,7 @@ static bool search_nested_keyrings(struct key *keyring, ascend_to_node: /* Go through the slots in a node */ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = ACCESS_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) goto descend_to_node; @@ -783,13 +790,13 @@ static bool search_nested_keyrings(struct key *keyring, /* We've dealt with all the slots in the current node, so now we need * to ascend to the parent and continue processing there. */ - ptr = ACCESS_ONCE(node->back_pointer); + ptr = READ_ONCE(node->back_pointer); slot = node->parent_slot; if (ptr && assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); smp_read_barrier_depends(); - ptr = ACCESS_ONCE(shortcut->back_pointer); + ptr = READ_ONCE(shortcut->back_pointer); slot = shortcut->parent_slot; } if (!ptr) @@ -940,6 +947,111 @@ key_ref_t keyring_search(key_ref_t keyring, } EXPORT_SYMBOL(keyring_search); +static struct key_restriction *keyring_restriction_alloc( + key_restrict_link_func_t check) +{ + struct key_restriction *keyres = + kzalloc(sizeof(struct key_restriction), GFP_KERNEL); + + if (!keyres) + return ERR_PTR(-ENOMEM); + + keyres->check = check; + + return keyres; +} + +/* + * Semaphore to serialise restriction setup to prevent reference count + * cycles through restriction key pointers. + */ +static DECLARE_RWSEM(keyring_serialise_restrict_sem); + +/* + * Check for restriction cycles that would prevent keyring garbage collection. + * keyring_serialise_restrict_sem must be held. + */ +static bool keyring_detect_restriction_cycle(const struct key *dest_keyring, + struct key_restriction *keyres) +{ + while (keyres && keyres->key && + keyres->key->type == &key_type_keyring) { + if (keyres->key == dest_keyring) + return true; + + keyres = keyres->key->restrict_link; + } + + return false; +} + +/** + * keyring_restrict - Look up and apply a restriction to a keyring + * + * @keyring: The keyring to be restricted + * @restriction: The restriction options to apply to the keyring + */ +int keyring_restrict(key_ref_t keyring_ref, const char *type, + const char *restriction) +{ + struct key *keyring; + struct key_type *restrict_type = NULL; + struct key_restriction *restrict_link; + int ret = 0; + + keyring = key_ref_to_ptr(keyring_ref); + key_check(keyring); + + if (keyring->type != &key_type_keyring) + return -ENOTDIR; + + if (!type) { + restrict_link = keyring_restriction_alloc(restrict_link_reject); + } else { + restrict_type = key_type_lookup(type); + + if (IS_ERR(restrict_type)) + return PTR_ERR(restrict_type); + + if (!restrict_type->lookup_restriction) { + ret = -ENOENT; + goto error; + } + + restrict_link = restrict_type->lookup_restriction(restriction); + } + + if (IS_ERR(restrict_link)) { + ret = PTR_ERR(restrict_link); + goto error; + } + + down_write(&keyring->sem); + down_write(&keyring_serialise_restrict_sem); + + if (keyring->restrict_link) + ret = -EEXIST; + else if (keyring_detect_restriction_cycle(keyring, restrict_link)) + ret = -EDEADLK; + else + keyring->restrict_link = restrict_link; + + up_write(&keyring_serialise_restrict_sem); + up_write(&keyring->sem); + + if (ret < 0) { + key_put(restrict_link->key); + kfree(restrict_link); + } + +error: + if (restrict_type) + key_type_put(restrict_type); + + return ret; +} +EXPORT_SYMBOL(keyring_restrict); + /* * Search the given keyring for a key that might be updated. * @@ -1033,7 +1145,7 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check) /* we've got a match but we might end up racing with * key_cleanup() if the keyring is currently 'dead' * (ie. it has a zero usage count) */ - if (!atomic_inc_not_zero(&keyring->usage)) + if (!refcount_inc_not_zero(&keyring->usage)) continue; keyring->last_used_at = current_kernel_time().tv_sec; goto out; @@ -1220,9 +1332,10 @@ void __key_link_end(struct key *keyring, */ static int __key_link_check_restriction(struct key *keyring, struct key *key) { - if (!keyring->restrict_link) + if (!keyring->restrict_link || !keyring->restrict_link->check) return 0; - return keyring->restrict_link(keyring, key->type, &key->payload); + return keyring->restrict_link->check(keyring, key->type, &key->payload, + keyring->restrict_link->key); } /** @@ -1250,14 +1363,14 @@ int key_link(struct key *keyring, struct key *key) struct assoc_array_edit *edit; int ret; - kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage)); + kenter("{%d,%d}", keyring->serial, refcount_read(&keyring->usage)); key_check(keyring); key_check(key); ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret == 0) { - kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage)); + kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage)); ret = __key_link_check_restriction(keyring, key); if (ret == 0) ret = __key_link_check_live_key(keyring, key); @@ -1266,7 +1379,7 @@ int key_link(struct key *keyring, struct key *key) __key_link_end(keyring, &key->index_key, edit); } - kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage)); + kleave(" = %d {%d,%d}", ret, keyring->serial, refcount_read(&keyring->usage)); return ret; } EXPORT_SYMBOL(key_link); @@ -1426,3 +1539,53 @@ void keyring_gc(struct key *keyring, time_t limit) up_write(&keyring->sem); kleave(" [gc]"); } + +/* + * Garbage collect restriction pointers from a keyring. + * + * Keyring restrictions are associated with a key type, and must be cleaned + * up if the key type is unregistered. The restriction is altered to always + * reject additional keys so a keyring cannot be opened up by unregistering + * a key type. + * + * Not called with any keyring locks held. The keyring's key struct will not + * be deallocated under us as only our caller may deallocate it. + * + * The caller is required to hold key_types_sem and dead_type->sem. This is + * fulfilled by key_gc_keytype() holding the locks on behalf of + * key_garbage_collector(), which it invokes on a workqueue. + */ +void keyring_restriction_gc(struct key *keyring, struct key_type *dead_type) +{ + struct key_restriction *keyres; + + kenter("%x{%s}", keyring->serial, keyring->description ?: ""); + + /* + * keyring->restrict_link is only assigned at key allocation time + * or with the key type locked, so the only values that could be + * concurrently assigned to keyring->restrict_link are for key + * types other than dead_type. Given this, it's ok to check + * the key type before acquiring keyring->sem. + */ + if (!dead_type || !keyring->restrict_link || + keyring->restrict_link->keytype != dead_type) { + kleave(" [no restriction gc]"); + return; + } + + /* Lock the keyring to ensure that a link is not in progress */ + down_write(&keyring->sem); + + keyres = keyring->restrict_link; + + keyres->check = restrict_link_reject; + + key_put(keyres->key); + keyres->key = NULL; + keyres->keytype = NULL; + + up_write(&keyring->sem); + + kleave(" [restriction gc]"); +} diff --git a/security/keys/proc.c b/security/keys/proc.c index b9f531c9e4fa753d326752b63dc2cf599579ffeb..bf08d02b6646ae2077b15903471bfb4575b04f5a 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -252,7 +252,7 @@ static int proc_keys_show(struct seq_file *m, void *v) showflag(key, 'U', KEY_FLAG_USER_CONSTRUCT), showflag(key, 'N', KEY_FLAG_NEGATIVE), showflag(key, 'i', KEY_FLAG_INVALIDATED), - atomic_read(&key->usage), + refcount_read(&key->usage), xbuf, key->perm, from_kuid_munged(seq_user_ns(m), key->uid), @@ -340,7 +340,7 @@ static int proc_key_users_show(struct seq_file *m, void *v) seq_printf(m, "%5u: %5d %d/%d %d/%d %d/%d\n", from_kuid_munged(seq_user_ns(m), user->uid), - atomic_read(&user->usage), + refcount_read(&user->usage), atomic_read(&user->nkeys), atomic_read(&user->nikeys), user->qnkeys, diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b6fdd22205b169b663cdb00aecd5d214c7a376dd..86bced9fdbdf22eb60170584d87730e1179a2744 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -30,7 +30,7 @@ static DEFINE_MUTEX(key_user_keyring_mutex); /* The root user's tracking struct */ struct key_user root_key_user = { - .usage = ATOMIC_INIT(3), + .usage = REFCOUNT_INIT(3), .cons_lock = __MUTEX_INITIALIZER(root_key_user.cons_lock), .lock = __SPIN_LOCK_UNLOCKED(root_key_user.lock), .nkeys = ATOMIC_INIT(2), @@ -128,13 +128,18 @@ int install_user_keyrings(void) } /* - * Install a fresh thread keyring directly to new credentials. This keyring is - * allowed to overrun the quota. + * Install a thread keyring to the given credentials struct if it didn't have + * one already. This is allowed to overrun the quota. + * + * Return: 0 if a thread keyring is now present; -errno on failure. */ int install_thread_keyring_to_cred(struct cred *new) { struct key *keyring; + if (new->thread_keyring) + return 0; + keyring = keyring_alloc("_tid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, @@ -147,7 +152,9 @@ int install_thread_keyring_to_cred(struct cred *new) } /* - * Install a fresh thread keyring, discarding the old one. + * Install a thread keyring to the current task if it didn't have one already. + * + * Return: 0 if a thread keyring is now present; -errno on failure. */ static int install_thread_keyring(void) { @@ -158,8 +165,6 @@ static int install_thread_keyring(void) if (!new) return -ENOMEM; - BUG_ON(new->thread_keyring); - ret = install_thread_keyring_to_cred(new); if (ret < 0) { abort_creds(new); @@ -170,17 +175,17 @@ static int install_thread_keyring(void) } /* - * Install a process keyring directly to a credentials struct. + * Install a process keyring to the given credentials struct if it didn't have + * one already. This is allowed to overrun the quota. * - * Returns -EEXIST if there was already a process keyring, 0 if one installed, - * and other value on any other error + * Return: 0 if a process keyring is now present; -errno on failure. */ int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; if (new->process_keyring) - return -EEXIST; + return 0; keyring = keyring_alloc("_pid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, @@ -194,11 +199,9 @@ int install_process_keyring_to_cred(struct cred *new) } /* - * Make sure a process keyring is installed for the current process. The - * existing process keyring is not replaced. + * Install a process keyring to the current task if it didn't have one already. * - * Returns 0 if there is a process keyring by the end of this function, some - * error otherwise. + * Return: 0 if a process keyring is now present; -errno on failure. */ static int install_process_keyring(void) { @@ -212,14 +215,18 @@ static int install_process_keyring(void) ret = install_process_keyring_to_cred(new); if (ret < 0) { abort_creds(new); - return ret != -EEXIST ? ret : 0; + return ret; } return commit_creds(new); } /* - * Install a session keyring directly to a credentials struct. + * Install the given keyring as the session keyring of the given credentials + * struct, replacing the existing one if any. If the given keyring is NULL, + * then install a new anonymous session keyring. + * + * Return: 0 on success; -errno on failure. */ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) { @@ -254,8 +261,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* - * Install a session keyring, discarding the old one. If a keyring is not - * supplied, an empty one is invented. + * Install the given keyring as the session keyring of the current task, + * replacing the existing one if any. If the given keyring is NULL, then + * install a new anonymous session keyring. + * + * Return: 0 on success; -errno on failure. */ static int install_session_keyring(struct key *keyring) { @@ -799,15 +809,14 @@ long join_session_keyring(const char *name) ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { - key_put(keyring); ret = 0; - goto error2; + goto error3; } /* we've got a keyring - now to install it */ ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) - goto error2; + goto error3; commit_creds(new); mutex_unlock(&key_session_mutex); @@ -817,6 +826,8 @@ long join_session_keyring(const char *name) okay: return ret; +error3: + key_put(keyring); error2: mutex_unlock(&key_session_mutex); error: diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 6bbe2f535f0804d34419fff59f86d9ca26b500a7..0f062156dfb24f8ecc1ad448a2537a8976ad3c17 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -213,7 +213,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, if (ret < 0) goto error_inst; - kleave(" = {%d,%d}", authkey->serial, atomic_read(&authkey->usage)); + kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage)); return authkey; auth_key_revoked: diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 2ae31c5a87de9e9084de7e5f9350678da81b49a4..435e86e1387944d723cc569f228239f94c800a81 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -70,7 +70,7 @@ static int TSS_sha1(const unsigned char *data, unsigned int datalen, } ret = crypto_shash_digest(&sdesc->shash, data, datalen, digest); - kfree(sdesc); + kzfree(sdesc); return ret; } @@ -114,7 +114,7 @@ static int TSS_rawhmac(unsigned char *digest, const unsigned char *key, if (!ret) ret = crypto_shash_final(&sdesc->shash, digest); out: - kfree(sdesc); + kzfree(sdesc); return ret; } @@ -165,7 +165,7 @@ static int TSS_authhmac(unsigned char *digest, const unsigned char *key, paramdigest, TPM_NONCE_SIZE, h1, TPM_NONCE_SIZE, h2, 1, &c, 0, 0); out: - kfree(sdesc); + kzfree(sdesc); return ret; } @@ -246,7 +246,7 @@ static int TSS_checkhmac1(unsigned char *buffer, if (memcmp(testhmac, authdata, SHA1_DIGEST_SIZE)) ret = -EINVAL; out: - kfree(sdesc); + kzfree(sdesc); return ret; } @@ -347,7 +347,7 @@ static int TSS_checkhmac2(unsigned char *buffer, if (memcmp(testhmac2, authdata2, SHA1_DIGEST_SIZE)) ret = -EINVAL; out: - kfree(sdesc); + kzfree(sdesc); return ret; } @@ -564,7 +564,7 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype, *bloblen = storedsize; } out: - kfree(td); + kzfree(td); return ret; } @@ -678,7 +678,7 @@ static int key_seal(struct trusted_key_payload *p, if (ret < 0) pr_info("trusted_key: srkseal failed (%d)\n", ret); - kfree(tb); + kzfree(tb); return ret; } @@ -703,7 +703,7 @@ static int key_unseal(struct trusted_key_payload *p, /* pull migratable flag out of sealed key */ p->migratable = p->key[--p->key_len]; - kfree(tb); + kzfree(tb); return ret; } @@ -1037,12 +1037,12 @@ static int trusted_instantiate(struct key *key, if (!ret && options->pcrlock) ret = pcrlock(options->pcrlock); out: - kfree(datablob); - kfree(options); + kzfree(datablob); + kzfree(options); if (!ret) rcu_assign_keypointer(key, payload); else - kfree(payload); + kzfree(payload); return ret; } @@ -1051,8 +1051,7 @@ static void trusted_rcu_free(struct rcu_head *rcu) struct trusted_key_payload *p; p = container_of(rcu, struct trusted_key_payload, rcu); - memset(p->key, 0, p->key_len); - kfree(p); + kzfree(p); } /* @@ -1094,13 +1093,13 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) ret = datablob_parse(datablob, new_p, new_o); if (ret != Opt_update) { ret = -EINVAL; - kfree(new_p); + kzfree(new_p); goto out; } if (!new_o->keyhandle) { ret = -EINVAL; - kfree(new_p); + kzfree(new_p); goto out; } @@ -1114,22 +1113,22 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) ret = key_seal(new_p, new_o); if (ret < 0) { pr_info("trusted_key: key_seal failed (%d)\n", ret); - kfree(new_p); + kzfree(new_p); goto out; } if (new_o->pcrlock) { ret = pcrlock(new_o->pcrlock); if (ret < 0) { pr_info("trusted_key: pcrlock failed (%d)\n", ret); - kfree(new_p); + kzfree(new_p); goto out; } } rcu_assign_keypointer(key, new_p); call_rcu(&p->rcu, trusted_rcu_free); out: - kfree(datablob); - kfree(new_o); + kzfree(datablob); + kzfree(new_o); return ret; } @@ -1158,24 +1157,19 @@ static long trusted_read(const struct key *key, char __user *buffer, for (i = 0; i < p->blob_len; i++) bufp = hex_byte_pack(bufp, p->blob[i]); if ((copy_to_user(buffer, ascii_buf, 2 * p->blob_len)) != 0) { - kfree(ascii_buf); + kzfree(ascii_buf); return -EFAULT; } - kfree(ascii_buf); + kzfree(ascii_buf); return 2 * p->blob_len; } /* - * trusted_destroy - before freeing the key, clear the decrypted data + * trusted_destroy - clear and free the key's payload */ static void trusted_destroy(struct key *key) { - struct trusted_key_payload *p = key->payload.data[0]; - - if (!p) - return; - memset(p->key, 0, p->key_len); - kfree(key->payload.data[0]); + kzfree(key->payload.data[0]); } struct key_type key_type_trusted = { diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 26605134f17a8a3cf7c7a5c0a0c14fd5a3052fcb..3d8c68eba5160286fa7af79c8da1ead6e6b05236 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -86,10 +86,18 @@ EXPORT_SYMBOL_GPL(user_preparse); */ void user_free_preparse(struct key_preparsed_payload *prep) { - kfree(prep->payload.data[0]); + kzfree(prep->payload.data[0]); } EXPORT_SYMBOL_GPL(user_free_preparse); +static void user_free_payload_rcu(struct rcu_head *head) +{ + struct user_key_payload *payload; + + payload = container_of(head, struct user_key_payload, rcu); + kzfree(payload); +} + /* * update a user defined key * - the key's semaphore is write-locked @@ -112,7 +120,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) prep->payload.data[0] = NULL; if (zap) - kfree_rcu(zap, rcu); + call_rcu(&zap->rcu, user_free_payload_rcu); return ret; } EXPORT_SYMBOL_GPL(user_update); @@ -130,7 +138,7 @@ void user_revoke(struct key *key) if (upayload) { rcu_assign_keypointer(key, NULL); - kfree_rcu(upayload, rcu); + call_rcu(&upayload->rcu, user_free_payload_rcu); } } @@ -143,7 +151,7 @@ void user_destroy(struct key *key) { struct user_key_payload *upayload = key->payload.data[0]; - kfree(upayload); + kzfree(upayload); } EXPORT_SYMBOL_GPL(user_destroy); diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c index 1d82eae3a5b834c4beadf9bdf79415ecb9078ffe..dbe6efde77a0a908d3a5d239df892874c2bfb292 100644 --- a/security/loadpin/loadpin.c +++ b/security/loadpin/loadpin.c @@ -174,7 +174,7 @@ static int loadpin_read_file(struct file *file, enum kernel_read_file_id id) return 0; } -static struct security_hook_list loadpin_hooks[] = { +static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(sb_free_security, loadpin_sb_free_security), LSM_HOOK_INIT(kernel_read_file, loadpin_read_file), }; diff --git a/security/security.c b/security/security.c index d0e07f269b2d92b54e384d5a50708543bb7cc131..b9fea3999cf85f7181eceb5041468117f1376f32 100644 --- a/security/security.c +++ b/security/security.c @@ -32,6 +32,7 @@ /* Maximum number of letters for an LSM name string */ #define SECURITY_NAME_MAX 10 +struct security_hook_heads security_hook_heads __lsm_ro_after_init; char *lsm_names; /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = @@ -54,6 +55,12 @@ static void __init do_security_initcalls(void) */ int __init security_init(void) { + int i; + struct list_head *list = (struct list_head *) &security_hook_heads; + + for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct list_head); + i++) + INIT_LIST_HEAD(&list[i]); pr_info("Security Framework initialized\n"); /* @@ -103,10 +110,14 @@ static int lsm_append(char *new, char **result) * to avoid security registration races. This method may also be used * to check if your LSM is currently loaded during kernel initialization. * - * Return true if: - * -The passed LSM is the one chosen by user at boot time, - * -or the passed LSM is configured as the default and the user did not - * choose an alternate LSM at boot time. + * Returns: + * + * true if: + * + * - The passed LSM is the one chosen by user at boot time, + * - or the passed LSM is configured as the default and the user did not + * choose an alternate LSM at boot time. + * * Otherwise, return false. */ int __init security_module_enable(const char *module) @@ -930,6 +941,11 @@ int security_task_create(unsigned long clone_flags) return call_int_hook(task_create, 0, clone_flags); } +int security_task_alloc(struct task_struct *task, unsigned long clone_flags) +{ + return call_int_hook(task_alloc, 0, task, clone_flags); +} + void security_task_free(struct task_struct *task) { call_void_hook(task_free, task); @@ -1036,6 +1052,12 @@ int security_task_getioprio(struct task_struct *p) return call_int_hook(task_getioprio, 0, p); } +int security_task_prlimit(const struct cred *cred, const struct cred *tcred, + unsigned int flags) +{ + return call_int_hook(task_prlimit, 0, cred, tcred, flags); +} + int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) { @@ -1621,355 +1643,3 @@ int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, actx); } #endif /* CONFIG_AUDIT */ - -struct security_hook_heads security_hook_heads = { - .binder_set_context_mgr = - LIST_HEAD_INIT(security_hook_heads.binder_set_context_mgr), - .binder_transaction = - LIST_HEAD_INIT(security_hook_heads.binder_transaction), - .binder_transfer_binder = - LIST_HEAD_INIT(security_hook_heads.binder_transfer_binder), - .binder_transfer_file = - LIST_HEAD_INIT(security_hook_heads.binder_transfer_file), - - .ptrace_access_check = - LIST_HEAD_INIT(security_hook_heads.ptrace_access_check), - .ptrace_traceme = - LIST_HEAD_INIT(security_hook_heads.ptrace_traceme), - .capget = LIST_HEAD_INIT(security_hook_heads.capget), - .capset = LIST_HEAD_INIT(security_hook_heads.capset), - .capable = LIST_HEAD_INIT(security_hook_heads.capable), - .quotactl = LIST_HEAD_INIT(security_hook_heads.quotactl), - .quota_on = LIST_HEAD_INIT(security_hook_heads.quota_on), - .syslog = LIST_HEAD_INIT(security_hook_heads.syslog), - .settime = LIST_HEAD_INIT(security_hook_heads.settime), - .vm_enough_memory = - LIST_HEAD_INIT(security_hook_heads.vm_enough_memory), - .bprm_set_creds = - LIST_HEAD_INIT(security_hook_heads.bprm_set_creds), - .bprm_check_security = - LIST_HEAD_INIT(security_hook_heads.bprm_check_security), - .bprm_secureexec = - LIST_HEAD_INIT(security_hook_heads.bprm_secureexec), - .bprm_committing_creds = - LIST_HEAD_INIT(security_hook_heads.bprm_committing_creds), - .bprm_committed_creds = - LIST_HEAD_INIT(security_hook_heads.bprm_committed_creds), - .sb_alloc_security = - LIST_HEAD_INIT(security_hook_heads.sb_alloc_security), - .sb_free_security = - LIST_HEAD_INIT(security_hook_heads.sb_free_security), - .sb_copy_data = LIST_HEAD_INIT(security_hook_heads.sb_copy_data), - .sb_remount = LIST_HEAD_INIT(security_hook_heads.sb_remount), - .sb_kern_mount = - LIST_HEAD_INIT(security_hook_heads.sb_kern_mount), - .sb_show_options = - LIST_HEAD_INIT(security_hook_heads.sb_show_options), - .sb_statfs = LIST_HEAD_INIT(security_hook_heads.sb_statfs), - .sb_mount = LIST_HEAD_INIT(security_hook_heads.sb_mount), - .sb_umount = LIST_HEAD_INIT(security_hook_heads.sb_umount), - .sb_pivotroot = LIST_HEAD_INIT(security_hook_heads.sb_pivotroot), - .sb_set_mnt_opts = - LIST_HEAD_INIT(security_hook_heads.sb_set_mnt_opts), - .sb_clone_mnt_opts = - LIST_HEAD_INIT(security_hook_heads.sb_clone_mnt_opts), - .sb_parse_opts_str = - LIST_HEAD_INIT(security_hook_heads.sb_parse_opts_str), - .dentry_init_security = - LIST_HEAD_INIT(security_hook_heads.dentry_init_security), - .dentry_create_files_as = - LIST_HEAD_INIT(security_hook_heads.dentry_create_files_as), -#ifdef CONFIG_SECURITY_PATH - .path_unlink = LIST_HEAD_INIT(security_hook_heads.path_unlink), - .path_mkdir = LIST_HEAD_INIT(security_hook_heads.path_mkdir), - .path_rmdir = LIST_HEAD_INIT(security_hook_heads.path_rmdir), - .path_mknod = LIST_HEAD_INIT(security_hook_heads.path_mknod), - .path_truncate = - LIST_HEAD_INIT(security_hook_heads.path_truncate), - .path_symlink = LIST_HEAD_INIT(security_hook_heads.path_symlink), - .path_link = LIST_HEAD_INIT(security_hook_heads.path_link), - .path_rename = LIST_HEAD_INIT(security_hook_heads.path_rename), - .path_chmod = LIST_HEAD_INIT(security_hook_heads.path_chmod), - .path_chown = LIST_HEAD_INIT(security_hook_heads.path_chown), - .path_chroot = LIST_HEAD_INIT(security_hook_heads.path_chroot), -#endif - .inode_alloc_security = - LIST_HEAD_INIT(security_hook_heads.inode_alloc_security), - .inode_free_security = - LIST_HEAD_INIT(security_hook_heads.inode_free_security), - .inode_init_security = - LIST_HEAD_INIT(security_hook_heads.inode_init_security), - .inode_create = LIST_HEAD_INIT(security_hook_heads.inode_create), - .inode_link = LIST_HEAD_INIT(security_hook_heads.inode_link), - .inode_unlink = LIST_HEAD_INIT(security_hook_heads.inode_unlink), - .inode_symlink = - LIST_HEAD_INIT(security_hook_heads.inode_symlink), - .inode_mkdir = LIST_HEAD_INIT(security_hook_heads.inode_mkdir), - .inode_rmdir = LIST_HEAD_INIT(security_hook_heads.inode_rmdir), - .inode_mknod = LIST_HEAD_INIT(security_hook_heads.inode_mknod), - .inode_rename = LIST_HEAD_INIT(security_hook_heads.inode_rename), - .inode_readlink = - LIST_HEAD_INIT(security_hook_heads.inode_readlink), - .inode_follow_link = - LIST_HEAD_INIT(security_hook_heads.inode_follow_link), - .inode_permission = - LIST_HEAD_INIT(security_hook_heads.inode_permission), - .inode_setattr = - LIST_HEAD_INIT(security_hook_heads.inode_setattr), - .inode_getattr = - LIST_HEAD_INIT(security_hook_heads.inode_getattr), - .inode_setxattr = - LIST_HEAD_INIT(security_hook_heads.inode_setxattr), - .inode_post_setxattr = - LIST_HEAD_INIT(security_hook_heads.inode_post_setxattr), - .inode_getxattr = - LIST_HEAD_INIT(security_hook_heads.inode_getxattr), - .inode_listxattr = - LIST_HEAD_INIT(security_hook_heads.inode_listxattr), - .inode_removexattr = - LIST_HEAD_INIT(security_hook_heads.inode_removexattr), - .inode_need_killpriv = - LIST_HEAD_INIT(security_hook_heads.inode_need_killpriv), - .inode_killpriv = - LIST_HEAD_INIT(security_hook_heads.inode_killpriv), - .inode_getsecurity = - LIST_HEAD_INIT(security_hook_heads.inode_getsecurity), - .inode_setsecurity = - LIST_HEAD_INIT(security_hook_heads.inode_setsecurity), - .inode_listsecurity = - LIST_HEAD_INIT(security_hook_heads.inode_listsecurity), - .inode_getsecid = - LIST_HEAD_INIT(security_hook_heads.inode_getsecid), - .inode_copy_up = - LIST_HEAD_INIT(security_hook_heads.inode_copy_up), - .inode_copy_up_xattr = - LIST_HEAD_INIT(security_hook_heads.inode_copy_up_xattr), - .file_permission = - LIST_HEAD_INIT(security_hook_heads.file_permission), - .file_alloc_security = - LIST_HEAD_INIT(security_hook_heads.file_alloc_security), - .file_free_security = - LIST_HEAD_INIT(security_hook_heads.file_free_security), - .file_ioctl = LIST_HEAD_INIT(security_hook_heads.file_ioctl), - .mmap_addr = LIST_HEAD_INIT(security_hook_heads.mmap_addr), - .mmap_file = LIST_HEAD_INIT(security_hook_heads.mmap_file), - .file_mprotect = - LIST_HEAD_INIT(security_hook_heads.file_mprotect), - .file_lock = LIST_HEAD_INIT(security_hook_heads.file_lock), - .file_fcntl = LIST_HEAD_INIT(security_hook_heads.file_fcntl), - .file_set_fowner = - LIST_HEAD_INIT(security_hook_heads.file_set_fowner), - .file_send_sigiotask = - LIST_HEAD_INIT(security_hook_heads.file_send_sigiotask), - .file_receive = LIST_HEAD_INIT(security_hook_heads.file_receive), - .file_open = LIST_HEAD_INIT(security_hook_heads.file_open), - .task_create = LIST_HEAD_INIT(security_hook_heads.task_create), - .task_free = LIST_HEAD_INIT(security_hook_heads.task_free), - .cred_alloc_blank = - LIST_HEAD_INIT(security_hook_heads.cred_alloc_blank), - .cred_free = LIST_HEAD_INIT(security_hook_heads.cred_free), - .cred_prepare = LIST_HEAD_INIT(security_hook_heads.cred_prepare), - .cred_transfer = - LIST_HEAD_INIT(security_hook_heads.cred_transfer), - .kernel_act_as = - LIST_HEAD_INIT(security_hook_heads.kernel_act_as), - .kernel_create_files_as = - LIST_HEAD_INIT(security_hook_heads.kernel_create_files_as), - .kernel_module_request = - LIST_HEAD_INIT(security_hook_heads.kernel_module_request), - .kernel_read_file = - LIST_HEAD_INIT(security_hook_heads.kernel_read_file), - .kernel_post_read_file = - LIST_HEAD_INIT(security_hook_heads.kernel_post_read_file), - .task_fix_setuid = - LIST_HEAD_INIT(security_hook_heads.task_fix_setuid), - .task_setpgid = LIST_HEAD_INIT(security_hook_heads.task_setpgid), - .task_getpgid = LIST_HEAD_INIT(security_hook_heads.task_getpgid), - .task_getsid = LIST_HEAD_INIT(security_hook_heads.task_getsid), - .task_getsecid = - LIST_HEAD_INIT(security_hook_heads.task_getsecid), - .task_setnice = LIST_HEAD_INIT(security_hook_heads.task_setnice), - .task_setioprio = - LIST_HEAD_INIT(security_hook_heads.task_setioprio), - .task_getioprio = - LIST_HEAD_INIT(security_hook_heads.task_getioprio), - .task_setrlimit = - LIST_HEAD_INIT(security_hook_heads.task_setrlimit), - .task_setscheduler = - LIST_HEAD_INIT(security_hook_heads.task_setscheduler), - .task_getscheduler = - LIST_HEAD_INIT(security_hook_heads.task_getscheduler), - .task_movememory = - LIST_HEAD_INIT(security_hook_heads.task_movememory), - .task_kill = LIST_HEAD_INIT(security_hook_heads.task_kill), - .task_prctl = LIST_HEAD_INIT(security_hook_heads.task_prctl), - .task_to_inode = - LIST_HEAD_INIT(security_hook_heads.task_to_inode), - .ipc_permission = - LIST_HEAD_INIT(security_hook_heads.ipc_permission), - .ipc_getsecid = LIST_HEAD_INIT(security_hook_heads.ipc_getsecid), - .msg_msg_alloc_security = - LIST_HEAD_INIT(security_hook_heads.msg_msg_alloc_security), - .msg_msg_free_security = - LIST_HEAD_INIT(security_hook_heads.msg_msg_free_security), - .msg_queue_alloc_security = - LIST_HEAD_INIT(security_hook_heads.msg_queue_alloc_security), - .msg_queue_free_security = - LIST_HEAD_INIT(security_hook_heads.msg_queue_free_security), - .msg_queue_associate = - LIST_HEAD_INIT(security_hook_heads.msg_queue_associate), - .msg_queue_msgctl = - LIST_HEAD_INIT(security_hook_heads.msg_queue_msgctl), - .msg_queue_msgsnd = - LIST_HEAD_INIT(security_hook_heads.msg_queue_msgsnd), - .msg_queue_msgrcv = - LIST_HEAD_INIT(security_hook_heads.msg_queue_msgrcv), - .shm_alloc_security = - LIST_HEAD_INIT(security_hook_heads.shm_alloc_security), - .shm_free_security = - LIST_HEAD_INIT(security_hook_heads.shm_free_security), - .shm_associate = - LIST_HEAD_INIT(security_hook_heads.shm_associate), - .shm_shmctl = LIST_HEAD_INIT(security_hook_heads.shm_shmctl), - .shm_shmat = LIST_HEAD_INIT(security_hook_heads.shm_shmat), - .sem_alloc_security = - LIST_HEAD_INIT(security_hook_heads.sem_alloc_security), - .sem_free_security = - LIST_HEAD_INIT(security_hook_heads.sem_free_security), - .sem_associate = - LIST_HEAD_INIT(security_hook_heads.sem_associate), - .sem_semctl = LIST_HEAD_INIT(security_hook_heads.sem_semctl), - .sem_semop = LIST_HEAD_INIT(security_hook_heads.sem_semop), - .netlink_send = LIST_HEAD_INIT(security_hook_heads.netlink_send), - .d_instantiate = - LIST_HEAD_INIT(security_hook_heads.d_instantiate), - .getprocattr = LIST_HEAD_INIT(security_hook_heads.getprocattr), - .setprocattr = LIST_HEAD_INIT(security_hook_heads.setprocattr), - .ismaclabel = LIST_HEAD_INIT(security_hook_heads.ismaclabel), - .secid_to_secctx = - LIST_HEAD_INIT(security_hook_heads.secid_to_secctx), - .secctx_to_secid = - LIST_HEAD_INIT(security_hook_heads.secctx_to_secid), - .release_secctx = - LIST_HEAD_INIT(security_hook_heads.release_secctx), - .inode_invalidate_secctx = - LIST_HEAD_INIT(security_hook_heads.inode_invalidate_secctx), - .inode_notifysecctx = - LIST_HEAD_INIT(security_hook_heads.inode_notifysecctx), - .inode_setsecctx = - LIST_HEAD_INIT(security_hook_heads.inode_setsecctx), - .inode_getsecctx = - LIST_HEAD_INIT(security_hook_heads.inode_getsecctx), -#ifdef CONFIG_SECURITY_NETWORK - .unix_stream_connect = - LIST_HEAD_INIT(security_hook_heads.unix_stream_connect), - .unix_may_send = - LIST_HEAD_INIT(security_hook_heads.unix_may_send), - .socket_create = - LIST_HEAD_INIT(security_hook_heads.socket_create), - .socket_post_create = - LIST_HEAD_INIT(security_hook_heads.socket_post_create), - .socket_bind = LIST_HEAD_INIT(security_hook_heads.socket_bind), - .socket_connect = - LIST_HEAD_INIT(security_hook_heads.socket_connect), - .socket_listen = - LIST_HEAD_INIT(security_hook_heads.socket_listen), - .socket_accept = - LIST_HEAD_INIT(security_hook_heads.socket_accept), - .socket_sendmsg = - LIST_HEAD_INIT(security_hook_heads.socket_sendmsg), - .socket_recvmsg = - LIST_HEAD_INIT(security_hook_heads.socket_recvmsg), - .socket_getsockname = - LIST_HEAD_INIT(security_hook_heads.socket_getsockname), - .socket_getpeername = - LIST_HEAD_INIT(security_hook_heads.socket_getpeername), - .socket_getsockopt = - LIST_HEAD_INIT(security_hook_heads.socket_getsockopt), - .socket_setsockopt = - LIST_HEAD_INIT(security_hook_heads.socket_setsockopt), - .socket_shutdown = - LIST_HEAD_INIT(security_hook_heads.socket_shutdown), - .socket_sock_rcv_skb = - LIST_HEAD_INIT(security_hook_heads.socket_sock_rcv_skb), - .socket_getpeersec_stream = - LIST_HEAD_INIT(security_hook_heads.socket_getpeersec_stream), - .socket_getpeersec_dgram = - LIST_HEAD_INIT(security_hook_heads.socket_getpeersec_dgram), - .sk_alloc_security = - LIST_HEAD_INIT(security_hook_heads.sk_alloc_security), - .sk_free_security = - LIST_HEAD_INIT(security_hook_heads.sk_free_security), - .sk_clone_security = - LIST_HEAD_INIT(security_hook_heads.sk_clone_security), - .sk_getsecid = LIST_HEAD_INIT(security_hook_heads.sk_getsecid), - .sock_graft = LIST_HEAD_INIT(security_hook_heads.sock_graft), - .inet_conn_request = - LIST_HEAD_INIT(security_hook_heads.inet_conn_request), - .inet_csk_clone = - LIST_HEAD_INIT(security_hook_heads.inet_csk_clone), - .inet_conn_established = - LIST_HEAD_INIT(security_hook_heads.inet_conn_established), - .secmark_relabel_packet = - LIST_HEAD_INIT(security_hook_heads.secmark_relabel_packet), - .secmark_refcount_inc = - LIST_HEAD_INIT(security_hook_heads.secmark_refcount_inc), - .secmark_refcount_dec = - LIST_HEAD_INIT(security_hook_heads.secmark_refcount_dec), - .req_classify_flow = - LIST_HEAD_INIT(security_hook_heads.req_classify_flow), - .tun_dev_alloc_security = - LIST_HEAD_INIT(security_hook_heads.tun_dev_alloc_security), - .tun_dev_free_security = - LIST_HEAD_INIT(security_hook_heads.tun_dev_free_security), - .tun_dev_create = - LIST_HEAD_INIT(security_hook_heads.tun_dev_create), - .tun_dev_attach_queue = - LIST_HEAD_INIT(security_hook_heads.tun_dev_attach_queue), - .tun_dev_attach = - LIST_HEAD_INIT(security_hook_heads.tun_dev_attach), - .tun_dev_open = LIST_HEAD_INIT(security_hook_heads.tun_dev_open), -#endif /* CONFIG_SECURITY_NETWORK */ -#ifdef CONFIG_SECURITY_NETWORK_XFRM - .xfrm_policy_alloc_security = - LIST_HEAD_INIT(security_hook_heads.xfrm_policy_alloc_security), - .xfrm_policy_clone_security = - LIST_HEAD_INIT(security_hook_heads.xfrm_policy_clone_security), - .xfrm_policy_free_security = - LIST_HEAD_INIT(security_hook_heads.xfrm_policy_free_security), - .xfrm_policy_delete_security = - LIST_HEAD_INIT(security_hook_heads.xfrm_policy_delete_security), - .xfrm_state_alloc = - LIST_HEAD_INIT(security_hook_heads.xfrm_state_alloc), - .xfrm_state_alloc_acquire = - LIST_HEAD_INIT(security_hook_heads.xfrm_state_alloc_acquire), - .xfrm_state_free_security = - LIST_HEAD_INIT(security_hook_heads.xfrm_state_free_security), - .xfrm_state_delete_security = - LIST_HEAD_INIT(security_hook_heads.xfrm_state_delete_security), - .xfrm_policy_lookup = - LIST_HEAD_INIT(security_hook_heads.xfrm_policy_lookup), - .xfrm_state_pol_flow_match = - LIST_HEAD_INIT(security_hook_heads.xfrm_state_pol_flow_match), - .xfrm_decode_session = - LIST_HEAD_INIT(security_hook_heads.xfrm_decode_session), -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ -#ifdef CONFIG_KEYS - .key_alloc = LIST_HEAD_INIT(security_hook_heads.key_alloc), - .key_free = LIST_HEAD_INIT(security_hook_heads.key_free), - .key_permission = - LIST_HEAD_INIT(security_hook_heads.key_permission), - .key_getsecurity = - LIST_HEAD_INIT(security_hook_heads.key_getsecurity), -#endif /* CONFIG_KEYS */ -#ifdef CONFIG_AUDIT - .audit_rule_init = - LIST_HEAD_INIT(security_hook_heads.audit_rule_init), - .audit_rule_known = - LIST_HEAD_INIT(security_hook_heads.audit_rule_known), - .audit_rule_match = - LIST_HEAD_INIT(security_hook_heads.audit_rule_match), - .audit_rule_free = - LIST_HEAD_INIT(security_hook_heads.audit_rule_free), -#endif /* CONFIG_AUDIT */ -}; diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index ea7e3efbe0f758ed51dba589e18bd07edd61b76f..8af7a690eb40a15d06f1cba8b5ee25f60c8cdbf9 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -40,6 +40,7 @@ config SECURITY_SELINUX_BOOTPARAM_VALUE config SECURITY_SELINUX_DISABLE bool "NSA SELinux runtime disable" depends on SECURITY_SELINUX + select SECURITY_WRITABLE_HOOKS default n help This option enables writing to a selinuxfs node 'disable', which @@ -50,6 +51,11 @@ config SECURITY_SELINUX_DISABLE portability across platforms where boot parameters are difficult to employ. + NOTE: selecting this option will disable the '__ro_after_init' + kernel hardening feature for security hooks. Please consider + using the selinux=0 boot parameter instead of enabling this + option. + If you are unsure how to answer this question, answer N. config SECURITY_SELINUX_DEVELOP diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 0c2ac318aa7fb8bc11830e7c8c10fe6730f49c46..819fd6858b499dca6ad37e04d11230c4daad6ff5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1106,10 +1106,8 @@ static int selinux_parse_opts_str(char *options, opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_KERNEL); - if (!opts->mnt_opts_flags) { - kfree(opts->mnt_opts); + if (!opts->mnt_opts_flags) goto out_err; - } if (fscontext) { opts->mnt_opts[num_mnt_opts] = fscontext; @@ -1132,6 +1130,7 @@ static int selinux_parse_opts_str(char *options, return 0; out_err: + security_free_mnt_opts(opts); kfree(context); kfree(defcontext); kfree(fscontext); @@ -3920,6 +3919,21 @@ static int selinux_task_getioprio(struct task_struct *p) PROCESS__GETSCHED, NULL); } +int selinux_task_prlimit(const struct cred *cred, const struct cred *tcred, + unsigned int flags) +{ + u32 av = 0; + + if (!flags) + return 0; + if (flags & LSM_PRLIMIT_WRITE) + av |= PROCESS__SETRLIMIT; + if (flags & LSM_PRLIMIT_READ) + av |= PROCESS__GETRLIMIT; + return avc_has_perm(cred_sid(cred), cred_sid(tcred), + SECCLASS_PROCESS, av, NULL); +} + static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) { @@ -4352,10 +4366,18 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in u32 sid, node_perm; if (family == PF_INET) { + if (addrlen < sizeof(struct sockaddr_in)) { + err = -EINVAL; + goto out; + } addr4 = (struct sockaddr_in *)address; snum = ntohs(addr4->sin_port); addrp = (char *)&addr4->sin_addr.s_addr; } else { + if (addrlen < SIN6_LEN_RFC2133) { + err = -EINVAL; + goto out; + } addr6 = (struct sockaddr_in6 *)address; snum = ntohs(addr6->sin6_port); addrp = (char *)&addr6->sin6_addr.s6_addr; @@ -6108,7 +6130,7 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) #endif -static struct security_hook_list selinux_hooks[] = { +static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), LSM_HOOK_INIT(binder_transfer_binder, selinux_binder_transfer_binder), @@ -6206,6 +6228,7 @@ static struct security_hook_list selinux_hooks[] = { LSM_HOOK_INIT(task_setnice, selinux_task_setnice), LSM_HOOK_INIT(task_setioprio, selinux_task_setioprio), LSM_HOOK_INIT(task_getioprio, selinux_task_getioprio), + LSM_HOOK_INIT(task_prlimit, selinux_task_prlimit), LSM_HOOK_INIT(task_setrlimit, selinux_task_setrlimit), LSM_HOOK_INIT(task_setscheduler, selinux_task_setscheduler), LSM_HOOK_INIT(task_getscheduler, selinux_task_getscheduler), diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index d429c4a1c551536d090c264e6e60f9fc7311a2d2..1e0cc9b5de207bd41068274099aed97d19203898 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -47,7 +47,7 @@ struct security_class_mapping secclass_map[] = { "getattr", "setexec", "setfscreate", "noatsecure", "siginh", "setrlimit", "rlimitinh", "dyntransition", "setcurrent", "execmem", "execstack", "execheap", "setkeycreate", - "setsockcreate", NULL } }, + "setsockcreate", "getrlimit", NULL } }, { "system", { "ipc_info", "syslog_read", "syslog_mod", "syslog_console", "module_request", "module_load", NULL } }, diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 2ca9cde939d44976365aa67fe72f51be9b92897d..5aeaf30b7a1315a617f5c2d673043e959969fe14 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -28,7 +28,7 @@ struct nlmsg_perm { u32 perm; }; -static struct nlmsg_perm nlmsg_route_perms[] = +static const struct nlmsg_perm nlmsg_route_perms[] = { { RTM_NEWLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, @@ -69,6 +69,7 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_DELNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, @@ -80,7 +81,7 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; -static struct nlmsg_perm nlmsg_tcpdiag_perms[] = +static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, @@ -88,7 +89,7 @@ static struct nlmsg_perm nlmsg_tcpdiag_perms[] = { SOCK_DESTROY, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE }, }; -static struct nlmsg_perm nlmsg_xfrm_perms[] = +static const struct nlmsg_perm nlmsg_xfrm_perms[] = { { XFRM_MSG_NEWSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_DELSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, @@ -115,7 +116,7 @@ static struct nlmsg_perm nlmsg_xfrm_perms[] = { XFRM_MSG_MAPPING, NETLINK_XFRM_SOCKET__NLMSG_READ }, }; -static struct nlmsg_perm nlmsg_audit_perms[] = +static const struct nlmsg_perm nlmsg_audit_perms[] = { { AUDIT_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_SET, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, @@ -136,7 +137,7 @@ static struct nlmsg_perm nlmsg_audit_perms[] = }; -static int nlmsg_perm(u16 nlmsg_type, u32 *perm, struct nlmsg_perm *tab, size_t tabsize) +static int nlmsg_perm(u16 nlmsg_type, u32 *perm, const struct nlmsg_perm *tab, size_t tabsize) { int i, err = -EINVAL; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index cb3fd98fb05ae7df77b2ad20bf640d38b5133123..50062e70140dcd2c80f88a0be2d1663b0f0c6d86 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1456,10 +1456,10 @@ static int sel_avc_stats_seq_show(struct seq_file *seq, void *v) { struct avc_cache_stats *st = v; - if (v == SEQ_START_TOKEN) - seq_printf(seq, "lookups hits misses allocations reclaims " - "frees\n"); - else { + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "lookups hits misses allocations reclaims frees\n"); + } else { unsigned int lookups = st->lookups; unsigned int misses = st->misses; unsigned int hits = lookups - misses; @@ -1496,7 +1496,7 @@ static const struct file_operations sel_avc_cache_stats_ops = { static int sel_make_avc_files(struct dentry *dir) { int i; - static struct tree_descr files[] = { + static const struct tree_descr files[] = { { "cache_threshold", &sel_avc_cache_threshold_ops, S_IRUGO|S_IWUSR }, { "hash_stats", &sel_avc_hash_stats_ops, S_IRUGO }, @@ -1805,7 +1805,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; struct inode_security_struct *isec; - static struct tree_descr selinux_files[] = { + static const struct tree_descr selinux_files[] = { [SEL_LOAD] = {"load", &sel_load_ops, S_IRUSR|S_IWUSR}, [SEL_ENFORCE] = {"enforce", &sel_enforce_ops, S_IRUGO|S_IWUSR}, [SEL_CONTEXT] = {"context", &transaction_ops, S_IRUGO|S_IWUGO}, diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 34afeadd9e73ad458dcf0eefe29e1b62b83b913a..771c96afe1d53b9a4735e7f850d1266af31e8fe7 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -176,8 +176,9 @@ void cond_policydb_destroy(struct policydb *p) int cond_init_bool_indexes(struct policydb *p) { kfree(p->bool_val_to_struct); - p->bool_val_to_struct = - kmalloc(p->p_bools.nprim * sizeof(struct cond_bool_datum *), GFP_KERNEL); + p->bool_val_to_struct = kmalloc_array(p->p_bools.nprim, + sizeof(*p->bool_val_to_struct), + GFP_KERNEL); if (!p->bool_val_to_struct) return -ENOMEM; return 0; @@ -226,7 +227,7 @@ int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp) u32 len; int rc; - booldatum = kzalloc(sizeof(struct cond_bool_datum), GFP_KERNEL); + booldatum = kzalloc(sizeof(*booldatum), GFP_KERNEL); if (!booldatum) return -ENOMEM; @@ -331,7 +332,7 @@ static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum goto err; } - list = kzalloc(sizeof(struct cond_av_list), GFP_KERNEL); + list = kzalloc(sizeof(*list), GFP_KERNEL); if (!list) { rc = -ENOMEM; goto err; @@ -420,7 +421,7 @@ static int cond_read_node(struct policydb *p, struct cond_node *node, void *fp) goto err; rc = -ENOMEM; - expr = kzalloc(sizeof(struct cond_expr), GFP_KERNEL); + expr = kzalloc(sizeof(*expr), GFP_KERNEL); if (!expr) goto err; @@ -471,7 +472,7 @@ int cond_read_list(struct policydb *p, void *fp) for (i = 0; i < len; i++) { rc = -ENOMEM; - node = kzalloc(sizeof(struct cond_node), GFP_KERNEL); + node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) goto err; @@ -663,5 +664,4 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, (node->key.specified & AVTAB_XPERMS)) services_compute_xperms_drivers(xperms, node); } - return; } diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 2cc496149842825cd90e57a087c57314295f73fc..3858706a29fbb9d885e0722b8e3a06cdaa0ffcd4 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -17,15 +17,15 @@ struct hashtab *hashtab_create(u32 (*hash_value)(struct hashtab *h, const void * u32 i; p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + if (!p) return p; p->size = size; p->nel = 0; p->hash_value = hash_value; p->keycmp = keycmp; - p->htable = kmalloc(sizeof(*(p->htable)) * size, GFP_KERNEL); - if (p->htable == NULL) { + p->htable = kmalloc_array(size, sizeof(*p->htable), GFP_KERNEL); + if (!p->htable) { kfree(p); return NULL; } @@ -58,7 +58,7 @@ int hashtab_insert(struct hashtab *h, void *key, void *datum) return -EEXIST; newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); - if (newnode == NULL) + if (!newnode) return -ENOMEM; newnode->key = key; newnode->datum = datum; @@ -87,7 +87,7 @@ void *hashtab_search(struct hashtab *h, const void *key) while (cur && h->keycmp(h, key, cur->key) > 0) cur = cur->next; - if (cur == NULL || (h->keycmp(h, key, cur->key) != 0)) + if (!cur || (h->keycmp(h, key, cur->key) != 0)) return NULL; return cur->datum; diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 9c92f29a38ea4fa9b481f0e06a0e640b5538751a..0080122760ad4ad79fb5b84bada231eff4e7ce83 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -178,10 +178,9 @@ static int roles_init(struct policydb *p) int rc; struct role_datum *role; - rc = -ENOMEM; role = kzalloc(sizeof(*role), GFP_KERNEL); if (!role) - goto out; + return -ENOMEM; rc = -EINVAL; role->value = ++p->p_roles.nprim; @@ -540,23 +539,23 @@ static int policydb_index(struct policydb *p) #endif rc = -ENOMEM; - p->class_val_to_struct = - kzalloc(p->p_classes.nprim * sizeof(*(p->class_val_to_struct)), - GFP_KERNEL); + p->class_val_to_struct = kcalloc(p->p_classes.nprim, + sizeof(*p->class_val_to_struct), + GFP_KERNEL); if (!p->class_val_to_struct) goto out; rc = -ENOMEM; - p->role_val_to_struct = - kzalloc(p->p_roles.nprim * sizeof(*(p->role_val_to_struct)), - GFP_KERNEL); + p->role_val_to_struct = kcalloc(p->p_roles.nprim, + sizeof(*p->role_val_to_struct), + GFP_KERNEL); if (!p->role_val_to_struct) goto out; rc = -ENOMEM; - p->user_val_to_struct = - kzalloc(p->p_users.nprim * sizeof(*(p->user_val_to_struct)), - GFP_KERNEL); + p->user_val_to_struct = kcalloc(p->p_users.nprim, + sizeof(*p->user_val_to_struct), + GFP_KERNEL); if (!p->user_val_to_struct) goto out; @@ -880,8 +879,6 @@ void policydb_destroy(struct policydb *p) ebitmap_destroy(&p->filename_trans_ttypes); ebitmap_destroy(&p->policycaps); ebitmap_destroy(&p->permissive_map); - - return; } /* @@ -1120,10 +1117,9 @@ static int perm_read(struct policydb *p, struct hashtab *h, void *fp) __le32 buf[2]; u32 len; - rc = -ENOMEM; perdatum = kzalloc(sizeof(*perdatum), GFP_KERNEL); if (!perdatum) - goto bad; + return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) @@ -1154,10 +1150,9 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp) u32 len, nel; int i, rc; - rc = -ENOMEM; comdatum = kzalloc(sizeof(*comdatum), GFP_KERNEL); if (!comdatum) - goto bad; + return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) @@ -1320,10 +1315,9 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp) u32 len, len2, ncons, nel; int i, rc; - rc = -ENOMEM; cladatum = kzalloc(sizeof(*cladatum), GFP_KERNEL); if (!cladatum) - goto bad; + return -ENOMEM; rc = next_entry(buf, fp, sizeof(u32)*6); if (rc) @@ -1414,10 +1408,9 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp) __le32 buf[3]; u32 len; - rc = -ENOMEM; role = kzalloc(sizeof(*role), GFP_KERNEL); if (!role) - goto bad; + return -ENOMEM; if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) to_read = 3; @@ -1471,10 +1464,9 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp) __le32 buf[4]; u32 len; - rc = -ENOMEM; typdatum = kzalloc(sizeof(*typdatum), GFP_KERNEL); if (!typdatum) - goto bad; + return -ENOMEM; if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) to_read = 4; @@ -1546,10 +1538,9 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp) __le32 buf[3]; u32 len; - rc = -ENOMEM; usrdatum = kzalloc(sizeof(*usrdatum), GFP_KERNEL); if (!usrdatum) - goto bad; + return -ENOMEM; if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) to_read = 3; @@ -1597,10 +1588,9 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp) __le32 buf[2]; u32 len; - rc = -ENOMEM; levdatum = kzalloc(sizeof(*levdatum), GFP_ATOMIC); if (!levdatum) - goto bad; + return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) @@ -1614,7 +1604,7 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp) goto bad; rc = -ENOMEM; - levdatum->level = kmalloc(sizeof(struct mls_level), GFP_ATOMIC); + levdatum->level = kmalloc(sizeof(*levdatum->level), GFP_ATOMIC); if (!levdatum->level) goto bad; @@ -1639,10 +1629,9 @@ static int cat_read(struct policydb *p, struct hashtab *h, void *fp) __le32 buf[3]; u32 len; - rc = -ENOMEM; catdatum = kzalloc(sizeof(*catdatum), GFP_ATOMIC); if (!catdatum) - goto bad; + return -ENOMEM; rc = next_entry(buf, fp, sizeof buf); if (rc) @@ -1854,7 +1843,7 @@ static int range_read(struct policydb *p, void *fp) rc = next_entry(buf, fp, sizeof(u32)); if (rc) - goto out; + return rc; nel = le32_to_cpu(buf[0]); for (i = 0; i < nel; i++) { @@ -1931,7 +1920,6 @@ static int filename_trans_read(struct policydb *p, void *fp) nel = le32_to_cpu(buf[0]); for (i = 0; i < nel; i++) { - ft = NULL; otype = NULL; name = NULL; @@ -2008,7 +1996,7 @@ static int genfs_read(struct policydb *p, void *fp) rc = next_entry(buf, fp, sizeof(u32)); if (rc) - goto out; + return rc; nel = le32_to_cpu(buf[0]); for (i = 0; i < nel; i++) { @@ -2100,9 +2088,10 @@ static int genfs_read(struct policydb *p, void *fp) } rc = 0; out: - if (newgenfs) + if (newgenfs) { kfree(newgenfs->fstype); - kfree(newgenfs); + kfree(newgenfs); + } ocontext_destroy(newc, OCON_FSUSE); return rc; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b4aa491a0a23d8e025f00fde82b317952816773f..60d9b02523215aa37b0399e1446726e05b99cb8f 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -157,7 +157,7 @@ static int selinux_set_mapping(struct policydb *pol, } k = 0; - while (p_in->perms && p_in->perms[k]) { + while (p_in->perms[k]) { /* An empty permission string skips ahead */ if (!*p_in->perms[k]) { k++; diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 5840a35155fc3d613ec10abceee8420c1b40073f..f6915f257486a4c3840af609a4dfa86b6cd65c63 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -18,7 +18,7 @@ int sidtab_init(struct sidtab *s) { int i; - s->htable = kmalloc(sizeof(*(s->htable)) * SIDTAB_SIZE, GFP_ATOMIC); + s->htable = kmalloc_array(SIDTAB_SIZE, sizeof(*s->htable), GFP_ATOMIC); if (!s->htable) return -ENOMEM; for (i = 0; i < SIDTAB_SIZE; i++) @@ -54,7 +54,7 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) } newnode = kmalloc(sizeof(*newnode), GFP_ATOMIC); - if (newnode == NULL) { + if (!newnode) { rc = -ENOMEM; goto out; } @@ -98,7 +98,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) if (force && cur && sid == cur->sid && cur->context.len) return &cur->context; - if (cur == NULL || sid != cur->sid || cur->context.len) { + if (!cur || sid != cur->sid || cur->context.len) { /* Remap invalid SIDs to the unlabeled SID. */ sid = SECINITSID_UNLABELED; hvalue = SIDTAB_HASH(sid); diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 356e3764cad9e93b80dfeac5a3010e391e9339c1..a4b2e6b94abd49f8dde1aa04c4f61f99b40a0ff5 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -504,7 +504,7 @@ int smk_netlbl_mls(int level, char *catset, struct netlbl_lsm_secattr *sap, if ((m & *cp) == 0) continue; rc = netlbl_catmap_setbit(&sap->attr.mls.cat, - cat, GFP_ATOMIC); + cat, GFP_KERNEL); if (rc < 0) { netlbl_catmap_free(sap->attr.mls.cat); return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index fc8fb31fc24f6ef1041806c574ebb21a8697f56f..658f5d8c7e76671a27b2629c61ba93eeb449d4a8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -695,10 +695,8 @@ static int smack_parse_opts_str(char *options, opts->mnt_opts_flags = kcalloc(NUM_SMK_MNT_OPTS, sizeof(int), GFP_KERNEL); - if (!opts->mnt_opts_flags) { - kfree(opts->mnt_opts); + if (!opts->mnt_opts_flags) goto out_err; - } if (fsdefault) { opts->mnt_opts[num_mnt_opts] = fsdefault; @@ -4633,7 +4631,7 @@ static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) return 0; } -static struct security_hook_list smack_hooks[] = { +static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, smack_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), LSM_HOOK_INIT(syslog, smack_syslog), diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 366b8356f75b45315c43b6067aa569680c20eea4..f6482e53d55a82d97882d40d04736247281a4239 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2855,7 +2855,7 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) int rc; struct inode *root_inode; - static struct tree_descr smack_files[] = { + static const struct tree_descr smack_files[] = { [SMK_LOAD] = { "load", &smk_load_ops, S_IRUGO|S_IWUSR}, [SMK_CIPSO] = { diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index 7041a580019ec56d356fd6c009cff6518edabc27..223f21ffa632e5f3bb1344f174e75eb50e3d74ab 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -692,7 +692,7 @@ int tomoyo_path_number_perm(const u8 type, const struct path *path, { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { - .path1 = *path, + .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error = -ENOMEM; struct tomoyo_path_info buf; @@ -740,7 +740,7 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, struct tomoyo_path_info buf; struct tomoyo_request_info r; struct tomoyo_obj_info obj = { - .path1 = *path, + .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int idx; @@ -786,7 +786,7 @@ int tomoyo_path_perm(const u8 operation, const struct path *path, const char *ta { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { - .path1 = *path, + .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error; struct tomoyo_path_info buf; @@ -843,7 +843,7 @@ int tomoyo_mkdev_perm(const u8 operation, const struct path *path, { struct tomoyo_request_info r; struct tomoyo_obj_info obj = { - .path1 = *path, + .path1 = { .mnt = path->mnt, .dentry = path->dentry }, }; int error = -ENOMEM; struct tomoyo_path_info buf; @@ -890,8 +890,8 @@ int tomoyo_path2_perm(const u8 operation, const struct path *path1, struct tomoyo_path_info buf2; struct tomoyo_request_info r; struct tomoyo_obj_info obj = { - .path1 = *path1, - .path2 = *path2, + .path1 = { .mnt = path1->mnt, .dentry = path1->dentry }, + .path2 = { .mnt = path2->mnt, .dentry = path2->dentry } }; int idx; diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c index 97527710a72a5bd41679337c59d82c87b44928a9..6c02ac4782477f35c8a79c3289714d3e503cd31d 100644 --- a/security/tomoyo/network.c +++ b/security/tomoyo/network.c @@ -608,7 +608,7 @@ static int tomoyo_check_unix_address(struct sockaddr *addr, static bool tomoyo_kernel_service(void) { /* Nothing to do if I am a kernel service. */ - return segment_eq(get_fs(), KERNEL_DS); + return uaccess_kernel(); } /** diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index edc52d620f29cf7b027dd962ab0976a75eef3ba4..130b4fa4f65fde0771952399a8bc4e2db60b790c 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -165,7 +165,7 @@ static int tomoyo_path_truncate(const struct path *path) */ static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) { - struct path path = { parent->mnt, dentry }; + struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL); } @@ -181,7 +181,7 @@ static int tomoyo_path_unlink(const struct path *parent, struct dentry *dentry) static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, umode_t mode) { - struct path path = { parent->mnt, dentry }; + struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_number_perm(TOMOYO_TYPE_MKDIR, &path, mode & S_IALLUGO); } @@ -196,7 +196,7 @@ static int tomoyo_path_mkdir(const struct path *parent, struct dentry *dentry, */ static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) { - struct path path = { parent->mnt, dentry }; + struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL); } @@ -212,7 +212,7 @@ static int tomoyo_path_rmdir(const struct path *parent, struct dentry *dentry) static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry, const char *old_name) { - struct path path = { parent->mnt, dentry }; + struct path path = { .mnt = parent->mnt, .dentry = dentry }; return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path, old_name); } @@ -229,7 +229,7 @@ static int tomoyo_path_symlink(const struct path *parent, struct dentry *dentry, static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, umode_t mode, unsigned int dev) { - struct path path = { parent->mnt, dentry }; + struct path path = { .mnt = parent->mnt, .dentry = dentry }; int type = TOMOYO_TYPE_CREATE; const unsigned int perm = mode & S_IALLUGO; @@ -268,8 +268,8 @@ static int tomoyo_path_mknod(const struct path *parent, struct dentry *dentry, static int tomoyo_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { - struct path path1 = { new_dir->mnt, old_dentry }; - struct path path2 = { new_dir->mnt, new_dentry }; + struct path path1 = { .mnt = new_dir->mnt, .dentry = old_dentry }; + struct path path2 = { .mnt = new_dir->mnt, .dentry = new_dentry }; return tomoyo_path2_perm(TOMOYO_TYPE_LINK, &path1, &path2); } @@ -288,8 +288,8 @@ static int tomoyo_path_rename(const struct path *old_parent, const struct path *new_parent, struct dentry *new_dentry) { - struct path path1 = { old_parent->mnt, old_dentry }; - struct path path2 = { new_parent->mnt, new_dentry }; + struct path path1 = { .mnt = old_parent->mnt, .dentry = old_dentry }; + struct path path2 = { .mnt = new_parent->mnt, .dentry = new_dentry }; return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2); } @@ -417,7 +417,7 @@ static int tomoyo_sb_mount(const char *dev_name, const struct path *path, */ static int tomoyo_sb_umount(struct vfsmount *mnt, int flags) { - struct path path = { mnt, mnt->mnt_root }; + struct path path = { .mnt = mnt, .dentry = mnt->mnt_root }; return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path, NULL); } @@ -496,7 +496,7 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, * tomoyo_security_ops is a "struct security_operations" which is used for * registering TOMOYO. */ -static struct security_hook_list tomoyo_hooks[] = { +static struct security_hook_list tomoyo_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(cred_alloc_blank, tomoyo_cred_alloc_blank), LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), LSM_HOOK_INIT(cred_transfer, tomoyo_cred_transfer), diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 88271a3bf37f8378fbc6be1d63dd6b4d2de8054c..8298e094f4f7fa6338814902f3c6067b276a6032 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -428,7 +428,7 @@ int yama_ptrace_traceme(struct task_struct *parent) return rc; } -static struct security_hook_list yama_hooks[] = { +static struct security_hook_list yama_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, yama_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, yama_ptrace_traceme), LSM_HOOK_INIT(task_prctl, yama_task_prctl), diff --git a/sound/Kconfig b/sound/Kconfig index ee2e69a9ecd14ecd9fef1107c7a803f0b1a836de..6a215a8c0490dda16f094aa061bc9b9b1791a609 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -115,6 +115,7 @@ endif # SND menuconfig SOUND_PRIME tristate "Open Sound System (DEPRECATED)" select SOUND_OSS_CORE + depends on BROKEN help Say 'Y' or 'M' to enable Open Sound System drivers. diff --git a/sound/core/seq/seq_lock.c b/sound/core/seq/seq_lock.c index 3b693e924db745c0ec8be74171bb189f17dfc53d..12ba83367b1bc882f6d6fbab9329185d58125090 100644 --- a/sound/core/seq/seq_lock.c +++ b/sound/core/seq/seq_lock.c @@ -28,19 +28,16 @@ /* wait until all locks are released */ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) { - int max_count = 5 * HZ; + int warn_count = 5 * HZ; if (atomic_read(lockp) < 0) { pr_warn("ALSA: seq_lock: lock trouble [counter = %d] in %s:%d\n", atomic_read(lockp), file, line); return; } while (atomic_read(lockp) > 0) { - if (max_count == 0) { - pr_warn("ALSA: seq_lock: timeout [%d left] in %s:%d\n", atomic_read(lockp), file, line); - break; - } + if (warn_count-- == 0) + pr_warn("ALSA: seq_lock: waiting [%d left] in %s:%d\n", atomic_read(lockp), file, line); schedule_timeout_uninterruptible(1); - max_count--; } } diff --git a/sound/core/timer.c b/sound/core/timer.c index 6d4fbc439246925712f21c1b82e6b46ec859e021..cd67d1c12cf1ca9a32daa4de797dc0a5ec7bbb86 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1277,6 +1277,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, struct timespec tstamp; int prev, append = 0; + memset(&r1, 0, sizeof(r1)); memset(&tstamp, 0, sizeof(tstamp)); spin_lock(&tu->qlock); if ((tu->filter & ((1 << SNDRV_TIMER_EVENT_RESOLUTION) | @@ -1292,7 +1293,6 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, } if ((tu->filter & (1 << SNDRV_TIMER_EVENT_RESOLUTION)) && tu->last_resolution != resolution) { - memset(&r1, 0, sizeof(r1)); r1.event = SNDRV_TIMER_EVENT_RESOLUTION; r1.tstamp = tstamp; r1.val = resolution; @@ -1430,18 +1430,13 @@ static int snd_timer_user_next_device(struct snd_timer_id __user *_tid) if (id.card < 0) { id.card = 0; } else { - if (id.card < 0) { - id.card = 0; + if (id.device < 0) { + id.device = 0; } else { - if (id.device < 0) { - id.device = 0; - } else { - if (id.subdevice < 0) { - id.subdevice = 0; - } else { - id.subdevice++; - } - } + if (id.subdevice < 0) + id.subdevice = 0; + else + id.subdevice++; } } list_for_each(p, &snd_timer_list) { @@ -1623,6 +1618,7 @@ static int snd_timer_user_tselect(struct file *file, if (err < 0) goto __err; + tu->qhead = tu->qtail = tu->qused = 0; kfree(tu->queue); tu->queue = NULL; kfree(tu->tqueue); @@ -1964,6 +1960,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, tu = file->private_data; unit = tu->tread ? sizeof(struct snd_timer_tread) : sizeof(struct snd_timer_read); + mutex_lock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); while ((long)count - result >= unit) { while (!tu->qused) { @@ -1979,7 +1976,9 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, add_wait_queue(&tu->qchange_sleep, &wait); spin_unlock_irq(&tu->qlock); + mutex_unlock(&tu->ioctl_lock); schedule(); + mutex_lock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); remove_wait_queue(&tu->qchange_sleep, &wait); @@ -1999,7 +1998,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, tu->qused--; spin_unlock_irq(&tu->qlock); - mutex_lock(&tu->ioctl_lock); if (tu->tread) { if (copy_to_user(buffer, &tu->tqueue[qhead], sizeof(struct snd_timer_tread))) @@ -2009,7 +2007,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, sizeof(struct snd_timer_read))) err = -EFAULT; } - mutex_unlock(&tu->ioctl_lock); spin_lock_irq(&tu->qlock); if (err < 0) @@ -2019,6 +2016,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, } _error: spin_unlock_irq(&tu->qlock); + mutex_unlock(&tu->ioctl_lock); return result > 0 ? result : err; } diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c index fed7e7e2177b7c0dbfc09c0fb20d8c15ad958b07..9b86e00d7d95ffada3ac89b52671be20a2d30d38 100644 --- a/sound/drivers/mpu401/mpu401.c +++ b/sound/drivers/mpu401/mpu401.c @@ -53,9 +53,9 @@ MODULE_PARM_DESC(enable, "Enable MPU-401 device."); module_param_array(pnp, bool, NULL, 0444); MODULE_PARM_DESC(pnp, "PnP detection for MPU-401 device."); #endif -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for MPU-401 device."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for MPU-401 device."); module_param_array(uart_enter, bool, NULL, 0444); MODULE_PARM_DESC(uart_enter, "Issue UART_ENTER command at open."); diff --git a/sound/drivers/mtpav.c b/sound/drivers/mtpav.c index 00b31f92c504ddf7c7a9810b38d6b411a66a22a3..0f6392001e307ca008957d0760b76e1083d779ce 100644 --- a/sound/drivers/mtpav.c +++ b/sound/drivers/mtpav.c @@ -86,9 +86,9 @@ module_param(index, int, 0444); MODULE_PARM_DESC(index, "Index value for MotuMTPAV MIDI."); module_param(id, charp, 0444); MODULE_PARM_DESC(id, "ID string for MotuMTPAV MIDI."); -module_param(port, long, 0444); +module_param_hw(port, long, ioport, 0444); MODULE_PARM_DESC(port, "Parallel port # for MotuMTPAV MIDI."); -module_param(irq, int, 0444); +module_param_hw(irq, int, irq, 0444); MODULE_PARM_DESC(irq, "Parallel IRQ # for MotuMTPAV MIDI."); module_param(hwports, int, 0444); MODULE_PARM_DESC(hwports, "Hardware ports # for MotuMTPAV MIDI."); diff --git a/sound/drivers/serial-u16550.c b/sound/drivers/serial-u16550.c index 60d51ac4ccfebded6c55e47812fdd16c67ae4359..88e66ea0306dd820b82e6339c4e2640ffce426cc 100644 --- a/sound/drivers/serial-u16550.c +++ b/sound/drivers/serial-u16550.c @@ -84,9 +84,9 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for Serial MIDI."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable UART16550A chip."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for UART16550A chip."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for UART16550A chip."); module_param_array(speed, int, NULL, 0444); MODULE_PARM_DESC(speed, "Speed in bauds."); diff --git a/sound/drivers/vx/vx_core.c b/sound/drivers/vx/vx_core.c index 289f041706cd2526be2743281ef6f1b94be49a5b..f684fffd1397552096fda2f74c45a97e173469d7 100644 --- a/sound/drivers/vx/vx_core.c +++ b/sound/drivers/vx/vx_core.c @@ -795,10 +795,8 @@ struct vx_core *snd_vx_create(struct snd_card *card, struct snd_vx_hardware *hw, return NULL; chip = kzalloc(sizeof(*chip) + extra_size, GFP_KERNEL); - if (! chip) { - snd_printk(KERN_ERR "vx_core: no memory\n"); + if (! chip) return NULL; - } mutex_init(&chip->lock); chip->irq = -1; chip->hw = hw; diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig index 9f00696c4e4a1dc1b1ec8204ca680871c3341ce3..529d9f405fa9f04e5c241a35c0488b98d6e0c993 100644 --- a/sound/firewire/Kconfig +++ b/sound/firewire/Kconfig @@ -140,4 +140,24 @@ config SND_FIREWIRE_TASCAM To compile this driver as a module, choose M here: the module will be called snd-firewire-tascam. +config SND_FIREWIRE_MOTU + tristate "Mark of the unicorn FireWire series support" + select SND_FIREWIRE_LIB + select SND_HWDEP + help + Say Y here to enable support for FireWire devices which MOTU produced: + * 828mk2 + * 828mk3 + + To compile this driver as a module, choose M here: the module + will be called snd-firewire-motu. + +config SND_FIREFACE + tristate "RME Fireface series support" + select SND_FIREWIRE_LIB + select SND_HWDEP + help + Say Y here to include support for RME fireface series. + * Fireface 400 + endif # SND_FIREWIRE diff --git a/sound/firewire/Makefile b/sound/firewire/Makefile index 0ee1fb115d8833bb076b74079de1b1222d8d98e3..1b98fa3fa3d460602b3a15871fa8f5bca68c8d1d 100644 --- a/sound/firewire/Makefile +++ b/sound/firewire/Makefile @@ -13,3 +13,5 @@ obj-$(CONFIG_SND_FIREWORKS) += fireworks/ obj-$(CONFIG_SND_BEBOB) += bebob/ obj-$(CONFIG_SND_FIREWIRE_DIGI00X) += digi00x/ obj-$(CONFIG_SND_FIREWIRE_TASCAM) += tascam/ +obj-$(CONFIG_SND_FIREWIRE_MOTU) += motu/ +obj-$(CONFIG_SND_FIREFACE) += fireface/ diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h index 9c04faf206b287e5f949120f5fa2860a5690964b..ea0d486652c838cb08a424eed7d347315bceda42 100644 --- a/sound/firewire/amdtp-stream-trace.h +++ b/sound/firewire/amdtp-stream-trace.h @@ -14,8 +14,8 @@ #include TRACE_EVENT(in_packet, - TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_quadlets, unsigned int index), - TP_ARGS(s, cycles, cip_header, payload_quadlets, index), + TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_length, unsigned int index), + TP_ARGS(s, cycles, cip_header, payload_length, index), TP_STRUCT__entry( __field(unsigned int, second) __field(unsigned int, cycle) @@ -37,7 +37,7 @@ TRACE_EVENT(in_packet, __entry->dest = fw_parent_device(s->unit)->card->node_id; __entry->cip_header0 = cip_header[0]; __entry->cip_header1 = cip_header[1]; - __entry->payload_quadlets = payload_quadlets; + __entry->payload_quadlets = payload_length / 4; __entry->packet_index = s->packet_index; __entry->irq = !!in_interrupt(); __entry->index = index; @@ -101,6 +101,94 @@ TRACE_EVENT(out_packet, __entry->index) ); +TRACE_EVENT(in_packet_without_header, + TP_PROTO(const struct amdtp_stream *s, u32 cycles, unsigned int payload_quadlets, unsigned int data_blocks, unsigned int index), + TP_ARGS(s, cycles, payload_quadlets, data_blocks, index), + TP_STRUCT__entry( + __field(unsigned int, second) + __field(unsigned int, cycle) + __field(int, channel) + __field(int, src) + __field(int, dest) + __field(unsigned int, payload_quadlets) + __field(unsigned int, data_blocks) + __field(unsigned int, data_block_counter) + __field(unsigned int, packet_index) + __field(unsigned int, irq) + __field(unsigned int, index) + ), + TP_fast_assign( + __entry->second = cycles / CYCLES_PER_SECOND; + __entry->cycle = cycles % CYCLES_PER_SECOND; + __entry->channel = s->context->channel; + __entry->src = fw_parent_device(s->unit)->node_id; + __entry->dest = fw_parent_device(s->unit)->card->node_id; + __entry->payload_quadlets = payload_quadlets; + __entry->data_blocks = data_blocks, + __entry->data_block_counter = s->data_block_counter, + __entry->packet_index = s->packet_index; + __entry->irq = !!in_interrupt(); + __entry->index = index; + ), + TP_printk( + "%02u %04u %04x %04x %02d %03u %3u %3u %02u %01u %02u", + __entry->second, + __entry->cycle, + __entry->src, + __entry->dest, + __entry->channel, + __entry->payload_quadlets, + __entry->data_blocks, + __entry->data_block_counter, + __entry->packet_index, + __entry->irq, + __entry->index) +); + +TRACE_EVENT(out_packet_without_header, + TP_PROTO(const struct amdtp_stream *s, u32 cycles, unsigned int payload_length, unsigned int data_blocks, unsigned int index), + TP_ARGS(s, cycles, payload_length, data_blocks, index), + TP_STRUCT__entry( + __field(unsigned int, second) + __field(unsigned int, cycle) + __field(int, channel) + __field(int, src) + __field(int, dest) + __field(unsigned int, payload_quadlets) + __field(unsigned int, data_blocks) + __field(unsigned int, data_block_counter) + __field(unsigned int, packet_index) + __field(unsigned int, irq) + __field(unsigned int, index) + ), + TP_fast_assign( + __entry->second = cycles / CYCLES_PER_SECOND; + __entry->cycle = cycles % CYCLES_PER_SECOND; + __entry->channel = s->context->channel; + __entry->src = fw_parent_device(s->unit)->card->node_id; + __entry->dest = fw_parent_device(s->unit)->node_id; + __entry->payload_quadlets = payload_length / 4; + __entry->data_blocks = data_blocks, + __entry->data_blocks = s->data_block_counter, + __entry->packet_index = s->packet_index; + __entry->irq = !!in_interrupt(); + __entry->index = index; + ), + TP_printk( + "%02u %04u %04x %04x %02d %03u %02u %03u %02u %01u %02u", + __entry->second, + __entry->cycle, + __entry->src, + __entry->dest, + __entry->channel, + __entry->payload_quadlets, + __entry->data_blocks, + __entry->data_block_counter, + __entry->packet_index, + __entry->irq, + __entry->index) +); + #endif #undef TRACE_INCLUDE_PATH diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 00060c4a9deb4ec239f34dc857ceebf75615d65c..9e6f54f8c45d2330ee339a7f4c7d1ac86c8e4774 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -27,6 +27,7 @@ /* isochronous header parameters */ #define ISO_DATA_LENGTH_SHIFT 16 +#define TAG_NO_CIP_HEADER 0 #define TAG_CIP 1 /* common isochronous packet header parameters */ @@ -37,6 +38,8 @@ #define CIP_SID_MASK 0x3f000000 #define CIP_DBS_MASK 0x00ff0000 #define CIP_DBS_SHIFT 16 +#define CIP_SPH_MASK 0x00000400 +#define CIP_SPH_SHIFT 10 #define CIP_DBC_MASK 0x000000ff #define CIP_FMT_SHIFT 24 #define CIP_FMT_MASK 0x3f000000 @@ -232,11 +235,15 @@ EXPORT_SYMBOL(amdtp_stream_set_parameters); unsigned int amdtp_stream_get_max_payload(struct amdtp_stream *s) { unsigned int multiplier = 1; + unsigned int header_size = 0; if (s->flags & CIP_JUMBO_PAYLOAD) multiplier = 5; + if (!(s->flags & CIP_NO_HEADER)) + header_size = 8; - return 8 + s->syt_interval * s->data_block_quadlets * 4 * multiplier; + return header_size + + s->syt_interval * s->data_block_quadlets * 4 * multiplier; } EXPORT_SYMBOL(amdtp_stream_get_max_payload); @@ -378,7 +385,7 @@ static int queue_packet(struct amdtp_stream *s, unsigned int header_length, goto end; p.interrupt = IS_ALIGNED(s->packet_index + 1, INTERRUPT_INTERVAL); - p.tag = TAG_CIP; + p.tag = s->tag; p.header_length = header_length; if (payload_length > 0) p.payload_length = payload_length; @@ -405,17 +412,16 @@ static inline int queue_out_packet(struct amdtp_stream *s, static inline int queue_in_packet(struct amdtp_stream *s) { - return queue_packet(s, IN_PACKET_HEADER_SIZE, - amdtp_stream_get_max_payload(s)); + return queue_packet(s, IN_PACKET_HEADER_SIZE, s->max_payload_length); } -static int handle_out_packet(struct amdtp_stream *s, unsigned int cycle, +static int handle_out_packet(struct amdtp_stream *s, + unsigned int payload_length, unsigned int cycle, unsigned int index) { __be32 *buffer; unsigned int syt; unsigned int data_blocks; - unsigned int payload_length; unsigned int pcm_frames; struct snd_pcm_substream *pcm; @@ -424,15 +430,22 @@ static int handle_out_packet(struct amdtp_stream *s, unsigned int cycle, data_blocks = calculate_data_blocks(s, syt); pcm_frames = s->process_data_blocks(s, buffer + 2, data_blocks, &syt); + if (s->flags & CIP_DBC_IS_END_EVENT) + s->data_block_counter = + (s->data_block_counter + data_blocks) & 0xff; + buffer[0] = cpu_to_be32(ACCESS_ONCE(s->source_node_id_field) | (s->data_block_quadlets << CIP_DBS_SHIFT) | + ((s->sph << CIP_SPH_SHIFT) & CIP_SPH_MASK) | s->data_block_counter); buffer[1] = cpu_to_be32(CIP_EOH | ((s->fmt << CIP_FMT_SHIFT) & CIP_FMT_MASK) | ((s->fdf << CIP_FDF_SHIFT) & CIP_FDF_MASK) | (syt & CIP_SYT_MASK)); - s->data_block_counter = (s->data_block_counter + data_blocks) & 0xff; + if (!(s->flags & CIP_DBC_IS_END_EVENT)) + s->data_block_counter = + (s->data_block_counter + data_blocks) & 0xff; payload_length = 8 + data_blocks * 4 * s->data_block_quadlets; trace_out_packet(s, cycle, buffer, payload_length, index); @@ -448,13 +461,45 @@ static int handle_out_packet(struct amdtp_stream *s, unsigned int cycle, return 0; } +static int handle_out_packet_without_header(struct amdtp_stream *s, + unsigned int payload_length, unsigned int cycle, + unsigned int index) +{ + __be32 *buffer; + unsigned int syt; + unsigned int data_blocks; + unsigned int pcm_frames; + struct snd_pcm_substream *pcm; + + buffer = s->buffer.packets[s->packet_index].buffer; + syt = calculate_syt(s, cycle); + data_blocks = calculate_data_blocks(s, syt); + pcm_frames = s->process_data_blocks(s, buffer, data_blocks, &syt); + s->data_block_counter = (s->data_block_counter + data_blocks) & 0xff; + + payload_length = data_blocks * 4 * s->data_block_quadlets; + + trace_out_packet_without_header(s, cycle, payload_length, data_blocks, + index); + + if (queue_out_packet(s, payload_length) < 0) + return -EIO; + + pcm = ACCESS_ONCE(s->pcm); + if (pcm && pcm_frames > 0) + update_pcm_pointers(s, pcm, pcm_frames); + + /* No need to return the number of handled data blocks. */ + return 0; +} + static int handle_in_packet(struct amdtp_stream *s, - unsigned int payload_quadlets, unsigned int cycle, + unsigned int payload_length, unsigned int cycle, unsigned int index) { __be32 *buffer; u32 cip_header[2]; - unsigned int fmt, fdf, syt; + unsigned int sph, fmt, fdf, syt; unsigned int data_block_quadlets, data_block_counter, dbc_interval; unsigned int data_blocks; struct snd_pcm_substream *pcm; @@ -465,14 +510,15 @@ static int handle_in_packet(struct amdtp_stream *s, cip_header[0] = be32_to_cpu(buffer[0]); cip_header[1] = be32_to_cpu(buffer[1]); - trace_in_packet(s, cycle, cip_header, payload_quadlets, index); + trace_in_packet(s, cycle, cip_header, payload_length, index); /* * This module supports 'Two-quadlet CIP header with SYT field'. * For convenience, also check FMT field is AM824 or not. */ - if (((cip_header[0] & CIP_EOH_MASK) == CIP_EOH) || - ((cip_header[1] & CIP_EOH_MASK) != CIP_EOH)) { + if ((((cip_header[0] & CIP_EOH_MASK) == CIP_EOH) || + ((cip_header[1] & CIP_EOH_MASK) != CIP_EOH)) && + (!(s->flags & CIP_HEADER_WITHOUT_EOH))) { dev_info_ratelimited(&s->unit->device, "Invalid CIP header for AMDTP: %08X:%08X\n", cip_header[0], cip_header[1]); @@ -482,8 +528,9 @@ static int handle_in_packet(struct amdtp_stream *s, } /* Check valid protocol or not. */ + sph = (cip_header[0] & CIP_SPH_MASK) >> CIP_SPH_SHIFT; fmt = (cip_header[1] & CIP_FMT_MASK) >> CIP_FMT_SHIFT; - if (fmt != s->fmt) { + if (sph != s->sph || fmt != s->fmt) { dev_info_ratelimited(&s->unit->device, "Detect unexpected protocol: %08x %08x\n", cip_header[0], cip_header[1]); @@ -494,7 +541,7 @@ static int handle_in_packet(struct amdtp_stream *s, /* Calculate data blocks */ fdf = (cip_header[1] & CIP_FDF_MASK) >> CIP_FDF_SHIFT; - if (payload_quadlets < 3 || + if (payload_length < 12 || (fmt == CIP_FMT_AM && fdf == AMDTP_FDF_NO_DATA)) { data_blocks = 0; } else { @@ -510,7 +557,8 @@ static int handle_in_packet(struct amdtp_stream *s, if (s->flags & CIP_WRONG_DBS) data_block_quadlets = s->data_block_quadlets; - data_blocks = (payload_quadlets - 2) / data_block_quadlets; + data_blocks = (payload_length / 4 - 2) / + data_block_quadlets; } /* Check data block counter continuity */ @@ -561,6 +609,34 @@ static int handle_in_packet(struct amdtp_stream *s, return 0; } +static int handle_in_packet_without_header(struct amdtp_stream *s, + unsigned int payload_quadlets, unsigned int cycle, + unsigned int index) +{ + __be32 *buffer; + unsigned int data_blocks; + struct snd_pcm_substream *pcm; + unsigned int pcm_frames; + + buffer = s->buffer.packets[s->packet_index].buffer; + data_blocks = payload_quadlets / s->data_block_quadlets; + + trace_in_packet_without_header(s, cycle, payload_quadlets, data_blocks, + index); + + pcm_frames = s->process_data_blocks(s, buffer, data_blocks, NULL); + s->data_block_counter = (s->data_block_counter + data_blocks) & 0xff; + + if (queue_in_packet(s) < 0) + return -EIO; + + pcm = ACCESS_ONCE(s->pcm); + if (pcm && pcm_frames > 0) + update_pcm_pointers(s, pcm, pcm_frames); + + return 0; +} + /* * In CYCLE_TIMER register of IEEE 1394, 7 bits are used to represent second. On * the other hand, in DMA descriptors of 1394 OHCI, 3 bits are used to represent @@ -604,7 +680,7 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp, for (i = 0; i < packets; ++i) { cycle = increment_cycle_count(cycle, 1); - if (handle_out_packet(s, cycle, i) < 0) { + if (s->handle_packet(s, 0, cycle, i) < 0) { s->packet_index = -1; amdtp_stream_pcm_abort(s); return; @@ -620,7 +696,7 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp, { struct amdtp_stream *s = private_data; unsigned int i, packets; - unsigned int payload_quadlets, max_payload_quadlets; + unsigned int payload_length, max_payload_length; __be32 *headers = header; u32 cycle; @@ -636,22 +712,22 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp, cycle = decrement_cycle_count(cycle, packets); /* For buffer-over-run prevention. */ - max_payload_quadlets = amdtp_stream_get_max_payload(s) / 4; + max_payload_length = s->max_payload_length; for (i = 0; i < packets; i++) { cycle = increment_cycle_count(cycle, 1); - /* The number of quadlets in this packet */ - payload_quadlets = - (be32_to_cpu(headers[i]) >> ISO_DATA_LENGTH_SHIFT) / 4; - if (payload_quadlets > max_payload_quadlets) { + /* The number of bytes in this packet */ + payload_length = + (be32_to_cpu(headers[i]) >> ISO_DATA_LENGTH_SHIFT); + if (payload_length > max_payload_length) { dev_err(&s->unit->device, - "Detect jumbo payload: %02x %02x\n", - payload_quadlets, max_payload_quadlets); + "Detect jumbo payload: %04x %04x\n", + payload_length, max_payload_length); break; } - if (handle_in_packet(s, payload_quadlets, cycle, i) < 0) + if (s->handle_packet(s, payload_length, cycle, i) < 0) break; } @@ -671,6 +747,10 @@ static void amdtp_stream_first_callback(struct fw_iso_context *context, void *header, void *private_data) { struct amdtp_stream *s = private_data; + u32 cycle; + unsigned int packets; + + s->max_payload_length = amdtp_stream_get_max_payload(s); /* * For in-stream, first packet has come. @@ -679,10 +759,27 @@ static void amdtp_stream_first_callback(struct fw_iso_context *context, s->callbacked = true; wake_up(&s->callback_wait); - if (s->direction == AMDTP_IN_STREAM) + cycle = compute_cycle_count(tstamp); + + if (s->direction == AMDTP_IN_STREAM) { + packets = header_length / IN_PACKET_HEADER_SIZE; + cycle = decrement_cycle_count(cycle, packets); context->callback.sc = in_stream_callback; - else + if (s->flags & CIP_NO_HEADER) + s->handle_packet = handle_in_packet_without_header; + else + s->handle_packet = handle_in_packet; + } else { + packets = header_length / 4; + cycle = increment_cycle_count(cycle, QUEUE_LENGTH - packets); context->callback.sc = out_stream_callback; + if (s->flags & CIP_NO_HEADER) + s->handle_packet = handle_out_packet_without_header; + else + s->handle_packet = handle_out_packet; + } + + s->start_cycle = cycle; context->callback.sc(context, tstamp, header_length, header, s); } @@ -759,6 +856,11 @@ int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed) amdtp_stream_update(s); + if (s->flags & CIP_NO_HEADER) + s->tag = TAG_NO_CIP_HEADER; + else + s->tag = TAG_CIP; + s->packet_index = 0; do { if (s->direction == AMDTP_IN_STREAM) @@ -771,7 +873,7 @@ int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed) /* NOTE: TAG1 matches CIP. This just affects in stream. */ tag = FW_ISO_CONTEXT_MATCH_TAG1; - if (s->flags & CIP_EMPTY_WITH_TAG0) + if ((s->flags & CIP_EMPTY_WITH_TAG0) || (s->flags & CIP_NO_HEADER)) tag |= FW_ISO_CONTEXT_MATCH_TAG0; s->callbacked = false; diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index c1bc7fad056e82c62ee81e8f6a615f0a5d5fa85b..7e88317228212a63ad38e6e9880655f6ea567c85 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -18,8 +18,8 @@ * SYT_INTERVAL samples, with these two types alternating so that * the overall sample rate comes out right. * @CIP_EMPTY_WITH_TAG0: Only for in-stream. Empty in-packets have TAG0. - * @CIP_DBC_IS_END_EVENT: Only for in-stream. The value of dbc in an in-packet - * corresponds to the end of event in the packet. Out of IEC 61883. + * @CIP_DBC_IS_END_EVENT: The value of dbc in an packet corresponds to the end + * of event in the packet. Out of IEC 61883. * @CIP_WRONG_DBS: Only for in-stream. The value of dbs is wrong in in-packets. * The value of data_block_quadlets is used instead of reported value. * @CIP_SKIP_DBC_ZERO_CHECK: Only for in-stream. Packets with zero in dbc is @@ -29,6 +29,9 @@ * @CIP_JUMBO_PAYLOAD: Only for in-stream. The number of data blocks in an * packet is larger than IEC 61883-6 defines. Current implementation * allows 5 times as large as IEC 61883-6 defines. + * @CIP_HEADER_WITHOUT_EOH: Only for in-stream. CIP Header doesn't include + * valid EOH. + * @CIP_NO_HEADERS: a lack of headers in packets */ enum cip_flags { CIP_NONBLOCKING = 0x00, @@ -39,6 +42,8 @@ enum cip_flags { CIP_SKIP_DBC_ZERO_CHECK = 0x10, CIP_EMPTY_HAS_WRONG_DBC = 0x20, CIP_JUMBO_PAYLOAD = 0x40, + CIP_HEADER_WITHOUT_EOH = 0x80, + CIP_NO_HEADER = 0x100, }; /** @@ -101,11 +106,17 @@ struct amdtp_stream { struct fw_iso_context *context; struct iso_packets_buffer buffer; int packet_index; + int tag; + int (*handle_packet)(struct amdtp_stream *s, + unsigned int payload_quadlets, unsigned int cycle, + unsigned int index); + unsigned int max_payload_length; /* For CIP headers. */ unsigned int source_node_id_field; unsigned int data_block_quadlets; unsigned int data_block_counter; + unsigned int sph; unsigned int fmt; unsigned int fdf; /* quirk: fixed interval of dbc between previos/current packets. */ @@ -130,6 +141,7 @@ struct amdtp_stream { /* To wait for first packet. */ bool callbacked; wait_queue_head_t callback_wait; + u32 start_cycle; /* For backends to process data blocks. */ void *protocol; diff --git a/sound/firewire/bebob/bebob_command.c b/sound/firewire/bebob/bebob_command.c index 9402cc15dbc1bcaa1b5d9321413dda37e19237d0..f9b4225dd86fe9473e62015e54375ef17eaae74b 100644 --- a/sound/firewire/bebob/bebob_command.c +++ b/sound/firewire/bebob/bebob_command.c @@ -31,13 +31,15 @@ int avc_audio_set_selector(struct fw_unit *unit, unsigned int subunit_id, err = fcp_avc_transaction(unit, buf, 12, buf, 12, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(8)); - if (err > 0 && err < 9) + if (err < 0) + ; + else if (err < 9) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; else if (buf[0] == 0x0a) /* REJECTED */ err = -EINVAL; - else if (err > 0) + else err = 0; kfree(buf); @@ -67,7 +69,9 @@ int avc_audio_get_selector(struct fw_unit *unit, unsigned int subunit_id, err = fcp_avc_transaction(unit, buf, 12, buf, 12, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(8)); - if (err > 0 && err < 9) + if (err < 0) + ; + else if (err < 9) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -120,7 +124,9 @@ int avc_bridgeco_get_plug_type(struct fw_unit *unit, err = fcp_avc_transaction(unit, buf, 12, buf, 12, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(9)); - if ((err >= 0) && (err < 8)) + if (err < 0) + ; + else if (err < 11) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -150,7 +156,9 @@ int avc_bridgeco_get_plug_ch_pos(struct fw_unit *unit, err = fcp_avc_transaction(unit, buf, 12, buf, 256, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(9)); - if ((err >= 0) && (err < 8)) + if (err < 0) + ; + else if (err < 11) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -187,7 +195,9 @@ int avc_bridgeco_get_plug_section_type(struct fw_unit *unit, err = fcp_avc_transaction(unit, buf, 12, buf, 12, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(9) | BIT(10)); - if ((err >= 0) && (err < 8)) + if (err < 0) + ; + else if (err < 12) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -221,7 +231,9 @@ int avc_bridgeco_get_plug_input(struct fw_unit *unit, err = fcp_avc_transaction(unit, buf, 16, buf, 16, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7)); - if ((err >= 0) && (err < 8)) + if (err < 0) + ; + else if (err < 16) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -260,7 +272,9 @@ int avc_bridgeco_get_plug_strm_fmt(struct fw_unit *unit, err = fcp_avc_transaction(unit, buf, 12, buf, *len, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(10)); - if ((err >= 0) && (err < 12)) + if (err < 0) + ; + else if (err < 12) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; diff --git a/sound/firewire/digi00x/amdtp-dot.c b/sound/firewire/digi00x/amdtp-dot.c index b3cffd01a19f711f3572920a33fa75098f39ffde..a4688545339ccd58ee558a7e35546c5a18b9fb13 100644 --- a/sound/firewire/digi00x/amdtp-dot.c +++ b/sound/firewire/digi00x/amdtp-dot.c @@ -28,6 +28,9 @@ */ #define MAX_MIDI_RX_BLOCKS 8 +/* 3 = MAX(DOT_MIDI_IN_PORTS, DOT_MIDI_OUT_PORTS) + 1. */ +#define MAX_MIDI_PORTS 3 + /* * The double-oh-three algorithm was discovered by Robin Gareus and Damien * Zammit in 2012, with reverse-engineering for Digi 003 Rack. @@ -42,10 +45,8 @@ struct amdtp_dot { unsigned int pcm_channels; struct dot_state state; - unsigned int midi_ports; - /* 2 = MAX(DOT_MIDI_IN_PORTS, DOT_MIDI_OUT_PORTS) */ - struct snd_rawmidi_substream *midi[2]; - int midi_fifo_used[2]; + struct snd_rawmidi_substream *midi[MAX_MIDI_PORTS]; + int midi_fifo_used[MAX_MIDI_PORTS]; int midi_fifo_limit; void (*transfer_samples)(struct amdtp_stream *s, @@ -124,8 +125,8 @@ int amdtp_dot_set_parameters(struct amdtp_stream *s, unsigned int rate, return -EBUSY; /* - * A first data channel is for MIDI conformant data channel, the rest is - * Multi Bit Linear Audio data channel. + * A first data channel is for MIDI messages, the rest is Multi Bit + * Linear Audio data channel. */ err = amdtp_stream_set_parameters(s, rate, pcm_channels + 1); if (err < 0) @@ -135,11 +136,6 @@ int amdtp_dot_set_parameters(struct amdtp_stream *s, unsigned int rate, p->pcm_channels = pcm_channels; - if (s->direction == AMDTP_IN_STREAM) - p->midi_ports = DOT_MIDI_IN_PORTS; - else - p->midi_ports = DOT_MIDI_OUT_PORTS; - /* * We do not know the actual MIDI FIFO size of most devices. Just * assume two bytes, i.e., one byte can be received over the bus while @@ -281,13 +277,25 @@ static void write_midi_messages(struct amdtp_stream *s, __be32 *buffer, b = (u8 *)&buffer[0]; len = 0; - if (port < p->midi_ports && + if (port < MAX_MIDI_PORTS && midi_ratelimit_per_packet(s, port) && p->midi[port] != NULL) len = snd_rawmidi_transmit(p->midi[port], b + 1, 2); if (len > 0) { - b[3] = (0x10 << port) | len; + /* + * Upper 4 bits of LSB represent port number. + * - 0000b: physical MIDI port 1. + * - 0010b: physical MIDI port 2. + * - 1110b: console MIDI port. + */ + if (port == 2) + b[3] = 0xe0; + else if (port == 1) + b[3] = 0x20; + else + b[3] = 0x00; + b[3] |= len; midi_use_bytes(s, port, len); } else { b[1] = 0; @@ -309,11 +317,22 @@ static void read_midi_messages(struct amdtp_stream *s, __be32 *buffer, for (f = 0; f < data_blocks; f++) { b = (u8 *)&buffer[0]; - port = b[3] >> 4; - len = b[3] & 0x0f; - if (port < p->midi_ports && p->midi[port] && len > 0) - snd_rawmidi_receive(p->midi[port], b + 1, len); + len = b[3] & 0x0f; + if (len > 0) { + /* + * Upper 4 bits of LSB represent port number. + * - 0000b: physical MIDI port 1. Use port 0. + * - 1110b: console MIDI port. Use port 2. + */ + if (b[3] >> 4 > 0) + port = 2; + else + port = 0; + + if (port < MAX_MIDI_PORTS && p->midi[port]) + snd_rawmidi_receive(p->midi[port], b + 1, len); + } buffer += s->data_block_quadlets; } @@ -364,7 +383,7 @@ void amdtp_dot_midi_trigger(struct amdtp_stream *s, unsigned int port, { struct amdtp_dot *p = s->protocol; - if (port < p->midi_ports) + if (port < MAX_MIDI_PORTS) ACCESS_ONCE(p->midi[port]) = midi; } diff --git a/sound/firewire/digi00x/digi00x-midi.c b/sound/firewire/digi00x/digi00x-midi.c index 915d2a21223e683f1f0f5fb613e0d5a8c7fb886b..7ab3d0810f6b4356ce8280da158241ba74f6f89a 100644 --- a/sound/firewire/digi00x/digi00x-midi.c +++ b/sound/firewire/digi00x/digi00x-midi.c @@ -8,7 +8,7 @@ #include "digi00x.h" -static int midi_phys_open(struct snd_rawmidi_substream *substream) +static int midi_open(struct snd_rawmidi_substream *substream) { struct snd_dg00x *dg00x = substream->rmidi->private_data; int err; @@ -27,7 +27,7 @@ static int midi_phys_open(struct snd_rawmidi_substream *substream) return err; } -static int midi_phys_close(struct snd_rawmidi_substream *substream) +static int midi_close(struct snd_rawmidi_substream *substream) { struct snd_dg00x *dg00x = substream->rmidi->private_data; @@ -40,180 +40,130 @@ static int midi_phys_close(struct snd_rawmidi_substream *substream) return 0; } -static void midi_phys_capture_trigger(struct snd_rawmidi_substream *substream, - int up) +static void midi_capture_trigger(struct snd_rawmidi_substream *substream, + int up) { struct snd_dg00x *dg00x = substream->rmidi->private_data; + unsigned int port; unsigned long flags; - spin_lock_irqsave(&dg00x->lock, flags); - - if (up) - amdtp_dot_midi_trigger(&dg00x->tx_stream, substream->number, - substream); + if (substream->rmidi->device == 0) + port = substream->number; else - amdtp_dot_midi_trigger(&dg00x->tx_stream, substream->number, - NULL); - - spin_unlock_irqrestore(&dg00x->lock, flags); -} - -static void midi_phys_playback_trigger(struct snd_rawmidi_substream *substream, - int up) -{ - struct snd_dg00x *dg00x = substream->rmidi->private_data; - unsigned long flags; + port = 2; spin_lock_irqsave(&dg00x->lock, flags); if (up) - amdtp_dot_midi_trigger(&dg00x->rx_stream, substream->number, - substream); + amdtp_dot_midi_trigger(&dg00x->tx_stream, port, substream); else - amdtp_dot_midi_trigger(&dg00x->rx_stream, substream->number, - NULL); + amdtp_dot_midi_trigger(&dg00x->tx_stream, port, NULL); spin_unlock_irqrestore(&dg00x->lock, flags); } -static int midi_ctl_open(struct snd_rawmidi_substream *substream) -{ - /* Do nothing. */ - return 0; -} - -static int midi_ctl_capture_close(struct snd_rawmidi_substream *substream) -{ - /* Do nothing. */ - return 0; -} - -static int midi_ctl_playback_close(struct snd_rawmidi_substream *substream) -{ - struct snd_dg00x *dg00x = substream->rmidi->private_data; - - snd_fw_async_midi_port_finish(&dg00x->out_control); - - return 0; -} - -static void midi_ctl_capture_trigger(struct snd_rawmidi_substream *substream, - int up) +static void midi_playback_trigger(struct snd_rawmidi_substream *substream, + int up) { struct snd_dg00x *dg00x = substream->rmidi->private_data; + unsigned int port; unsigned long flags; - spin_lock_irqsave(&dg00x->lock, flags); - - if (up) - dg00x->in_control = substream; + if (substream->rmidi->device == 0) + port = substream->number; else - dg00x->in_control = NULL; - - spin_unlock_irqrestore(&dg00x->lock, flags); -} - -static void midi_ctl_playback_trigger(struct snd_rawmidi_substream *substream, - int up) -{ - struct snd_dg00x *dg00x = substream->rmidi->private_data; - unsigned long flags; + port = 2; spin_lock_irqsave(&dg00x->lock, flags); if (up) - snd_fw_async_midi_port_run(&dg00x->out_control, substream); + amdtp_dot_midi_trigger(&dg00x->rx_stream, port, substream); + else + amdtp_dot_midi_trigger(&dg00x->rx_stream, port, NULL); spin_unlock_irqrestore(&dg00x->lock, flags); } -static void set_midi_substream_names(struct snd_dg00x *dg00x, - struct snd_rawmidi_str *str, - bool is_ctl) +static void set_substream_names(struct snd_dg00x *dg00x, + struct snd_rawmidi *rmidi, bool is_console) { struct snd_rawmidi_substream *subs; - - list_for_each_entry(subs, &str->substreams, list) { - if (!is_ctl) - snprintf(subs->name, sizeof(subs->name), - "%s MIDI %d", - dg00x->card->shortname, subs->number + 1); - else - /* This port is for asynchronous transaction. */ - snprintf(subs->name, sizeof(subs->name), - "%s control", - dg00x->card->shortname); + struct snd_rawmidi_str *str; + int i; + + for (i = 0; i < 2; ++i) { + str = &rmidi->streams[i]; + + list_for_each_entry(subs, &str->substreams, list) { + if (!is_console) { + snprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", + dg00x->card->shortname, + subs->number + 1); + } else { + snprintf(subs->name, sizeof(subs->name), + "%s control", + dg00x->card->shortname); + } + } } } -int snd_dg00x_create_midi_devices(struct snd_dg00x *dg00x) +static int add_substream_pair(struct snd_dg00x *dg00x, unsigned int out_ports, + unsigned int in_ports, bool is_console) { - static const struct snd_rawmidi_ops phys_capture_ops = { - .open = midi_phys_open, - .close = midi_phys_close, - .trigger = midi_phys_capture_trigger, - }; - static const struct snd_rawmidi_ops phys_playback_ops = { - .open = midi_phys_open, - .close = midi_phys_close, - .trigger = midi_phys_playback_trigger, + static const struct snd_rawmidi_ops capture_ops = { + .open = midi_open, + .close = midi_close, + .trigger = midi_capture_trigger, }; - static const struct snd_rawmidi_ops ctl_capture_ops = { - .open = midi_ctl_open, - .close = midi_ctl_capture_close, - .trigger = midi_ctl_capture_trigger, + static const struct snd_rawmidi_ops playback_ops = { + .open = midi_open, + .close = midi_close, + .trigger = midi_playback_trigger, }; - static const struct snd_rawmidi_ops ctl_playback_ops = { - .open = midi_ctl_open, - .close = midi_ctl_playback_close, - .trigger = midi_ctl_playback_trigger, - }; - struct snd_rawmidi *rmidi[2]; - struct snd_rawmidi_str *str; - unsigned int i; + const char *label; + struct snd_rawmidi *rmidi; int err; /* Add physical midi ports. */ - err = snd_rawmidi_new(dg00x->card, dg00x->card->driver, 0, - DOT_MIDI_OUT_PORTS, DOT_MIDI_IN_PORTS, &rmidi[0]); + err = snd_rawmidi_new(dg00x->card, dg00x->card->driver, is_console, + out_ports, in_ports, &rmidi); if (err < 0) return err; + rmidi->private_data = dg00x; - snprintf(rmidi[0]->name, sizeof(rmidi[0]->name), - "%s MIDI", dg00x->card->shortname); - - snd_rawmidi_set_ops(rmidi[0], SNDRV_RAWMIDI_STREAM_INPUT, - &phys_capture_ops); - snd_rawmidi_set_ops(rmidi[0], SNDRV_RAWMIDI_STREAM_OUTPUT, - &phys_playback_ops); + if (!is_console) + label = "%s control"; + else + label = "%s MIDI"; + snprintf(rmidi->name, sizeof(rmidi->name), label, + dg00x->card->shortname); - /* Add a pair of control midi ports. */ - err = snd_rawmidi_new(dg00x->card, dg00x->card->driver, 1, - 1, 1, &rmidi[1]); - if (err < 0) - return err; + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &playback_ops); + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &capture_ops); - snprintf(rmidi[1]->name, sizeof(rmidi[1]->name), - "%s control", dg00x->card->shortname); + rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT | + SNDRV_RAWMIDI_INFO_OUTPUT | + SNDRV_RAWMIDI_INFO_DUPLEX; - snd_rawmidi_set_ops(rmidi[1], SNDRV_RAWMIDI_STREAM_INPUT, - &ctl_capture_ops); - snd_rawmidi_set_ops(rmidi[1], SNDRV_RAWMIDI_STREAM_OUTPUT, - &ctl_playback_ops); + set_substream_names(dg00x, rmidi, is_console); - for (i = 0; i < ARRAY_SIZE(rmidi); i++) { - rmidi[i]->private_data = dg00x; + return 0; +} - rmidi[i]->info_flags |= SNDRV_RAWMIDI_INFO_INPUT; - str = &rmidi[i]->streams[SNDRV_RAWMIDI_STREAM_INPUT]; - set_midi_substream_names(dg00x, str, i); +int snd_dg00x_create_midi_devices(struct snd_dg00x *dg00x) +{ + int err; - rmidi[i]->info_flags |= SNDRV_RAWMIDI_INFO_OUTPUT; - str = &rmidi[i]->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]; - set_midi_substream_names(dg00x, str, i); + /* Add physical midi ports. */ + err = add_substream_pair(dg00x, DOT_MIDI_OUT_PORTS, DOT_MIDI_IN_PORTS, + false); + if (err < 0) + return err; - rmidi[i]->info_flags |= SNDRV_RAWMIDI_INFO_DUPLEX; - } + if (dg00x->is_console) + err = add_substream_pair(dg00x, 1, 1, true); - return 0; + return err; } diff --git a/sound/firewire/digi00x/digi00x-transaction.c b/sound/firewire/digi00x/digi00x-transaction.c index 735d35640807c581ea43a6f4152348a549c0427d..af9bc85047819daed40d8de28e002ab2e24febe5 100644 --- a/sound/firewire/digi00x/digi00x-transaction.c +++ b/sound/firewire/digi00x/digi00x-transaction.c @@ -9,40 +9,6 @@ #include #include "digi00x.h" -static int fill_midi_message(struct snd_rawmidi_substream *substream, u8 *buf) -{ - int bytes; - - buf[0] = 0x80; - bytes = snd_rawmidi_transmit_peek(substream, buf + 1, 2); - if (bytes >= 0) - buf[3] = 0xc0 | bytes; - - return bytes; -} - -static void handle_midi_control(struct snd_dg00x *dg00x, __be32 *buf, - unsigned int length) -{ - struct snd_rawmidi_substream *substream; - unsigned int i; - unsigned int len; - u8 *b; - - substream = ACCESS_ONCE(dg00x->in_control); - if (substream == NULL) - return; - - length /= 4; - - for (i = 0; i < length; i++) { - b = (u8 *)&buf[i]; - len = b[3] & 0xf; - if (len > 0) - snd_rawmidi_receive(dg00x->in_control, b + 1, len); - } -} - static void handle_unknown_message(struct snd_dg00x *dg00x, unsigned long long offset, __be32 *buf) { @@ -63,39 +29,36 @@ static void handle_message(struct fw_card *card, struct fw_request *request, struct snd_dg00x *dg00x = callback_data; __be32 *buf = (__be32 *)data; + fw_send_response(card, request, RCODE_COMPLETE); + if (offset == dg00x->async_handler.offset) handle_unknown_message(dg00x, offset, buf); - else if (offset == dg00x->async_handler.offset + 4) - handle_midi_control(dg00x, buf, length); - - fw_send_response(card, request, RCODE_COMPLETE); } int snd_dg00x_transaction_reregister(struct snd_dg00x *dg00x) { struct fw_device *device = fw_parent_device(dg00x->unit); __be32 data[2]; - int err; /* Unknown. 4bytes. */ data[0] = cpu_to_be32((device->card->node_id << 16) | (dg00x->async_handler.offset >> 32)); data[1] = cpu_to_be32(dg00x->async_handler.offset); - err = snd_fw_transaction(dg00x->unit, TCODE_WRITE_BLOCK_REQUEST, - DG00X_ADDR_BASE + DG00X_OFFSET_MESSAGE_ADDR, - &data, sizeof(data), 0); - if (err < 0) - return err; - - /* Asynchronous transactions for MIDI control message. */ - data[0] = cpu_to_be32((device->card->node_id << 16) | - (dg00x->async_handler.offset >> 32)); - data[1] = cpu_to_be32(dg00x->async_handler.offset + 4); return snd_fw_transaction(dg00x->unit, TCODE_WRITE_BLOCK_REQUEST, - DG00X_ADDR_BASE + DG00X_OFFSET_MIDI_CTL_ADDR, + DG00X_ADDR_BASE + DG00X_OFFSET_MESSAGE_ADDR, &data, sizeof(data), 0); } +void snd_dg00x_transaction_unregister(struct snd_dg00x *dg00x) +{ + if (dg00x->async_handler.callback_data == NULL) + return; + + fw_core_remove_address_handler(&dg00x->async_handler); + + dg00x->async_handler.callback_data = NULL; +} + int snd_dg00x_transaction_register(struct snd_dg00x *dg00x) { static const struct fw_address_region resp_register_region = { @@ -104,7 +67,7 @@ int snd_dg00x_transaction_register(struct snd_dg00x *dg00x) }; int err; - dg00x->async_handler.length = 12; + dg00x->async_handler.length = 4; dg00x->async_handler.address_callback = handle_message; dg00x->async_handler.callback_data = dg00x; @@ -115,28 +78,7 @@ int snd_dg00x_transaction_register(struct snd_dg00x *dg00x) err = snd_dg00x_transaction_reregister(dg00x); if (err < 0) - goto error; - - err = snd_fw_async_midi_port_init(&dg00x->out_control, dg00x->unit, - DG00X_ADDR_BASE + DG00X_OFFSET_MMC, - 4, fill_midi_message); - if (err < 0) - goto error; + snd_dg00x_transaction_unregister(dg00x); return err; -error: - fw_core_remove_address_handler(&dg00x->async_handler); - dg00x->async_handler.callback_data = NULL; - return err; -} - -void snd_dg00x_transaction_unregister(struct snd_dg00x *dg00x) -{ - if (dg00x->async_handler.callback_data == NULL) - return; - - snd_fw_async_midi_port_destroy(&dg00x->out_control); - fw_core_remove_address_handler(&dg00x->async_handler); - - dg00x->async_handler.callback_data = NULL; } diff --git a/sound/firewire/digi00x/digi00x.c b/sound/firewire/digi00x/digi00x.c index cc4776c6ded313673ef1c767101f2ed7e0c4369f..1f5e1d23f31a7132a5dfb5f3f34468b1517a89d0 100644 --- a/sound/firewire/digi00x/digi00x.c +++ b/sound/firewire/digi00x/digi00x.c @@ -13,7 +13,8 @@ MODULE_AUTHOR("Takashi Sakamoto "); MODULE_LICENSE("GPL v2"); #define VENDOR_DIGIDESIGN 0x00a07e -#define MODEL_DIGI00X 0x000002 +#define MODEL_CONSOLE 0x000001 +#define MODEL_RACK 0x000002 static int name_card(struct snd_dg00x *dg00x) { @@ -129,6 +130,8 @@ static int snd_dg00x_probe(struct fw_unit *unit, spin_lock_init(&dg00x->lock); init_waitqueue_head(&dg00x->hwdep_wait); + dg00x->is_console = entry->model_id == MODEL_CONSOLE; + /* Allocate and register this sound card later. */ INIT_DEFERRABLE_WORK(&dg00x->dwork, do_registration); snd_fw_schedule_registration(unit, &dg00x->dwork); @@ -183,7 +186,13 @@ static const struct ieee1394_device_id snd_dg00x_id_table[] = { .match_flags = IEEE1394_MATCH_VENDOR_ID | IEEE1394_MATCH_MODEL_ID, .vendor_id = VENDOR_DIGIDESIGN, - .model_id = MODEL_DIGI00X, + .model_id = MODEL_CONSOLE, + }, + { + .match_flags = IEEE1394_MATCH_VENDOR_ID | + IEEE1394_MATCH_MODEL_ID, + .vendor_id = VENDOR_DIGIDESIGN, + .model_id = MODEL_RACK, }, {} }; diff --git a/sound/firewire/digi00x/digi00x.h b/sound/firewire/digi00x/digi00x.h index 9dc761bdacca719897073a71aa75c41ce9d65bd8..1275a50956c08542d022dd926b0d541b06c92d10 100644 --- a/sound/firewire/digi00x/digi00x.h +++ b/sound/firewire/digi00x/digi00x.h @@ -58,16 +58,15 @@ struct snd_dg00x { struct fw_address_handler async_handler; u32 msg; - /* For asynchronous MIDI controls. */ - struct snd_rawmidi_substream *in_control; - struct snd_fw_async_midi_port out_control; + /* Console models have additional MIDI ports for control surface. */ + bool is_console; }; #define DG00X_ADDR_BASE 0xffffe0000000ull #define DG00X_OFFSET_STREAMING_STATE 0x0000 #define DG00X_OFFSET_STREAMING_SET 0x0004 -#define DG00X_OFFSET_MIDI_CTL_ADDR 0x0008 +/* unknown but address in host space 0x0008 */ /* For LSB of the address 0x000c */ /* unknown 0x0010 */ #define DG00X_OFFSET_MESSAGE_ADDR 0x0014 diff --git a/sound/firewire/fcp.c b/sound/firewire/fcp.c index cce19768f43d0a3f32c9d3c0187c1077f5bbfeab..61dda828f767bd58a41b765334598225d54b3565 100644 --- a/sound/firewire/fcp.c +++ b/sound/firewire/fcp.c @@ -63,7 +63,9 @@ int avc_general_set_sig_fmt(struct fw_unit *unit, unsigned int rate, /* do transaction and check buf[1-5] are the same against command */ err = fcp_avc_transaction(unit, buf, 8, buf, 8, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5)); - if (err >= 0 && err < 8) + if (err < 0) + ; + else if (err < 8) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -106,7 +108,9 @@ int avc_general_get_sig_fmt(struct fw_unit *unit, unsigned int *rate, /* do transaction and check buf[1-4] are the same against command */ err = fcp_avc_transaction(unit, buf, 8, buf, 8, BIT(1) | BIT(2) | BIT(3) | BIT(4)); - if (err >= 0 && err < 8) + if (err < 0) + ; + else if (err < 8) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -154,7 +158,9 @@ int avc_general_get_plug_info(struct fw_unit *unit, unsigned int subunit_type, buf[3] = 0xff & subfunction; err = fcp_avc_transaction(unit, buf, 8, buf, 8, BIT(1) | BIT(2)); - if (err >= 0 && err < 8) + if (err < 0) + ; + else if (err < 8) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; diff --git a/sound/firewire/fireface/Makefile b/sound/firewire/fireface/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8f807284ba54344fd11d5d576e4627d5993e3a2c --- /dev/null +++ b/sound/firewire/fireface/Makefile @@ -0,0 +1,3 @@ +snd-fireface-objs := ff.o ff-transaction.o ff-midi.o ff-proc.o amdtp-ff.o \ + ff-stream.o ff-pcm.o ff-hwdep.o ff-protocol-ff400.o +obj-$(CONFIG_SND_FIREFACE) += snd-fireface.o diff --git a/sound/firewire/fireface/amdtp-ff.c b/sound/firewire/fireface/amdtp-ff.c new file mode 100644 index 0000000000000000000000000000000000000000..780da9deb2f01b75b6b6fe4c29d352a134f2103d --- /dev/null +++ b/sound/firewire/fireface/amdtp-ff.c @@ -0,0 +1,155 @@ +/* + * amdtp-ff.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include +#include "ff.h" + +struct amdtp_ff { + unsigned int pcm_channels; +}; + +int amdtp_ff_set_parameters(struct amdtp_stream *s, unsigned int rate, + unsigned int pcm_channels) +{ + struct amdtp_ff *p = s->protocol; + unsigned int data_channels; + + if (amdtp_stream_running(s)) + return -EBUSY; + + p->pcm_channels = pcm_channels; + data_channels = pcm_channels; + + return amdtp_stream_set_parameters(s, rate, data_channels); +} + +static void write_pcm_s32(struct amdtp_stream *s, + struct snd_pcm_substream *pcm, + __le32 *buffer, unsigned int frames) +{ + struct amdtp_ff *p = s->protocol; + struct snd_pcm_runtime *runtime = pcm->runtime; + unsigned int channels, remaining_frames, i, c; + const u32 *src; + + channels = p->pcm_channels; + src = (void *)runtime->dma_area + + frames_to_bytes(runtime, s->pcm_buffer_pointer); + remaining_frames = runtime->buffer_size - s->pcm_buffer_pointer; + + for (i = 0; i < frames; ++i) { + for (c = 0; c < channels; ++c) { + buffer[c] = cpu_to_le32(*src); + src++; + } + buffer += s->data_block_quadlets; + if (--remaining_frames == 0) + src = (void *)runtime->dma_area; + } +} + +static void read_pcm_s32(struct amdtp_stream *s, + struct snd_pcm_substream *pcm, + __le32 *buffer, unsigned int frames) +{ + struct amdtp_ff *p = s->protocol; + struct snd_pcm_runtime *runtime = pcm->runtime; + unsigned int channels, remaining_frames, i, c; + u32 *dst; + + channels = p->pcm_channels; + dst = (void *)runtime->dma_area + + frames_to_bytes(runtime, s->pcm_buffer_pointer); + remaining_frames = runtime->buffer_size - s->pcm_buffer_pointer; + + for (i = 0; i < frames; ++i) { + for (c = 0; c < channels; ++c) { + *dst = le32_to_cpu(buffer[c]) & 0xffffff00; + dst++; + } + buffer += s->data_block_quadlets; + if (--remaining_frames == 0) + dst = (void *)runtime->dma_area; + } +} + +static void write_pcm_silence(struct amdtp_stream *s, + __le32 *buffer, unsigned int frames) +{ + struct amdtp_ff *p = s->protocol; + unsigned int i, c, channels = p->pcm_channels; + + for (i = 0; i < frames; ++i) { + for (c = 0; c < channels; ++c) + buffer[c] = cpu_to_le32(0x00000000); + buffer += s->data_block_quadlets; + } +} + +int amdtp_ff_add_pcm_hw_constraints(struct amdtp_stream *s, + struct snd_pcm_runtime *runtime) +{ + int err; + + err = snd_pcm_hw_constraint_msbits(runtime, 0, 32, 24); + if (err < 0) + return err; + + return amdtp_stream_add_pcm_hw_constraints(s, runtime); +} + +static unsigned int process_rx_data_blocks(struct amdtp_stream *s, + __be32 *buffer, + unsigned int data_blocks, + unsigned int *syt) +{ + struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + unsigned int pcm_frames; + + if (pcm) { + write_pcm_s32(s, pcm, (__le32 *)buffer, data_blocks); + pcm_frames = data_blocks; + } else { + write_pcm_silence(s, (__le32 *)buffer, data_blocks); + pcm_frames = 0; + } + + return pcm_frames; +} + +static unsigned int process_tx_data_blocks(struct amdtp_stream *s, + __be32 *buffer, + unsigned int data_blocks, + unsigned int *syt) +{ + struct snd_pcm_substream *pcm = ACCESS_ONCE(s->pcm); + unsigned int pcm_frames; + + if (pcm) { + read_pcm_s32(s, pcm, (__le32 *)buffer, data_blocks); + pcm_frames = data_blocks; + } else { + pcm_frames = 0; + } + + return pcm_frames; +} + +int amdtp_ff_init(struct amdtp_stream *s, struct fw_unit *unit, + enum amdtp_stream_direction dir) +{ + amdtp_stream_process_data_blocks_t process_data_blocks; + + if (dir == AMDTP_IN_STREAM) + process_data_blocks = process_tx_data_blocks; + else + process_data_blocks = process_rx_data_blocks; + + return amdtp_stream_init(s, unit, dir, CIP_NO_HEADER, 0, + process_data_blocks, sizeof(struct amdtp_ff)); +} diff --git a/sound/firewire/fireface/ff-hwdep.c b/sound/firewire/fireface/ff-hwdep.c new file mode 100644 index 0000000000000000000000000000000000000000..3ee04b05458507f9fea3393dd20c2b85e37879d5 --- /dev/null +++ b/sound/firewire/fireface/ff-hwdep.c @@ -0,0 +1,191 @@ +/* + * ff-hwdep.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +/* + * This codes give three functionality. + * + * 1.get firewire node information + * 2.get notification about starting/stopping stream + * 3.lock/unlock stream + */ + +#include "ff.h" + +static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, + loff_t *offset) +{ + struct snd_ff *ff = hwdep->private_data; + DEFINE_WAIT(wait); + union snd_firewire_event event; + + spin_lock_irq(&ff->lock); + + while (!ff->dev_lock_changed) { + prepare_to_wait(&ff->hwdep_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&ff->lock); + schedule(); + finish_wait(&ff->hwdep_wait, &wait); + if (signal_pending(current)) + return -ERESTARTSYS; + spin_lock_irq(&ff->lock); + } + + memset(&event, 0, sizeof(event)); + if (ff->dev_lock_changed) { + event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; + event.lock_status.status = (ff->dev_lock_count > 0); + ff->dev_lock_changed = false; + + count = min_t(long, count, sizeof(event.lock_status)); + } + + spin_unlock_irq(&ff->lock); + + if (copy_to_user(buf, &event, count)) + return -EFAULT; + + return count; +} + +static unsigned int hwdep_poll(struct snd_hwdep *hwdep, struct file *file, + poll_table *wait) +{ + struct snd_ff *ff = hwdep->private_data; + unsigned int events; + + poll_wait(file, &ff->hwdep_wait, wait); + + spin_lock_irq(&ff->lock); + if (ff->dev_lock_changed) + events = POLLIN | POLLRDNORM; + else + events = 0; + spin_unlock_irq(&ff->lock); + + return events; +} + +static int hwdep_get_info(struct snd_ff *ff, void __user *arg) +{ + struct fw_device *dev = fw_parent_device(ff->unit); + struct snd_firewire_get_info info; + + memset(&info, 0, sizeof(info)); + info.type = SNDRV_FIREWIRE_TYPE_FIREFACE; + info.card = dev->card->index; + *(__be32 *)&info.guid[0] = cpu_to_be32(dev->config_rom[3]); + *(__be32 *)&info.guid[4] = cpu_to_be32(dev->config_rom[4]); + strlcpy(info.device_name, dev_name(&dev->device), + sizeof(info.device_name)); + + if (copy_to_user(arg, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static int hwdep_lock(struct snd_ff *ff) +{ + int err; + + spin_lock_irq(&ff->lock); + + if (ff->dev_lock_count == 0) { + ff->dev_lock_count = -1; + err = 0; + } else { + err = -EBUSY; + } + + spin_unlock_irq(&ff->lock); + + return err; +} + +static int hwdep_unlock(struct snd_ff *ff) +{ + int err; + + spin_lock_irq(&ff->lock); + + if (ff->dev_lock_count == -1) { + ff->dev_lock_count = 0; + err = 0; + } else { + err = -EBADFD; + } + + spin_unlock_irq(&ff->lock); + + return err; +} + +static int hwdep_release(struct snd_hwdep *hwdep, struct file *file) +{ + struct snd_ff *ff = hwdep->private_data; + + spin_lock_irq(&ff->lock); + if (ff->dev_lock_count == -1) + ff->dev_lock_count = 0; + spin_unlock_irq(&ff->lock); + + return 0; +} + +static int hwdep_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct snd_ff *ff = hwdep->private_data; + + switch (cmd) { + case SNDRV_FIREWIRE_IOCTL_GET_INFO: + return hwdep_get_info(ff, (void __user *)arg); + case SNDRV_FIREWIRE_IOCTL_LOCK: + return hwdep_lock(ff); + case SNDRV_FIREWIRE_IOCTL_UNLOCK: + return hwdep_unlock(ff); + default: + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +static int hwdep_compat_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return hwdep_ioctl(hwdep, file, cmd, + (unsigned long)compat_ptr(arg)); +} +#else +#define hwdep_compat_ioctl NULL +#endif + +int snd_ff_create_hwdep_devices(struct snd_ff *ff) +{ + static const struct snd_hwdep_ops hwdep_ops = { + .read = hwdep_read, + .release = hwdep_release, + .poll = hwdep_poll, + .ioctl = hwdep_ioctl, + .ioctl_compat = hwdep_compat_ioctl, + }; + struct snd_hwdep *hwdep; + int err; + + err = snd_hwdep_new(ff->card, ff->card->driver, 0, &hwdep); + if (err < 0) + return err; + + strcpy(hwdep->name, ff->card->driver); + hwdep->iface = SNDRV_HWDEP_IFACE_FW_FIREFACE; + hwdep->ops = hwdep_ops; + hwdep->private_data = ff; + hwdep->exclusive = true; + + return 0; +} diff --git a/sound/firewire/fireface/ff-midi.c b/sound/firewire/fireface/ff-midi.c new file mode 100644 index 0000000000000000000000000000000000000000..29ee0a7365c36a4a8d448811b9b6faa9856c2df6 --- /dev/null +++ b/sound/firewire/fireface/ff-midi.c @@ -0,0 +1,131 @@ +/* + * ff-midi.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "ff.h" + +static int midi_capture_open(struct snd_rawmidi_substream *substream) +{ + /* Do nothing. */ + return 0; +} + +static int midi_playback_open(struct snd_rawmidi_substream *substream) +{ + struct snd_ff *ff = substream->rmidi->private_data; + + /* Initialize internal status. */ + ff->running_status[substream->number] = 0; + ff->rx_midi_error[substream->number] = false; + + ACCESS_ONCE(ff->rx_midi_substreams[substream->number]) = substream; + + return 0; +} + +static int midi_capture_close(struct snd_rawmidi_substream *substream) +{ + /* Do nothing. */ + return 0; +} + +static int midi_playback_close(struct snd_rawmidi_substream *substream) +{ + struct snd_ff *ff = substream->rmidi->private_data; + + cancel_work_sync(&ff->rx_midi_work[substream->number]); + ACCESS_ONCE(ff->rx_midi_substreams[substream->number]) = NULL; + + return 0; +} + +static void midi_capture_trigger(struct snd_rawmidi_substream *substream, + int up) +{ + struct snd_ff *ff = substream->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&ff->lock, flags); + + if (up) + ACCESS_ONCE(ff->tx_midi_substreams[substream->number]) = + substream; + else + ACCESS_ONCE(ff->tx_midi_substreams[substream->number]) = NULL; + + spin_unlock_irqrestore(&ff->lock, flags); +} + +static void midi_playback_trigger(struct snd_rawmidi_substream *substream, + int up) +{ + struct snd_ff *ff = substream->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&ff->lock, flags); + + if (up || !ff->rx_midi_error[substream->number]) + schedule_work(&ff->rx_midi_work[substream->number]); + + spin_unlock_irqrestore(&ff->lock, flags); +} + +static struct snd_rawmidi_ops midi_capture_ops = { + .open = midi_capture_open, + .close = midi_capture_close, + .trigger = midi_capture_trigger, +}; + +static struct snd_rawmidi_ops midi_playback_ops = { + .open = midi_playback_open, + .close = midi_playback_close, + .trigger = midi_playback_trigger, +}; + +static void set_midi_substream_names(struct snd_rawmidi_str *stream, + const char *const name) +{ + struct snd_rawmidi_substream *substream; + + list_for_each_entry(substream, &stream->substreams, list) { + snprintf(substream->name, sizeof(substream->name), + "%s MIDI %d", name, substream->number + 1); + } +} + +int snd_ff_create_midi_devices(struct snd_ff *ff) +{ + struct snd_rawmidi *rmidi; + struct snd_rawmidi_str *stream; + int err; + + err = snd_rawmidi_new(ff->card, ff->card->driver, 0, + ff->spec->midi_out_ports, ff->spec->midi_in_ports, + &rmidi); + if (err < 0) + return err; + + snprintf(rmidi->name, sizeof(rmidi->name), + "%s MIDI", ff->card->shortname); + rmidi->private_data = ff; + + rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT; + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, + &midi_capture_ops); + stream = &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]; + set_midi_substream_names(stream, ff->card->shortname); + + rmidi->info_flags |= SNDRV_RAWMIDI_INFO_OUTPUT; + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, + &midi_playback_ops); + stream = &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]; + set_midi_substream_names(stream, ff->card->shortname); + + rmidi->info_flags |= SNDRV_RAWMIDI_INFO_DUPLEX; + + return 0; +} diff --git a/sound/firewire/fireface/ff-pcm.c b/sound/firewire/fireface/ff-pcm.c new file mode 100644 index 0000000000000000000000000000000000000000..93cee1978e8e36f72f87d2ac5b3083c62e4c71b8 --- /dev/null +++ b/sound/firewire/fireface/ff-pcm.c @@ -0,0 +1,409 @@ +/* + * ff-pcm.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "ff.h" + +static inline unsigned int get_multiplier_mode_with_index(unsigned int index) +{ + return ((int)index - 1) / 2; +} + +static int hw_rule_rate(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + const unsigned int *pcm_channels = rule->private; + struct snd_interval *r = + hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); + const struct snd_interval *c = + hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_CHANNELS); + struct snd_interval t = { + .min = UINT_MAX, .max = 0, .integer = 1 + }; + unsigned int i, mode; + + for (i = 0; i < ARRAY_SIZE(amdtp_rate_table); i++) { + mode = get_multiplier_mode_with_index(i); + if (!snd_interval_test(c, pcm_channels[mode])) + continue; + + t.min = min(t.min, amdtp_rate_table[i]); + t.max = max(t.max, amdtp_rate_table[i]); + } + + return snd_interval_refine(r, &t); +} + +static int hw_rule_channels(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + const unsigned int *pcm_channels = rule->private; + struct snd_interval *c = + hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); + const struct snd_interval *r = + hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); + struct snd_interval t = { + .min = UINT_MAX, .max = 0, .integer = 1 + }; + unsigned int i, mode; + + for (i = 0; i < ARRAY_SIZE(amdtp_rate_table); i++) { + mode = get_multiplier_mode_with_index(i); + if (!snd_interval_test(r, amdtp_rate_table[i])) + continue; + + t.min = min(t.min, pcm_channels[mode]); + t.max = max(t.max, pcm_channels[mode]); + } + + return snd_interval_refine(c, &t); +} + +static void limit_channels_and_rates(struct snd_pcm_hardware *hw, + const unsigned int *pcm_channels) +{ + unsigned int mode; + unsigned int rate, channels; + int i; + + hw->channels_min = UINT_MAX; + hw->channels_max = 0; + hw->rate_min = UINT_MAX; + hw->rate_max = 0; + + for (i = 0; i < ARRAY_SIZE(amdtp_rate_table); i++) { + mode = get_multiplier_mode_with_index(i); + + channels = pcm_channels[mode]; + if (pcm_channels[mode] == 0) + continue; + hw->channels_min = min(hw->channels_min, channels); + hw->channels_max = max(hw->channels_max, channels); + + rate = amdtp_rate_table[i]; + hw->rates |= snd_pcm_rate_to_rate_bit(rate); + hw->rate_min = min(hw->rate_min, rate); + hw->rate_max = max(hw->rate_max, rate); + } +} + +static void limit_period_and_buffer(struct snd_pcm_hardware *hw) +{ + hw->periods_min = 2; /* SNDRV_PCM_INFO_BATCH */ + hw->periods_max = UINT_MAX; + + hw->period_bytes_min = 4 * hw->channels_max; /* bytes for a frame */ + + /* Just to prevent from allocating much pages. */ + hw->period_bytes_max = hw->period_bytes_min * 2048; + hw->buffer_bytes_max = hw->period_bytes_max * hw->periods_min; +} + +static int pcm_init_hw_params(struct snd_ff *ff, + struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct amdtp_stream *s; + const unsigned int *pcm_channels; + int err; + + runtime->hw.info = SNDRV_PCM_INFO_BATCH | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_JOINT_DUPLEX | + SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID; + + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { + runtime->hw.formats = SNDRV_PCM_FMTBIT_S32; + s = &ff->tx_stream; + pcm_channels = ff->spec->pcm_capture_channels; + } else { + runtime->hw.formats = SNDRV_PCM_FMTBIT_S32; + s = &ff->rx_stream; + pcm_channels = ff->spec->pcm_playback_channels; + } + + /* limit rates */ + limit_channels_and_rates(&runtime->hw, pcm_channels); + limit_period_and_buffer(&runtime->hw); + + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, + hw_rule_channels, (void *)pcm_channels, + SNDRV_PCM_HW_PARAM_RATE, -1); + if (err < 0) + return err; + + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, + hw_rule_rate, (void *)pcm_channels, + SNDRV_PCM_HW_PARAM_CHANNELS, -1); + if (err < 0) + return err; + + return amdtp_ff_add_pcm_hw_constraints(s, runtime); +} + +static int pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_ff *ff = substream->private_data; + unsigned int rate; + enum snd_ff_clock_src src; + int i, err; + + err = snd_ff_stream_lock_try(ff); + if (err < 0) + return err; + + err = pcm_init_hw_params(ff, substream); + if (err < 0) { + snd_ff_stream_lock_release(ff); + return err; + } + + err = ff->spec->protocol->get_clock(ff, &rate, &src); + if (err < 0) { + snd_ff_stream_lock_release(ff); + return err; + } + + if (src != SND_FF_CLOCK_SRC_INTERNAL) { + for (i = 0; i < CIP_SFC_COUNT; ++i) { + if (amdtp_rate_table[i] == rate) + break; + } + /* + * The unit is configured at sampling frequency which packet + * streaming engine can't support. + */ + if (i >= CIP_SFC_COUNT) { + snd_ff_stream_lock_release(ff); + return -EIO; + } + + substream->runtime->hw.rate_min = rate; + substream->runtime->hw.rate_max = rate; + } else { + if (amdtp_stream_pcm_running(&ff->rx_stream) || + amdtp_stream_pcm_running(&ff->tx_stream)) { + rate = amdtp_rate_table[ff->rx_stream.sfc]; + substream->runtime->hw.rate_min = rate; + substream->runtime->hw.rate_max = rate; + } + } + + snd_pcm_set_sync(substream); + + return 0; +} + +static int pcm_close(struct snd_pcm_substream *substream) +{ + struct snd_ff *ff = substream->private_data; + + snd_ff_stream_lock_release(ff); + + return 0; +} + +static int pcm_capture_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct snd_ff *ff = substream->private_data; + int err; + + err = snd_pcm_lib_alloc_vmalloc_buffer(substream, + params_buffer_bytes(hw_params)); + if (err < 0) + return err; + + if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN) { + mutex_lock(&ff->mutex); + ff->substreams_counter++; + mutex_unlock(&ff->mutex); + } + + return 0; +} + +static int pcm_playback_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct snd_ff *ff = substream->private_data; + int err; + + err = snd_pcm_lib_alloc_vmalloc_buffer(substream, + params_buffer_bytes(hw_params)); + if (err < 0) + return err; + + if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN) { + mutex_lock(&ff->mutex); + ff->substreams_counter++; + mutex_unlock(&ff->mutex); + } + + return 0; +} + +static int pcm_capture_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_ff *ff = substream->private_data; + + mutex_lock(&ff->mutex); + + if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) + ff->substreams_counter--; + + snd_ff_stream_stop_duplex(ff); + + mutex_unlock(&ff->mutex); + + return snd_pcm_lib_free_vmalloc_buffer(substream); +} + +static int pcm_playback_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_ff *ff = substream->private_data; + + mutex_lock(&ff->mutex); + + if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) + ff->substreams_counter--; + + snd_ff_stream_stop_duplex(ff); + + mutex_unlock(&ff->mutex); + + return snd_pcm_lib_free_vmalloc_buffer(substream); +} + +static int pcm_capture_prepare(struct snd_pcm_substream *substream) +{ + struct snd_ff *ff = substream->private_data; + struct snd_pcm_runtime *runtime = substream->runtime; + int err; + + mutex_lock(&ff->mutex); + + err = snd_ff_stream_start_duplex(ff, runtime->rate); + if (err >= 0) + amdtp_stream_pcm_prepare(&ff->tx_stream); + + mutex_unlock(&ff->mutex); + + return err; +} + +static int pcm_playback_prepare(struct snd_pcm_substream *substream) +{ + struct snd_ff *ff = substream->private_data; + struct snd_pcm_runtime *runtime = substream->runtime; + int err; + + mutex_lock(&ff->mutex); + + err = snd_ff_stream_start_duplex(ff, runtime->rate); + if (err >= 0) + amdtp_stream_pcm_prepare(&ff->rx_stream); + + mutex_unlock(&ff->mutex); + + return err; +} + +static int pcm_capture_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_ff *ff = substream->private_data; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + amdtp_stream_pcm_trigger(&ff->tx_stream, substream); + break; + case SNDRV_PCM_TRIGGER_STOP: + amdtp_stream_pcm_trigger(&ff->tx_stream, NULL); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int pcm_playback_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_ff *ff = substream->private_data; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + amdtp_stream_pcm_trigger(&ff->rx_stream, substream); + break; + case SNDRV_PCM_TRIGGER_STOP: + amdtp_stream_pcm_trigger(&ff->rx_stream, NULL); + break; + default: + return -EINVAL; + } + + return 0; +} + +static snd_pcm_uframes_t pcm_capture_pointer(struct snd_pcm_substream *sbstrm) +{ + struct snd_ff *ff = sbstrm->private_data; + + return amdtp_stream_pcm_pointer(&ff->tx_stream); +} + +static snd_pcm_uframes_t pcm_playback_pointer(struct snd_pcm_substream *sbstrm) +{ + struct snd_ff *ff = sbstrm->private_data; + + return amdtp_stream_pcm_pointer(&ff->rx_stream); +} + +static struct snd_pcm_ops pcm_capture_ops = { + .open = pcm_open, + .close = pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = pcm_capture_hw_params, + .hw_free = pcm_capture_hw_free, + .prepare = pcm_capture_prepare, + .trigger = pcm_capture_trigger, + .pointer = pcm_capture_pointer, + .page = snd_pcm_lib_get_vmalloc_page, +}; + +static struct snd_pcm_ops pcm_playback_ops = { + .open = pcm_open, + .close = pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = pcm_playback_hw_params, + .hw_free = pcm_playback_hw_free, + .prepare = pcm_playback_prepare, + .trigger = pcm_playback_trigger, + .pointer = pcm_playback_pointer, + .page = snd_pcm_lib_get_vmalloc_page, + .mmap = snd_pcm_lib_mmap_vmalloc, +}; + +int snd_ff_create_pcm_devices(struct snd_ff *ff) +{ + struct snd_pcm *pcm; + int err; + + err = snd_pcm_new(ff->card, ff->card->driver, 0, 1, 1, &pcm); + if (err < 0) + return err; + + pcm->private_data = ff; + snprintf(pcm->name, sizeof(pcm->name), + "%s PCM", ff->card->shortname); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &pcm_playback_ops); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &pcm_capture_ops); + + return 0; +} diff --git a/sound/firewire/fireface/ff-proc.c b/sound/firewire/fireface/ff-proc.c new file mode 100644 index 0000000000000000000000000000000000000000..69441d121f71b4662d6106e31304d081f14de2c5 --- /dev/null +++ b/sound/firewire/fireface/ff-proc.c @@ -0,0 +1,63 @@ +/* + * ff-proc.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "./ff.h" + +static void proc_dump_clock_config(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + struct snd_ff *ff = entry->private_data; + + ff->spec->protocol->dump_clock_config(ff, buffer); +} + +static void proc_dump_sync_status(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + struct snd_ff *ff = entry->private_data; + + ff->spec->protocol->dump_sync_status(ff, buffer); +} + +static void add_node(struct snd_ff *ff, struct snd_info_entry *root, + const char *name, + void (*op)(struct snd_info_entry *e, + struct snd_info_buffer *b)) +{ + struct snd_info_entry *entry; + + entry = snd_info_create_card_entry(ff->card, name, root); + if (entry == NULL) + return; + + snd_info_set_text_ops(entry, ff, op); + if (snd_info_register(entry) < 0) + snd_info_free_entry(entry); +} + +void snd_ff_proc_init(struct snd_ff *ff) +{ + struct snd_info_entry *root; + + /* + * All nodes are automatically removed at snd_card_disconnect(), + * by following to link list. + */ + root = snd_info_create_card_entry(ff->card, "firewire", + ff->card->proc_root); + if (root == NULL) + return; + root->mode = S_IFDIR | S_IRUGO | S_IXUGO; + if (snd_info_register(root) < 0) { + snd_info_free_entry(root); + return; + } + + add_node(ff, root, "clock-config", proc_dump_clock_config); + add_node(ff, root, "sync-status", proc_dump_sync_status); +} diff --git a/sound/firewire/fireface/ff-protocol-ff400.c b/sound/firewire/fireface/ff-protocol-ff400.c new file mode 100644 index 0000000000000000000000000000000000000000..fcec6de80eebc724e3f59c7761c783f6371aa804 --- /dev/null +++ b/sound/firewire/fireface/ff-protocol-ff400.c @@ -0,0 +1,371 @@ +/* + * ff-protocol-ff400.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include +#include "ff.h" + +#define FF400_STF 0x000080100500ull +#define FF400_RX_PACKET_FORMAT 0x000080100504ull +#define FF400_ISOC_COMM_START 0x000080100508ull +#define FF400_TX_PACKET_FORMAT 0x00008010050cull +#define FF400_ISOC_COMM_STOP 0x000080100510ull +#define FF400_SYNC_STATUS 0x0000801c0000ull +#define FF400_FETCH_PCM_FRAMES 0x0000801c0000ull /* For block request. */ +#define FF400_CLOCK_CONFIG 0x0000801c0004ull + +#define FF400_MIDI_HIGH_ADDR 0x0000801003f4ull +#define FF400_MIDI_RX_PORT_0 0x000080180000ull +#define FF400_MIDI_RX_PORT_1 0x000080190000ull + +static int ff400_get_clock(struct snd_ff *ff, unsigned int *rate, + enum snd_ff_clock_src *src) +{ + __le32 reg; + u32 data; + int err; + + err = snd_fw_transaction(ff->unit, TCODE_READ_QUADLET_REQUEST, + FF400_SYNC_STATUS, ®, sizeof(reg), 0); + if (err < 0) + return err; + data = le32_to_cpu(reg); + + /* Calculate sampling rate. */ + switch ((data >> 1) & 0x03) { + case 0x01: + *rate = 32000; + break; + case 0x00: + *rate = 44100; + break; + case 0x03: + *rate = 48000; + break; + case 0x02: + default: + return -EIO; + } + + if (data & 0x08) + *rate *= 2; + else if (data & 0x10) + *rate *= 4; + + /* Calculate source of clock. */ + if (data & 0x01) { + *src = SND_FF_CLOCK_SRC_INTERNAL; + } else { + /* TODO: 0x00, 0x01, 0x02, 0x06, 0x07? */ + switch ((data >> 10) & 0x07) { + case 0x03: + *src = SND_FF_CLOCK_SRC_SPDIF; + break; + case 0x04: + *src = SND_FF_CLOCK_SRC_WORD; + break; + case 0x05: + *src = SND_FF_CLOCK_SRC_LTC; + break; + case 0x00: + default: + *src = SND_FF_CLOCK_SRC_ADAT; + break; + } + } + + return 0; +} + +static int ff400_begin_session(struct snd_ff *ff, unsigned int rate) +{ + __le32 reg; + int i, err; + + /* Check whether the given value is supported or not. */ + for (i = 0; i < CIP_SFC_COUNT; i++) { + if (amdtp_rate_table[i] == rate) + break; + } + if (i == CIP_SFC_COUNT) + return -EINVAL; + + /* Set the number of data blocks transferred in a second. */ + reg = cpu_to_le32(rate); + err = snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + FF400_STF, ®, sizeof(reg), 0); + if (err < 0) + return err; + + msleep(100); + + /* + * Set isochronous channel and the number of quadlets of received + * packets. + */ + reg = cpu_to_le32(((ff->rx_stream.data_block_quadlets << 3) << 8) | + ff->rx_resources.channel); + err = snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + FF400_RX_PACKET_FORMAT, ®, sizeof(reg), 0); + if (err < 0) + return err; + + /* + * Set isochronous channel and the number of quadlets of transmitted + * packet. + */ + /* TODO: investigate the purpose of this 0x80. */ + reg = cpu_to_le32((0x80 << 24) | + (ff->tx_resources.channel << 5) | + (ff->tx_stream.data_block_quadlets)); + err = snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + FF400_TX_PACKET_FORMAT, ®, sizeof(reg), 0); + if (err < 0) + return err; + + /* Allow to transmit packets. */ + reg = cpu_to_le32(0x00000001); + return snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + FF400_ISOC_COMM_START, ®, sizeof(reg), 0); +} + +static void ff400_finish_session(struct snd_ff *ff) +{ + __le32 reg; + + reg = cpu_to_le32(0x80000000); + snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + FF400_ISOC_COMM_STOP, ®, sizeof(reg), 0); +} + +static int ff400_switch_fetching_mode(struct snd_ff *ff, bool enable) +{ + __le32 *reg; + int i; + + reg = kzalloc(sizeof(__le32) * 18, GFP_KERNEL); + if (reg == NULL) + return -ENOMEM; + + if (enable) { + /* + * Each quadlet is corresponding to data channels in a data + * blocks in reverse order. Precisely, quadlets for available + * data channels should be enabled. Here, I take second best + * to fetch PCM frames from all of data channels regardless of + * stf. + */ + for (i = 0; i < 18; ++i) + reg[i] = cpu_to_le32(0x00000001); + } + + return snd_fw_transaction(ff->unit, TCODE_WRITE_BLOCK_REQUEST, + FF400_FETCH_PCM_FRAMES, reg, + sizeof(__le32) * 18, 0); +} + +static void ff400_dump_sync_status(struct snd_ff *ff, + struct snd_info_buffer *buffer) +{ + __le32 reg; + u32 data; + int err; + + err = snd_fw_transaction(ff->unit, TCODE_READ_QUADLET_REQUEST, + FF400_SYNC_STATUS, ®, sizeof(reg), 0); + if (err < 0) + return; + + data = le32_to_cpu(reg); + + snd_iprintf(buffer, "External source detection:\n"); + + snd_iprintf(buffer, "Word Clock:"); + if ((data >> 24) & 0x20) { + if ((data >> 24) & 0x40) + snd_iprintf(buffer, "sync\n"); + else + snd_iprintf(buffer, "lock\n"); + } else { + snd_iprintf(buffer, "none\n"); + } + + snd_iprintf(buffer, "S/PDIF:"); + if ((data >> 16) & 0x10) { + if ((data >> 16) & 0x04) + snd_iprintf(buffer, "sync\n"); + else + snd_iprintf(buffer, "lock\n"); + } else { + snd_iprintf(buffer, "none\n"); + } + + snd_iprintf(buffer, "ADAT:"); + if ((data >> 8) & 0x04) { + if ((data >> 8) & 0x10) + snd_iprintf(buffer, "sync\n"); + else + snd_iprintf(buffer, "lock\n"); + } else { + snd_iprintf(buffer, "none\n"); + } + + snd_iprintf(buffer, "\nUsed external source:\n"); + + if (((data >> 22) & 0x07) == 0x07) { + snd_iprintf(buffer, "None\n"); + } else { + switch ((data >> 22) & 0x07) { + case 0x00: + snd_iprintf(buffer, "ADAT:"); + break; + case 0x03: + snd_iprintf(buffer, "S/PDIF:"); + break; + case 0x04: + snd_iprintf(buffer, "Word:"); + break; + case 0x07: + snd_iprintf(buffer, "Nothing:"); + break; + case 0x01: + case 0x02: + case 0x05: + case 0x06: + default: + snd_iprintf(buffer, "unknown:"); + break; + } + + if ((data >> 25) & 0x07) { + switch ((data >> 25) & 0x07) { + case 0x01: + snd_iprintf(buffer, "32000\n"); + break; + case 0x02: + snd_iprintf(buffer, "44100\n"); + break; + case 0x03: + snd_iprintf(buffer, "48000\n"); + break; + case 0x04: + snd_iprintf(buffer, "64000\n"); + break; + case 0x05: + snd_iprintf(buffer, "88200\n"); + break; + case 0x06: + snd_iprintf(buffer, "96000\n"); + break; + case 0x07: + snd_iprintf(buffer, "128000\n"); + break; + case 0x08: + snd_iprintf(buffer, "176400\n"); + break; + case 0x09: + snd_iprintf(buffer, "192000\n"); + break; + case 0x00: + snd_iprintf(buffer, "unknown\n"); + break; + } + } + } + + snd_iprintf(buffer, "Multiplied:"); + snd_iprintf(buffer, "%d\n", (data & 0x3ff) * 250); +} + +static void ff400_dump_clock_config(struct snd_ff *ff, + struct snd_info_buffer *buffer) +{ + __le32 reg; + u32 data; + unsigned int rate; + const char *src; + int err; + + err = snd_fw_transaction(ff->unit, TCODE_READ_BLOCK_REQUEST, + FF400_CLOCK_CONFIG, ®, sizeof(reg), 0); + if (err < 0) + return; + + data = le32_to_cpu(reg); + + snd_iprintf(buffer, "Output S/PDIF format: %s (Emphasis: %s)\n", + (data & 0x20) ? "Professional" : "Consumer", + (data & 0x40) ? "on" : "off"); + + snd_iprintf(buffer, "Optical output interface format: %s\n", + ((data >> 8) & 0x01) ? "S/PDIF" : "ADAT"); + + snd_iprintf(buffer, "Word output single speed: %s\n", + ((data >> 8) & 0x20) ? "on" : "off"); + + snd_iprintf(buffer, "S/PDIF input interface: %s\n", + ((data >> 8) & 0x02) ? "Optical" : "Coaxial"); + + switch ((data >> 1) & 0x03) { + case 0x01: + rate = 32000; + break; + case 0x00: + rate = 44100; + break; + case 0x03: + rate = 48000; + break; + case 0x02: + default: + return; + } + + if (data & 0x08) + rate *= 2; + else if (data & 0x10) + rate *= 4; + + snd_iprintf(buffer, "Sampling rate: %d\n", rate); + + if (data & 0x01) { + src = "Internal"; + } else { + switch ((data >> 10) & 0x07) { + case 0x00: + src = "ADAT"; + break; + case 0x03: + src = "S/PDIF"; + break; + case 0x04: + src = "Word"; + break; + case 0x05: + src = "LTC"; + break; + default: + return; + } + } + + snd_iprintf(buffer, "Sync to clock source: %s\n", src); +} + +struct snd_ff_protocol snd_ff_protocol_ff400 = { + .get_clock = ff400_get_clock, + .begin_session = ff400_begin_session, + .finish_session = ff400_finish_session, + .switch_fetching_mode = ff400_switch_fetching_mode, + + .dump_sync_status = ff400_dump_sync_status, + .dump_clock_config = ff400_dump_clock_config, + + .midi_high_addr_reg = FF400_MIDI_HIGH_ADDR, + .midi_rx_port_0_reg = FF400_MIDI_RX_PORT_0, + .midi_rx_port_1_reg = FF400_MIDI_RX_PORT_1, +}; diff --git a/sound/firewire/fireface/ff-stream.c b/sound/firewire/fireface/ff-stream.c new file mode 100644 index 0000000000000000000000000000000000000000..78880922120e77b7493b47f0c691f79d8d3cb176 --- /dev/null +++ b/sound/firewire/fireface/ff-stream.c @@ -0,0 +1,282 @@ +/* + * ff-stream.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "ff.h" + +#define CALLBACK_TIMEOUT_MS 200 + +static int get_rate_mode(unsigned int rate, unsigned int *mode) +{ + int i; + + for (i = 0; i < CIP_SFC_COUNT; i++) { + if (amdtp_rate_table[i] == rate) + break; + } + + if (i == CIP_SFC_COUNT) + return -EINVAL; + + *mode = ((int)i - 1) / 2; + + return 0; +} + +/* + * Fireface 400 manages isochronous channel number in 3 bit field. Therefore, + * we can allocate between 0 and 7 channel. + */ +static int keep_resources(struct snd_ff *ff, unsigned int rate) +{ + int mode; + int err; + + err = get_rate_mode(rate, &mode); + if (err < 0) + return err; + + /* Keep resources for in-stream. */ + err = amdtp_ff_set_parameters(&ff->tx_stream, rate, + ff->spec->pcm_capture_channels[mode]); + if (err < 0) + return err; + ff->tx_resources.channels_mask = 0x00000000000000ffuLL; + err = fw_iso_resources_allocate(&ff->tx_resources, + amdtp_stream_get_max_payload(&ff->tx_stream), + fw_parent_device(ff->unit)->max_speed); + if (err < 0) + return err; + + /* Keep resources for out-stream. */ + err = amdtp_ff_set_parameters(&ff->rx_stream, rate, + ff->spec->pcm_playback_channels[mode]); + if (err < 0) + return err; + ff->rx_resources.channels_mask = 0x00000000000000ffuLL; + err = fw_iso_resources_allocate(&ff->rx_resources, + amdtp_stream_get_max_payload(&ff->rx_stream), + fw_parent_device(ff->unit)->max_speed); + if (err < 0) + fw_iso_resources_free(&ff->tx_resources); + + return err; +} + +static void release_resources(struct snd_ff *ff) +{ + fw_iso_resources_free(&ff->tx_resources); + fw_iso_resources_free(&ff->rx_resources); +} + +static inline void finish_session(struct snd_ff *ff) +{ + ff->spec->protocol->finish_session(ff); + ff->spec->protocol->switch_fetching_mode(ff, false); +} + +static int init_stream(struct snd_ff *ff, enum amdtp_stream_direction dir) +{ + int err; + struct fw_iso_resources *resources; + struct amdtp_stream *stream; + + if (dir == AMDTP_IN_STREAM) { + resources = &ff->tx_resources; + stream = &ff->tx_stream; + } else { + resources = &ff->rx_resources; + stream = &ff->rx_stream; + } + + err = fw_iso_resources_init(resources, ff->unit); + if (err < 0) + return err; + + err = amdtp_ff_init(stream, ff->unit, dir); + if (err < 0) + fw_iso_resources_destroy(resources); + + return err; +} + +static void destroy_stream(struct snd_ff *ff, enum amdtp_stream_direction dir) +{ + if (dir == AMDTP_IN_STREAM) { + amdtp_stream_destroy(&ff->tx_stream); + fw_iso_resources_destroy(&ff->tx_resources); + } else { + amdtp_stream_destroy(&ff->rx_stream); + fw_iso_resources_destroy(&ff->rx_resources); + } +} + +int snd_ff_stream_init_duplex(struct snd_ff *ff) +{ + int err; + + err = init_stream(ff, AMDTP_OUT_STREAM); + if (err < 0) + goto end; + + err = init_stream(ff, AMDTP_IN_STREAM); + if (err < 0) + destroy_stream(ff, AMDTP_OUT_STREAM); +end: + return err; +} + +/* + * This function should be called before starting streams or after stopping + * streams. + */ +void snd_ff_stream_destroy_duplex(struct snd_ff *ff) +{ + destroy_stream(ff, AMDTP_IN_STREAM); + destroy_stream(ff, AMDTP_OUT_STREAM); +} + +int snd_ff_stream_start_duplex(struct snd_ff *ff, unsigned int rate) +{ + unsigned int curr_rate; + enum snd_ff_clock_src src; + int err; + + if (ff->substreams_counter == 0) + return 0; + + err = ff->spec->protocol->get_clock(ff, &curr_rate, &src); + if (err < 0) + return err; + if (curr_rate != rate || + amdtp_streaming_error(&ff->tx_stream) || + amdtp_streaming_error(&ff->rx_stream)) { + finish_session(ff); + + amdtp_stream_stop(&ff->tx_stream); + amdtp_stream_stop(&ff->rx_stream); + + release_resources(ff); + } + + /* + * Regardless of current source of clock signal, drivers transfer some + * packets. Then, the device transfers packets. + */ + if (!amdtp_stream_running(&ff->rx_stream)) { + err = keep_resources(ff, rate); + if (err < 0) + goto error; + + err = ff->spec->protocol->begin_session(ff, rate); + if (err < 0) + goto error; + + err = amdtp_stream_start(&ff->rx_stream, + ff->rx_resources.channel, + fw_parent_device(ff->unit)->max_speed); + if (err < 0) + goto error; + + if (!amdtp_stream_wait_callback(&ff->rx_stream, + CALLBACK_TIMEOUT_MS)) { + err = -ETIMEDOUT; + goto error; + } + + err = ff->spec->protocol->switch_fetching_mode(ff, true); + if (err < 0) + goto error; + } + + if (!amdtp_stream_running(&ff->tx_stream)) { + err = amdtp_stream_start(&ff->tx_stream, + ff->tx_resources.channel, + fw_parent_device(ff->unit)->max_speed); + if (err < 0) + goto error; + + if (!amdtp_stream_wait_callback(&ff->tx_stream, + CALLBACK_TIMEOUT_MS)) { + err = -ETIMEDOUT; + goto error; + } + } + + return 0; +error: + amdtp_stream_stop(&ff->tx_stream); + amdtp_stream_stop(&ff->rx_stream); + + finish_session(ff); + release_resources(ff); + + return err; +} + +void snd_ff_stream_stop_duplex(struct snd_ff *ff) +{ + if (ff->substreams_counter > 0) + return; + + amdtp_stream_stop(&ff->tx_stream); + amdtp_stream_stop(&ff->rx_stream); + finish_session(ff); + release_resources(ff); +} + +void snd_ff_stream_update_duplex(struct snd_ff *ff) +{ + /* The device discontinue to transfer packets. */ + amdtp_stream_pcm_abort(&ff->tx_stream); + amdtp_stream_stop(&ff->tx_stream); + + amdtp_stream_pcm_abort(&ff->rx_stream); + amdtp_stream_stop(&ff->rx_stream); + + fw_iso_resources_update(&ff->tx_resources); + fw_iso_resources_update(&ff->rx_resources); +} + +void snd_ff_stream_lock_changed(struct snd_ff *ff) +{ + ff->dev_lock_changed = true; + wake_up(&ff->hwdep_wait); +} + +int snd_ff_stream_lock_try(struct snd_ff *ff) +{ + int err; + + spin_lock_irq(&ff->lock); + + /* user land lock this */ + if (ff->dev_lock_count < 0) { + err = -EBUSY; + goto end; + } + + /* this is the first time */ + if (ff->dev_lock_count++ == 0) + snd_ff_stream_lock_changed(ff); + err = 0; +end: + spin_unlock_irq(&ff->lock); + return err; +} + +void snd_ff_stream_lock_release(struct snd_ff *ff) +{ + spin_lock_irq(&ff->lock); + + if (WARN_ON(ff->dev_lock_count <= 0)) + goto end; + if (--ff->dev_lock_count == 0) + snd_ff_stream_lock_changed(ff); +end: + spin_unlock_irq(&ff->lock); +} diff --git a/sound/firewire/fireface/ff-transaction.c b/sound/firewire/fireface/ff-transaction.c new file mode 100644 index 0000000000000000000000000000000000000000..dd6c8e839647f64d8eddc203b199e30014bc2738 --- /dev/null +++ b/sound/firewire/fireface/ff-transaction.c @@ -0,0 +1,295 @@ +/* + * ff-transaction.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "ff.h" + +static void finish_transmit_midi_msg(struct snd_ff *ff, unsigned int port, + int rcode) +{ + struct snd_rawmidi_substream *substream = + ACCESS_ONCE(ff->rx_midi_substreams[port]); + + if (rcode_is_permanent_error(rcode)) { + ff->rx_midi_error[port] = true; + return; + } + + if (rcode != RCODE_COMPLETE) { + /* Transfer the message again, immediately. */ + ff->next_ktime[port] = 0; + schedule_work(&ff->rx_midi_work[port]); + return; + } + + snd_rawmidi_transmit_ack(substream, ff->rx_bytes[port]); + ff->rx_bytes[port] = 0; + + if (!snd_rawmidi_transmit_empty(substream)) + schedule_work(&ff->rx_midi_work[port]); +} + +static void finish_transmit_midi0_msg(struct fw_card *card, int rcode, + void *data, size_t length, + void *callback_data) +{ + struct snd_ff *ff = + container_of(callback_data, struct snd_ff, transactions[0]); + finish_transmit_midi_msg(ff, 0, rcode); +} + +static void finish_transmit_midi1_msg(struct fw_card *card, int rcode, + void *data, size_t length, + void *callback_data) +{ + struct snd_ff *ff = + container_of(callback_data, struct snd_ff, transactions[1]); + finish_transmit_midi_msg(ff, 1, rcode); +} + +static inline void fill_midi_buf(struct snd_ff *ff, unsigned int port, + unsigned int index, u8 byte) +{ + ff->msg_buf[port][index] = cpu_to_le32(byte); +} + +static void transmit_midi_msg(struct snd_ff *ff, unsigned int port) +{ + struct snd_rawmidi_substream *substream = + ACCESS_ONCE(ff->rx_midi_substreams[port]); + u8 *buf = (u8 *)ff->msg_buf[port]; + int i, len; + + struct fw_device *fw_dev = fw_parent_device(ff->unit); + unsigned long long addr; + int generation; + fw_transaction_callback_t callback; + + if (substream == NULL || snd_rawmidi_transmit_empty(substream)) + return; + + if (ff->rx_bytes[port] > 0 || ff->rx_midi_error[port]) + return; + + /* Do it in next chance. */ + if (ktime_after(ff->next_ktime[port], ktime_get())) { + schedule_work(&ff->rx_midi_work[port]); + return; + } + + len = snd_rawmidi_transmit_peek(substream, buf, + SND_FF_MAXIMIM_MIDI_QUADS); + if (len <= 0) + return; + + for (i = len - 1; i >= 0; i--) + fill_midi_buf(ff, port, i, buf[i]); + + if (port == 0) { + addr = ff->spec->protocol->midi_rx_port_0_reg; + callback = finish_transmit_midi0_msg; + } else { + addr = ff->spec->protocol->midi_rx_port_1_reg; + callback = finish_transmit_midi1_msg; + } + + /* Set interval to next transaction. */ + ff->next_ktime[port] = ktime_add_ns(ktime_get(), + len * 8 * NSEC_PER_SEC / 31250); + ff->rx_bytes[port] = len; + + /* + * In Linux FireWire core, when generation is updated with memory + * barrier, node id has already been updated. In this module, After + * this smp_rmb(), load/store instructions to memory are completed. + * Thus, both of generation and node id are available with recent + * values. This is a light-serialization solution to handle bus reset + * events on IEEE 1394 bus. + */ + generation = fw_dev->generation; + smp_rmb(); + fw_send_request(fw_dev->card, &ff->transactions[port], + TCODE_WRITE_BLOCK_REQUEST, + fw_dev->node_id, generation, fw_dev->max_speed, + addr, &ff->msg_buf[port], len * 4, + callback, &ff->transactions[port]); +} + +static void transmit_midi0_msg(struct work_struct *work) +{ + struct snd_ff *ff = container_of(work, struct snd_ff, rx_midi_work[0]); + + transmit_midi_msg(ff, 0); +} + +static void transmit_midi1_msg(struct work_struct *work) +{ + struct snd_ff *ff = container_of(work, struct snd_ff, rx_midi_work[1]); + + transmit_midi_msg(ff, 1); +} + +static void handle_midi_msg(struct fw_card *card, struct fw_request *request, + int tcode, int destination, int source, + int generation, unsigned long long offset, + void *data, size_t length, void *callback_data) +{ + struct snd_ff *ff = callback_data; + __le32 *buf = data; + u32 quad; + u8 byte; + unsigned int index; + struct snd_rawmidi_substream *substream; + int i; + + fw_send_response(card, request, RCODE_COMPLETE); + + for (i = 0; i < length / 4; i++) { + quad = le32_to_cpu(buf[i]); + + /* Message in first port. */ + /* + * This value may represent the index of this unit when the same + * units are on the same IEEE 1394 bus. This driver doesn't use + * it. + */ + index = (quad >> 8) & 0xff; + if (index > 0) { + substream = ACCESS_ONCE(ff->tx_midi_substreams[0]); + if (substream != NULL) { + byte = quad & 0xff; + snd_rawmidi_receive(substream, &byte, 1); + } + } + + /* Message in second port. */ + index = (quad >> 24) & 0xff; + if (index > 0) { + substream = ACCESS_ONCE(ff->tx_midi_substreams[1]); + if (substream != NULL) { + byte = (quad >> 16) & 0xff; + snd_rawmidi_receive(substream, &byte, 1); + } + } + } +} + +static int allocate_own_address(struct snd_ff *ff, int i) +{ + struct fw_address_region midi_msg_region; + int err; + + ff->async_handler.length = SND_FF_MAXIMIM_MIDI_QUADS * 4; + ff->async_handler.address_callback = handle_midi_msg; + ff->async_handler.callback_data = ff; + + midi_msg_region.start = 0x000100000000ull * i; + midi_msg_region.end = midi_msg_region.start + ff->async_handler.length; + + err = fw_core_add_address_handler(&ff->async_handler, &midi_msg_region); + if (err >= 0) { + /* Controllers are allowed to register this region. */ + if (ff->async_handler.offset & 0x0000ffffffff) { + fw_core_remove_address_handler(&ff->async_handler); + err = -EAGAIN; + } + } + + return err; +} + +/* + * The configuration to start asynchronous transactions for MIDI messages is in + * 0x'0000'8010'051c. This register includes the other options, thus this driver + * doesn't touch it and leaves the decision to userspace. The userspace MUST add + * 0x04000000 to write transactions to the register to receive any MIDI + * messages. + * + * Here, I just describe MIDI-related offsets of the register, in little-endian + * order. + * + * Controllers are allowed to register higher 4 bytes of address to receive + * the transactions. The register is 0x'0000'8010'03f4. On the other hand, the + * controllers are not allowed to register lower 4 bytes of the address. They + * are forced to select from 4 options by writing corresponding bits to + * 0x'0000'8010'051c. + * + * The 3rd-6th bits in MSB of this register are used to indicate lower 4 bytes + * of address to which the device transferrs the transactions. + * - 6th: 0x'....'....'0000'0180 + * - 5th: 0x'....'....'0000'0100 + * - 4th: 0x'....'....'0000'0080 + * - 3rd: 0x'....'....'0000'0000 + * + * This driver configure 0x'....'....'0000'0000 for units to receive MIDI + * messages. 3rd bit of the register should be configured, however this driver + * deligates this task to user space applications due to a restriction that + * this register is write-only and the other bits have own effects. + * + * The 1st and 2nd bits in LSB of this register are used to cancel transferring + * asynchronous transactions. These two bits have the same effect. + * - 1st/2nd: cancel transferring + */ +int snd_ff_transaction_reregister(struct snd_ff *ff) +{ + struct fw_card *fw_card = fw_parent_device(ff->unit)->card; + u32 addr; + __le32 reg; + + /* + * Controllers are allowed to register its node ID and upper 2 byte of + * local address to listen asynchronous transactions. + */ + addr = (fw_card->node_id << 16) | (ff->async_handler.offset >> 32); + reg = cpu_to_le32(addr); + return snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + ff->spec->protocol->midi_high_addr_reg, + ®, sizeof(reg), 0); +} + +int snd_ff_transaction_register(struct snd_ff *ff) +{ + int i, err; + + /* + * Allocate in Memory Space of IEC 13213, but lower 4 byte in LSB should + * be zero due to device specification. + */ + for (i = 0; i < 0xffff; i++) { + err = allocate_own_address(ff, i); + if (err != -EBUSY && err != -EAGAIN) + break; + } + if (err < 0) + return err; + + err = snd_ff_transaction_reregister(ff); + if (err < 0) + return err; + + INIT_WORK(&ff->rx_midi_work[0], transmit_midi0_msg); + INIT_WORK(&ff->rx_midi_work[1], transmit_midi1_msg); + + return 0; +} + +void snd_ff_transaction_unregister(struct snd_ff *ff) +{ + __le32 reg; + + if (ff->async_handler.callback_data == NULL) + return; + ff->async_handler.callback_data = NULL; + + /* Release higher 4 bytes of address. */ + reg = cpu_to_le32(0x00000000); + snd_fw_transaction(ff->unit, TCODE_WRITE_QUADLET_REQUEST, + ff->spec->protocol->midi_high_addr_reg, + ®, sizeof(reg), 0); + + fw_core_remove_address_handler(&ff->async_handler); +} diff --git a/sound/firewire/fireface/ff.c b/sound/firewire/fireface/ff.c new file mode 100644 index 0000000000000000000000000000000000000000..eee7c8eac7a61908b53dfa0862b0ca0b5e98ab2a --- /dev/null +++ b/sound/firewire/fireface/ff.c @@ -0,0 +1,209 @@ +/* + * ff.c - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "ff.h" + +#define OUI_RME 0x000a35 + +MODULE_DESCRIPTION("RME Fireface series Driver"); +MODULE_AUTHOR("Takashi Sakamoto "); +MODULE_LICENSE("GPL v2"); + +static void name_card(struct snd_ff *ff) +{ + struct fw_device *fw_dev = fw_parent_device(ff->unit); + + strcpy(ff->card->driver, "Fireface"); + strcpy(ff->card->shortname, ff->spec->name); + strcpy(ff->card->mixername, ff->spec->name); + snprintf(ff->card->longname, sizeof(ff->card->longname), + "RME %s, GUID %08x%08x at %s, S%d", ff->spec->name, + fw_dev->config_rom[3], fw_dev->config_rom[4], + dev_name(&ff->unit->device), 100 << fw_dev->max_speed); +} + +static void ff_free(struct snd_ff *ff) +{ + snd_ff_stream_destroy_duplex(ff); + snd_ff_transaction_unregister(ff); + + fw_unit_put(ff->unit); + + mutex_destroy(&ff->mutex); + kfree(ff); +} + +static void ff_card_free(struct snd_card *card) +{ + ff_free(card->private_data); +} + +static void do_registration(struct work_struct *work) +{ + struct snd_ff *ff = container_of(work, struct snd_ff, dwork.work); + int err; + + if (ff->registered) + return; + + err = snd_card_new(&ff->unit->device, -1, NULL, THIS_MODULE, 0, + &ff->card); + if (err < 0) + return; + + err = snd_ff_transaction_register(ff); + if (err < 0) + goto error; + + name_card(ff); + + err = snd_ff_stream_init_duplex(ff); + if (err < 0) + goto error; + + snd_ff_proc_init(ff); + + err = snd_ff_create_midi_devices(ff); + if (err < 0) + goto error; + + err = snd_ff_create_pcm_devices(ff); + if (err < 0) + goto error; + + err = snd_ff_create_hwdep_devices(ff); + if (err < 0) + goto error; + + err = snd_card_register(ff->card); + if (err < 0) + goto error; + + ff->card->private_free = ff_card_free; + ff->card->private_data = ff; + ff->registered = true; + + return; +error: + snd_ff_transaction_unregister(ff); + snd_ff_stream_destroy_duplex(ff); + snd_card_free(ff->card); + dev_info(&ff->unit->device, + "Sound card registration failed: %d\n", err); +} + +static int snd_ff_probe(struct fw_unit *unit, + const struct ieee1394_device_id *entry) +{ + struct snd_ff *ff; + + ff = kzalloc(sizeof(struct snd_ff), GFP_KERNEL); + if (ff == NULL) + return -ENOMEM; + + /* initialize myself */ + ff->unit = fw_unit_get(unit); + dev_set_drvdata(&unit->device, ff); + + mutex_init(&ff->mutex); + spin_lock_init(&ff->lock); + init_waitqueue_head(&ff->hwdep_wait); + + ff->spec = (const struct snd_ff_spec *)entry->driver_data; + + /* Register this sound card later. */ + INIT_DEFERRABLE_WORK(&ff->dwork, do_registration); + snd_fw_schedule_registration(unit, &ff->dwork); + + return 0; +} + +static void snd_ff_update(struct fw_unit *unit) +{ + struct snd_ff *ff = dev_get_drvdata(&unit->device); + + /* Postpone a workqueue for deferred registration. */ + if (!ff->registered) + snd_fw_schedule_registration(unit, &ff->dwork); + + snd_ff_transaction_reregister(ff); + + if (ff->registered) + snd_ff_stream_update_duplex(ff); +} + +static void snd_ff_remove(struct fw_unit *unit) +{ + struct snd_ff *ff = dev_get_drvdata(&unit->device); + + /* + * Confirm to stop the work for registration before the sound card is + * going to be released. The work is not scheduled again because bus + * reset handler is not called anymore. + */ + cancel_work_sync(&ff->dwork.work); + + if (ff->registered) { + /* No need to wait for releasing card object in this context. */ + snd_card_free_when_closed(ff->card); + } else { + /* Don't forget this case. */ + ff_free(ff); + } +} + +static struct snd_ff_spec spec_ff400 = { + .name = "Fireface400", + .pcm_capture_channels = {18, 14, 10}, + .pcm_playback_channels = {18, 14, 10}, + .midi_in_ports = 2, + .midi_out_ports = 2, + .protocol = &snd_ff_protocol_ff400, +}; + +static const struct ieee1394_device_id snd_ff_id_table[] = { + /* Fireface 400 */ + { + .match_flags = IEEE1394_MATCH_VENDOR_ID | + IEEE1394_MATCH_SPECIFIER_ID | + IEEE1394_MATCH_VERSION | + IEEE1394_MATCH_MODEL_ID, + .vendor_id = OUI_RME, + .specifier_id = 0x000a35, + .version = 0x000002, + .model_id = 0x101800, + .driver_data = (kernel_ulong_t)&spec_ff400, + }, + {} +}; +MODULE_DEVICE_TABLE(ieee1394, snd_ff_id_table); + +static struct fw_driver ff_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "snd-fireface", + .bus = &fw_bus_type, + }, + .probe = snd_ff_probe, + .update = snd_ff_update, + .remove = snd_ff_remove, + .id_table = snd_ff_id_table, +}; + +static int __init snd_ff_init(void) +{ + return driver_register(&ff_driver.driver); +} + +static void __exit snd_ff_exit(void) +{ + driver_unregister(&ff_driver.driver); +} + +module_init(snd_ff_init); +module_exit(snd_ff_exit); diff --git a/sound/firewire/fireface/ff.h b/sound/firewire/fireface/ff.h new file mode 100644 index 0000000000000000000000000000000000000000..3cb812a500305eaa45d9cb3dcfad77a85eff2687 --- /dev/null +++ b/sound/firewire/fireface/ff.h @@ -0,0 +1,146 @@ +/* + * ff.h - a part of driver for RME Fireface series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#ifndef SOUND_FIREFACE_H_INCLUDED +#define SOUND_FIREFACE_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../lib.h" +#include "../amdtp-stream.h" +#include "../iso-resources.h" + +#define SND_FF_STREAM_MODES 3 + +#define SND_FF_MAXIMIM_MIDI_QUADS 9 +#define SND_FF_IN_MIDI_PORTS 2 +#define SND_FF_OUT_MIDI_PORTS 2 + +struct snd_ff_protocol; +struct snd_ff_spec { + const char *const name; + + const unsigned int pcm_capture_channels[SND_FF_STREAM_MODES]; + const unsigned int pcm_playback_channels[SND_FF_STREAM_MODES]; + + unsigned int midi_in_ports; + unsigned int midi_out_ports; + + struct snd_ff_protocol *protocol; +}; + +struct snd_ff { + struct snd_card *card; + struct fw_unit *unit; + struct mutex mutex; + spinlock_t lock; + + bool registered; + struct delayed_work dwork; + + const struct snd_ff_spec *spec; + + /* To handle MIDI tx. */ + struct snd_rawmidi_substream *tx_midi_substreams[SND_FF_IN_MIDI_PORTS]; + struct fw_address_handler async_handler; + + /* TO handle MIDI rx. */ + struct snd_rawmidi_substream *rx_midi_substreams[SND_FF_OUT_MIDI_PORTS]; + u8 running_status[SND_FF_OUT_MIDI_PORTS]; + __le32 msg_buf[SND_FF_OUT_MIDI_PORTS][SND_FF_MAXIMIM_MIDI_QUADS]; + struct work_struct rx_midi_work[SND_FF_OUT_MIDI_PORTS]; + struct fw_transaction transactions[SND_FF_OUT_MIDI_PORTS]; + ktime_t next_ktime[SND_FF_OUT_MIDI_PORTS]; + bool rx_midi_error[SND_FF_OUT_MIDI_PORTS]; + unsigned int rx_bytes[SND_FF_OUT_MIDI_PORTS]; + + unsigned int substreams_counter; + struct amdtp_stream tx_stream; + struct amdtp_stream rx_stream; + struct fw_iso_resources tx_resources; + struct fw_iso_resources rx_resources; + + int dev_lock_count; + bool dev_lock_changed; + wait_queue_head_t hwdep_wait; +}; + +enum snd_ff_clock_src { + SND_FF_CLOCK_SRC_INTERNAL, + SND_FF_CLOCK_SRC_SPDIF, + SND_FF_CLOCK_SRC_ADAT, + SND_FF_CLOCK_SRC_WORD, + SND_FF_CLOCK_SRC_LTC, + /* TODO: perhaps ADAT2 and TCO exists. */ +}; + +struct snd_ff_protocol { + int (*get_clock)(struct snd_ff *ff, unsigned int *rate, + enum snd_ff_clock_src *src); + int (*begin_session)(struct snd_ff *ff, unsigned int rate); + void (*finish_session)(struct snd_ff *ff); + int (*switch_fetching_mode)(struct snd_ff *ff, bool enable); + + void (*dump_sync_status)(struct snd_ff *ff, + struct snd_info_buffer *buffer); + void (*dump_clock_config)(struct snd_ff *ff, + struct snd_info_buffer *buffer); + + u64 midi_high_addr_reg; + u64 midi_rx_port_0_reg; + u64 midi_rx_port_1_reg; +}; + +extern struct snd_ff_protocol snd_ff_protocol_ff400; + +int snd_ff_transaction_register(struct snd_ff *ff); +int snd_ff_transaction_reregister(struct snd_ff *ff); +void snd_ff_transaction_unregister(struct snd_ff *ff); + +int amdtp_ff_set_parameters(struct amdtp_stream *s, unsigned int rate, + unsigned int pcm_channels); +int amdtp_ff_add_pcm_hw_constraints(struct amdtp_stream *s, + struct snd_pcm_runtime *runtime); +int amdtp_ff_init(struct amdtp_stream *s, struct fw_unit *unit, + enum amdtp_stream_direction dir); + +int snd_ff_stream_init_duplex(struct snd_ff *ff); +void snd_ff_stream_destroy_duplex(struct snd_ff *ff); +int snd_ff_stream_start_duplex(struct snd_ff *ff, unsigned int rate); +void snd_ff_stream_stop_duplex(struct snd_ff *ff); +void snd_ff_stream_update_duplex(struct snd_ff *ff); + +void snd_ff_stream_lock_changed(struct snd_ff *ff); +int snd_ff_stream_lock_try(struct snd_ff *ff); +void snd_ff_stream_lock_release(struct snd_ff *ff); + +void snd_ff_proc_init(struct snd_ff *ff); + +int snd_ff_create_midi_devices(struct snd_ff *ff); + +int snd_ff_create_pcm_devices(struct snd_ff *ff); + +int snd_ff_create_hwdep_devices(struct snd_ff *ff); + +#endif diff --git a/sound/firewire/lib.c b/sound/firewire/lib.c index 7683238283b6c528e2eafa18f64a877fcbbe02c3..39dfa74906ef00f5ae7b6634cf9532898e148bf3 100644 --- a/sound/firewire/lib.c +++ b/sound/firewire/lib.c @@ -99,147 +99,6 @@ void snd_fw_schedule_registration(struct fw_unit *unit, } EXPORT_SYMBOL(snd_fw_schedule_registration); -static void async_midi_port_callback(struct fw_card *card, int rcode, - void *data, size_t length, - void *callback_data) -{ - struct snd_fw_async_midi_port *port = callback_data; - struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); - - /* This port is closed. */ - if (substream == NULL) - return; - - if (rcode == RCODE_COMPLETE) - snd_rawmidi_transmit_ack(substream, port->consume_bytes); - else if (!rcode_is_permanent_error(rcode)) - /* To start next transaction immediately for recovery. */ - port->next_ktime = 0; - else - /* Don't continue processing. */ - port->error = true; - - port->idling = true; - - if (!snd_rawmidi_transmit_empty(substream)) - schedule_work(&port->work); -} - -static void midi_port_work(struct work_struct *work) -{ - struct snd_fw_async_midi_port *port = - container_of(work, struct snd_fw_async_midi_port, work); - struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); - int generation; - int type; - - /* Under transacting or error state. */ - if (!port->idling || port->error) - return; - - /* Nothing to do. */ - if (substream == NULL || snd_rawmidi_transmit_empty(substream)) - return; - - /* Do it in next chance. */ - if (ktime_after(port->next_ktime, ktime_get())) { - schedule_work(&port->work); - return; - } - - /* - * Fill the buffer. The callee must use snd_rawmidi_transmit_peek(). - * Later, snd_rawmidi_transmit_ack() is called. - */ - memset(port->buf, 0, port->len); - port->consume_bytes = port->fill(substream, port->buf); - if (port->consume_bytes <= 0) { - /* Do it in next chance, immediately. */ - if (port->consume_bytes == 0) { - port->next_ktime = 0; - schedule_work(&port->work); - } else { - /* Fatal error. */ - port->error = true; - } - return; - } - - /* Calculate type of transaction. */ - if (port->len == 4) - type = TCODE_WRITE_QUADLET_REQUEST; - else - type = TCODE_WRITE_BLOCK_REQUEST; - - /* Set interval to next transaction. */ - port->next_ktime = ktime_add_ns(ktime_get(), - port->consume_bytes * 8 * NSEC_PER_SEC / 31250); - - /* Start this transaction. */ - port->idling = false; - - /* - * In Linux FireWire core, when generation is updated with memory - * barrier, node id has already been updated. In this module, After - * this smp_rmb(), load/store instructions to memory are completed. - * Thus, both of generation and node id are available with recent - * values. This is a light-serialization solution to handle bus reset - * events on IEEE 1394 bus. - */ - generation = port->parent->generation; - smp_rmb(); - - fw_send_request(port->parent->card, &port->transaction, type, - port->parent->node_id, generation, - port->parent->max_speed, port->addr, - port->buf, port->len, async_midi_port_callback, - port); -} - -/** - * snd_fw_async_midi_port_init - initialize asynchronous MIDI port structure - * @port: the asynchronous MIDI port to initialize - * @unit: the target of the asynchronous transaction - * @addr: the address to which transactions are transferred - * @len: the length of transaction - * @fill: the callback function to fill given buffer, and returns the - * number of consumed bytes for MIDI message. - * - */ -int snd_fw_async_midi_port_init(struct snd_fw_async_midi_port *port, - struct fw_unit *unit, u64 addr, unsigned int len, - snd_fw_async_midi_port_fill fill) -{ - port->len = DIV_ROUND_UP(len, 4) * 4; - port->buf = kzalloc(port->len, GFP_KERNEL); - if (port->buf == NULL) - return -ENOMEM; - - port->parent = fw_parent_device(unit); - port->addr = addr; - port->fill = fill; - port->idling = true; - port->next_ktime = 0; - port->error = false; - - INIT_WORK(&port->work, midi_port_work); - - return 0; -} -EXPORT_SYMBOL(snd_fw_async_midi_port_init); - -/** - * snd_fw_async_midi_port_destroy - free asynchronous MIDI port structure - * @port: the asynchronous MIDI port structure - */ -void snd_fw_async_midi_port_destroy(struct snd_fw_async_midi_port *port) -{ - snd_fw_async_midi_port_finish(port); - cancel_work_sync(&port->work); - kfree(port->buf); -} -EXPORT_SYMBOL(snd_fw_async_midi_port_destroy); - MODULE_DESCRIPTION("FireWire audio helper functions"); MODULE_AUTHOR("Clemens Ladisch "); MODULE_LICENSE("GPL v2"); diff --git a/sound/firewire/lib.h b/sound/firewire/lib.h index f6769312ebfccbe473ac291e81b386b48b45488f..eef70922ed89b2f0fd1ec86c87272f69e0080571 100644 --- a/sound/firewire/lib.h +++ b/sound/firewire/lib.h @@ -25,58 +25,4 @@ static inline bool rcode_is_permanent_error(int rcode) void snd_fw_schedule_registration(struct fw_unit *unit, struct delayed_work *dwork); -struct snd_fw_async_midi_port; -typedef int (*snd_fw_async_midi_port_fill)( - struct snd_rawmidi_substream *substream, - u8 *buf); - -struct snd_fw_async_midi_port { - struct fw_device *parent; - struct work_struct work; - bool idling; - ktime_t next_ktime; - bool error; - - u64 addr; - struct fw_transaction transaction; - - u8 *buf; - unsigned int len; - - struct snd_rawmidi_substream *substream; - snd_fw_async_midi_port_fill fill; - unsigned int consume_bytes; -}; - -int snd_fw_async_midi_port_init(struct snd_fw_async_midi_port *port, - struct fw_unit *unit, u64 addr, unsigned int len, - snd_fw_async_midi_port_fill fill); -void snd_fw_async_midi_port_destroy(struct snd_fw_async_midi_port *port); - -/** - * snd_fw_async_midi_port_run - run transactions for the async MIDI port - * @port: the asynchronous MIDI port - * @substream: the MIDI substream - */ -static inline void -snd_fw_async_midi_port_run(struct snd_fw_async_midi_port *port, - struct snd_rawmidi_substream *substream) -{ - if (!port->error) { - port->substream = substream; - schedule_work(&port->work); - } -} - -/** - * snd_fw_async_midi_port_finish - finish the asynchronous MIDI port - * @port: the asynchronous MIDI port - */ -static inline void -snd_fw_async_midi_port_finish(struct snd_fw_async_midi_port *port) -{ - port->substream = NULL; - port->error = false; -} - #endif diff --git a/sound/firewire/motu/Makefile b/sound/firewire/motu/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..728f586e754b64437ba05db96e70e0c0b66a6292 --- /dev/null +++ b/sound/firewire/motu/Makefile @@ -0,0 +1,6 @@ +CFLAGS_amdtp-motu.o := -I$(src) + +snd-firewire-motu-objs := motu.o amdtp-motu.o motu-transaction.o motu-stream.o \ + motu-proc.o motu-pcm.o motu-midi.o motu-hwdep.o \ + motu-protocol-v2.o motu-protocol-v3.o +obj-$(CONFIG_SND_FIREWIRE_MOTU) += snd-firewire-motu.o diff --git a/sound/firewire/motu/amdtp-motu-trace.h b/sound/firewire/motu/amdtp-motu-trace.h new file mode 100644 index 0000000000000000000000000000000000000000..cd0cbfa9f96f3d25c96a82cc27c069fecf21fabb --- /dev/null +++ b/sound/firewire/motu/amdtp-motu-trace.h @@ -0,0 +1,123 @@ +/* + * amdtp-motu-trace.h - tracepoint definitions to dump a part of packet data + * + * Copyright (c) 2017 Takashi Sakamoto + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM snd_firewire_motu + +#if !defined(_SND_FIREWIRE_MOTU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _SND_FIREWIRE_MOTU_TRACE_H + +#include + +static void copy_sph(u32 *frame, __be32 *buffer, unsigned int data_blocks, + unsigned int data_block_quadlets); +static void copy_message(u64 *frames, __be32 *buffer, unsigned int data_blocks, + unsigned int data_block_quadlets); + +TRACE_EVENT(in_data_block_sph, + TP_PROTO(struct amdtp_stream *s, unsigned int data_blocks, __be32 *buffer), + TP_ARGS(s, data_blocks, buffer), + TP_STRUCT__entry( + __field(int, src) + __field(int, dst) + __field(unsigned int, data_blocks) + __dynamic_array(u32, tstamps, data_blocks) + ), + TP_fast_assign( + __entry->src = fw_parent_device(s->unit)->node_id; + __entry->dst = fw_parent_device(s->unit)->card->node_id; + __entry->data_blocks = data_blocks; + copy_sph(__get_dynamic_array(tstamps), buffer, data_blocks, s->data_block_quadlets); + ), + TP_printk( + "%04x %04x %u %s", + __entry->src, + __entry->dst, + __entry->data_blocks, + __print_array(__get_dynamic_array(tstamps), __entry->data_blocks, 4) + ) +); + +TRACE_EVENT(out_data_block_sph, + TP_PROTO(struct amdtp_stream *s, unsigned int data_blocks, __be32 *buffer), + TP_ARGS(s, data_blocks, buffer), + TP_STRUCT__entry( + __field(int, src) + __field(int, dst) + __field(unsigned int, data_blocks) + __dynamic_array(u32, tstamps, data_blocks) + ), + TP_fast_assign( + __entry->src = fw_parent_device(s->unit)->card->node_id; + __entry->dst = fw_parent_device(s->unit)->node_id; + __entry->data_blocks = data_blocks; + copy_sph(__get_dynamic_array(tstamps), buffer, data_blocks, s->data_block_quadlets); + ), + TP_printk( + "%04x %04x %u %s", + __entry->src, + __entry->dst, + __entry->data_blocks, + __print_array(__get_dynamic_array(tstamps), __entry->data_blocks, 4) + ) +); + +TRACE_EVENT(in_data_block_message, + TP_PROTO(struct amdtp_stream *s, unsigned int data_blocks, __be32 *buffer), + TP_ARGS(s, data_blocks, buffer), + TP_STRUCT__entry( + __field(int, src) + __field(int, dst) + __field(unsigned int, data_blocks) + __dynamic_array(u64, messages, data_blocks) + ), + TP_fast_assign( + __entry->src = fw_parent_device(s->unit)->node_id; + __entry->dst = fw_parent_device(s->unit)->card->node_id; + __entry->data_blocks = data_blocks; + copy_message(__get_dynamic_array(messages), buffer, data_blocks, s->data_block_quadlets); + ), + TP_printk( + "%04x %04x %u %s", + __entry->src, + __entry->dst, + __entry->data_blocks, + __print_array(__get_dynamic_array(messages), __entry->data_blocks, 8) + ) +); + +TRACE_EVENT(out_data_block_message, + TP_PROTO(struct amdtp_stream *s, unsigned int data_blocks, __be32 *buffer), + TP_ARGS(s, data_blocks, buffer), + TP_STRUCT__entry( + __field(int, src) + __field(int, dst) + __field(unsigned int, data_blocks) + __dynamic_array(u64, messages, data_blocks) + ), + TP_fast_assign( + __entry->src = fw_parent_device(s->unit)->card->node_id; + __entry->dst = fw_parent_device(s->unit)->node_id; + __entry->data_blocks = data_blocks; + copy_message(__get_dynamic_array(messages), buffer, data_blocks, s->data_block_quadlets); + ), + TP_printk( + "%04x %04x %u %s", + __entry->src, + __entry->dst, + __entry->data_blocks, + __print_array(__get_dynamic_array(messages), __entry->data_blocks, 8) + ) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE amdtp-motu-trace +#include diff --git a/sound/firewire/motu/amdtp-motu.c b/sound/firewire/motu/amdtp-motu.c new file mode 100644 index 0000000000000000000000000000000000000000..96f0091144bb2ad48cb7f1d12fc51db57e208e20 --- /dev/null +++ b/sound/firewire/motu/amdtp-motu.c @@ -0,0 +1,427 @@ +/* + * amdtp-motu.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include +#include +#include "motu.h" + +#define CREATE_TRACE_POINTS +#include "amdtp-motu-trace.h" + +#define CIP_FMT_MOTU 0x02 +#define CIP_FMT_MOTU_TX_V3 0x22 +#define MOTU_FDF_AM824 0x22 + +/* + * Nominally 3125 bytes/second, but the MIDI port's clock might be + * 1% too slow, and the bus clock 100 ppm too fast. + */ +#define MIDI_BYTES_PER_SECOND 3093 + +struct amdtp_motu { + /* For timestamp processing. */ + unsigned int quotient_ticks_per_event; + unsigned int remainder_ticks_per_event; + unsigned int next_ticks; + unsigned int next_accumulated; + unsigned int next_cycles; + unsigned int next_seconds; + + unsigned int pcm_chunks; + unsigned int pcm_byte_offset; + + struct snd_rawmidi_substream *midi; + unsigned int midi_ports; + unsigned int midi_flag_offset; + unsigned int midi_byte_offset; + + int midi_db_count; + unsigned int midi_db_interval; +}; + +int amdtp_motu_set_parameters(struct amdtp_stream *s, unsigned int rate, + unsigned int midi_ports, + struct snd_motu_packet_format *formats) +{ + static const struct { + unsigned int quotient_ticks_per_event; + unsigned int remainder_ticks_per_event; + } params[] = { + [CIP_SFC_44100] = { 557, 123 }, + [CIP_SFC_48000] = { 512, 0 }, + [CIP_SFC_88200] = { 278, 282 }, + [CIP_SFC_96000] = { 256, 0 }, + [CIP_SFC_176400] = { 139, 141 }, + [CIP_SFC_192000] = { 128, 0 }, + }; + struct amdtp_motu *p = s->protocol; + unsigned int pcm_chunks, data_chunks, data_block_quadlets; + unsigned int delay; + unsigned int mode; + int i, err; + + if (amdtp_stream_running(s)) + return -EBUSY; + + for (i = 0; i < ARRAY_SIZE(snd_motu_clock_rates); ++i) { + if (snd_motu_clock_rates[i] == rate) { + mode = i >> 1; + break; + } + } + if (i == ARRAY_SIZE(snd_motu_clock_rates)) + return -EINVAL; + + pcm_chunks = formats->fixed_part_pcm_chunks[mode] + + formats->differed_part_pcm_chunks[mode]; + data_chunks = formats->msg_chunks + pcm_chunks; + + /* + * Each data block includes SPH in its head. Data chunks follow with + * 3 byte alignment. Padding follows with zero to conform to quadlet + * alignment. + */ + data_block_quadlets = 1 + DIV_ROUND_UP(data_chunks * 3, 4); + + err = amdtp_stream_set_parameters(s, rate, data_block_quadlets); + if (err < 0) + return err; + + p->pcm_chunks = pcm_chunks; + p->pcm_byte_offset = formats->pcm_byte_offset; + + p->midi_ports = midi_ports; + p->midi_flag_offset = formats->midi_flag_offset; + p->midi_byte_offset = formats->midi_byte_offset; + + p->midi_db_count = 0; + p->midi_db_interval = rate / MIDI_BYTES_PER_SECOND; + + /* IEEE 1394 bus requires. */ + delay = 0x2e00; + + /* For no-data or empty packets to adjust PCM sampling frequency. */ + delay += 8000 * 3072 * s->syt_interval / rate; + + p->next_seconds = 0; + p->next_cycles = delay / 3072; + p->quotient_ticks_per_event = params[s->sfc].quotient_ticks_per_event; + p->remainder_ticks_per_event = params[s->sfc].remainder_ticks_per_event; + p->next_ticks = delay % 3072; + p->next_accumulated = 0; + + return 0; +} + +static void read_pcm_s32(struct amdtp_stream *s, + struct snd_pcm_runtime *runtime, + __be32 *buffer, unsigned int data_blocks) +{ + struct amdtp_motu *p = s->protocol; + unsigned int channels, remaining_frames, i, c; + u8 *byte; + u32 *dst; + + channels = p->pcm_chunks; + dst = (void *)runtime->dma_area + + frames_to_bytes(runtime, s->pcm_buffer_pointer); + remaining_frames = runtime->buffer_size - s->pcm_buffer_pointer; + + for (i = 0; i < data_blocks; ++i) { + byte = (u8 *)buffer + p->pcm_byte_offset; + + for (c = 0; c < channels; ++c) { + *dst = (byte[0] << 24) | (byte[1] << 16) | byte[2]; + byte += 3; + dst++; + } + buffer += s->data_block_quadlets; + if (--remaining_frames == 0) + dst = (void *)runtime->dma_area; + } +} + +static void write_pcm_s32(struct amdtp_stream *s, + struct snd_pcm_runtime *runtime, + __be32 *buffer, unsigned int data_blocks) +{ + struct amdtp_motu *p = s->protocol; + unsigned int channels, remaining_frames, i, c; + u8 *byte; + const u32 *src; + + channels = p->pcm_chunks; + src = (void *)runtime->dma_area + + frames_to_bytes(runtime, s->pcm_buffer_pointer); + remaining_frames = runtime->buffer_size - s->pcm_buffer_pointer; + + for (i = 0; i < data_blocks; ++i) { + byte = (u8 *)buffer + p->pcm_byte_offset; + + for (c = 0; c < channels; ++c) { + byte[0] = (*src >> 24) & 0xff; + byte[1] = (*src >> 16) & 0xff; + byte[2] = (*src >> 8) & 0xff; + byte += 3; + src++; + } + + buffer += s->data_block_quadlets; + if (--remaining_frames == 0) + src = (void *)runtime->dma_area; + } +} + +static void write_pcm_silence(struct amdtp_stream *s, __be32 *buffer, + unsigned int data_blocks) +{ + struct amdtp_motu *p = s->protocol; + unsigned int channels, i, c; + u8 *byte; + + channels = p->pcm_chunks; + + for (i = 0; i < data_blocks; ++i) { + byte = (u8 *)buffer + p->pcm_byte_offset; + + for (c = 0; c < channels; ++c) { + byte[0] = 0; + byte[1] = 0; + byte[2] = 0; + byte += 3; + } + + buffer += s->data_block_quadlets; + } +} + +int amdtp_motu_add_pcm_hw_constraints(struct amdtp_stream *s, + struct snd_pcm_runtime *runtime) +{ + int err; + + /* TODO: how to set an constraint for exactly 24bit PCM sample? */ + err = snd_pcm_hw_constraint_msbits(runtime, 0, 32, 24); + if (err < 0) + return err; + + return amdtp_stream_add_pcm_hw_constraints(s, runtime); +} + +void amdtp_motu_midi_trigger(struct amdtp_stream *s, unsigned int port, + struct snd_rawmidi_substream *midi) +{ + struct amdtp_motu *p = s->protocol; + + if (port < p->midi_ports) + WRITE_ONCE(p->midi, midi); +} + +static void write_midi_messages(struct amdtp_stream *s, __be32 *buffer, + unsigned int data_blocks) +{ + struct amdtp_motu *p = s->protocol; + struct snd_rawmidi_substream *midi = READ_ONCE(p->midi); + u8 *b; + int i; + + for (i = 0; i < data_blocks; i++) { + b = (u8 *)buffer; + + if (midi && p->midi_db_count == 0 && + snd_rawmidi_transmit(midi, b + p->midi_byte_offset, 1) == 1) { + b[p->midi_flag_offset] = 0x01; + } else { + b[p->midi_byte_offset] = 0x00; + b[p->midi_flag_offset] = 0x00; + } + + buffer += s->data_block_quadlets; + + if (--p->midi_db_count < 0) + p->midi_db_count = p->midi_db_interval; + } +} + +static void read_midi_messages(struct amdtp_stream *s, __be32 *buffer, + unsigned int data_blocks) +{ + struct amdtp_motu *p = s->protocol; + struct snd_rawmidi_substream *midi; + u8 *b; + int i; + + for (i = 0; i < data_blocks; i++) { + b = (u8 *)buffer; + midi = READ_ONCE(p->midi); + + if (midi && (b[p->midi_flag_offset] & 0x01)) + snd_rawmidi_receive(midi, b + p->midi_byte_offset, 1); + + buffer += s->data_block_quadlets; + } +} + +/* For tracepoints. */ +static void __maybe_unused copy_sph(u32 *frames, __be32 *buffer, + unsigned int data_blocks, + unsigned int data_block_quadlets) +{ + unsigned int i; + + for (i = 0; i < data_blocks; ++i) { + *frames = be32_to_cpu(*buffer); + buffer += data_block_quadlets; + frames++; + } +} + +/* For tracepoints. */ +static void __maybe_unused copy_message(u64 *frames, __be32 *buffer, + unsigned int data_blocks, + unsigned int data_block_quadlets) +{ + unsigned int i; + + /* This is just for v2/v3 protocol. */ + for (i = 0; i < data_blocks; ++i) { + *frames = (be32_to_cpu(buffer[1]) << 16) | + (be32_to_cpu(buffer[2]) >> 16); + buffer += data_block_quadlets; + frames++; + } +} + +static unsigned int process_tx_data_blocks(struct amdtp_stream *s, + __be32 *buffer, unsigned int data_blocks, + unsigned int *syt) +{ + struct amdtp_motu *p = s->protocol; + struct snd_pcm_substream *pcm; + + trace_in_data_block_sph(s, data_blocks, buffer); + trace_in_data_block_message(s, data_blocks, buffer); + + if (p->midi_ports) + read_midi_messages(s, buffer, data_blocks); + + pcm = ACCESS_ONCE(s->pcm); + if (data_blocks > 0 && pcm) + read_pcm_s32(s, pcm->runtime, buffer, data_blocks); + + return data_blocks; +} + +static inline void compute_next_elapse_from_start(struct amdtp_motu *p) +{ + p->next_accumulated += p->remainder_ticks_per_event; + if (p->next_accumulated >= 441) { + p->next_accumulated -= 441; + p->next_ticks++; + } + + p->next_ticks += p->quotient_ticks_per_event; + if (p->next_ticks >= 3072) { + p->next_ticks -= 3072; + p->next_cycles++; + } + + if (p->next_cycles >= 8000) { + p->next_cycles -= 8000; + p->next_seconds++; + } + + if (p->next_seconds >= 128) + p->next_seconds -= 128; +} + +static void write_sph(struct amdtp_stream *s, __be32 *buffer, + unsigned int data_blocks) +{ + struct amdtp_motu *p = s->protocol; + unsigned int next_cycles; + unsigned int i; + u32 sph; + + for (i = 0; i < data_blocks; i++) { + next_cycles = (s->start_cycle + p->next_cycles) % 8000; + sph = ((next_cycles << 12) | p->next_ticks) & 0x01ffffff; + *buffer = cpu_to_be32(sph); + + compute_next_elapse_from_start(p); + + buffer += s->data_block_quadlets; + } +} + +static unsigned int process_rx_data_blocks(struct amdtp_stream *s, + __be32 *buffer, unsigned int data_blocks, + unsigned int *syt) +{ + struct amdtp_motu *p = (struct amdtp_motu *)s->protocol; + struct snd_pcm_substream *pcm; + + /* Not used. */ + *syt = 0xffff; + + /* TODO: how to interact control messages between userspace? */ + + if (p->midi_ports) + write_midi_messages(s, buffer, data_blocks); + + pcm = ACCESS_ONCE(s->pcm); + if (pcm) + write_pcm_s32(s, pcm->runtime, buffer, data_blocks); + else + write_pcm_silence(s, buffer, data_blocks); + + write_sph(s, buffer, data_blocks); + + trace_out_data_block_sph(s, data_blocks, buffer); + trace_out_data_block_message(s, data_blocks, buffer); + + return data_blocks; +} + +int amdtp_motu_init(struct amdtp_stream *s, struct fw_unit *unit, + enum amdtp_stream_direction dir, + const struct snd_motu_protocol *const protocol) +{ + amdtp_stream_process_data_blocks_t process_data_blocks; + int fmt = CIP_FMT_MOTU; + int flags = CIP_BLOCKING; + int err; + + if (dir == AMDTP_IN_STREAM) { + process_data_blocks = process_tx_data_blocks; + + /* + * Units of version 3 transmits packets with invalid CIP header + * against IEC 61883-1. + */ + if (protocol == &snd_motu_protocol_v3) { + flags |= CIP_WRONG_DBS | + CIP_SKIP_DBC_ZERO_CHECK | + CIP_HEADER_WITHOUT_EOH; + fmt = CIP_FMT_MOTU_TX_V3; + } + } else { + process_data_blocks = process_rx_data_blocks; + flags |= CIP_DBC_IS_END_EVENT; + } + + err = amdtp_stream_init(s, unit, dir, flags, fmt, process_data_blocks, + sizeof(struct amdtp_motu)); + if (err < 0) + return err; + + s->sph = 1; + s->fdf = MOTU_FDF_AM824; + + return 0; +} diff --git a/sound/firewire/motu/motu-hwdep.c b/sound/firewire/motu/motu-hwdep.c new file mode 100644 index 0000000000000000000000000000000000000000..b87ccb69d597978c75f89157defb8406f3c681b3 --- /dev/null +++ b/sound/firewire/motu/motu-hwdep.c @@ -0,0 +1,198 @@ +/* + * motu-hwdep.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +/* + * This codes have five functionalities. + * + * 1.get information about firewire node + * 2.get notification about starting/stopping stream + * 3.lock/unlock streaming + * + */ + +#include "motu.h" + +static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count, + loff_t *offset) +{ + struct snd_motu *motu = hwdep->private_data; + DEFINE_WAIT(wait); + union snd_firewire_event event; + + spin_lock_irq(&motu->lock); + + while (!motu->dev_lock_changed && motu->msg == 0) { + prepare_to_wait(&motu->hwdep_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&motu->lock); + schedule(); + finish_wait(&motu->hwdep_wait, &wait); + if (signal_pending(current)) + return -ERESTARTSYS; + spin_lock_irq(&motu->lock); + } + + memset(&event, 0, sizeof(event)); + if (motu->dev_lock_changed) { + event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS; + event.lock_status.status = (motu->dev_lock_count > 0); + motu->dev_lock_changed = false; + + count = min_t(long, count, sizeof(event.lock_status)); + } else { + event.motu_notification.type = SNDRV_FIREWIRE_EVENT_MOTU_NOTIFICATION; + event.motu_notification.message = motu->msg; + motu->msg = 0; + + count = min_t(long, count, sizeof(event.motu_notification)); + } + + spin_unlock_irq(&motu->lock); + + if (copy_to_user(buf, &event, count)) + return -EFAULT; + + return count; +} + +static unsigned int hwdep_poll(struct snd_hwdep *hwdep, struct file *file, + poll_table *wait) +{ + struct snd_motu *motu = hwdep->private_data; + unsigned int events; + + poll_wait(file, &motu->hwdep_wait, wait); + + spin_lock_irq(&motu->lock); + if (motu->dev_lock_changed || motu->msg) + events = POLLIN | POLLRDNORM; + else + events = 0; + spin_unlock_irq(&motu->lock); + + return events | POLLOUT; +} + +static int hwdep_get_info(struct snd_motu *motu, void __user *arg) +{ + struct fw_device *dev = fw_parent_device(motu->unit); + struct snd_firewire_get_info info; + + memset(&info, 0, sizeof(info)); + info.type = SNDRV_FIREWIRE_TYPE_MOTU; + info.card = dev->card->index; + *(__be32 *)&info.guid[0] = cpu_to_be32(dev->config_rom[3]); + *(__be32 *)&info.guid[4] = cpu_to_be32(dev->config_rom[4]); + strlcpy(info.device_name, dev_name(&dev->device), + sizeof(info.device_name)); + + if (copy_to_user(arg, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + +static int hwdep_lock(struct snd_motu *motu) +{ + int err; + + spin_lock_irq(&motu->lock); + + if (motu->dev_lock_count == 0) { + motu->dev_lock_count = -1; + err = 0; + } else { + err = -EBUSY; + } + + spin_unlock_irq(&motu->lock); + + return err; +} + +static int hwdep_unlock(struct snd_motu *motu) +{ + int err; + + spin_lock_irq(&motu->lock); + + if (motu->dev_lock_count == -1) { + motu->dev_lock_count = 0; + err = 0; + } else { + err = -EBADFD; + } + + spin_unlock_irq(&motu->lock); + + return err; +} + +static int hwdep_release(struct snd_hwdep *hwdep, struct file *file) +{ + struct snd_motu *motu = hwdep->private_data; + + spin_lock_irq(&motu->lock); + if (motu->dev_lock_count == -1) + motu->dev_lock_count = 0; + spin_unlock_irq(&motu->lock); + + return 0; +} + +static int hwdep_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct snd_motu *motu = hwdep->private_data; + + switch (cmd) { + case SNDRV_FIREWIRE_IOCTL_GET_INFO: + return hwdep_get_info(motu, (void __user *)arg); + case SNDRV_FIREWIRE_IOCTL_LOCK: + return hwdep_lock(motu); + case SNDRV_FIREWIRE_IOCTL_UNLOCK: + return hwdep_unlock(motu); + default: + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +static int hwdep_compat_ioctl(struct snd_hwdep *hwdep, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return hwdep_ioctl(hwdep, file, cmd, + (unsigned long)compat_ptr(arg)); +} +#else +#define hwdep_compat_ioctl NULL +#endif + +int snd_motu_create_hwdep_device(struct snd_motu *motu) +{ + static const struct snd_hwdep_ops ops = { + .read = hwdep_read, + .release = hwdep_release, + .poll = hwdep_poll, + .ioctl = hwdep_ioctl, + .ioctl_compat = hwdep_compat_ioctl, + }; + struct snd_hwdep *hwdep; + int err; + + err = snd_hwdep_new(motu->card, motu->card->driver, 0, &hwdep); + if (err < 0) + return err; + + strcpy(hwdep->name, "MOTU"); + hwdep->iface = SNDRV_HWDEP_IFACE_FW_MOTU; + hwdep->ops = ops; + hwdep->private_data = motu; + hwdep->exclusive = true; + + return 0; +} diff --git a/sound/firewire/motu/motu-midi.c b/sound/firewire/motu/motu-midi.c new file mode 100644 index 0000000000000000000000000000000000000000..e3acfcc53f4e3a02b4393dc3e8c97b46be3a68ef --- /dev/null +++ b/sound/firewire/motu/motu-midi.c @@ -0,0 +1,169 @@ +/* + * motu-midi.h - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ +#include "motu.h" + +static int midi_capture_open(struct snd_rawmidi_substream *substream) +{ + struct snd_motu *motu = substream->rmidi->private_data; + int err; + + err = snd_motu_stream_lock_try(motu); + if (err < 0) + return err; + + mutex_lock(&motu->mutex); + + motu->capture_substreams++; + err = snd_motu_stream_start_duplex(motu, 0); + + mutex_unlock(&motu->mutex); + + if (err < 0) + snd_motu_stream_lock_release(motu); + + return err; +} + +static int midi_playback_open(struct snd_rawmidi_substream *substream) +{ + struct snd_motu *motu = substream->rmidi->private_data; + int err; + + err = snd_motu_stream_lock_try(motu); + if (err < 0) + return err; + + mutex_lock(&motu->mutex); + + motu->playback_substreams++; + err = snd_motu_stream_start_duplex(motu, 0); + + mutex_unlock(&motu->mutex); + + if (err < 0) + snd_motu_stream_lock_release(motu); + + return err; +} + +static int midi_capture_close(struct snd_rawmidi_substream *substream) +{ + struct snd_motu *motu = substream->rmidi->private_data; + + mutex_lock(&motu->mutex); + + motu->capture_substreams--; + snd_motu_stream_stop_duplex(motu); + + mutex_unlock(&motu->mutex); + + snd_motu_stream_lock_release(motu); + return 0; +} + +static int midi_playback_close(struct snd_rawmidi_substream *substream) +{ + struct snd_motu *motu = substream->rmidi->private_data; + + mutex_lock(&motu->mutex); + + motu->playback_substreams--; + snd_motu_stream_stop_duplex(motu); + + mutex_unlock(&motu->mutex); + + snd_motu_stream_lock_release(motu); + return 0; +} + +static void midi_capture_trigger(struct snd_rawmidi_substream *substrm, int up) +{ + struct snd_motu *motu = substrm->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&motu->lock, flags); + + if (up) + amdtp_motu_midi_trigger(&motu->tx_stream, substrm->number, + substrm); + else + amdtp_motu_midi_trigger(&motu->tx_stream, substrm->number, + NULL); + + spin_unlock_irqrestore(&motu->lock, flags); +} + +static void midi_playback_trigger(struct snd_rawmidi_substream *substrm, int up) +{ + struct snd_motu *motu = substrm->rmidi->private_data; + unsigned long flags; + + spin_lock_irqsave(&motu->lock, flags); + + if (up) + amdtp_motu_midi_trigger(&motu->rx_stream, substrm->number, + substrm); + else + amdtp_motu_midi_trigger(&motu->rx_stream, substrm->number, + NULL); + + spin_unlock_irqrestore(&motu->lock, flags); +} + +static void set_midi_substream_names(struct snd_motu *motu, + struct snd_rawmidi_str *str) +{ + struct snd_rawmidi_substream *subs; + + list_for_each_entry(subs, &str->substreams, list) { + snprintf(subs->name, sizeof(subs->name), + "%s MIDI %d", motu->card->shortname, subs->number + 1); + } +} + +int snd_motu_create_midi_devices(struct snd_motu *motu) +{ + static struct snd_rawmidi_ops capture_ops = { + .open = midi_capture_open, + .close = midi_capture_close, + .trigger = midi_capture_trigger, + }; + static struct snd_rawmidi_ops playback_ops = { + .open = midi_playback_open, + .close = midi_playback_close, + .trigger = midi_playback_trigger, + }; + struct snd_rawmidi *rmidi; + struct snd_rawmidi_str *str; + int err; + + /* create midi ports */ + err = snd_rawmidi_new(motu->card, motu->card->driver, 0, 1, 1, &rmidi); + if (err < 0) + return err; + + snprintf(rmidi->name, sizeof(rmidi->name), + "%s MIDI", motu->card->shortname); + rmidi->private_data = motu; + + rmidi->info_flags |= SNDRV_RAWMIDI_INFO_INPUT | + SNDRV_RAWMIDI_INFO_OUTPUT | + SNDRV_RAWMIDI_INFO_DUPLEX; + + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, + &capture_ops); + str = &rmidi->streams[SNDRV_RAWMIDI_STREAM_INPUT]; + set_midi_substream_names(motu, str); + + snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, + &playback_ops); + str = &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT]; + set_midi_substream_names(motu, str); + + return 0; +} diff --git a/sound/firewire/motu/motu-pcm.c b/sound/firewire/motu/motu-pcm.c new file mode 100644 index 0000000000000000000000000000000000000000..94558f3d218b382fca39bd4e2b3dc619f4e91083 --- /dev/null +++ b/sound/firewire/motu/motu-pcm.c @@ -0,0 +1,398 @@ +/* + * motu-pcm.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include +#include "motu.h" + +static int motu_rate_constraint(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_motu_packet_format *formats = rule->private; + + const struct snd_interval *c = + hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_CHANNELS); + struct snd_interval *r = + hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); + struct snd_interval rates = { + .min = UINT_MAX, .max = 0, .integer = 1 + }; + unsigned int i, pcm_channels, rate, mode; + + for (i = 0; i < ARRAY_SIZE(snd_motu_clock_rates); ++i) { + rate = snd_motu_clock_rates[i]; + mode = i / 2; + + pcm_channels = formats->fixed_part_pcm_chunks[mode] + + formats->differed_part_pcm_chunks[mode]; + if (!snd_interval_test(c, pcm_channels)) + continue; + + rates.min = min(rates.min, rate); + rates.max = max(rates.max, rate); + } + + return snd_interval_refine(r, &rates); +} + +static int motu_channels_constraint(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_motu_packet_format *formats = rule->private; + + const struct snd_interval *r = + hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); + struct snd_interval *c = + hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); + struct snd_interval channels = { + .min = UINT_MAX, .max = 0, .integer = 1 + }; + unsigned int i, pcm_channels, rate, mode; + + for (i = 0; i < ARRAY_SIZE(snd_motu_clock_rates); ++i) { + rate = snd_motu_clock_rates[i]; + mode = i / 2; + + if (!snd_interval_test(r, rate)) + continue; + + pcm_channels = formats->fixed_part_pcm_chunks[mode] + + formats->differed_part_pcm_chunks[mode]; + channels.min = min(channels.min, pcm_channels); + channels.max = max(channels.max, pcm_channels); + } + + return snd_interval_refine(c, &channels); +} + +static void limit_channels_and_rates(struct snd_motu *motu, + struct snd_pcm_runtime *runtime, + struct snd_motu_packet_format *formats) +{ + struct snd_pcm_hardware *hw = &runtime->hw; + unsigned int i, pcm_channels, rate, mode; + + hw->channels_min = UINT_MAX; + hw->channels_max = 0; + + for (i = 0; i < ARRAY_SIZE(snd_motu_clock_rates); ++i) { + rate = snd_motu_clock_rates[i]; + mode = i / 2; + + pcm_channels = formats->fixed_part_pcm_chunks[mode] + + formats->differed_part_pcm_chunks[mode]; + if (pcm_channels == 0) + continue; + + hw->rates |= snd_pcm_rate_to_rate_bit(rate); + hw->channels_min = min(hw->channels_min, pcm_channels); + hw->channels_max = max(hw->channels_max, pcm_channels); + } + + snd_pcm_limit_hw_rates(runtime); +} + +static void limit_period_and_buffer(struct snd_pcm_hardware *hw) +{ + hw->periods_min = 2; /* SNDRV_PCM_INFO_BATCH */ + hw->periods_max = UINT_MAX; + + hw->period_bytes_min = 4 * hw->channels_max; /* byte for a frame */ + + /* Just to prevent from allocating much pages. */ + hw->period_bytes_max = hw->period_bytes_min * 2048; + hw->buffer_bytes_max = hw->period_bytes_max * hw->periods_min; +} + +static int init_hw_info(struct snd_motu *motu, + struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_hardware *hw = &runtime->hw; + struct amdtp_stream *stream; + struct snd_motu_packet_format *formats; + int err; + + hw->info = SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_BATCH | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_JOINT_DUPLEX | + SNDRV_PCM_INFO_BLOCK_TRANSFER; + + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { + hw->formats = SNDRV_PCM_FMTBIT_S32; + stream = &motu->tx_stream; + formats = &motu->tx_packet_formats; + } else { + hw->formats = SNDRV_PCM_FMTBIT_S32; + stream = &motu->rx_stream; + formats = &motu->rx_packet_formats; + } + + limit_channels_and_rates(motu, runtime, formats); + limit_period_and_buffer(hw); + + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, + motu_rate_constraint, formats, + SNDRV_PCM_HW_PARAM_CHANNELS, -1); + if (err < 0) + return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, + motu_channels_constraint, formats, + SNDRV_PCM_HW_PARAM_RATE, -1); + if (err < 0) + return err; + + return amdtp_motu_add_pcm_hw_constraints(stream, runtime); +} + +static int pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + const struct snd_motu_protocol *const protocol = motu->spec->protocol; + enum snd_motu_clock_source src; + unsigned int rate; + int err; + + err = snd_motu_stream_lock_try(motu); + if (err < 0) + return err; + + mutex_lock(&motu->mutex); + + err = protocol->cache_packet_formats(motu); + if (err < 0) + goto err_locked; + + err = init_hw_info(motu, substream); + if (err < 0) + goto err_locked; + + /* + * When source of clock is not internal or any PCM streams are running, + * available sampling rate is limited at current sampling rate. + */ + err = protocol->get_clock_source(motu, &src); + if (err < 0) + goto err_locked; + if (src != SND_MOTU_CLOCK_SOURCE_INTERNAL || + amdtp_stream_pcm_running(&motu->tx_stream) || + amdtp_stream_pcm_running(&motu->rx_stream)) { + err = protocol->get_clock_rate(motu, &rate); + if (err < 0) + goto err_locked; + substream->runtime->hw.rate_min = rate; + substream->runtime->hw.rate_max = rate; + } + + snd_pcm_set_sync(substream); + + mutex_unlock(&motu->mutex); + + return err; +err_locked: + mutex_unlock(&motu->mutex); + snd_motu_stream_lock_release(motu); + return err; +} + +static int pcm_close(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + + snd_motu_stream_lock_release(motu); + + return 0; +} + +static int capture_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct snd_motu *motu = substream->private_data; + int err; + + err = snd_pcm_lib_alloc_vmalloc_buffer(substream, + params_buffer_bytes(hw_params)); + if (err < 0) + return err; + + if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN) { + mutex_lock(&motu->mutex); + motu->capture_substreams++; + mutex_unlock(&motu->mutex); + } + + return 0; +} +static int playback_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct snd_motu *motu = substream->private_data; + int err; + + err = snd_pcm_lib_alloc_vmalloc_buffer(substream, + params_buffer_bytes(hw_params)); + if (err < 0) + return err; + + if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN) { + mutex_lock(&motu->mutex); + motu->playback_substreams++; + mutex_unlock(&motu->mutex); + } + + return 0; +} + +static int capture_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + + mutex_lock(&motu->mutex); + + if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) + motu->capture_substreams--; + + snd_motu_stream_stop_duplex(motu); + + mutex_unlock(&motu->mutex); + + return snd_pcm_lib_free_vmalloc_buffer(substream); +} + +static int playback_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + + mutex_lock(&motu->mutex); + + if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) + motu->playback_substreams--; + + snd_motu_stream_stop_duplex(motu); + + mutex_unlock(&motu->mutex); + + return snd_pcm_lib_free_vmalloc_buffer(substream); +} + +static int capture_prepare(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + int err; + + mutex_lock(&motu->mutex); + err = snd_motu_stream_start_duplex(motu, substream->runtime->rate); + mutex_unlock(&motu->mutex); + if (err >= 0) + amdtp_stream_pcm_prepare(&motu->tx_stream); + + return 0; +} +static int playback_prepare(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + int err; + + mutex_lock(&motu->mutex); + err = snd_motu_stream_start_duplex(motu, substream->runtime->rate); + mutex_unlock(&motu->mutex); + if (err >= 0) + amdtp_stream_pcm_prepare(&motu->rx_stream); + + return err; +} + +static int capture_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_motu *motu = substream->private_data; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + amdtp_stream_pcm_trigger(&motu->tx_stream, substream); + break; + case SNDRV_PCM_TRIGGER_STOP: + amdtp_stream_pcm_trigger(&motu->tx_stream, NULL); + break; + default: + return -EINVAL; + } + + return 0; +} +static int playback_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_motu *motu = substream->private_data; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + amdtp_stream_pcm_trigger(&motu->rx_stream, substream); + break; + case SNDRV_PCM_TRIGGER_STOP: + amdtp_stream_pcm_trigger(&motu->rx_stream, NULL); + break; + default: + return -EINVAL; + } + + return 0; +} + +static snd_pcm_uframes_t capture_pointer(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + + return amdtp_stream_pcm_pointer(&motu->tx_stream); +} +static snd_pcm_uframes_t playback_pointer(struct snd_pcm_substream *substream) +{ + struct snd_motu *motu = substream->private_data; + + return amdtp_stream_pcm_pointer(&motu->rx_stream); +} + +int snd_motu_create_pcm_devices(struct snd_motu *motu) +{ + static struct snd_pcm_ops capture_ops = { + .open = pcm_open, + .close = pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = capture_hw_params, + .hw_free = capture_hw_free, + .prepare = capture_prepare, + .trigger = capture_trigger, + .pointer = capture_pointer, + .page = snd_pcm_lib_get_vmalloc_page, + .mmap = snd_pcm_lib_mmap_vmalloc, + }; + static struct snd_pcm_ops playback_ops = { + .open = pcm_open, + .close = pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = playback_hw_params, + .hw_free = playback_hw_free, + .prepare = playback_prepare, + .trigger = playback_trigger, + .pointer = playback_pointer, + .page = snd_pcm_lib_get_vmalloc_page, + .mmap = snd_pcm_lib_mmap_vmalloc, + }; + struct snd_pcm *pcm; + int err; + + err = snd_pcm_new(motu->card, motu->card->driver, 0, 1, 1, &pcm); + if (err < 0) + return err; + pcm->private_data = motu; + strcpy(pcm->name, motu->card->shortname); + + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &capture_ops); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &playback_ops); + + return 0; +} diff --git a/sound/firewire/motu/motu-proc.c b/sound/firewire/motu/motu-proc.c new file mode 100644 index 0000000000000000000000000000000000000000..4edc064999ede2aa370d689313ec5c09056cdc88 --- /dev/null +++ b/sound/firewire/motu/motu-proc.c @@ -0,0 +1,118 @@ +/* + * motu-proc.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "./motu.h" + +static const char *const clock_names[] = { + [SND_MOTU_CLOCK_SOURCE_INTERNAL] = "Internal", + [SND_MOTU_CLOCK_SOURCE_ADAT_ON_DSUB] = "ADAT on Dsub-9pin interface", + [SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT] = "ADAT on optical interface", + [SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT_A] = "ADAT on optical interface A", + [SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT_B] = "ADAT on optical interface B", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT] = "S/PDIF on optical interface", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_A] = "S/PDIF on optical interface A", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_B] = "S/PDIF on optical interface B", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PCIF on coaxial interface", + [SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR] = "AESEBU on XLR interface", + [SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC] = "Word clock on BNC interface", +}; + +static void proc_read_clock(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + + struct snd_motu *motu = entry->private_data; + const struct snd_motu_protocol *const protocol = motu->spec->protocol; + unsigned int rate; + enum snd_motu_clock_source source; + + if (protocol->get_clock_rate(motu, &rate) < 0) + return; + if (protocol->get_clock_source(motu, &source) < 0) + return; + + snd_iprintf(buffer, "Rate:\t%d\n", rate); + snd_iprintf(buffer, "Source:\t%s\n", clock_names[source]); +} + +static void proc_read_format(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + struct snd_motu *motu = entry->private_data; + const struct snd_motu_protocol *const protocol = motu->spec->protocol; + unsigned int mode; + struct snd_motu_packet_format *formats; + int i; + + if (protocol->cache_packet_formats(motu) < 0) + return; + + snd_iprintf(buffer, "tx:\tmsg\tfixed\tdiffered\n"); + for (i = 0; i < SND_MOTU_CLOCK_RATE_COUNT; ++i) { + mode = i >> 1; + + formats = &motu->tx_packet_formats; + snd_iprintf(buffer, + "%u:\t%u\t%u\t%u\n", + snd_motu_clock_rates[i], + formats->msg_chunks, + formats->fixed_part_pcm_chunks[mode], + formats->differed_part_pcm_chunks[mode]); + } + + snd_iprintf(buffer, "rx:\tmsg\tfixed\tdiffered\n"); + for (i = 0; i < SND_MOTU_CLOCK_RATE_COUNT; ++i) { + mode = i >> 1; + + formats = &motu->rx_packet_formats; + snd_iprintf(buffer, + "%u:\t%u\t%u\t%u\n", + snd_motu_clock_rates[i], + formats->msg_chunks, + formats->fixed_part_pcm_chunks[mode], + formats->differed_part_pcm_chunks[mode]); + } +} + +static void add_node(struct snd_motu *motu, struct snd_info_entry *root, + const char *name, + void (*op)(struct snd_info_entry *e, + struct snd_info_buffer *b)) +{ + struct snd_info_entry *entry; + + entry = snd_info_create_card_entry(motu->card, name, root); + if (entry == NULL) + return; + + snd_info_set_text_ops(entry, motu, op); + if (snd_info_register(entry) < 0) + snd_info_free_entry(entry); +} + +void snd_motu_proc_init(struct snd_motu *motu) +{ + struct snd_info_entry *root; + + /* + * All nodes are automatically removed at snd_card_disconnect(), + * by following to link list. + */ + root = snd_info_create_card_entry(motu->card, "firewire", + motu->card->proc_root); + if (root == NULL) + return; + root->mode = S_IFDIR | S_IRUGO | S_IXUGO; + if (snd_info_register(root) < 0) { + snd_info_free_entry(root); + return; + } + + add_node(motu, root, "clock", proc_read_clock); + add_node(motu, root, "format", proc_read_format); +} diff --git a/sound/firewire/motu/motu-protocol-v2.c b/sound/firewire/motu/motu-protocol-v2.c new file mode 100644 index 0000000000000000000000000000000000000000..05b5d287c2f3ddd3b1e0c8d9d0287fd05d7629f3 --- /dev/null +++ b/sound/firewire/motu/motu-protocol-v2.c @@ -0,0 +1,237 @@ +/* + * motu-protocol-v2.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "motu.h" + +#define V2_CLOCK_STATUS_OFFSET 0x0b14 +#define V2_CLOCK_RATE_MASK 0x00000038 +#define V2_CLOCK_RATE_SHIFT 3 +#define V2_CLOCK_SRC_MASK 0x00000007 +#define V2_CLOCK_SRC_SHIFT 0 + +#define V2_IN_OUT_CONF_OFFSET 0x0c04 +#define V2_OPT_OUT_IFACE_MASK 0x00000c00 +#define V2_OPT_OUT_IFACE_SHIFT 10 +#define V2_OPT_IN_IFACE_MASK 0x00000300 +#define V2_OPT_IN_IFACE_SHIFT 8 +#define V2_OPT_IFACE_MODE_NONE 0 +#define V2_OPT_IFACE_MODE_ADAT 1 +#define V2_OPT_IFACE_MODE_SPDIF 2 + +static int v2_get_clock_rate(struct snd_motu *motu, unsigned int *rate) +{ + __be32 reg; + unsigned int index; + int err; + + err = snd_motu_transaction_read(motu, V2_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + + index = (be32_to_cpu(reg) & V2_CLOCK_RATE_MASK) >> V2_CLOCK_RATE_SHIFT; + if (index >= ARRAY_SIZE(snd_motu_clock_rates)) + return -EIO; + + *rate = snd_motu_clock_rates[index]; + + return 0; +} + +static int v2_set_clock_rate(struct snd_motu *motu, unsigned int rate) +{ + __be32 reg; + u32 data; + int i; + int err; + + for (i = 0; i < ARRAY_SIZE(snd_motu_clock_rates); ++i) { + if (snd_motu_clock_rates[i] == rate) + break; + } + if (i == ARRAY_SIZE(snd_motu_clock_rates)) + return -EINVAL; + + err = snd_motu_transaction_read(motu, V2_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + data &= ~V2_CLOCK_RATE_MASK; + data |= i << V2_CLOCK_RATE_SHIFT; + + reg = cpu_to_be32(data); + return snd_motu_transaction_write(motu, V2_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); +} + +static int v2_get_clock_source(struct snd_motu *motu, + enum snd_motu_clock_source *src) +{ + __be32 reg; + unsigned int index; + int err; + + err = snd_motu_transaction_read(motu, V2_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + + index = be32_to_cpu(reg) & V2_CLOCK_SRC_MASK; + if (index > 5) + return -EIO; + + /* To check the configuration of optical interface. */ + err = snd_motu_transaction_read(motu, V2_IN_OUT_CONF_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + + switch (index) { + case 0: + *src = SND_MOTU_CLOCK_SOURCE_INTERNAL; + break; + case 1: + if (be32_to_cpu(reg) & 0x00000200) + *src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT; + else + *src = SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT; + break; + case 2: + *src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX; + break; + case 4: + *src = SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC; + break; + case 5: + *src = SND_MOTU_CLOCK_SOURCE_ADAT_ON_DSUB; + break; + default: + return -EIO; + } + + return 0; +} + +static int v2_switch_fetching_mode(struct snd_motu *motu, bool enable) +{ + /* V2 protocol doesn't have this feature. */ + return 0; +} + +static void calculate_fixed_part(struct snd_motu_packet_format *formats, + enum amdtp_stream_direction dir, + enum snd_motu_spec_flags flags, + unsigned char analog_ports) +{ + unsigned char pcm_chunks[3] = {0, 0, 0}; + + formats->msg_chunks = 2; + + pcm_chunks[0] = analog_ports; + pcm_chunks[1] = analog_ports; + if (flags & SND_MOTU_SPEC_SUPPORT_CLOCK_X4) + pcm_chunks[2] = analog_ports; + + if (dir == AMDTP_IN_STREAM) { + if (flags & SND_MOTU_SPEC_TX_MICINST_CHUNK) { + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + } + if (flags & SND_MOTU_SPEC_TX_RETURN_CHUNK) { + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + } + } else { + /* + * Packets to v2 units transfer main-out-1/2 and phone-out-1/2. + */ + pcm_chunks[0] += 4; + pcm_chunks[1] += 4; + } + + /* + * All of v2 models have a pair of coaxial interfaces for digital in/out + * port. At 44.1/48.0/88.2/96.0 kHz, packets includes PCM from these + * ports. + */ + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + + /* This part should be multiples of 4. */ + formats->fixed_part_pcm_chunks[0] = round_up(2 + pcm_chunks[0], 4) - 2; + formats->fixed_part_pcm_chunks[1] = round_up(2 + pcm_chunks[1], 4) - 2; + if (flags & SND_MOTU_SPEC_SUPPORT_CLOCK_X4) + formats->fixed_part_pcm_chunks[2] = + round_up(2 + pcm_chunks[2], 4) - 2; +} + +static void calculate_differed_part(struct snd_motu_packet_format *formats, + enum snd_motu_spec_flags flags, + u32 data, u32 mask, u32 shift) +{ + unsigned char pcm_chunks[3] = {0, 0}; + + /* + * When optical interfaces are configured for S/PDIF (TOSLINK), + * the above PCM frames come from them, instead of coaxial + * interfaces. + */ + data = (data & mask) >> shift; + if ((flags & SND_MOTU_SPEC_HAS_OPT_IFACE_A) && + data == V2_OPT_IFACE_MODE_ADAT) { + pcm_chunks[0] += 8; + pcm_chunks[1] += 4; + } + + /* At mode x4, no data chunks are supported in this part. */ + formats->differed_part_pcm_chunks[0] = pcm_chunks[0]; + formats->differed_part_pcm_chunks[1] = pcm_chunks[1]; +} + +static int v2_cache_packet_formats(struct snd_motu *motu) +{ + __be32 reg; + u32 data; + int err; + + err = snd_motu_transaction_read(motu, V2_IN_OUT_CONF_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + calculate_fixed_part(&motu->tx_packet_formats, AMDTP_IN_STREAM, + motu->spec->flags, motu->spec->analog_in_ports); + calculate_differed_part(&motu->tx_packet_formats, motu->spec->flags, + data, V2_OPT_IN_IFACE_MASK, V2_OPT_IN_IFACE_SHIFT); + + calculate_fixed_part(&motu->rx_packet_formats, AMDTP_OUT_STREAM, + motu->spec->flags, motu->spec->analog_out_ports); + calculate_differed_part(&motu->rx_packet_formats, motu->spec->flags, + data, V2_OPT_OUT_IFACE_MASK, V2_OPT_OUT_IFACE_SHIFT); + + motu->tx_packet_formats.midi_flag_offset = 4; + motu->tx_packet_formats.midi_byte_offset = 6; + motu->tx_packet_formats.pcm_byte_offset = 10; + + motu->rx_packet_formats.midi_flag_offset = 4; + motu->rx_packet_formats.midi_byte_offset = 6; + motu->rx_packet_formats.pcm_byte_offset = 10; + + return 0; +} + +const struct snd_motu_protocol snd_motu_protocol_v2 = { + .get_clock_rate = v2_get_clock_rate, + .set_clock_rate = v2_set_clock_rate, + .get_clock_source = v2_get_clock_source, + .switch_fetching_mode = v2_switch_fetching_mode, + .cache_packet_formats = v2_cache_packet_formats, +}; diff --git a/sound/firewire/motu/motu-protocol-v3.c b/sound/firewire/motu/motu-protocol-v3.c new file mode 100644 index 0000000000000000000000000000000000000000..ddb647254ed224b860f0ae75b372cc32f7f91383 --- /dev/null +++ b/sound/firewire/motu/motu-protocol-v3.c @@ -0,0 +1,311 @@ +/* + * motu-protocol-v3.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include +#include "motu.h" + +#define V3_CLOCK_STATUS_OFFSET 0x0b14 +#define V3_FETCH_PCM_FRAMES 0x02000000 +#define V3_CLOCK_RATE_MASK 0x0000ff00 +#define V3_CLOCK_RATE_SHIFT 8 +#define V3_CLOCK_SOURCE_MASK 0x000000ff + +#define V3_OPT_IFACE_MODE_OFFSET 0x0c94 +#define V3_ENABLE_OPT_IN_IFACE_A 0x00000001 +#define V3_ENABLE_OPT_IN_IFACE_B 0x00000002 +#define V3_ENABLE_OPT_OUT_IFACE_A 0x00000100 +#define V3_ENABLE_OPT_OUT_IFACE_B 0x00000200 +#define V3_NO_ADAT_OPT_IN_IFACE_A 0x00010000 +#define V3_NO_ADAT_OPT_IN_IFACE_B 0x00100000 +#define V3_NO_ADAT_OPT_OUT_IFACE_A 0x00040000 +#define V3_NO_ADAT_OPT_OUT_IFACE_B 0x00400000 + +static int v3_get_clock_rate(struct snd_motu *motu, unsigned int *rate) +{ + __be32 reg; + u32 data; + int err; + + err = snd_motu_transaction_read(motu, V3_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + data = (data & V3_CLOCK_RATE_MASK) >> V3_CLOCK_RATE_SHIFT; + if (data >= ARRAY_SIZE(snd_motu_clock_rates)) + return -EIO; + + *rate = snd_motu_clock_rates[data]; + + return 0; +} + +static int v3_set_clock_rate(struct snd_motu *motu, unsigned int rate) +{ + __be32 reg; + u32 data; + bool need_to_wait; + int i, err; + + for (i = 0; i < ARRAY_SIZE(snd_motu_clock_rates); ++i) { + if (snd_motu_clock_rates[i] == rate) + break; + } + if (i == ARRAY_SIZE(snd_motu_clock_rates)) + return -EINVAL; + + err = snd_motu_transaction_read(motu, V3_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + data &= ~(V3_CLOCK_RATE_MASK | V3_FETCH_PCM_FRAMES); + data |= i << V3_CLOCK_RATE_SHIFT; + + need_to_wait = data != be32_to_cpu(reg); + + reg = cpu_to_be32(data); + err = snd_motu_transaction_write(motu, V3_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + + if (need_to_wait) { + /* Cost expensive. */ + if (msleep_interruptible(4000) > 0) + return -EINTR; + } + + return 0; +} + +static int v3_get_clock_source(struct snd_motu *motu, + enum snd_motu_clock_source *src) +{ + __be32 reg; + u32 data; + unsigned int val; + int err; + + err = snd_motu_transaction_read(motu, V3_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + val = data & V3_CLOCK_SOURCE_MASK; + if (val == 0x00) { + *src = SND_MOTU_CLOCK_SOURCE_INTERNAL; + } else if (val == 0x01) { + *src = SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC; + } else if (val == 0x10) { + *src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX; + } else if (val == 0x18 || val == 0x19) { + err = snd_motu_transaction_read(motu, V3_OPT_IFACE_MODE_OFFSET, + ®, sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + if (val == 0x18) { + if (data & V3_NO_ADAT_OPT_IN_IFACE_A) + *src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_A; + else + *src = SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT_A; + } else { + if (data & V3_NO_ADAT_OPT_IN_IFACE_B) + *src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_B; + else + *src = SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT_B; + } + } else { + *src = SND_MOTU_CLOCK_SOURCE_UNKNOWN; + } + + return 0; +} + +static int v3_switch_fetching_mode(struct snd_motu *motu, bool enable) +{ + __be32 reg; + u32 data; + int err; + + err = snd_motu_transaction_read(motu, V3_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return 0; + data = be32_to_cpu(reg); + + if (enable) + data |= V3_FETCH_PCM_FRAMES; + else + data &= ~V3_FETCH_PCM_FRAMES; + + reg = cpu_to_be32(data); + return snd_motu_transaction_write(motu, V3_CLOCK_STATUS_OFFSET, ®, + sizeof(reg)); +} + +static void calculate_fixed_part(struct snd_motu_packet_format *formats, + enum amdtp_stream_direction dir, + enum snd_motu_spec_flags flags, + unsigned char analog_ports) +{ + unsigned char pcm_chunks[3] = {0, 0, 0}; + + formats->msg_chunks = 2; + + pcm_chunks[0] = analog_ports; + pcm_chunks[1] = analog_ports; + if (flags & SND_MOTU_SPEC_SUPPORT_CLOCK_X4) + pcm_chunks[2] = analog_ports; + + if (dir == AMDTP_IN_STREAM) { + if (flags & SND_MOTU_SPEC_TX_MICINST_CHUNK) { + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + if (flags & SND_MOTU_SPEC_SUPPORT_CLOCK_X4) + pcm_chunks[2] += 2; + } + + if (flags & SND_MOTU_SPEC_TX_RETURN_CHUNK) { + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + if (flags & SND_MOTU_SPEC_SUPPORT_CLOCK_X4) + pcm_chunks[2] += 2; + } + + if (flags & SND_MOTU_SPEC_TX_REVERB_CHUNK) { + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + } + } else { + /* + * Packets to v2 units transfer main-out-1/2 and phone-out-1/2. + */ + pcm_chunks[0] += 4; + pcm_chunks[1] += 4; + } + + /* + * At least, packets have two data chunks for S/PDIF on coaxial + * interface. + */ + pcm_chunks[0] += 2; + pcm_chunks[1] += 2; + + /* + * Fixed part consists of PCM chunks multiple of 4, with msg chunks. As + * a result, this part can includes empty data chunks. + */ + formats->fixed_part_pcm_chunks[0] = round_up(2 + pcm_chunks[0], 4) - 2; + formats->fixed_part_pcm_chunks[1] = round_up(2 + pcm_chunks[1], 4) - 2; + if (flags & SND_MOTU_SPEC_SUPPORT_CLOCK_X4) + formats->fixed_part_pcm_chunks[2] = + round_up(2 + pcm_chunks[2], 4) - 2; +} + +static void calculate_differed_part(struct snd_motu_packet_format *formats, + enum snd_motu_spec_flags flags, u32 data, + u32 a_enable_mask, u32 a_no_adat_mask, + u32 b_enable_mask, u32 b_no_adat_mask) +{ + unsigned char pcm_chunks[3] = {0, 0, 0}; + int i; + + if ((flags & SND_MOTU_SPEC_HAS_OPT_IFACE_A) && (data & a_enable_mask)) { + if (data & a_no_adat_mask) { + /* + * Additional two data chunks for S/PDIF on optical + * interface A. This includes empty data chunks. + */ + pcm_chunks[0] += 4; + pcm_chunks[1] += 4; + } else { + /* + * Additional data chunks for ADAT on optical interface + * A. + */ + pcm_chunks[0] += 8; + pcm_chunks[1] += 4; + } + } + + if ((flags & SND_MOTU_SPEC_HAS_OPT_IFACE_B) && (data & b_enable_mask)) { + if (data & b_no_adat_mask) { + /* + * Additional two data chunks for S/PDIF on optical + * interface B. This includes empty data chunks. + */ + pcm_chunks[0] += 4; + pcm_chunks[1] += 4; + } else { + /* + * Additional data chunks for ADAT on optical interface + * B. + */ + pcm_chunks[0] += 8; + pcm_chunks[1] += 4; + } + } + + for (i = 0; i < 3; ++i) { + if (pcm_chunks[i] > 0) + pcm_chunks[i] = round_up(pcm_chunks[i], 4); + + formats->differed_part_pcm_chunks[i] = pcm_chunks[i]; + } +} + +static int v3_cache_packet_formats(struct snd_motu *motu) +{ + __be32 reg; + u32 data; + int err; + + err = snd_motu_transaction_read(motu, V3_OPT_IFACE_MODE_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + calculate_fixed_part(&motu->tx_packet_formats, AMDTP_IN_STREAM, + motu->spec->flags, motu->spec->analog_in_ports); + calculate_differed_part(&motu->tx_packet_formats, + motu->spec->flags, data, + V3_ENABLE_OPT_IN_IFACE_A, V3_NO_ADAT_OPT_IN_IFACE_A, + V3_ENABLE_OPT_IN_IFACE_B, V3_NO_ADAT_OPT_IN_IFACE_B); + + calculate_fixed_part(&motu->rx_packet_formats, AMDTP_OUT_STREAM, + motu->spec->flags, motu->spec->analog_out_ports); + calculate_differed_part(&motu->rx_packet_formats, + motu->spec->flags, data, + V3_ENABLE_OPT_OUT_IFACE_A, V3_NO_ADAT_OPT_OUT_IFACE_A, + V3_ENABLE_OPT_OUT_IFACE_B, V3_NO_ADAT_OPT_OUT_IFACE_B); + + motu->tx_packet_formats.midi_flag_offset = 8; + motu->tx_packet_formats.midi_byte_offset = 7; + motu->tx_packet_formats.pcm_byte_offset = 10; + + motu->rx_packet_formats.midi_flag_offset = 8; + motu->rx_packet_formats.midi_byte_offset = 7; + motu->rx_packet_formats.pcm_byte_offset = 10; + + return 0; +} + +const struct snd_motu_protocol snd_motu_protocol_v3 = { + .get_clock_rate = v3_get_clock_rate, + .set_clock_rate = v3_set_clock_rate, + .get_clock_source = v3_get_clock_source, + .switch_fetching_mode = v3_switch_fetching_mode, + .cache_packet_formats = v3_cache_packet_formats, +}; diff --git a/sound/firewire/motu/motu-stream.c b/sound/firewire/motu/motu-stream.c new file mode 100644 index 0000000000000000000000000000000000000000..bd458029099e35f1872af4a53f3759e188302226 --- /dev/null +++ b/sound/firewire/motu/motu-stream.c @@ -0,0 +1,381 @@ +/* + * motu-stream.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "motu.h" + +#define CALLBACK_TIMEOUT 200 + +#define ISOC_COMM_CONTROL_OFFSET 0x0b00 +#define ISOC_COMM_CONTROL_MASK 0xffff0000 +#define CHANGE_RX_ISOC_COMM_STATE 0x80000000 +#define RX_ISOC_COMM_IS_ACTIVATED 0x40000000 +#define RX_ISOC_COMM_CHANNEL_MASK 0x3f000000 +#define RX_ISOC_COMM_CHANNEL_SHIFT 24 +#define CHANGE_TX_ISOC_COMM_STATE 0x00800000 +#define TX_ISOC_COMM_IS_ACTIVATED 0x00400000 +#define TX_ISOC_COMM_CHANNEL_MASK 0x003f0000 +#define TX_ISOC_COMM_CHANNEL_SHIFT 16 + +#define PACKET_FORMAT_OFFSET 0x0b10 +#define TX_PACKET_EXCLUDE_DIFFERED_DATA_CHUNKS 0x00000080 +#define RX_PACKET_EXCLUDE_DIFFERED_DATA_CHUNKS 0x00000040 +#define TX_PACKET_TRANSMISSION_SPEED_MASK 0x0000000f + +static int start_both_streams(struct snd_motu *motu, unsigned int rate) +{ + unsigned int midi_ports = 0; + __be32 reg; + u32 data; + int err; + + if (motu->spec->flags & SND_MOTU_SPEC_HAS_MIDI) + midi_ports = 1; + + /* Set packet formation to our packet streaming engine. */ + err = amdtp_motu_set_parameters(&motu->rx_stream, rate, midi_ports, + &motu->rx_packet_formats); + if (err < 0) + return err; + + err = amdtp_motu_set_parameters(&motu->tx_stream, rate, midi_ports, + &motu->tx_packet_formats); + if (err < 0) + return err; + + /* Get isochronous resources on the bus. */ + err = fw_iso_resources_allocate(&motu->rx_resources, + amdtp_stream_get_max_payload(&motu->rx_stream), + fw_parent_device(motu->unit)->max_speed); + if (err < 0) + return err; + + err = fw_iso_resources_allocate(&motu->tx_resources, + amdtp_stream_get_max_payload(&motu->tx_stream), + fw_parent_device(motu->unit)->max_speed); + if (err < 0) + return err; + + /* Configure the unit to start isochronous communication. */ + err = snd_motu_transaction_read(motu, ISOC_COMM_CONTROL_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg) & ~ISOC_COMM_CONTROL_MASK; + + data |= CHANGE_RX_ISOC_COMM_STATE | RX_ISOC_COMM_IS_ACTIVATED | + (motu->rx_resources.channel << RX_ISOC_COMM_CHANNEL_SHIFT) | + CHANGE_TX_ISOC_COMM_STATE | TX_ISOC_COMM_IS_ACTIVATED | + (motu->tx_resources.channel << TX_ISOC_COMM_CHANNEL_SHIFT); + + reg = cpu_to_be32(data); + return snd_motu_transaction_write(motu, ISOC_COMM_CONTROL_OFFSET, ®, + sizeof(reg)); +} + +static void stop_both_streams(struct snd_motu *motu) +{ + __be32 reg; + u32 data; + int err; + + err = motu->spec->protocol->switch_fetching_mode(motu, false); + if (err < 0) + return; + + err = snd_motu_transaction_read(motu, ISOC_COMM_CONTROL_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return; + data = be32_to_cpu(reg); + + data &= ~(RX_ISOC_COMM_IS_ACTIVATED | TX_ISOC_COMM_IS_ACTIVATED); + data |= CHANGE_RX_ISOC_COMM_STATE | CHANGE_TX_ISOC_COMM_STATE; + + reg = cpu_to_be32(data); + snd_motu_transaction_write(motu, ISOC_COMM_CONTROL_OFFSET, ®, + sizeof(reg)); + + fw_iso_resources_free(&motu->tx_resources); + fw_iso_resources_free(&motu->rx_resources); +} + +static int start_isoc_ctx(struct snd_motu *motu, struct amdtp_stream *stream) +{ + struct fw_iso_resources *resources; + int err; + + if (stream == &motu->rx_stream) + resources = &motu->rx_resources; + else + resources = &motu->tx_resources; + + err = amdtp_stream_start(stream, resources->channel, + fw_parent_device(motu->unit)->max_speed); + if (err < 0) + return err; + + if (!amdtp_stream_wait_callback(stream, CALLBACK_TIMEOUT)) { + amdtp_stream_stop(stream); + fw_iso_resources_free(resources); + return -ETIMEDOUT; + } + + return 0; +} + +static void stop_isoc_ctx(struct snd_motu *motu, struct amdtp_stream *stream) +{ + struct fw_iso_resources *resources; + + if (stream == &motu->rx_stream) + resources = &motu->rx_resources; + else + resources = &motu->tx_resources; + + amdtp_stream_stop(stream); + fw_iso_resources_free(resources); +} + +static int ensure_packet_formats(struct snd_motu *motu) +{ + __be32 reg; + u32 data; + int err; + + err = snd_motu_transaction_read(motu, PACKET_FORMAT_OFFSET, ®, + sizeof(reg)); + if (err < 0) + return err; + data = be32_to_cpu(reg); + + data &= ~(TX_PACKET_EXCLUDE_DIFFERED_DATA_CHUNKS | + RX_PACKET_EXCLUDE_DIFFERED_DATA_CHUNKS| + TX_PACKET_TRANSMISSION_SPEED_MASK); + if (motu->tx_packet_formats.differed_part_pcm_chunks[0] == 0) + data |= TX_PACKET_EXCLUDE_DIFFERED_DATA_CHUNKS; + if (motu->rx_packet_formats.differed_part_pcm_chunks[0] == 0) + data |= RX_PACKET_EXCLUDE_DIFFERED_DATA_CHUNKS; + data |= fw_parent_device(motu->unit)->max_speed; + + reg = cpu_to_be32(data); + return snd_motu_transaction_write(motu, PACKET_FORMAT_OFFSET, ®, + sizeof(reg)); +} + +int snd_motu_stream_start_duplex(struct snd_motu *motu, unsigned int rate) +{ + const struct snd_motu_protocol *protocol = motu->spec->protocol; + unsigned int curr_rate; + int err = 0; + + if (motu->capture_substreams == 0 && motu->playback_substreams == 0) + return 0; + + /* Some packet queueing errors. */ + if (amdtp_streaming_error(&motu->rx_stream) || + amdtp_streaming_error(&motu->tx_stream)) { + amdtp_stream_stop(&motu->rx_stream); + amdtp_stream_stop(&motu->tx_stream); + stop_both_streams(motu); + } + + err = protocol->cache_packet_formats(motu); + if (err < 0) + return err; + + /* Stop stream if rate is different. */ + err = protocol->get_clock_rate(motu, &curr_rate); + if (err < 0) { + dev_err(&motu->unit->device, + "fail to get sampling rate: %d\n", err); + return err; + } + if (rate == 0) + rate = curr_rate; + if (rate != curr_rate) { + amdtp_stream_stop(&motu->rx_stream); + amdtp_stream_stop(&motu->tx_stream); + stop_both_streams(motu); + } + + if (!amdtp_stream_running(&motu->rx_stream)) { + err = protocol->set_clock_rate(motu, rate); + if (err < 0) { + dev_err(&motu->unit->device, + "fail to set sampling rate: %d\n", err); + return err; + } + + err = ensure_packet_formats(motu); + if (err < 0) + return err; + + err = start_both_streams(motu, rate); + if (err < 0) { + dev_err(&motu->unit->device, + "fail to start isochronous comm: %d\n", err); + stop_both_streams(motu); + return err; + } + + err = start_isoc_ctx(motu, &motu->rx_stream); + if (err < 0) { + dev_err(&motu->unit->device, + "fail to start IT context: %d\n", err); + stop_both_streams(motu); + return err; + } + + err = protocol->switch_fetching_mode(motu, true); + if (err < 0) { + dev_err(&motu->unit->device, + "fail to enable frame fetching: %d\n", err); + stop_both_streams(motu); + return err; + } + } + + if (!amdtp_stream_running(&motu->tx_stream) && + motu->capture_substreams > 0) { + err = start_isoc_ctx(motu, &motu->tx_stream); + if (err < 0) { + dev_err(&motu->unit->device, + "fail to start IR context: %d", err); + amdtp_stream_stop(&motu->rx_stream); + stop_both_streams(motu); + return err; + } + } + + return 0; +} + +void snd_motu_stream_stop_duplex(struct snd_motu *motu) +{ + if (motu->capture_substreams == 0) { + if (amdtp_stream_running(&motu->tx_stream)) + stop_isoc_ctx(motu, &motu->tx_stream); + + if (motu->playback_substreams == 0) { + if (amdtp_stream_running(&motu->rx_stream)) + stop_isoc_ctx(motu, &motu->rx_stream); + stop_both_streams(motu); + } + } +} + +static int init_stream(struct snd_motu *motu, enum amdtp_stream_direction dir) +{ + int err; + struct amdtp_stream *stream; + struct fw_iso_resources *resources; + + if (dir == AMDTP_IN_STREAM) { + stream = &motu->tx_stream; + resources = &motu->tx_resources; + } else { + stream = &motu->rx_stream; + resources = &motu->rx_resources; + } + + err = fw_iso_resources_init(resources, motu->unit); + if (err < 0) + return err; + + err = amdtp_motu_init(stream, motu->unit, dir, motu->spec->protocol); + if (err < 0) { + amdtp_stream_destroy(stream); + fw_iso_resources_destroy(resources); + } + + return err; +} + +static void destroy_stream(struct snd_motu *motu, + enum amdtp_stream_direction dir) +{ + struct amdtp_stream *stream; + struct fw_iso_resources *resources; + + if (dir == AMDTP_IN_STREAM) { + stream = &motu->tx_stream; + resources = &motu->tx_resources; + } else { + stream = &motu->rx_stream; + resources = &motu->rx_resources; + } + + amdtp_stream_destroy(stream); + fw_iso_resources_free(resources); +} + +int snd_motu_stream_init_duplex(struct snd_motu *motu) +{ + int err; + + err = init_stream(motu, AMDTP_IN_STREAM); + if (err < 0) + return err; + + err = init_stream(motu, AMDTP_OUT_STREAM); + if (err < 0) + destroy_stream(motu, AMDTP_IN_STREAM); + + return err; +} + +/* + * This function should be called before starting streams or after stopping + * streams. + */ +void snd_motu_stream_destroy_duplex(struct snd_motu *motu) +{ + destroy_stream(motu, AMDTP_IN_STREAM); + destroy_stream(motu, AMDTP_OUT_STREAM); + + motu->playback_substreams = 0; + motu->capture_substreams = 0; +} + +static void motu_lock_changed(struct snd_motu *motu) +{ + motu->dev_lock_changed = true; + wake_up(&motu->hwdep_wait); +} + +int snd_motu_stream_lock_try(struct snd_motu *motu) +{ + int err; + + spin_lock_irq(&motu->lock); + + if (motu->dev_lock_count < 0) { + err = -EBUSY; + goto out; + } + + if (motu->dev_lock_count++ == 0) + motu_lock_changed(motu); + err = 0; +out: + spin_unlock_irq(&motu->lock); + return err; +} + +void snd_motu_stream_lock_release(struct snd_motu *motu) +{ + spin_lock_irq(&motu->lock); + + if (WARN_ON(motu->dev_lock_count <= 0)) + goto out; + + if (--motu->dev_lock_count == 0) + motu_lock_changed(motu); +out: + spin_unlock_irq(&motu->lock); +} diff --git a/sound/firewire/motu/motu-transaction.c b/sound/firewire/motu/motu-transaction.c new file mode 100644 index 0000000000000000000000000000000000000000..7fc30091e0deef80656382ce4c2cdf6423dbc816 --- /dev/null +++ b/sound/firewire/motu/motu-transaction.c @@ -0,0 +1,137 @@ +/* + * motu-transaction.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + + +#include "motu.h" + +#define SND_MOTU_ADDR_BASE 0xfffff0000000ULL +#define ASYNC_ADDR_HI 0x0b04 +#define ASYNC_ADDR_LO 0x0b08 + +int snd_motu_transaction_read(struct snd_motu *motu, u32 offset, __be32 *reg, + size_t size) +{ + int tcode; + + if (size % sizeof(__be32) > 0 || size <= 0) + return -EINVAL; + if (size == sizeof(__be32)) + tcode = TCODE_READ_QUADLET_REQUEST; + else + tcode = TCODE_READ_BLOCK_REQUEST; + + return snd_fw_transaction(motu->unit, tcode, + SND_MOTU_ADDR_BASE + offset, reg, size, 0); +} + +int snd_motu_transaction_write(struct snd_motu *motu, u32 offset, __be32 *reg, + size_t size) +{ + int tcode; + + if (size % sizeof(__be32) > 0 || size <= 0) + return -EINVAL; + if (size == sizeof(__be32)) + tcode = TCODE_WRITE_QUADLET_REQUEST; + else + tcode = TCODE_WRITE_BLOCK_REQUEST; + + return snd_fw_transaction(motu->unit, tcode, + SND_MOTU_ADDR_BASE + offset, reg, size, 0); +} + +static void handle_message(struct fw_card *card, struct fw_request *request, + int tcode, int destination, int source, + int generation, unsigned long long offset, + void *data, size_t length, void *callback_data) +{ + struct snd_motu *motu = callback_data; + __be32 *buf = (__be32 *)data; + unsigned long flags; + + if (tcode != TCODE_WRITE_QUADLET_REQUEST) { + fw_send_response(card, request, RCODE_COMPLETE); + return; + } + + if (offset != motu->async_handler.offset || length != 4) { + fw_send_response(card, request, RCODE_ADDRESS_ERROR); + return; + } + + spin_lock_irqsave(&motu->lock, flags); + motu->msg = be32_to_cpu(*buf); + spin_unlock_irqrestore(&motu->lock, flags); + + fw_send_response(card, request, RCODE_COMPLETE); + + wake_up(&motu->hwdep_wait); +} + +int snd_motu_transaction_reregister(struct snd_motu *motu) +{ + struct fw_device *device = fw_parent_device(motu->unit); + __be32 data; + int err; + + if (motu->async_handler.callback_data == NULL) + return -EINVAL; + + /* Register messaging address. Block transaction is not allowed. */ + data = cpu_to_be32((device->card->node_id << 16) | + (motu->async_handler.offset >> 32)); + err = snd_motu_transaction_write(motu, ASYNC_ADDR_HI, &data, + sizeof(data)); + if (err < 0) + return err; + + data = cpu_to_be32(motu->async_handler.offset); + return snd_motu_transaction_write(motu, ASYNC_ADDR_LO, &data, + sizeof(data)); +} + +int snd_motu_transaction_register(struct snd_motu *motu) +{ + static const struct fw_address_region resp_register_region = { + .start = 0xffffe0000000ull, + .end = 0xffffe000ffffull, + }; + int err; + + /* Perhaps, 4 byte messages are transferred. */ + motu->async_handler.length = 4; + motu->async_handler.address_callback = handle_message; + motu->async_handler.callback_data = motu; + + err = fw_core_add_address_handler(&motu->async_handler, + &resp_register_region); + if (err < 0) + return err; + + err = snd_motu_transaction_reregister(motu); + if (err < 0) { + fw_core_remove_address_handler(&motu->async_handler); + motu->async_handler.address_callback = NULL; + } + + return err; +} + +void snd_motu_transaction_unregister(struct snd_motu *motu) +{ + __be32 data; + + if (motu->async_handler.address_callback != NULL) + fw_core_remove_address_handler(&motu->async_handler); + motu->async_handler.address_callback = NULL; + + /* Unregister the address. */ + data = cpu_to_be32(0x00000000); + snd_motu_transaction_write(motu, ASYNC_ADDR_HI, &data, sizeof(data)); + snd_motu_transaction_write(motu, ASYNC_ADDR_LO, &data, sizeof(data)); +} diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c new file mode 100644 index 0000000000000000000000000000000000000000..bf779cfeef0dfaea62ea5684997314a9fe02d4c0 --- /dev/null +++ b/sound/firewire/motu/motu.c @@ -0,0 +1,264 @@ +/* + * motu.c - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#include "motu.h" + +#define OUI_MOTU 0x0001f2 + +MODULE_DESCRIPTION("MOTU FireWire driver"); +MODULE_AUTHOR("Takashi Sakamoto "); +MODULE_LICENSE("GPL v2"); + +const unsigned int snd_motu_clock_rates[SND_MOTU_CLOCK_RATE_COUNT] = { + /* mode 0 */ + [0] = 44100, + [1] = 48000, + /* mode 1 */ + [2] = 88200, + [3] = 96000, + /* mode 2 */ + [4] = 176400, + [5] = 192000, +}; + +static void name_card(struct snd_motu *motu) +{ + struct fw_device *fw_dev = fw_parent_device(motu->unit); + struct fw_csr_iterator it; + int key, val; + u32 version = 0; + + fw_csr_iterator_init(&it, motu->unit->directory); + while (fw_csr_iterator_next(&it, &key, &val)) { + switch (key) { + case CSR_VERSION: + version = val; + break; + } + } + + strcpy(motu->card->driver, "FW-MOTU"); + strcpy(motu->card->shortname, motu->spec->name); + strcpy(motu->card->mixername, motu->spec->name); + snprintf(motu->card->longname, sizeof(motu->card->longname), + "MOTU %s (version:%d), GUID %08x%08x at %s, S%d", + motu->spec->name, version, + fw_dev->config_rom[3], fw_dev->config_rom[4], + dev_name(&motu->unit->device), 100 << fw_dev->max_speed); +} + +static void motu_free(struct snd_motu *motu) +{ + snd_motu_transaction_unregister(motu); + + snd_motu_stream_destroy_duplex(motu); + fw_unit_put(motu->unit); + + mutex_destroy(&motu->mutex); + kfree(motu); +} + +/* + * This module releases the FireWire unit data after all ALSA character devices + * are released by applications. This is for releasing stream data or finishing + * transactions safely. Thus at returning from .remove(), this module still keep + * references for the unit. + */ +static void motu_card_free(struct snd_card *card) +{ + motu_free(card->private_data); +} + +static void do_registration(struct work_struct *work) +{ + struct snd_motu *motu = container_of(work, struct snd_motu, dwork.work); + int err; + + if (motu->registered) + return; + + err = snd_card_new(&motu->unit->device, -1, NULL, THIS_MODULE, 0, + &motu->card); + if (err < 0) + return; + + name_card(motu); + + err = snd_motu_transaction_register(motu); + if (err < 0) + goto error; + + err = snd_motu_stream_init_duplex(motu); + if (err < 0) + goto error; + + snd_motu_proc_init(motu); + + err = snd_motu_create_pcm_devices(motu); + if (err < 0) + goto error; + + if (motu->spec->flags & SND_MOTU_SPEC_HAS_MIDI) { + err = snd_motu_create_midi_devices(motu); + if (err < 0) + goto error; + } + + err = snd_motu_create_hwdep_device(motu); + if (err < 0) + goto error; + + err = snd_card_register(motu->card); + if (err < 0) + goto error; + + /* + * After registered, motu instance can be released corresponding to + * releasing the sound card instance. + */ + motu->card->private_free = motu_card_free; + motu->card->private_data = motu; + motu->registered = true; + + return; +error: + snd_motu_transaction_unregister(motu); + snd_card_free(motu->card); + dev_info(&motu->unit->device, + "Sound card registration failed: %d\n", err); +} + +static int motu_probe(struct fw_unit *unit, + const struct ieee1394_device_id *entry) +{ + struct snd_motu *motu; + + /* Allocate this independently of sound card instance. */ + motu = kzalloc(sizeof(struct snd_motu), GFP_KERNEL); + if (motu == NULL) + return -ENOMEM; + + motu->spec = (const struct snd_motu_spec *)entry->driver_data; + motu->unit = fw_unit_get(unit); + dev_set_drvdata(&unit->device, motu); + + mutex_init(&motu->mutex); + spin_lock_init(&motu->lock); + init_waitqueue_head(&motu->hwdep_wait); + + /* Allocate and register this sound card later. */ + INIT_DEFERRABLE_WORK(&motu->dwork, do_registration); + snd_fw_schedule_registration(unit, &motu->dwork); + + return 0; +} + +static void motu_remove(struct fw_unit *unit) +{ + struct snd_motu *motu = dev_get_drvdata(&unit->device); + + /* + * Confirm to stop the work for registration before the sound card is + * going to be released. The work is not scheduled again because bus + * reset handler is not called anymore. + */ + cancel_delayed_work_sync(&motu->dwork); + + if (motu->registered) { + /* No need to wait for releasing card object in this context. */ + snd_card_free_when_closed(motu->card); + } else { + /* Don't forget this case. */ + motu_free(motu); + } +} + +static void motu_bus_update(struct fw_unit *unit) +{ + struct snd_motu *motu = dev_get_drvdata(&unit->device); + + /* Postpone a workqueue for deferred registration. */ + if (!motu->registered) + snd_fw_schedule_registration(unit, &motu->dwork); + + /* The handler address register becomes initialized. */ + snd_motu_transaction_reregister(motu); +} + +static struct snd_motu_spec motu_828mk2 = { + .name = "828mk2", + .protocol = &snd_motu_protocol_v2, + .flags = SND_MOTU_SPEC_SUPPORT_CLOCK_X2 | + SND_MOTU_SPEC_TX_MICINST_CHUNK | + SND_MOTU_SPEC_TX_RETURN_CHUNK | + SND_MOTU_SPEC_HAS_OPT_IFACE_A | + SND_MOTU_SPEC_HAS_MIDI, + + .analog_in_ports = 8, + .analog_out_ports = 8, +}; + +static struct snd_motu_spec motu_828mk3 = { + .name = "828mk3", + .protocol = &snd_motu_protocol_v3, + .flags = SND_MOTU_SPEC_SUPPORT_CLOCK_X2 | + SND_MOTU_SPEC_SUPPORT_CLOCK_X4 | + SND_MOTU_SPEC_TX_MICINST_CHUNK | + SND_MOTU_SPEC_TX_RETURN_CHUNK | + SND_MOTU_SPEC_TX_REVERB_CHUNK | + SND_MOTU_SPEC_HAS_OPT_IFACE_A | + SND_MOTU_SPEC_HAS_OPT_IFACE_B | + SND_MOTU_SPEC_HAS_MIDI, + + .analog_in_ports = 8, + .analog_out_ports = 8, +}; + +#define SND_MOTU_DEV_ENTRY(model, data) \ +{ \ + .match_flags = IEEE1394_MATCH_VENDOR_ID | \ + IEEE1394_MATCH_MODEL_ID | \ + IEEE1394_MATCH_SPECIFIER_ID, \ + .vendor_id = OUI_MOTU, \ + .model_id = model, \ + .specifier_id = OUI_MOTU, \ + .driver_data = (kernel_ulong_t)data, \ +} + +static const struct ieee1394_device_id motu_id_table[] = { + SND_MOTU_DEV_ENTRY(0x101800, &motu_828mk2), + SND_MOTU_DEV_ENTRY(0x106800, &motu_828mk3), /* FireWire only. */ + SND_MOTU_DEV_ENTRY(0x100800, &motu_828mk3), /* Hybrid. */ + { } +}; +MODULE_DEVICE_TABLE(ieee1394, motu_id_table); + +static struct fw_driver motu_driver = { + .driver = { + .owner = THIS_MODULE, + .name = KBUILD_MODNAME, + .bus = &fw_bus_type, + }, + .probe = motu_probe, + .update = motu_bus_update, + .remove = motu_remove, + .id_table = motu_id_table, +}; + +static int __init alsa_motu_init(void) +{ + return driver_register(&motu_driver.driver); +} + +static void __exit alsa_motu_exit(void) +{ + driver_unregister(&motu_driver.driver); +} + +module_init(alsa_motu_init); +module_exit(alsa_motu_exit); diff --git a/sound/firewire/motu/motu.h b/sound/firewire/motu/motu.h new file mode 100644 index 0000000000000000000000000000000000000000..8d6a4a3af9cc136056a380d39cbefbfcc397c4cc --- /dev/null +++ b/sound/firewire/motu/motu.h @@ -0,0 +1,161 @@ +/* + * motu.h - a part of driver for MOTU FireWire series + * + * Copyright (c) 2015-2017 Takashi Sakamoto + * + * Licensed under the terms of the GNU General Public License, version 2. + */ + +#ifndef SOUND_FIREWIRE_MOTU_H_INCLUDED +#define SOUND_FIREWIRE_MOTU_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../lib.h" +#include "../amdtp-stream.h" +#include "../iso-resources.h" + +struct snd_motu_packet_format { + unsigned char midi_flag_offset; + unsigned char midi_byte_offset; + unsigned char pcm_byte_offset; + + unsigned char msg_chunks; + unsigned char fixed_part_pcm_chunks[3]; + unsigned char differed_part_pcm_chunks[3]; +}; + +struct snd_motu { + struct snd_card *card; + struct fw_unit *unit; + struct mutex mutex; + spinlock_t lock; + + bool registered; + struct delayed_work dwork; + + /* Model dependent information. */ + const struct snd_motu_spec *spec; + + /* For packet streaming */ + struct snd_motu_packet_format tx_packet_formats; + struct snd_motu_packet_format rx_packet_formats; + struct amdtp_stream tx_stream; + struct amdtp_stream rx_stream; + struct fw_iso_resources tx_resources; + struct fw_iso_resources rx_resources; + unsigned int capture_substreams; + unsigned int playback_substreams; + + /* For notification. */ + struct fw_address_handler async_handler; + u32 msg; + + /* For uapi */ + int dev_lock_count; + bool dev_lock_changed; + wait_queue_head_t hwdep_wait; +}; + +enum snd_motu_spec_flags { + SND_MOTU_SPEC_SUPPORT_CLOCK_X2 = 0x0001, + SND_MOTU_SPEC_SUPPORT_CLOCK_X4 = 0x0002, + SND_MOTU_SPEC_TX_MICINST_CHUNK = 0x0004, + SND_MOTU_SPEC_TX_RETURN_CHUNK = 0x0008, + SND_MOTU_SPEC_TX_REVERB_CHUNK = 0x0010, + SND_MOTU_SPEC_TX_AESEBU_CHUNK = 0x0020, + SND_MOTU_SPEC_HAS_OPT_IFACE_A = 0x0040, + SND_MOTU_SPEC_HAS_OPT_IFACE_B = 0x0080, + SND_MOTU_SPEC_HAS_MIDI = 0x0100, +}; + +#define SND_MOTU_CLOCK_RATE_COUNT 6 +extern const unsigned int snd_motu_clock_rates[SND_MOTU_CLOCK_RATE_COUNT]; + +enum snd_motu_clock_source { + SND_MOTU_CLOCK_SOURCE_INTERNAL, + SND_MOTU_CLOCK_SOURCE_ADAT_ON_DSUB, + SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT, + SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT_A, + SND_MOTU_CLOCK_SOURCE_ADAT_ON_OPT_B, + SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT, + SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_A, + SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_B, + SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX, + SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR, + SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC, + SND_MOTU_CLOCK_SOURCE_UNKNOWN, +}; + +struct snd_motu_protocol { + int (*get_clock_rate)(struct snd_motu *motu, unsigned int *rate); + int (*set_clock_rate)(struct snd_motu *motu, unsigned int rate); + int (*get_clock_source)(struct snd_motu *motu, + enum snd_motu_clock_source *source); + int (*switch_fetching_mode)(struct snd_motu *motu, bool enable); + int (*cache_packet_formats)(struct snd_motu *motu); +}; + +struct snd_motu_spec { + const char *const name; + enum snd_motu_spec_flags flags; + + unsigned char analog_in_ports; + unsigned char analog_out_ports; + + const struct snd_motu_protocol *const protocol; +}; + +extern const struct snd_motu_protocol snd_motu_protocol_v2; +extern const struct snd_motu_protocol snd_motu_protocol_v3; + +int amdtp_motu_init(struct amdtp_stream *s, struct fw_unit *unit, + enum amdtp_stream_direction dir, + const struct snd_motu_protocol *const protocol); +int amdtp_motu_set_parameters(struct amdtp_stream *s, unsigned int rate, + unsigned int midi_ports, + struct snd_motu_packet_format *formats); +int amdtp_motu_add_pcm_hw_constraints(struct amdtp_stream *s, + struct snd_pcm_runtime *runtime); +void amdtp_motu_midi_trigger(struct amdtp_stream *s, unsigned int port, + struct snd_rawmidi_substream *midi); + +int snd_motu_transaction_read(struct snd_motu *motu, u32 offset, __be32 *reg, + size_t size); +int snd_motu_transaction_write(struct snd_motu *motu, u32 offset, __be32 *reg, + size_t size); +int snd_motu_transaction_register(struct snd_motu *motu); +int snd_motu_transaction_reregister(struct snd_motu *motu); +void snd_motu_transaction_unregister(struct snd_motu *motu); + +int snd_motu_stream_init_duplex(struct snd_motu *motu); +void snd_motu_stream_destroy_duplex(struct snd_motu *motu); +int snd_motu_stream_start_duplex(struct snd_motu *motu, unsigned int rate); +void snd_motu_stream_stop_duplex(struct snd_motu *motu); +int snd_motu_stream_lock_try(struct snd_motu *motu); +void snd_motu_stream_lock_release(struct snd_motu *motu); + +void snd_motu_proc_init(struct snd_motu *motu); + +int snd_motu_create_pcm_devices(struct snd_motu *motu); + +int snd_motu_create_midi_devices(struct snd_motu *motu); + +int snd_motu_create_hwdep_device(struct snd_motu *motu); +#endif diff --git a/sound/firewire/oxfw/oxfw-command.c b/sound/firewire/oxfw/oxfw-command.c index 12ef3253bc890391767a2c59785531176e8a8a67..ac3e2e3016662503422fa9cec0a5e1cdb785206b 100644 --- a/sound/firewire/oxfw/oxfw-command.c +++ b/sound/firewire/oxfw/oxfw-command.c @@ -34,7 +34,9 @@ int avc_stream_set_format(struct fw_unit *unit, enum avc_general_plug_dir dir, err = fcp_avc_transaction(unit, buf, len + 10, buf, len + 10, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(8)); - if ((err > 0) && (err < len + 10)) + if (err < 0) + ; + else if (err < len + 10) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -77,7 +79,9 @@ int avc_stream_get_format(struct fw_unit *unit, err = fcp_avc_transaction(unit, buf, 12, buf, *len, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5) | BIT(6) | BIT(7)); - if ((err > 0) && (err < 10)) + if (err < 0) + ; + else if (err < 12) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; @@ -139,7 +143,9 @@ int avc_general_inquiry_sig_fmt(struct fw_unit *unit, unsigned int rate, /* do transaction and check buf[1-5] are the same against command */ err = fcp_avc_transaction(unit, buf, 8, buf, 8, BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5)); - if ((err > 0) && (err < 8)) + if (err < 0) + ; + else if (err < 8) err = -EIO; else if (buf[0] == 0x08) /* NOT IMPLEMENTED */ err = -ENOSYS; diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 74d7fb6efce6ca8258ece95a83c3460588a3e99e..413ab6313bb66515c284734ca92c87bef014c5fd 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -227,11 +227,11 @@ static void do_registration(struct work_struct *work) if (err < 0) goto error; - err = detect_quirks(oxfw); + err = snd_oxfw_stream_discover(oxfw); if (err < 0) goto error; - err = snd_oxfw_stream_discover(oxfw); + err = detect_quirks(oxfw); if (err < 0) goto error; diff --git a/sound/firewire/tascam/tascam-midi.c b/sound/firewire/tascam/tascam-midi.c index df4f95d65925ec36099123f09fb15c27f5ec1b17..4a741570d536d394d047969d51faf871cc81e5b2 100644 --- a/sound/firewire/tascam/tascam-midi.c +++ b/sound/firewire/tascam/tascam-midi.c @@ -18,9 +18,8 @@ static int midi_playback_open(struct snd_rawmidi_substream *substream) { struct snd_tscm *tscm = substream->rmidi->private_data; - /* Initialize internal status. */ - tscm->running_status[substream->number] = 0; - tscm->on_sysex[substream->number] = 0; + snd_fw_async_midi_port_init(&tscm->out_ports[substream->number]); + return 0; } @@ -31,12 +30,15 @@ static int midi_capture_close(struct snd_rawmidi_substream *substream) } static int midi_playback_close(struct snd_rawmidi_substream *substream) +{ + return 0; +} + +static void midi_playback_drain(struct snd_rawmidi_substream *substream) { struct snd_tscm *tscm = substream->rmidi->private_data; snd_fw_async_midi_port_finish(&tscm->out_ports[substream->number]); - - return 0; } static void midi_capture_trigger(struct snd_rawmidi_substream *substrm, int up) @@ -78,6 +80,7 @@ int snd_tscm_create_midi_devices(struct snd_tscm *tscm) static const struct snd_rawmidi_ops playback_ops = { .open = midi_playback_open, .close = midi_playback_close, + .drain = midi_playback_drain, .trigger = midi_playback_trigger, }; struct snd_rawmidi *rmidi; diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c index 040a96d1ba8ec1fa1dbfa4521a46e7efb865f435..8967c52f503284a2d610352fcc7324a53a088b6d 100644 --- a/sound/firewire/tascam/tascam-transaction.c +++ b/sound/firewire/tascam/tascam-transaction.c @@ -58,39 +58,38 @@ static inline int calculate_message_bytes(u8 status) return -EINVAL; } -static int fill_message(struct snd_rawmidi_substream *substream, u8 *buf) +static int fill_message(struct snd_fw_async_midi_port *port, + struct snd_rawmidi_substream *substream) { - struct snd_tscm *tscm = substream->rmidi->private_data; - unsigned int port = substream->number; int i, len, consume; u8 *label, *msg; u8 status; /* The first byte is used for label, the rest for MIDI bytes. */ - label = buf; - msg = buf + 1; + label = port->buf; + msg = port->buf + 1; consume = snd_rawmidi_transmit_peek(substream, msg, 3); if (consume == 0) return 0; /* On exclusive message. */ - if (tscm->on_sysex[port]) { + if (port->on_sysex) { /* Seek the end of exclusives. */ for (i = 0; i < consume; ++i) { if (msg[i] == 0xf7) { - tscm->on_sysex[port] = false; + port->on_sysex = false; break; } } /* At the end of exclusive message, use label 0x07. */ - if (!tscm->on_sysex[port]) { + if (!port->on_sysex) { consume = i + 1; - *label = (port << 4) | 0x07; + *label = (substream->number << 4) | 0x07; /* During exclusive message, use label 0x04. */ } else if (consume == 3) { - *label = (port << 4) | 0x04; + *label = (substream->number << 4) | 0x04; /* We need to fill whole 3 bytes. Go to next change. */ } else { return 0; @@ -101,12 +100,12 @@ static int fill_message(struct snd_rawmidi_substream *substream, u8 *buf) /* The beginning of exclusives. */ if (msg[0] == 0xf0) { /* Transfer it in next chance in another condition. */ - tscm->on_sysex[port] = true; + port->on_sysex = true; return 0; } else { /* On running-status. */ if ((msg[0] & 0x80) != 0x80) - status = tscm->running_status[port]; + status = port->running_status; else status = msg[0]; @@ -124,18 +123,18 @@ static int fill_message(struct snd_rawmidi_substream *substream, u8 *buf) msg[2] = msg[1]; msg[1] = msg[0]; - msg[0] = tscm->running_status[port]; + msg[0] = port->running_status; } else { /* Enough MIDI bytes were not retrieved. */ if (consume < len) return 0; consume = len; - tscm->running_status[port] = msg[0]; + port->running_status = msg[0]; } } - *label = (port << 4) | (msg[0] >> 4); + *label = (substream->number << 4) | (msg[0] >> 4); } if (len > 0 && len < 3) @@ -144,6 +143,106 @@ static int fill_message(struct snd_rawmidi_substream *substream, u8 *buf) return consume; } +static void async_midi_port_callback(struct fw_card *card, int rcode, + void *data, size_t length, + void *callback_data) +{ + struct snd_fw_async_midi_port *port = callback_data; + struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); + + /* This port is closed. */ + if (substream == NULL) + return; + + if (rcode == RCODE_COMPLETE) + snd_rawmidi_transmit_ack(substream, port->consume_bytes); + else if (!rcode_is_permanent_error(rcode)) + /* To start next transaction immediately for recovery. */ + port->next_ktime = 0; + else + /* Don't continue processing. */ + port->error = true; + + port->idling = true; + + if (!snd_rawmidi_transmit_empty(substream)) + schedule_work(&port->work); +} + +static void midi_port_work(struct work_struct *work) +{ + struct snd_fw_async_midi_port *port = + container_of(work, struct snd_fw_async_midi_port, work); + struct snd_rawmidi_substream *substream = ACCESS_ONCE(port->substream); + int generation; + + /* Under transacting or error state. */ + if (!port->idling || port->error) + return; + + /* Nothing to do. */ + if (substream == NULL || snd_rawmidi_transmit_empty(substream)) + return; + + /* Do it in next chance. */ + if (ktime_after(port->next_ktime, ktime_get())) { + schedule_work(&port->work); + return; + } + + /* + * Fill the buffer. The callee must use snd_rawmidi_transmit_peek(). + * Later, snd_rawmidi_transmit_ack() is called. + */ + memset(port->buf, 0, 4); + port->consume_bytes = fill_message(port, substream); + if (port->consume_bytes <= 0) { + /* Do it in next chance, immediately. */ + if (port->consume_bytes == 0) { + port->next_ktime = 0; + schedule_work(&port->work); + } else { + /* Fatal error. */ + port->error = true; + } + return; + } + + /* Set interval to next transaction. */ + port->next_ktime = ktime_add_ns(ktime_get(), + port->consume_bytes * 8 * NSEC_PER_SEC / 31250); + + /* Start this transaction. */ + port->idling = false; + + /* + * In Linux FireWire core, when generation is updated with memory + * barrier, node id has already been updated. In this module, After + * this smp_rmb(), load/store instructions to memory are completed. + * Thus, both of generation and node id are available with recent + * values. This is a light-serialization solution to handle bus reset + * events on IEEE 1394 bus. + */ + generation = port->parent->generation; + smp_rmb(); + + fw_send_request(port->parent->card, &port->transaction, + TCODE_WRITE_QUADLET_REQUEST, + port->parent->node_id, generation, + port->parent->max_speed, + TSCM_ADDR_BASE + TSCM_OFFSET_MIDI_RX_QUAD, + port->buf, 4, async_midi_port_callback, + port); +} + +void snd_fw_async_midi_port_init(struct snd_fw_async_midi_port *port) +{ + port->idling = true; + port->error = false; + port->running_status = 0; + port->on_sysex = false; +} + static void handle_midi_tx(struct fw_card *card, struct fw_request *request, int tcode, int destination, int source, int generation, unsigned long long offset, @@ -219,12 +318,9 @@ int snd_tscm_transaction_register(struct snd_tscm *tscm) goto error; for (i = 0; i < TSCM_MIDI_OUT_PORT_MAX; i++) { - err = snd_fw_async_midi_port_init( - &tscm->out_ports[i], tscm->unit, - TSCM_ADDR_BASE + TSCM_OFFSET_MIDI_RX_QUAD, - 4, fill_message); - if (err < 0) - goto error; + tscm->out_ports[i].parent = fw_parent_device(tscm->unit); + tscm->out_ports[i].next_ktime = 0; + INIT_WORK(&tscm->out_ports[i].work, midi_port_work); } return err; @@ -275,7 +371,6 @@ int snd_tscm_transaction_reregister(struct snd_tscm *tscm) void snd_tscm_transaction_unregister(struct snd_tscm *tscm) { __be32 reg; - unsigned int i; if (tscm->async_handler.callback_data == NULL) return; @@ -302,7 +397,4 @@ void snd_tscm_transaction_unregister(struct snd_tscm *tscm) fw_core_remove_address_handler(&tscm->async_handler); tscm->async_handler.callback_data = NULL; - - for (i = 0; i < TSCM_MIDI_OUT_PORT_MAX; i++) - snd_fw_async_midi_port_destroy(&tscm->out_ports[i]); } diff --git a/sound/firewire/tascam/tascam.h b/sound/firewire/tascam/tascam.h index d3cd4065722b332166416db0dba49cff586fa8e1..08ecfae5c5840aa3082b85c1ed588f2d323ab4df 100644 --- a/sound/firewire/tascam/tascam.h +++ b/sound/firewire/tascam/tascam.h @@ -45,6 +45,23 @@ struct snd_tscm_spec { #define TSCM_MIDI_IN_PORT_MAX 4 #define TSCM_MIDI_OUT_PORT_MAX 4 +struct snd_fw_async_midi_port { + struct fw_device *parent; + struct work_struct work; + bool idling; + ktime_t next_ktime; + bool error; + + struct fw_transaction transaction; + + u8 buf[4]; + u8 running_status; + bool on_sysex; + + struct snd_rawmidi_substream *substream; + int consume_bytes; +}; + struct snd_tscm { struct snd_card *card; struct fw_unit *unit; @@ -72,8 +89,6 @@ struct snd_tscm { /* For MIDI message outgoing transactions. */ struct snd_fw_async_midi_port out_ports[TSCM_MIDI_OUT_PORT_MAX]; - u8 running_status[TSCM_MIDI_OUT_PORT_MAX]; - bool on_sysex[TSCM_MIDI_OUT_PORT_MAX]; }; #define TSCM_ADDR_BASE 0xffff00000000ull @@ -131,6 +146,26 @@ void snd_tscm_stream_lock_changed(struct snd_tscm *tscm); int snd_tscm_stream_lock_try(struct snd_tscm *tscm); void snd_tscm_stream_lock_release(struct snd_tscm *tscm); +void snd_fw_async_midi_port_init(struct snd_fw_async_midi_port *port); + +static inline void +snd_fw_async_midi_port_run(struct snd_fw_async_midi_port *port, + struct snd_rawmidi_substream *substream) +{ + if (!port->error) { + port->substream = substream; + schedule_work(&port->work); + } +} + +static inline void +snd_fw_async_midi_port_finish(struct snd_fw_async_midi_port *port) +{ + port->substream = NULL; + cancel_work_sync(&port->work); + port->error = false; +} + int snd_tscm_transaction_register(struct snd_tscm *tscm); int snd_tscm_transaction_reregister(struct snd_tscm *tscm); void snd_tscm_transaction_unregister(struct snd_tscm *tscm); diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 261469188566c95eb1677266b8c92a7f34d53ad5..84f3b81687164d419b4cff063011d774790273f3 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -171,7 +171,7 @@ static int check_hdac_link_power_active(struct hdac_ext_link *link, bool enable) { int timeout; u32 val; - int mask = (1 << AZX_MLCTL_CPA); + int mask = (1 << AZX_MLCTL_CPA_SHIFT); udelay(3); timeout = 150; @@ -179,10 +179,10 @@ static int check_hdac_link_power_active(struct hdac_ext_link *link, bool enable) do { val = readl(link->ml_addr + AZX_REG_ML_LCTL); if (enable) { - if (((val & mask) >> AZX_MLCTL_CPA)) + if (((val & mask) >> AZX_MLCTL_CPA_SHIFT)) return 0; } else { - if (!((val & mask) >> AZX_MLCTL_CPA)) + if (!((val & mask) >> AZX_MLCTL_CPA_SHIFT)) return 0; } udelay(3); diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 043065867656d0bf2a5221fb9741bfc29c14c439..978dc1801b3aceb8e2245b819097954c76595d77 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -106,7 +106,11 @@ void snd_hdac_bus_stop_cmd_io(struct hdac_bus *bus) /* disable ringbuffer DMAs */ snd_hdac_chip_writeb(bus, RIRBCTL, 0); snd_hdac_chip_writeb(bus, CORBCTL, 0); + spin_unlock_irq(&bus->reg_lock); + hdac_wait_for_cmd_dmas(bus); + + spin_lock_irq(&bus->reg_lock); /* disable unsolicited responses */ snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, 0); spin_unlock_irq(&bus->reg_lock); @@ -268,11 +272,11 @@ int snd_hdac_bus_parse_capabilities(struct hdac_bus *bus) unsigned int offset; unsigned int counter = 0; - offset = snd_hdac_chip_readl(bus, LLCH); + offset = snd_hdac_chip_readw(bus, LLCH); /* Lets walk the linked capabilities list */ do { - cur_cap = _snd_hdac_chip_read(l, bus, offset); + cur_cap = _snd_hdac_chip_readl(bus, offset); dev_dbg(bus->dev, "Capability version: 0x%x\n", (cur_cap & AZX_CAP_HDR_VER_MASK) >> AZX_CAP_HDR_VER_OFF); diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index c6994ebb4567bf7f6120f9d94405ce1b557a0970..e1472c7ab6c176095df80c2bd4df100cc0fda588 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -555,12 +555,12 @@ void snd_hdac_stream_sync_trigger(struct hdac_stream *azx_dev, bool set, if (!reg) reg = AZX_REG_SSYNC; - val = _snd_hdac_chip_read(l, bus, reg); + val = _snd_hdac_chip_readl(bus, reg); if (set) val |= streams; else val &= ~streams; - _snd_hdac_chip_write(l, bus, reg, val); + _snd_hdac_chip_writel(bus, reg, val); } EXPORT_SYMBOL_GPL(snd_hdac_stream_sync_trigger); diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index a302d1f8d14f410dca12d4433b6b66cef7add47f..e739b1c85c25b98c43127d1afc6270195974296b 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -55,11 +55,11 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver."); module_param_array(thinkpad, bool, NULL, 0444); MODULE_PARM_DESC(thinkpad, "Enable only for the onboard CS4248 of IBM Thinkpad 360/750/755 series."); diff --git a/sound/isa/adlib.c b/sound/isa/adlib.c index 8d3060fd7ad7e3319553c4cf604d29dc7d73bd45..5fb619eca5c8decd5c3e94e7040de166e002fcae 100644 --- a/sound/isa/adlib.c +++ b/sound/isa/adlib.c @@ -27,7 +27,7 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); static int snd_adlib_match(struct device *dev, unsigned int n) diff --git a/sound/isa/cmi8328.c b/sound/isa/cmi8328.c index 787475084f4662cfd9fd03b5165d47c2f48477ea..8e1756c3b9bb9a5729bab4cf0f35f8e75c85d01a 100644 --- a/sound/isa/cmi8328.c +++ b/sound/isa/cmi8328.c @@ -51,18 +51,18 @@ MODULE_PARM_DESC(index, "Index value for CMI8328 soundcard."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for CMI8328 soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for CMI8328 driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for CMI8328 driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 for CMI8328 driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 for CMI8328 driver."); -module_param_array(mpuport, long, NULL, 0444); +module_param_hw_array(mpuport, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpuport, "MPU-401 port # for CMI8328 driver."); -module_param_array(mpuirq, int, NULL, 0444); +module_param_hw_array(mpuirq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpuirq, "IRQ # for CMI8328 MPU-401 port."); #ifdef SUPPORT_JOYSTICK module_param_array(gameport, bool, NULL, 0444); diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c index dfedfd85f205460095ad09b1f0086c8f25925ea9..f64b29ab5cc7005eb4ea35bd76133e8a0a0d6d24 100644 --- a/sound/isa/cmi8330.c +++ b/sound/isa/cmi8330.c @@ -95,27 +95,27 @@ module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard."); #endif -module_param_array(sbport, long, NULL, 0444); +module_param_hw_array(sbport, long, ioport, NULL, 0444); MODULE_PARM_DESC(sbport, "Port # for CMI8330/CMI8329 SB driver."); -module_param_array(sbirq, int, NULL, 0444); +module_param_hw_array(sbirq, int, irq, NULL, 0444); MODULE_PARM_DESC(sbirq, "IRQ # for CMI8330/CMI8329 SB driver."); -module_param_array(sbdma8, int, NULL, 0444); +module_param_hw_array(sbdma8, int, dma, NULL, 0444); MODULE_PARM_DESC(sbdma8, "DMA8 for CMI8330/CMI8329 SB driver."); -module_param_array(sbdma16, int, NULL, 0444); +module_param_hw_array(sbdma16, int, dma, NULL, 0444); MODULE_PARM_DESC(sbdma16, "DMA16 for CMI8330/CMI8329 SB driver."); -module_param_array(wssport, long, NULL, 0444); +module_param_hw_array(wssport, long, ioport, NULL, 0444); MODULE_PARM_DESC(wssport, "Port # for CMI8330/CMI8329 WSS driver."); -module_param_array(wssirq, int, NULL, 0444); +module_param_hw_array(wssirq, int, irq, NULL, 0444); MODULE_PARM_DESC(wssirq, "IRQ # for CMI8330/CMI8329 WSS driver."); -module_param_array(wssdma, int, NULL, 0444); +module_param_hw_array(wssdma, int, dma, NULL, 0444); MODULE_PARM_DESC(wssdma, "DMA for CMI8330/CMI8329 WSS driver."); -module_param_array(fmport, long, NULL, 0444); +module_param_hw_array(fmport, long, ioport, NULL, 0444); MODULE_PARM_DESC(fmport, "FM port # for CMI8330/CMI8329 driver."); -module_param_array(mpuport, long, NULL, 0444); +module_param_hw_array(mpuport, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpuport, "MPU-401 port # for CMI8330/CMI8329 driver."); -module_param_array(mpuirq, int, NULL, 0444); +module_param_hw_array(mpuirq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpuirq, "IRQ # for CMI8330/CMI8329 MPU-401 port."); #ifdef CONFIG_PNP static int isa_registered; diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index ef7448e9f81300e40bf39ee760cbcad55b92600b..e8edd9017a2f22205b64a19b2adb68fe3f27beca 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -55,17 +55,17 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for " CRD_NAME " driver."); static int snd_cs4231_match(struct device *dev, unsigned int n) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 9d7582c90a95d90a66290c6e386aa4749b8ff056..1f9a3b2be7a1f705e40e15e8358a46b93e50a0f9 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -98,23 +98,23 @@ MODULE_PARM_DESC(enable, "Enable " IDENT " soundcard."); module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "ISA PnP detection for specified soundcard."); #endif -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " IDENT " driver."); -module_param_array(cport, long, NULL, 0444); +module_param_hw_array(cport, long, ioport, NULL, 0444); MODULE_PARM_DESC(cport, "Control port # for " IDENT " driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " IDENT " driver."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port # for " IDENT " driver."); -module_param_array(sb_port, long, NULL, 0444); +module_param_hw_array(sb_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(sb_port, "SB port # for " IDENT " driver (optional)."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for " IDENT " driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " IDENT " driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for " IDENT " driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for " IDENT " driver."); #ifdef CONFIG_PNP diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index 1901c2bb6c3bca0cb0ace9a95617f5137a6fa07a..36320e7f278931719d7b8f8adcb935967ce2ecab 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -71,17 +71,17 @@ module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard."); #endif MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver."); -module_param_array(irq, int, NULL, 0444); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port # for ES1688 driver."); MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver."); -module_param_array(dma8, int, NULL, 0444); +module_param_hw_array(dma8, int, dma, NULL, 0444); MODULE_PARM_DESC(dma8, "8-bit DMA # for " CRD_NAME " driver."); #ifdef CONFIG_PNP diff --git a/sound/isa/es1688/es1688_lib.c b/sound/isa/es1688/es1688_lib.c index e2cf508841b1ef2890eb2f253ec6acddc5be91b8..81cf26fa28d60427d4280b0ba43e3a6576ac7fa1 100644 --- a/sound/isa/es1688/es1688_lib.c +++ b/sound/isa/es1688/es1688_lib.c @@ -742,7 +742,7 @@ int snd_es1688_pcm(struct snd_card *card, struct snd_es1688 *chip, int device) pcm->private_data = chip; pcm->info_flags = SNDRV_PCM_INFO_HALF_DUPLEX; - sprintf(pcm->name, snd_es1688_chip_id(chip)); + strcpy(pcm->name, snd_es1688_chip_id(chip)); chip->pcm = pcm; snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c index 5094b62d8f7718c0888306b5fe4eeca60326c1e2..0cabe2b8974f574e0d4e42be908eaf58f462cb69 100644 --- a/sound/isa/es18xx.c +++ b/sound/isa/es18xx.c @@ -1999,17 +1999,17 @@ MODULE_PARM_DESC(enable, "Enable ES18xx soundcard."); module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard."); #endif -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for ES18xx driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for ES18xx driver."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port # for ES18xx driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for ES18xx driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA 1 # for ES18xx driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA 2 # for ES18xx driver."); #ifdef CONFIG_PNP diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c index 379abe2cbeb2317345c14c6fdca684129e35e5c7..b9994cc9f5fb4f86c3644671545c56625adb926e 100644 --- a/sound/isa/galaxy/galaxy.c +++ b/sound/isa/galaxy/galaxy.c @@ -53,21 +53,21 @@ static int mpu_irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ; static int dma1[SNDRV_CARDS] = SNDRV_DEFAULT_DMA; static int dma2[SNDRV_CARDS] = SNDRV_DEFAULT_DMA; -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); -module_param_array(wss_port, long, NULL, 0444); +module_param_hw_array(wss_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(wss_port, "WSS port # for " CRD_NAME " driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port # for " CRD_NAME " driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "Playback DMA # for " CRD_NAME " driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "Capture DMA # for " CRD_NAME " driver."); /* diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c index c169be49ed713f302e1b35d8541e431727a89f00..92a997ab1229184a5d01441ecadf1e91c7debbca 100644 --- a/sound/isa/gus/gusclassic.c +++ b/sound/isa/gus/gusclassic.c @@ -58,13 +58,13 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for " CRD_NAME " driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for " CRD_NAME " driver."); module_param_array(joystick_dac, int, NULL, 0444); MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for " CRD_NAME " driver."); diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c index 77ac2fd723b4a413c4f597d630ace5e80b2979d5..beb52c0f70ea09eb83050b94548cbad734fa7cf4 100644 --- a/sound/isa/gus/gusextreme.c +++ b/sound/isa/gus/gusextreme.c @@ -66,21 +66,21 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for " CRD_NAME " soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable " CRD_NAME " soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for " CRD_NAME " driver."); -module_param_array(gf1_port, long, NULL, 0444); +module_param_hw_array(gf1_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(gf1_port, "GF1 port # for " CRD_NAME " driver (optional)."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for " CRD_NAME " driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for " CRD_NAME " driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for " CRD_NAME " driver."); -module_param_array(gf1_irq, int, NULL, 0444); +module_param_hw_array(gf1_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(gf1_irq, "GF1 IRQ # for " CRD_NAME " driver."); -module_param_array(dma8, int, NULL, 0444); +module_param_hw_array(dma8, int, dma, NULL, 0444); MODULE_PARM_DESC(dma8, "8-bit DMA # for " CRD_NAME " driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "GF1 DMA # for " CRD_NAME " driver."); module_param_array(joystick_dac, int, NULL, 0444); MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for " CRD_NAME " driver."); diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c index dd88c9d33492bba72365f9785319ee665e5d92cc..63309a4531402bf2ce82dcb7a5dff970cdcae0bc 100644 --- a/sound/isa/gus/gusmax.c +++ b/sound/isa/gus/gusmax.c @@ -56,13 +56,13 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for GUS MAX soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable GUS MAX soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for GUS MAX driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for GUS MAX driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for GUS MAX driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for GUS MAX driver."); module_param_array(joystick_dac, int, NULL, 0444); MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for GUS MAX driver."); diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c index 70d0040484c89c7e2bd0d15303a917c71154b729..0687b7ef3e53b5f6a6b45d7aa3ed2b7654ce735b 100644 --- a/sound/isa/gus/interwave.c +++ b/sound/isa/gus/interwave.c @@ -92,17 +92,17 @@ MODULE_PARM_DESC(enable, "Enable InterWave soundcard."); module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "ISA PnP detection for specified soundcard."); #endif -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for InterWave driver."); #ifdef SNDRV_STB -module_param_array(port_tc, long, NULL, 0444); +module_param_hw_array(port_tc, long, ioport, NULL, 0444); MODULE_PARM_DESC(port_tc, "Tone control (TEA6330T - i2c bus) port # for InterWave driver."); #endif -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for InterWave driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for InterWave driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for InterWave driver."); module_param_array(joystick_dac, int, NULL, 0444); MODULE_PARM_DESC(joystick_dac, "Joystick DAC level 0.59V-4.52V or 0.389V-2.98V for InterWave driver."); diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c index 4c072666115dbd1d2559974fb5701592e7b8ba2a..ad4897337df57ea71c2d08ac7c3fa82dd29be5fc 100644 --- a/sound/isa/msnd/msnd_pinnacle.c +++ b/sound/isa/msnd/msnd_pinnacle.c @@ -800,22 +800,22 @@ MODULE_LICENSE("GPL"); MODULE_FIRMWARE(INITCODEFILE); MODULE_FIRMWARE(PERMCODEFILE); -module_param_array(io, long, NULL, S_IRUGO); +module_param_hw_array(io, long, ioport, NULL, S_IRUGO); MODULE_PARM_DESC(io, "IO port #"); -module_param_array(irq, int, NULL, S_IRUGO); -module_param_array(mem, long, NULL, S_IRUGO); +module_param_hw_array(irq, int, irq, NULL, S_IRUGO); +module_param_hw_array(mem, long, iomem, NULL, S_IRUGO); module_param_array(write_ndelay, int, NULL, S_IRUGO); module_param(calibrate_signal, int, S_IRUGO); #ifndef MSND_CLASSIC module_param_array(digital, int, NULL, S_IRUGO); -module_param_array(cfg, long, NULL, S_IRUGO); +module_param_hw_array(cfg, long, ioport, NULL, S_IRUGO); module_param_array(reset, int, 0, S_IRUGO); -module_param_array(mpu_io, long, NULL, S_IRUGO); -module_param_array(mpu_irq, int, NULL, S_IRUGO); -module_param_array(ide_io0, long, NULL, S_IRUGO); -module_param_array(ide_io1, long, NULL, S_IRUGO); -module_param_array(ide_irq, int, NULL, S_IRUGO); -module_param_array(joystick_io, long, NULL, S_IRUGO); +module_param_hw_array(mpu_io, long, ioport, NULL, S_IRUGO); +module_param_hw_array(mpu_irq, int, irq, NULL, S_IRUGO); +module_param_hw_array(ide_io0, long, ioport, NULL, S_IRUGO); +module_param_hw_array(ide_io1, long, ioport, NULL, S_IRUGO); +module_param_hw_array(ide_irq, int, irq, NULL, S_IRUGO); +module_param_hw_array(joystick_io, long, ioport, NULL, S_IRUGO); #endif diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index ae133633a420c4886181326f933161820de85696..4098e3e0353d8f6d7e57428d70d491301585ac3e 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -69,21 +69,21 @@ MODULE_PARM_DESC(enable, "Enable OPL3-SA soundcard."); module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard."); #endif -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for OPL3-SA driver."); -module_param_array(sb_port, long, NULL, 0444); +module_param_hw_array(sb_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(sb_port, "SB port # for OPL3-SA driver."); -module_param_array(wss_port, long, NULL, 0444); +module_param_hw_array(wss_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(wss_port, "WSS port # for OPL3-SA driver."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port # for OPL3-SA driver."); -module_param_array(midi_port, long, NULL, 0444); +module_param_hw_array(midi_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(midi_port, "MIDI port # for OPL3-SA driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for OPL3-SA driver."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for OPL3-SA driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for OPL3-SA driver."); module_param_array(opl3sa3_ymode, int, NULL, 0444); MODULE_PARM_DESC(opl3sa3_ymode, "Speaker size selection for 3D Enhancement mode: Desktop/Large Notebook/Small Notebook/HiFi."); diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 3a9067db1a842605518c70eabf84f5691413db00..bcbff56f060d04e3e55b0400b0c4be79f119b0cb 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -69,19 +69,19 @@ module_param(index, int, 0444); MODULE_PARM_DESC(index, "Index value for miro soundcard."); module_param(id, charp, 0444); MODULE_PARM_DESC(id, "ID string for miro soundcard."); -module_param(port, long, 0444); +module_param_hw(port, long, ioport, 0444); MODULE_PARM_DESC(port, "WSS port # for miro driver."); -module_param(mpu_port, long, 0444); +module_param_hw(mpu_port, long, ioport, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for miro driver."); -module_param(fm_port, long, 0444); +module_param_hw(fm_port, long, ioport, 0444); MODULE_PARM_DESC(fm_port, "FM Port # for miro driver."); -module_param(irq, int, 0444); +module_param_hw(irq, int, irq, 0444); MODULE_PARM_DESC(irq, "WSS irq # for miro driver."); -module_param(mpu_irq, int, 0444); +module_param_hw(mpu_irq, int, irq, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 irq # for miro driver."); -module_param(dma1, int, 0444); +module_param_hw(dma1, int, dma, 0444); MODULE_PARM_DESC(dma1, "1st dma # for miro driver."); -module_param(dma2, int, 0444); +module_param_hw(dma2, int, dma, 0444); MODULE_PARM_DESC(dma2, "2nd dma # for miro driver."); module_param(wss, int, 0444); MODULE_PARM_DESC(wss, "wss mode"); diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 0a52660037866b75480f125d04d6ef9a96413bf6..ceddb392b1e336b1bb7db83d9c8823ec1d4571a9 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -88,20 +88,20 @@ MODULE_PARM_DESC(id, "ID string for opti9xx based soundcard."); module_param(isapnp, bool, 0444); MODULE_PARM_DESC(isapnp, "Enable ISA PnP detection for specified soundcard."); #endif -module_param(port, long, 0444); +module_param_hw(port, long, ioport, 0444); MODULE_PARM_DESC(port, "WSS port # for opti9xx driver."); -module_param(mpu_port, long, 0444); +module_param_hw(mpu_port, long, ioport, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for opti9xx driver."); -module_param(fm_port, long, 0444); +module_param_hw(fm_port, long, ioport, 0444); MODULE_PARM_DESC(fm_port, "FM port # for opti9xx driver."); -module_param(irq, int, 0444); +module_param_hw(irq, int, irq, 0444); MODULE_PARM_DESC(irq, "WSS irq # for opti9xx driver."); -module_param(mpu_irq, int, 0444); +module_param_hw(mpu_irq, int, irq, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 irq # for opti9xx driver."); -module_param(dma1, int, 0444); +module_param_hw(dma1, int, dma, 0444); MODULE_PARM_DESC(dma1, "1st dma # for opti9xx driver."); #if defined(CS4231) || defined(OPTi93X) -module_param(dma2, int, 0444); +module_param_hw(dma2, int, dma, 0444); MODULE_PARM_DESC(dma2, "2nd dma # for opti9xx driver."); #endif /* CS4231 || OPTi93X */ diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c index 4d909971eedbb5b8ee458ec46d08162b2302bb44..bfa0055e1fd6677dc2395f8b45918454ee6b2619 100644 --- a/sound/isa/sb/jazz16.c +++ b/sound/isa/sb/jazz16.c @@ -50,17 +50,17 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for Media Vision Jazz16 based soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Media Vision Jazz16 based soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for jazz16 driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for jazz16 driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for jazz16 driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for jazz16 driver."); -module_param_array(dma8, int, NULL, 0444); +module_param_hw_array(dma8, int, dma, NULL, 0444); MODULE_PARM_DESC(dma8, "DMA8 # for jazz16 driver."); -module_param_array(dma16, int, NULL, 0444); +module_param_hw_array(dma16, int, dma, NULL, 0444); MODULE_PARM_DESC(dma16, "DMA16 # for jazz16 driver."); #define SB_JAZZ16_WAKEUP 0xaf diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c index 4a7d7c89808fd0fbc574a0e3b66b4b31ea7c11e2..3b2e4f405ff27bdb60f622907bcdf594e7a59e2f 100644 --- a/sound/isa/sb/sb16.c +++ b/sound/isa/sb/sb16.c @@ -99,21 +99,21 @@ MODULE_PARM_DESC(enable, "Enable SoundBlaster 16 soundcard."); module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "PnP detection for specified soundcard."); #endif -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for SB16 driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for SB16 driver."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port # for SB16 PnP driver."); #ifdef SNDRV_SBAWE_EMU8000 -module_param_array(awe_port, long, NULL, 0444); +module_param_hw_array(awe_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(awe_port, "AWE port # for SB16 PnP driver."); #endif -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for SB16 driver."); -module_param_array(dma8, int, NULL, 0444); +module_param_hw_array(dma8, int, dma, NULL, 0444); MODULE_PARM_DESC(dma8, "8-bit DMA # for SB16 driver."); -module_param_array(dma16, int, NULL, 0444); +module_param_hw_array(dma16, int, dma, NULL, 0444); MODULE_PARM_DESC(dma16, "16-bit DMA # for SB16 driver."); module_param_array(mic_agc, int, NULL, 0444); MODULE_PARM_DESC(mic_agc, "Mic Auto-Gain-Control switch."); diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index ad42d2364199fd6a7200000b4f992ad9043c7f30..d77dcba276b544b750c452b622981c6c7a0ff585 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -47,11 +47,11 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for Sound Blaster soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Sound Blaster soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for SB8 driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for SB8 driver."); -module_param_array(dma8, int, NULL, 0444); +module_param_hw_array(dma8, int, dma, NULL, 0444); MODULE_PARM_DESC(dma8, "8-bit DMA # for SB8 driver."); struct snd_sb8 { diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index b61a6633d8f2866ff9b3f586afad912abaccbb57..c09d9b914efe0dad0f4f82ff477910258e427b3e 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -64,17 +64,17 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for sc-6000 based soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable sc-6000 based soundcard."); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for sc-6000 driver."); -module_param_array(mss_port, long, NULL, 0444); +module_param_hw_array(mss_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mss_port, "MSS Port # for sc-6000 driver."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port # for sc-6000 driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for sc-6000 driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ # for sc-6000 driver."); -module_param_array(dma, int, NULL, 0444); +module_param_hw_array(dma, int, dma, NULL, 0444); MODULE_PARM_DESC(dma, "DMA # for sc-6000 driver."); module_param_array(joystick, bool, NULL, 0444); MODULE_PARM_DESC(joystick, "Enable gameport."); diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index fdcfa29e220551b473534938d8995c7abf7ebe84..54f5758a1bb3aaa41b463cd4b03d55dde2e84a7d 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -63,22 +63,22 @@ MODULE_PARM_DESC(index, "Index number for SoundScape soundcard"); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "Description for SoundScape card"); -module_param_array(port, long, NULL, 0444); +module_param_hw_array(port, long, ioport, NULL, 0444); MODULE_PARM_DESC(port, "Port # for SoundScape driver."); -module_param_array(wss_port, long, NULL, 0444); +module_param_hw_array(wss_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(wss_port, "WSS Port # for SoundScape driver."); -module_param_array(irq, int, NULL, 0444); +module_param_hw_array(irq, int, irq, NULL, 0444); MODULE_PARM_DESC(irq, "IRQ # for SoundScape driver."); -module_param_array(mpu_irq, int, NULL, 0444); +module_param_hw_array(mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(mpu_irq, "MPU401 IRQ # for SoundScape driver."); -module_param_array(dma, int, NULL, 0444); +module_param_hw_array(dma, int, dma, NULL, 0444); MODULE_PARM_DESC(dma, "DMA # for SoundScape driver."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for SoundScape driver."); module_param_array(joystick, bool, NULL, 0444); diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c index a0987a57c8a918e6eff7fd94a871da35db0a3839..da4e9a85f0afe98402f8d12494d197d8660344ea 100644 --- a/sound/isa/wavefront/wavefront.c +++ b/sound/isa/wavefront/wavefront.c @@ -63,23 +63,23 @@ MODULE_PARM_DESC(enable, "Enable WaveFront soundcard."); module_param_array(isapnp, bool, NULL, 0444); MODULE_PARM_DESC(isapnp, "ISA PnP detection for WaveFront soundcards."); #endif -module_param_array(cs4232_pcm_port, long, NULL, 0444); +module_param_hw_array(cs4232_pcm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(cs4232_pcm_port, "Port # for CS4232 PCM interface."); -module_param_array(cs4232_pcm_irq, int, NULL, 0444); +module_param_hw_array(cs4232_pcm_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(cs4232_pcm_irq, "IRQ # for CS4232 PCM interface."); -module_param_array(dma1, int, NULL, 0444); +module_param_hw_array(dma1, int, dma, NULL, 0444); MODULE_PARM_DESC(dma1, "DMA1 # for CS4232 PCM interface."); -module_param_array(dma2, int, NULL, 0444); +module_param_hw_array(dma2, int, dma, NULL, 0444); MODULE_PARM_DESC(dma2, "DMA2 # for CS4232 PCM interface."); -module_param_array(cs4232_mpu_port, long, NULL, 0444); +module_param_hw_array(cs4232_mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(cs4232_mpu_port, "port # for CS4232 MPU-401 interface."); -module_param_array(cs4232_mpu_irq, int, NULL, 0444); +module_param_hw_array(cs4232_mpu_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(cs4232_mpu_irq, "IRQ # for CS4232 MPU-401 interface."); -module_param_array(ics2115_irq, int, NULL, 0444); +module_param_hw_array(ics2115_irq, int, irq, NULL, 0444); MODULE_PARM_DESC(ics2115_irq, "IRQ # for ICS2115."); -module_param_array(ics2115_port, long, NULL, 0444); +module_param_hw_array(ics2115_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(ics2115_port, "Port # for ICS2115."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port #."); module_param_array(use_cs4232_midi, bool, NULL, 0444); MODULE_PARM_DESC(use_cs4232_midi, "Use CS4232 MPU-401 interface (inaccessibly located inside your computer)"); diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c index f6156d8169d05b259c1fb35219efa7d6e3def735..2421f59cf27996cb560f27fb90c6823ed69b67f0 100644 --- a/sound/oss/ad1848.c +++ b/sound/oss/ad1848.c @@ -2805,10 +2805,10 @@ static int __initdata dma = -1; static int __initdata dma2 = -1; static int __initdata type = 0; -module_param(io, int, 0); /* I/O for a raw AD1848 card */ -module_param(irq, int, 0); /* IRQ to use */ -module_param(dma, int, 0); /* First DMA channel */ -module_param(dma2, int, 0); /* Second DMA channel */ +module_param_hw(io, int, ioport, 0); /* I/O for a raw AD1848 card */ +module_param_hw(irq, int, irq, 0); /* IRQ to use */ +module_param_hw(dma, int, dma, 0); /* First DMA channel */ +module_param_hw(dma2, int, dma, 0); /* Second DMA channel */ module_param(type, int, 0); /* Card type */ module_param(deskpro_xl, bool, 0); /* Special magic for Deskpro XL boxen */ module_param(deskpro_m, bool, 0); /* Special magic for Deskpro M box */ diff --git a/sound/oss/aedsp16.c b/sound/oss/aedsp16.c index bb477d5c8528611776abee1fe7584f1d65389d89..f058ed6bdb697396c8b6c3f09b567c82be9626ac 100644 --- a/sound/oss/aedsp16.c +++ b/sound/oss/aedsp16.c @@ -1303,17 +1303,17 @@ static int __initdata mpu_irq = -1; static int __initdata mss_base = -1; static int __initdata mpu_base = -1; -module_param(io, int, 0); +module_param_hw(io, int, ioport, 0); MODULE_PARM_DESC(io, "I/O base address (0x220 0x240)"); -module_param(irq, int, 0); +module_param_hw(irq, int, irq, 0); MODULE_PARM_DESC(irq, "IRQ line (5 7 9 10 11)"); -module_param(dma, int, 0); +module_param_hw(dma, int, dma, 0); MODULE_PARM_DESC(dma, "dma line (0 1 3)"); -module_param(mpu_irq, int, 0); +module_param_hw(mpu_irq, int, irq, 0); MODULE_PARM_DESC(mpu_irq, "MPU-401 IRQ line (5 7 9 10 0)"); -module_param(mss_base, int, 0); +module_param_hw(mss_base, int, ioport, 0); MODULE_PARM_DESC(mss_base, "MSS emulation I/O base address (0x530 0xE80)"); -module_param(mpu_base, int, 0); +module_param_hw(mpu_base, int, ioport, 0); MODULE_PARM_DESC(mpu_base,"MPU-401 I/O base address (0x300 0x310 0x320 0x330)"); MODULE_AUTHOR("Riccardo Facchetti "); MODULE_DESCRIPTION("Audio Excel DSP 16 Driver Version " VERSION); diff --git a/sound/oss/mpu401.c b/sound/oss/mpu401.c index 862735005b4318eff1cfb9ad7af95385c3f7a405..20e8fa46f6473ef7023452755090e5a77023d6cb 100644 --- a/sound/oss/mpu401.c +++ b/sound/oss/mpu401.c @@ -1748,8 +1748,8 @@ static struct address_info cfg; static int io = -1; static int irq = -1; -module_param(irq, int, 0); -module_param(io, int, 0); +module_param_hw(irq, int, irq, 0); +module_param_hw(io, int, ioport, 0); static int __init init_mpu401(void) { diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c index f34ec01d22394d75987418564aa25785e3398929..d2abc2cf3213b7f3f94ac2a5e5eb889bc47eab53 100644 --- a/sound/oss/msnd_pinnacle.c +++ b/sound/oss/msnd_pinnacle.c @@ -1727,22 +1727,22 @@ static int calibrate_signal __initdata = CONFIG_MSND_CALSIGNAL; #endif /* MODULE */ -module_param (io, int, 0); -module_param (irq, int, 0); -module_param (mem, int, 0); +module_param_hw (io, int, ioport, 0); +module_param_hw (irq, int, irq, 0); +module_param_hw (mem, int, iomem, 0); module_param (write_ndelay, int, 0); module_param (fifosize, int, 0); module_param (calibrate_signal, int, 0); #ifndef MSND_CLASSIC module_param (digital, bool, 0); -module_param (cfg, int, 0); +module_param_hw (cfg, int, ioport, 0); module_param (reset, int, 0); -module_param (mpu_io, int, 0); -module_param (mpu_irq, int, 0); -module_param (ide_io0, int, 0); -module_param (ide_io1, int, 0); -module_param (ide_irq, int, 0); -module_param (joystick_io, int, 0); +module_param_hw (mpu_io, int, ioport, 0); +module_param_hw (mpu_irq, int, irq, 0); +module_param_hw (ide_io0, int, ioport, 0); +module_param_hw (ide_io1, int, ioport, 0); +module_param_hw (ide_irq, int, irq, 0); +module_param_hw (joystick_io, int, ioport, 0); #endif static int __init msnd_init(void) diff --git a/sound/oss/opl3.c b/sound/oss/opl3.c index b6d19adf8f4111d575c88b235142d6ed0580654f..f0f5b5be63140bdc11ad67334ebd7d51910d4f6b 100644 --- a/sound/oss/opl3.c +++ b/sound/oss/opl3.c @@ -1200,7 +1200,7 @@ static int me; static int io = -1; -module_param(io, int, 0); +module_param_hw(io, int, ioport, 0); static int __init init_opl3 (void) { diff --git a/sound/oss/pas2_card.c b/sound/oss/pas2_card.c index b07954a7953679b077b12b190343492a3271510f..769fca692d2aa7e5b51381cab8a7d4b37921610c 100644 --- a/sound/oss/pas2_card.c +++ b/sound/oss/pas2_card.c @@ -383,15 +383,15 @@ static int __initdata sb_irq = -1; static int __initdata sb_dma = -1; static int __initdata sb_dma16 = -1; -module_param(io, int, 0); -module_param(irq, int, 0); -module_param(dma, int, 0); -module_param(dma16, int, 0); - -module_param(sb_io, int, 0); -module_param(sb_irq, int, 0); -module_param(sb_dma, int, 0); -module_param(sb_dma16, int, 0); +module_param_hw(io, int, ioport, 0); +module_param_hw(irq, int, irq, 0); +module_param_hw(dma, int, dma, 0); +module_param_hw(dma16, int, dma, 0); + +module_param_hw(sb_io, int, ioport, 0); +module_param_hw(sb_irq, int, irq, 0); +module_param_hw(sb_dma, int, dma, 0); +module_param_hw(sb_dma16, int, dma, 0); module_param(joystick, bool, 0); module_param(symphony, bool, 0); diff --git a/sound/oss/pss.c b/sound/oss/pss.c index 81314f9e2ccb9c8bffccdc699cca698820081625..33c3a442e162616caf3ccee97d640e45f749414a 100644 --- a/sound/oss/pss.c +++ b/sound/oss/pss.c @@ -1139,19 +1139,19 @@ static bool pss_no_sound = 0; /* Just configure non-sound components */ static bool pss_keep_settings = 1; /* Keep hardware settings at module exit */ static char *pss_firmware = "/etc/sound/pss_synth"; -module_param(pss_io, int, 0); +module_param_hw(pss_io, int, ioport, 0); MODULE_PARM_DESC(pss_io, "Set i/o base of PSS card (probably 0x220 or 0x240)"); -module_param(mss_io, int, 0); +module_param_hw(mss_io, int, ioport, 0); MODULE_PARM_DESC(mss_io, "Set WSS (audio) i/o base (0x530, 0x604, 0xE80, 0xF40, or other. Address must end in 0 or 4 and must be from 0x100 to 0xFF4)"); -module_param(mss_irq, int, 0); +module_param_hw(mss_irq, int, irq, 0); MODULE_PARM_DESC(mss_irq, "Set WSS (audio) IRQ (3, 5, 7, 9, 10, 11, 12)"); -module_param(mss_dma, int, 0); +module_param_hw(mss_dma, int, dma, 0); MODULE_PARM_DESC(mss_dma, "Set WSS (audio) DMA (0, 1, 3)"); -module_param(mpu_io, int, 0); +module_param_hw(mpu_io, int, ioport, 0); MODULE_PARM_DESC(mpu_io, "Set MIDI i/o base (0x330 or other. Address must be on 4 location boundaries and must be from 0x100 to 0xFFC)"); -module_param(mpu_irq, int, 0); +module_param_hw(mpu_irq, int, irq, 0); MODULE_PARM_DESC(mpu_irq, "Set MIDI IRQ (3, 5, 7, 9, 10, 11, 12)"); -module_param(pss_cdrom_port, int, 0); +module_param_hw(pss_cdrom_port, int, ioport, 0); MODULE_PARM_DESC(pss_cdrom_port, "Set the PSS CDROM port i/o base (0x340 or other)"); module_param(pss_enable_joystick, bool, 0); MODULE_PARM_DESC(pss_enable_joystick, "Enables the PSS joystick port (1 to enable, 0 to disable)"); diff --git a/sound/oss/sb_card.c b/sound/oss/sb_card.c index fb5d7250de38d75f5f7b27cafcc7e674390ae7d0..2a92cfe6cfe97500511488708be3686ad4d2aa84 100644 --- a/sound/oss/sb_card.c +++ b/sound/oss/sb_card.c @@ -61,15 +61,15 @@ static int __initdata uart401 = 0; static int __initdata pnp = 0; #endif -module_param(io, int, 000); +module_param_hw(io, int, ioport, 000); MODULE_PARM_DESC(io, "Soundblaster i/o base address (0x220,0x240,0x260,0x280)"); -module_param(irq, int, 000); +module_param_hw(irq, int, irq, 000); MODULE_PARM_DESC(irq, "IRQ (5,7,9,10)"); -module_param(dma, int, 000); +module_param_hw(dma, int, dma, 000); MODULE_PARM_DESC(dma, "8-bit DMA channel (0,1,3)"); -module_param(dma16, int, 000); +module_param_hw(dma16, int, dma, 000); MODULE_PARM_DESC(dma16, "16-bit DMA channel (5,6,7)"); -module_param(mpu_io, int, 000); +module_param_hw(mpu_io, int, ioport, 000); MODULE_PARM_DESC(mpu_io, "MPU base address"); module_param(type, int, 000); MODULE_PARM_DESC(type, "You can set this to specific card type (doesn't " \ diff --git a/sound/oss/trix.c b/sound/oss/trix.c index 3c494dc93b936d4680aca2a70ccee94d1fba7938..a57bc635d7585fe7a1bbed1e64a6b395861fd0fa 100644 --- a/sound/oss/trix.c +++ b/sound/oss/trix.c @@ -413,15 +413,15 @@ static int __initdata sb_irq = -1; static int __initdata mpu_io = -1; static int __initdata mpu_irq = -1; -module_param(io, int, 0); -module_param(irq, int, 0); -module_param(dma, int, 0); -module_param(dma2, int, 0); -module_param(sb_io, int, 0); -module_param(sb_dma, int, 0); -module_param(sb_irq, int, 0); -module_param(mpu_io, int, 0); -module_param(mpu_irq, int, 0); +module_param_hw(io, int, ioport, 0); +module_param_hw(irq, int, irq, 0); +module_param_hw(dma, int, dma, 0); +module_param_hw(dma2, int, dma, 0); +module_param_hw(sb_io, int, ioport, 0); +module_param_hw(sb_dma, int, dma, 0); +module_param_hw(sb_irq, int, irq, 0); +module_param_hw(mpu_io, int, ioport, 0); +module_param_hw(mpu_irq, int, irq, 0); module_param(joystick, bool, 0); static int __init init_trix(void) diff --git a/sound/oss/uart401.c b/sound/oss/uart401.c index dae4d4344407875bb627acbe25b115b5fb734a19..83dcc85b86882af55cdb0c6de5f75b36c23857de 100644 --- a/sound/oss/uart401.c +++ b/sound/oss/uart401.c @@ -429,8 +429,8 @@ static struct address_info cfg_mpu; static int io = -1; static int irq = -1; -module_param(io, int, 0444); -module_param(irq, int, 0444); +module_param_hw(io, int, ioport, 0444); +module_param_hw(irq, int, irq, 0444); static int __init init_uart401(void) diff --git a/sound/oss/uart6850.c b/sound/oss/uart6850.c index 1079133dd6ab17fc856677a328ea54563028c555..eda32d7eddbdc89a9181e5e319cb1d65cf1d4b81 100644 --- a/sound/oss/uart6850.c +++ b/sound/oss/uart6850.c @@ -315,8 +315,8 @@ static struct address_info cfg_mpu; static int __initdata io = -1; static int __initdata irq = -1; -module_param(io, int, 0); -module_param(irq, int, 0); +module_param_hw(io, int, ioport, 0); +module_param_hw(irq, int, irq, 0); static int __init init_uart6850(void) { diff --git a/sound/oss/waveartist.c b/sound/oss/waveartist.c index 0b8d0de872732e618a55f6b26e9bec9935956d60..4f0c3a232e41a7686fb04795deb7300d03220ea6 100644 --- a/sound/oss/waveartist.c +++ b/sound/oss/waveartist.c @@ -2036,8 +2036,8 @@ __setup("waveartist=", setup_waveartist); #endif MODULE_DESCRIPTION("Rockwell WaveArtist RWA-010 sound driver"); -module_param(io, int, 0); /* IO base */ -module_param(irq, int, 0); /* IRQ */ -module_param(dma, int, 0); /* DMA */ -module_param(dma2, int, 0); /* DMA2 */ +module_param_hw(io, int, ioport, 0); /* IO base */ +module_param_hw(irq, int, irq, 0); /* IRQ */ +module_param_hw(dma, int, dma, 0); /* DMA */ +module_param_hw(dma2, int, dma, 0); /* DMA2 */ MODULE_LICENSE("GPL"); diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index 92b819e4f7290c7ac056d72e35ce553829dce64a..34bbc2e730a65767fe9ba6c26f162ef6db7e7b39 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -1339,7 +1339,7 @@ static int snd_ali_prepare(struct snd_pcm_substream *substream) rate = snd_ali_get_spdif_in_rate(codec); if (rate == 0) { dev_warn(codec->card->dev, - "ali_capture_preapre: spdif rate detect err!\n"); + "ali_capture_prepare: spdif rate detect err!\n"); rate = 48000; } spin_lock_irq(&codec->reg_lock); diff --git a/sound/pci/als4000.c b/sound/pci/als4000.c index 92bc06d0128833c96902c5684e75bfbc0af8a5f5..7844a75d8ed97f7a6bda5b747e2872600cca4da6 100644 --- a/sound/pci/als4000.c +++ b/sound/pci/als4000.c @@ -102,7 +102,7 @@ MODULE_PARM_DESC(id, "ID string for ALS4000 soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable ALS4000 soundcard."); #ifdef SUPPORT_JOYSTICK -module_param_array(joystick_port, int, NULL, 0444); +module_param_hw_array(joystick_port, int, ioport, NULL, 0444); MODULE_PARM_DESC(joystick_port, "Joystick port address for ALS4000 soundcard. (0 = disabled)"); #endif diff --git a/sound/pci/au88x0/au88x0_a3d.c b/sound/pci/au88x0/au88x0_a3d.c index ab0f873129117b23461489309bc9f4cd883483d9..7a4558a70fb931be9ee3f6b62e8ca014dd6a06be 100644 --- a/sound/pci/au88x0/au88x0_a3d.c +++ b/sound/pci/au88x0/au88x0_a3d.c @@ -846,7 +846,7 @@ snd_vortex_a3d_filter_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new vortex_a3d_kcontrol = { +static const struct snd_kcontrol_new vortex_a3d_kcontrol = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "Playback PCM advanced processing", .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index e1af24f875665f12426e1fc4bfae2657d492f820..c308a4f7055042540574ae0add505c08e8602883 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -2279,6 +2279,9 @@ vortex_adb_allocroute(vortex_t *vortex, int dma, int nr_ch, int dir, } else { int src[2], mix[2]; + if (nr_ch < 1) + return -EINVAL; + /* Get SRC and MIXER hardware resources. */ for (i = 0; i < nr_ch; i++) { if ((mix[i] = diff --git a/sound/pci/au88x0/au88x0_eq.c b/sound/pci/au88x0/au88x0_eq.c index 9585c5c63b9636809fa04e591ea8b3a4eff0fb55..b566b44e4da7ebbdc58ad70f04e0fd1d40d03e68 100644 --- a/sound/pci/au88x0/au88x0_eq.c +++ b/sound/pci/au88x0/au88x0_eq.c @@ -757,7 +757,7 @@ snd_vortex_eqtoggle_put(struct snd_kcontrol *kcontrol, return 1; /* Allways changes */ } -static struct snd_kcontrol_new vortex_eqtoggle_kcontrol = { +static const struct snd_kcontrol_new vortex_eqtoggle_kcontrol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "EQ Enable", .index = 0, @@ -815,7 +815,7 @@ snd_vortex_eq_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucon return changed; } -static struct snd_kcontrol_new vortex_eq_kcontrol = { +static const struct snd_kcontrol_new vortex_eq_kcontrol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = " .", .index = 0, @@ -855,7 +855,7 @@ snd_vortex_peaks_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *u return 0; } -static struct snd_kcontrol_new vortex_levels_kcontrol = { +static const struct snd_kcontrol_new vortex_levels_kcontrol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "EQ Peaks", .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, diff --git a/sound/pci/au88x0/au88x0_pcm.c b/sound/pci/au88x0/au88x0_pcm.c index df5741a78fd23089eff63ef26d213f7341d8b60a..335979a753fefa7555adb0b15b8f3959d2e4fc1c 100644 --- a/sound/pci/au88x0/au88x0_pcm.c +++ b/sound/pci/au88x0/au88x0_pcm.c @@ -601,7 +601,7 @@ static int snd_vortex_pcm_vol_put(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_MINMAX(vortex_pcm_vol_db_scale, -9600, 2400); -static struct snd_kcontrol_new snd_vortex_pcm_vol = { +static const struct snd_kcontrol_new snd_vortex_pcm_vol = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Playback Volume", .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index 57bbb87d0c628f09f7a4c32e59725f076c303354..8356180bfe0e15ada0031e23369370036593dcff 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -202,7 +202,7 @@ static const struct snd_pcm_ops snd_aw2_capture_ops = { .pointer = snd_aw2_pcm_pointer_capture, }; -static struct snd_kcontrol_new aw2_control = { +static const struct snd_kcontrol_new aw2_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Capture Route", .index = 0, diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c index f2c0709d74411f3042d5eda7b9de1063bbf3fab2..099efb046b1c8e337772869ef1f149748d098058 100644 --- a/sound/pci/bt87x.c +++ b/sound/pci/bt87x.c @@ -598,7 +598,7 @@ static int snd_bt87x_capture_volume_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_bt87x_capture_volume = { +static const struct snd_kcontrol_new snd_bt87x_capture_volume = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Volume", .info = snd_bt87x_capture_volume_info, @@ -634,7 +634,7 @@ static int snd_bt87x_capture_boost_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_bt87x_capture_boost = { +static const struct snd_kcontrol_new snd_bt87x_capture_boost = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Boost", .info = snd_bt87x_capture_boost_info, @@ -676,7 +676,7 @@ static int snd_bt87x_capture_source_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_bt87x_capture_source = { +static const struct snd_kcontrol_new snd_bt87x_capture_source = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Source", .info = snd_bt87x_capture_source_info, diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index 025805cba7795e27ab365fcf7dd7f5495ace0ee7..b4d3415331f63c3008f283708f36e08c578c694f 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -301,7 +301,7 @@ static int snd_ca0106_capture_mic_line_in_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ca0106_capture_mic_line_in = +static const struct snd_kcontrol_new snd_ca0106_capture_mic_line_in = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Shared Mic/Line in Capture Switch", @@ -310,7 +310,7 @@ static struct snd_kcontrol_new snd_ca0106_capture_mic_line_in = .put = snd_ca0106_capture_mic_line_in_put }; -static struct snd_kcontrol_new snd_ca0106_capture_line_in_side_out = +static const struct snd_kcontrol_new snd_ca0106_capture_line_in_side_out = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Shared Line in/Side out Capture Switch", diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index aeedc270ed9b0834c4bde8d3b0cab638c8328e94..745a0a3743b479cd9e8bd846fbec7e53f1b9eb53 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -68,14 +68,14 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for C-Media PCI soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable C-Media PCI soundcard."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM port."); module_param_array(soft_ac3, bool, NULL, 0444); MODULE_PARM_DESC(soft_ac3, "Software-conversion of raw SPDIF packets (model 033 only)."); #ifdef SUPPORT_JOYSTICK -module_param_array(joystick_port, int, NULL, 0444); +module_param_hw_array(joystick_port, int, ioport, NULL, 0444); MODULE_PARM_DESC(joystick_port, "Joystick port address."); #endif @@ -1045,7 +1045,7 @@ static int snd_cmipci_spdif_default_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_cmipci_spdif_default = +static const struct snd_kcontrol_new snd_cmipci_spdif_default = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), @@ -1072,7 +1072,7 @@ static int snd_cmipci_spdif_mask_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_cmipci_spdif_mask = +static const struct snd_kcontrol_new snd_cmipci_spdif_mask = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1119,7 +1119,7 @@ static int snd_cmipci_spdif_stream_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_cmipci_spdif_stream = +static const struct snd_kcontrol_new snd_cmipci_spdif_stream = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, diff --git a/sound/pci/cs4281.c b/sound/pci/cs4281.c index fa7c51684dd23856fb7a8618e75c7e1f9d873002..f870697aca67182dea8896e1bd08f0ab3fa902ab 100644 --- a/sound/pci/cs4281.c +++ b/sound/pci/cs4281.c @@ -1055,7 +1055,7 @@ static int snd_cs4281_put_volume(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_SCALE(db_scale_dsp, -4650, 150, 0); -static struct snd_kcontrol_new snd_cs4281_fm_vol = +static const struct snd_kcontrol_new snd_cs4281_fm_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Synth Playback Volume", @@ -1066,7 +1066,7 @@ static struct snd_kcontrol_new snd_cs4281_fm_vol = .tlv = { .p = db_scale_dsp }, }; -static struct snd_kcontrol_new snd_cs4281_pcm_vol = +static const struct snd_kcontrol_new snd_cs4281_pcm_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Stream Playback Volume", diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 937071760bc4ac7197c4931452e903689fed671f..d15ecf9febbfd6810e727c29f293fadbc8974904 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1039,7 +1039,7 @@ static int snd_echo_output_gain_put(struct snd_kcontrol *kcontrol, #ifdef ECHOCARD_HAS_LINE_OUT_GAIN /* On the Mia this one controls the line-out volume */ -static struct snd_kcontrol_new snd_echo_line_output_gain = { +static const struct snd_kcontrol_new snd_echo_line_output_gain = { .name = "Line Playback Volume", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | @@ -1050,7 +1050,7 @@ static struct snd_kcontrol_new snd_echo_line_output_gain = { .tlv = {.p = db_scale_output_gain}, }; #else -static struct snd_kcontrol_new snd_echo_pcm_output_gain = { +static const struct snd_kcontrol_new snd_echo_pcm_output_gain = { .name = "PCM Playback Volume", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, @@ -1120,7 +1120,7 @@ static int snd_echo_input_gain_put(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_SCALE(db_scale_input_gain, -2500, 50, 0); -static struct snd_kcontrol_new snd_echo_line_input_gain = { +static const struct snd_kcontrol_new snd_echo_line_input_gain = { .name = "Line Capture Volume", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, @@ -1184,7 +1184,7 @@ static int snd_echo_output_nominal_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_echo_output_nominal_level = { +static const struct snd_kcontrol_new snd_echo_output_nominal_level = { .name = "Line Playback Switch (-10dBV)", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = snd_echo_output_nominal_info, @@ -1250,7 +1250,7 @@ static int snd_echo_input_nominal_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_echo_intput_nominal_level = { +static const struct snd_kcontrol_new snd_echo_intput_nominal_level = { .name = "Line Capture Switch (-10dBV)", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = snd_echo_input_nominal_info, @@ -1477,7 +1477,7 @@ static int snd_echo_digital_mode_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_echo_digital_mode_switch = { +static const struct snd_kcontrol_new snd_echo_digital_mode_switch = { .name = "Digital mode Switch", .iface = SNDRV_CTL_ELEM_IFACE_CARD, .info = snd_echo_digital_mode_info, @@ -1527,7 +1527,7 @@ static int snd_echo_spdif_mode_put(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_echo_spdif_mode_switch = { +static const struct snd_kcontrol_new snd_echo_spdif_mode_switch = { .name = "S/PDIF mode Switch", .iface = SNDRV_CTL_ELEM_IFACE_CARD, .info = snd_echo_spdif_mode_info, @@ -1600,7 +1600,7 @@ static int snd_echo_clock_source_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_echo_clock_source_switch = { +static const struct snd_kcontrol_new snd_echo_clock_source_switch = { .name = "Sample Clock Source", .iface = SNDRV_CTL_ELEM_IFACE_PCM, .info = snd_echo_clock_source_info, @@ -1643,7 +1643,7 @@ static int snd_echo_phantom_power_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_echo_phantom_power_switch = { +static const struct snd_kcontrol_new snd_echo_phantom_power_switch = { .name = "Phantom power Switch", .iface = SNDRV_CTL_ELEM_IFACE_CARD, .info = snd_echo_phantom_power_info, @@ -1686,7 +1686,7 @@ static int snd_echo_automute_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_echo_automute_switch = { +static const struct snd_kcontrol_new snd_echo_automute_switch = { .name = "Digital Capture Switch (automute)", .iface = SNDRV_CTL_ELEM_IFACE_CARD, .info = snd_echo_automute_info, @@ -1713,7 +1713,7 @@ static int snd_echo_vumeters_switch_put(struct snd_kcontrol *kcontrol, return 1; } -static struct snd_kcontrol_new snd_echo_vumeters_switch = { +static const struct snd_kcontrol_new snd_echo_vumeters_switch = { .name = "VU-meters Switch", .iface = SNDRV_CTL_ELEM_IFACE_CARD, .access = SNDRV_CTL_ELEM_ACCESS_WRITE, @@ -1751,7 +1751,7 @@ static int snd_echo_vumeters_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_echo_vumeters = { +static const struct snd_kcontrol_new snd_echo_vumeters = { .name = "VU-meters", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READ | @@ -1804,7 +1804,7 @@ static int snd_echo_channels_info_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_echo_channels_info = { +static const struct snd_kcontrol_new snd_echo_channels_info = { .name = "Channels info", .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index 32842734ada61bfec55f12deead13e6ddd6cabfd..77a4413f4564cefdc4545d637908fb0f84f9eeb1 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1112,7 +1112,7 @@ static int snd_emu10k1x_shared_spdif_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1x_shared_spdif = +static const struct snd_kcontrol_new snd_emu10k1x_shared_spdif = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Analog/Digital Output Jack", @@ -1171,7 +1171,7 @@ static int snd_emu10k1x_spdif_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1x_spdif_mask_control = +static const struct snd_kcontrol_new snd_emu10k1x_spdif_mask_control = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1181,7 +1181,7 @@ static struct snd_kcontrol_new snd_emu10k1x_spdif_mask_control = .get = snd_emu10k1x_spdif_get_mask }; -static struct snd_kcontrol_new snd_emu10k1x_spdif_control = +static const struct snd_kcontrol_new snd_emu10k1x_spdif_control = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index 076b117009c58bf13a6a5b2a363f6c2c3b695fc4..b2219a73c17c03ad5216be882732fcde26325123 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -795,7 +795,7 @@ static int snd_emu1010_internal_clock_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu1010_internal_clock = +static const struct snd_kcontrol_new snd_emu1010_internal_clock = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .iface = SNDRV_CTL_ELEM_IFACE_MIXER, @@ -847,7 +847,7 @@ static int snd_emu1010_optical_out_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu1010_optical_out = { +static const struct snd_kcontrol_new snd_emu1010_optical_out = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Optical Output Mode", @@ -898,7 +898,7 @@ static int snd_emu1010_optical_in_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu1010_optical_in = { +static const struct snd_kcontrol_new snd_emu1010_optical_in = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Optical Input Mode", @@ -978,7 +978,7 @@ static int snd_audigy_i2c_capture_source_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_audigy_i2c_capture_source = +static const struct snd_kcontrol_new snd_audigy_i2c_capture_source = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Source", @@ -1177,7 +1177,7 @@ static int snd_emu10k1_spdif_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_spdif_mask_control = +static const struct snd_kcontrol_new snd_emu10k1_spdif_mask_control = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1187,7 +1187,7 @@ static struct snd_kcontrol_new snd_emu10k1_spdif_mask_control = .get = snd_emu10k1_spdif_get_mask }; -static struct snd_kcontrol_new snd_emu10k1_spdif_control = +static const struct snd_kcontrol_new snd_emu10k1_spdif_control = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), @@ -1293,7 +1293,7 @@ static int snd_emu10k1_send_routing_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_send_routing_control = +static const struct snd_kcontrol_new snd_emu10k1_send_routing_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1364,7 +1364,7 @@ static int snd_emu10k1_send_volume_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_send_volume_control = +static const struct snd_kcontrol_new snd_emu10k1_send_volume_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1429,7 +1429,7 @@ static int snd_emu10k1_attn_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_attn_control = +static const struct snd_kcontrol_new snd_emu10k1_attn_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1501,7 +1501,7 @@ static int snd_emu10k1_efx_send_routing_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_efx_send_routing_control = +static const struct snd_kcontrol_new snd_emu10k1_efx_send_routing_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1568,7 +1568,7 @@ static int snd_emu10k1_efx_send_volume_put(struct snd_kcontrol *kcontrol, } -static struct snd_kcontrol_new snd_emu10k1_efx_send_volume_control = +static const struct snd_kcontrol_new snd_emu10k1_efx_send_volume_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1626,7 +1626,7 @@ static int snd_emu10k1_efx_attn_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_efx_attn_control = +static const struct snd_kcontrol_new snd_emu10k1_efx_attn_control = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1691,7 +1691,7 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_emu10k1_shared_spdif = +static const struct snd_kcontrol_new snd_emu10k1_shared_spdif = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "SB Live Analog/Digital Output Jack", @@ -1700,7 +1700,7 @@ static struct snd_kcontrol_new snd_emu10k1_shared_spdif = .put = snd_emu10k1_shared_spdif_put }; -static struct snd_kcontrol_new snd_audigy_shared_spdif = +static const struct snd_kcontrol_new snd_audigy_shared_spdif = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Audigy Analog/Digital Output Jack", @@ -1738,7 +1738,7 @@ static int snd_audigy_capture_boost_put(struct snd_kcontrol *kcontrol, return snd_ac97_update(emu->ac97, AC97_REC_GAIN, val); } -static struct snd_kcontrol_new snd_audigy_capture_boost = +static const struct snd_kcontrol_new snd_audigy_capture_boost = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Mic Extra Boost", diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 37be1e14d756626a4c9d887c26249807f48dbc81..ef1cf530c929a59e893b7817c8504bda96342a2a 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -1542,7 +1542,7 @@ static int snd_emu10k1_pcm_efx_voices_mask_put(struct snd_kcontrol *kcontrol, st return change; } -static struct snd_kcontrol_new snd_emu10k1_pcm_efx_voices_mask = { +static const struct snd_kcontrol_new snd_emu10k1_pcm_efx_voices_mask = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "Captured FX8010 Outputs", .info = snd_emu10k1_pcm_efx_voices_mask_info, diff --git a/sound/pci/ens1370.c b/sound/pci/ens1370.c index 164adad916506cf1b492b45ef59452c3b0079462..09a63ef41ef2ec56eca7c679fde580d968e906d7 100644 --- a/sound/pci/ens1370.c +++ b/sound/pci/ens1370.c @@ -106,7 +106,7 @@ module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Ensoniq AudioPCI soundcard."); #ifdef SUPPORT_JOYSTICK #ifdef CHIP1371 -module_param_array(joystick_port, int, NULL, 0444); +module_param_hw_array(joystick_port, int, ioport, NULL, 0444); MODULE_PARM_DESC(joystick_port, "Joystick port address."); #else module_param_array(joystick, bool, NULL, 0444); @@ -1530,7 +1530,7 @@ static int snd_es1373_rear_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ens1373_rear = +static const struct snd_kcontrol_new snd_ens1373_rear = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "AC97 2ch->4ch Copy Switch", @@ -1575,7 +1575,7 @@ static int snd_es1373_line_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new snd_ens1373_line = +static const struct snd_kcontrol_new snd_ens1373_line = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Line In->Rear Out Switch", diff --git a/sound/pci/hda/dell_wmi_helper.c b/sound/pci/hda/dell_wmi_helper.c index 19d41da79f93cc95500859c9e1690a2d89935d23..7efa7bd7acb25089e1ff2f8ece32d97ccd7e3113 100644 --- a/sound/pci/hda/dell_wmi_helper.c +++ b/sound/pci/hda/dell_wmi_helper.c @@ -2,11 +2,11 @@ * to be included from codec driver */ -#if IS_ENABLED(CONFIG_LEDS_DELL_NETBOOKS) +#if IS_ENABLED(CONFIG_DELL_LAPTOP) #include static int dell_led_value; -static int (*dell_led_set_func)(int, int); +static int (*dell_micmute_led_set_func)(int); static void (*dell_old_cap_hook)(struct hda_codec *, struct snd_kcontrol *, struct snd_ctl_elem_value *); @@ -18,7 +18,7 @@ static void update_dell_wmi_micmute_led(struct hda_codec *codec, if (dell_old_cap_hook) dell_old_cap_hook(codec, kcontrol, ucontrol); - if (!ucontrol || !dell_led_set_func) + if (!ucontrol || !dell_micmute_led_set_func) return; if (strcmp("Capture Switch", ucontrol->id.name) == 0 && ucontrol->id.index == 0) { /* TODO: How do I verify if it's a mono or stereo here? */ @@ -26,8 +26,8 @@ static void update_dell_wmi_micmute_led(struct hda_codec *codec, if (val == dell_led_value) return; dell_led_value = val; - if (dell_led_set_func) - dell_led_set_func(DELL_LED_MICMUTE, dell_led_value); + if (dell_micmute_led_set_func) + dell_micmute_led_set_func(dell_led_value); } } @@ -39,15 +39,15 @@ static void alc_fixup_dell_wmi(struct hda_codec *codec, bool removefunc = false; if (action == HDA_FIXUP_ACT_PROBE) { - if (!dell_led_set_func) - dell_led_set_func = symbol_request(dell_app_wmi_led_set); - if (!dell_led_set_func) { - codec_warn(codec, "Failed to find dell wmi symbol dell_app_wmi_led_set\n"); + if (!dell_micmute_led_set_func) + dell_micmute_led_set_func = symbol_request(dell_micmute_led_set); + if (!dell_micmute_led_set_func) { + codec_warn(codec, "Failed to find dell wmi symbol dell_micmute_led_set\n"); return; } removefunc = true; - if (dell_led_set_func(DELL_LED_MICMUTE, false) >= 0) { + if (dell_micmute_led_set_func(false) >= 0) { dell_led_value = 0; if (spec->gen.num_adc_nids > 1 && !spec->gen.dyn_adc_switch) codec_dbg(codec, "Skipping micmute LED control due to several ADCs"); @@ -60,17 +60,17 @@ static void alc_fixup_dell_wmi(struct hda_codec *codec, } - if (dell_led_set_func && (action == HDA_FIXUP_ACT_FREE || removefunc)) { - symbol_put(dell_app_wmi_led_set); - dell_led_set_func = NULL; + if (dell_micmute_led_set_func && (action == HDA_FIXUP_ACT_FREE || removefunc)) { + symbol_put(dell_micmute_led_set); + dell_micmute_led_set_func = NULL; dell_old_cap_hook = NULL; } } -#else /* CONFIG_LEDS_DELL_NETBOOKS */ +#else /* CONFIG_DELL_LAPTOP */ static void alc_fixup_dell_wmi(struct hda_codec *codec, const struct hda_fixup *fix, int action) { } -#endif /* CONFIG_LEDS_DELL_NETBOOKS */ +#endif /* CONFIG_DELL_LAPTOP */ diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index a03cf68d0bcd6ce350b658e96de7dc9b9561dd1a..d3ea73171a3d22a2e41554e7df8d1fedc72a9751 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -580,6 +580,7 @@ const char *hda_get_autocfg_input_label(struct hda_codec *codec, has_multiple_pins = 1; if (has_multiple_pins && type == AUTO_PIN_MIC) has_multiple_pins &= check_mic_location_need(codec, cfg, input); + has_multiple_pins |= codec->force_pin_prefix; return hda_get_input_pin_label(codec, &cfg->inputs[input], cfg->inputs[input].pin, has_multiple_pins); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 8fd745cb3f36c49c09213dccab6b818c6f6a00bf..70bb365a08d2673d383df4c923f41248c89944e6 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1965,7 +1965,7 @@ static int vmaster_mute_mode_put(struct snd_kcontrol *kcontrol, return 1; } -static struct snd_kcontrol_new vmaster_mute_mode = { +static const struct snd_kcontrol_new vmaster_mute_mode = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Mute-LED Mode", .info = vmaster_mute_mode_info, @@ -2705,7 +2705,7 @@ static int spdif_share_sw_put(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new spdif_share_sw = { +static const struct snd_kcontrol_new spdif_share_sw = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "IEC958 Default PCM Playback Switch", .info = snd_ctl_boolean_mono_info, diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index f17f25245e52acdee1a57f3ae5ab7b84fd97a37b..d6fb2d5d01a70ef21eb533f1b86cb9c9c8f2be84 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -256,6 +256,7 @@ struct hda_codec { unsigned int dump_coef:1; /* dump processing coefs in codec proc file */ unsigned int power_save_node:1; /* advanced PM for each widget */ unsigned int auto_runtime_pm:1; /* enable automatic codec runtime pm */ + unsigned int force_pin_prefix:1; /* Add location prefix */ #ifdef CONFIG_PM unsigned long power_on_acct; unsigned long power_off_acct; diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index e7c8f4f076d5f3297946b088bc31cf7ecc0d66ad..2842c82363c0435f90edfe06793df254c36d43ef 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -196,6 +196,9 @@ static void parse_user_hints(struct hda_codec *codec) val = snd_hda_get_bool_hint(codec, "hp_mic_detect"); if (val >= 0) spec->suppress_hp_mic_detect = !val; + val = snd_hda_get_bool_hint(codec, "vmaster"); + if (val >= 0) + spec->suppress_vmaster = !val; if (!snd_hda_get_int_hint(codec, "mixer_nid", &val)) spec->mixer_nid = val; @@ -1125,6 +1128,7 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch, *index = 0; if (cfg->line_outs == 1 && !spec->multi_ios && + !codec->force_pin_prefix && !cfg->hp_outs && !cfg->speaker_outs) return spec->vmaster_mute.hook ? "PCM" : "Master"; @@ -1132,6 +1136,7 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch, * use it master (or "PCM" if a vmaster hook is present) */ if (spec->multiout.num_dacs == 1 && !spec->mixer_nid && + !codec->force_pin_prefix && !spec->multiout.hp_out_nid[0] && !spec->multiout.extra_out_nid[0]) return spec->vmaster_mute.hook ? "PCM" : "Master"; @@ -5031,7 +5036,7 @@ int snd_hda_gen_build_controls(struct hda_codec *codec) } /* if we have no master control, let's create it */ - if (!spec->no_analog && + if (!spec->no_analog && !spec->suppress_vmaster && !snd_hda_find_mixer_ctl(codec, "Master Playback Volume")) { err = snd_hda_add_vmaster(codec, "Master Playback Volume", spec->vmaster_tlv, slave_pfxs, @@ -5039,7 +5044,7 @@ int snd_hda_gen_build_controls(struct hda_codec *codec) if (err < 0) return err; } - if (!spec->no_analog && + if (!spec->no_analog && !spec->suppress_vmaster && !snd_hda_find_mixer_ctl(codec, "Master Playback Switch")) { err = __snd_hda_add_vmaster(codec, "Master Playback Switch", NULL, slave_pfxs, diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index f66fc7e25e07ff10f68f7aa216d00639f3adad3a..61772317de46b4ac49d5890a9fbd822a26bc4938 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -229,6 +229,7 @@ struct hda_gen_spec { unsigned int add_jack_modes:1; /* add i/o jack mode enum ctls */ unsigned int power_down_unused:1; /* power down unused widgets */ unsigned int dac_min_mute:1; /* minimal = mute for DACs */ + unsigned int suppress_vmaster:1; /* don't create vmaster kctls */ /* other internal flags */ unsigned int no_analog:1; /* digital I/O only */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c8256a89375a90149f790390588b232a53e9e67b..1770f085c2a694a398e2b30312cb79d6c3fd617a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -53,7 +53,7 @@ #ifdef CONFIG_X86 /* for snoop control */ #include -#include +#include #include #endif #include @@ -77,6 +77,7 @@ enum { POS_FIX_POSBUF, POS_FIX_VIACOMBO, POS_FIX_COMBO, + POS_FIX_SKL, }; /* Defines for ATI HD Audio support in SB450 south bridge */ @@ -148,7 +149,7 @@ module_param_array(model, charp, NULL, 0444); MODULE_PARM_DESC(model, "Use the given board model."); module_param_array(position_fix, int, NULL, 0444); MODULE_PARM_DESC(position_fix, "DMA pointer read method." - "(-1 = system default, 0 = auto, 1 = LPIB, 2 = POSBUF, 3 = VIACOMBO, 4 = COMBO)."); + "(-1 = system default, 0 = auto, 1 = LPIB, 2 = POSBUF, 3 = VIACOMBO, 4 = COMBO, 5 = SKL+)."); module_param_array(bdl_pos_adj, int, NULL, 0644); MODULE_PARM_DESC(bdl_pos_adj, "BDL position adjustment offset."); module_param_array(probe_mask, int, NULL, 0444); @@ -369,8 +370,10 @@ enum { #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198) #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ - IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) + IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) || \ + IS_GLK(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -534,9 +537,101 @@ static void bxt_reduce_dma_latency(struct azx *chip) { u32 val; - val = azx_readl(chip, SKL_EM4L); + val = azx_readl(chip, VS_EM4L); val &= (0x3 << 20); - azx_writel(chip, SKL_EM4L, val); + azx_writel(chip, VS_EM4L, val); +} + +/* + * ML_LCAP bits: + * bit 0: 6 MHz Supported + * bit 1: 12 MHz Supported + * bit 2: 24 MHz Supported + * bit 3: 48 MHz Supported + * bit 4: 96 MHz Supported + * bit 5: 192 MHz Supported + */ +static int intel_get_lctl_scf(struct azx *chip) +{ + struct hdac_bus *bus = azx_bus(chip); + static int preferred_bits[] = { 2, 3, 1, 4, 5 }; + u32 val, t; + int i; + + val = readl(bus->mlcap + AZX_ML_BASE + AZX_REG_ML_LCAP); + + for (i = 0; i < ARRAY_SIZE(preferred_bits); i++) { + t = preferred_bits[i]; + if (val & (1 << t)) + return t; + } + + dev_warn(chip->card->dev, "set audio clock frequency to 6MHz"); + return 0; +} + +static int intel_ml_lctl_set_power(struct azx *chip, int state) +{ + struct hdac_bus *bus = azx_bus(chip); + u32 val; + int timeout; + + /* + * the codecs are sharing the first link setting by default + * If other links are enabled for stream, they need similar fix + */ + val = readl(bus->mlcap + AZX_ML_BASE + AZX_REG_ML_LCTL); + val &= ~AZX_MLCTL_SPA; + val |= state << AZX_MLCTL_SPA_SHIFT; + writel(val, bus->mlcap + AZX_ML_BASE + AZX_REG_ML_LCTL); + /* wait for CPA */ + timeout = 50; + while (timeout) { + if (((readl(bus->mlcap + AZX_ML_BASE + AZX_REG_ML_LCTL)) & + AZX_MLCTL_CPA) == (state << AZX_MLCTL_CPA_SHIFT)) + return 0; + timeout--; + udelay(10); + } + + return -1; +} + +static void intel_init_lctl(struct azx *chip) +{ + struct hdac_bus *bus = azx_bus(chip); + u32 val; + int ret; + + /* 0. check lctl register value is correct or not */ + val = readl(bus->mlcap + AZX_ML_BASE + AZX_REG_ML_LCTL); + /* if SCF is already set, let's use it */ + if ((val & ML_LCTL_SCF_MASK) != 0) + return; + + /* + * Before operating on SPA, CPA must match SPA. + * Any deviation may result in undefined behavior. + */ + if (((val & AZX_MLCTL_SPA) >> AZX_MLCTL_SPA_SHIFT) != + ((val & AZX_MLCTL_CPA) >> AZX_MLCTL_CPA_SHIFT)) + return; + + /* 1. turn link down: set SPA to 0 and wait CPA to 0 */ + ret = intel_ml_lctl_set_power(chip, 0); + udelay(100); + if (ret) + goto set_spa; + + /* 2. update SCF to select a properly audio clock*/ + val &= ~ML_LCTL_SCF_MASK; + val |= intel_get_lctl_scf(chip); + writel(val, bus->mlcap + AZX_ML_BASE + AZX_REG_ML_LCTL); + +set_spa: + /* 4. turn link up: set SPA to 1 and wait CPA to 1 */ + intel_ml_lctl_set_power(chip, 1); + udelay(100); } static void hda_intel_init_chip(struct azx *chip, bool full_reset) @@ -564,6 +659,9 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset) /* reduce dma latency to avoid noise */ if (IS_BXT(pci)) bxt_reduce_dma_latency(chip); + + if (bus->mlcap != NULL) + intel_init_lctl(chip); } /* calculate runtime delay from LPIB */ @@ -815,6 +913,31 @@ static unsigned int azx_via_get_position(struct azx *chip, return bound_pos + mod_dma_pos; } +static unsigned int azx_skl_get_dpib_pos(struct azx *chip, + struct azx_dev *azx_dev) +{ + return _snd_hdac_chip_readl(azx_bus(chip), + AZX_REG_VS_SDXDPIB_XBASE + + (AZX_REG_VS_SDXDPIB_XINTERVAL * + azx_dev->core.index)); +} + +/* get the current DMA position with correction on SKL+ chips */ +static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev) +{ + /* DPIB register gives a more accurate position for playback */ + if (azx_dev->core.substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + return azx_skl_get_dpib_pos(chip, azx_dev); + + /* For capture, we need to read posbuf, but it requires a delay + * for the possible boundary overlap; the read of DPIB fetches the + * actual posbuf + */ + udelay(20); + azx_skl_get_dpib_pos(chip, azx_dev); + return azx_get_pos_posbuf(chip, azx_dev); +} + #ifdef CONFIG_PM static DEFINE_MUTEX(card_list_lock); static LIST_HEAD(card_list); @@ -1351,6 +1474,7 @@ static int check_position_fix(struct azx *chip, int fix) case POS_FIX_POSBUF: case POS_FIX_VIACOMBO: case POS_FIX_COMBO: + case POS_FIX_SKL: return fix; } @@ -1371,6 +1495,10 @@ static int check_position_fix(struct azx *chip, int fix) dev_dbg(chip->card->dev, "Using LPIB position fix\n"); return POS_FIX_LPIB; } + if (IS_SKL_PLUS(chip->pci)) { + dev_dbg(chip->card->dev, "Using SKL position fix\n"); + return POS_FIX_SKL; + } return POS_FIX_AUTO; } @@ -1382,6 +1510,7 @@ static void assign_position_fix(struct azx *chip, int fix) [POS_FIX_POSBUF] = azx_get_pos_posbuf, [POS_FIX_VIACOMBO] = azx_via_get_position, [POS_FIX_COMBO] = azx_get_pos_lpib, + [POS_FIX_SKL] = azx_get_pos_skl, }; chip->get_position[0] = chip->get_position[1] = callbacks[fix]; @@ -1390,7 +1519,7 @@ static void assign_position_fix(struct azx *chip, int fix) if (fix == POS_FIX_COMBO) chip->get_position[1] = NULL; - if (fix == POS_FIX_POSBUF && + if ((fix == POS_FIX_POSBUF || fix == POS_FIX_SKL) && (chip->driver_caps & AZX_DCAPS_COUNT_LPIB_DELAY)) { chip->get_delay[0] = chip->get_delay[1] = azx_get_delay_from_lpib; diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 07a9deb1747779245b42bd1f50e8e423c5f2a8f1..a148176c16a98acccad573a20baade15520f83d1 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -857,7 +857,7 @@ static int chipio_write_address(struct hda_codec *codec, chip_addx >> 16); } - spec->curr_chip_addx = (res < 0) ? ~0UL : chip_addx; + spec->curr_chip_addx = (res < 0) ? ~0U : chip_addx; return res; } @@ -882,7 +882,7 @@ static int chipio_write_data(struct hda_codec *codec, unsigned int data) /*If no error encountered, automatically increment the address as per chip behaviour*/ spec->curr_chip_addx = (res != -EIO) ? - (spec->curr_chip_addx + 4) : ~0UL; + (spec->curr_chip_addx + 4) : ~0U; return res; } @@ -933,7 +933,7 @@ static int chipio_read_data(struct hda_codec *codec, unsigned int *data) /*If no error encountered, automatically increment the address as per chip behaviour*/ spec->curr_chip_addx = (res != -EIO) ? - (spec->curr_chip_addx + 4) : ~0UL; + (spec->curr_chip_addx + 4) : ~0U; return res; } @@ -1168,7 +1168,7 @@ static int dspio_write_multiple(struct hda_codec *codec, int status = 0; unsigned int count; - if ((buffer == NULL)) + if (buffer == NULL) return -EINVAL; count = 0; @@ -1210,7 +1210,7 @@ static int dspio_read_multiple(struct hda_codec *codec, unsigned int *buffer, unsigned int skip_count; unsigned int dummy; - if ((buffer == NULL)) + if (buffer == NULL) return -1; count = 0; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 69266b8ea2ad7b498097c4bc231fbad6e55ff37e..63bc894ddf5e8102c3f1ed93c80fef50c3f95700 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -52,6 +52,12 @@ struct conexant_spec { bool dc_enable; unsigned int dc_input_bias; /* offset into olpc_xo_dc_bias */ struct nid_path *dc_mode_path; + + int mute_led_polarity; + unsigned int gpio_led; + unsigned int gpio_mute_led_mask; + unsigned int gpio_mic_led_mask; + }; @@ -264,6 +270,7 @@ enum { CXT_FIXUP_HP_DOCK, CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, + CXT_FIXUP_MUTE_LED_GPIO, }; /* for hda_fixup_thinkpad_acpi() */ @@ -646,6 +653,74 @@ static void cxt_fixup_hp_gate_mic_jack(struct hda_codec *codec, snd_hda_jack_set_gating_jack(codec, 0x19, 0x16); } +/* update LED status via GPIO */ +static void cxt_update_gpio_led(struct hda_codec *codec, unsigned int mask, + bool enabled) +{ + struct conexant_spec *spec = codec->spec; + unsigned int oldval = spec->gpio_led; + + if (spec->mute_led_polarity) + enabled = !enabled; + + if (enabled) + spec->gpio_led &= ~mask; + else + spec->gpio_led |= mask; + if (spec->gpio_led != oldval) + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, + spec->gpio_led); +} + +/* turn on/off mute LED via GPIO per vmaster hook */ +static void cxt_fixup_gpio_mute_hook(void *private_data, int enabled) +{ + struct hda_codec *codec = private_data; + struct conexant_spec *spec = codec->spec; + + cxt_update_gpio_led(codec, spec->gpio_mute_led_mask, enabled); +} + +/* turn on/off mic-mute LED via GPIO per capture hook */ +static void cxt_fixup_gpio_mic_mute_hook(struct hda_codec *codec, + struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct conexant_spec *spec = codec->spec; + + if (ucontrol) + cxt_update_gpio_led(codec, spec->gpio_mic_led_mask, + ucontrol->value.integer.value[0] || + ucontrol->value.integer.value[1]); +} + + +static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct conexant_spec *spec = codec->spec; + static const struct hda_verb gpio_init[] = { + { 0x01, AC_VERB_SET_GPIO_MASK, 0x03 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03 }, + {} + }; + codec_info(codec, "action: %d gpio_led: %d\n", action, spec->gpio_led); + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.vmaster_mute.hook = cxt_fixup_gpio_mute_hook; + spec->gen.cap_sync_hook = cxt_fixup_gpio_mic_mute_hook; + spec->gpio_led = 0; + spec->mute_led_polarity = 0; + spec->gpio_mute_led_mask = 0x01; + spec->gpio_mic_led_mask = 0x02; + } + snd_hda_add_verbs(codec, gpio_init); + if (spec->gpio_led) + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, + spec->gpio_led); +} + + /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { { 0x16, 0x042140ff }, /* HP (seq# overridden) */ @@ -785,7 +860,9 @@ static const struct hda_fixup cxt_fixups[] = { { 0x16, 0x21011020 }, /* line-out */ { 0x18, 0x2181103f }, /* line-in */ { } - } + }, + .chained = true, + .chain_id = CXT_FIXUP_MUTE_LED_GPIO, }, [CXT_FIXUP_HP_SPECTRE] = { .type = HDA_FIXUP_PINS, @@ -799,6 +876,10 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_hp_gate_mic_jack, }, + [CXT_FIXUP_MUTE_LED_GPIO] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_mute_led_gpio, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -851,6 +932,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), + SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), @@ -882,6 +964,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" }, { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, + { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" }, {} }; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 1461ef8eb7495d1d0f46ff0d6b5a15ece3c1a1da..90e4ff87445e866088f39c11cddf1326e2ea4e4d 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -177,6 +177,7 @@ struct hdmi_spec { bool i915_bound; /* was i915 bound in this driver? */ struct hdac_chmap chmap; + hda_nid_t vendor_nid; }; #ifdef CONFIG_SND_HDA_I915 @@ -383,7 +384,7 @@ static int hdmi_eld_ctl_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new eld_bytes_ctl = { +static const struct snd_kcontrol_new eld_bytes_ctl = { .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "ELD", @@ -2372,6 +2373,7 @@ static void intel_haswell_fixup_connect_list(struct hda_codec *codec, } #define INTEL_VENDOR_NID 0x08 +#define INTEL_GLK_VENDOR_NID 0x0B #define INTEL_GET_VENDOR_VERB 0xf81 #define INTEL_SET_VENDOR_VERB 0x781 #define INTEL_EN_DP12 0x02 /* enable DP 1.2 features */ @@ -2381,14 +2383,15 @@ static void intel_haswell_enable_all_pins(struct hda_codec *codec, bool update_tree) { unsigned int vendor_param; + struct hdmi_spec *spec = codec->spec; - vendor_param = snd_hda_codec_read(codec, INTEL_VENDOR_NID, 0, + vendor_param = snd_hda_codec_read(codec, spec->vendor_nid, 0, INTEL_GET_VENDOR_VERB, 0); if (vendor_param == -1 || vendor_param & INTEL_EN_ALL_PIN_CVTS) return; vendor_param |= INTEL_EN_ALL_PIN_CVTS; - vendor_param = snd_hda_codec_read(codec, INTEL_VENDOR_NID, 0, + vendor_param = snd_hda_codec_read(codec, spec->vendor_nid, 0, INTEL_SET_VENDOR_VERB, vendor_param); if (vendor_param == -1) return; @@ -2400,8 +2403,9 @@ static void intel_haswell_enable_all_pins(struct hda_codec *codec, static void intel_haswell_fixup_enable_dp12(struct hda_codec *codec) { unsigned int vendor_param; + struct hdmi_spec *spec = codec->spec; - vendor_param = snd_hda_codec_read(codec, INTEL_VENDOR_NID, 0, + vendor_param = snd_hda_codec_read(codec, spec->vendor_nid, 0, INTEL_GET_VENDOR_VERB, 0); if (vendor_param == -1 || vendor_param & INTEL_EN_DP12) return; @@ -2409,7 +2413,7 @@ static void intel_haswell_fixup_enable_dp12(struct hda_codec *codec) /* enable DP1.2 mode */ vendor_param |= INTEL_EN_DP12; snd_hdac_regmap_add_vendor_verb(&codec->core, INTEL_SET_VENDOR_VERB); - snd_hda_codec_write_cache(codec, INTEL_VENDOR_NID, 0, + snd_hda_codec_write_cache(codec, spec->vendor_nid, 0, INTEL_SET_VENDOR_VERB, vendor_param); } @@ -2503,7 +2507,7 @@ static void i915_pin_cvt_fixup(struct hda_codec *codec, } /* Intel Haswell and onwards; audio component with eld notifier */ -static int patch_i915_hsw_hdmi(struct hda_codec *codec) +static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid) { struct hdmi_spec *spec; int err; @@ -2520,6 +2524,7 @@ static int patch_i915_hsw_hdmi(struct hda_codec *codec) spec = codec->spec; codec->dp_mst = true; spec->dyn_pcm_assign = true; + spec->vendor_nid = vendor_nid; intel_haswell_enable_all_pins(codec, true); intel_haswell_fixup_enable_dp12(codec); @@ -2548,6 +2553,16 @@ static int patch_i915_hsw_hdmi(struct hda_codec *codec) return 0; } +static int patch_i915_hsw_hdmi(struct hda_codec *codec) +{ + return intel_hsw_common_init(codec, INTEL_VENDOR_NID); +} + +static int patch_i915_glk_hdmi(struct hda_codec *codec) +{ + return intel_hsw_common_init(codec, INTEL_GLK_VENDOR_NID); +} + /* Intel Baytrail and Braswell; with eld notifier */ static int patch_i915_byt_hdmi(struct hda_codec *codec) { @@ -3800,7 +3815,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), -HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_hsw_hdmi), +HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 299835d1fbaadb5f312ee86502deacd4f7643603..cbeebc0a9711e8283090762450525a84022db600 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1800,6 +1800,7 @@ enum { ALC882_FIXUP_NO_PRIMARY_HP, ALC887_FIXUP_ASUS_BASS, ALC887_FIXUP_BASS_CHMAP, + ALC1220_FIXUP_GB_DUAL_CODECS, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -1962,6 +1963,61 @@ static void alc882_fixup_no_primary_hp(struct hda_codec *codec, static void alc_fixup_bass_chmap(struct hda_codec *codec, const struct hda_fixup *fix, int action); +/* For dual-codec configuration, we need to disable some features to avoid + * conflicts of kctls and PCM streams + */ +static void alc_fixup_dual_codecs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + /* disable vmaster */ + spec->gen.suppress_vmaster = 1; + /* auto-mute and auto-mic switch don't work with multiple codecs */ + spec->gen.suppress_auto_mute = 1; + spec->gen.suppress_auto_mic = 1; + /* disable aamix as well */ + spec->gen.mixer_nid = 0; + /* add location prefix to avoid conflicts */ + codec->force_pin_prefix = 1; +} + +static void rename_ctl(struct hda_codec *codec, const char *oldname, + const char *newname) +{ + struct snd_kcontrol *kctl; + + kctl = snd_hda_find_mixer_ctl(codec, oldname); + if (kctl) + strcpy(kctl->id.name, newname); +} + +static void alc1220_fixup_gb_dual_codecs(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + alc_fixup_dual_codecs(codec, fix, action); + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* override card longname to provide a unique UCM profile */ + strcpy(codec->card->longname, "HDAudio-Gigabyte-ALC1220DualCodecs"); + break; + case HDA_FIXUP_ACT_BUILD: + /* rename Capture controls depending on the codec */ + rename_ctl(codec, "Capture Volume", + codec->addr == 0 ? + "Rear-Panel Capture Volume" : + "Front-Panel Capture Volume"); + rename_ctl(codec, "Capture Switch", + codec->addr == 0 ? + "Rear-Panel Capture Switch" : + "Front-Panel Capture Switch"); + break; + } +} + static const struct hda_fixup alc882_fixups[] = { [ALC882_FIXUP_ABIT_AW9D_MAX] = { .type = HDA_FIXUP_PINS, @@ -2198,6 +2254,10 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_bass_chmap, }, + [ALC1220_FIXUP_GB_DUAL_CODECS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc1220_fixup_gb_dual_codecs, + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2264,9 +2324,11 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), + SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), + SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), + SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), - SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), @@ -2281,6 +2343,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {.id = ALC883_FIXUP_ACER_EAPD, .name = "acer-aspire"}, {.id = ALC882_FIXUP_INV_DMIC, .name = "inv-dmic"}, {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"}, + {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"}, {} }; @@ -4663,7 +4726,6 @@ static void alc282_fixup_asus_tx300(struct hda_codec *codec, { 0x1b, 0x21114000 }, /* dock speaker pin */ {} }; - struct snd_kcontrol *kctl; switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: @@ -4678,12 +4740,10 @@ static void alc282_fixup_asus_tx300(struct hda_codec *codec, /* this is a bit tricky; give more sane names for the main * (tablet) speaker and the dock speaker, respectively */ - kctl = snd_hda_find_mixer_ctl(codec, "Speaker Playback Switch"); - if (kctl) - strcpy(kctl->id.name, "Dock Speaker Playback Switch"); - kctl = snd_hda_find_mixer_ctl(codec, "Bass Speaker Playback Switch"); - if (kctl) - strcpy(kctl->id.name, "Speaker Playback Switch"); + rename_ctl(codec, "Speaker Playback Switch", + "Dock Speaker Playback Switch"); + rename_ctl(codec, "Bass Speaker Playback Switch", + "Speaker Playback Switch"); break; } } @@ -4766,6 +4826,30 @@ static void alc280_fixup_hp_9480m(struct hda_codec *codec, } } +static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + alc_fixup_dual_codecs(codec, fix, action); + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* override card longname to provide a unique UCM profile */ + strcpy(codec->card->longname, "HDAudio-Lenovo-DualCodecs"); + break; + case HDA_FIXUP_ACT_BUILD: + /* rename Capture controls depending on the codec */ + rename_ctl(codec, "Capture Volume", + codec->addr == 0 ? + "Rear-Panel Capture Volume" : + "Front-Panel Capture Volume"); + rename_ctl(codec, "Capture Switch", + codec->addr == 0 ? + "Rear-Panel Capture Switch" : + "Front-Panel Capture Switch"); + break; + } +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -4833,6 +4917,8 @@ enum { ALC290_FIXUP_SUBWOOFER_HSJACK, ALC269_FIXUP_THINKPAD_ACPI, ALC269_FIXUP_DMIC_THINKPAD_ACPI, + ALC255_FIXUP_ACER_MIC_NO_PRESENCE, + ALC255_FIXUP_ASUS_MIC_NO_PRESENCE, ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, ALC255_FIXUP_HEADSET_MODE, @@ -4872,6 +4958,12 @@ enum { ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, ALC221_FIXUP_HP_MIC_NO_PRESENCE, + ALC256_FIXUP_ASUS_HEADSET_MODE, + ALC256_FIXUP_ASUS_MIC, + ALC256_FIXUP_ASUS_AIO_GPIO2, + ALC233_FIXUP_ASUS_MIC_NO_PRESENCE, + ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE, + ALC233_FIXUP_LENOVO_MULTI_CODECS, }; static const struct hda_fixup alc269_fixups[] = { @@ -5289,6 +5381,24 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_THINKPAD_ACPI, }, + [ALC255_FIXUP_ACER_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC255_FIXUP_HEADSET_MODE + }, + [ALC255_FIXUP_ASUS_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC255_FIXUP_HEADSET_MODE + }, [ALC255_FIXUP_DELL1_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5579,6 +5689,54 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC256_FIXUP_ASUS_HEADSET_MODE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_mode, + }, + [ALC256_FIXUP_ASUS_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x13, 0x90a60160 }, /* use as internal mic */ + { 0x19, 0x04a11120 }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE + }, + [ALC256_FIXUP_ASUS_AIO_GPIO2] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Set up GPIO2 for the speaker amp */ + { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 }, + { 0x01, AC_VERB_SET_GPIO_DATA, 0x04 }, + {} + }, + }, + [ALC233_FIXUP_ASUS_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC + }, + [ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Enables internal speaker */ + {0x20, AC_VERB_SET_COEF_INDEX, 0x40}, + {0x20, AC_VERB_SET_PROC_COEF, 0x8800}, + {} + }, + .chained = true, + .chain_id = ALC233_FIXUP_ASUS_MIC_NO_PRESENCE + }, + [ALC233_FIXUP_LENOVO_MULTI_CODECS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_alc662_fixup_lenovo_dual_codecs, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5692,15 +5850,28 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC), SND_PCI_QUIRK(0x103c, 0x82bf, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x82c0, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1290, "ASUS X441SA", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), + SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), @@ -5721,6 +5892,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), @@ -5845,6 +6017,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"}, {.id = ALC292_FIXUP_TPT440, .name = "tpt440"}, {.id = ALC292_FIXUP_TPT460, .name = "tpt460"}, + {.id = ALC233_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {} }; #define ALC225_STANDARD_PINS \ @@ -5875,6 +6048,18 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {0x21, 0x03211020} static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1025, "Acer", ALC255_FIXUP_ACER_MIC_NO_PRESENCE, + {0x12, 0x90a601c0}, + {0x14, 0x90171120}, + {0x21, 0x02211030}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1043, "ASUS", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x14, 0x90170110}, + {0x1b, 0x90a70130}, + {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1043, "ASUS", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x1a, 0x90a70130}, + {0x1b, 0x90170110}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x12, 0xb7a60130}, @@ -5995,6 +6180,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, ALC256_STANDARD_PINS), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC, + {0x14, 0x90170110}, + {0x1b, 0x90a70130}, + {0x21, 0x04211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC, + {0x14, 0x90170110}, + {0x1b, 0x90a70130}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x14, 0x90170110}, @@ -6276,8 +6469,11 @@ static int patch_alc269(struct hda_codec *codec) break; case 0x10ec0225: case 0x10ec0295: + spec->codec_variant = ALC269_TYPE_ALC225; + break; case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; + spec->gen.mixer_nid = 0; /* no loopback on ALC299 */ break; case 0x10ec0234: case 0x10ec0274: @@ -6714,6 +6910,7 @@ enum { ALC668_FIXUP_DELL_DISABLE_AAMIX, ALC668_FIXUP_DELL_XPS13, ALC662_FIXUP_ASUS_Nx50, + ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, ALC668_FIXUP_ASUS_Nx51, ALC891_FIXUP_HEADSET_MODE, ALC891_FIXUP_DELL_MIC_NO_PRESENCE, @@ -6721,6 +6918,7 @@ enum { ALC892_FIXUP_ASROCK_MOBO, ALC662_FIXUP_USI_FUNC, ALC662_FIXUP_USI_HEADSET_MODE, + ALC662_FIXUP_LENOVO_MULTI_CODECS, }; static const struct hda_fixup alc662_fixups[] = { @@ -6967,14 +7165,21 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_BASS_1A }, + [ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_mode_alc668, + .chain_id = ALC662_FIXUP_BASS_CHMAP + }, [ALC668_FIXUP_ASUS_Nx51] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { - {0x1a, 0x90170151}, /* bass speaker */ + { 0x19, 0x03a1913d }, /* use as headphone mic, without its own jack detect */ + { 0x1a, 0x90170151 }, /* bass speaker */ + { 0x1b, 0x03a1113c }, /* use as headset mic, without its own jack detect */ {} }, .chained = true, - .chain_id = ALC662_FIXUP_BASS_CHMAP, + .chain_id = ALC668_FIXUP_ASUS_Nx51_HEADSET_MODE, }, [ALC891_FIXUP_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, @@ -7019,6 +7224,10 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_USI_FUNC }, + [ALC662_FIXUP_LENOVO_MULTI_CODECS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_alc662_fixup_lenovo_dual_codecs, + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -7056,6 +7265,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), + SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO), @@ -7135,6 +7345,7 @@ static const struct hda_model_fixup alc662_fixup_models[] = { {.id = ALC662_FIXUP_ASUS_MODE8, .name = "asus-mode8"}, {.id = ALC662_FIXUP_INV_DMIC, .name = "inv-dmic"}, {.id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, + {.id = ALC662_FIXUP_LENOVO_MULTI_CODECS, .name = "dual-codecs"}, {} }; diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index faa3d38bac0b7e51ab206440e8d17f4f6d751530..6cefdf6c0b758770e2615126285d7cd83d736d64 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1559,6 +1559,8 @@ static const struct snd_pci_quirk stac9200_fixup_tbl[] = { "Dell Inspiron 1501", STAC_9200_DELL_M26), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x01f6, "unknown Dell", STAC_9200_DELL_M26), + SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0201, + "Dell Latitude D430", STAC_9200_DELL_M22), /* Panasonic */ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC), /* Gateway machines needs EAPD to be set on resume */ diff --git a/sound/pci/ice1712/delta.c b/sound/pci/ice1712/delta.c index 3bfdc78cbc5f94b2f2f244c3f05ed1b36a224cbb..da5f37b7fdd0d6f2932d12c9f2baaf379331b52d 100644 --- a/sound/pci/ice1712/delta.c +++ b/sound/pci/ice1712/delta.c @@ -432,7 +432,7 @@ static int snd_ice1712_delta1010lt_wordclock_status_get(struct snd_kcontrol *kco return 0; } -static struct snd_kcontrol_new snd_ice1712_delta1010lt_wordclock_status = +static const struct snd_kcontrol_new snd_ice1712_delta1010lt_wordclock_status = { .access = (SNDRV_CTL_ELEM_ACCESS_READ), .iface = SNDRV_CTL_ELEM_IFACE_MIXER, diff --git a/sound/pci/ice1712/ews.c b/sound/pci/ice1712/ews.c index 5cb587cf360e50cffbc69a352970e3a2d057d6c0..ec07136fc288ca349fa2ab18eac27e3a7e7451eb 100644 --- a/sound/pci/ice1712/ews.c +++ b/sound/pci/ice1712/ews.c @@ -719,7 +719,7 @@ static int snd_ice1712_ews88mt_input_sense_put(struct snd_kcontrol *kcontrol, st return ndata != data; } -static struct snd_kcontrol_new snd_ice1712_ews88mt_input_sense = { +static const struct snd_kcontrol_new snd_ice1712_ews88mt_input_sense = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Input Sensitivity Switch", .info = snd_ice1712_ewx_io_sense_info, @@ -728,7 +728,7 @@ static struct snd_kcontrol_new snd_ice1712_ews88mt_input_sense = { .count = 8, }; -static struct snd_kcontrol_new snd_ice1712_ews88mt_output_sense = { +static const struct snd_kcontrol_new snd_ice1712_ews88mt_output_sense = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Output Sensitivity Switch", .info = snd_ice1712_ewx_io_sense_info, diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index b4aa4c1370a85f9561cf3d40c9af312a95041407..1d8612cabb9e3fada675c005b2706a79fb7e205b 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -279,7 +279,7 @@ static int snd_ice1712_digmix_route_ac97_put(struct snd_kcontrol *kcontrol, stru return val != nval; } -static struct snd_kcontrol_new snd_ice1712_mixer_digmix_route_ac97 = { +static const struct snd_kcontrol_new snd_ice1712_mixer_digmix_route_ac97 = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Digital Mixer To AC97", .info = snd_ice1712_digmix_route_ac97_info, @@ -1410,7 +1410,7 @@ static struct snd_kcontrol_new snd_ice1712_multi_capture_analog_switch = { .private_value = 10, }; -static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_switch = { +static const struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("Multi ", CAPTURE, SWITCH), .info = snd_ice1712_pro_mixer_switch_info, @@ -1432,7 +1432,7 @@ static struct snd_kcontrol_new snd_ice1712_multi_capture_analog_volume = { .tlv = { .p = db_scale_playback } }; -static struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_volume = { +static const struct snd_kcontrol_new snd_ice1712_multi_capture_spdif_volume = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("Multi ", CAPTURE, VOLUME), .info = snd_ice1712_pro_mixer_volume_info, @@ -1630,7 +1630,7 @@ static int snd_ice1712_eeprom_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ice1712_eeprom = { +static const struct snd_kcontrol_new snd_ice1712_eeprom = { .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = "ICE1712 EEPROM", .access = SNDRV_CTL_ELEM_ACCESS_READ, @@ -1666,7 +1666,7 @@ static int snd_ice1712_spdif_default_put(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ice1712_spdif_default = +static const struct snd_kcontrol_new snd_ice1712_spdif_default = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT), @@ -1717,7 +1717,7 @@ static int snd_ice1712_spdif_maskp_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ice1712_spdif_maskc = +static const struct snd_kcontrol_new snd_ice1712_spdif_maskc = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1726,7 +1726,7 @@ static struct snd_kcontrol_new snd_ice1712_spdif_maskc = .get = snd_ice1712_spdif_maskc_get, }; -static struct snd_kcontrol_new snd_ice1712_spdif_maskp = +static const struct snd_kcontrol_new snd_ice1712_spdif_maskp = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1753,7 +1753,7 @@ static int snd_ice1712_spdif_stream_put(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ice1712_spdif_stream = +static const struct snd_kcontrol_new snd_ice1712_spdif_stream = { .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE), @@ -1878,7 +1878,7 @@ static int snd_ice1712_pro_internal_clock_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ice1712_pro_internal_clock = { +static const struct snd_kcontrol_new snd_ice1712_pro_internal_clock = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Internal Clock", .info = snd_ice1712_pro_internal_clock_info, @@ -1943,7 +1943,7 @@ static int snd_ice1712_pro_internal_clock_default_put(struct snd_kcontrol *kcont return change; } -static struct snd_kcontrol_new snd_ice1712_pro_internal_clock_default = { +static const struct snd_kcontrol_new snd_ice1712_pro_internal_clock_default = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Internal Clock Default", .info = snd_ice1712_pro_internal_clock_default_info, @@ -1974,7 +1974,7 @@ static int snd_ice1712_pro_rate_locking_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ice1712_pro_rate_locking = { +static const struct snd_kcontrol_new snd_ice1712_pro_rate_locking = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Rate Locking", .info = snd_ice1712_pro_rate_locking_info, @@ -2005,7 +2005,7 @@ static int snd_ice1712_pro_rate_reset_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ice1712_pro_rate_reset = { +static const struct snd_kcontrol_new snd_ice1712_pro_rate_reset = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Rate Reset", .info = snd_ice1712_pro_rate_reset_info, @@ -2173,7 +2173,7 @@ static struct snd_kcontrol_new snd_ice1712_mixer_pro_analog_route = { .put = snd_ice1712_pro_route_analog_put, }; -static struct snd_kcontrol_new snd_ice1712_mixer_pro_spdif_route = { +static const struct snd_kcontrol_new snd_ice1712_mixer_pro_spdif_route = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, NONE) "Route", .info = snd_ice1712_pro_route_info, @@ -2215,7 +2215,7 @@ static int snd_ice1712_pro_volume_rate_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ice1712_mixer_pro_volume_rate = { +static const struct snd_kcontrol_new snd_ice1712_mixer_pro_volume_rate = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Volume Rate", .info = snd_ice1712_pro_volume_rate_info, @@ -2248,7 +2248,7 @@ static int snd_ice1712_pro_peak_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ice1712_mixer_pro_peak = { +static const struct snd_kcontrol_new snd_ice1712_mixer_pro_peak = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "Multi Track Peak", .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index 842744e7a139bc873ba3b7077a7dcd3ac40f3518..9cd6e55c0642386c835a4d1277e3463e6ef9067d 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -1598,7 +1598,7 @@ static int snd_vt1724_eeprom_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_vt1724_eeprom = { +static const struct snd_kcontrol_new snd_vt1724_eeprom = { .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = "ICE1724 EEPROM", .access = SNDRV_CTL_ELEM_ACCESS_READ, @@ -1711,7 +1711,7 @@ static int snd_vt1724_spdif_default_put(struct snd_kcontrol *kcontrol, return val != old; } -static struct snd_kcontrol_new snd_vt1724_spdif_default = +static const struct snd_kcontrol_new snd_vt1724_spdif_default = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, DEFAULT), @@ -1743,7 +1743,7 @@ static int snd_vt1724_spdif_maskp_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_vt1724_spdif_maskc = +static const struct snd_kcontrol_new snd_vt1724_spdif_maskc = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1752,7 +1752,7 @@ static struct snd_kcontrol_new snd_vt1724_spdif_maskc = .get = snd_vt1724_spdif_maskc_get, }; -static struct snd_kcontrol_new snd_vt1724_spdif_maskp = +static const struct snd_kcontrol_new snd_vt1724_spdif_maskp = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1789,7 +1789,7 @@ static int snd_vt1724_spdif_sw_put(struct snd_kcontrol *kcontrol, return old != val; } -static struct snd_kcontrol_new snd_vt1724_spdif_switch = +static const struct snd_kcontrol_new snd_vt1724_spdif_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, /* FIXME: the following conflict with IEC958 Playback Route */ @@ -1964,7 +1964,7 @@ static int snd_vt1724_pro_internal_clock_put(struct snd_kcontrol *kcontrol, return old_rate != new_rate; } -static struct snd_kcontrol_new snd_vt1724_pro_internal_clock = { +static const struct snd_kcontrol_new snd_vt1724_pro_internal_clock = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Internal Clock", .info = snd_vt1724_pro_internal_clock_info, @@ -1995,7 +1995,7 @@ static int snd_vt1724_pro_rate_locking_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_vt1724_pro_rate_locking = { +static const struct snd_kcontrol_new snd_vt1724_pro_rate_locking = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Rate Locking", .info = snd_vt1724_pro_rate_locking_info, @@ -2026,7 +2026,7 @@ static int snd_vt1724_pro_rate_reset_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_vt1724_pro_rate_reset = { +static const struct snd_kcontrol_new snd_vt1724_pro_rate_reset = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Multi Track Rate Reset", .info = snd_vt1724_pro_rate_reset_info, @@ -2151,7 +2151,7 @@ static struct snd_kcontrol_new snd_vt1724_mixer_pro_analog_route = .put = snd_vt1724_pro_route_analog_put, }; -static struct snd_kcontrol_new snd_vt1724_mixer_pro_spdif_route = { +static const struct snd_kcontrol_new snd_vt1724_mixer_pro_spdif_route = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("", PLAYBACK, NONE) "Route", .info = snd_vt1724_pro_route_info, @@ -2187,7 +2187,7 @@ static int snd_vt1724_pro_peak_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_vt1724_mixer_pro_peak = { +static const struct snd_kcontrol_new snd_vt1724_mixer_pro_peak = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "Multi Track Peak", .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 9720a30dbfff31fb412b385370647927e6ecef3a..6d17b171c17bfeee104eec7c9939aa45b3868f5d 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -40,7 +40,9 @@ #include /* for 440MX workaround */ #include -#include +#ifdef CONFIG_X86 +#include +#endif MODULE_AUTHOR("Jaroslav Kysela "); MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; SiS 7012; Ali 5455"); diff --git a/sound/pci/lola/lola_mixer.c b/sound/pci/lola/lola_mixer.c index e7fe15dd5a90199057e8232a1701dbab3a3b2c49..cb25acf7bc4965a4e2e02e734bf8dba42c334545 100644 --- a/sound/pci/lola/lola_mixer.c +++ b/sound/pci/lola/lola_mixer.c @@ -645,7 +645,7 @@ static int lola_input_src_put(struct snd_kcontrol *kcontrol, return lola_set_src_config(chip, mask, true); } -static struct snd_kcontrol_new lola_input_src_mixer = { +static const struct snd_kcontrol_new lola_input_src_mixer = { .name = "Digital SRC Capture Switch", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = lola_input_src_info, diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index c0f0c349c3ec19fd26ab3955e1a202d9796f611b..f9c3e86d55d5515b2b834fc6e0b96410b65c8ecf 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -899,7 +899,7 @@ static int lx_control_playback_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new lx_control_playback_switch = { +static const struct snd_kcontrol_new lx_control_playback_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Playback Switch", .index = 0, diff --git a/sound/pci/mixart/mixart_mixer.c b/sound/pci/mixart/mixart_mixer.c index 51e53497f0ad1801ab338f548fcc7cbe88ab9a26..4a4616aac78750a4d1825e3a1d4b6134fe7d4ef8 100644 --- a/sound/pci/mixart/mixart_mixer.c +++ b/sound/pci/mixart/mixart_mixer.c @@ -448,7 +448,7 @@ static int mixart_audio_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_ele return changed; } -static struct snd_kcontrol_new mixart_control_output_switch = { +static const struct snd_kcontrol_new mixart_control_output_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Master Playback Switch", .info = mixart_sw_info, /* shared */ @@ -1024,7 +1024,7 @@ static int mixart_monitor_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_ return changed; } -static struct snd_kcontrol_new mixart_control_monitor_vol = { +static const struct snd_kcontrol_new mixart_control_monitor_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ), @@ -1091,7 +1091,7 @@ static int mixart_monitor_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_e return (changed != 0); } -static struct snd_kcontrol_new mixart_control_monitor_sw = { +static const struct snd_kcontrol_new mixart_control_monitor_sw = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitoring Switch", .info = mixart_sw_info, /* shared */ diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index 74afb6b75976c04a049d46aedc1e2b710dabdfe4..e36ed8af55adcb82c7cf2f69186271d7edcb7125 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -767,6 +767,8 @@ static int get_oxygen_model(struct oxygen *chip, [MODEL_FANTASIA] = "TempoTec HiFier Fantasia", [MODEL_SERENADE] = "TempoTec HiFier Serenade", [MODEL_HG2PCI] = "CMI8787-HG2PCI", + [MODEL_XONAR_DG] = "Xonar DG", + [MODEL_XONAR_DGX] = "Xonar DGX", }; chip->model = model_generic; @@ -829,12 +831,8 @@ static int get_oxygen_model(struct oxygen *chip, chip->model.dac_channels_mixer = 2; break; case MODEL_XONAR_DG: - chip->model = model_xonar_dg; - chip->model.shortname = "Xonar DG"; - break; case MODEL_XONAR_DGX: chip->model = model_xonar_dg; - chip->model.shortname = "Xonar DGX"; break; } if (id->driver_data == MODEL_MERIDIAN || diff --git a/sound/pci/pcxhr/pcxhr_mix22.c b/sound/pci/pcxhr/pcxhr_mix22.c index 6a56e5306a6594f5977fcd43d9dc05f3e2cad062..8b4d0282efb8ecde05fc61870e9a2fec226c93c2 100644 --- a/sound/pci/pcxhr/pcxhr_mix22.c +++ b/sound/pci/pcxhr/pcxhr_mix22.c @@ -744,7 +744,7 @@ static int hr222_mic_vol_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new hr222_control_mic_level = { +static const struct snd_kcontrol_new hr222_control_mic_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ), @@ -794,7 +794,7 @@ static int hr222_mic_boost_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new hr222_control_mic_boost = { +static const struct snd_kcontrol_new hr222_control_mic_boost = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ), @@ -836,7 +836,7 @@ static int hr222_phantom_power_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new hr222_phantom_power_switch = { +static const struct snd_kcontrol_new hr222_phantom_power_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Phantom Power Switch", .info = hr222_phantom_power_info, diff --git a/sound/pci/pcxhr/pcxhr_mixer.c b/sound/pci/pcxhr/pcxhr_mixer.c index 63136c4f3f3d29cd3aa449f7a278d3bf45df6b98..36875df30dbf0035287b787dcda5d37ec449b881 100644 --- a/sound/pci/pcxhr/pcxhr_mixer.c +++ b/sound/pci/pcxhr/pcxhr_mixer.c @@ -235,7 +235,7 @@ static int pcxhr_audio_sw_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new pcxhr_control_output_switch = { +static const struct snd_kcontrol_new pcxhr_control_output_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Master Playback Switch", .info = pcxhr_sw_info, /* shared */ @@ -460,7 +460,7 @@ static int pcxhr_pcm_sw_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new pcxhr_control_pcm_switch = { +static const struct snd_kcontrol_new pcxhr_control_pcm_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Playback Switch", .count = PCXHR_PLAYBACK_STREAMS, @@ -509,7 +509,7 @@ static int pcxhr_monitor_vol_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new pcxhr_control_monitor_vol = { +static const struct snd_kcontrol_new pcxhr_control_monitor_vol = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ), @@ -562,7 +562,7 @@ static int pcxhr_monitor_sw_put(struct snd_kcontrol *kcontrol, return (changed != 0); } -static struct snd_kcontrol_new pcxhr_control_monitor_sw = { +static const struct snd_kcontrol_new pcxhr_control_monitor_sw = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitoring Playback Switch", .info = pcxhr_sw_info, /* shared */ @@ -697,7 +697,7 @@ static int pcxhr_audio_src_put(struct snd_kcontrol *kcontrol, return ret; } -static struct snd_kcontrol_new pcxhr_control_audio_src = { +static const struct snd_kcontrol_new pcxhr_control_audio_src = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capture Source", .info = pcxhr_audio_src_info, @@ -798,7 +798,7 @@ static int pcxhr_clock_type_put(struct snd_kcontrol *kcontrol, return ret; } -static struct snd_kcontrol_new pcxhr_control_clock_type = { +static const struct snd_kcontrol_new pcxhr_control_clock_type = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Clock Mode", .info = pcxhr_clock_type_info, @@ -842,7 +842,7 @@ static int pcxhr_clock_rate_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new pcxhr_control_clock_rate = { +static const struct snd_kcontrol_new pcxhr_control_clock_rate = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_CARD, .name = "Clock Rates", @@ -1017,14 +1017,14 @@ static int pcxhr_iec958_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new pcxhr_control_playback_iec958_mask = { +static const struct snd_kcontrol_new pcxhr_control_playback_iec958_mask = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,MASK), .info = pcxhr_iec958_info, .get = pcxhr_iec958_mask_get }; -static struct snd_kcontrol_new pcxhr_control_playback_iec958 = { +static const struct snd_kcontrol_new pcxhr_control_playback_iec958 = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), .info = pcxhr_iec958_info, @@ -1033,14 +1033,14 @@ static struct snd_kcontrol_new pcxhr_control_playback_iec958 = { .private_value = 0 /* playback */ }; -static struct snd_kcontrol_new pcxhr_control_capture_iec958_mask = { +static const struct snd_kcontrol_new pcxhr_control_capture_iec958_mask = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",CAPTURE,MASK), .info = pcxhr_iec958_info, .get = pcxhr_iec958_mask_get }; -static struct snd_kcontrol_new pcxhr_control_capture_iec958 = { +static const struct snd_kcontrol_new pcxhr_control_capture_iec958 = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",CAPTURE,DEFAULT), diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index 19c9df6b0f3df2e5cc0b431c816e652b82dacc3a..f067c76d77f82978469f8d58aa0bc6960df95a7f 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -137,12 +137,12 @@ MODULE_PARM_DESC(id, "ID string for Riptide soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Riptide soundcard."); #ifdef SUPPORT_JOYSTICK -module_param_array(joystick_port, int, NULL, 0444); +module_param_hw_array(joystick_port, int, ioport, NULL, 0444); MODULE_PARM_DESC(joystick_port, "Joystick port # for Riptide soundcard."); #endif -module_param_array(mpu_port, int, NULL, 0444); +module_param_hw_array(mpu_port, int, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU401 port # for Riptide driver."); -module_param_array(opl3_port, int, NULL, 0444); +module_param_hw_array(opl3_port, int, ioport, NULL, 0444); MODULE_PARM_DESC(opl3_port, "OPL3 port # for Riptide driver."); /* diff --git a/sound/pci/sonicvibes.c b/sound/pci/sonicvibes.c index a6aa48c5b9693949d2544a285d1a13685ec5bf6b..8e3d4ec39c35cbdc5ba231f724403b9e0f33d859 100644 --- a/sound/pci/sonicvibes.c +++ b/sound/pci/sonicvibes.c @@ -66,7 +66,7 @@ module_param_array(reverb, bool, NULL, 0444); MODULE_PARM_DESC(reverb, "Enable reverb (SRAM is present) for S3 SonicVibes soundcard."); module_param_array(mge, bool, NULL, 0444); MODULE_PARM_DESC(mge, "MIC Gain Enable for S3 SonicVibes soundcard."); -module_param(dmaio, uint, 0444); +module_param_hw(dmaio, uint, ioport, 0444); MODULE_PARM_DESC(dmaio, "DDMA i/o base address for S3 SonicVibes soundcard."); /* diff --git a/sound/pci/trident/trident_main.c b/sound/pci/trident/trident_main.c index 92ad2d7a6bf8b72c27ad5371b982e093e2076d23..64d3b8eba4bb55cab9c8aa6959d252a101b6d021 100644 --- a/sound/pci/trident/trident_main.c +++ b/sound/pci/trident/trident_main.c @@ -2356,7 +2356,7 @@ static int snd_trident_spdif_control_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_spdif_control = +static const struct snd_kcontrol_new snd_trident_spdif_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,SWITCH), @@ -2419,7 +2419,7 @@ static int snd_trident_spdif_default_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_spdif_default = +static const struct snd_kcontrol_new snd_trident_spdif_default = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), @@ -2452,7 +2452,7 @@ static int snd_trident_spdif_mask_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_trident_spdif_mask = +static const struct snd_kcontrol_new snd_trident_spdif_mask = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -2514,7 +2514,7 @@ static int snd_trident_spdif_stream_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_spdif_stream = +static const struct snd_kcontrol_new snd_trident_spdif_stream = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -2564,7 +2564,7 @@ static int snd_trident_ac97_control_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_ac97_rear_control = +static const struct snd_kcontrol_new snd_trident_ac97_rear_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Rear Path", @@ -2622,7 +2622,7 @@ static int snd_trident_vol_control_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_vol_music_control = +static const struct snd_kcontrol_new snd_trident_vol_music_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Music Playback Volume", @@ -2633,7 +2633,7 @@ static struct snd_kcontrol_new snd_trident_vol_music_control = .tlv = { .p = db_scale_gvol }, }; -static struct snd_kcontrol_new snd_trident_vol_wave_control = +static const struct snd_kcontrol_new snd_trident_vol_wave_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Wave Playback Volume", @@ -2700,7 +2700,7 @@ static int snd_trident_pcm_vol_control_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_pcm_vol_control = +static const struct snd_kcontrol_new snd_trident_pcm_vol_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Front Playback Volume", @@ -2764,7 +2764,7 @@ static int snd_trident_pcm_pan_control_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_pcm_pan_control = +static const struct snd_kcontrol_new snd_trident_pcm_pan_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Pan Playback Control", @@ -2821,7 +2821,7 @@ static int snd_trident_pcm_rvol_control_put(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_SCALE(db_scale_crvol, -3175, 25, 1); -static struct snd_kcontrol_new snd_trident_pcm_rvol_control = +static const struct snd_kcontrol_new snd_trident_pcm_rvol_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Reverb Playback Volume", @@ -2877,7 +2877,7 @@ static int snd_trident_pcm_cvol_control_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_trident_pcm_cvol_control = +static const struct snd_kcontrol_new snd_trident_pcm_cvol_control = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Chorus Playback Volume", diff --git a/sound/pci/via82xx.c b/sound/pci/via82xx.c index 2d8c14e3f8d29fefb3164fe0e0c3d8b6a5ff8397..b6c84d15b10bbdcbb26ce71dbdd2d895f7a961a0 100644 --- a/sound/pci/via82xx.c +++ b/sound/pci/via82xx.c @@ -92,7 +92,7 @@ module_param(index, int, 0444); MODULE_PARM_DESC(index, "Index value for VIA 82xx bridge."); module_param(id, charp, 0444); MODULE_PARM_DESC(id, "ID string for VIA 82xx bridge."); -module_param(mpu_port, long, 0444); +module_param_hw(mpu_port, long, ioport, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 port. (VT82C686x only)"); #ifdef SUPPORT_JOYSTICK module_param(joystick, bool, 0444); @@ -1683,7 +1683,7 @@ static int snd_via8233_dxs3_spdif_put(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_via8233_dxs3_spdif_control = { +static const struct snd_kcontrol_new snd_via8233_dxs3_spdif_control = { .name = SNDRV_CTL_NAME_IEC958("Output ",NONE,SWITCH), .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = snd_via8233_dxs3_spdif_info, @@ -1772,7 +1772,7 @@ static int snd_via8233_pcmdxs_volume_put(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_SCALE(db_scale_dxs, -4650, 150, 1); -static struct snd_kcontrol_new snd_via8233_pcmdxs_volume_control = { +static const struct snd_kcontrol_new snd_via8233_pcmdxs_volume_control = { .name = "PCM Playback Volume", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | @@ -1783,7 +1783,7 @@ static struct snd_kcontrol_new snd_via8233_pcmdxs_volume_control = { .tlv = { .p = db_scale_dxs } }; -static struct snd_kcontrol_new snd_via8233_dxs_volume_control = { +static const struct snd_kcontrol_new snd_via8233_dxs_volume_control = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .device = 0, /* .subdevice set later */ diff --git a/sound/pci/vx222/vx222_ops.c b/sound/pci/vx222/vx222_ops.c index 8e457ea27f8918e13a5e949d7ee243503b2f294f..7df1663ea510766aa73a6f271412fdbaa2d530bd 100644 --- a/sound/pci/vx222/vx222_ops.c +++ b/sound/pci/vx222/vx222_ops.c @@ -945,7 +945,7 @@ static int vx_mic_level_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_v return 0; } -static struct snd_kcontrol_new vx_control_input_level = { +static const struct snd_kcontrol_new vx_control_input_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ), @@ -956,7 +956,7 @@ static struct snd_kcontrol_new vx_control_input_level = { .tlv = { .p = db_scale_mic }, }; -static struct snd_kcontrol_new vx_control_mic_level = { +static const struct snd_kcontrol_new vx_control_mic_level = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ), diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c index 812e27a1bcbc04b0fbd7fbbab3efd44e41aded48..4faf3e1ed06a78ca3c1a46dc16fd596f51c7752b 100644 --- a/sound/pci/ymfpci/ymfpci.c +++ b/sound/pci/ymfpci/ymfpci.c @@ -55,12 +55,12 @@ module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for the Yamaha DS-1 PCI soundcard."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable Yamaha DS-1 soundcard."); -module_param_array(mpu_port, long, NULL, 0444); +module_param_hw_array(mpu_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(mpu_port, "MPU-401 Port."); -module_param_array(fm_port, long, NULL, 0444); +module_param_hw_array(fm_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(fm_port, "FM OPL-3 Port."); #ifdef SUPPORT_JOYSTICK -module_param_array(joystick_port, long, NULL, 0444); +module_param_hw_array(joystick_port, long, ioport, NULL, 0444); MODULE_PARM_DESC(joystick_port, "Joystick port address"); #endif module_param_array(rear_switch, bool, NULL, 0444); diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index ffee284898b37d0dd2d649b1b7882941909fa00d..fe4ba463b57c5f76f4030c700bb0975eacf057eb 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -1316,7 +1316,7 @@ static int snd_ymfpci_spdif_default_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ymfpci_spdif_default = +static const struct snd_kcontrol_new snd_ymfpci_spdif_default = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,DEFAULT), @@ -1344,7 +1344,7 @@ static int snd_ymfpci_spdif_mask_get(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ymfpci_spdif_mask = +static const struct snd_kcontrol_new snd_ymfpci_spdif_mask = { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1391,7 +1391,7 @@ static int snd_ymfpci_spdif_stream_put(struct snd_kcontrol *kcontrol, return change; } -static struct snd_kcontrol_new snd_ymfpci_spdif_stream = +static const struct snd_kcontrol_new snd_ymfpci_spdif_stream = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_INACTIVE, .iface = SNDRV_CTL_ELEM_IFACE_PCM, @@ -1439,7 +1439,7 @@ static int snd_ymfpci_drec_source_put(struct snd_kcontrol *kcontrol, struct snd_ return reg != old_reg; } -static struct snd_kcontrol_new snd_ymfpci_drec_source = { +static const struct snd_kcontrol_new snd_ymfpci_drec_source = { .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Direct Recording Source", @@ -1609,7 +1609,7 @@ static int snd_ymfpci_put_dup4ch(struct snd_kcontrol *kcontrol, struct snd_ctl_e return change; } -static struct snd_kcontrol_new snd_ymfpci_dup4ch = { +static const struct snd_kcontrol_new snd_ymfpci_dup4ch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "4ch Duplication", .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, @@ -1712,7 +1712,7 @@ static int snd_ymfpci_gpio_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_ return 0; } -static struct snd_kcontrol_new snd_ymfpci_rear_shared = { +static const struct snd_kcontrol_new snd_ymfpci_rear_shared = { .name = "Shared Rear/Line-In Switch", .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .info = snd_ymfpci_gpio_sw_info, @@ -1776,7 +1776,7 @@ static int snd_ymfpci_pcm_vol_put(struct snd_kcontrol *kcontrol, return 0; } -static struct snd_kcontrol_new snd_ymfpci_pcm_volume = { +static const struct snd_kcontrol_new snd_ymfpci_pcm_volume = { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Playback Volume", .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index 182d92efc7c80166edea6374456df776aa8079e9..c0abad2067e14a07feb3eee54209a3467517040c 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -47,6 +47,7 @@ source "sound/soc/cirrus/Kconfig" source "sound/soc/davinci/Kconfig" source "sound/soc/dwc/Kconfig" source "sound/soc/fsl/Kconfig" +source "sound/soc/hisilicon/Kconfig" source "sound/soc/jz4740/Kconfig" source "sound/soc/nuc900/Kconfig" source "sound/soc/omap/Kconfig" @@ -63,6 +64,7 @@ source "sound/soc/sh/Kconfig" source "sound/soc/sirf/Kconfig" source "sound/soc/spear/Kconfig" source "sound/soc/sti/Kconfig" +source "sound/soc/stm/Kconfig" source "sound/soc/sunxi/Kconfig" source "sound/soc/tegra/Kconfig" source "sound/soc/txx9/Kconfig" diff --git a/sound/soc/Makefile b/sound/soc/Makefile index 9a30f21d16ee8efb2ddfcb84e757245e1b395bee..39c27a58158db38c3f4db2a72af842747f7ef830 100644 --- a/sound/soc/Makefile +++ b/sound/soc/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_SND_SOC) += cirrus/ obj-$(CONFIG_SND_SOC) += davinci/ obj-$(CONFIG_SND_SOC) += dwc/ obj-$(CONFIG_SND_SOC) += fsl/ +obj-$(CONFIG_SND_SOC) += hisilicon/ obj-$(CONFIG_SND_SOC) += jz4740/ obj-$(CONFIG_SND_SOC) += img/ obj-$(CONFIG_SND_SOC) += intel/ @@ -43,6 +44,7 @@ obj-$(CONFIG_SND_SOC) += sh/ obj-$(CONFIG_SND_SOC) += sirf/ obj-$(CONFIG_SND_SOC) += spear/ obj-$(CONFIG_SND_SOC) += sti/ +obj-$(CONFIG_SND_SOC) += stm/ obj-$(CONFIG_SND_SOC) += sunxi/ obj-$(CONFIG_SND_SOC) += tegra/ obj-$(CONFIG_SND_SOC) += txx9/ diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 7ae46c2647d453bcad1176b6877fcbbae110416b..b7ef8c59b49a2bdb2895f2203e0c207170227c6f 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -301,6 +301,14 @@ static int atmel_classd_codec_probe(struct snd_soc_codec *codec) return 0; } +static int atmel_classd_codec_resume(struct snd_soc_codec *codec) +{ + struct snd_soc_card *card = snd_soc_codec_get_drvdata(codec); + struct atmel_classd *dd = snd_soc_card_get_drvdata(card); + + return regcache_sync(dd->regmap); +} + static struct regmap *atmel_classd_codec_get_remap(struct device *dev) { return dev_get_regmap(dev, NULL); @@ -308,6 +316,7 @@ static struct regmap *atmel_classd_codec_get_remap(struct device *dev) static struct snd_soc_codec_driver soc_codec_dev_classd = { .probe = atmel_classd_codec_probe, + .resume = atmel_classd_codec_resume, .get_regmap = atmel_classd_codec_get_remap, .component_driver = { .controls = atmel_classd_snd_controls, diff --git a/sound/soc/blackfin/bfin-eval-adau1373.c b/sound/soc/blackfin/bfin-eval-adau1373.c index 72ac789884262bbdc41e63adfcabe1dee3911814..64b88fdc1f6cfc3af2bcc5500a70731c12d2f462 100644 --- a/sound/soc/blackfin/bfin-eval-adau1373.c +++ b/sound/soc/blackfin/bfin-eval-adau1373.c @@ -119,7 +119,7 @@ static int bfin_eval_adau1373_codec_init(struct snd_soc_pcm_runtime *rtd) return ret; } -static struct snd_soc_ops bfin_eval_adau1373_ops = { +static const struct snd_soc_ops bfin_eval_adau1373_ops = { .hw_params = bfin_eval_adau1373_hw_params, }; diff --git a/sound/soc/blackfin/bfin-eval-adav80x.c b/sound/soc/blackfin/bfin-eval-adav80x.c index 1037477d10b2f962cf6d7ea13844d01539bdeafb..99e5ecabdcda3c23d8beef43bdca4ccd56b0e160 100644 --- a/sound/soc/blackfin/bfin-eval-adav80x.c +++ b/sound/soc/blackfin/bfin-eval-adav80x.c @@ -64,7 +64,7 @@ static int bfin_eval_adav80x_codec_init(struct snd_soc_pcm_runtime *rtd) return 0; } -static struct snd_soc_ops bfin_eval_adav80x_ops = { +static const struct snd_soc_ops bfin_eval_adav80x_ops = { .hw_params = bfin_eval_adav80x_hw_params, }; diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index e49e9da7f1f6e41028a0ca832e76e7dce0bc1c01..883ed4c8a5510fb9090ad1889976003960781d89 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -49,6 +49,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_CS35L32 if I2C select SND_SOC_CS35L33 if I2C select SND_SOC_CS35L34 if I2C + select SND_SOC_CS35L35 if I2C select SND_SOC_CS42L42 if I2C select SND_SOC_CS42L51_I2C if I2C select SND_SOC_CS42L52 if I2C && INPUT @@ -69,9 +70,11 @@ config SND_SOC_ALL_CODECS select SND_SOC_DA7219 if I2C select SND_SOC_DA732X if I2C select SND_SOC_DA9055 if I2C + select SND_SOC_DIO2125 select SND_SOC_DMIC select SND_SOC_ES8328_SPI if SPI_MASTER select SND_SOC_ES8328_I2C if I2C + select SND_SOC_ES7134 select SND_SOC_GTM601 select SND_SOC_HDAC_HDMI select SND_SOC_ICS43432 @@ -89,6 +92,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_MAX9867 if I2C select SND_SOC_MAX98925 if I2C select SND_SOC_MAX98926 if I2C + select SND_SOC_MAX98927 if I2C select SND_SOC_MAX9850 if I2C select SND_SOC_MAX9860 if I2C select SND_SOC_MAX9768 if I2C @@ -97,6 +101,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_ML26124 if I2C select SND_SOC_NAU8540 if I2C select SND_SOC_NAU8810 if I2C + select SND_SOC_NAU8824 if I2C select SND_SOC_NAU8825 if I2C select SND_SOC_HDMI_CODEC select SND_SOC_PCM1681 if I2C @@ -303,12 +308,14 @@ config SND_SOC_ADAU1761 select SND_SOC_ADAU17X1 config SND_SOC_ADAU1761_I2C - tristate + tristate "Analog Devices AU1761 CODEC - I2C" + depends on I2C select SND_SOC_ADAU1761 select REGMAP_I2C config SND_SOC_ADAU1761_SPI - tristate + tristate "Analog Devices AU1761 CODEC - SPI" + depends on SPI select SND_SOC_ADAU1761 select REGMAP_SPI @@ -408,6 +415,10 @@ config SND_SOC_CS35L34 tristate "Cirrus Logic CS35L34 CODEC" depends on I2C +config SND_SOC_CS35L35 + tristate "Cirrus Logic CS35L35 CODEC" + depends on I2C + config SND_SOC_CS42L42 tristate "Cirrus Logic CS42L42 CODEC" depends on I2C @@ -516,6 +527,10 @@ config SND_SOC_DA732X config SND_SOC_DA9055 tristate +config SND_SOC_DIO2125 + tristate "Dioo DIO2125 Amplifier" + select GPIOLIB + config SND_SOC_DMIC tristate @@ -525,6 +540,9 @@ config SND_SOC_HDMI_CODEC select SND_PCM_IEC958 select HDMI +config SND_SOC_ES7134 + tristate "Everest Semi ES7134 CODEC" + config SND_SOC_ES8328 tristate @@ -588,6 +606,10 @@ config SND_SOC_MAX98925 config SND_SOC_MAX98926 tristate +config SND_SOC_MAX98927 + tristate "Maxim Integrated MAX98927 Speaker Amplifier" + depends on I2C + config SND_SOC_MAX9850 tristate @@ -1116,6 +1138,10 @@ config SND_SOC_NAU8810 tristate "Nuvoton Technology Corporation NAU88C10 CODEC" depends on I2C +config SND_SOC_NAU8824 + tristate "Nuvoton Technology Corporation NAU88L24 CODEC" + depends on I2C + config SND_SOC_NAU8825 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 1796cb987e712c78b0fe8f3aa8ec71177256b5b2..28a63fdaf982fdf6b15afcdc56fd11f89698c250 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -39,6 +39,7 @@ snd-soc-cq93vc-objs := cq93vc.o snd-soc-cs35l32-objs := cs35l32.o snd-soc-cs35l33-objs := cs35l33.o snd-soc-cs35l34-objs := cs35l34.o +snd-soc-cs35l35-objs := cs35l35.o snd-soc-cs42l42-objs := cs42l42.o snd-soc-cs42l51-objs := cs42l51.o snd-soc-cs42l51-i2c-objs := cs42l51-i2c.o @@ -63,6 +64,7 @@ snd-soc-da7219-objs := da7219.o da7219-aad.o snd-soc-da732x-objs := da732x.o snd-soc-da9055-objs := da9055.o snd-soc-dmic-objs := dmic.o +snd-soc-es7134-objs := es7134.o snd-soc-es8328-objs := es8328.o snd-soc-es8328-i2c-objs := es8328-i2c.o snd-soc-es8328-spi-objs := es8328-spi.o @@ -84,6 +86,7 @@ snd-soc-max98371-objs := max98371.o snd-soc-max9867-objs := max9867.o snd-soc-max98925-objs := max98925.o snd-soc-max98926-objs := max98926.o +snd-soc-max98927-objs := max98927.o snd-soc-max9850-objs := max9850.o snd-soc-max9860-objs := max9860.o snd-soc-mc13783-objs := mc13783.o @@ -92,6 +95,7 @@ snd-soc-msm8916-analog-objs := msm8916-wcd-analog.o snd-soc-msm8916-digital-objs := msm8916-wcd-digital.o snd-soc-nau8540-objs := nau8540.o snd-soc-nau8810-objs := nau8810.o +snd-soc-nau8824-objs := nau8824.o snd-soc-nau8825-objs := nau8825.o snd-soc-hdmi-codec-objs := hdmi-codec.o snd-soc-pcm1681-objs := pcm1681.o @@ -221,6 +225,7 @@ snd-soc-wm9712-objs := wm9712.o snd-soc-wm9713-objs := wm9713.o snd-soc-wm-hubs-objs := wm_hubs.o # Amp +snd-soc-dio2125-objs := dio2125.o snd-soc-max9877-objs := max9877.o snd-soc-max98504-objs := max98504.o snd-soc-tpa6130a2-objs := tpa6130a2.o @@ -269,6 +274,7 @@ obj-$(CONFIG_SND_SOC_CQ0093VC) += snd-soc-cq93vc.o obj-$(CONFIG_SND_SOC_CS35L32) += snd-soc-cs35l32.o obj-$(CONFIG_SND_SOC_CS35L33) += snd-soc-cs35l33.o obj-$(CONFIG_SND_SOC_CS35L34) += snd-soc-cs35l34.o +obj-$(CONFIG_SND_SOC_CS35L35) += snd-soc-cs35l35.o obj-$(CONFIG_SND_SOC_CS42L42) += snd-soc-cs42l42.o obj-$(CONFIG_SND_SOC_CS42L51) += snd-soc-cs42l51.o obj-$(CONFIG_SND_SOC_CS42L51_I2C) += snd-soc-cs42l51-i2c.o @@ -293,6 +299,7 @@ obj-$(CONFIG_SND_SOC_DA7219) += snd-soc-da7219.o obj-$(CONFIG_SND_SOC_DA732X) += snd-soc-da732x.o obj-$(CONFIG_SND_SOC_DA9055) += snd-soc-da9055.o obj-$(CONFIG_SND_SOC_DMIC) += snd-soc-dmic.o +obj-$(CONFIG_SND_SOC_ES7134) += snd-soc-es7134.o obj-$(CONFIG_SND_SOC_ES8328) += snd-soc-es8328.o obj-$(CONFIG_SND_SOC_ES8328_I2C)+= snd-soc-es8328-i2c.o obj-$(CONFIG_SND_SOC_ES8328_SPI)+= snd-soc-es8328-spi.o @@ -313,6 +320,7 @@ obj-$(CONFIG_SND_SOC_MAX98357A) += snd-soc-max98357a.o obj-$(CONFIG_SND_SOC_MAX9867) += snd-soc-max9867.o obj-$(CONFIG_SND_SOC_MAX98925) += snd-soc-max98925.o obj-$(CONFIG_SND_SOC_MAX98926) += snd-soc-max98926.o +obj-$(CONFIG_SND_SOC_MAX98927) += snd-soc-max98927.o obj-$(CONFIG_SND_SOC_MAX9850) += snd-soc-max9850.o obj-$(CONFIG_SND_SOC_MAX9860) += snd-soc-max9860.o obj-$(CONFIG_SND_SOC_MC13783) += snd-soc-mc13783.o @@ -321,6 +329,7 @@ obj-$(CONFIG_SND_SOC_MSM8916_WCD_ANALOG) +=snd-soc-msm8916-analog.o obj-$(CONFIG_SND_SOC_MSM8916_WCD_DIGITAL) +=snd-soc-msm8916-digital.o obj-$(CONFIG_SND_SOC_NAU8540) += snd-soc-nau8540.o obj-$(CONFIG_SND_SOC_NAU8810) += snd-soc-nau8810.o +obj-$(CONFIG_SND_SOC_NAU8824) += snd-soc-nau8824.o obj-$(CONFIG_SND_SOC_NAU8825) += snd-soc-nau8825.o obj-$(CONFIG_SND_SOC_HDMI_CODEC) += snd-soc-hdmi-codec.o obj-$(CONFIG_SND_SOC_PCM1681) += snd-soc-pcm1681.o @@ -448,6 +457,7 @@ obj-$(CONFIG_SND_SOC_WM_ADSP) += snd-soc-wm-adsp.o obj-$(CONFIG_SND_SOC_WM_HUBS) += snd-soc-wm-hubs.o # Amp +obj-$(CONFIG_SND_SOC_DIO2125) += snd-soc-dio2125.o obj-$(CONFIG_SND_SOC_MAX9877) += snd-soc-max9877.o obj-$(CONFIG_SND_SOC_MAX98504) += snd-soc-max98504.o obj-$(CONFIG_SND_SOC_TPA6130A2) += snd-soc-tpa6130a2.o diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index e819dd8c82fdced146515a9da7c604f783bc2d32..b2dfddead22774cc85a9b4612b8edbfd3ceffa5c 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -75,6 +75,12 @@ #define DFS_DOUBLE_SPEED (1 << 2) #define DFS_QUAD_SPEED (2 << 2) +/* ICTRL */ +#define ICTRL_MASK (0x3) + +/* OCTRL */ +#define OCTRL_MASK (0x3F) + struct ak4613_formats { unsigned int width; unsigned int fmt; @@ -365,8 +371,8 @@ static int ak4613_dai_hw_params(struct snd_pcm_substream *substream, snd_soc_update_bits(codec, CTRL1, FMT_MASK, fmt_ctrl); snd_soc_update_bits(codec, CTRL2, DFS_MASK, ctrl2); - snd_soc_write(codec, ICTRL, priv->ic); - snd_soc_write(codec, OCTRL, priv->oc); + snd_soc_update_bits(codec, ICTRL, ICTRL_MASK, priv->ic); + snd_soc_update_bits(codec, OCTRL, OCTRL_MASK, priv->oc); hw_params_end: if (ret < 0) diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c new file mode 100644 index 0000000000000000000000000000000000000000..f8aef5869b03afad906a0159f5836655f7495c51 --- /dev/null +++ b/sound/soc/codecs/cs35l35.c @@ -0,0 +1,1580 @@ +/* + * cs35l35.c -- CS35L35 ALSA SoC audio driver + * + * Copyright 2017 Cirrus Logic, Inc. + * + * Author: Brian Austin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cs35l35.h" + +/* + * Some fields take zero as a valid value so use a high bit flag that won't + * get written to the device to mark those. + */ +#define CS35L35_VALID_PDATA 0x80000000 + +static const struct reg_default cs35l35_reg[] = { + {CS35L35_PWRCTL1, 0x01}, + {CS35L35_PWRCTL2, 0x11}, + {CS35L35_PWRCTL3, 0x00}, + {CS35L35_CLK_CTL1, 0x04}, + {CS35L35_CLK_CTL2, 0x12}, + {CS35L35_CLK_CTL3, 0xCF}, + {CS35L35_SP_FMT_CTL1, 0x20}, + {CS35L35_SP_FMT_CTL2, 0x00}, + {CS35L35_SP_FMT_CTL3, 0x02}, + {CS35L35_MAG_COMP_CTL, 0x00}, + {CS35L35_AMP_INP_DRV_CTL, 0x01}, + {CS35L35_AMP_DIG_VOL_CTL, 0x12}, + {CS35L35_AMP_DIG_VOL, 0x00}, + {CS35L35_ADV_DIG_VOL, 0x00}, + {CS35L35_PROTECT_CTL, 0x06}, + {CS35L35_AMP_GAIN_AUD_CTL, 0x13}, + {CS35L35_AMP_GAIN_PDM_CTL, 0x00}, + {CS35L35_AMP_GAIN_ADV_CTL, 0x00}, + {CS35L35_GPI_CTL, 0x00}, + {CS35L35_BST_CVTR_V_CTL, 0x00}, + {CS35L35_BST_PEAK_I, 0x07}, + {CS35L35_BST_RAMP_CTL, 0x85}, + {CS35L35_BST_CONV_COEF_1, 0x24}, + {CS35L35_BST_CONV_COEF_2, 0x24}, + {CS35L35_BST_CONV_SLOPE_COMP, 0x4E}, + {CS35L35_BST_CONV_SW_FREQ, 0x04}, + {CS35L35_CLASS_H_CTL, 0x0B}, + {CS35L35_CLASS_H_HEADRM_CTL, 0x0B}, + {CS35L35_CLASS_H_RELEASE_RATE, 0x08}, + {CS35L35_CLASS_H_FET_DRIVE_CTL, 0x41}, + {CS35L35_CLASS_H_VP_CTL, 0xC5}, + {CS35L35_VPBR_CTL, 0x0A}, + {CS35L35_VPBR_VOL_CTL, 0x90}, + {CS35L35_VPBR_TIMING_CTL, 0x6A}, + {CS35L35_VPBR_MODE_VOL_CTL, 0x00}, + {CS35L35_SPKR_MON_CTL, 0xC0}, + {CS35L35_IMON_SCALE_CTL, 0x30}, + {CS35L35_AUDIN_RXLOC_CTL, 0x00}, + {CS35L35_ADVIN_RXLOC_CTL, 0x80}, + {CS35L35_VMON_TXLOC_CTL, 0x00}, + {CS35L35_IMON_TXLOC_CTL, 0x80}, + {CS35L35_VPMON_TXLOC_CTL, 0x04}, + {CS35L35_VBSTMON_TXLOC_CTL, 0x84}, + {CS35L35_VPBR_STATUS_TXLOC_CTL, 0x04}, + {CS35L35_ZERO_FILL_LOC_CTL, 0x00}, + {CS35L35_AUDIN_DEPTH_CTL, 0x0F}, + {CS35L35_SPKMON_DEPTH_CTL, 0x0F}, + {CS35L35_SUPMON_DEPTH_CTL, 0x0F}, + {CS35L35_ZEROFILL_DEPTH_CTL, 0x00}, + {CS35L35_MULT_DEV_SYNCH1, 0x02}, + {CS35L35_MULT_DEV_SYNCH2, 0x80}, + {CS35L35_PROT_RELEASE_CTL, 0x00}, + {CS35L35_DIAG_MODE_REG_LOCK, 0x00}, + {CS35L35_DIAG_MODE_CTL_1, 0x40}, + {CS35L35_DIAG_MODE_CTL_2, 0x00}, + {CS35L35_INT_MASK_1, 0xFF}, + {CS35L35_INT_MASK_2, 0xFF}, + {CS35L35_INT_MASK_3, 0xFF}, + {CS35L35_INT_MASK_4, 0xFF}, + +}; + +static bool cs35l35_volatile_register(struct device *dev, unsigned int reg) +{ + switch (reg) { + case CS35L35_INT_STATUS_1: + case CS35L35_INT_STATUS_2: + case CS35L35_INT_STATUS_3: + case CS35L35_INT_STATUS_4: + case CS35L35_PLL_STATUS: + case CS35L35_OTP_TRIM_STATUS: + return true; + default: + return false; + } +} + +static bool cs35l35_readable_register(struct device *dev, unsigned int reg) +{ + switch (reg) { + case CS35L35_DEVID_AB ... CS35L35_PWRCTL3: + case CS35L35_CLK_CTL1 ... CS35L35_SP_FMT_CTL3: + case CS35L35_MAG_COMP_CTL ... CS35L35_AMP_GAIN_AUD_CTL: + case CS35L35_AMP_GAIN_PDM_CTL ... CS35L35_BST_PEAK_I: + case CS35L35_BST_RAMP_CTL ... CS35L35_BST_CONV_SW_FREQ: + case CS35L35_CLASS_H_CTL ... CS35L35_CLASS_H_VP_CTL: + case CS35L35_CLASS_H_STATUS: + case CS35L35_VPBR_CTL ... CS35L35_VPBR_MODE_VOL_CTL: + case CS35L35_VPBR_ATTEN_STATUS: + case CS35L35_SPKR_MON_CTL: + case CS35L35_IMON_SCALE_CTL ... CS35L35_ZEROFILL_DEPTH_CTL: + case CS35L35_MULT_DEV_SYNCH1 ... CS35L35_PROT_RELEASE_CTL: + case CS35L35_DIAG_MODE_REG_LOCK ... CS35L35_DIAG_MODE_CTL_2: + case CS35L35_INT_MASK_1 ... CS35L35_PLL_STATUS: + case CS35L35_OTP_TRIM_STATUS: + return true; + default: + return false; + } +} + +static bool cs35l35_precious_register(struct device *dev, unsigned int reg) +{ + switch (reg) { + case CS35L35_INT_STATUS_1: + case CS35L35_INT_STATUS_2: + case CS35L35_INT_STATUS_3: + case CS35L35_INT_STATUS_4: + case CS35L35_PLL_STATUS: + case CS35L35_OTP_TRIM_STATUS: + return true; + default: + return false; + } +} + +static int cs35l35_wait_for_pdn(struct cs35l35_private *cs35l35) +{ + int ret; + + if (cs35l35->pdata.ext_bst) { + usleep_range(5000, 5500); + return 0; + } + + reinit_completion(&cs35l35->pdn_done); + + ret = wait_for_completion_timeout(&cs35l35->pdn_done, + msecs_to_jiffies(100)); + if (ret == 0) { + dev_err(cs35l35->dev, "PDN_DONE did not complete\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int cs35l35_sdin_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + int ret = 0; + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL1, + CS35L35_MCLK_DIS_MASK, + 0 << CS35L35_MCLK_DIS_SHIFT); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL1, + CS35L35_DISCHG_FILT_MASK, + 0 << CS35L35_DISCHG_FILT_SHIFT); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL1, + CS35L35_PDN_ALL_MASK, 0); + break; + case SND_SOC_DAPM_POST_PMD: + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL1, + CS35L35_DISCHG_FILT_MASK, + 1 << CS35L35_DISCHG_FILT_SHIFT); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL1, + CS35L35_PDN_ALL_MASK, 1); + + /* Already muted, so disable volume ramp for faster shutdown */ + regmap_update_bits(cs35l35->regmap, CS35L35_AMP_DIG_VOL_CTL, + CS35L35_AMP_DIGSFT_MASK, 0); + + ret = cs35l35_wait_for_pdn(cs35l35); + + regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL1, + CS35L35_MCLK_DIS_MASK, + 1 << CS35L35_MCLK_DIS_SHIFT); + + regmap_update_bits(cs35l35->regmap, CS35L35_AMP_DIG_VOL_CTL, + CS35L35_AMP_DIGSFT_MASK, + 1 << CS35L35_AMP_DIGSFT_SHIFT); + break; + default: + dev_err(codec->dev, "Invalid event = 0x%x\n", event); + ret = -EINVAL; + } + return ret; +} + +static int cs35l35_main_amp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + unsigned int reg[4]; + int i; + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + if (cs35l35->pdata.bst_pdn_fet_on) + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_BST_MASK, + 0 << CS35L35_PDN_BST_FETON_SHIFT); + else + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_BST_MASK, + 0 << CS35L35_PDN_BST_FETOFF_SHIFT); + break; + case SND_SOC_DAPM_POST_PMU: + usleep_range(5000, 5100); + /* If in PDM mode we must use VP for Voltage control */ + if (cs35l35->pdm_mode) + regmap_update_bits(cs35l35->regmap, + CS35L35_BST_CVTR_V_CTL, + CS35L35_BST_CTL_MASK, + 0 << CS35L35_BST_CTL_SHIFT); + + regmap_update_bits(cs35l35->regmap, CS35L35_PROTECT_CTL, + CS35L35_AMP_MUTE_MASK, 0); + + for (i = 0; i < 2; i++) + regmap_bulk_read(cs35l35->regmap, CS35L35_INT_STATUS_1, + ®, ARRAY_SIZE(reg)); + + break; + case SND_SOC_DAPM_PRE_PMD: + regmap_update_bits(cs35l35->regmap, CS35L35_PROTECT_CTL, + CS35L35_AMP_MUTE_MASK, + 1 << CS35L35_AMP_MUTE_SHIFT); + if (cs35l35->pdata.bst_pdn_fet_on) + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_BST_MASK, + 1 << CS35L35_PDN_BST_FETON_SHIFT); + else + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_BST_MASK, + 1 << CS35L35_PDN_BST_FETOFF_SHIFT); + break; + case SND_SOC_DAPM_POST_PMD: + usleep_range(5000, 5100); + /* + * If PDM mode we should switch back to pdata value + * for Voltage control when we go down + */ + if (cs35l35->pdm_mode) + regmap_update_bits(cs35l35->regmap, + CS35L35_BST_CVTR_V_CTL, + CS35L35_BST_CTL_MASK, + cs35l35->pdata.bst_vctl + << CS35L35_BST_CTL_SHIFT); + + break; + default: + dev_err(codec->dev, "Invalid event = 0x%x\n", event); + } + return 0; +} + +static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1); +static DECLARE_TLV_DB_SCALE(dig_vol_tlv, -10200, 50, 0); + +static const struct snd_kcontrol_new cs35l35_aud_controls[] = { + SOC_SINGLE_SX_TLV("Digital Audio Volume", CS35L35_AMP_DIG_VOL, + 0, 0x34, 0xE4, dig_vol_tlv), + SOC_SINGLE_TLV("Analog Audio Volume", CS35L35_AMP_GAIN_AUD_CTL, 0, 19, 0, + amp_gain_tlv), + SOC_SINGLE_TLV("PDM Volume", CS35L35_AMP_GAIN_PDM_CTL, 0, 19, 0, + amp_gain_tlv), +}; + +static const struct snd_kcontrol_new cs35l35_adv_controls[] = { + SOC_SINGLE_SX_TLV("Digital Advisory Volume", CS35L35_ADV_DIG_VOL, + 0, 0x34, 0xE4, dig_vol_tlv), + SOC_SINGLE_TLV("Analog Advisory Volume", CS35L35_AMP_GAIN_ADV_CTL, 0, 19, 0, + amp_gain_tlv), +}; + +static const struct snd_soc_dapm_widget cs35l35_dapm_widgets[] = { + SND_SOC_DAPM_AIF_IN_E("SDIN", NULL, 0, CS35L35_PWRCTL3, 1, 1, + cs35l35_sdin_event, SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_AIF_OUT("SDOUT", NULL, 0, CS35L35_PWRCTL3, 2, 1), + + SND_SOC_DAPM_OUTPUT("SPK"), + + SND_SOC_DAPM_INPUT("VP"), + SND_SOC_DAPM_INPUT("VBST"), + SND_SOC_DAPM_INPUT("ISENSE"), + SND_SOC_DAPM_INPUT("VSENSE"), + + SND_SOC_DAPM_ADC("VMON ADC", NULL, CS35L35_PWRCTL2, 7, 1), + SND_SOC_DAPM_ADC("IMON ADC", NULL, CS35L35_PWRCTL2, 6, 1), + SND_SOC_DAPM_ADC("VPMON ADC", NULL, CS35L35_PWRCTL3, 3, 1), + SND_SOC_DAPM_ADC("VBSTMON ADC", NULL, CS35L35_PWRCTL3, 4, 1), + SND_SOC_DAPM_ADC("CLASS H", NULL, CS35L35_PWRCTL2, 5, 1), + + SND_SOC_DAPM_OUT_DRV_E("Main AMP", CS35L35_PWRCTL2, 0, 1, NULL, 0, + cs35l35_main_amp_event, SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD | SND_SOC_DAPM_POST_PMU | + SND_SOC_DAPM_PRE_PMD), +}; + +static const struct snd_soc_dapm_route cs35l35_audio_map[] = { + {"VPMON ADC", NULL, "VP"}, + {"VBSTMON ADC", NULL, "VBST"}, + {"IMON ADC", NULL, "ISENSE"}, + {"VMON ADC", NULL, "VSENSE"}, + {"SDOUT", NULL, "IMON ADC"}, + {"SDOUT", NULL, "VMON ADC"}, + {"SDOUT", NULL, "VBSTMON ADC"}, + {"SDOUT", NULL, "VPMON ADC"}, + {"AMP Capture", NULL, "SDOUT"}, + + {"SDIN", NULL, "AMP Playback"}, + {"CLASS H", NULL, "SDIN"}, + {"Main AMP", NULL, "CLASS H"}, + {"SPK", NULL, "Main AMP"}, +}; + +static int cs35l35_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL1, + CS35L35_MS_MASK, 1 << CS35L35_MS_SHIFT); + cs35l35->slave_mode = false; + break; + case SND_SOC_DAIFMT_CBS_CFS: + regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL1, + CS35L35_MS_MASK, 0 << CS35L35_MS_SHIFT); + cs35l35->slave_mode = true; + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + cs35l35->i2s_mode = true; + cs35l35->pdm_mode = false; + break; + case SND_SOC_DAIFMT_PDM: + cs35l35->pdm_mode = true; + cs35l35->i2s_mode = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +struct cs35l35_sysclk_config { + int sysclk; + int srate; + u8 clk_cfg; +}; + +static struct cs35l35_sysclk_config cs35l35_clk_ctl[] = { + + /* SYSCLK, Sample Rate, Serial Port Cfg */ + {5644800, 44100, 0x00}, + {5644800, 88200, 0x40}, + {6144000, 48000, 0x10}, + {6144000, 96000, 0x50}, + {11289600, 44100, 0x01}, + {11289600, 88200, 0x41}, + {11289600, 176400, 0x81}, + {12000000, 44100, 0x03}, + {12000000, 48000, 0x13}, + {12000000, 88200, 0x43}, + {12000000, 96000, 0x53}, + {12000000, 176400, 0x83}, + {12000000, 192000, 0x93}, + {12288000, 48000, 0x11}, + {12288000, 96000, 0x51}, + {12288000, 192000, 0x91}, + {13000000, 44100, 0x07}, + {13000000, 48000, 0x17}, + {13000000, 88200, 0x47}, + {13000000, 96000, 0x57}, + {13000000, 176400, 0x87}, + {13000000, 192000, 0x97}, + {22579200, 44100, 0x02}, + {22579200, 88200, 0x42}, + {22579200, 176400, 0x82}, + {24000000, 44100, 0x0B}, + {24000000, 48000, 0x1B}, + {24000000, 88200, 0x4B}, + {24000000, 96000, 0x5B}, + {24000000, 176400, 0x8B}, + {24000000, 192000, 0x9B}, + {24576000, 48000, 0x12}, + {24576000, 96000, 0x52}, + {24576000, 192000, 0x92}, + {26000000, 44100, 0x0F}, + {26000000, 48000, 0x1F}, + {26000000, 88200, 0x4F}, + {26000000, 96000, 0x5F}, + {26000000, 176400, 0x8F}, + {26000000, 192000, 0x9F}, +}; + +static int cs35l35_get_clk_config(int sysclk, int srate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cs35l35_clk_ctl); i++) { + if (cs35l35_clk_ctl[i].sysclk == sysclk && + cs35l35_clk_ctl[i].srate == srate) + return cs35l35_clk_ctl[i].clk_cfg; + } + return -EINVAL; +} + +static int cs35l35_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + struct classh_cfg *classh = &cs35l35->pdata.classh_algo; + int srate = params_rate(params); + int ret = 0; + u8 sp_sclks; + int audin_format; + int errata_chk; + + int clk_ctl = cs35l35_get_clk_config(cs35l35->sysclk, srate); + + if (clk_ctl < 0) { + dev_err(codec->dev, "Invalid CLK:Rate %d:%d\n", + cs35l35->sysclk, srate); + return -EINVAL; + } + + ret = regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL2, + CS35L35_CLK_CTL2_MASK, clk_ctl); + if (ret != 0) { + dev_err(codec->dev, "Failed to set port config %d\n", ret); + return ret; + } + + /* + * Rev A0 Errata + * When configured for the weak-drive detection path (CH_WKFET_DIS = 0) + * the Class H algorithm does not enable weak-drive operation for + * nonzero values of CH_WKFET_DELAY if SP_RATE = 01 or 10 + */ + errata_chk = clk_ctl & CS35L35_SP_RATE_MASK; + + if (classh->classh_wk_fet_disable == 0x00 && + (errata_chk == 0x01 || errata_chk == 0x03)) { + ret = regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_FET_DRIVE_CTL, + CS35L35_CH_WKFET_DEL_MASK, + 0 << CS35L35_CH_WKFET_DEL_SHIFT); + if (ret != 0) { + dev_err(codec->dev, "Failed to set fet config %d\n", + ret); + return ret; + } + } + + /* + * You can pull more Monitor data from the SDOUT pin than going to SDIN + * Just make sure your SCLK is fast enough to fill the frame + */ + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + switch (params_width(params)) { + case 8: + audin_format = CS35L35_SDIN_DEPTH_8; + break; + case 16: + audin_format = CS35L35_SDIN_DEPTH_16; + break; + case 24: + audin_format = CS35L35_SDIN_DEPTH_24; + break; + default: + dev_err(codec->dev, "Unsupported Width %d\n", + params_width(params)); + return -EINVAL; + } + regmap_update_bits(cs35l35->regmap, + CS35L35_AUDIN_DEPTH_CTL, + CS35L35_AUDIN_DEPTH_MASK, + audin_format << + CS35L35_AUDIN_DEPTH_SHIFT); + if (cs35l35->pdata.stereo) { + regmap_update_bits(cs35l35->regmap, + CS35L35_AUDIN_DEPTH_CTL, + CS35L35_ADVIN_DEPTH_MASK, + audin_format << + CS35L35_ADVIN_DEPTH_SHIFT); + } + } + + if (cs35l35->i2s_mode) { + /* We have to take the SCLK to derive num sclks + * to configure the CLOCK_CTL3 register correctly + */ + if ((cs35l35->sclk / srate) % 4) { + dev_err(codec->dev, "Unsupported sclk/fs ratio %d:%d\n", + cs35l35->sclk, srate); + return -EINVAL; + } + sp_sclks = ((cs35l35->sclk / srate) / 4) - 1; + + /* Only certain ratios are supported in I2S Slave Mode */ + if (cs35l35->slave_mode) { + switch (sp_sclks) { + case CS35L35_SP_SCLKS_32FS: + case CS35L35_SP_SCLKS_48FS: + case CS35L35_SP_SCLKS_64FS: + break; + default: + dev_err(codec->dev, "ratio not supported\n"); + return -EINVAL; + } + } else { + /* Only certain ratios supported in I2S MASTER Mode */ + switch (sp_sclks) { + case CS35L35_SP_SCLKS_32FS: + case CS35L35_SP_SCLKS_64FS: + break; + default: + dev_err(codec->dev, "ratio not supported\n"); + return -EINVAL; + } + } + ret = regmap_update_bits(cs35l35->regmap, + CS35L35_CLK_CTL3, + CS35L35_SP_SCLKS_MASK, sp_sclks << + CS35L35_SP_SCLKS_SHIFT); + if (ret != 0) { + dev_err(codec->dev, "Failed to set fsclk %d\n", ret); + return ret; + } + } + + return ret; +} + +static const unsigned int cs35l35_src_rates[] = { + 44100, 48000, 88200, 96000, 176400, 192000 +}; + +static const struct snd_pcm_hw_constraint_list cs35l35_constraints = { + .count = ARRAY_SIZE(cs35l35_src_rates), + .list = cs35l35_src_rates, +}; + +static int cs35l35_pcm_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + + if (!substream->runtime) + return 0; + + snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, &cs35l35_constraints); + + regmap_update_bits(cs35l35->regmap, CS35L35_AMP_INP_DRV_CTL, + CS35L35_PDM_MODE_MASK, + 0 << CS35L35_PDM_MODE_SHIFT); + + return 0; +} + +static const unsigned int cs35l35_pdm_rates[] = { + 44100, 48000, 88200, 96000 +}; + +static const struct snd_pcm_hw_constraint_list cs35l35_pdm_constraints = { + .count = ARRAY_SIZE(cs35l35_pdm_rates), + .list = cs35l35_pdm_rates, +}; + +static int cs35l35_pdm_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + + if (!substream->runtime) + return 0; + + snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + &cs35l35_pdm_constraints); + + regmap_update_bits(cs35l35->regmap, CS35L35_AMP_INP_DRV_CTL, + CS35L35_PDM_MODE_MASK, + 1 << CS35L35_PDM_MODE_SHIFT); + + return 0; +} + +static int cs35l35_dai_set_sysclk(struct snd_soc_dai *dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = dai->codec; + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + + /* Need the SCLK Frequency regardless of sysclk source for I2S */ + cs35l35->sclk = freq; + + return 0; +} + +static const struct snd_soc_dai_ops cs35l35_ops = { + .startup = cs35l35_pcm_startup, + .set_fmt = cs35l35_set_dai_fmt, + .hw_params = cs35l35_hw_params, + .set_sysclk = cs35l35_dai_set_sysclk, +}; + +static const struct snd_soc_dai_ops cs35l35_pdm_ops = { + .startup = cs35l35_pdm_startup, + .set_fmt = cs35l35_set_dai_fmt, + .hw_params = cs35l35_hw_params, +}; + +static struct snd_soc_dai_driver cs35l35_dai[] = { + { + .name = "cs35l35-pcm", + .id = 0, + .playback = { + .stream_name = "AMP Playback", + .channels_min = 1, + .channels_max = 8, + .rates = SNDRV_PCM_RATE_KNOT, + .formats = CS35L35_FORMATS, + }, + .capture = { + .stream_name = "AMP Capture", + .channels_min = 1, + .channels_max = 8, + .rates = SNDRV_PCM_RATE_KNOT, + .formats = CS35L35_FORMATS, + }, + .ops = &cs35l35_ops, + .symmetric_rates = 1, + }, + { + .name = "cs35l35-pdm", + .id = 1, + .playback = { + .stream_name = "PDM Playback", + .channels_min = 1, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_KNOT, + .formats = CS35L35_FORMATS, + }, + .ops = &cs35l35_pdm_ops, + }, +}; + +static int cs35l35_codec_set_sysclk(struct snd_soc_codec *codec, + int clk_id, int source, unsigned int freq, + int dir) +{ + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + int clksrc; + int ret = 0; + + switch (clk_id) { + case 0: + clksrc = CS35L35_CLK_SOURCE_MCLK; + break; + case 1: + clksrc = CS35L35_CLK_SOURCE_SCLK; + break; + case 2: + clksrc = CS35L35_CLK_SOURCE_PDM; + break; + default: + dev_err(codec->dev, "Invalid CLK Source\n"); + return -EINVAL; + } + + switch (freq) { + case 5644800: + case 6144000: + case 11289600: + case 12000000: + case 12288000: + case 13000000: + case 22579200: + case 24000000: + case 24576000: + case 26000000: + cs35l35->sysclk = freq; + break; + default: + dev_err(codec->dev, "Invalid CLK Frequency Input : %d\n", freq); + return -EINVAL; + } + + ret = regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL1, + CS35L35_CLK_SOURCE_MASK, + clksrc << CS35L35_CLK_SOURCE_SHIFT); + if (ret != 0) { + dev_err(codec->dev, "Failed to set sysclk %d\n", ret); + return ret; + } + + return ret; +} + +static int cs35l35_codec_probe(struct snd_soc_codec *codec) +{ + struct cs35l35_private *cs35l35 = snd_soc_codec_get_drvdata(codec); + struct classh_cfg *classh = &cs35l35->pdata.classh_algo; + struct monitor_cfg *monitor_config = &cs35l35->pdata.mon_cfg; + int ret; + + /* Set Platform Data */ + if (cs35l35->pdata.bst_vctl) + regmap_update_bits(cs35l35->regmap, CS35L35_BST_CVTR_V_CTL, + CS35L35_BST_CTL_MASK, + cs35l35->pdata.bst_vctl); + + if (cs35l35->pdata.bst_ipk) + regmap_update_bits(cs35l35->regmap, CS35L35_BST_PEAK_I, + CS35L35_BST_IPK_MASK, + cs35l35->pdata.bst_ipk << + CS35L35_BST_IPK_SHIFT); + + if (cs35l35->pdata.gain_zc) + regmap_update_bits(cs35l35->regmap, CS35L35_PROTECT_CTL, + CS35L35_AMP_GAIN_ZC_MASK, + cs35l35->pdata.gain_zc << + CS35L35_AMP_GAIN_ZC_SHIFT); + + if (cs35l35->pdata.aud_channel) + regmap_update_bits(cs35l35->regmap, + CS35L35_AUDIN_RXLOC_CTL, + CS35L35_AUD_IN_LR_MASK, + cs35l35->pdata.aud_channel << + CS35L35_AUD_IN_LR_SHIFT); + + if (cs35l35->pdata.stereo) { + regmap_update_bits(cs35l35->regmap, + CS35L35_ADVIN_RXLOC_CTL, + CS35L35_ADV_IN_LR_MASK, + cs35l35->pdata.adv_channel << + CS35L35_ADV_IN_LR_SHIFT); + if (cs35l35->pdata.shared_bst) + regmap_update_bits(cs35l35->regmap, CS35L35_CLASS_H_CTL, + CS35L35_CH_STEREO_MASK, + 1 << CS35L35_CH_STEREO_SHIFT); + ret = snd_soc_add_codec_controls(codec, cs35l35_adv_controls, + ARRAY_SIZE(cs35l35_adv_controls)); + if (ret) + return ret; + } + + if (cs35l35->pdata.sp_drv_str) + regmap_update_bits(cs35l35->regmap, CS35L35_CLK_CTL1, + CS35L35_SP_DRV_MASK, + cs35l35->pdata.sp_drv_str << + CS35L35_SP_DRV_SHIFT); + if (cs35l35->pdata.sp_drv_unused) + regmap_update_bits(cs35l35->regmap, CS35L35_SP_FMT_CTL3, + CS35L35_SP_I2S_DRV_MASK, + cs35l35->pdata.sp_drv_unused << + CS35L35_SP_I2S_DRV_SHIFT); + + if (classh->classh_algo_enable) { + if (classh->classh_bst_override) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_CTL, + CS35L35_CH_BST_OVR_MASK, + classh->classh_bst_override << + CS35L35_CH_BST_OVR_SHIFT); + if (classh->classh_bst_max_limit) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_CTL, + CS35L35_CH_BST_LIM_MASK, + classh->classh_bst_max_limit << + CS35L35_CH_BST_LIM_SHIFT); + if (classh->classh_mem_depth) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_CTL, + CS35L35_CH_MEM_DEPTH_MASK, + classh->classh_mem_depth << + CS35L35_CH_MEM_DEPTH_SHIFT); + if (classh->classh_headroom) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_HEADRM_CTL, + CS35L35_CH_HDRM_CTL_MASK, + classh->classh_headroom << + CS35L35_CH_HDRM_CTL_SHIFT); + if (classh->classh_release_rate) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_RELEASE_RATE, + CS35L35_CH_REL_RATE_MASK, + classh->classh_release_rate << + CS35L35_CH_REL_RATE_SHIFT); + if (classh->classh_wk_fet_disable) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_FET_DRIVE_CTL, + CS35L35_CH_WKFET_DIS_MASK, + classh->classh_wk_fet_disable << + CS35L35_CH_WKFET_DIS_SHIFT); + if (classh->classh_wk_fet_delay) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_FET_DRIVE_CTL, + CS35L35_CH_WKFET_DEL_MASK, + classh->classh_wk_fet_delay << + CS35L35_CH_WKFET_DEL_SHIFT); + if (classh->classh_wk_fet_thld) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_FET_DRIVE_CTL, + CS35L35_CH_WKFET_THLD_MASK, + classh->classh_wk_fet_thld << + CS35L35_CH_WKFET_THLD_SHIFT); + if (classh->classh_vpch_auto) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_VP_CTL, + CS35L35_CH_VP_AUTO_MASK, + classh->classh_vpch_auto << + CS35L35_CH_VP_AUTO_SHIFT); + if (classh->classh_vpch_rate) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_VP_CTL, + CS35L35_CH_VP_RATE_MASK, + classh->classh_vpch_rate << + CS35L35_CH_VP_RATE_SHIFT); + if (classh->classh_vpch_man) + regmap_update_bits(cs35l35->regmap, + CS35L35_CLASS_H_VP_CTL, + CS35L35_CH_VP_MAN_MASK, + classh->classh_vpch_man << + CS35L35_CH_VP_MAN_SHIFT); + } + + if (monitor_config->is_present) { + if (monitor_config->vmon_specs) { + regmap_update_bits(cs35l35->regmap, + CS35L35_SPKMON_DEPTH_CTL, + CS35L35_VMON_DEPTH_MASK, + monitor_config->vmon_dpth << + CS35L35_VMON_DEPTH_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VMON_TXLOC_CTL, + CS35L35_MON_TXLOC_MASK, + monitor_config->vmon_loc << + CS35L35_MON_TXLOC_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VMON_TXLOC_CTL, + CS35L35_MON_FRM_MASK, + monitor_config->vmon_frm << + CS35L35_MON_FRM_SHIFT); + } + if (monitor_config->imon_specs) { + regmap_update_bits(cs35l35->regmap, + CS35L35_SPKMON_DEPTH_CTL, + CS35L35_IMON_DEPTH_MASK, + monitor_config->imon_dpth << + CS35L35_IMON_DEPTH_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_IMON_TXLOC_CTL, + CS35L35_MON_TXLOC_MASK, + monitor_config->imon_loc << + CS35L35_MON_TXLOC_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_IMON_TXLOC_CTL, + CS35L35_MON_FRM_MASK, + monitor_config->imon_frm << + CS35L35_MON_FRM_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_IMON_SCALE_CTL, + CS35L35_IMON_SCALE_MASK, + monitor_config->imon_scale << + CS35L35_IMON_SCALE_SHIFT); + } + if (monitor_config->vpmon_specs) { + regmap_update_bits(cs35l35->regmap, + CS35L35_SUPMON_DEPTH_CTL, + CS35L35_VPMON_DEPTH_MASK, + monitor_config->vpmon_dpth << + CS35L35_VPMON_DEPTH_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VPMON_TXLOC_CTL, + CS35L35_MON_TXLOC_MASK, + monitor_config->vpmon_loc << + CS35L35_MON_TXLOC_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VPMON_TXLOC_CTL, + CS35L35_MON_FRM_MASK, + monitor_config->vpmon_frm << + CS35L35_MON_FRM_SHIFT); + } + if (monitor_config->vbstmon_specs) { + regmap_update_bits(cs35l35->regmap, + CS35L35_SUPMON_DEPTH_CTL, + CS35L35_VBSTMON_DEPTH_MASK, + monitor_config->vpmon_dpth << + CS35L35_VBSTMON_DEPTH_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VBSTMON_TXLOC_CTL, + CS35L35_MON_TXLOC_MASK, + monitor_config->vbstmon_loc << + CS35L35_MON_TXLOC_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VBSTMON_TXLOC_CTL, + CS35L35_MON_FRM_MASK, + monitor_config->vbstmon_frm << + CS35L35_MON_FRM_SHIFT); + } + if (monitor_config->vpbrstat_specs) { + regmap_update_bits(cs35l35->regmap, + CS35L35_SUPMON_DEPTH_CTL, + CS35L35_VPBRSTAT_DEPTH_MASK, + monitor_config->vpbrstat_dpth << + CS35L35_VPBRSTAT_DEPTH_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VPBR_STATUS_TXLOC_CTL, + CS35L35_MON_TXLOC_MASK, + monitor_config->vpbrstat_loc << + CS35L35_MON_TXLOC_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_VPBR_STATUS_TXLOC_CTL, + CS35L35_MON_FRM_MASK, + monitor_config->vpbrstat_frm << + CS35L35_MON_FRM_SHIFT); + } + if (monitor_config->zerofill_specs) { + regmap_update_bits(cs35l35->regmap, + CS35L35_SUPMON_DEPTH_CTL, + CS35L35_ZEROFILL_DEPTH_MASK, + monitor_config->zerofill_dpth << + CS35L35_ZEROFILL_DEPTH_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_ZERO_FILL_LOC_CTL, + CS35L35_MON_TXLOC_MASK, + monitor_config->zerofill_loc << + CS35L35_MON_TXLOC_SHIFT); + regmap_update_bits(cs35l35->regmap, + CS35L35_ZERO_FILL_LOC_CTL, + CS35L35_MON_FRM_MASK, + monitor_config->zerofill_frm << + CS35L35_MON_FRM_SHIFT); + } + } + + return 0; +} + +static struct snd_soc_codec_driver soc_codec_dev_cs35l35 = { + .probe = cs35l35_codec_probe, + .set_sysclk = cs35l35_codec_set_sysclk, + .component_driver = { + .dapm_widgets = cs35l35_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(cs35l35_dapm_widgets), + + .dapm_routes = cs35l35_audio_map, + .num_dapm_routes = ARRAY_SIZE(cs35l35_audio_map), + + .controls = cs35l35_aud_controls, + .num_controls = ARRAY_SIZE(cs35l35_aud_controls), + }, + +}; + +static struct regmap_config cs35l35_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = CS35L35_MAX_REGISTER, + .reg_defaults = cs35l35_reg, + .num_reg_defaults = ARRAY_SIZE(cs35l35_reg), + .volatile_reg = cs35l35_volatile_register, + .readable_reg = cs35l35_readable_register, + .precious_reg = cs35l35_precious_register, + .cache_type = REGCACHE_RBTREE, +}; + +static irqreturn_t cs35l35_irq(int irq, void *data) +{ + struct cs35l35_private *cs35l35 = data; + unsigned int sticky1, sticky2, sticky3, sticky4; + unsigned int mask1, mask2, mask3, mask4, current1; + + /* ack the irq by reading all status registers */ + regmap_read(cs35l35->regmap, CS35L35_INT_STATUS_4, &sticky4); + regmap_read(cs35l35->regmap, CS35L35_INT_STATUS_3, &sticky3); + regmap_read(cs35l35->regmap, CS35L35_INT_STATUS_2, &sticky2); + regmap_read(cs35l35->regmap, CS35L35_INT_STATUS_1, &sticky1); + + regmap_read(cs35l35->regmap, CS35L35_INT_MASK_4, &mask4); + regmap_read(cs35l35->regmap, CS35L35_INT_MASK_3, &mask3); + regmap_read(cs35l35->regmap, CS35L35_INT_MASK_2, &mask2); + regmap_read(cs35l35->regmap, CS35L35_INT_MASK_1, &mask1); + + /* Check to see if unmasked bits are active */ + if (!(sticky1 & ~mask1) && !(sticky2 & ~mask2) && !(sticky3 & ~mask3) + && !(sticky4 & ~mask4)) + return IRQ_NONE; + + if (sticky2 & CS35L35_PDN_DONE) + complete(&cs35l35->pdn_done); + + /* read the current values */ + regmap_read(cs35l35->regmap, CS35L35_INT_STATUS_1, ¤t1); + + /* handle the interrupts */ + if (sticky1 & CS35L35_CAL_ERR) { + dev_crit(cs35l35->dev, "Calibration Error\n"); + + /* error is no longer asserted; safe to reset */ + if (!(current1 & CS35L35_CAL_ERR)) { + pr_debug("%s : Cal error release\n", __func__); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_CAL_ERR_RLS, 0); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_CAL_ERR_RLS, + CS35L35_CAL_ERR_RLS); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_CAL_ERR_RLS, 0); + } + } + + if (sticky1 & CS35L35_AMP_SHORT) { + dev_crit(cs35l35->dev, "AMP Short Error\n"); + /* error is no longer asserted; safe to reset */ + if (!(current1 & CS35L35_AMP_SHORT)) { + dev_dbg(cs35l35->dev, "Amp short error release\n"); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_SHORT_RLS, 0); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_SHORT_RLS, + CS35L35_SHORT_RLS); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_SHORT_RLS, 0); + } + } + + if (sticky1 & CS35L35_OTW) { + dev_warn(cs35l35->dev, "Over temperature warning\n"); + + /* error is no longer asserted; safe to reset */ + if (!(current1 & CS35L35_OTW)) { + dev_dbg(cs35l35->dev, "Over temperature warn release\n"); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_OTW_RLS, 0); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_OTW_RLS, + CS35L35_OTW_RLS); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_OTW_RLS, 0); + } + } + + if (sticky1 & CS35L35_OTE) { + dev_crit(cs35l35->dev, "Over temperature error\n"); + /* error is no longer asserted; safe to reset */ + if (!(current1 & CS35L35_OTE)) { + dev_dbg(cs35l35->dev, "Over temperature error release\n"); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_OTE_RLS, 0); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_OTE_RLS, + CS35L35_OTE_RLS); + regmap_update_bits(cs35l35->regmap, + CS35L35_PROT_RELEASE_CTL, + CS35L35_OTE_RLS, 0); + } + } + + if (sticky3 & CS35L35_BST_HIGH) { + dev_crit(cs35l35->dev, "VBST error: powering off!\n"); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_AMP, CS35L35_PDN_AMP); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL1, + CS35L35_PDN_ALL, CS35L35_PDN_ALL); + } + + if (sticky3 & CS35L35_LBST_SHORT) { + dev_crit(cs35l35->dev, "LBST error: powering off!\n"); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_AMP, CS35L35_PDN_AMP); + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL1, + CS35L35_PDN_ALL, CS35L35_PDN_ALL); + } + + if (sticky2 & CS35L35_VPBR_ERR) + dev_dbg(cs35l35->dev, "Error: Reactive Brownout\n"); + + if (sticky4 & CS35L35_VMON_OVFL) + dev_dbg(cs35l35->dev, "Error: VMON overflow\n"); + + if (sticky4 & CS35L35_IMON_OVFL) + dev_dbg(cs35l35->dev, "Error: IMON overflow\n"); + + return IRQ_HANDLED; +} + + +static int cs35l35_handle_of_data(struct i2c_client *i2c_client, + struct cs35l35_platform_data *pdata) +{ + struct device_node *np = i2c_client->dev.of_node; + struct device_node *classh, *signal_format; + struct classh_cfg *classh_config = &pdata->classh_algo; + struct monitor_cfg *monitor_config = &pdata->mon_cfg; + unsigned int val32 = 0; + u8 monitor_array[4]; + const int imon_array_size = ARRAY_SIZE(monitor_array); + const int mon_array_size = imon_array_size - 1; + int ret = 0; + + if (!np) + return 0; + + pdata->bst_pdn_fet_on = of_property_read_bool(np, + "cirrus,boost-pdn-fet-on"); + + ret = of_property_read_u32(np, "cirrus,boost-ctl-millivolt", &val32); + if (ret >= 0) { + if (val32 < 2600 || val32 > 9000) { + dev_err(&i2c_client->dev, + "Invalid Boost Voltage %d mV\n", val32); + return -EINVAL; + } + pdata->bst_vctl = ((val32 - 2600) / 100) + 1; + } + + ret = of_property_read_u32(np, "cirrus,boost-peak-milliamp", &val32); + if (ret >= 0) { + if (val32 < 1680 || val32 > 4480) { + dev_err(&i2c_client->dev, + "Invalid Boost Peak Current %u mA\n", val32); + return -EINVAL; + } + + pdata->bst_ipk = (val32 - 1680) / 110; + } + + if (of_property_read_u32(np, "cirrus,sp-drv-strength", &val32) >= 0) + pdata->sp_drv_str = val32; + if (of_property_read_u32(np, "cirrus,sp-drv-unused", &val32) >= 0) + pdata->sp_drv_unused = val32 | CS35L35_VALID_PDATA; + + pdata->stereo = of_property_read_bool(np, "cirrus,stereo-config"); + + if (pdata->stereo) { + ret = of_property_read_u32(np, "cirrus,audio-channel", &val32); + if (ret >= 0) + pdata->aud_channel = val32; + + ret = of_property_read_u32(np, "cirrus,advisory-channel", + &val32); + if (ret >= 0) + pdata->adv_channel = val32; + + pdata->shared_bst = of_property_read_bool(np, + "cirrus,shared-boost"); + } + + pdata->ext_bst = of_property_read_bool(np, "cirrus,external-boost"); + + pdata->gain_zc = of_property_read_bool(np, "cirrus,amp-gain-zc"); + + classh = of_get_child_by_name(np, "cirrus,classh-internal-algo"); + classh_config->classh_algo_enable = classh ? true : false; + + if (classh_config->classh_algo_enable) { + classh_config->classh_bst_override = + of_property_read_bool(np, "cirrus,classh-bst-overide"); + + ret = of_property_read_u32(classh, + "cirrus,classh-bst-max-limit", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_bst_max_limit = val32; + } + + ret = of_property_read_u32(classh, + "cirrus,classh-bst-max-limit", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_bst_max_limit = val32; + } + + ret = of_property_read_u32(classh, "cirrus,classh-mem-depth", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_mem_depth = val32; + } + + ret = of_property_read_u32(classh, "cirrus,classh-release-rate", + &val32); + if (ret >= 0) + classh_config->classh_release_rate = val32; + + ret = of_property_read_u32(classh, "cirrus,classh-headroom", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_headroom = val32; + } + + ret = of_property_read_u32(classh, + "cirrus,classh-wk-fet-disable", + &val32); + if (ret >= 0) + classh_config->classh_wk_fet_disable = val32; + + ret = of_property_read_u32(classh, "cirrus,classh-wk-fet-delay", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_wk_fet_delay = val32; + } + + ret = of_property_read_u32(classh, "cirrus,classh-wk-fet-thld", + &val32); + if (ret >= 0) + classh_config->classh_wk_fet_thld = val32; + + ret = of_property_read_u32(classh, "cirrus,classh-vpch-auto", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_vpch_auto = val32; + } + + ret = of_property_read_u32(classh, "cirrus,classh-vpch-rate", + &val32); + if (ret >= 0) { + val32 |= CS35L35_VALID_PDATA; + classh_config->classh_vpch_rate = val32; + } + + ret = of_property_read_u32(classh, "cirrus,classh-vpch-man", + &val32); + if (ret >= 0) + classh_config->classh_vpch_man = val32; + } + of_node_put(classh); + + /* frame depth location */ + signal_format = of_get_child_by_name(np, "cirrus,monitor-signal-format"); + monitor_config->is_present = signal_format ? true : false; + if (monitor_config->is_present) { + ret = of_property_read_u8_array(signal_format, "cirrus,imon", + monitor_array, imon_array_size); + if (!ret) { + monitor_config->imon_specs = true; + monitor_config->imon_dpth = monitor_array[0]; + monitor_config->imon_loc = monitor_array[1]; + monitor_config->imon_frm = monitor_array[2]; + monitor_config->imon_scale = monitor_array[3]; + } + ret = of_property_read_u8_array(signal_format, "cirrus,vmon", + monitor_array, mon_array_size); + if (!ret) { + monitor_config->vmon_specs = true; + monitor_config->vmon_dpth = monitor_array[0]; + monitor_config->vmon_loc = monitor_array[1]; + monitor_config->vmon_frm = monitor_array[2]; + } + ret = of_property_read_u8_array(signal_format, "cirrus,vpmon", + monitor_array, mon_array_size); + if (!ret) { + monitor_config->vpmon_specs = true; + monitor_config->vpmon_dpth = monitor_array[0]; + monitor_config->vpmon_loc = monitor_array[1]; + monitor_config->vpmon_frm = monitor_array[2]; + } + ret = of_property_read_u8_array(signal_format, "cirrus,vbstmon", + monitor_array, mon_array_size); + if (!ret) { + monitor_config->vbstmon_specs = true; + monitor_config->vbstmon_dpth = monitor_array[0]; + monitor_config->vbstmon_loc = monitor_array[1]; + monitor_config->vbstmon_frm = monitor_array[2]; + } + ret = of_property_read_u8_array(signal_format, "cirrus,vpbrstat", + monitor_array, mon_array_size); + if (!ret) { + monitor_config->vpbrstat_specs = true; + monitor_config->vpbrstat_dpth = monitor_array[0]; + monitor_config->vpbrstat_loc = monitor_array[1]; + monitor_config->vpbrstat_frm = monitor_array[2]; + } + ret = of_property_read_u8_array(signal_format, "cirrus,zerofill", + monitor_array, mon_array_size); + if (!ret) { + monitor_config->zerofill_specs = true; + monitor_config->zerofill_dpth = monitor_array[0]; + monitor_config->zerofill_loc = monitor_array[1]; + monitor_config->zerofill_frm = monitor_array[2]; + } + } + of_node_put(signal_format); + + return 0; +} + +/* Errata Rev A0 */ +static const struct reg_sequence cs35l35_errata_patch[] = { + + { 0x7F, 0x99 }, + { 0x00, 0x99 }, + { 0x52, 0x22 }, + { 0x04, 0x14 }, + { 0x6D, 0x44 }, + { 0x24, 0x10 }, + { 0x58, 0xC4 }, + { 0x00, 0x98 }, + { 0x18, 0x08 }, + { 0x00, 0x00 }, + { 0x7F, 0x00 }, +}; + +static int cs35l35_i2c_probe(struct i2c_client *i2c_client, + const struct i2c_device_id *id) +{ + struct cs35l35_private *cs35l35; + struct device *dev = &i2c_client->dev; + struct cs35l35_platform_data *pdata = dev_get_platdata(dev); + int i; + int ret; + unsigned int devid = 0; + unsigned int reg; + + cs35l35 = devm_kzalloc(dev, sizeof(struct cs35l35_private), GFP_KERNEL); + if (!cs35l35) + return -ENOMEM; + + cs35l35->dev = dev; + + i2c_set_clientdata(i2c_client, cs35l35); + cs35l35->regmap = devm_regmap_init_i2c(i2c_client, &cs35l35_regmap); + if (IS_ERR(cs35l35->regmap)) { + ret = PTR_ERR(cs35l35->regmap); + dev_err(dev, "regmap_init() failed: %d\n", ret); + goto err; + } + + for (i = 0; i < ARRAY_SIZE(cs35l35_supplies); i++) + cs35l35->supplies[i].supply = cs35l35_supplies[i]; + + cs35l35->num_supplies = ARRAY_SIZE(cs35l35_supplies); + + ret = devm_regulator_bulk_get(dev, cs35l35->num_supplies, + cs35l35->supplies); + if (ret != 0) { + dev_err(dev, "Failed to request core supplies: %d\n", ret); + return ret; + } + + if (pdata) { + cs35l35->pdata = *pdata; + } else { + pdata = devm_kzalloc(dev, sizeof(struct cs35l35_platform_data), + GFP_KERNEL); + if (!pdata) + return -ENOMEM; + if (i2c_client->dev.of_node) { + ret = cs35l35_handle_of_data(i2c_client, pdata); + if (ret != 0) + return ret; + + } + cs35l35->pdata = *pdata; + } + + ret = regulator_bulk_enable(cs35l35->num_supplies, + cs35l35->supplies); + if (ret != 0) { + dev_err(dev, "Failed to enable core supplies: %d\n", ret); + return ret; + } + + /* returning NULL can be valid if in stereo mode */ + cs35l35->reset_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_OUT_LOW); + if (IS_ERR(cs35l35->reset_gpio)) { + ret = PTR_ERR(cs35l35->reset_gpio); + cs35l35->reset_gpio = NULL; + if (ret == -EBUSY) { + dev_info(dev, + "Reset line busy, assuming shared reset\n"); + } else { + dev_err(dev, "Failed to get reset GPIO: %d\n", ret); + goto err; + } + } + + gpiod_set_value_cansleep(cs35l35->reset_gpio, 1); + + init_completion(&cs35l35->pdn_done); + + ret = devm_request_threaded_irq(dev, i2c_client->irq, NULL, cs35l35_irq, + IRQF_ONESHOT | IRQF_TRIGGER_LOW | + IRQF_SHARED, "cs35l35", cs35l35); + if (ret != 0) { + dev_err(dev, "Failed to request IRQ: %d\n", ret); + goto err; + } + /* initialize codec */ + ret = regmap_read(cs35l35->regmap, CS35L35_DEVID_AB, ®); + + devid = (reg & 0xFF) << 12; + ret = regmap_read(cs35l35->regmap, CS35L35_DEVID_CD, ®); + devid |= (reg & 0xFF) << 4; + ret = regmap_read(cs35l35->regmap, CS35L35_DEVID_E, ®); + devid |= (reg & 0xF0) >> 4; + + if (devid != CS35L35_CHIP_ID) { + dev_err(dev, "CS35L35 Device ID (%X). Expected ID %X\n", + devid, CS35L35_CHIP_ID); + ret = -ENODEV; + goto err; + } + + ret = regmap_read(cs35l35->regmap, CS35L35_REV_ID, ®); + if (ret < 0) { + dev_err(dev, "Get Revision ID failed: %d\n", ret); + goto err; + } + + ret = regmap_register_patch(cs35l35->regmap, cs35l35_errata_patch, + ARRAY_SIZE(cs35l35_errata_patch)); + if (ret < 0) { + dev_err(dev, "Failed to apply errata patch: %d\n", ret); + goto err; + } + + dev_info(dev, "Cirrus Logic CS35L35 (%x), Revision: %02X\n", + devid, reg & 0xFF); + + /* Set the INT Masks for critical errors */ + regmap_write(cs35l35->regmap, CS35L35_INT_MASK_1, + CS35L35_INT1_CRIT_MASK); + regmap_write(cs35l35->regmap, CS35L35_INT_MASK_2, + CS35L35_INT2_CRIT_MASK); + regmap_write(cs35l35->regmap, CS35L35_INT_MASK_3, + CS35L35_INT3_CRIT_MASK); + regmap_write(cs35l35->regmap, CS35L35_INT_MASK_4, + CS35L35_INT4_CRIT_MASK); + + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PWR2_PDN_MASK, + CS35L35_PWR2_PDN_MASK); + + if (cs35l35->pdata.bst_pdn_fet_on) + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_BST_MASK, + 1 << CS35L35_PDN_BST_FETON_SHIFT); + else + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL2, + CS35L35_PDN_BST_MASK, + 1 << CS35L35_PDN_BST_FETOFF_SHIFT); + + regmap_update_bits(cs35l35->regmap, CS35L35_PWRCTL3, + CS35L35_PWR3_PDN_MASK, + CS35L35_PWR3_PDN_MASK); + + regmap_update_bits(cs35l35->regmap, CS35L35_PROTECT_CTL, + CS35L35_AMP_MUTE_MASK, 1 << CS35L35_AMP_MUTE_SHIFT); + + ret = snd_soc_register_codec(dev, &soc_codec_dev_cs35l35, cs35l35_dai, + ARRAY_SIZE(cs35l35_dai)); + if (ret < 0) { + dev_err(dev, "Failed to register codec: %d\n", ret); + goto err; + } + + return 0; + +err: + regulator_bulk_disable(cs35l35->num_supplies, + cs35l35->supplies); + gpiod_set_value_cansleep(cs35l35->reset_gpio, 0); + + return ret; +} + +static int cs35l35_i2c_remove(struct i2c_client *client) +{ + snd_soc_unregister_codec(&client->dev); + return 0; +} + +static const struct of_device_id cs35l35_of_match[] = { + {.compatible = "cirrus,cs35l35"}, + {}, +}; +MODULE_DEVICE_TABLE(of, cs35l35_of_match); + +static const struct i2c_device_id cs35l35_id[] = { + {"cs35l35", 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, cs35l35_id); + +static struct i2c_driver cs35l35_i2c_driver = { + .driver = { + .name = "cs35l35", + .of_match_table = cs35l35_of_match, + }, + .id_table = cs35l35_id, + .probe = cs35l35_i2c_probe, + .remove = cs35l35_i2c_remove, +}; + +module_i2c_driver(cs35l35_i2c_driver); + +MODULE_DESCRIPTION("ASoC CS35L35 driver"); +MODULE_AUTHOR("Brian Austin, Cirrus Logic Inc, "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/cs35l35.h b/sound/soc/codecs/cs35l35.h new file mode 100644 index 0000000000000000000000000000000000000000..5a6e43a87c4ddc3e1a6d527c0242cf69af6e87a8 --- /dev/null +++ b/sound/soc/codecs/cs35l35.h @@ -0,0 +1,294 @@ +/* + * cs35l35.h -- CS35L35 ALSA SoC audio driver + * + * Copyright 2016 Cirrus Logic, Inc. + * + * Author: Brian Austin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __CS35L35_H__ +#define __CS35L35_H__ + +#define CS35L35_FIRSTREG 0x01 +#define CS35L35_LASTREG 0x7E +#define CS35L35_CHIP_ID 0x00035A35 +#define CS35L35_DEVID_AB 0x01 /* Device ID A & B [RO] */ +#define CS35L35_DEVID_CD 0x02 /* Device ID C & D [RO] */ +#define CS35L35_DEVID_E 0x03 /* Device ID E [RO] */ +#define CS35L35_FAB_ID 0x04 /* Fab ID [RO] */ +#define CS35L35_REV_ID 0x05 /* Revision ID [RO] */ +#define CS35L35_PWRCTL1 0x06 /* Power Ctl 1 */ +#define CS35L35_PWRCTL2 0x07 /* Power Ctl 2 */ +#define CS35L35_PWRCTL3 0x08 /* Power Ctl 3 */ +#define CS35L35_CLK_CTL1 0x0A /* Clocking Ctl 1 */ +#define CS35L35_CLK_CTL2 0x0B /* Clocking Ctl 2 */ +#define CS35L35_CLK_CTL3 0x0C /* Clocking Ctl 3 */ +#define CS35L35_SP_FMT_CTL1 0x0D /* Serial Port Format CTL1 */ +#define CS35L35_SP_FMT_CTL2 0x0E /* Serial Port Format CTL2 */ +#define CS35L35_SP_FMT_CTL3 0x0F /* Serial Port Format CTL3 */ +#define CS35L35_MAG_COMP_CTL 0x13 /* Magnitude Comp CTL */ +#define CS35L35_AMP_INP_DRV_CTL 0x14 /* Amp Input Drive Ctl */ +#define CS35L35_AMP_DIG_VOL_CTL 0x15 /* Amplifier Dig Volume Ctl */ +#define CS35L35_AMP_DIG_VOL 0x16 /* Amplifier Dig Volume */ +#define CS35L35_ADV_DIG_VOL 0x17 /* Advisory Digital Volume */ +#define CS35L35_PROTECT_CTL 0x18 /* Amp Gain - Prot Ctl Param */ +#define CS35L35_AMP_GAIN_AUD_CTL 0x19 /* Amp Serial Port Gain Ctl */ +#define CS35L35_AMP_GAIN_PDM_CTL 0x1A /* Amplifier Gain PDM Ctl */ +#define CS35L35_AMP_GAIN_ADV_CTL 0x1B /* Amplifier Gain Ctl */ +#define CS35L35_GPI_CTL 0x1C /* GPI Ctl */ +#define CS35L35_BST_CVTR_V_CTL 0x1D /* Boost Conv Voltage Ctl */ +#define CS35L35_BST_PEAK_I 0x1E /* Boost Conv Peak Current */ +#define CS35L35_BST_RAMP_CTL 0x20 /* Boost Conv Soft Ramp Ctl */ +#define CS35L35_BST_CONV_COEF_1 0x21 /* Boost Conv Coefficients 1 */ +#define CS35L35_BST_CONV_COEF_2 0x22 /* Boost Conv Coefficients 2 */ +#define CS35L35_BST_CONV_SLOPE_COMP 0x23 /* Boost Conv Slope Comp */ +#define CS35L35_BST_CONV_SW_FREQ 0x24 /* Boost Conv L BST SW Freq */ +#define CS35L35_CLASS_H_CTL 0x30 /* CLS H Control */ +#define CS35L35_CLASS_H_HEADRM_CTL 0x31 /* CLS H Headroom Ctl */ +#define CS35L35_CLASS_H_RELEASE_RATE 0x32 /* CLS H Release Rate */ +#define CS35L35_CLASS_H_FET_DRIVE_CTL 0x33 /* CLS H Weak FET Drive Ctl */ +#define CS35L35_CLASS_H_VP_CTL 0x34 /* CLS H VP Ctl */ +#define CS35L35_CLASS_H_STATUS 0x38 /* CLS H Status */ +#define CS35L35_VPBR_CTL 0x3A /* VPBR Ctl */ +#define CS35L35_VPBR_VOL_CTL 0x3B /* VPBR Volume Ctl */ +#define CS35L35_VPBR_TIMING_CTL 0x3C /* VPBR Timing Ctl */ +#define CS35L35_VPBR_MODE_VOL_CTL 0x3D /* VPBR Mode/Attack Vol Ctl */ +#define CS35L35_VPBR_ATTEN_STATUS 0x4B /* VPBR Attenuation Status */ +#define CS35L35_SPKR_MON_CTL 0x4E /* Speaker Monitoring Ctl */ +#define CS35L35_IMON_SCALE_CTL 0x51 /* IMON Scale Ctl */ +#define CS35L35_AUDIN_RXLOC_CTL 0x52 /* Audio Input RX Loc Ctl */ +#define CS35L35_ADVIN_RXLOC_CTL 0x53 /* Advisory Input RX Loc Ctl */ +#define CS35L35_VMON_TXLOC_CTL 0x54 /* VMON TX Loc Ctl */ +#define CS35L35_IMON_TXLOC_CTL 0x55 /* IMON TX Loc Ctl */ +#define CS35L35_VPMON_TXLOC_CTL 0x56 /* VPMON TX Loc Ctl */ +#define CS35L35_VBSTMON_TXLOC_CTL 0x57 /* VBSTMON TX Loc Ctl */ +#define CS35L35_VPBR_STATUS_TXLOC_CTL 0x58 /* VPBR Status TX Loc Ctl */ +#define CS35L35_ZERO_FILL_LOC_CTL 0x59 /* Zero Fill Loc Ctl */ +#define CS35L35_AUDIN_DEPTH_CTL 0x5A /* Audio Input Depth Ctl */ +#define CS35L35_SPKMON_DEPTH_CTL 0x5B /* SPK Mon Output Depth Ctl */ +#define CS35L35_SUPMON_DEPTH_CTL 0x5C /* Supply Mon Out Depth Ctl */ +#define CS35L35_ZEROFILL_DEPTH_CTL 0x5D /* Zero Fill Mon Output Ctl */ +#define CS35L35_MULT_DEV_SYNCH1 0x62 /* Multidevice Synch */ +#define CS35L35_MULT_DEV_SYNCH2 0x63 /* Multidevice Synch 2 */ +#define CS35L35_PROT_RELEASE_CTL 0x64 /* Protection Release Ctl */ +#define CS35L35_DIAG_MODE_REG_LOCK 0x68 /* Diagnostic Mode Reg Lock */ +#define CS35L35_DIAG_MODE_CTL_1 0x69 /* Diagnostic Mode Ctl 1 */ +#define CS35L35_DIAG_MODE_CTL_2 0x6A /* Diagnostic Mode Ctl 2 */ +#define CS35L35_INT_MASK_1 0x70 /* Interrupt Mask 1 */ +#define CS35L35_INT_MASK_2 0x71 /* Interrupt Mask 2 */ +#define CS35L35_INT_MASK_3 0x72 /* Interrupt Mask 3 */ +#define CS35L35_INT_MASK_4 0x73 /* Interrupt Mask 4 */ +#define CS35L35_INT_STATUS_1 0x74 /* Interrupt Status 1 */ +#define CS35L35_INT_STATUS_2 0x75 /* Interrupt Status 2 */ +#define CS35L35_INT_STATUS_3 0x76 /* Interrupt Status 3 */ +#define CS35L35_INT_STATUS_4 0x77 /* Interrupt Status 4 */ +#define CS35L35_PLL_STATUS 0x78 /* PLL Status */ +#define CS35L35_OTP_TRIM_STATUS 0x7E /* OTP Trim Status */ + +#define CS35L35_MAX_REGISTER 0x7F + +/* CS35L35_PWRCTL1 */ +#define CS35L35_SFT_RST 0x80 +#define CS35L35_DISCHG_FLT 0x02 +#define CS35L35_PDN_ALL 0x01 + +/* CS35L35_PWRCTL2 */ +#define CS35L35_PDN_VMON 0x80 +#define CS35L35_PDN_IMON 0x40 +#define CS35L35_PDN_CLASSH 0x20 +#define CS35L35_PDN_VPBR 0x10 +#define CS35L35_PDN_BST 0x04 +#define CS35L35_PDN_AMP 0x01 + +/* CS35L35_PWRCTL3 */ +#define CS35L35_PDN_VBSTMON_OUT 0x10 +#define CS35L35_PDN_VMON_OUT 0x08 + +#define CS35L35_AUDIN_DEPTH_MASK 0x03 +#define CS35L35_AUDIN_DEPTH_SHIFT 0 +#define CS35L35_ADVIN_DEPTH_MASK 0x0C +#define CS35L35_ADVIN_DEPTH_SHIFT 2 +#define CS35L35_SDIN_DEPTH_8 0x01 +#define CS35L35_SDIN_DEPTH_16 0x02 +#define CS35L35_SDIN_DEPTH_24 0x03 + +#define CS35L35_SDOUT_DEPTH_8 0x01 +#define CS35L35_SDOUT_DEPTH_12 0x02 +#define CS35L35_SDOUT_DEPTH_16 0x03 + +#define CS35L35_AUD_IN_LR_MASK 0x80 +#define CS35L35_AUD_IN_LR_SHIFT 7 +#define CS35L35_ADV_IN_LR_MASK 0x80 +#define CS35L35_ADV_IN_LR_SHIFT 7 +#define CS35L35_AUD_IN_LOC_MASK 0x0F +#define CS35L35_AUD_IN_LOC_SHIFT 0 +#define CS35L35_ADV_IN_LOC_MASK 0x0F +#define CS35L35_ADV_IN_LOC_SHIFT 0 + +#define CS35L35_IMON_DEPTH_MASK 0x03 +#define CS35L35_IMON_DEPTH_SHIFT 0 +#define CS35L35_VMON_DEPTH_MASK 0x0C +#define CS35L35_VMON_DEPTH_SHIFT 2 +#define CS35L35_VBSTMON_DEPTH_MASK 0x03 +#define CS35L35_VBSTMON_DEPTH_SHIFT 0 +#define CS35L35_VPMON_DEPTH_MASK 0x0C +#define CS35L35_VPMON_DEPTH_SHIFT 2 +#define CS35L35_VPBRSTAT_DEPTH_MASK 0x30 +#define CS35L35_VPBRSTAT_DEPTH_SHIFT 4 +#define CS35L35_ZEROFILL_DEPTH_MASK 0x03 +#define CS35L35_ZEROFILL_DEPTH_SHIFT 0x00 + +#define CS35L35_MON_TXLOC_MASK 0x3F +#define CS35L35_MON_TXLOC_SHIFT 0 +#define CS35L35_MON_FRM_MASK 0x80 +#define CS35L35_MON_FRM_SHIFT 7 + +#define CS35L35_IMON_SCALE_MASK 0xF8 +#define CS35L35_IMON_SCALE_SHIFT 3 + +#define CS35L35_MS_MASK 0x80 +#define CS35L35_MS_SHIFT 7 +#define CS35L35_SPMODE_MASK 0x40 +#define CS35L35_SP_DRV_MASK 0x10 +#define CS35L35_SP_DRV_SHIFT 4 +#define CS35L35_CLK_CTL2_MASK 0xFF +#define CS35L35_PDM_MODE_MASK 0x40 +#define CS35L35_PDM_MODE_SHIFT 6 +#define CS35L35_CLK_SOURCE_MASK 0x03 +#define CS35L35_CLK_SOURCE_SHIFT 0 +#define CS35L35_CLK_SOURCE_MCLK 0 +#define CS35L35_CLK_SOURCE_SCLK 1 +#define CS35L35_CLK_SOURCE_PDM 2 + +#define CS35L35_SP_SCLKS_MASK 0x0F +#define CS35L35_SP_SCLKS_SHIFT 0x00 +#define CS35L35_SP_SCLKS_16FS 0x03 +#define CS35L35_SP_SCLKS_32FS 0x07 +#define CS35L35_SP_SCLKS_48FS 0x0B +#define CS35L35_SP_SCLKS_64FS 0x0F +#define CS35L35_SP_RATE_MASK 0xC0 + +#define CS35L35_PDN_BST_MASK 0x06 +#define CS35L35_PDN_BST_FETON_SHIFT 1 +#define CS35L35_PDN_BST_FETOFF_SHIFT 2 +#define CS35L35_PWR2_PDN_MASK 0xE0 +#define CS35L35_PWR3_PDN_MASK 0x1E +#define CS35L35_PDN_ALL_MASK 0x01 +#define CS35L35_DISCHG_FILT_MASK 0x02 +#define CS35L35_DISCHG_FILT_SHIFT 1 +#define CS35L35_MCLK_DIS_MASK 0x04 +#define CS35L35_MCLK_DIS_SHIFT 2 + +#define CS35L35_BST_CTL_MASK 0x7F +#define CS35L35_BST_CTL_SHIFT 0 +#define CS35L35_BST_IPK_MASK 0x1F +#define CS35L35_BST_IPK_SHIFT 0 +#define CS35L35_AMP_MUTE_MASK 0x20 +#define CS35L35_AMP_MUTE_SHIFT 5 +#define CS35L35_AMP_GAIN_ZC_MASK 0x10 +#define CS35L35_AMP_GAIN_ZC_SHIFT 4 + +#define CS35L35_AMP_DIGSFT_MASK 0x02 +#define CS35L35_AMP_DIGSFT_SHIFT 1 + +/* CS35L35_SP_FMT_CTL3 */ +#define CS35L35_SP_I2S_DRV_MASK 0x03 +#define CS35L35_SP_I2S_DRV_SHIFT 0 + +/* Class H Algorithm Control */ +#define CS35L35_CH_STEREO_MASK 0x40 +#define CS35L35_CH_STEREO_SHIFT 6 +#define CS35L35_CH_BST_OVR_MASK 0x04 +#define CS35L35_CH_BST_OVR_SHIFT 2 +#define CS35L35_CH_BST_LIM_MASK 0x08 +#define CS35L35_CH_BST_LIM_SHIFT 3 +#define CS35L35_CH_MEM_DEPTH_MASK 0x01 +#define CS35L35_CH_MEM_DEPTH_SHIFT 0 +#define CS35L35_CH_HDRM_CTL_MASK 0x3F +#define CS35L35_CH_HDRM_CTL_SHIFT 0 +#define CS35L35_CH_REL_RATE_MASK 0xFF +#define CS35L35_CH_REL_RATE_SHIFT 0 +#define CS35L35_CH_WKFET_DIS_MASK 0x80 +#define CS35L35_CH_WKFET_DIS_SHIFT 7 +#define CS35L35_CH_WKFET_DEL_MASK 0x70 +#define CS35L35_CH_WKFET_DEL_SHIFT 4 +#define CS35L35_CH_WKFET_THLD_MASK 0x0F +#define CS35L35_CH_WKFET_THLD_SHIFT 0 +#define CS35L35_CH_VP_AUTO_MASK 0x80 +#define CS35L35_CH_VP_AUTO_SHIFT 7 +#define CS35L35_CH_VP_RATE_MASK 0x60 +#define CS35L35_CH_VP_RATE_SHIFT 5 +#define CS35L35_CH_VP_MAN_MASK 0x1F +#define CS35L35_CH_VP_MAN_SHIFT 0 + +/* CS35L35_PROT_RELEASE_CTL */ +#define CS35L35_CAL_ERR_RLS 0x80 +#define CS35L35_SHORT_RLS 0x04 +#define CS35L35_OTW_RLS 0x02 +#define CS35L35_OTE_RLS 0x01 + +/* INT Mask Registers */ +#define CS35L35_INT1_CRIT_MASK 0x38 +#define CS35L35_INT2_CRIT_MASK 0xEF +#define CS35L35_INT3_CRIT_MASK 0xEE +#define CS35L35_INT4_CRIT_MASK 0xFF + +/* PDN DONE Masks */ +#define CS35L35_M_PDN_DONE_SHIFT 4 +#define CS35L35_M_PDN_DONE_MASK 0x10 + +/* CS35L35_INT_1 */ +#define CS35L35_CAL_ERR 0x80 +#define CS35L35_OTP_ERR 0x40 +#define CS35L35_LRCLK_ERR 0x20 +#define CS35L35_SPCLK_ERR 0x10 +#define CS35L35_MCLK_ERR 0x08 +#define CS35L35_AMP_SHORT 0x04 +#define CS35L35_OTW 0x02 +#define CS35L35_OTE 0x01 + +/* CS35L35_INT_2 */ +#define CS35L35_PDN_DONE 0x10 +#define CS35L35_VPBR_ERR 0x02 +#define CS35L35_VPBR_CLR 0x01 + +/* CS35L35_INT_3 */ +#define CS35L35_BST_HIGH 0x10 +#define CS35L35_BST_HIGH_FLAG 0x08 +#define CS35L35_BST_IPK_FLAG 0x04 +#define CS35L35_LBST_SHORT 0x01 + +/* CS35L35_INT_4 */ +#define CS35L35_VMON_OVFL 0x08 +#define CS35L35_IMON_OVFL 0x04 + +#define CS35L35_FORMATS (SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) + +struct cs35l35_private { + struct device *dev; + struct cs35l35_platform_data pdata; + struct regmap *regmap; + struct regulator_bulk_data supplies[2]; + int num_supplies; + int sysclk; + int sclk; + bool pdm_mode; + bool i2s_mode; + bool slave_mode; + /* GPIO for /RST */ + struct gpio_desc *reset_gpio; + struct completion pdn_done; +}; + +static const char * const cs35l35_supplies[] = { + "VA", + "VP", +}; + +#endif diff --git a/sound/soc/codecs/cs4271.c b/sound/soc/codecs/cs4271.c index 8c0f3b89b5bc8a37178f6b630fc20560af742b2a..e78b5f055f25a5961d04fb58ecf19001cbaa055c 100644 --- a/sound/soc/codecs/cs4271.c +++ b/sound/soc/codecs/cs4271.c @@ -498,7 +498,7 @@ static int cs4271_reset(struct snd_soc_codec *codec) struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec); if (gpio_is_valid(cs4271->gpio_nreset)) { - gpio_set_value(cs4271->gpio_nreset, 0); + gpio_direction_output(cs4271->gpio_nreset, 0); mdelay(1); gpio_set_value(cs4271->gpio_nreset, 1); mdelay(1); diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index cb47fb595ff412fd6a4a80aaf5a7d1e7f67f3ca6..1e0d5973b758e7563ec8e356d025c1b48f2a505e 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -1130,6 +1130,7 @@ MODULE_DEVICE_TABLE(i2c, cs53l30_id); static struct i2c_driver cs53l30_i2c_driver = { .driver = { .name = "cs53l30", + .of_match_table = cs53l30_of_match, .pm = &cs53l30_runtime_pm, }, .id_table = cs53l30_id, diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index 12da55882c06b6c075878b7cee0d1a38da5efffb..024d83fa6a7f78b81da8b55c8dbdec977871ef46 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -12,6 +12,7 @@ * option) any later version. */ +#include #include #include #include @@ -771,7 +772,7 @@ static int da7213_dai_event(struct snd_soc_dapm_widget *w, ++i; msleep(50); } - } while ((i < DA7213_SRM_CHECK_RETRIES) & (!srm_lock)); + } while ((i < DA7213_SRM_CHECK_RETRIES) && (!srm_lock)); if (!srm_lock) dev_warn(codec->dev, "SRM failed to lock\n"); @@ -1528,12 +1529,23 @@ static int da7213_set_bias_level(struct snd_soc_codec *codec, return 0; } +#if defined(CONFIG_OF) /* DT */ static const struct of_device_id da7213_of_match[] = { { .compatible = "dlg,da7213", }, { } }; MODULE_DEVICE_TABLE(of, da7213_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id da7213_acpi_match[] = { + { "DLGS7212", 0}, + { "DLGS7213", 0}, + { }, +}; +MODULE_DEVICE_TABLE(acpi, da7213_acpi_match); +#endif static enum da7213_micbias_voltage da7213_of_micbias_lvl(struct snd_soc_codec *codec, u32 val) @@ -1844,6 +1856,7 @@ static struct i2c_driver da7213_i2c_driver = { .driver = { .name = "da7213", .of_match_table = of_match_ptr(da7213_of_match), + .acpi_match_table = ACPI_PTR(da7213_acpi_match), }, .probe = da7213_i2c_probe, .remove = da7213_remove, diff --git a/sound/soc/codecs/dio2125.c b/sound/soc/codecs/dio2125.c new file mode 100644 index 0000000000000000000000000000000000000000..09451cd44f9b980d6afb3dc14614f846c3d13b3e --- /dev/null +++ b/sound/soc/codecs/dio2125.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 BayLibre, SAS. + * Author: Jerome Brunet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + */ + +#include +#include +#include + +#define DRV_NAME "dio2125" + +struct dio2125 { + struct gpio_desc *gpiod_enable; +}; + +static int drv_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *control, int event) +{ + struct snd_soc_component *c = snd_soc_dapm_to_component(w->dapm); + struct dio2125 *priv = snd_soc_component_get_drvdata(c); + int val; + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + val = 1; + break; + case SND_SOC_DAPM_PRE_PMD: + val = 0; + break; + default: + WARN(1, "Unexpected event"); + return -EINVAL; + } + + gpiod_set_value_cansleep(priv->gpiod_enable, val); + + return 0; +} + +static const struct snd_soc_dapm_widget dio2125_dapm_widgets[] = { + SND_SOC_DAPM_INPUT("INL"), + SND_SOC_DAPM_INPUT("INR"), + SND_SOC_DAPM_OUT_DRV_E("DRV", SND_SOC_NOPM, 0, 0, NULL, 0, drv_event, + (SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD)), + SND_SOC_DAPM_OUTPUT("OUTL"), + SND_SOC_DAPM_OUTPUT("OUTR"), +}; + +static const struct snd_soc_dapm_route dio2125_dapm_routes[] = { + { "DRV", NULL, "INL" }, + { "DRV", NULL, "INR" }, + { "OUTL", NULL, "DRV" }, + { "OUTR", NULL, "DRV" }, +}; + +static const struct snd_soc_component_driver dio2125_component_driver = { + .dapm_widgets = dio2125_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(dio2125_dapm_widgets), + .dapm_routes = dio2125_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(dio2125_dapm_routes), +}; + +static int dio2125_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dio2125 *priv; + int err; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + platform_set_drvdata(pdev, priv); + + priv->gpiod_enable = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); + if (IS_ERR(priv->gpiod_enable)) { + err = PTR_ERR(priv->gpiod_enable); + if (err != -EPROBE_DEFER) + dev_err(dev, "Failed to get 'enable' gpio: %d", err); + return err; + } + + return devm_snd_soc_register_component(dev, &dio2125_component_driver, + NULL, 0); +} + +#ifdef CONFIG_OF +static const struct of_device_id dio2125_ids[] = { + { .compatible = "dioo,dio2125", }, + { } +}; +MODULE_DEVICE_TABLE(of, dio2125_ids); +#endif + +static struct platform_driver dio2125_driver = { + .driver = { + .name = DRV_NAME, + .of_match_table = of_match_ptr(dio2125_ids), + }, + .probe = dio2125_probe, +}; + +module_platform_driver(dio2125_driver); + +MODULE_DESCRIPTION("ASoC DIO2125 output driver"); +MODULE_AUTHOR("Jerome Brunet "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/es7134.c b/sound/soc/codecs/es7134.c new file mode 100644 index 0000000000000000000000000000000000000000..25ede825d349a4aeefab9ef3899c328d8c8d6d31 --- /dev/null +++ b/sound/soc/codecs/es7134.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 BayLibre, SAS. + * Author: Jerome Brunet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * The full GNU General Public License is included in this distribution + * in the file called COPYING. + */ + +#include +#include + +/* + * The everest 7134 is a very simple DA converter with no register + */ + +static int es7134_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) +{ + fmt &= (SND_SOC_DAIFMT_FORMAT_MASK | SND_SOC_DAIFMT_INV_MASK | + SND_SOC_DAIFMT_MASTER_MASK); + + if (fmt != (SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBS_CFS)) { + dev_err(codec_dai->dev, "Invalid DAI format\n"); + return -EINVAL; + } + + return 0; +} + +static const struct snd_soc_dai_ops es7134_dai_ops = { + .set_fmt = es7134_set_fmt, +}; + +static struct snd_soc_dai_driver es7134_dai = { + .name = "es7134-hifi", + .playback = { + .stream_name = "Playback", + .channels_min = 2, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_8000_192000, + .formats = (SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S18_3LE | + SNDRV_PCM_FMTBIT_S20_3LE | + SNDRV_PCM_FMTBIT_S24_3LE | + SNDRV_PCM_FMTBIT_S24_LE), + }, + .ops = &es7134_dai_ops, +}; + +static const struct snd_soc_dapm_widget es7134_dapm_widgets[] = { + SND_SOC_DAPM_OUTPUT("AOUTL"), + SND_SOC_DAPM_OUTPUT("AOUTR"), + SND_SOC_DAPM_DAC("DAC", "Playback", SND_SOC_NOPM, 0, 0), +}; + +static const struct snd_soc_dapm_route es7134_dapm_routes[] = { + { "AOUTL", NULL, "DAC" }, + { "AOUTR", NULL, "DAC" }, +}; + +static struct snd_soc_codec_driver es7134_codec_driver = { + .component_driver = { + .dapm_widgets = es7134_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(es7134_dapm_widgets), + .dapm_routes = es7134_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(es7134_dapm_routes), + }, +}; + +static int es7134_probe(struct platform_device *pdev) +{ + return snd_soc_register_codec(&pdev->dev, + &es7134_codec_driver, + &es7134_dai, 1); +} + +static int es7134_remove(struct platform_device *pdev) +{ + snd_soc_unregister_codec(&pdev->dev); + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id es7134_ids[] = { + { .compatible = "everest,es7134", }, + { .compatible = "everest,es7144", }, + { } +}; +MODULE_DEVICE_TABLE(of, es7134_ids); +#endif + +static struct platform_driver es7134_driver = { + .driver = { + .name = "es7134", + .of_match_table = of_match_ptr(es7134_ids), + }, + .probe = es7134_probe, + .remove = es7134_remove, +}; + +module_platform_driver(es7134_driver); + +MODULE_DESCRIPTION("ASoC ES7134 audio codec driver"); +MODULE_AUTHOR("Jerome Brunet "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index 3f84fbd071e275e52d8c004ae10d7474035c1fc7..ed7cc42d1ee2c16ece5306beb5f1aca7506dcaf5 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -69,14 +69,10 @@ static const char * const supply_names[ES8328_SUPPLY_NUM] = { "HPVDD", }; -#define ES8328_RATES (SNDRV_PCM_RATE_96000 | \ - SNDRV_PCM_RATE_48000 | \ - SNDRV_PCM_RATE_44100 | \ - SNDRV_PCM_RATE_32000 | \ - SNDRV_PCM_RATE_22050 | \ - SNDRV_PCM_RATE_16000 | \ - SNDRV_PCM_RATE_11025 | \ - SNDRV_PCM_RATE_8000) +#define ES8328_RATES (SNDRV_PCM_RATE_192000 | \ + SNDRV_PCM_RATE_96000 | \ + SNDRV_PCM_RATE_88200 | \ + SNDRV_PCM_RATE_8000_48000) #define ES8328_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ SNDRV_PCM_FMTBIT_S18_3LE | \ SNDRV_PCM_FMTBIT_S20_3LE | \ @@ -91,6 +87,7 @@ struct es8328_priv { int mclkdiv2; const struct snd_pcm_hw_constraint_list *sysclk_constraints; const int *mclk_ratios; + bool master; struct regulator_bulk_data supplies[ES8328_SUPPLY_NUM]; }; @@ -469,7 +466,7 @@ static int es8328_startup(struct snd_pcm_substream *substream, struct snd_soc_codec *codec = dai->codec; struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec); - if (es8328->sysclk_constraints) + if (es8328->master && es8328->sysclk_constraints) snd_pcm_hw_constraint_list(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, es8328->sysclk_constraints); @@ -488,27 +485,34 @@ static int es8328_hw_params(struct snd_pcm_substream *substream, int wl; int ratio; - if (!es8328->sysclk_constraints) { - dev_err(codec->dev, "No MCLK configured\n"); - return -EINVAL; - } - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) reg = ES8328_DACCONTROL2; else reg = ES8328_ADCCONTROL5; - for (i = 0; i < es8328->sysclk_constraints->count; i++) - if (es8328->sysclk_constraints->list[i] == params_rate(params)) - break; + if (es8328->master) { + if (!es8328->sysclk_constraints) { + dev_err(codec->dev, "No MCLK configured\n"); + return -EINVAL; + } - if (i == es8328->sysclk_constraints->count) { - dev_err(codec->dev, "LRCLK %d unsupported with current clock\n", - params_rate(params)); - return -EINVAL; + for (i = 0; i < es8328->sysclk_constraints->count; i++) + if (es8328->sysclk_constraints->list[i] == + params_rate(params)) + break; + + if (i == es8328->sysclk_constraints->count) { + dev_err(codec->dev, + "LRCLK %d unsupported with current clock\n", + params_rate(params)); + return -EINVAL; + } + ratio = es8328->mclk_ratios[i]; + } else { + ratio = 0; + es8328->mclkdiv2 = 0; } - ratio = es8328->mclk_ratios[i]; snd_soc_update_bits(codec, ES8328_MASTERMODE, ES8328_MASTERMODE_MCLKDIV2, es8328->mclkdiv2 ? ES8328_MASTERMODE_MCLKDIV2 : 0); @@ -586,6 +590,7 @@ static int es8328_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_codec *codec = codec_dai->codec; + struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec); u8 dac_mode = 0; u8 adc_mode = 0; @@ -595,11 +600,13 @@ static int es8328_set_dai_fmt(struct snd_soc_dai *codec_dai, snd_soc_update_bits(codec, ES8328_MASTERMODE, ES8328_MASTERMODE_MSC, ES8328_MASTERMODE_MSC); + es8328->master = true; break; case SND_SOC_DAIFMT_CBS_CFS: /* Slave serial port mode */ snd_soc_update_bits(codec, ES8328_MASTERMODE, ES8328_MASTERMODE_MSC, 0); + es8328->master = false; break; default: return -EINVAL; diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index fd272a40485b077039c68f786671218a7f4204e2..bc2e74ff3b2deddf653f1823329d22bf3449f8b6 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -469,7 +469,7 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, format = snd_hdac_calc_stream_format(params_rate(hparams), params_channels(hparams), params_format(hparams), - 24, 0); + dai->driver->playback.sig_bits, 0); pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, dai_map->cvt); if (!pcm) @@ -1419,8 +1419,8 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdac, hdmi_dais[i].playback.rate_min = rate_min; hdmi_dais[i].playback.channels_min = 2; hdmi_dais[i].playback.channels_max = 2; + hdmi_dais[i].playback.sig_bits = bps; hdmi_dais[i].ops = &hdmi_dai_ops; - i++; } diff --git a/sound/soc/codecs/max9867.c b/sound/soc/codecs/max9867.c index 6cdf15ab46de7fa8dad6d8f14eaac4c549309feb..0247edc9c84ebebfc0e9150e3b1b7c408659b1a6 100644 --- a/sound/soc/codecs/max9867.c +++ b/sound/soc/codecs/max9867.c @@ -516,13 +516,13 @@ static const struct i2c_device_id max9867_i2c_id[] = { { "max9867", 0 }, { } }; +MODULE_DEVICE_TABLE(i2c, max9867_i2c_id); static const struct of_device_id max9867_of_match[] = { { .compatible = "maxim,max9867", }, { } }; - -MODULE_DEVICE_TABLE(i2c, max9867_i2c_id); +MODULE_DEVICE_TABLE(of, max9867_of_match); static const struct dev_pm_ops max9867_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(max9867_suspend, max9867_resume) diff --git a/sound/soc/codecs/max98927.c b/sound/soc/codecs/max98927.c new file mode 100644 index 0000000000000000000000000000000000000000..b5ee29499e166d887f6e62e3b2dfcfc05ef3aa4c --- /dev/null +++ b/sound/soc/codecs/max98927.c @@ -0,0 +1,841 @@ +/* + * max98927.c -- MAX98927 ALSA Soc Audio driver + * + * Copyright (C) 2016 Maxim Integrated Products + * Author: Ryan Lee + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "max98927.h" + +static struct reg_default max98927_reg[] = { + {MAX98927_R0001_INT_RAW1, 0x00}, + {MAX98927_R0002_INT_RAW2, 0x00}, + {MAX98927_R0003_INT_RAW3, 0x00}, + {MAX98927_R0004_INT_STATE1, 0x00}, + {MAX98927_R0005_INT_STATE2, 0x00}, + {MAX98927_R0006_INT_STATE3, 0x00}, + {MAX98927_R0007_INT_FLAG1, 0x00}, + {MAX98927_R0008_INT_FLAG2, 0x00}, + {MAX98927_R0009_INT_FLAG3, 0x00}, + {MAX98927_R000A_INT_EN1, 0x00}, + {MAX98927_R000B_INT_EN2, 0x00}, + {MAX98927_R000C_INT_EN3, 0x00}, + {MAX98927_R000D_INT_FLAG_CLR1, 0x00}, + {MAX98927_R000E_INT_FLAG_CLR2, 0x00}, + {MAX98927_R000F_INT_FLAG_CLR3, 0x00}, + {MAX98927_R0010_IRQ_CTRL, 0x00}, + {MAX98927_R0011_CLK_MON, 0x00}, + {MAX98927_R0012_WDOG_CTRL, 0x00}, + {MAX98927_R0013_WDOG_RST, 0x00}, + {MAX98927_R0014_MEAS_ADC_THERM_WARN_THRESH, 0x00}, + {MAX98927_R0015_MEAS_ADC_THERM_SHDN_THRESH, 0x00}, + {MAX98927_R0016_MEAS_ADC_THERM_HYSTERESIS, 0x00}, + {MAX98927_R0017_PIN_CFG, 0x55}, + {MAX98927_R0018_PCM_RX_EN_A, 0x00}, + {MAX98927_R0019_PCM_RX_EN_B, 0x00}, + {MAX98927_R001A_PCM_TX_EN_A, 0x00}, + {MAX98927_R001B_PCM_TX_EN_B, 0x00}, + {MAX98927_R001C_PCM_TX_HIZ_CTRL_A, 0x00}, + {MAX98927_R001D_PCM_TX_HIZ_CTRL_B, 0x00}, + {MAX98927_R001E_PCM_TX_CH_SRC_A, 0x00}, + {MAX98927_R001F_PCM_TX_CH_SRC_B, 0x00}, + {MAX98927_R0020_PCM_MODE_CFG, 0x40}, + {MAX98927_R0021_PCM_MASTER_MODE, 0x00}, + {MAX98927_R0022_PCM_CLK_SETUP, 0x22}, + {MAX98927_R0023_PCM_SR_SETUP1, 0x00}, + {MAX98927_R0024_PCM_SR_SETUP2, 0x00}, + {MAX98927_R0025_PCM_TO_SPK_MONOMIX_A, 0x00}, + {MAX98927_R0026_PCM_TO_SPK_MONOMIX_B, 0x00}, + {MAX98927_R0027_ICC_RX_EN_A, 0x00}, + {MAX98927_R0028_ICC_RX_EN_B, 0x00}, + {MAX98927_R002B_ICC_TX_EN_A, 0x00}, + {MAX98927_R002C_ICC_TX_EN_B, 0x00}, + {MAX98927_R002E_ICC_HIZ_MANUAL_MODE, 0x00}, + {MAX98927_R002F_ICC_TX_HIZ_EN_A, 0x00}, + {MAX98927_R0030_ICC_TX_HIZ_EN_B, 0x00}, + {MAX98927_R0031_ICC_LNK_EN, 0x00}, + {MAX98927_R0032_PDM_TX_EN, 0x00}, + {MAX98927_R0033_PDM_TX_HIZ_CTRL, 0x00}, + {MAX98927_R0034_PDM_TX_CTRL, 0x00}, + {MAX98927_R0035_PDM_RX_CTRL, 0x00}, + {MAX98927_R0036_AMP_VOL_CTRL, 0x00}, + {MAX98927_R0037_AMP_DSP_CFG, 0x02}, + {MAX98927_R0038_TONE_GEN_DC_CFG, 0x00}, + {MAX98927_R0039_DRE_CTRL, 0x01}, + {MAX98927_R003A_AMP_EN, 0x00}, + {MAX98927_R003B_SPK_SRC_SEL, 0x00}, + {MAX98927_R003C_SPK_GAIN, 0x00}, + {MAX98927_R003D_SSM_CFG, 0x01}, + {MAX98927_R003E_MEAS_EN, 0x00}, + {MAX98927_R003F_MEAS_DSP_CFG, 0x04}, + {MAX98927_R0040_BOOST_CTRL0, 0x00}, + {MAX98927_R0041_BOOST_CTRL3, 0x00}, + {MAX98927_R0042_BOOST_CTRL1, 0x00}, + {MAX98927_R0043_MEAS_ADC_CFG, 0x00}, + {MAX98927_R0044_MEAS_ADC_BASE_MSB, 0x00}, + {MAX98927_R0045_MEAS_ADC_BASE_LSB, 0x00}, + {MAX98927_R0046_ADC_CH0_DIVIDE, 0x00}, + {MAX98927_R0047_ADC_CH1_DIVIDE, 0x00}, + {MAX98927_R0048_ADC_CH2_DIVIDE, 0x00}, + {MAX98927_R0049_ADC_CH0_FILT_CFG, 0x00}, + {MAX98927_R004A_ADC_CH1_FILT_CFG, 0x00}, + {MAX98927_R004B_ADC_CH2_FILT_CFG, 0x00}, + {MAX98927_R004C_MEAS_ADC_CH0_READ, 0x00}, + {MAX98927_R004D_MEAS_ADC_CH1_READ, 0x00}, + {MAX98927_R004E_MEAS_ADC_CH2_READ, 0x00}, + {MAX98927_R0051_BROWNOUT_STATUS, 0x00}, + {MAX98927_R0052_BROWNOUT_EN, 0x00}, + {MAX98927_R0053_BROWNOUT_INFINITE_HOLD, 0x00}, + {MAX98927_R0054_BROWNOUT_INFINITE_HOLD_CLR, 0x00}, + {MAX98927_R0055_BROWNOUT_LVL_HOLD, 0x00}, + {MAX98927_R005A_BROWNOUT_LVL1_THRESH, 0x00}, + {MAX98927_R005B_BROWNOUT_LVL2_THRESH, 0x00}, + {MAX98927_R005C_BROWNOUT_LVL3_THRESH, 0x00}, + {MAX98927_R005D_BROWNOUT_LVL4_THRESH, 0x00}, + {MAX98927_R005E_BROWNOUT_THRESH_HYSTERYSIS, 0x00}, + {MAX98927_R005F_BROWNOUT_AMP_LIMITER_ATK_REL, 0x00}, + {MAX98927_R0060_BROWNOUT_AMP_GAIN_ATK_REL, 0x00}, + {MAX98927_R0061_BROWNOUT_AMP1_CLIP_MODE, 0x00}, + {MAX98927_R0072_BROWNOUT_LVL1_CUR_LIMIT, 0x00}, + {MAX98927_R0073_BROWNOUT_LVL1_AMP1_CTRL1, 0x00}, + {MAX98927_R0074_BROWNOUT_LVL1_AMP1_CTRL2, 0x00}, + {MAX98927_R0075_BROWNOUT_LVL1_AMP1_CTRL3, 0x00}, + {MAX98927_R0076_BROWNOUT_LVL2_CUR_LIMIT, 0x00}, + {MAX98927_R0077_BROWNOUT_LVL2_AMP1_CTRL1, 0x00}, + {MAX98927_R0078_BROWNOUT_LVL2_AMP1_CTRL2, 0x00}, + {MAX98927_R0079_BROWNOUT_LVL2_AMP1_CTRL3, 0x00}, + {MAX98927_R007A_BROWNOUT_LVL3_CUR_LIMIT, 0x00}, + {MAX98927_R007B_BROWNOUT_LVL3_AMP1_CTRL1, 0x00}, + {MAX98927_R007C_BROWNOUT_LVL3_AMP1_CTRL2, 0x00}, + {MAX98927_R007D_BROWNOUT_LVL3_AMP1_CTRL3, 0x00}, + {MAX98927_R007E_BROWNOUT_LVL4_CUR_LIMIT, 0x00}, + {MAX98927_R007F_BROWNOUT_LVL4_AMP1_CTRL1, 0x00}, + {MAX98927_R0080_BROWNOUT_LVL4_AMP1_CTRL2, 0x00}, + {MAX98927_R0081_BROWNOUT_LVL4_AMP1_CTRL3, 0x00}, + {MAX98927_R0082_ENV_TRACK_VOUT_HEADROOM, 0x00}, + {MAX98927_R0083_ENV_TRACK_BOOST_VOUT_DELAY, 0x00}, + {MAX98927_R0084_ENV_TRACK_REL_RATE, 0x00}, + {MAX98927_R0085_ENV_TRACK_HOLD_RATE, 0x00}, + {MAX98927_R0086_ENV_TRACK_CTRL, 0x00}, + {MAX98927_R0087_ENV_TRACK_BOOST_VOUT_READ, 0x00}, + {MAX98927_R00FF_GLOBAL_SHDN, 0x00}, + {MAX98927_R0100_SOFT_RESET, 0x00}, + {MAX98927_R01FF_REV_ID, 0x40}, +}; + +static int max98927_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct max98927_priv *max98927 = snd_soc_codec_get_drvdata(codec); + unsigned int mode = 0; + unsigned int format = 0; + unsigned int invert = 0; + + dev_dbg(codec->dev, "%s: fmt 0x%08X\n", __func__, fmt); + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + mode = MAX98927_PCM_MASTER_MODE_SLAVE; + break; + case SND_SOC_DAIFMT_CBM_CFM: + max98927->master = true; + mode = MAX98927_PCM_MASTER_MODE_MASTER; + break; + default: + dev_err(codec->dev, "DAI clock mode unsupported"); + return -EINVAL; + } + + regmap_update_bits(max98927->regmap, + MAX98927_R0021_PCM_MASTER_MODE, + MAX98927_PCM_MASTER_MODE_MASK, + mode); + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + invert = MAX98927_PCM_MODE_CFG_PCM_BCLKEDGE; + break; + default: + dev_err(codec->dev, "DAI invert mode unsupported"); + return -EINVAL; + } + + regmap_update_bits(max98927->regmap, + MAX98927_R0020_PCM_MODE_CFG, + MAX98927_PCM_MODE_CFG_PCM_BCLKEDGE, + invert); + + /* interface format */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + max98927->iface |= SND_SOC_DAIFMT_I2S; + format = MAX98927_PCM_FORMAT_I2S; + break; + case SND_SOC_DAIFMT_LEFT_J: + max98927->iface |= SND_SOC_DAIFMT_LEFT_J; + format = MAX98927_PCM_FORMAT_LJ; + break; + case SND_SOC_DAIFMT_PDM: + max98927->iface |= SND_SOC_DAIFMT_PDM; + break; + default: + return -EINVAL; + } + + /* pcm channel configuration */ + if (max98927->iface & (SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_LEFT_J)) { + regmap_update_bits(max98927->regmap, + MAX98927_R0018_PCM_RX_EN_A, + MAX98927_PCM_RX_CH0_EN | MAX98927_PCM_RX_CH1_EN, + MAX98927_PCM_RX_CH0_EN | MAX98927_PCM_RX_CH1_EN); + + regmap_update_bits(max98927->regmap, + MAX98927_R0020_PCM_MODE_CFG, + MAX98927_PCM_MODE_CFG_FORMAT_MASK, + format << MAX98927_PCM_MODE_CFG_FORMAT_SHIFT); + + regmap_update_bits(max98927->regmap, + MAX98927_R003B_SPK_SRC_SEL, + MAX98927_SPK_SRC_MASK, 0); + + } else + regmap_update_bits(max98927->regmap, + MAX98927_R0018_PCM_RX_EN_A, + MAX98927_PCM_RX_CH0_EN | MAX98927_PCM_RX_CH1_EN, 0); + + /* pdm channel configuration */ + if (max98927->iface & SND_SOC_DAIFMT_PDM) { + regmap_update_bits(max98927->regmap, + MAX98927_R0035_PDM_RX_CTRL, + MAX98927_PDM_RX_EN_MASK, 1); + + regmap_update_bits(max98927->regmap, + MAX98927_R003B_SPK_SRC_SEL, + MAX98927_SPK_SRC_MASK, 3); + } else + regmap_update_bits(max98927->regmap, + MAX98927_R0035_PDM_RX_CTRL, + MAX98927_PDM_RX_EN_MASK, 0); + return 0; +} + +/* codec MCLK rate in master mode */ +static const int rate_table[] = { + 5644800, 6000000, 6144000, 6500000, + 9600000, 11289600, 12000000, 12288000, + 13000000, 19200000, +}; + +static int max98927_set_clock(struct max98927_priv *max98927, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_codec *codec = max98927->codec; + /* BCLK/LRCLK ratio calculation */ + int blr_clk_ratio = params_channels(params) * max98927->ch_size; + int value; + + if (max98927->master) { + int i; + /* match rate to closest value */ + for (i = 0; i < ARRAY_SIZE(rate_table); i++) { + if (rate_table[i] >= max98927->sysclk) + break; + } + if (i == ARRAY_SIZE(rate_table)) { + dev_err(codec->dev, "failed to find proper clock rate.\n"); + return -EINVAL; + } + regmap_update_bits(max98927->regmap, + MAX98927_R0021_PCM_MASTER_MODE, + MAX98927_PCM_MASTER_MODE_MCLK_MASK, + i << MAX98927_PCM_MASTER_MODE_MCLK_RATE_SHIFT); + } + + switch (blr_clk_ratio) { + case 32: + value = 2; + break; + case 48: + value = 3; + break; + case 64: + value = 4; + break; + default: + return -EINVAL; + } + regmap_update_bits(max98927->regmap, + MAX98927_R0022_PCM_CLK_SETUP, + MAX98927_PCM_CLK_SETUP_BSEL_MASK, + value); + return 0; +} + +static int max98927_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct max98927_priv *max98927 = snd_soc_codec_get_drvdata(codec); + unsigned int sampling_rate = 0; + unsigned int chan_sz = 0; + + /* pcm mode configuration */ + switch (snd_pcm_format_width(params_format(params))) { + case 16: + chan_sz = MAX98927_PCM_MODE_CFG_CHANSZ_16; + break; + case 24: + chan_sz = MAX98927_PCM_MODE_CFG_CHANSZ_24; + break; + case 32: + chan_sz = MAX98927_PCM_MODE_CFG_CHANSZ_32; + break; + default: + dev_err(codec->dev, "format unsupported %d", + params_format(params)); + goto err; + } + + max98927->ch_size = snd_pcm_format_width(params_format(params)); + + regmap_update_bits(max98927->regmap, + MAX98927_R0020_PCM_MODE_CFG, + MAX98927_PCM_MODE_CFG_CHANSZ_MASK, chan_sz); + + dev_dbg(codec->dev, "format supported %d", + params_format(params)); + + /* sampling rate configuration */ + switch (params_rate(params)) { + case 8000: + sampling_rate = MAX98927_PCM_SR_SET1_SR_8000; + break; + case 11025: + sampling_rate = MAX98927_PCM_SR_SET1_SR_11025; + break; + case 12000: + sampling_rate = MAX98927_PCM_SR_SET1_SR_12000; + break; + case 16000: + sampling_rate = MAX98927_PCM_SR_SET1_SR_16000; + break; + case 22050: + sampling_rate = MAX98927_PCM_SR_SET1_SR_22050; + break; + case 24000: + sampling_rate = MAX98927_PCM_SR_SET1_SR_24000; + break; + case 32000: + sampling_rate = MAX98927_PCM_SR_SET1_SR_32000; + break; + case 44100: + sampling_rate = MAX98927_PCM_SR_SET1_SR_44100; + break; + case 48000: + sampling_rate = MAX98927_PCM_SR_SET1_SR_48000; + break; + default: + dev_err(codec->dev, "rate %d not supported\n", + params_rate(params)); + goto err; + } + /* set DAI_SR to correct LRCLK frequency */ + regmap_update_bits(max98927->regmap, + MAX98927_R0023_PCM_SR_SETUP1, + MAX98927_PCM_SR_SET1_SR_MASK, + sampling_rate); + regmap_update_bits(max98927->regmap, + MAX98927_R0024_PCM_SR_SETUP2, + MAX98927_PCM_SR_SET2_SR_MASK, + sampling_rate << MAX98927_PCM_SR_SET2_SR_SHIFT); + + /* set sampling rate of IV */ + if (max98927->interleave_mode && + sampling_rate > MAX98927_PCM_SR_SET1_SR_16000) + regmap_update_bits(max98927->regmap, + MAX98927_R0024_PCM_SR_SETUP2, + MAX98927_PCM_SR_SET2_IVADC_SR_MASK, + sampling_rate - 3); + else + regmap_update_bits(max98927->regmap, + MAX98927_R0024_PCM_SR_SETUP2, + MAX98927_PCM_SR_SET2_IVADC_SR_MASK, + sampling_rate); + return max98927_set_clock(max98927, params); +err: + return -EINVAL; +} + +#define MAX98927_RATES SNDRV_PCM_RATE_8000_48000 + +#define MAX98927_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) + +static int max98927_dai_set_sysclk(struct snd_soc_dai *dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = dai->codec; + struct max98927_priv *max98927 = snd_soc_codec_get_drvdata(codec); + + max98927->sysclk = freq; + return 0; +} + +static const struct snd_soc_dai_ops max98927_dai_ops = { + .set_sysclk = max98927_dai_set_sysclk, + .set_fmt = max98927_dai_set_fmt, + .hw_params = max98927_dai_hw_params, +}; + +static int max98927_dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct max98927_priv *max98927 = snd_soc_codec_get_drvdata(codec); + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + regmap_update_bits(max98927->regmap, + MAX98927_R003A_AMP_EN, + MAX98927_AMP_EN_MASK, 1); + /* enable VMON and IMON */ + regmap_update_bits(max98927->regmap, + MAX98927_R003E_MEAS_EN, + MAX98927_MEAS_V_EN | MAX98927_MEAS_I_EN, + MAX98927_MEAS_V_EN | MAX98927_MEAS_I_EN); + regmap_update_bits(max98927->regmap, + MAX98927_R00FF_GLOBAL_SHDN, + MAX98927_GLOBAL_EN_MASK, 1); + break; + case SND_SOC_DAPM_POST_PMD: + regmap_update_bits(max98927->regmap, + MAX98927_R00FF_GLOBAL_SHDN, + MAX98927_GLOBAL_EN_MASK, 0); + regmap_update_bits(max98927->regmap, + MAX98927_R003A_AMP_EN, + MAX98927_AMP_EN_MASK, 0); + /* disable VMON and IMON */ + regmap_update_bits(max98927->regmap, + MAX98927_R003E_MEAS_EN, + MAX98927_MEAS_V_EN | MAX98927_MEAS_I_EN, 0); + break; + default: + return 0; + } + return 0; +} + +static const char * const max98927_switch_text[] = { + "Left", "Right", "LeftRight"}; + +static const struct soc_enum dai_sel_enum = + SOC_ENUM_SINGLE(MAX98927_R0025_PCM_TO_SPK_MONOMIX_A, + MAX98927_PCM_TO_SPK_MONOMIX_CFG_SHIFT, + 3, max98927_switch_text); + +static const struct snd_kcontrol_new max98927_dai_controls = + SOC_DAPM_ENUM("DAI Sel", dai_sel_enum); + +static const struct snd_soc_dapm_widget max98927_dapm_widgets[] = { + SND_SOC_DAPM_AIF_IN("DAI_OUT", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_DAC_E("Amp Enable", "HiFi Playback", MAX98927_R003A_AMP_EN, + 0, 0, max98927_dac_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_MUX("DAI Sel Mux", SND_SOC_NOPM, 0, 0, + &max98927_dai_controls), + SND_SOC_DAPM_OUTPUT("BE_OUT"), +}; + +static DECLARE_TLV_DB_SCALE(max98927_spk_tlv, 300, 300, 0); +static DECLARE_TLV_DB_SCALE(max98927_digital_tlv, -1600, 25, 0); + +static bool max98927_readable_register(struct device *dev, unsigned int reg) +{ + switch (reg) { + case MAX98927_R0001_INT_RAW1 ... MAX98927_R0028_ICC_RX_EN_B: + case MAX98927_R002B_ICC_TX_EN_A ... MAX98927_R002C_ICC_TX_EN_B: + case MAX98927_R002E_ICC_HIZ_MANUAL_MODE + ... MAX98927_R004E_MEAS_ADC_CH2_READ: + case MAX98927_R0051_BROWNOUT_STATUS + ... MAX98927_R0055_BROWNOUT_LVL_HOLD: + case MAX98927_R005A_BROWNOUT_LVL1_THRESH + ... MAX98927_R0061_BROWNOUT_AMP1_CLIP_MODE: + case MAX98927_R0072_BROWNOUT_LVL1_CUR_LIMIT + ... MAX98927_R0087_ENV_TRACK_BOOST_VOUT_READ: + case MAX98927_R00FF_GLOBAL_SHDN: + case MAX98927_R0100_SOFT_RESET: + case MAX98927_R01FF_REV_ID: + return true; + default: + return false; + } +}; + +static bool max98927_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case MAX98927_R0001_INT_RAW1 ... MAX98927_R0009_INT_FLAG3: + return true; + default: + return false; + } +} + +static const char * const max98927_boost_voltage_text[] = { + "6.5V", "6.625V", "6.75V", "6.875V", "7V", "7.125V", "7.25V", "7.375V", + "7.5V", "7.625V", "7.75V", "7.875V", "8V", "8.125V", "8.25V", "8.375V", + "8.5V", "8.625V", "8.75V", "8.875V", "9V", "9.125V", "9.25V", "9.375V", + "9.5V", "9.625V", "9.75V", "9.875V", "10V" +}; + +static SOC_ENUM_SINGLE_DECL(max98927_boost_voltage, + MAX98927_R0040_BOOST_CTRL0, 0, + max98927_boost_voltage_text); + +static const char * const max98927_current_limit_text[] = { + "1.00A", "1.10A", "1.20A", "1.30A", "1.40A", "1.50A", "1.60A", "1.70A", + "1.80A", "1.90A", "2.00A", "2.10A", "2.20A", "2.30A", "2.40A", "2.50A", + "2.60A", "2.70A", "2.80A", "2.90A", "3.00A", "3.10A", "3.20A", "3.30A", + "3.40A", "3.50A", "3.60A", "3.70A", "3.80A", "3.90A", "4.00A", "4.10A" +}; + +static SOC_ENUM_SINGLE_DECL(max98927_current_limit, + MAX98927_R0042_BOOST_CTRL1, 1, + max98927_current_limit_text); + +static const struct snd_kcontrol_new max98927_snd_controls[] = { + SOC_SINGLE_TLV("Speaker Volume", MAX98927_R003C_SPK_GAIN, + 0, 6, 0, + max98927_spk_tlv), + SOC_SINGLE_TLV("Digital Volume", MAX98927_R0036_AMP_VOL_CTRL, + 0, (1<codec = codec; + codec->control_data = max98927->regmap; + codec->cache_bypass = 1; + + /* Software Reset */ + regmap_write(max98927->regmap, + MAX98927_R0100_SOFT_RESET, MAX98927_SOFT_RESET); + + /* IV default slot configuration */ + regmap_write(max98927->regmap, + MAX98927_R001C_PCM_TX_HIZ_CTRL_A, + 0xFF); + regmap_write(max98927->regmap, + MAX98927_R001D_PCM_TX_HIZ_CTRL_B, + 0xFF); + regmap_write(max98927->regmap, + MAX98927_R0025_PCM_TO_SPK_MONOMIX_A, + 0x80); + regmap_write(max98927->regmap, + MAX98927_R0026_PCM_TO_SPK_MONOMIX_B, + 0x1); + /* Set inital volume (+13dB) */ + regmap_write(max98927->regmap, + MAX98927_R0036_AMP_VOL_CTRL, + 0x38); + regmap_write(max98927->regmap, + MAX98927_R003C_SPK_GAIN, + 0x05); + /* Enable DC blocker */ + regmap_write(max98927->regmap, + MAX98927_R0037_AMP_DSP_CFG, + 0x03); + /* Enable IMON VMON DC blocker */ + regmap_write(max98927->regmap, + MAX98927_R003F_MEAS_DSP_CFG, + 0xF7); + /* Boost Output Voltage & Current limit */ + regmap_write(max98927->regmap, + MAX98927_R0040_BOOST_CTRL0, + 0x1C); + regmap_write(max98927->regmap, + MAX98927_R0042_BOOST_CTRL1, + 0x3E); + /* Measurement ADC config */ + regmap_write(max98927->regmap, + MAX98927_R0043_MEAS_ADC_CFG, + 0x04); + regmap_write(max98927->regmap, + MAX98927_R0044_MEAS_ADC_BASE_MSB, + 0x00); + regmap_write(max98927->regmap, + MAX98927_R0045_MEAS_ADC_BASE_LSB, + 0x24); + /* Brownout Level */ + regmap_write(max98927->regmap, + MAX98927_R007F_BROWNOUT_LVL4_AMP1_CTRL1, + 0x06); + /* Envelope Tracking configuration */ + regmap_write(max98927->regmap, + MAX98927_R0082_ENV_TRACK_VOUT_HEADROOM, + 0x08); + regmap_write(max98927->regmap, + MAX98927_R0086_ENV_TRACK_CTRL, + 0x01); + regmap_write(max98927->regmap, + MAX98927_R0087_ENV_TRACK_BOOST_VOUT_READ, + 0x10); + + /* voltage, current slot configuration */ + regmap_write(max98927->regmap, + MAX98927_R001E_PCM_TX_CH_SRC_A, + (max98927->i_l_slot<v_l_slot)&0xFF); + + if (max98927->v_l_slot < 8) { + regmap_update_bits(max98927->regmap, + MAX98927_R001C_PCM_TX_HIZ_CTRL_A, + 1 << max98927->v_l_slot, 0); + regmap_update_bits(max98927->regmap, + MAX98927_R001A_PCM_TX_EN_A, + 1 << max98927->v_l_slot, + 1 << max98927->v_l_slot); + } else { + regmap_update_bits(max98927->regmap, + MAX98927_R001D_PCM_TX_HIZ_CTRL_B, + 1 << (max98927->v_l_slot - 8), 0); + regmap_update_bits(max98927->regmap, + MAX98927_R001B_PCM_TX_EN_B, + 1 << (max98927->v_l_slot - 8), + 1 << (max98927->v_l_slot - 8)); + } + + if (max98927->i_l_slot < 8) { + regmap_update_bits(max98927->regmap, + MAX98927_R001C_PCM_TX_HIZ_CTRL_A, + 1 << max98927->i_l_slot, 0); + regmap_update_bits(max98927->regmap, + MAX98927_R001A_PCM_TX_EN_A, + 1 << max98927->i_l_slot, + 1 << max98927->i_l_slot); + } else { + regmap_update_bits(max98927->regmap, + MAX98927_R001D_PCM_TX_HIZ_CTRL_B, + 1 << (max98927->i_l_slot - 8), 0); + regmap_update_bits(max98927->regmap, + MAX98927_R001B_PCM_TX_EN_B, + 1 << (max98927->i_l_slot - 8), + 1 << (max98927->i_l_slot - 8)); + } + + /* Set interleave mode */ + if (max98927->interleave_mode) + regmap_update_bits(max98927->regmap, + MAX98927_R001F_PCM_TX_CH_SRC_B, + MAX98927_PCM_TX_CH_INTERLEAVE_MASK, + MAX98927_PCM_TX_CH_INTERLEAVE_MASK); + return 0; +} + +static const struct snd_soc_codec_driver soc_codec_dev_max98927 = { + .probe = max98927_probe, + .component_driver = { + .controls = max98927_snd_controls, + .num_controls = ARRAY_SIZE(max98927_snd_controls), + .dapm_widgets = max98927_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(max98927_dapm_widgets), + .dapm_routes = max98927_audio_map, + .num_dapm_routes = ARRAY_SIZE(max98927_audio_map), + }, +}; + +static const struct regmap_config max98927_regmap = { + .reg_bits = 16, + .val_bits = 8, + .max_register = MAX98927_R01FF_REV_ID, + .reg_defaults = max98927_reg, + .num_reg_defaults = ARRAY_SIZE(max98927_reg), + .readable_reg = max98927_readable_register, + .volatile_reg = max98927_volatile_reg, + .cache_type = REGCACHE_RBTREE, +}; + +static void max98927_slot_config(struct i2c_client *i2c, + struct max98927_priv *max98927) +{ + int value; + + if (!of_property_read_u32(i2c->dev.of_node, + "vmon-slot-no", &value)) + max98927->v_l_slot = value & 0xF; + else + max98927->v_l_slot = 0; + if (!of_property_read_u32(i2c->dev.of_node, + "imon-slot-no", &value)) + max98927->i_l_slot = value & 0xF; + else + max98927->i_l_slot = 1; +} + +static int max98927_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + + int ret = 0, value; + int reg = 0; + struct max98927_priv *max98927 = NULL; + + max98927 = devm_kzalloc(&i2c->dev, + sizeof(*max98927), GFP_KERNEL); + + if (!max98927) { + ret = -ENOMEM; + return ret; + } + i2c_set_clientdata(i2c, max98927); + + /* update interleave mode info */ + if (!of_property_read_u32(i2c->dev.of_node, + "interleave_mode", &value)) { + if (value > 0) + max98927->interleave_mode = 1; + else + max98927->interleave_mode = 0; + } else + max98927->interleave_mode = 0; + + /* regmap initialization */ + max98927->regmap + = devm_regmap_init_i2c(i2c, &max98927_regmap); + if (IS_ERR(max98927->regmap)) { + ret = PTR_ERR(max98927->regmap); + dev_err(&i2c->dev, + "Failed to allocate regmap: %d\n", ret); + return ret; + } + + /* Check Revision ID */ + ret = regmap_read(max98927->regmap, + MAX98927_R01FF_REV_ID, ®); + if (ret < 0) { + dev_err(&i2c->dev, + "Failed to read: 0x%02X\n", MAX98927_R01FF_REV_ID); + return ret; + } + dev_info(&i2c->dev, "MAX98927 revisionID: 0x%02X\n", reg); + + /* voltage/current slot configuration */ + max98927_slot_config(i2c, max98927); + + /* codec registeration */ + ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_max98927, + max98927_dai, ARRAY_SIZE(max98927_dai)); + if (ret < 0) + dev_err(&i2c->dev, "Failed to register codec: %d\n", ret); + + return ret; +} + +static int max98927_i2c_remove(struct i2c_client *client) +{ + snd_soc_unregister_codec(&client->dev); + return 0; +} + +static const struct i2c_device_id max98927_i2c_id[] = { + { "max98927", 0}, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, max98927_i2c_id); + +#if defined(CONFIG_OF) +static const struct of_device_id max98927_of_match[] = { + { .compatible = "maxim,max98927", }, + { } +}; +MODULE_DEVICE_TABLE(of, max98927_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id max98927_acpi_match[] = { + { "MX98927", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, max98927_acpi_match); +#endif + +static struct i2c_driver max98927_i2c_driver = { + .driver = { + .name = "max98927", + .of_match_table = of_match_ptr(max98927_of_match), + .acpi_match_table = ACPI_PTR(max98927_acpi_match), + .pm = NULL, + }, + .probe = max98927_i2c_probe, + .remove = max98927_i2c_remove, + .id_table = max98927_i2c_id, +}; + +module_i2c_driver(max98927_i2c_driver) + +MODULE_DESCRIPTION("ALSA SoC MAX98927 driver"); +MODULE_AUTHOR("Ryan Lee "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/max98927.h b/sound/soc/codecs/max98927.h new file mode 100644 index 0000000000000000000000000000000000000000..ece6a608cbe1ee65383aa9d4feef873110bc516a --- /dev/null +++ b/sound/soc/codecs/max98927.h @@ -0,0 +1,272 @@ +/* + * max98927.h -- MAX98927 ALSA Soc Audio driver + * + * Copyright 2013-15 Maxim Integrated Products + * Author: Ryan Lee + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef _MAX98927_H +#define _MAX98927_H + +/* Register Values */ +#define MAX98927_R0001_INT_RAW1 0x0001 +#define MAX98927_R0002_INT_RAW2 0x0002 +#define MAX98927_R0003_INT_RAW3 0x0003 +#define MAX98927_R0004_INT_STATE1 0x0004 +#define MAX98927_R0005_INT_STATE2 0x0005 +#define MAX98927_R0006_INT_STATE3 0x0006 +#define MAX98927_R0007_INT_FLAG1 0x0007 +#define MAX98927_R0008_INT_FLAG2 0x0008 +#define MAX98927_R0009_INT_FLAG3 0x0009 +#define MAX98927_R000A_INT_EN1 0x000A +#define MAX98927_R000B_INT_EN2 0x000B +#define MAX98927_R000C_INT_EN3 0x000C +#define MAX98927_R000D_INT_FLAG_CLR1 0x000D +#define MAX98927_R000E_INT_FLAG_CLR2 0x000E +#define MAX98927_R000F_INT_FLAG_CLR3 0x000F +#define MAX98927_R0010_IRQ_CTRL 0x0010 +#define MAX98927_R0011_CLK_MON 0x0011 +#define MAX98927_R0012_WDOG_CTRL 0x0012 +#define MAX98927_R0013_WDOG_RST 0x0013 +#define MAX98927_R0014_MEAS_ADC_THERM_WARN_THRESH 0x0014 +#define MAX98927_R0015_MEAS_ADC_THERM_SHDN_THRESH 0x0015 +#define MAX98927_R0016_MEAS_ADC_THERM_HYSTERESIS 0x0016 +#define MAX98927_R0017_PIN_CFG 0x0017 +#define MAX98927_R0018_PCM_RX_EN_A 0x0018 +#define MAX98927_R0019_PCM_RX_EN_B 0x0019 +#define MAX98927_R001A_PCM_TX_EN_A 0x001A +#define MAX98927_R001B_PCM_TX_EN_B 0x001B +#define MAX98927_R001C_PCM_TX_HIZ_CTRL_A 0x001C +#define MAX98927_R001D_PCM_TX_HIZ_CTRL_B 0x001D +#define MAX98927_R001E_PCM_TX_CH_SRC_A 0x001E +#define MAX98927_R001F_PCM_TX_CH_SRC_B 0x001F +#define MAX98927_R0020_PCM_MODE_CFG 0x0020 +#define MAX98927_R0021_PCM_MASTER_MODE 0x0021 +#define MAX98927_R0022_PCM_CLK_SETUP 0x0022 +#define MAX98927_R0023_PCM_SR_SETUP1 0x0023 +#define MAX98927_R0024_PCM_SR_SETUP2 0x0024 +#define MAX98927_R0025_PCM_TO_SPK_MONOMIX_A 0x0025 +#define MAX98927_R0026_PCM_TO_SPK_MONOMIX_B 0x0026 +#define MAX98927_R0027_ICC_RX_EN_A 0x0027 +#define MAX98927_R0028_ICC_RX_EN_B 0x0028 +#define MAX98927_R002B_ICC_TX_EN_A 0x002B +#define MAX98927_R002C_ICC_TX_EN_B 0x002C +#define MAX98927_R002E_ICC_HIZ_MANUAL_MODE 0x002E +#define MAX98927_R002F_ICC_TX_HIZ_EN_A 0x002F +#define MAX98927_R0030_ICC_TX_HIZ_EN_B 0x0030 +#define MAX98927_R0031_ICC_LNK_EN 0x0031 +#define MAX98927_R0032_PDM_TX_EN 0x0032 +#define MAX98927_R0033_PDM_TX_HIZ_CTRL 0x0033 +#define MAX98927_R0034_PDM_TX_CTRL 0x0034 +#define MAX98927_R0035_PDM_RX_CTRL 0x0035 +#define MAX98927_R0036_AMP_VOL_CTRL 0x0036 +#define MAX98927_R0037_AMP_DSP_CFG 0x0037 +#define MAX98927_R0038_TONE_GEN_DC_CFG 0x0038 +#define MAX98927_R0039_DRE_CTRL 0x0039 +#define MAX98927_R003A_AMP_EN 0x003A +#define MAX98927_R003B_SPK_SRC_SEL 0x003B +#define MAX98927_R003C_SPK_GAIN 0x003C +#define MAX98927_R003D_SSM_CFG 0x003D +#define MAX98927_R003E_MEAS_EN 0x003E +#define MAX98927_R003F_MEAS_DSP_CFG 0x003F +#define MAX98927_R0040_BOOST_CTRL0 0x0040 +#define MAX98927_R0041_BOOST_CTRL3 0x0041 +#define MAX98927_R0042_BOOST_CTRL1 0x0042 +#define MAX98927_R0043_MEAS_ADC_CFG 0x0043 +#define MAX98927_R0044_MEAS_ADC_BASE_MSB 0x0044 +#define MAX98927_R0045_MEAS_ADC_BASE_LSB 0x0045 +#define MAX98927_R0046_ADC_CH0_DIVIDE 0x0046 +#define MAX98927_R0047_ADC_CH1_DIVIDE 0x0047 +#define MAX98927_R0048_ADC_CH2_DIVIDE 0x0048 +#define MAX98927_R0049_ADC_CH0_FILT_CFG 0x0049 +#define MAX98927_R004A_ADC_CH1_FILT_CFG 0x004A +#define MAX98927_R004B_ADC_CH2_FILT_CFG 0x004B +#define MAX98927_R004C_MEAS_ADC_CH0_READ 0x004C +#define MAX98927_R004D_MEAS_ADC_CH1_READ 0x004D +#define MAX98927_R004E_MEAS_ADC_CH2_READ 0x004E +#define MAX98927_R0051_BROWNOUT_STATUS 0x0051 +#define MAX98927_R0052_BROWNOUT_EN 0x0052 +#define MAX98927_R0053_BROWNOUT_INFINITE_HOLD 0x0053 +#define MAX98927_R0054_BROWNOUT_INFINITE_HOLD_CLR 0x0054 +#define MAX98927_R0055_BROWNOUT_LVL_HOLD 0x0055 +#define MAX98927_R005A_BROWNOUT_LVL1_THRESH 0x005A +#define MAX98927_R005B_BROWNOUT_LVL2_THRESH 0x005B +#define MAX98927_R005C_BROWNOUT_LVL3_THRESH 0x005C +#define MAX98927_R005D_BROWNOUT_LVL4_THRESH 0x005D +#define MAX98927_R005E_BROWNOUT_THRESH_HYSTERYSIS 0x005E +#define MAX98927_R005F_BROWNOUT_AMP_LIMITER_ATK_REL 0x005F +#define MAX98927_R0060_BROWNOUT_AMP_GAIN_ATK_REL 0x0060 +#define MAX98927_R0061_BROWNOUT_AMP1_CLIP_MODE 0x0061 +#define MAX98927_R0072_BROWNOUT_LVL1_CUR_LIMIT 0x0072 +#define MAX98927_R0073_BROWNOUT_LVL1_AMP1_CTRL1 0x0073 +#define MAX98927_R0074_BROWNOUT_LVL1_AMP1_CTRL2 0x0074 +#define MAX98927_R0075_BROWNOUT_LVL1_AMP1_CTRL3 0x0075 +#define MAX98927_R0076_BROWNOUT_LVL2_CUR_LIMIT 0x0076 +#define MAX98927_R0077_BROWNOUT_LVL2_AMP1_CTRL1 0x0077 +#define MAX98927_R0078_BROWNOUT_LVL2_AMP1_CTRL2 0x0078 +#define MAX98927_R0079_BROWNOUT_LVL2_AMP1_CTRL3 0x0079 +#define MAX98927_R007A_BROWNOUT_LVL3_CUR_LIMIT 0x007A +#define MAX98927_R007B_BROWNOUT_LVL3_AMP1_CTRL1 0x007B +#define MAX98927_R007C_BROWNOUT_LVL3_AMP1_CTRL2 0x007C +#define MAX98927_R007D_BROWNOUT_LVL3_AMP1_CTRL3 0x007D +#define MAX98927_R007E_BROWNOUT_LVL4_CUR_LIMIT 0x007E +#define MAX98927_R007F_BROWNOUT_LVL4_AMP1_CTRL1 0x007F +#define MAX98927_R0080_BROWNOUT_LVL4_AMP1_CTRL2 0x0080 +#define MAX98927_R0081_BROWNOUT_LVL4_AMP1_CTRL3 0x0081 +#define MAX98927_R0082_ENV_TRACK_VOUT_HEADROOM 0x0082 +#define MAX98927_R0083_ENV_TRACK_BOOST_VOUT_DELAY 0x0083 +#define MAX98927_R0084_ENV_TRACK_REL_RATE 0x0084 +#define MAX98927_R0085_ENV_TRACK_HOLD_RATE 0x0085 +#define MAX98927_R0086_ENV_TRACK_CTRL 0x0086 +#define MAX98927_R0087_ENV_TRACK_BOOST_VOUT_READ 0x0087 +#define MAX98927_R00FF_GLOBAL_SHDN 0x00FF +#define MAX98927_R0100_SOFT_RESET 0x0100 +#define MAX98927_R01FF_REV_ID 0x01FF + +/* MAX98927_R0018_PCM_RX_EN_A */ +#define MAX98927_PCM_RX_CH0_EN (0x1 << 0) +#define MAX98927_PCM_RX_CH1_EN (0x1 << 1) +#define MAX98927_PCM_RX_CH2_EN (0x1 << 2) +#define MAX98927_PCM_RX_CH3_EN (0x1 << 3) +#define MAX98927_PCM_RX_CH4_EN (0x1 << 4) +#define MAX98927_PCM_RX_CH5_EN (0x1 << 5) +#define MAX98927_PCM_RX_CH6_EN (0x1 << 6) +#define MAX98927_PCM_RX_CH7_EN (0x1 << 7) + +/* MAX98927_R001A_PCM_TX_EN_A */ +#define MAX98927_PCM_TX_CH0_EN (0x1 << 0) +#define MAX98927_PCM_TX_CH1_EN (0x1 << 1) +#define MAX98927_PCM_TX_CH2_EN (0x1 << 2) +#define MAX98927_PCM_TX_CH3_EN (0x1 << 3) +#define MAX98927_PCM_TX_CH4_EN (0x1 << 4) +#define MAX98927_PCM_TX_CH5_EN (0x1 << 5) +#define MAX98927_PCM_TX_CH6_EN (0x1 << 6) +#define MAX98927_PCM_TX_CH7_EN (0x1 << 7) + +/* MAX98927_R001E_PCM_TX_CH_SRC_A */ +#define MAX98927_PCM_TX_CH_SRC_A_V_SHIFT (0) +#define MAX98927_PCM_TX_CH_SRC_A_I_SHIFT (4) + +/* MAX98927_R001F_PCM_TX_CH_SRC_B */ +#define MAX98927_PCM_TX_CH_INTERLEAVE_MASK (0x1 << 5) + +/* MAX98927_R0020_PCM_MODE_CFG */ +#define MAX98927_PCM_MODE_CFG_PCM_BCLKEDGE (0x1 << 2) +#define MAX98927_PCM_MODE_CFG_FORMAT_MASK (0x7 << 3) +#define MAX98927_PCM_MODE_CFG_FORMAT_SHIFT (3) +#define MAX98927_PCM_FORMAT_I2S (0x0 << 0) +#define MAX98927_PCM_FORMAT_LJ (0x1 << 0) + +#define MAX98927_PCM_MODE_CFG_CHANSZ_MASK (0x3 << 6) +#define MAX98927_PCM_MODE_CFG_CHANSZ_16 (0x1 << 6) +#define MAX98927_PCM_MODE_CFG_CHANSZ_24 (0x2 << 6) +#define MAX98927_PCM_MODE_CFG_CHANSZ_32 (0x3 << 6) + +/* MAX98927_R0021_PCM_MASTER_MODE */ +#define MAX98927_PCM_MASTER_MODE_MASK (0x3 << 0) +#define MAX98927_PCM_MASTER_MODE_SLAVE (0x0 << 0) +#define MAX98927_PCM_MASTER_MODE_MASTER (0x3 << 0) + +#define MAX98927_PCM_MASTER_MODE_MCLK_MASK (0xF << 2) +#define MAX98927_PCM_MASTER_MODE_MCLK_RATE_SHIFT (2) + +/* MAX98927_R0022_PCM_CLK_SETUP */ +#define MAX98927_PCM_CLK_SETUP_BSEL_MASK (0xF << 0) + +/* MAX98927_R0023_PCM_SR_SETUP1 */ +#define MAX98927_PCM_SR_SET1_SR_MASK (0xF << 0) + +#define MAX98927_PCM_SR_SET1_SR_8000 (0x0 << 0) +#define MAX98927_PCM_SR_SET1_SR_11025 (0x1 << 0) +#define MAX98927_PCM_SR_SET1_SR_12000 (0x2 << 0) +#define MAX98927_PCM_SR_SET1_SR_16000 (0x3 << 0) +#define MAX98927_PCM_SR_SET1_SR_22050 (0x4 << 0) +#define MAX98927_PCM_SR_SET1_SR_24000 (0x5 << 0) +#define MAX98927_PCM_SR_SET1_SR_32000 (0x6 << 0) +#define MAX98927_PCM_SR_SET1_SR_44100 (0x7 << 0) +#define MAX98927_PCM_SR_SET1_SR_48000 (0x8 << 0) + +/* MAX98927_R0024_PCM_SR_SETUP2 */ +#define MAX98927_PCM_SR_SET2_SR_MASK (0xF << 4) +#define MAX98927_PCM_SR_SET2_SR_SHIFT (4) +#define MAX98927_PCM_SR_SET2_IVADC_SR_MASK (0xf << 0) + +/* MAX98927_R0025_PCM_TO_SPK_MONOMIX_A */ +#define MAX98927_PCM_TO_SPK_MONOMIX_CFG_MASK (0x3 << 6) +#define MAX98927_PCM_TO_SPK_MONOMIX_CFG_SHIFT (6) + +/* MAX98927_R0035_PDM_RX_CTRL */ +#define MAX98927_PDM_RX_EN_MASK (0x1 << 0) + +/* MAX98927_R0036_AMP_VOL_CTRL */ +#define MAX98927_AMP_VOL_SEL (0x1 << 7) +#define MAX98927_AMP_VOL_SEL_WIDTH (1) +#define MAX98927_AMP_VOL_SEL_SHIFT (7) +#define MAX98927_AMP_VOL_MASK (0x7f << 0) +#define MAX98927_AMP_VOL_WIDTH (7) +#define MAX98927_AMP_VOL_SHIFT (0) + +/* MAX98927_R0037_AMP_DSP_CFG */ +#define MAX98927_AMP_DSP_CFG_DCBLK_EN (0x1 << 0) +#define MAX98927_AMP_DSP_CFG_DITH_EN (0x1 << 1) +#define MAX98927_AMP_DSP_CFG_RMP_BYPASS (0x1 << 4) +#define MAX98927_AMP_DSP_CFG_DAC_INV (0x1 << 5) +#define MAX98927_AMP_DSP_CFG_RMP_SHIFT (4) + +/* MAX98927_R0039_DRE_CTRL */ +#define MAX98927_DRE_CTRL_DRE_EN (0x1 << 0) +#define MAX98927_DRE_EN_SHIFT 0x1 + +/* MAX98927_R003A_AMP_EN */ +#define MAX98927_AMP_EN_MASK (0x1 << 0) + +/* MAX98927_R003B_SPK_SRC_SEL */ +#define MAX98927_SPK_SRC_MASK (0x3 << 0) + +/* MAX98927_R003C_SPK_GAIN */ +#define MAX98927_SPK_PCM_GAIN_MASK (0x7 << 0) +#define MAX98927_SPK_PDM_GAIN_MASK (0x7 << 4) +#define MAX98927_SPK_GAIN_WIDTH (3) + +/* MAX98927_R003E_MEAS_EN */ +#define MAX98927_MEAS_V_EN (0x1 << 0) +#define MAX98927_MEAS_I_EN (0x1 << 1) + +/* MAX98927_R0040_BOOST_CTRL0 */ +#define MAX98927_BOOST_CTRL0_VOUT_MASK (0x1f << 0) +#define MAX98927_BOOST_CTRL0_PVDD_MASK (0x1 << 7) +#define MAX98927_BOOST_CTRL0_PVDD_EN_SHIFT (7) + +/* MAX98927_R0052_BROWNOUT_EN */ +#define MAX98927_BROWNOUT_BDE_EN (0x1 << 0) +#define MAX98927_BROWNOUT_AMP_EN (0x1 << 1) +#define MAX98927_BROWNOUT_DSP_EN (0x1 << 2) +#define MAX98927_BROWNOUT_DSP_SHIFT (2) + +/* MAX98927_R0100_SOFT_RESET */ +#define MAX98927_SOFT_RESET (0x1 << 0) + +/* MAX98927_R00FF_GLOBAL_SHDN */ +#define MAX98927_GLOBAL_EN_MASK (0x1 << 0) + +struct max98927_priv { + struct regmap *regmap; + struct snd_soc_codec *codec; + struct max98927_pdata *pdata; + unsigned int spk_gain; + unsigned int sysclk; + unsigned int v_l_slot; + unsigned int i_l_slot; + bool interleave_mode; + unsigned int ch_size; + unsigned int rate; + unsigned int iface; + unsigned int master; + unsigned int digital_gain; +}; +#endif diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index 9e8f0f4aa51aea4f0d4eced28d078e99d83768a3..c8bcb1db966de689d2e95413e6c618e7e6b8ad0e 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -39,147 +39,147 @@ /* scaling for mclk from sysclk_src output */ static const struct nau8540_fll_attr mclk_src_scaling[] = { - { 1, 0x0 }, - { 2, 0x2 }, - { 4, 0x3 }, - { 8, 0x4 }, - { 16, 0x5 }, - { 32, 0x6 }, - { 3, 0x7 }, - { 6, 0xa }, - { 12, 0xb }, - { 24, 0xc }, + { 1, 0x0 }, + { 2, 0x2 }, + { 4, 0x3 }, + { 8, 0x4 }, + { 16, 0x5 }, + { 32, 0x6 }, + { 3, 0x7 }, + { 6, 0xa }, + { 12, 0xb }, + { 24, 0xc }, }; /* ratio for input clk freq */ static const struct nau8540_fll_attr fll_ratio[] = { - { 512000, 0x01 }, - { 256000, 0x02 }, - { 128000, 0x04 }, - { 64000, 0x08 }, - { 32000, 0x10 }, - { 8000, 0x20 }, - { 4000, 0x40 }, + { 512000, 0x01 }, + { 256000, 0x02 }, + { 128000, 0x04 }, + { 64000, 0x08 }, + { 32000, 0x10 }, + { 8000, 0x20 }, + { 4000, 0x40 }, }; static const struct nau8540_fll_attr fll_pre_scalar[] = { - { 1, 0x0 }, - { 2, 0x1 }, - { 4, 0x2 }, - { 8, 0x3 }, + { 1, 0x0 }, + { 2, 0x1 }, + { 4, 0x2 }, + { 8, 0x3 }, }; /* over sampling rate */ static const struct nau8540_osr_attr osr_adc_sel[] = { - { 32, 3 }, /* OSR 32, SRC 1/8 */ - { 64, 2 }, /* OSR 64, SRC 1/4 */ - { 128, 1 }, /* OSR 128, SRC 1/2 */ - { 256, 0 }, /* OSR 256, SRC 1 */ + { 32, 3 }, /* OSR 32, SRC 1/8 */ + { 64, 2 }, /* OSR 64, SRC 1/4 */ + { 128, 1 }, /* OSR 128, SRC 1/2 */ + { 256, 0 }, /* OSR 256, SRC 1 */ }; static const struct reg_default nau8540_reg_defaults[] = { - {NAU8540_REG_POWER_MANAGEMENT, 0x0000}, - {NAU8540_REG_CLOCK_CTRL, 0x0000}, - {NAU8540_REG_CLOCK_SRC, 0x0000}, - {NAU8540_REG_FLL1, 0x0001}, - {NAU8540_REG_FLL2, 0x3126}, - {NAU8540_REG_FLL3, 0x0008}, - {NAU8540_REG_FLL4, 0x0010}, - {NAU8540_REG_FLL5, 0xC000}, - {NAU8540_REG_FLL6, 0x6000}, - {NAU8540_REG_FLL_VCO_RSV, 0xF13C}, - {NAU8540_REG_PCM_CTRL0, 0x000B}, - {NAU8540_REG_PCM_CTRL1, 0x3010}, - {NAU8540_REG_PCM_CTRL2, 0x0800}, - {NAU8540_REG_PCM_CTRL3, 0x0000}, - {NAU8540_REG_PCM_CTRL4, 0x000F}, - {NAU8540_REG_ALC_CONTROL_1, 0x0000}, - {NAU8540_REG_ALC_CONTROL_2, 0x700B}, - {NAU8540_REG_ALC_CONTROL_3, 0x0022}, - {NAU8540_REG_ALC_CONTROL_4, 0x1010}, - {NAU8540_REG_ALC_CONTROL_5, 0x1010}, - {NAU8540_REG_NOTCH_FIL1_CH1, 0x0000}, - {NAU8540_REG_NOTCH_FIL2_CH1, 0x0000}, - {NAU8540_REG_NOTCH_FIL1_CH2, 0x0000}, - {NAU8540_REG_NOTCH_FIL2_CH2, 0x0000}, - {NAU8540_REG_NOTCH_FIL1_CH3, 0x0000}, - {NAU8540_REG_NOTCH_FIL2_CH3, 0x0000}, - {NAU8540_REG_NOTCH_FIL1_CH4, 0x0000}, - {NAU8540_REG_NOTCH_FIL2_CH4, 0x0000}, - {NAU8540_REG_HPF_FILTER_CH12, 0x0000}, - {NAU8540_REG_HPF_FILTER_CH34, 0x0000}, - {NAU8540_REG_ADC_SAMPLE_RATE, 0x0002}, - {NAU8540_REG_DIGITAL_GAIN_CH1, 0x0400}, - {NAU8540_REG_DIGITAL_GAIN_CH2, 0x0400}, - {NAU8540_REG_DIGITAL_GAIN_CH3, 0x0400}, - {NAU8540_REG_DIGITAL_GAIN_CH4, 0x0400}, - {NAU8540_REG_DIGITAL_MUX, 0x00E4}, - {NAU8540_REG_GPIO_CTRL, 0x0000}, - {NAU8540_REG_MISC_CTRL, 0x0000}, - {NAU8540_REG_I2C_CTRL, 0xEFFF}, - {NAU8540_REG_VMID_CTRL, 0x0000}, - {NAU8540_REG_MUTE, 0x0000}, - {NAU8540_REG_ANALOG_ADC1, 0x0011}, - {NAU8540_REG_ANALOG_ADC2, 0x0020}, - {NAU8540_REG_ANALOG_PWR, 0x0000}, - {NAU8540_REG_MIC_BIAS, 0x0004}, - {NAU8540_REG_REFERENCE, 0x0000}, - {NAU8540_REG_FEPGA1, 0x0000}, - {NAU8540_REG_FEPGA2, 0x0000}, - {NAU8540_REG_FEPGA3, 0x0101}, - {NAU8540_REG_FEPGA4, 0x0101}, - {NAU8540_REG_PWR, 0x0000}, + {NAU8540_REG_POWER_MANAGEMENT, 0x0000}, + {NAU8540_REG_CLOCK_CTRL, 0x0000}, + {NAU8540_REG_CLOCK_SRC, 0x0000}, + {NAU8540_REG_FLL1, 0x0001}, + {NAU8540_REG_FLL2, 0x3126}, + {NAU8540_REG_FLL3, 0x0008}, + {NAU8540_REG_FLL4, 0x0010}, + {NAU8540_REG_FLL5, 0xC000}, + {NAU8540_REG_FLL6, 0x6000}, + {NAU8540_REG_FLL_VCO_RSV, 0xF13C}, + {NAU8540_REG_PCM_CTRL0, 0x000B}, + {NAU8540_REG_PCM_CTRL1, 0x3010}, + {NAU8540_REG_PCM_CTRL2, 0x0800}, + {NAU8540_REG_PCM_CTRL3, 0x0000}, + {NAU8540_REG_PCM_CTRL4, 0x000F}, + {NAU8540_REG_ALC_CONTROL_1, 0x0000}, + {NAU8540_REG_ALC_CONTROL_2, 0x700B}, + {NAU8540_REG_ALC_CONTROL_3, 0x0022}, + {NAU8540_REG_ALC_CONTROL_4, 0x1010}, + {NAU8540_REG_ALC_CONTROL_5, 0x1010}, + {NAU8540_REG_NOTCH_FIL1_CH1, 0x0000}, + {NAU8540_REG_NOTCH_FIL2_CH1, 0x0000}, + {NAU8540_REG_NOTCH_FIL1_CH2, 0x0000}, + {NAU8540_REG_NOTCH_FIL2_CH2, 0x0000}, + {NAU8540_REG_NOTCH_FIL1_CH3, 0x0000}, + {NAU8540_REG_NOTCH_FIL2_CH3, 0x0000}, + {NAU8540_REG_NOTCH_FIL1_CH4, 0x0000}, + {NAU8540_REG_NOTCH_FIL2_CH4, 0x0000}, + {NAU8540_REG_HPF_FILTER_CH12, 0x0000}, + {NAU8540_REG_HPF_FILTER_CH34, 0x0000}, + {NAU8540_REG_ADC_SAMPLE_RATE, 0x0002}, + {NAU8540_REG_DIGITAL_GAIN_CH1, 0x0400}, + {NAU8540_REG_DIGITAL_GAIN_CH2, 0x0400}, + {NAU8540_REG_DIGITAL_GAIN_CH3, 0x0400}, + {NAU8540_REG_DIGITAL_GAIN_CH4, 0x0400}, + {NAU8540_REG_DIGITAL_MUX, 0x00E4}, + {NAU8540_REG_GPIO_CTRL, 0x0000}, + {NAU8540_REG_MISC_CTRL, 0x0000}, + {NAU8540_REG_I2C_CTRL, 0xEFFF}, + {NAU8540_REG_VMID_CTRL, 0x0000}, + {NAU8540_REG_MUTE, 0x0000}, + {NAU8540_REG_ANALOG_ADC1, 0x0011}, + {NAU8540_REG_ANALOG_ADC2, 0x0020}, + {NAU8540_REG_ANALOG_PWR, 0x0000}, + {NAU8540_REG_MIC_BIAS, 0x0004}, + {NAU8540_REG_REFERENCE, 0x0000}, + {NAU8540_REG_FEPGA1, 0x0000}, + {NAU8540_REG_FEPGA2, 0x0000}, + {NAU8540_REG_FEPGA3, 0x0101}, + {NAU8540_REG_FEPGA4, 0x0101}, + {NAU8540_REG_PWR, 0x0000}, }; static bool nau8540_readable_reg(struct device *dev, unsigned int reg) { - switch (reg) { - case NAU8540_REG_POWER_MANAGEMENT ... NAU8540_REG_FLL_VCO_RSV: - case NAU8540_REG_PCM_CTRL0 ... NAU8540_REG_PCM_CTRL4: - case NAU8540_REG_ALC_CONTROL_1 ... NAU8540_REG_ALC_CONTROL_5: - case NAU8540_REG_ALC_GAIN_CH12 ... NAU8540_REG_ADC_SAMPLE_RATE: - case NAU8540_REG_DIGITAL_GAIN_CH1 ... NAU8540_REG_DIGITAL_MUX: - case NAU8540_REG_P2P_CH1 ... NAU8540_REG_I2C_CTRL: - case NAU8540_REG_I2C_DEVICE_ID: - case NAU8540_REG_VMID_CTRL ... NAU8540_REG_MUTE: - case NAU8540_REG_ANALOG_ADC1 ... NAU8540_REG_PWR: - return true; - default: - return false; - } + switch (reg) { + case NAU8540_REG_POWER_MANAGEMENT ... NAU8540_REG_FLL_VCO_RSV: + case NAU8540_REG_PCM_CTRL0 ... NAU8540_REG_PCM_CTRL4: + case NAU8540_REG_ALC_CONTROL_1 ... NAU8540_REG_ALC_CONTROL_5: + case NAU8540_REG_ALC_GAIN_CH12 ... NAU8540_REG_ADC_SAMPLE_RATE: + case NAU8540_REG_DIGITAL_GAIN_CH1 ... NAU8540_REG_DIGITAL_MUX: + case NAU8540_REG_P2P_CH1 ... NAU8540_REG_I2C_CTRL: + case NAU8540_REG_I2C_DEVICE_ID: + case NAU8540_REG_VMID_CTRL ... NAU8540_REG_MUTE: + case NAU8540_REG_ANALOG_ADC1 ... NAU8540_REG_PWR: + return true; + default: + return false; + } } static bool nau8540_writeable_reg(struct device *dev, unsigned int reg) { - switch (reg) { - case NAU8540_REG_SW_RESET ... NAU8540_REG_FLL_VCO_RSV: - case NAU8540_REG_PCM_CTRL0 ... NAU8540_REG_PCM_CTRL4: - case NAU8540_REG_ALC_CONTROL_1 ... NAU8540_REG_ALC_CONTROL_5: - case NAU8540_REG_NOTCH_FIL1_CH1 ... NAU8540_REG_ADC_SAMPLE_RATE: - case NAU8540_REG_DIGITAL_GAIN_CH1 ... NAU8540_REG_DIGITAL_MUX: - case NAU8540_REG_GPIO_CTRL ... NAU8540_REG_I2C_CTRL: - case NAU8540_REG_RST: - case NAU8540_REG_VMID_CTRL ... NAU8540_REG_MUTE: - case NAU8540_REG_ANALOG_ADC1 ... NAU8540_REG_PWR: - return true; - default: - return false; - } + switch (reg) { + case NAU8540_REG_SW_RESET ... NAU8540_REG_FLL_VCO_RSV: + case NAU8540_REG_PCM_CTRL0 ... NAU8540_REG_PCM_CTRL4: + case NAU8540_REG_ALC_CONTROL_1 ... NAU8540_REG_ALC_CONTROL_5: + case NAU8540_REG_NOTCH_FIL1_CH1 ... NAU8540_REG_ADC_SAMPLE_RATE: + case NAU8540_REG_DIGITAL_GAIN_CH1 ... NAU8540_REG_DIGITAL_MUX: + case NAU8540_REG_GPIO_CTRL ... NAU8540_REG_I2C_CTRL: + case NAU8540_REG_RST: + case NAU8540_REG_VMID_CTRL ... NAU8540_REG_MUTE: + case NAU8540_REG_ANALOG_ADC1 ... NAU8540_REG_PWR: + return true; + default: + return false; + } } static bool nau8540_volatile_reg(struct device *dev, unsigned int reg) { - switch (reg) { - case NAU8540_REG_SW_RESET: - case NAU8540_REG_ALC_GAIN_CH12 ... NAU8540_REG_ALC_STATUS: - case NAU8540_REG_P2P_CH1 ... NAU8540_REG_PEAK_CH4: - case NAU8540_REG_I2C_DEVICE_ID: - case NAU8540_REG_RST: - return true; - default: - return false; - } + switch (reg) { + case NAU8540_REG_SW_RESET: + case NAU8540_REG_ALC_GAIN_CH12 ... NAU8540_REG_ALC_STATUS: + case NAU8540_REG_P2P_CH1 ... NAU8540_REG_PEAK_CH4: + case NAU8540_REG_I2C_DEVICE_ID: + case NAU8540_REG_RST: + return true; + default: + return false; + } } @@ -187,255 +187,255 @@ static const DECLARE_TLV_DB_MINMAX(adc_vol_tlv, -12800, 3600); static const DECLARE_TLV_DB_MINMAX(fepga_gain_tlv, -100, 3600); static const struct snd_kcontrol_new nau8540_snd_controls[] = { - SOC_SINGLE_TLV("Mic1 Volume", NAU8540_REG_DIGITAL_GAIN_CH1, - 0, 0x520, 0, adc_vol_tlv), - SOC_SINGLE_TLV("Mic2 Volume", NAU8540_REG_DIGITAL_GAIN_CH2, - 0, 0x520, 0, adc_vol_tlv), - SOC_SINGLE_TLV("Mic3 Volume", NAU8540_REG_DIGITAL_GAIN_CH3, - 0, 0x520, 0, adc_vol_tlv), - SOC_SINGLE_TLV("Mic4 Volume", NAU8540_REG_DIGITAL_GAIN_CH4, - 0, 0x520, 0, adc_vol_tlv), - - SOC_SINGLE_TLV("Frontend PGA1 Volume", NAU8540_REG_FEPGA3, - 0, 0x25, 0, fepga_gain_tlv), - SOC_SINGLE_TLV("Frontend PGA2 Volume", NAU8540_REG_FEPGA3, - 8, 0x25, 0, fepga_gain_tlv), - SOC_SINGLE_TLV("Frontend PGA3 Volume", NAU8540_REG_FEPGA4, - 0, 0x25, 0, fepga_gain_tlv), - SOC_SINGLE_TLV("Frontend PGA4 Volume", NAU8540_REG_FEPGA4, - 8, 0x25, 0, fepga_gain_tlv), + SOC_SINGLE_TLV("Mic1 Volume", NAU8540_REG_DIGITAL_GAIN_CH1, + 0, 0x520, 0, adc_vol_tlv), + SOC_SINGLE_TLV("Mic2 Volume", NAU8540_REG_DIGITAL_GAIN_CH2, + 0, 0x520, 0, adc_vol_tlv), + SOC_SINGLE_TLV("Mic3 Volume", NAU8540_REG_DIGITAL_GAIN_CH3, + 0, 0x520, 0, adc_vol_tlv), + SOC_SINGLE_TLV("Mic4 Volume", NAU8540_REG_DIGITAL_GAIN_CH4, + 0, 0x520, 0, adc_vol_tlv), + + SOC_SINGLE_TLV("Frontend PGA1 Volume", NAU8540_REG_FEPGA3, + 0, 0x25, 0, fepga_gain_tlv), + SOC_SINGLE_TLV("Frontend PGA2 Volume", NAU8540_REG_FEPGA3, + 8, 0x25, 0, fepga_gain_tlv), + SOC_SINGLE_TLV("Frontend PGA3 Volume", NAU8540_REG_FEPGA4, + 0, 0x25, 0, fepga_gain_tlv), + SOC_SINGLE_TLV("Frontend PGA4 Volume", NAU8540_REG_FEPGA4, + 8, 0x25, 0, fepga_gain_tlv), }; static const char * const adc_channel[] = { - "ADC channel 1", "ADC channel 2", "ADC channel 3", "ADC channel 4" + "ADC channel 1", "ADC channel 2", "ADC channel 3", "ADC channel 4" }; static SOC_ENUM_SINGLE_DECL( - digital_ch4_enum, NAU8540_REG_DIGITAL_MUX, 6, adc_channel); + digital_ch4_enum, NAU8540_REG_DIGITAL_MUX, 6, adc_channel); static const struct snd_kcontrol_new digital_ch4_mux = - SOC_DAPM_ENUM("Digital CH4 Select", digital_ch4_enum); + SOC_DAPM_ENUM("Digital CH4 Select", digital_ch4_enum); static SOC_ENUM_SINGLE_DECL( - digital_ch3_enum, NAU8540_REG_DIGITAL_MUX, 4, adc_channel); + digital_ch3_enum, NAU8540_REG_DIGITAL_MUX, 4, adc_channel); static const struct snd_kcontrol_new digital_ch3_mux = - SOC_DAPM_ENUM("Digital CH3 Select", digital_ch3_enum); + SOC_DAPM_ENUM("Digital CH3 Select", digital_ch3_enum); static SOC_ENUM_SINGLE_DECL( - digital_ch2_enum, NAU8540_REG_DIGITAL_MUX, 2, adc_channel); + digital_ch2_enum, NAU8540_REG_DIGITAL_MUX, 2, adc_channel); static const struct snd_kcontrol_new digital_ch2_mux = - SOC_DAPM_ENUM("Digital CH2 Select", digital_ch2_enum); + SOC_DAPM_ENUM("Digital CH2 Select", digital_ch2_enum); static SOC_ENUM_SINGLE_DECL( - digital_ch1_enum, NAU8540_REG_DIGITAL_MUX, 0, adc_channel); + digital_ch1_enum, NAU8540_REG_DIGITAL_MUX, 0, adc_channel); static const struct snd_kcontrol_new digital_ch1_mux = - SOC_DAPM_ENUM("Digital CH1 Select", digital_ch1_enum); + SOC_DAPM_ENUM("Digital CH1 Select", digital_ch1_enum); static const struct snd_soc_dapm_widget nau8540_dapm_widgets[] = { - SND_SOC_DAPM_SUPPLY("MICBIAS2", NAU8540_REG_MIC_BIAS, 11, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("MICBIAS1", NAU8540_REG_MIC_BIAS, 10, 0, NULL, 0), - - SND_SOC_DAPM_INPUT("MIC1"), - SND_SOC_DAPM_INPUT("MIC2"), - SND_SOC_DAPM_INPUT("MIC3"), - SND_SOC_DAPM_INPUT("MIC4"), - - SND_SOC_DAPM_PGA("Frontend PGA1", NAU8540_REG_PWR, 12, 0, NULL, 0), - SND_SOC_DAPM_PGA("Frontend PGA2", NAU8540_REG_PWR, 13, 0, NULL, 0), - SND_SOC_DAPM_PGA("Frontend PGA3", NAU8540_REG_PWR, 14, 0, NULL, 0), - SND_SOC_DAPM_PGA("Frontend PGA4", NAU8540_REG_PWR, 15, 0, NULL, 0), - - SND_SOC_DAPM_ADC("ADC1", NULL, - NAU8540_REG_POWER_MANAGEMENT, 0, 0), - SND_SOC_DAPM_ADC("ADC2", NULL, - NAU8540_REG_POWER_MANAGEMENT, 1, 0), - SND_SOC_DAPM_ADC("ADC3", NULL, - NAU8540_REG_POWER_MANAGEMENT, 2, 0), - SND_SOC_DAPM_ADC("ADC4", NULL, - NAU8540_REG_POWER_MANAGEMENT, 3, 0), - - SND_SOC_DAPM_PGA("ADC CH1", NAU8540_REG_ANALOG_PWR, 0, 0, NULL, 0), - SND_SOC_DAPM_PGA("ADC CH2", NAU8540_REG_ANALOG_PWR, 1, 0, NULL, 0), - SND_SOC_DAPM_PGA("ADC CH3", NAU8540_REG_ANALOG_PWR, 2, 0, NULL, 0), - SND_SOC_DAPM_PGA("ADC CH4", NAU8540_REG_ANALOG_PWR, 3, 0, NULL, 0), - - SND_SOC_DAPM_MUX("Digital CH4 Mux", - SND_SOC_NOPM, 0, 0, &digital_ch4_mux), - SND_SOC_DAPM_MUX("Digital CH3 Mux", - SND_SOC_NOPM, 0, 0, &digital_ch3_mux), - SND_SOC_DAPM_MUX("Digital CH2 Mux", - SND_SOC_NOPM, 0, 0, &digital_ch2_mux), - SND_SOC_DAPM_MUX("Digital CH1 Mux", - SND_SOC_NOPM, 0, 0, &digital_ch1_mux), - - SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_SUPPLY("MICBIAS2", NAU8540_REG_MIC_BIAS, 11, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY("MICBIAS1", NAU8540_REG_MIC_BIAS, 10, 0, NULL, 0), + + SND_SOC_DAPM_INPUT("MIC1"), + SND_SOC_DAPM_INPUT("MIC2"), + SND_SOC_DAPM_INPUT("MIC3"), + SND_SOC_DAPM_INPUT("MIC4"), + + SND_SOC_DAPM_PGA("Frontend PGA1", NAU8540_REG_PWR, 12, 0, NULL, 0), + SND_SOC_DAPM_PGA("Frontend PGA2", NAU8540_REG_PWR, 13, 0, NULL, 0), + SND_SOC_DAPM_PGA("Frontend PGA3", NAU8540_REG_PWR, 14, 0, NULL, 0), + SND_SOC_DAPM_PGA("Frontend PGA4", NAU8540_REG_PWR, 15, 0, NULL, 0), + + SND_SOC_DAPM_ADC("ADC1", NULL, + NAU8540_REG_POWER_MANAGEMENT, 0, 0), + SND_SOC_DAPM_ADC("ADC2", NULL, + NAU8540_REG_POWER_MANAGEMENT, 1, 0), + SND_SOC_DAPM_ADC("ADC3", NULL, + NAU8540_REG_POWER_MANAGEMENT, 2, 0), + SND_SOC_DAPM_ADC("ADC4", NULL, + NAU8540_REG_POWER_MANAGEMENT, 3, 0), + + SND_SOC_DAPM_PGA("ADC CH1", NAU8540_REG_ANALOG_PWR, 0, 0, NULL, 0), + SND_SOC_DAPM_PGA("ADC CH2", NAU8540_REG_ANALOG_PWR, 1, 0, NULL, 0), + SND_SOC_DAPM_PGA("ADC CH3", NAU8540_REG_ANALOG_PWR, 2, 0, NULL, 0), + SND_SOC_DAPM_PGA("ADC CH4", NAU8540_REG_ANALOG_PWR, 3, 0, NULL, 0), + + SND_SOC_DAPM_MUX("Digital CH4 Mux", + SND_SOC_NOPM, 0, 0, &digital_ch4_mux), + SND_SOC_DAPM_MUX("Digital CH3 Mux", + SND_SOC_NOPM, 0, 0, &digital_ch3_mux), + SND_SOC_DAPM_MUX("Digital CH2 Mux", + SND_SOC_NOPM, 0, 0, &digital_ch2_mux), + SND_SOC_DAPM_MUX("Digital CH1 Mux", + SND_SOC_NOPM, 0, 0, &digital_ch1_mux), + + SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0), }; static const struct snd_soc_dapm_route nau8540_dapm_routes[] = { - {"Frontend PGA1", NULL, "MIC1"}, - {"Frontend PGA2", NULL, "MIC2"}, - {"Frontend PGA3", NULL, "MIC3"}, - {"Frontend PGA4", NULL, "MIC4"}, - - {"ADC1", NULL, "Frontend PGA1"}, - {"ADC2", NULL, "Frontend PGA2"}, - {"ADC3", NULL, "Frontend PGA3"}, - {"ADC4", NULL, "Frontend PGA4"}, - - {"ADC CH1", NULL, "ADC1"}, - {"ADC CH2", NULL, "ADC2"}, - {"ADC CH3", NULL, "ADC3"}, - {"ADC CH4", NULL, "ADC4"}, - - {"ADC1", NULL, "MICBIAS1"}, - {"ADC2", NULL, "MICBIAS1"}, - {"ADC3", NULL, "MICBIAS2"}, - {"ADC4", NULL, "MICBIAS2"}, - - {"Digital CH1 Mux", "ADC channel 1", "ADC CH1"}, - {"Digital CH1 Mux", "ADC channel 2", "ADC CH2"}, - {"Digital CH1 Mux", "ADC channel 3", "ADC CH3"}, - {"Digital CH1 Mux", "ADC channel 4", "ADC CH4"}, - - {"Digital CH2 Mux", "ADC channel 1", "ADC CH1"}, - {"Digital CH2 Mux", "ADC channel 2", "ADC CH2"}, - {"Digital CH2 Mux", "ADC channel 3", "ADC CH3"}, - {"Digital CH2 Mux", "ADC channel 4", "ADC CH4"}, - - {"Digital CH3 Mux", "ADC channel 1", "ADC CH1"}, - {"Digital CH3 Mux", "ADC channel 2", "ADC CH2"}, - {"Digital CH3 Mux", "ADC channel 3", "ADC CH3"}, - {"Digital CH3 Mux", "ADC channel 4", "ADC CH4"}, - - {"Digital CH4 Mux", "ADC channel 1", "ADC CH1"}, - {"Digital CH4 Mux", "ADC channel 2", "ADC CH2"}, - {"Digital CH4 Mux", "ADC channel 3", "ADC CH3"}, - {"Digital CH4 Mux", "ADC channel 4", "ADC CH4"}, - - {"AIFTX", NULL, "Digital CH1 Mux"}, - {"AIFTX", NULL, "Digital CH2 Mux"}, - {"AIFTX", NULL, "Digital CH3 Mux"}, - {"AIFTX", NULL, "Digital CH4 Mux"}, + {"Frontend PGA1", NULL, "MIC1"}, + {"Frontend PGA2", NULL, "MIC2"}, + {"Frontend PGA3", NULL, "MIC3"}, + {"Frontend PGA4", NULL, "MIC4"}, + + {"ADC1", NULL, "Frontend PGA1"}, + {"ADC2", NULL, "Frontend PGA2"}, + {"ADC3", NULL, "Frontend PGA3"}, + {"ADC4", NULL, "Frontend PGA4"}, + + {"ADC CH1", NULL, "ADC1"}, + {"ADC CH2", NULL, "ADC2"}, + {"ADC CH3", NULL, "ADC3"}, + {"ADC CH4", NULL, "ADC4"}, + + {"ADC1", NULL, "MICBIAS1"}, + {"ADC2", NULL, "MICBIAS1"}, + {"ADC3", NULL, "MICBIAS2"}, + {"ADC4", NULL, "MICBIAS2"}, + + {"Digital CH1 Mux", "ADC channel 1", "ADC CH1"}, + {"Digital CH1 Mux", "ADC channel 2", "ADC CH2"}, + {"Digital CH1 Mux", "ADC channel 3", "ADC CH3"}, + {"Digital CH1 Mux", "ADC channel 4", "ADC CH4"}, + + {"Digital CH2 Mux", "ADC channel 1", "ADC CH1"}, + {"Digital CH2 Mux", "ADC channel 2", "ADC CH2"}, + {"Digital CH2 Mux", "ADC channel 3", "ADC CH3"}, + {"Digital CH2 Mux", "ADC channel 4", "ADC CH4"}, + + {"Digital CH3 Mux", "ADC channel 1", "ADC CH1"}, + {"Digital CH3 Mux", "ADC channel 2", "ADC CH2"}, + {"Digital CH3 Mux", "ADC channel 3", "ADC CH3"}, + {"Digital CH3 Mux", "ADC channel 4", "ADC CH4"}, + + {"Digital CH4 Mux", "ADC channel 1", "ADC CH1"}, + {"Digital CH4 Mux", "ADC channel 2", "ADC CH2"}, + {"Digital CH4 Mux", "ADC channel 3", "ADC CH3"}, + {"Digital CH4 Mux", "ADC channel 4", "ADC CH4"}, + + {"AIFTX", NULL, "Digital CH1 Mux"}, + {"AIFTX", NULL, "Digital CH2 Mux"}, + {"AIFTX", NULL, "Digital CH3 Mux"}, + {"AIFTX", NULL, "Digital CH4 Mux"}, }; static int nau8540_clock_check(struct nau8540 *nau8540, int rate, int osr) { - int osrate; + int osrate; - if (osr >= ARRAY_SIZE(osr_adc_sel)) - return -EINVAL; - osrate = osr_adc_sel[osr].osr; + if (osr >= ARRAY_SIZE(osr_adc_sel)) + return -EINVAL; + osrate = osr_adc_sel[osr].osr; - if (rate * osr > CLK_ADC_MAX) { - dev_err(nau8540->dev, "exceed the maximum frequency of CLK_ADC\n"); - return -EINVAL; - } + if (rate * osr > CLK_ADC_MAX) { + dev_err(nau8540->dev, "exceed the maximum frequency of CLK_ADC\n"); + return -EINVAL; + } - return 0; + return 0; } static int nau8540_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) + struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - struct snd_soc_codec *codec = dai->codec; - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - unsigned int val_len = 0, osr; - - /* CLK_ADC = OSR * FS - * ADC clock frequency is defined as Over Sampling Rate (OSR) - * multiplied by the audio sample rate (Fs). Note that the OSR and Fs - * values must be selected such that the maximum frequency is less - * than 6.144 MHz. - */ - regmap_read(nau8540->regmap, NAU8540_REG_ADC_SAMPLE_RATE, &osr); - osr &= NAU8540_ADC_OSR_MASK; - if (nau8540_clock_check(nau8540, params_rate(params), osr)) - return -EINVAL; - regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, - NAU8540_CLK_ADC_SRC_MASK, - osr_adc_sel[osr].clk_src << NAU8540_CLK_ADC_SRC_SFT); - - switch (params_width(params)) { - case 16: - val_len |= NAU8540_I2S_DL_16; - break; - case 20: - val_len |= NAU8540_I2S_DL_20; - break; - case 24: - val_len |= NAU8540_I2S_DL_24; - break; - case 32: - val_len |= NAU8540_I2S_DL_32; - break; - default: - return -EINVAL; - } - - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL0, - NAU8540_I2S_DL_MASK, val_len); - - return 0; + struct snd_soc_codec *codec = dai->codec; + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + unsigned int val_len = 0, osr; + + /* CLK_ADC = OSR * FS + * ADC clock frequency is defined as Over Sampling Rate (OSR) + * multiplied by the audio sample rate (Fs). Note that the OSR and Fs + * values must be selected such that the maximum frequency is less + * than 6.144 MHz. + */ + regmap_read(nau8540->regmap, NAU8540_REG_ADC_SAMPLE_RATE, &osr); + osr &= NAU8540_ADC_OSR_MASK; + if (nau8540_clock_check(nau8540, params_rate(params), osr)) + return -EINVAL; + regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, + NAU8540_CLK_ADC_SRC_MASK, + osr_adc_sel[osr].clk_src << NAU8540_CLK_ADC_SRC_SFT); + + switch (params_width(params)) { + case 16: + val_len |= NAU8540_I2S_DL_16; + break; + case 20: + val_len |= NAU8540_I2S_DL_20; + break; + case 24: + val_len |= NAU8540_I2S_DL_24; + break; + case 32: + val_len |= NAU8540_I2S_DL_32; + break; + default: + return -EINVAL; + } + + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL0, + NAU8540_I2S_DL_MASK, val_len); + + return 0; } static int nau8540_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { - struct snd_soc_codec *codec = dai->codec; - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - unsigned int ctrl1_val = 0, ctrl2_val = 0; - - switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { - case SND_SOC_DAIFMT_CBM_CFM: - ctrl2_val |= NAU8540_I2S_MS_MASTER; - break; - case SND_SOC_DAIFMT_CBS_CFS: - break; - default: - return -EINVAL; - } - - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - break; - case SND_SOC_DAIFMT_IB_NF: - ctrl1_val |= NAU8540_I2S_BP_INV; - break; - default: - return -EINVAL; - } - - switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { - case SND_SOC_DAIFMT_I2S: - ctrl1_val |= NAU8540_I2S_DF_I2S; - break; - case SND_SOC_DAIFMT_LEFT_J: - ctrl1_val |= NAU8540_I2S_DF_LEFT; - break; - case SND_SOC_DAIFMT_RIGHT_J: - ctrl1_val |= NAU8540_I2S_DF_RIGTH; - break; - case SND_SOC_DAIFMT_DSP_A: - ctrl1_val |= NAU8540_I2S_DF_PCM_AB; - break; - case SND_SOC_DAIFMT_DSP_B: - ctrl1_val |= NAU8540_I2S_DF_PCM_AB; - ctrl1_val |= NAU8540_I2S_PCMB_EN; - break; - default: - return -EINVAL; - } - - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL0, - NAU8540_I2S_DL_MASK | NAU8540_I2S_DF_MASK | - NAU8540_I2S_BP_INV | NAU8540_I2S_PCMB_EN, ctrl1_val); - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL1, - NAU8540_I2S_MS_MASK | NAU8540_I2S_DO12_OE, ctrl2_val); - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL2, - NAU8540_I2S_DO34_OE, 0); - - return 0; + struct snd_soc_codec *codec = dai->codec; + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + unsigned int ctrl1_val = 0, ctrl2_val = 0; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + ctrl2_val |= NAU8540_I2S_MS_MASTER; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + ctrl1_val |= NAU8540_I2S_BP_INV; + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + ctrl1_val |= NAU8540_I2S_DF_I2S; + break; + case SND_SOC_DAIFMT_LEFT_J: + ctrl1_val |= NAU8540_I2S_DF_LEFT; + break; + case SND_SOC_DAIFMT_RIGHT_J: + ctrl1_val |= NAU8540_I2S_DF_RIGTH; + break; + case SND_SOC_DAIFMT_DSP_A: + ctrl1_val |= NAU8540_I2S_DF_PCM_AB; + break; + case SND_SOC_DAIFMT_DSP_B: + ctrl1_val |= NAU8540_I2S_DF_PCM_AB; + ctrl1_val |= NAU8540_I2S_PCMB_EN; + break; + default: + return -EINVAL; + } + + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL0, + NAU8540_I2S_DL_MASK | NAU8540_I2S_DF_MASK | + NAU8540_I2S_BP_INV | NAU8540_I2S_PCMB_EN, ctrl1_val); + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL1, + NAU8540_I2S_MS_MASK | NAU8540_I2S_DO12_OE, ctrl2_val); + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL2, + NAU8540_I2S_DO34_OE, 0); + + return 0; } /** @@ -451,55 +451,55 @@ static int nau8540_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) * Configures a DAI for TDM operation. Only support 4 slots TDM. */ static int nau8540_set_tdm_slot(struct snd_soc_dai *dai, - unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width) + unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width) { - struct snd_soc_codec *codec = dai->codec; - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - unsigned int ctrl2_val = 0, ctrl4_val = 0; - - if (slots > 4 || ((tx_mask & 0xf0) && (tx_mask & 0xf))) - return -EINVAL; - - ctrl4_val |= (NAU8540_TDM_MODE | NAU8540_TDM_OFFSET_EN); - if (tx_mask & 0xf0) { - ctrl2_val = 4 * slot_width; - ctrl4_val |= (tx_mask >> 4); - } else { - ctrl4_val |= tx_mask; - } - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL4, - NAU8540_TDM_MODE | NAU8540_TDM_OFFSET_EN | - NAU8540_TDM_TX_MASK, ctrl4_val); - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL1, - NAU8540_I2S_DO12_OE, NAU8540_I2S_DO12_OE); - regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL2, - NAU8540_I2S_DO34_OE | NAU8540_I2S_TSLOT_L_MASK, - NAU8540_I2S_DO34_OE | ctrl2_val); - - return 0; + struct snd_soc_codec *codec = dai->codec; + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + unsigned int ctrl2_val = 0, ctrl4_val = 0; + + if (slots > 4 || ((tx_mask & 0xf0) && (tx_mask & 0xf))) + return -EINVAL; + + ctrl4_val |= (NAU8540_TDM_MODE | NAU8540_TDM_OFFSET_EN); + if (tx_mask & 0xf0) { + ctrl2_val = 4 * slot_width; + ctrl4_val |= (tx_mask >> 4); + } else { + ctrl4_val |= tx_mask; + } + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL4, + NAU8540_TDM_MODE | NAU8540_TDM_OFFSET_EN | + NAU8540_TDM_TX_MASK, ctrl4_val); + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL1, + NAU8540_I2S_DO12_OE, NAU8540_I2S_DO12_OE); + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL2, + NAU8540_I2S_DO34_OE | NAU8540_I2S_TSLOT_L_MASK, + NAU8540_I2S_DO34_OE | ctrl2_val); + + return 0; } static const struct snd_soc_dai_ops nau8540_dai_ops = { - .hw_params = nau8540_hw_params, - .set_fmt = nau8540_set_fmt, - .set_tdm_slot = nau8540_set_tdm_slot, + .hw_params = nau8540_hw_params, + .set_fmt = nau8540_set_fmt, + .set_tdm_slot = nau8540_set_tdm_slot, }; #define NAU8540_RATES SNDRV_PCM_RATE_8000_48000 #define NAU8540_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE \ - | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S32_LE) + | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S32_LE) static struct snd_soc_dai_driver nau8540_dai = { - .name = "nau8540-hifi", - .capture = { - .stream_name = "Capture", - .channels_min = 1, - .channels_max = 4, - .rates = NAU8540_RATES, - .formats = NAU8540_FORMATS, - }, - .ops = &nau8540_dai_ops, + .name = "nau8540-hifi", + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 4, + .rates = NAU8540_RATES, + .formats = NAU8540_FORMATS, + }, + .ops = &nau8540_dai_ops, }; /** @@ -513,320 +513,320 @@ static struct snd_soc_dai_driver nau8540_dai = { * Returns 0 for success or negative error code. */ static int nau8540_calc_fll_param(unsigned int fll_in, - unsigned int fs, struct nau8540_fll *fll_param) + unsigned int fs, struct nau8540_fll *fll_param) { - u64 fvco, fvco_max; - unsigned int fref, i, fvco_sel; - - /* Ensure the reference clock frequency (FREF) is <= 13.5MHz by dividing - * freq_in by 1, 2, 4, or 8 using FLL pre-scalar. - * FREF = freq_in / NAU8540_FLL_REF_DIV_MASK - */ - for (i = 0; i < ARRAY_SIZE(fll_pre_scalar); i++) { - fref = fll_in / fll_pre_scalar[i].param; - if (fref <= NAU_FREF_MAX) - break; - } - if (i == ARRAY_SIZE(fll_pre_scalar)) - return -EINVAL; - fll_param->clk_ref_div = fll_pre_scalar[i].val; - - /* Choose the FLL ratio based on FREF */ - for (i = 0; i < ARRAY_SIZE(fll_ratio); i++) { - if (fref >= fll_ratio[i].param) - break; - } - if (i == ARRAY_SIZE(fll_ratio)) - return -EINVAL; - fll_param->ratio = fll_ratio[i].val; - - /* Calculate the frequency of DCO (FDCO) given freq_out = 256 * Fs. - * FDCO must be within the 90MHz - 124MHz or the FFL cannot be - * guaranteed across the full range of operation. - * FDCO = freq_out * 2 * mclk_src_scaling - */ - fvco_max = 0; - fvco_sel = ARRAY_SIZE(mclk_src_scaling); - for (i = 0; i < ARRAY_SIZE(mclk_src_scaling); i++) { - fvco = 256 * fs * 2 * mclk_src_scaling[i].param; - if (fvco > NAU_FVCO_MIN && fvco < NAU_FVCO_MAX && - fvco_max < fvco) { - fvco_max = fvco; - fvco_sel = i; - } - } - if (ARRAY_SIZE(mclk_src_scaling) == fvco_sel) - return -EINVAL; - fll_param->mclk_src = mclk_src_scaling[fvco_sel].val; - - /* Calculate the FLL 10-bit integer input and the FLL 16-bit fractional - * input based on FDCO, FREF and FLL ratio. - */ - fvco = div_u64(fvco_max << 16, fref * fll_param->ratio); - fll_param->fll_int = (fvco >> 16) & 0x3FF; - fll_param->fll_frac = fvco & 0xFFFF; - return 0; + u64 fvco, fvco_max; + unsigned int fref, i, fvco_sel; + + /* Ensure the reference clock frequency (FREF) is <= 13.5MHz by dividing + * freq_in by 1, 2, 4, or 8 using FLL pre-scalar. + * FREF = freq_in / NAU8540_FLL_REF_DIV_MASK + */ + for (i = 0; i < ARRAY_SIZE(fll_pre_scalar); i++) { + fref = fll_in / fll_pre_scalar[i].param; + if (fref <= NAU_FREF_MAX) + break; + } + if (i == ARRAY_SIZE(fll_pre_scalar)) + return -EINVAL; + fll_param->clk_ref_div = fll_pre_scalar[i].val; + + /* Choose the FLL ratio based on FREF */ + for (i = 0; i < ARRAY_SIZE(fll_ratio); i++) { + if (fref >= fll_ratio[i].param) + break; + } + if (i == ARRAY_SIZE(fll_ratio)) + return -EINVAL; + fll_param->ratio = fll_ratio[i].val; + + /* Calculate the frequency of DCO (FDCO) given freq_out = 256 * Fs. + * FDCO must be within the 90MHz - 124MHz or the FFL cannot be + * guaranteed across the full range of operation. + * FDCO = freq_out * 2 * mclk_src_scaling + */ + fvco_max = 0; + fvco_sel = ARRAY_SIZE(mclk_src_scaling); + for (i = 0; i < ARRAY_SIZE(mclk_src_scaling); i++) { + fvco = 256 * fs * 2 * mclk_src_scaling[i].param; + if (fvco > NAU_FVCO_MIN && fvco < NAU_FVCO_MAX && + fvco_max < fvco) { + fvco_max = fvco; + fvco_sel = i; + } + } + if (ARRAY_SIZE(mclk_src_scaling) == fvco_sel) + return -EINVAL; + fll_param->mclk_src = mclk_src_scaling[fvco_sel].val; + + /* Calculate the FLL 10-bit integer input and the FLL 16-bit fractional + * input based on FDCO, FREF and FLL ratio. + */ + fvco = div_u64(fvco_max << 16, fref * fll_param->ratio); + fll_param->fll_int = (fvco >> 16) & 0x3FF; + fll_param->fll_frac = fvco & 0xFFFF; + return 0; } static void nau8540_fll_apply(struct regmap *regmap, - struct nau8540_fll *fll_param) + struct nau8540_fll *fll_param) { - regmap_update_bits(regmap, NAU8540_REG_CLOCK_SRC, - NAU8540_CLK_SRC_MASK | NAU8540_CLK_MCLK_SRC_MASK, - NAU8540_CLK_SRC_MCLK | fll_param->mclk_src); - regmap_update_bits(regmap, NAU8540_REG_FLL1, - NAU8540_FLL_RATIO_MASK, fll_param->ratio); - /* FLL 16-bit fractional input */ - regmap_write(regmap, NAU8540_REG_FLL2, fll_param->fll_frac); - /* FLL 10-bit integer input */ - regmap_update_bits(regmap, NAU8540_REG_FLL3, - NAU8540_FLL_INTEGER_MASK, fll_param->fll_int); - /* FLL pre-scaler */ - regmap_update_bits(regmap, NAU8540_REG_FLL4, - NAU8540_FLL_REF_DIV_MASK, - fll_param->clk_ref_div << NAU8540_FLL_REF_DIV_SFT); - regmap_update_bits(regmap, NAU8540_REG_FLL5, - NAU8540_FLL_CLK_SW_MASK, NAU8540_FLL_CLK_SW_REF); - regmap_update_bits(regmap, - NAU8540_REG_FLL6, NAU8540_DCO_EN, 0); - if (fll_param->fll_frac) { - regmap_update_bits(regmap, NAU8540_REG_FLL5, - NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | - NAU8540_FLL_FTR_SW_MASK, - NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | - NAU8540_FLL_FTR_SW_FILTER); - regmap_update_bits(regmap, NAU8540_REG_FLL6, - NAU8540_SDM_EN, NAU8540_SDM_EN); - } else { - regmap_update_bits(regmap, NAU8540_REG_FLL5, - NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | - NAU8540_FLL_FTR_SW_MASK, NAU8540_FLL_FTR_SW_ACCU); - regmap_update_bits(regmap, - NAU8540_REG_FLL6, NAU8540_SDM_EN, 0); - } + regmap_update_bits(regmap, NAU8540_REG_CLOCK_SRC, + NAU8540_CLK_SRC_MASK | NAU8540_CLK_MCLK_SRC_MASK, + NAU8540_CLK_SRC_MCLK | fll_param->mclk_src); + regmap_update_bits(regmap, NAU8540_REG_FLL1, + NAU8540_FLL_RATIO_MASK, fll_param->ratio); + /* FLL 16-bit fractional input */ + regmap_write(regmap, NAU8540_REG_FLL2, fll_param->fll_frac); + /* FLL 10-bit integer input */ + regmap_update_bits(regmap, NAU8540_REG_FLL3, + NAU8540_FLL_INTEGER_MASK, fll_param->fll_int); + /* FLL pre-scaler */ + regmap_update_bits(regmap, NAU8540_REG_FLL4, + NAU8540_FLL_REF_DIV_MASK, + fll_param->clk_ref_div << NAU8540_FLL_REF_DIV_SFT); + regmap_update_bits(regmap, NAU8540_REG_FLL5, + NAU8540_FLL_CLK_SW_MASK, NAU8540_FLL_CLK_SW_REF); + regmap_update_bits(regmap, + NAU8540_REG_FLL6, NAU8540_DCO_EN, 0); + if (fll_param->fll_frac) { + regmap_update_bits(regmap, NAU8540_REG_FLL5, + NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | + NAU8540_FLL_FTR_SW_MASK, + NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | + NAU8540_FLL_FTR_SW_FILTER); + regmap_update_bits(regmap, NAU8540_REG_FLL6, + NAU8540_SDM_EN, NAU8540_SDM_EN); + } else { + regmap_update_bits(regmap, NAU8540_REG_FLL5, + NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | + NAU8540_FLL_FTR_SW_MASK, NAU8540_FLL_FTR_SW_ACCU); + regmap_update_bits(regmap, + NAU8540_REG_FLL6, NAU8540_SDM_EN, 0); + } } /* freq_out must be 256*Fs in order to achieve the best performance */ static int nau8540_set_pll(struct snd_soc_codec *codec, int pll_id, int source, - unsigned int freq_in, unsigned int freq_out) + unsigned int freq_in, unsigned int freq_out) { - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - struct nau8540_fll fll_param; - int ret, fs; - - switch (pll_id) { - case NAU8540_CLK_FLL_MCLK: - regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, - NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_MCLK); - break; - - case NAU8540_CLK_FLL_BLK: - regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, - NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_BLK); - break; - - case NAU8540_CLK_FLL_FS: - regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, - NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_FS); - break; - - default: - dev_err(nau8540->dev, "Invalid clock id (%d)\n", pll_id); - return -EINVAL; - } - dev_dbg(nau8540->dev, "Sysclk is %dHz and clock id is %d\n", - freq_out, pll_id); - - fs = freq_out / 256; - ret = nau8540_calc_fll_param(freq_in, fs, &fll_param); - if (ret < 0) { - dev_err(nau8540->dev, "Unsupported input clock %d\n", freq_in); - return ret; - } - dev_dbg(nau8540->dev, "mclk_src=%x ratio=%x fll_frac=%x fll_int=%x clk_ref_div=%x\n", - fll_param.mclk_src, fll_param.ratio, fll_param.fll_frac, - fll_param.fll_int, fll_param.clk_ref_div); - - nau8540_fll_apply(nau8540->regmap, &fll_param); - mdelay(2); - regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, - NAU8540_CLK_SRC_MASK, NAU8540_CLK_SRC_VCO); - - return 0; + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + struct nau8540_fll fll_param; + int ret, fs; + + switch (pll_id) { + case NAU8540_CLK_FLL_MCLK: + regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, + NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_MCLK); + break; + + case NAU8540_CLK_FLL_BLK: + regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, + NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_BLK); + break; + + case NAU8540_CLK_FLL_FS: + regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, + NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_FS); + break; + + default: + dev_err(nau8540->dev, "Invalid clock id (%d)\n", pll_id); + return -EINVAL; + } + dev_dbg(nau8540->dev, "Sysclk is %dHz and clock id is %d\n", + freq_out, pll_id); + + fs = freq_out / 256; + ret = nau8540_calc_fll_param(freq_in, fs, &fll_param); + if (ret < 0) { + dev_err(nau8540->dev, "Unsupported input clock %d\n", freq_in); + return ret; + } + dev_dbg(nau8540->dev, "mclk_src=%x ratio=%x fll_frac=%x fll_int=%x clk_ref_div=%x\n", + fll_param.mclk_src, fll_param.ratio, fll_param.fll_frac, + fll_param.fll_int, fll_param.clk_ref_div); + + nau8540_fll_apply(nau8540->regmap, &fll_param); + mdelay(2); + regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, + NAU8540_CLK_SRC_MASK, NAU8540_CLK_SRC_VCO); + + return 0; } static int nau8540_set_sysclk(struct snd_soc_codec *codec, - int clk_id, int source, unsigned int freq, int dir) + int clk_id, int source, unsigned int freq, int dir) { - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - - switch (clk_id) { - case NAU8540_CLK_DIS: - case NAU8540_CLK_MCLK: - regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, - NAU8540_CLK_SRC_MASK, NAU8540_CLK_SRC_MCLK); - regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL6, - NAU8540_DCO_EN, 0); - break; - - case NAU8540_CLK_INTERNAL: - regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL6, - NAU8540_DCO_EN, NAU8540_DCO_EN); - regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, - NAU8540_CLK_SRC_MASK, NAU8540_CLK_SRC_VCO); - break; - - default: - dev_err(nau8540->dev, "Invalid clock id (%d)\n", clk_id); - return -EINVAL; - } - - dev_dbg(nau8540->dev, "Sysclk is %dHz and clock id is %d\n", - freq, clk_id); - - return 0; + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + + switch (clk_id) { + case NAU8540_CLK_DIS: + case NAU8540_CLK_MCLK: + regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, + NAU8540_CLK_SRC_MASK, NAU8540_CLK_SRC_MCLK); + regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL6, + NAU8540_DCO_EN, 0); + break; + + case NAU8540_CLK_INTERNAL: + regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL6, + NAU8540_DCO_EN, NAU8540_DCO_EN); + regmap_update_bits(nau8540->regmap, NAU8540_REG_CLOCK_SRC, + NAU8540_CLK_SRC_MASK, NAU8540_CLK_SRC_VCO); + break; + + default: + dev_err(nau8540->dev, "Invalid clock id (%d)\n", clk_id); + return -EINVAL; + } + + dev_dbg(nau8540->dev, "Sysclk is %dHz and clock id is %d\n", + freq, clk_id); + + return 0; } static void nau8540_reset_chip(struct regmap *regmap) { - regmap_write(regmap, NAU8540_REG_SW_RESET, 0x00); - regmap_write(regmap, NAU8540_REG_SW_RESET, 0x00); + regmap_write(regmap, NAU8540_REG_SW_RESET, 0x00); + regmap_write(regmap, NAU8540_REG_SW_RESET, 0x00); } static void nau8540_init_regs(struct nau8540 *nau8540) { - struct regmap *regmap = nau8540->regmap; - - /* Enable Bias/VMID/VMID Tieoff */ - regmap_update_bits(regmap, NAU8540_REG_VMID_CTRL, - NAU8540_VMID_EN | NAU8540_VMID_SEL_MASK, - NAU8540_VMID_EN | (0x2 << NAU8540_VMID_SEL_SFT)); - regmap_update_bits(regmap, NAU8540_REG_REFERENCE, - NAU8540_PRECHARGE_DIS | NAU8540_GLOBAL_BIAS_EN, - NAU8540_PRECHARGE_DIS | NAU8540_GLOBAL_BIAS_EN); - mdelay(2); - regmap_update_bits(regmap, NAU8540_REG_MIC_BIAS, - NAU8540_PU_PRE, NAU8540_PU_PRE); - regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL, - NAU8540_CLK_ADC_EN | NAU8540_CLK_I2S_EN, - NAU8540_CLK_ADC_EN | NAU8540_CLK_I2S_EN); - /* ADC OSR selection, CLK_ADC = Fs * OSR */ - regmap_update_bits(regmap, NAU8540_REG_ADC_SAMPLE_RATE, - NAU8540_ADC_OSR_MASK, NAU8540_ADC_OSR_64); + struct regmap *regmap = nau8540->regmap; + + /* Enable Bias/VMID/VMID Tieoff */ + regmap_update_bits(regmap, NAU8540_REG_VMID_CTRL, + NAU8540_VMID_EN | NAU8540_VMID_SEL_MASK, + NAU8540_VMID_EN | (0x2 << NAU8540_VMID_SEL_SFT)); + regmap_update_bits(regmap, NAU8540_REG_REFERENCE, + NAU8540_PRECHARGE_DIS | NAU8540_GLOBAL_BIAS_EN, + NAU8540_PRECHARGE_DIS | NAU8540_GLOBAL_BIAS_EN); + mdelay(2); + regmap_update_bits(regmap, NAU8540_REG_MIC_BIAS, + NAU8540_PU_PRE, NAU8540_PU_PRE); + regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL, + NAU8540_CLK_ADC_EN | NAU8540_CLK_I2S_EN, + NAU8540_CLK_ADC_EN | NAU8540_CLK_I2S_EN); + /* ADC OSR selection, CLK_ADC = Fs * OSR */ + regmap_update_bits(regmap, NAU8540_REG_ADC_SAMPLE_RATE, + NAU8540_ADC_OSR_MASK, NAU8540_ADC_OSR_64); } static int __maybe_unused nau8540_suspend(struct snd_soc_codec *codec) { - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - regcache_cache_only(nau8540->regmap, true); - regcache_mark_dirty(nau8540->regmap); + regcache_cache_only(nau8540->regmap, true); + regcache_mark_dirty(nau8540->regmap); - return 0; + return 0; } static int __maybe_unused nau8540_resume(struct snd_soc_codec *codec) { - struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); - regcache_cache_only(nau8540->regmap, false); - regcache_sync(nau8540->regmap); + regcache_cache_only(nau8540->regmap, false); + regcache_sync(nau8540->regmap); - return 0; + return 0; } static struct snd_soc_codec_driver nau8540_codec_driver = { - .set_sysclk = nau8540_set_sysclk, - .set_pll = nau8540_set_pll, - .suspend = nau8540_suspend, - .resume = nau8540_resume, - .suspend_bias_off = true, - - .component_driver = { - .controls = nau8540_snd_controls, - .num_controls = ARRAY_SIZE(nau8540_snd_controls), - .dapm_widgets = nau8540_dapm_widgets, - .num_dapm_widgets = ARRAY_SIZE(nau8540_dapm_widgets), - .dapm_routes = nau8540_dapm_routes, - .num_dapm_routes = ARRAY_SIZE(nau8540_dapm_routes), - }, + .set_sysclk = nau8540_set_sysclk, + .set_pll = nau8540_set_pll, + .suspend = nau8540_suspend, + .resume = nau8540_resume, + .suspend_bias_off = true, + + .component_driver = { + .controls = nau8540_snd_controls, + .num_controls = ARRAY_SIZE(nau8540_snd_controls), + .dapm_widgets = nau8540_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(nau8540_dapm_widgets), + .dapm_routes = nau8540_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(nau8540_dapm_routes), + }, }; static const struct regmap_config nau8540_regmap_config = { - .val_bits = 16, - .reg_bits = 16, + .val_bits = 16, + .reg_bits = 16, - .max_register = NAU8540_REG_MAX, - .readable_reg = nau8540_readable_reg, - .writeable_reg = nau8540_writeable_reg, - .volatile_reg = nau8540_volatile_reg, + .max_register = NAU8540_REG_MAX, + .readable_reg = nau8540_readable_reg, + .writeable_reg = nau8540_writeable_reg, + .volatile_reg = nau8540_volatile_reg, - .cache_type = REGCACHE_RBTREE, - .reg_defaults = nau8540_reg_defaults, - .num_reg_defaults = ARRAY_SIZE(nau8540_reg_defaults), + .cache_type = REGCACHE_RBTREE, + .reg_defaults = nau8540_reg_defaults, + .num_reg_defaults = ARRAY_SIZE(nau8540_reg_defaults), }; static int nau8540_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) + const struct i2c_device_id *id) { - struct device *dev = &i2c->dev; - struct nau8540 *nau8540 = dev_get_platdata(dev); - int ret, value; - - if (!nau8540) { - nau8540 = devm_kzalloc(dev, sizeof(*nau8540), GFP_KERNEL); - if (!nau8540) - return -ENOMEM; - } - i2c_set_clientdata(i2c, nau8540); - - nau8540->regmap = devm_regmap_init_i2c(i2c, &nau8540_regmap_config); - if (IS_ERR(nau8540->regmap)) - return PTR_ERR(nau8540->regmap); - ret = regmap_read(nau8540->regmap, NAU8540_REG_I2C_DEVICE_ID, &value); - if (ret < 0) { - dev_err(dev, "Failed to read device id from the NAU85L40: %d\n", - ret); - return ret; - } - - nau8540->dev = dev; - nau8540_reset_chip(nau8540->regmap); - nau8540_init_regs(nau8540); - - return snd_soc_register_codec(dev, - &nau8540_codec_driver, &nau8540_dai, 1); + struct device *dev = &i2c->dev; + struct nau8540 *nau8540 = dev_get_platdata(dev); + int ret, value; + + if (!nau8540) { + nau8540 = devm_kzalloc(dev, sizeof(*nau8540), GFP_KERNEL); + if (!nau8540) + return -ENOMEM; + } + i2c_set_clientdata(i2c, nau8540); + + nau8540->regmap = devm_regmap_init_i2c(i2c, &nau8540_regmap_config); + if (IS_ERR(nau8540->regmap)) + return PTR_ERR(nau8540->regmap); + ret = regmap_read(nau8540->regmap, NAU8540_REG_I2C_DEVICE_ID, &value); + if (ret < 0) { + dev_err(dev, "Failed to read device id from the NAU85L40: %d\n", + ret); + return ret; + } + + nau8540->dev = dev; + nau8540_reset_chip(nau8540->regmap); + nau8540_init_regs(nau8540); + + return snd_soc_register_codec(dev, + &nau8540_codec_driver, &nau8540_dai, 1); } static int nau8540_i2c_remove(struct i2c_client *client) { - snd_soc_unregister_codec(&client->dev); - return 0; + snd_soc_unregister_codec(&client->dev); + return 0; } static const struct i2c_device_id nau8540_i2c_ids[] = { - { "nau8540", 0 }, - { } + { "nau8540", 0 }, + { } }; MODULE_DEVICE_TABLE(i2c, nau8540_i2c_ids); #ifdef CONFIG_OF static const struct of_device_id nau8540_of_ids[] = { - { .compatible = "nuvoton,nau8540", }, - {} + { .compatible = "nuvoton,nau8540", }, + {} }; MODULE_DEVICE_TABLE(of, nau8540_of_ids); #endif static struct i2c_driver nau8540_i2c_driver = { - .driver = { - .name = "nau8540", - .of_match_table = of_match_ptr(nau8540_of_ids), - }, - .probe = nau8540_i2c_probe, - .remove = nau8540_i2c_remove, - .id_table = nau8540_i2c_ids, + .driver = { + .name = "nau8540", + .of_match_table = of_match_ptr(nau8540_of_ids), + }, + .probe = nau8540_i2c_probe, + .remove = nau8540_i2c_remove, + .id_table = nau8540_i2c_ids, }; module_i2c_driver(nau8540_i2c_driver); diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h index d06e65188cd531b13c6526af95299719924d3256..5db5b224944dd12ceff7df4d8244bc7bfd8d48e1 100644 --- a/sound/soc/codecs/nau8540.h +++ b/sound/soc/codecs/nau8540.h @@ -12,211 +12,211 @@ #ifndef __NAU8540_H__ #define __NAU8540_H__ -#define NAU8540_REG_SW_RESET 0x00 -#define NAU8540_REG_POWER_MANAGEMENT 0x01 -#define NAU8540_REG_CLOCK_CTRL 0x02 -#define NAU8540_REG_CLOCK_SRC 0x03 -#define NAU8540_REG_FLL1 0x04 -#define NAU8540_REG_FLL2 0x05 -#define NAU8540_REG_FLL3 0x06 -#define NAU8540_REG_FLL4 0x07 -#define NAU8540_REG_FLL5 0x08 -#define NAU8540_REG_FLL6 0x09 -#define NAU8540_REG_FLL_VCO_RSV 0x0A -#define NAU8540_REG_PCM_CTRL0 0x10 -#define NAU8540_REG_PCM_CTRL1 0x11 -#define NAU8540_REG_PCM_CTRL2 0x12 -#define NAU8540_REG_PCM_CTRL3 0x13 -#define NAU8540_REG_PCM_CTRL4 0x14 -#define NAU8540_REG_ALC_CONTROL_1 0x20 -#define NAU8540_REG_ALC_CONTROL_2 0x21 -#define NAU8540_REG_ALC_CONTROL_3 0x22 -#define NAU8540_REG_ALC_CONTROL_4 0x23 -#define NAU8540_REG_ALC_CONTROL_5 0x24 -#define NAU8540_REG_ALC_GAIN_CH12 0x2D -#define NAU8540_REG_ALC_GAIN_CH34 0x2E -#define NAU8540_REG_ALC_STATUS 0x2F -#define NAU8540_REG_NOTCH_FIL1_CH1 0x30 -#define NAU8540_REG_NOTCH_FIL2_CH1 0x31 -#define NAU8540_REG_NOTCH_FIL1_CH2 0x32 -#define NAU8540_REG_NOTCH_FIL2_CH2 0x33 -#define NAU8540_REG_NOTCH_FIL1_CH3 0x34 -#define NAU8540_REG_NOTCH_FIL2_CH3 0x35 -#define NAU8540_REG_NOTCH_FIL1_CH4 0x36 -#define NAU8540_REG_NOTCH_FIL2_CH4 0x37 -#define NAU8540_REG_HPF_FILTER_CH12 0x38 -#define NAU8540_REG_HPF_FILTER_CH34 0x39 -#define NAU8540_REG_ADC_SAMPLE_RATE 0x3A -#define NAU8540_REG_DIGITAL_GAIN_CH1 0x40 -#define NAU8540_REG_DIGITAL_GAIN_CH2 0x41 -#define NAU8540_REG_DIGITAL_GAIN_CH3 0x42 -#define NAU8540_REG_DIGITAL_GAIN_CH4 0x43 -#define NAU8540_REG_DIGITAL_MUX 0x44 -#define NAU8540_REG_P2P_CH1 0x48 -#define NAU8540_REG_P2P_CH2 0x49 -#define NAU8540_REG_P2P_CH3 0x4A -#define NAU8540_REG_P2P_CH4 0x4B -#define NAU8540_REG_PEAK_CH1 0x4C -#define NAU8540_REG_PEAK_CH2 0x4D -#define NAU8540_REG_PEAK_CH3 0x4E -#define NAU8540_REG_PEAK_CH4 0x4F -#define NAU8540_REG_GPIO_CTRL 0x50 -#define NAU8540_REG_MISC_CTRL 0x51 -#define NAU8540_REG_I2C_CTRL 0x52 -#define NAU8540_REG_I2C_DEVICE_ID 0x58 -#define NAU8540_REG_RST 0x5A -#define NAU8540_REG_VMID_CTRL 0x60 -#define NAU8540_REG_MUTE 0x61 -#define NAU8540_REG_ANALOG_ADC1 0x64 -#define NAU8540_REG_ANALOG_ADC2 0x65 -#define NAU8540_REG_ANALOG_PWR 0x66 -#define NAU8540_REG_MIC_BIAS 0x67 -#define NAU8540_REG_REFERENCE 0x68 -#define NAU8540_REG_FEPGA1 0x69 -#define NAU8540_REG_FEPGA2 0x6A -#define NAU8540_REG_FEPGA3 0x6B -#define NAU8540_REG_FEPGA4 0x6C -#define NAU8540_REG_PWR 0x6D -#define NAU8540_REG_MAX NAU8540_REG_PWR +#define NAU8540_REG_SW_RESET 0x00 +#define NAU8540_REG_POWER_MANAGEMENT 0x01 +#define NAU8540_REG_CLOCK_CTRL 0x02 +#define NAU8540_REG_CLOCK_SRC 0x03 +#define NAU8540_REG_FLL1 0x04 +#define NAU8540_REG_FLL2 0x05 +#define NAU8540_REG_FLL3 0x06 +#define NAU8540_REG_FLL4 0x07 +#define NAU8540_REG_FLL5 0x08 +#define NAU8540_REG_FLL6 0x09 +#define NAU8540_REG_FLL_VCO_RSV 0x0A +#define NAU8540_REG_PCM_CTRL0 0x10 +#define NAU8540_REG_PCM_CTRL1 0x11 +#define NAU8540_REG_PCM_CTRL2 0x12 +#define NAU8540_REG_PCM_CTRL3 0x13 +#define NAU8540_REG_PCM_CTRL4 0x14 +#define NAU8540_REG_ALC_CONTROL_1 0x20 +#define NAU8540_REG_ALC_CONTROL_2 0x21 +#define NAU8540_REG_ALC_CONTROL_3 0x22 +#define NAU8540_REG_ALC_CONTROL_4 0x23 +#define NAU8540_REG_ALC_CONTROL_5 0x24 +#define NAU8540_REG_ALC_GAIN_CH12 0x2D +#define NAU8540_REG_ALC_GAIN_CH34 0x2E +#define NAU8540_REG_ALC_STATUS 0x2F +#define NAU8540_REG_NOTCH_FIL1_CH1 0x30 +#define NAU8540_REG_NOTCH_FIL2_CH1 0x31 +#define NAU8540_REG_NOTCH_FIL1_CH2 0x32 +#define NAU8540_REG_NOTCH_FIL2_CH2 0x33 +#define NAU8540_REG_NOTCH_FIL1_CH3 0x34 +#define NAU8540_REG_NOTCH_FIL2_CH3 0x35 +#define NAU8540_REG_NOTCH_FIL1_CH4 0x36 +#define NAU8540_REG_NOTCH_FIL2_CH4 0x37 +#define NAU8540_REG_HPF_FILTER_CH12 0x38 +#define NAU8540_REG_HPF_FILTER_CH34 0x39 +#define NAU8540_REG_ADC_SAMPLE_RATE 0x3A +#define NAU8540_REG_DIGITAL_GAIN_CH1 0x40 +#define NAU8540_REG_DIGITAL_GAIN_CH2 0x41 +#define NAU8540_REG_DIGITAL_GAIN_CH3 0x42 +#define NAU8540_REG_DIGITAL_GAIN_CH4 0x43 +#define NAU8540_REG_DIGITAL_MUX 0x44 +#define NAU8540_REG_P2P_CH1 0x48 +#define NAU8540_REG_P2P_CH2 0x49 +#define NAU8540_REG_P2P_CH3 0x4A +#define NAU8540_REG_P2P_CH4 0x4B +#define NAU8540_REG_PEAK_CH1 0x4C +#define NAU8540_REG_PEAK_CH2 0x4D +#define NAU8540_REG_PEAK_CH3 0x4E +#define NAU8540_REG_PEAK_CH4 0x4F +#define NAU8540_REG_GPIO_CTRL 0x50 +#define NAU8540_REG_MISC_CTRL 0x51 +#define NAU8540_REG_I2C_CTRL 0x52 +#define NAU8540_REG_I2C_DEVICE_ID 0x58 +#define NAU8540_REG_RST 0x5A +#define NAU8540_REG_VMID_CTRL 0x60 +#define NAU8540_REG_MUTE 0x61 +#define NAU8540_REG_ANALOG_ADC1 0x64 +#define NAU8540_REG_ANALOG_ADC2 0x65 +#define NAU8540_REG_ANALOG_PWR 0x66 +#define NAU8540_REG_MIC_BIAS 0x67 +#define NAU8540_REG_REFERENCE 0x68 +#define NAU8540_REG_FEPGA1 0x69 +#define NAU8540_REG_FEPGA2 0x6A +#define NAU8540_REG_FEPGA3 0x6B +#define NAU8540_REG_FEPGA4 0x6C +#define NAU8540_REG_PWR 0x6D +#define NAU8540_REG_MAX NAU8540_REG_PWR /* POWER_MANAGEMENT (0x01) */ -#define NAU8540_ADC4_EN (0x1 << 3) -#define NAU8540_ADC3_EN (0x1 << 2) -#define NAU8540_ADC2_EN (0x1 << 1) -#define NAU8540_ADC1_EN 0x1 +#define NAU8540_ADC4_EN (0x1 << 3) +#define NAU8540_ADC3_EN (0x1 << 2) +#define NAU8540_ADC2_EN (0x1 << 1) +#define NAU8540_ADC1_EN 0x1 /* CLOCK_CTRL (0x02) */ -#define NAU8540_CLK_ADC_EN (0x1 << 15) -#define NAU8540_CLK_I2S_EN (0x1 << 1) +#define NAU8540_CLK_ADC_EN (0x1 << 15) +#define NAU8540_CLK_I2S_EN (0x1 << 1) /* CLOCK_SRC (0x03) */ -#define NAU8540_CLK_SRC_SFT 15 -#define NAU8540_CLK_SRC_MASK (1 << NAU8540_CLK_SRC_SFT) -#define NAU8540_CLK_SRC_VCO (1 << NAU8540_CLK_SRC_SFT) -#define NAU8540_CLK_SRC_MCLK (0 << NAU8540_CLK_SRC_SFT) -#define NAU8540_CLK_ADC_SRC_SFT 6 -#define NAU8540_CLK_ADC_SRC_MASK (0x3 << NAU8540_CLK_ADC_SRC_SFT) -#define NAU8540_CLK_MCLK_SRC_MASK 0xf +#define NAU8540_CLK_SRC_SFT 15 +#define NAU8540_CLK_SRC_MASK (1 << NAU8540_CLK_SRC_SFT) +#define NAU8540_CLK_SRC_VCO (1 << NAU8540_CLK_SRC_SFT) +#define NAU8540_CLK_SRC_MCLK (0 << NAU8540_CLK_SRC_SFT) +#define NAU8540_CLK_ADC_SRC_SFT 6 +#define NAU8540_CLK_ADC_SRC_MASK (0x3 << NAU8540_CLK_ADC_SRC_SFT) +#define NAU8540_CLK_MCLK_SRC_MASK 0xf /* FLL1 (0x04) */ -#define NAU8540_FLL_RATIO_MASK 0x7f +#define NAU8540_FLL_RATIO_MASK 0x7f /* FLL3 (0x06) */ -#define NAU8540_FLL_CLK_SRC_SFT 10 -#define NAU8540_FLL_CLK_SRC_MASK (0x3 << NAU8540_FLL_CLK_SRC_SFT) -#define NAU8540_FLL_CLK_SRC_MCLK (0 << NAU8540_FLL_CLK_SRC_SFT) -#define NAU8540_FLL_CLK_SRC_BLK (0x2 << NAU8540_FLL_CLK_SRC_SFT) -#define NAU8540_FLL_CLK_SRC_FS (0x3 << NAU8540_FLL_CLK_SRC_SFT) -#define NAU8540_FLL_INTEGER_MASK 0x3ff +#define NAU8540_FLL_CLK_SRC_SFT 10 +#define NAU8540_FLL_CLK_SRC_MASK (0x3 << NAU8540_FLL_CLK_SRC_SFT) +#define NAU8540_FLL_CLK_SRC_MCLK (0 << NAU8540_FLL_CLK_SRC_SFT) +#define NAU8540_FLL_CLK_SRC_BLK (0x2 << NAU8540_FLL_CLK_SRC_SFT) +#define NAU8540_FLL_CLK_SRC_FS (0x3 << NAU8540_FLL_CLK_SRC_SFT) +#define NAU8540_FLL_INTEGER_MASK 0x3ff /* FLL4 (0x07) */ -#define NAU8540_FLL_REF_DIV_SFT 10 -#define NAU8540_FLL_REF_DIV_MASK (0x3 << NAU8540_FLL_REF_DIV_SFT) +#define NAU8540_FLL_REF_DIV_SFT 10 +#define NAU8540_FLL_REF_DIV_MASK (0x3 << NAU8540_FLL_REF_DIV_SFT) /* FLL5 (0x08) */ -#define NAU8540_FLL_PDB_DAC_EN (0x1 << 15) -#define NAU8540_FLL_LOOP_FTR_EN (0x1 << 14) -#define NAU8540_FLL_CLK_SW_MASK (0x1 << 13) -#define NAU8540_FLL_CLK_SW_N2 (0x1 << 13) -#define NAU8540_FLL_CLK_SW_REF (0x0 << 13) -#define NAU8540_FLL_FTR_SW_MASK (0x1 << 12) -#define NAU8540_FLL_FTR_SW_ACCU (0x1 << 12) -#define NAU8540_FLL_FTR_SW_FILTER (0x0 << 12) +#define NAU8540_FLL_PDB_DAC_EN (0x1 << 15) +#define NAU8540_FLL_LOOP_FTR_EN (0x1 << 14) +#define NAU8540_FLL_CLK_SW_MASK (0x1 << 13) +#define NAU8540_FLL_CLK_SW_N2 (0x1 << 13) +#define NAU8540_FLL_CLK_SW_REF (0x0 << 13) +#define NAU8540_FLL_FTR_SW_MASK (0x1 << 12) +#define NAU8540_FLL_FTR_SW_ACCU (0x1 << 12) +#define NAU8540_FLL_FTR_SW_FILTER (0x0 << 12) /* FLL6 (0x9) */ -#define NAU8540_DCO_EN (0x1 << 15) -#define NAU8540_SDM_EN (0x1 << 14) +#define NAU8540_DCO_EN (0x1 << 15) +#define NAU8540_SDM_EN (0x1 << 14) /* PCM_CTRL0 (0x10) */ -#define NAU8540_I2S_BP_SFT 7 -#define NAU8540_I2S_BP_INV (0x1 << NAU8540_I2S_BP_SFT) -#define NAU8540_I2S_PCMB_SFT 6 -#define NAU8540_I2S_PCMB_EN (0x1 << NAU8540_I2S_PCMB_SFT) -#define NAU8540_I2S_DL_SFT 2 -#define NAU8540_I2S_DL_MASK (0x3 << NAU8540_I2S_DL_SFT) -#define NAU8540_I2S_DL_16 (0 << NAU8540_I2S_DL_SFT) -#define NAU8540_I2S_DL_20 (0x1 << NAU8540_I2S_DL_SFT) -#define NAU8540_I2S_DL_24 (0x2 << NAU8540_I2S_DL_SFT) -#define NAU8540_I2S_DL_32 (0x3 << NAU8540_I2S_DL_SFT) -#define NAU8540_I2S_DF_MASK 0x3 -#define NAU8540_I2S_DF_RIGTH 0 -#define NAU8540_I2S_DF_LEFT 0x1 -#define NAU8540_I2S_DF_I2S 0x2 -#define NAU8540_I2S_DF_PCM_AB 0x3 +#define NAU8540_I2S_BP_SFT 7 +#define NAU8540_I2S_BP_INV (0x1 << NAU8540_I2S_BP_SFT) +#define NAU8540_I2S_PCMB_SFT 6 +#define NAU8540_I2S_PCMB_EN (0x1 << NAU8540_I2S_PCMB_SFT) +#define NAU8540_I2S_DL_SFT 2 +#define NAU8540_I2S_DL_MASK (0x3 << NAU8540_I2S_DL_SFT) +#define NAU8540_I2S_DL_16 (0 << NAU8540_I2S_DL_SFT) +#define NAU8540_I2S_DL_20 (0x1 << NAU8540_I2S_DL_SFT) +#define NAU8540_I2S_DL_24 (0x2 << NAU8540_I2S_DL_SFT) +#define NAU8540_I2S_DL_32 (0x3 << NAU8540_I2S_DL_SFT) +#define NAU8540_I2S_DF_MASK 0x3 +#define NAU8540_I2S_DF_RIGTH 0 +#define NAU8540_I2S_DF_LEFT 0x1 +#define NAU8540_I2S_DF_I2S 0x2 +#define NAU8540_I2S_DF_PCM_AB 0x3 /* PCM_CTRL1 (0x11) */ -#define NAU8540_I2S_LRC_DIV_SFT 12 -#define NAU8540_I2S_LRC_DIV_MASK (0x3 << NAU8540_I2S_LRC_DIV_SFT) -#define NAU8540_I2S_DO12_OE (0x1 << 4) -#define NAU8540_I2S_MS_SFT 3 -#define NAU8540_I2S_MS_MASK (0x1 << NAU8540_I2S_MS_SFT) -#define NAU8540_I2S_MS_MASTER (0x1 << NAU8540_I2S_MS_SFT) -#define NAU8540_I2S_MS_SLAVE (0x0 << NAU8540_I2S_MS_SFT) -#define NAU8540_I2S_BLK_DIV_MASK 0x7 +#define NAU8540_I2S_LRC_DIV_SFT 12 +#define NAU8540_I2S_LRC_DIV_MASK (0x3 << NAU8540_I2S_LRC_DIV_SFT) +#define NAU8540_I2S_DO12_OE (0x1 << 4) +#define NAU8540_I2S_MS_SFT 3 +#define NAU8540_I2S_MS_MASK (0x1 << NAU8540_I2S_MS_SFT) +#define NAU8540_I2S_MS_MASTER (0x1 << NAU8540_I2S_MS_SFT) +#define NAU8540_I2S_MS_SLAVE (0x0 << NAU8540_I2S_MS_SFT) +#define NAU8540_I2S_BLK_DIV_MASK 0x7 /* PCM_CTRL1 (0x12) */ -#define NAU8540_I2S_DO34_OE (0x1 << 11) -#define NAU8540_I2S_TSLOT_L_MASK 0x3ff +#define NAU8540_I2S_DO34_OE (0x1 << 11) +#define NAU8540_I2S_TSLOT_L_MASK 0x3ff /* PCM_CTRL4 (0x14) */ -#define NAU8540_TDM_MODE (0x1 << 15) -#define NAU8540_TDM_OFFSET_EN (0x1 << 14) -#define NAU8540_TDM_TX_MASK 0xf +#define NAU8540_TDM_MODE (0x1 << 15) +#define NAU8540_TDM_OFFSET_EN (0x1 << 14) +#define NAU8540_TDM_TX_MASK 0xf /* ADC_SAMPLE_RATE (0x3A) */ -#define NAU8540_ADC_OSR_MASK 0x3 -#define NAU8540_ADC_OSR_256 0x3 -#define NAU8540_ADC_OSR_128 0x2 -#define NAU8540_ADC_OSR_64 0x1 -#define NAU8540_ADC_OSR_32 0x0 +#define NAU8540_ADC_OSR_MASK 0x3 +#define NAU8540_ADC_OSR_256 0x3 +#define NAU8540_ADC_OSR_128 0x2 +#define NAU8540_ADC_OSR_64 0x1 +#define NAU8540_ADC_OSR_32 0x0 /* VMID_CTRL (0x60) */ -#define NAU8540_VMID_EN (1 << 6) -#define NAU8540_VMID_SEL_SFT 4 -#define NAU8540_VMID_SEL_MASK (0x3 << NAU8540_VMID_SEL_SFT) +#define NAU8540_VMID_EN (1 << 6) +#define NAU8540_VMID_SEL_SFT 4 +#define NAU8540_VMID_SEL_MASK (0x3 << NAU8540_VMID_SEL_SFT) /* MIC_BIAS (0x67) */ -#define NAU8540_PU_PRE (0x1 << 8) +#define NAU8540_PU_PRE (0x1 << 8) /* REFERENCE (0x68) */ -#define NAU8540_PRECHARGE_DIS (0x1 << 13) -#define NAU8540_GLOBAL_BIAS_EN (0x1 << 12) +#define NAU8540_PRECHARGE_DIS (0x1 << 13) +#define NAU8540_GLOBAL_BIAS_EN (0x1 << 12) /* System Clock Source */ enum { - NAU8540_CLK_DIS, - NAU8540_CLK_MCLK, - NAU8540_CLK_INTERNAL, - NAU8540_CLK_FLL_MCLK, - NAU8540_CLK_FLL_BLK, - NAU8540_CLK_FLL_FS, + NAU8540_CLK_DIS, + NAU8540_CLK_MCLK, + NAU8540_CLK_INTERNAL, + NAU8540_CLK_FLL_MCLK, + NAU8540_CLK_FLL_BLK, + NAU8540_CLK_FLL_FS, }; struct nau8540 { - struct device *dev; - struct regmap *regmap; + struct device *dev; + struct regmap *regmap; }; struct nau8540_fll { - int mclk_src; - int ratio; - int fll_frac; - int fll_int; - int clk_ref_div; + int mclk_src; + int ratio; + int fll_frac; + int fll_int; + int clk_ref_div; }; struct nau8540_fll_attr { - unsigned int param; - unsigned int val; + unsigned int param; + unsigned int val; }; /* over sampling rate */ struct nau8540_osr_attr { - unsigned int osr; - unsigned int clk_src; + unsigned int osr; + unsigned int clk_src; }; -#endif /* __NAU8540_H__ */ +#endif /* __NAU8540_H__ */ diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c new file mode 100644 index 0000000000000000000000000000000000000000..cca974d26136fbdaeb46f08aa4f972d1958fa668 --- /dev/null +++ b/sound/soc/codecs/nau8824.c @@ -0,0 +1,1831 @@ +/* + * NAU88L24 ALSA SoC audio driver + * + * Copyright 2016 Nuvoton Technology Corp. + * Author: John Hsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "nau8824.h" + + +static int nau8824_config_sysclk(struct nau8824 *nau8824, + int clk_id, unsigned int freq); +static bool nau8824_is_jack_inserted(struct nau8824 *nau8824); + +/* the ADC threshold of headset */ +#define DMIC_CLK 3072000 + +/* the ADC threshold of headset */ +#define HEADSET_SARADC_THD 0x80 + +/* the parameter threshold of FLL */ +#define NAU_FREF_MAX 13500000 +#define NAU_FVCO_MAX 124000000 +#define NAU_FVCO_MIN 90000000 + +/* scaling for mclk from sysclk_src output */ +static const struct nau8824_fll_attr mclk_src_scaling[] = { + { 1, 0x0 }, + { 2, 0x2 }, + { 4, 0x3 }, + { 8, 0x4 }, + { 16, 0x5 }, + { 32, 0x6 }, + { 3, 0x7 }, + { 6, 0xa }, + { 12, 0xb }, + { 24, 0xc }, +}; + +/* ratio for input clk freq */ +static const struct nau8824_fll_attr fll_ratio[] = { + { 512000, 0x01 }, + { 256000, 0x02 }, + { 128000, 0x04 }, + { 64000, 0x08 }, + { 32000, 0x10 }, + { 8000, 0x20 }, + { 4000, 0x40 }, +}; + +static const struct nau8824_fll_attr fll_pre_scalar[] = { + { 1, 0x0 }, + { 2, 0x1 }, + { 4, 0x2 }, + { 8, 0x3 }, +}; + +/* the maximum frequency of CLK_ADC and CLK_DAC */ +#define CLK_DA_AD_MAX 6144000 + +/* over sampling rate */ +static const struct nau8824_osr_attr osr_dac_sel[] = { + { 64, 2 }, /* OSR 64, SRC 1/4 */ + { 256, 0 }, /* OSR 256, SRC 1 */ + { 128, 1 }, /* OSR 128, SRC 1/2 */ + { 0, 0 }, + { 32, 3 }, /* OSR 32, SRC 1/8 */ +}; + +static const struct nau8824_osr_attr osr_adc_sel[] = { + { 32, 3 }, /* OSR 32, SRC 1/8 */ + { 64, 2 }, /* OSR 64, SRC 1/4 */ + { 128, 1 }, /* OSR 128, SRC 1/2 */ + { 256, 0 }, /* OSR 256, SRC 1 */ +}; + +static const struct reg_default nau8824_reg_defaults[] = { + { NAU8824_REG_ENA_CTRL, 0x0000 }, + { NAU8824_REG_CLK_GATING_ENA, 0x0000 }, + { NAU8824_REG_CLK_DIVIDER, 0x0000 }, + { NAU8824_REG_FLL1, 0x0000 }, + { NAU8824_REG_FLL2, 0x3126 }, + { NAU8824_REG_FLL3, 0x0008 }, + { NAU8824_REG_FLL4, 0x0010 }, + { NAU8824_REG_FLL5, 0xC000 }, + { NAU8824_REG_FLL6, 0x6000 }, + { NAU8824_REG_FLL_VCO_RSV, 0xF13C }, + { NAU8824_REG_JACK_DET_CTRL, 0x0000 }, + { NAU8824_REG_INTERRUPT_SETTING_1, 0x0000 }, + { NAU8824_REG_IRQ, 0x0000 }, + { NAU8824_REG_CLEAR_INT_REG, 0x0000 }, + { NAU8824_REG_INTERRUPT_SETTING, 0x1000 }, + { NAU8824_REG_SAR_ADC, 0x0015 }, + { NAU8824_REG_VDET_COEFFICIENT, 0x0110 }, + { NAU8824_REG_VDET_THRESHOLD_1, 0x0000 }, + { NAU8824_REG_VDET_THRESHOLD_2, 0x0000 }, + { NAU8824_REG_VDET_THRESHOLD_3, 0x0000 }, + { NAU8824_REG_VDET_THRESHOLD_4, 0x0000 }, + { NAU8824_REG_GPIO_SEL, 0x0000 }, + { NAU8824_REG_PORT0_I2S_PCM_CTRL_1, 0x000B }, + { NAU8824_REG_PORT0_I2S_PCM_CTRL_2, 0x0010 }, + { NAU8824_REG_PORT0_LEFT_TIME_SLOT, 0x0000 }, + { NAU8824_REG_PORT0_RIGHT_TIME_SLOT, 0x0000 }, + { NAU8824_REG_TDM_CTRL, 0x0000 }, + { NAU8824_REG_ADC_HPF_FILTER, 0x0000 }, + { NAU8824_REG_ADC_FILTER_CTRL, 0x0002 }, + { NAU8824_REG_DAC_FILTER_CTRL_1, 0x0000 }, + { NAU8824_REG_DAC_FILTER_CTRL_2, 0x0000 }, + { NAU8824_REG_NOTCH_FILTER_1, 0x0000 }, + { NAU8824_REG_NOTCH_FILTER_2, 0x0000 }, + { NAU8824_REG_EQ1_LOW, 0x112C }, + { NAU8824_REG_EQ2_EQ3, 0x2C2C }, + { NAU8824_REG_EQ4_EQ5, 0x2C2C }, + { NAU8824_REG_ADC_CH0_DGAIN_CTRL, 0x0100 }, + { NAU8824_REG_ADC_CH1_DGAIN_CTRL, 0x0100 }, + { NAU8824_REG_ADC_CH2_DGAIN_CTRL, 0x0100 }, + { NAU8824_REG_ADC_CH3_DGAIN_CTRL, 0x0100 }, + { NAU8824_REG_DAC_MUTE_CTRL, 0x0000 }, + { NAU8824_REG_DAC_CH0_DGAIN_CTRL, 0x0100 }, + { NAU8824_REG_DAC_CH1_DGAIN_CTRL, 0x0100 }, + { NAU8824_REG_ADC_TO_DAC_ST, 0x0000 }, + { NAU8824_REG_DRC_KNEE_IP12_ADC_CH01, 0x1486 }, + { NAU8824_REG_DRC_KNEE_IP34_ADC_CH01, 0x0F12 }, + { NAU8824_REG_DRC_SLOPE_ADC_CH01, 0x25FF }, + { NAU8824_REG_DRC_ATKDCY_ADC_CH01, 0x3457 }, + { NAU8824_REG_DRC_KNEE_IP12_ADC_CH23, 0x1486 }, + { NAU8824_REG_DRC_KNEE_IP34_ADC_CH23, 0x0F12 }, + { NAU8824_REG_DRC_SLOPE_ADC_CH23, 0x25FF }, + { NAU8824_REG_DRC_ATKDCY_ADC_CH23, 0x3457 }, + { NAU8824_REG_DRC_GAINL_ADC0, 0x0200 }, + { NAU8824_REG_DRC_GAINL_ADC1, 0x0200 }, + { NAU8824_REG_DRC_GAINL_ADC2, 0x0200 }, + { NAU8824_REG_DRC_GAINL_ADC3, 0x0200 }, + { NAU8824_REG_DRC_KNEE_IP12_DAC, 0x1486 }, + { NAU8824_REG_DRC_KNEE_IP34_DAC, 0x0F12 }, + { NAU8824_REG_DRC_SLOPE_DAC, 0x25F9 }, + { NAU8824_REG_DRC_ATKDCY_DAC, 0x3457 }, + { NAU8824_REG_DRC_GAIN_DAC_CH0, 0x0200 }, + { NAU8824_REG_DRC_GAIN_DAC_CH1, 0x0200 }, + { NAU8824_REG_MODE, 0x0000 }, + { NAU8824_REG_MODE1, 0x0000 }, + { NAU8824_REG_MODE2, 0x0000 }, + { NAU8824_REG_CLASSG, 0x0000 }, + { NAU8824_REG_OTP_EFUSE, 0x0000 }, + { NAU8824_REG_OTPDOUT_1, 0x0000 }, + { NAU8824_REG_OTPDOUT_2, 0x0000 }, + { NAU8824_REG_MISC_CTRL, 0x0000 }, + { NAU8824_REG_I2C_TIMEOUT, 0xEFFF }, + { NAU8824_REG_TEST_MODE, 0x0000 }, + { NAU8824_REG_I2C_DEVICE_ID, 0x1AF1 }, + { NAU8824_REG_SAR_ADC_DATA_OUT, 0x00FF }, + { NAU8824_REG_BIAS_ADJ, 0x0000 }, + { NAU8824_REG_PGA_GAIN, 0x0000 }, + { NAU8824_REG_TRIM_SETTINGS, 0x0000 }, + { NAU8824_REG_ANALOG_CONTROL_1, 0x0000 }, + { NAU8824_REG_ANALOG_CONTROL_2, 0x0000 }, + { NAU8824_REG_ENABLE_LO, 0x0000 }, + { NAU8824_REG_GAIN_LO, 0x0000 }, + { NAU8824_REG_CLASSD_GAIN_1, 0x0000 }, + { NAU8824_REG_CLASSD_GAIN_2, 0x0000 }, + { NAU8824_REG_ANALOG_ADC_1, 0x0011 }, + { NAU8824_REG_ANALOG_ADC_2, 0x0020 }, + { NAU8824_REG_RDAC, 0x0008 }, + { NAU8824_REG_MIC_BIAS, 0x0006 }, + { NAU8824_REG_HS_VOLUME_CONTROL, 0x0000 }, + { NAU8824_REG_BOOST, 0x0000 }, + { NAU8824_REG_FEPGA, 0x0000 }, + { NAU8824_REG_FEPGA_II, 0x0000 }, + { NAU8824_REG_FEPGA_SE, 0x0000 }, + { NAU8824_REG_FEPGA_ATTENUATION, 0x0000 }, + { NAU8824_REG_ATT_PORT0, 0x0000 }, + { NAU8824_REG_ATT_PORT1, 0x0000 }, + { NAU8824_REG_POWER_UP_CONTROL, 0x0000 }, + { NAU8824_REG_CHARGE_PUMP_CONTROL, 0x0300 }, + { NAU8824_REG_CHARGE_PUMP_INPUT, 0x0013 }, +}; + +static int nau8824_sema_acquire(struct nau8824 *nau8824, long timeout) +{ + int ret; + + if (timeout) { + ret = down_timeout(&nau8824->jd_sem, timeout); + if (ret < 0) + dev_warn(nau8824->dev, "Acquire semaphone timeout\n"); + } else { + ret = down_interruptible(&nau8824->jd_sem); + if (ret < 0) + dev_warn(nau8824->dev, "Acquire semaphone fail\n"); + } + + return ret; +} + +static inline void nau8824_sema_release(struct nau8824 *nau8824) +{ + up(&nau8824->jd_sem); +} + +static bool nau8824_readable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case NAU8824_REG_ENA_CTRL ... NAU8824_REG_FLL_VCO_RSV: + case NAU8824_REG_JACK_DET_CTRL: + case NAU8824_REG_INTERRUPT_SETTING_1: + case NAU8824_REG_IRQ: + case NAU8824_REG_CLEAR_INT_REG ... NAU8824_REG_VDET_THRESHOLD_4: + case NAU8824_REG_GPIO_SEL: + case NAU8824_REG_PORT0_I2S_PCM_CTRL_1 ... NAU8824_REG_TDM_CTRL: + case NAU8824_REG_ADC_HPF_FILTER ... NAU8824_REG_EQ4_EQ5: + case NAU8824_REG_ADC_CH0_DGAIN_CTRL ... NAU8824_REG_ADC_TO_DAC_ST: + case NAU8824_REG_DRC_KNEE_IP12_ADC_CH01 ... NAU8824_REG_DRC_GAINL_ADC3: + case NAU8824_REG_DRC_KNEE_IP12_DAC ... NAU8824_REG_DRC_GAIN_DAC_CH1: + case NAU8824_REG_CLASSG ... NAU8824_REG_OTP_EFUSE: + case NAU8824_REG_OTPDOUT_1 ... NAU8824_REG_OTPDOUT_2: + case NAU8824_REG_I2C_TIMEOUT: + case NAU8824_REG_I2C_DEVICE_ID ... NAU8824_REG_SAR_ADC_DATA_OUT: + case NAU8824_REG_BIAS_ADJ ... NAU8824_REG_CLASSD_GAIN_2: + case NAU8824_REG_ANALOG_ADC_1 ... NAU8824_REG_ATT_PORT1: + case NAU8824_REG_POWER_UP_CONTROL ... NAU8824_REG_CHARGE_PUMP_INPUT: + return true; + default: + return false; + } + +} + +static bool nau8824_writeable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case NAU8824_REG_RESET ... NAU8824_REG_FLL_VCO_RSV: + case NAU8824_REG_JACK_DET_CTRL: + case NAU8824_REG_INTERRUPT_SETTING_1: + case NAU8824_REG_CLEAR_INT_REG ... NAU8824_REG_VDET_THRESHOLD_4: + case NAU8824_REG_GPIO_SEL: + case NAU8824_REG_PORT0_I2S_PCM_CTRL_1 ... NAU8824_REG_TDM_CTRL: + case NAU8824_REG_ADC_HPF_FILTER ... NAU8824_REG_EQ4_EQ5: + case NAU8824_REG_ADC_CH0_DGAIN_CTRL ... NAU8824_REG_ADC_TO_DAC_ST: + case NAU8824_REG_DRC_KNEE_IP12_ADC_CH01: + case NAU8824_REG_DRC_KNEE_IP34_ADC_CH01: + case NAU8824_REG_DRC_SLOPE_ADC_CH01: + case NAU8824_REG_DRC_ATKDCY_ADC_CH01: + case NAU8824_REG_DRC_KNEE_IP12_ADC_CH23: + case NAU8824_REG_DRC_KNEE_IP34_ADC_CH23: + case NAU8824_REG_DRC_SLOPE_ADC_CH23: + case NAU8824_REG_DRC_ATKDCY_ADC_CH23: + case NAU8824_REG_DRC_KNEE_IP12_DAC ... NAU8824_REG_DRC_ATKDCY_DAC: + case NAU8824_REG_CLASSG ... NAU8824_REG_OTP_EFUSE: + case NAU8824_REG_I2C_TIMEOUT: + case NAU8824_REG_BIAS_ADJ ... NAU8824_REG_CLASSD_GAIN_2: + case NAU8824_REG_ANALOG_ADC_1 ... NAU8824_REG_ATT_PORT1: + case NAU8824_REG_POWER_UP_CONTROL ... NAU8824_REG_CHARGE_PUMP_CONTROL: + return true; + default: + return false; + } +} + +static bool nau8824_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case NAU8824_REG_RESET: + case NAU8824_REG_IRQ ... NAU8824_REG_CLEAR_INT_REG: + case NAU8824_REG_DRC_GAINL_ADC0 ... NAU8824_REG_DRC_GAINL_ADC3: + case NAU8824_REG_DRC_GAIN_DAC_CH0 ... NAU8824_REG_DRC_GAIN_DAC_CH1: + case NAU8824_REG_OTPDOUT_1 ... NAU8824_REG_OTPDOUT_2: + case NAU8824_REG_I2C_DEVICE_ID ... NAU8824_REG_SAR_ADC_DATA_OUT: + case NAU8824_REG_CHARGE_PUMP_INPUT: + return true; + default: + return false; + } +} + +static const char * const nau8824_companding[] = { + "Off", "NC", "u-law", "A-law" }; + +static const struct soc_enum nau8824_companding_adc_enum = + SOC_ENUM_SINGLE(NAU8824_REG_PORT0_I2S_PCM_CTRL_1, 12, + ARRAY_SIZE(nau8824_companding), nau8824_companding); + +static const struct soc_enum nau8824_companding_dac_enum = + SOC_ENUM_SINGLE(NAU8824_REG_PORT0_I2S_PCM_CTRL_1, 14, + ARRAY_SIZE(nau8824_companding), nau8824_companding); + +static const char * const nau8824_adc_decimation[] = { + "32", "64", "128", "256" }; + +static const struct soc_enum nau8824_adc_decimation_enum = + SOC_ENUM_SINGLE(NAU8824_REG_ADC_FILTER_CTRL, 0, + ARRAY_SIZE(nau8824_adc_decimation), nau8824_adc_decimation); + +static const char * const nau8824_dac_oversampl[] = { + "64", "256", "128", "", "32" }; + +static const struct soc_enum nau8824_dac_oversampl_enum = + SOC_ENUM_SINGLE(NAU8824_REG_DAC_FILTER_CTRL_1, 0, + ARRAY_SIZE(nau8824_dac_oversampl), nau8824_dac_oversampl); + +static const char * const nau8824_input_channel[] = { + "Input CH0", "Input CH1", "Input CH2", "Input CH3" }; + +static const struct soc_enum nau8824_adc_ch0_enum = + SOC_ENUM_SINGLE(NAU8824_REG_ADC_CH0_DGAIN_CTRL, 9, + ARRAY_SIZE(nau8824_input_channel), nau8824_input_channel); + +static const struct soc_enum nau8824_adc_ch1_enum = + SOC_ENUM_SINGLE(NAU8824_REG_ADC_CH1_DGAIN_CTRL, 9, + ARRAY_SIZE(nau8824_input_channel), nau8824_input_channel); + +static const struct soc_enum nau8824_adc_ch2_enum = + SOC_ENUM_SINGLE(NAU8824_REG_ADC_CH2_DGAIN_CTRL, 9, + ARRAY_SIZE(nau8824_input_channel), nau8824_input_channel); + +static const struct soc_enum nau8824_adc_ch3_enum = + SOC_ENUM_SINGLE(NAU8824_REG_ADC_CH3_DGAIN_CTRL, 9, + ARRAY_SIZE(nau8824_input_channel), nau8824_input_channel); + +static const char * const nau8824_tdm_slot[] = { + "Slot 0", "Slot 1", "Slot 2", "Slot 3" }; + +static const struct soc_enum nau8824_dac_left_sel_enum = + SOC_ENUM_SINGLE(NAU8824_REG_TDM_CTRL, 6, + ARRAY_SIZE(nau8824_tdm_slot), nau8824_tdm_slot); + +static const struct soc_enum nau8824_dac_right_sel_enum = + SOC_ENUM_SINGLE(NAU8824_REG_TDM_CTRL, 4, + ARRAY_SIZE(nau8824_tdm_slot), nau8824_tdm_slot); + +static const DECLARE_TLV_DB_MINMAX_MUTE(spk_vol_tlv, 0, 2400); +static const DECLARE_TLV_DB_MINMAX(hp_vol_tlv, -3000, 0); +static const DECLARE_TLV_DB_SCALE(mic_vol_tlv, 0, 200, 0); +static const DECLARE_TLV_DB_SCALE(dmic_vol_tlv, -12800, 50, 0); + +static const struct snd_kcontrol_new nau8824_snd_controls[] = { + SOC_ENUM("ADC Companding", nau8824_companding_adc_enum), + SOC_ENUM("DAC Companding", nau8824_companding_dac_enum), + + SOC_ENUM("ADC Decimation Rate", nau8824_adc_decimation_enum), + SOC_ENUM("DAC Oversampling Rate", nau8824_dac_oversampl_enum), + + SOC_SINGLE_TLV("Speaker Right DACR Volume", + NAU8824_REG_CLASSD_GAIN_1, 8, 0x1f, 0, spk_vol_tlv), + SOC_SINGLE_TLV("Speaker Left DACL Volume", + NAU8824_REG_CLASSD_GAIN_2, 0, 0x1f, 0, spk_vol_tlv), + SOC_SINGLE_TLV("Speaker Left DACR Volume", + NAU8824_REG_CLASSD_GAIN_1, 0, 0x1f, 0, spk_vol_tlv), + SOC_SINGLE_TLV("Speaker Right DACL Volume", + NAU8824_REG_CLASSD_GAIN_2, 8, 0x1f, 0, spk_vol_tlv), + + SOC_SINGLE_TLV("Headphone Right DACR Volume", + NAU8824_REG_ATT_PORT0, 8, 0x1f, 0, hp_vol_tlv), + SOC_SINGLE_TLV("Headphone Left DACL Volume", + NAU8824_REG_ATT_PORT0, 0, 0x1f, 0, hp_vol_tlv), + SOC_SINGLE_TLV("Headphone Right DACL Volume", + NAU8824_REG_ATT_PORT1, 8, 0x1f, 0, hp_vol_tlv), + SOC_SINGLE_TLV("Headphone Left DACR Volume", + NAU8824_REG_ATT_PORT1, 0, 0x1f, 0, hp_vol_tlv), + + SOC_SINGLE_TLV("MIC1 Volume", NAU8824_REG_FEPGA_II, + NAU8824_FEPGA_GAINL_SFT, 0x12, 0, mic_vol_tlv), + SOC_SINGLE_TLV("MIC2 Volume", NAU8824_REG_FEPGA_II, + NAU8824_FEPGA_GAINR_SFT, 0x12, 0, mic_vol_tlv), + + SOC_SINGLE_TLV("DMIC1 Volume", NAU8824_REG_ADC_CH0_DGAIN_CTRL, + 0, 0x164, 0, dmic_vol_tlv), + SOC_SINGLE_TLV("DMIC2 Volume", NAU8824_REG_ADC_CH1_DGAIN_CTRL, + 0, 0x164, 0, dmic_vol_tlv), + SOC_SINGLE_TLV("DMIC3 Volume", NAU8824_REG_ADC_CH2_DGAIN_CTRL, + 0, 0x164, 0, dmic_vol_tlv), + SOC_SINGLE_TLV("DMIC4 Volume", NAU8824_REG_ADC_CH3_DGAIN_CTRL, + 0, 0x164, 0, dmic_vol_tlv), + + SOC_ENUM("ADC CH0 Select", nau8824_adc_ch0_enum), + SOC_ENUM("ADC CH1 Select", nau8824_adc_ch1_enum), + SOC_ENUM("ADC CH2 Select", nau8824_adc_ch2_enum), + SOC_ENUM("ADC CH3 Select", nau8824_adc_ch3_enum), + + SOC_SINGLE("ADC CH0 TX Switch", NAU8824_REG_TDM_CTRL, 0, 1, 0), + SOC_SINGLE("ADC CH1 TX Switch", NAU8824_REG_TDM_CTRL, 1, 1, 0), + SOC_SINGLE("ADC CH2 TX Switch", NAU8824_REG_TDM_CTRL, 2, 1, 0), + SOC_SINGLE("ADC CH3 TX Switch", NAU8824_REG_TDM_CTRL, 3, 1, 0), + + SOC_ENUM("DACL Channel Source", nau8824_dac_left_sel_enum), + SOC_ENUM("DACR Channel Source", nau8824_dac_right_sel_enum), + + SOC_SINGLE("DACL LR Mix", NAU8824_REG_DAC_MUTE_CTRL, 0, 1, 0), + SOC_SINGLE("DACR LR Mix", NAU8824_REG_DAC_MUTE_CTRL, 1, 1, 0), +}; + +static int nau8824_output_dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + /* Disables the TESTDAC to let DAC signal pass through. */ + regmap_update_bits(nau8824->regmap, NAU8824_REG_ENABLE_LO, + NAU8824_TEST_DAC_EN, 0); + break; + case SND_SOC_DAPM_POST_PMD: + regmap_update_bits(nau8824->regmap, NAU8824_REG_ENABLE_LO, + NAU8824_TEST_DAC_EN, NAU8824_TEST_DAC_EN); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nau8824_spk_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + regmap_update_bits(nau8824->regmap, + NAU8824_REG_ANALOG_CONTROL_2, + NAU8824_CLASSD_CLAMP_DIS, NAU8824_CLASSD_CLAMP_DIS); + break; + case SND_SOC_DAPM_POST_PMD: + regmap_update_bits(nau8824->regmap, + NAU8824_REG_ANALOG_CONTROL_2, + NAU8824_CLASSD_CLAMP_DIS, 0); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nau8824_pump_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + /* Prevent startup click by letting charge pump to ramp up */ + msleep(10); + regmap_update_bits(nau8824->regmap, + NAU8824_REG_CHARGE_PUMP_CONTROL, + NAU8824_JAMNODCLOW, NAU8824_JAMNODCLOW); + break; + case SND_SOC_DAPM_PRE_PMD: + regmap_update_bits(nau8824->regmap, + NAU8824_REG_CHARGE_PUMP_CONTROL, + NAU8824_JAMNODCLOW, 0); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int system_clock_control(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + if (SND_SOC_DAPM_EVENT_OFF(event)) { + /* Set clock source to disable or internal clock before the + * playback or capture end. Codec needs clock for Jack + * detection and button press if jack inserted; otherwise, + * the clock should be closed. + */ + if (nau8824_is_jack_inserted(nau8824)) { + nau8824_config_sysclk(nau8824, + NAU8824_CLK_INTERNAL, 0); + } else { + nau8824_config_sysclk(nau8824, NAU8824_CLK_DIS, 0); + } + } + return 0; +} + +static int dmic_clock_control(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + int src; + + /* The DMIC clock is gotten from system clock (256fs) divided by + * DMIC_SRC (1, 2, 4, 8, 16, 32). The clock has to be equal or + * less than 3.072 MHz. + */ + for (src = 0; src < 5; src++) { + if ((0x1 << (8 - src)) * nau8824->fs <= DMIC_CLK) + break; + } + dev_dbg(nau8824->dev, "dmic src %d for mclk %d\n", src, nau8824->fs * 256); + regmap_update_bits(nau8824->regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_DMIC_SRC_MASK, (src << NAU8824_CLK_DMIC_SRC_SFT)); + + return 0; +} + +static const struct snd_kcontrol_new nau8824_adc_ch0_dmic = + SOC_DAPM_SINGLE("Switch", NAU8824_REG_ENA_CTRL, + NAU8824_ADC_CH0_DMIC_SFT, 1, 0); + +static const struct snd_kcontrol_new nau8824_adc_ch1_dmic = + SOC_DAPM_SINGLE("Switch", NAU8824_REG_ENA_CTRL, + NAU8824_ADC_CH1_DMIC_SFT, 1, 0); + +static const struct snd_kcontrol_new nau8824_adc_ch2_dmic = + SOC_DAPM_SINGLE("Switch", NAU8824_REG_ENA_CTRL, + NAU8824_ADC_CH2_DMIC_SFT, 1, 0); + +static const struct snd_kcontrol_new nau8824_adc_ch3_dmic = + SOC_DAPM_SINGLE("Switch", NAU8824_REG_ENA_CTRL, + NAU8824_ADC_CH3_DMIC_SFT, 1, 0); + +static const struct snd_kcontrol_new nau8824_adc_left_mixer[] = { + SOC_DAPM_SINGLE("MIC Switch", NAU8824_REG_FEPGA, + NAU8824_FEPGA_MODEL_MIC1_SFT, 1, 0), + SOC_DAPM_SINGLE("HSMIC Switch", NAU8824_REG_FEPGA, + NAU8824_FEPGA_MODEL_HSMIC_SFT, 1, 0), +}; + +static const struct snd_kcontrol_new nau8824_adc_right_mixer[] = { + SOC_DAPM_SINGLE("MIC Switch", NAU8824_REG_FEPGA, + NAU8824_FEPGA_MODER_MIC2_SFT, 1, 0), + SOC_DAPM_SINGLE("HSMIC Switch", NAU8824_REG_FEPGA, + NAU8824_FEPGA_MODER_HSMIC_SFT, 1, 0), +}; + +static const struct snd_kcontrol_new nau8824_hp_left_mixer[] = { + SOC_DAPM_SINGLE("DAC Right Switch", NAU8824_REG_ENABLE_LO, + NAU8824_DACR_HPL_EN_SFT, 1, 0), + SOC_DAPM_SINGLE("DAC Left Switch", NAU8824_REG_ENABLE_LO, + NAU8824_DACL_HPL_EN_SFT, 1, 0), +}; + +static const struct snd_kcontrol_new nau8824_hp_right_mixer[] = { + SOC_DAPM_SINGLE("DAC Left Switch", NAU8824_REG_ENABLE_LO, + NAU8824_DACL_HPR_EN_SFT, 1, 0), + SOC_DAPM_SINGLE("DAC Right Switch", NAU8824_REG_ENABLE_LO, + NAU8824_DACR_HPR_EN_SFT, 1, 0), +}; + +static const char * const nau8824_dac_src[] = { "DACL", "DACR" }; + +static SOC_ENUM_SINGLE_DECL( + nau8824_dacl_enum, NAU8824_REG_DAC_CH0_DGAIN_CTRL, + NAU8824_DAC_CH0_SEL_SFT, nau8824_dac_src); + +static SOC_ENUM_SINGLE_DECL( + nau8824_dacr_enum, NAU8824_REG_DAC_CH1_DGAIN_CTRL, + NAU8824_DAC_CH1_SEL_SFT, nau8824_dac_src); + +static const struct snd_kcontrol_new nau8824_dacl_mux = + SOC_DAPM_ENUM("DACL Source", nau8824_dacl_enum); + +static const struct snd_kcontrol_new nau8824_dacr_mux = + SOC_DAPM_ENUM("DACR Source", nau8824_dacr_enum); + + +static const struct snd_soc_dapm_widget nau8824_dapm_widgets[] = { + SND_SOC_DAPM_SUPPLY("System Clock", SND_SOC_NOPM, 0, 0, + system_clock_control, SND_SOC_DAPM_POST_PMD), + + SND_SOC_DAPM_INPUT("HSMIC1"), + SND_SOC_DAPM_INPUT("HSMIC2"), + SND_SOC_DAPM_INPUT("MIC1"), + SND_SOC_DAPM_INPUT("MIC2"), + SND_SOC_DAPM_INPUT("DMIC1"), + SND_SOC_DAPM_INPUT("DMIC2"), + SND_SOC_DAPM_INPUT("DMIC3"), + SND_SOC_DAPM_INPUT("DMIC4"), + + SND_SOC_DAPM_SUPPLY("SAR", NAU8824_REG_SAR_ADC, + NAU8824_SAR_ADC_EN_SFT, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY("MICBIAS", NAU8824_REG_MIC_BIAS, + NAU8824_MICBIAS_POWERUP_SFT, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY("DMIC12 Power", NAU8824_REG_BIAS_ADJ, + NAU8824_DMIC1_EN_SFT, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY("DMIC34 Power", NAU8824_REG_BIAS_ADJ, + NAU8824_DMIC2_EN_SFT, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY("DMIC Clock", SND_SOC_NOPM, 0, 0, + dmic_clock_control, SND_SOC_DAPM_POST_PMU), + + SND_SOC_DAPM_SWITCH("DMIC1 Enable", SND_SOC_NOPM, + 0, 0, &nau8824_adc_ch0_dmic), + SND_SOC_DAPM_SWITCH("DMIC2 Enable", SND_SOC_NOPM, + 0, 0, &nau8824_adc_ch1_dmic), + SND_SOC_DAPM_SWITCH("DMIC3 Enable", SND_SOC_NOPM, + 0, 0, &nau8824_adc_ch2_dmic), + SND_SOC_DAPM_SWITCH("DMIC4 Enable", SND_SOC_NOPM, + 0, 0, &nau8824_adc_ch3_dmic), + + SND_SOC_DAPM_MIXER("Left ADC", NAU8824_REG_POWER_UP_CONTROL, + 12, 0, nau8824_adc_left_mixer, + ARRAY_SIZE(nau8824_adc_left_mixer)), + SND_SOC_DAPM_MIXER("Right ADC", NAU8824_REG_POWER_UP_CONTROL, + 13, 0, nau8824_adc_right_mixer, + ARRAY_SIZE(nau8824_adc_right_mixer)), + + SND_SOC_DAPM_ADC("ADCL", NULL, NAU8824_REG_ANALOG_ADC_2, + NAU8824_ADCL_EN_SFT, 0), + SND_SOC_DAPM_ADC("ADCR", NULL, NAU8824_REG_ANALOG_ADC_2, + NAU8824_ADCR_EN_SFT, 0), + + SND_SOC_DAPM_AIF_OUT("AIFTX", "HiFi Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("AIFRX", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0), + + SND_SOC_DAPM_DAC("DACL", NULL, NAU8824_REG_RDAC, + NAU8824_DACL_EN_SFT, 0), + SND_SOC_DAPM_SUPPLY("DACL Clock", NAU8824_REG_RDAC, + NAU8824_DACL_CLK_SFT, 0, NULL, 0), + SND_SOC_DAPM_DAC("DACR", NULL, NAU8824_REG_RDAC, + NAU8824_DACR_EN_SFT, 0), + SND_SOC_DAPM_SUPPLY("DACR Clock", NAU8824_REG_RDAC, + NAU8824_DACR_CLK_SFT, 0, NULL, 0), + + SND_SOC_DAPM_MUX("DACL Mux", SND_SOC_NOPM, 0, 0, &nau8824_dacl_mux), + SND_SOC_DAPM_MUX("DACR Mux", SND_SOC_NOPM, 0, 0, &nau8824_dacr_mux), + + SND_SOC_DAPM_PGA_S("Output DACL", 0, NAU8824_REG_CHARGE_PUMP_CONTROL, + 8, 1, nau8824_output_dac_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_PGA_S("Output DACR", 0, NAU8824_REG_CHARGE_PUMP_CONTROL, + 9, 1, nau8824_output_dac_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + + SND_SOC_DAPM_PGA_S("ClassD", 0, NAU8824_REG_CLASSD_GAIN_1, + NAU8824_CLASSD_EN_SFT, 0, nau8824_spk_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + + SND_SOC_DAPM_MIXER("Left Headphone", NAU8824_REG_CLASSG, + NAU8824_CLASSG_LDAC_EN_SFT, 0, nau8824_hp_left_mixer, + ARRAY_SIZE(nau8824_hp_left_mixer)), + SND_SOC_DAPM_MIXER("Right Headphone", NAU8824_REG_CLASSG, + NAU8824_CLASSG_RDAC_EN_SFT, 0, nau8824_hp_right_mixer, + ARRAY_SIZE(nau8824_hp_right_mixer)), + SND_SOC_DAPM_PGA_S("Charge Pump", 1, NAU8824_REG_CHARGE_PUMP_CONTROL, + NAU8824_CHARGE_PUMP_EN_SFT, 0, nau8824_pump_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PGA("Output Driver L", + NAU8824_REG_POWER_UP_CONTROL, 3, 0, NULL, 0), + SND_SOC_DAPM_PGA("Output Driver R", + NAU8824_REG_POWER_UP_CONTROL, 2, 0, NULL, 0), + SND_SOC_DAPM_PGA("Main Driver L", + NAU8824_REG_POWER_UP_CONTROL, 1, 0, NULL, 0), + SND_SOC_DAPM_PGA("Main Driver R", + NAU8824_REG_POWER_UP_CONTROL, 0, 0, NULL, 0), + SND_SOC_DAPM_PGA("HP Boost Driver", NAU8824_REG_BOOST, + NAU8824_HP_BOOST_DIS_SFT, 1, NULL, 0), + SND_SOC_DAPM_PGA("Class G", NAU8824_REG_CLASSG, + NAU8824_CLASSG_EN_SFT, 0, NULL, 0), + + SND_SOC_DAPM_OUTPUT("SPKOUTL"), + SND_SOC_DAPM_OUTPUT("SPKOUTR"), + SND_SOC_DAPM_OUTPUT("HPOL"), + SND_SOC_DAPM_OUTPUT("HPOR"), +}; + +static const struct snd_soc_dapm_route nau8824_dapm_routes[] = { + {"DMIC1 Enable", "Switch", "DMIC1"}, + {"DMIC2 Enable", "Switch", "DMIC2"}, + {"DMIC3 Enable", "Switch", "DMIC3"}, + {"DMIC4 Enable", "Switch", "DMIC4"}, + + {"DMIC1", NULL, "DMIC12 Power"}, + {"DMIC2", NULL, "DMIC12 Power"}, + {"DMIC3", NULL, "DMIC34 Power"}, + {"DMIC4", NULL, "DMIC34 Power"}, + {"DMIC12 Power", NULL, "DMIC Clock"}, + {"DMIC34 Power", NULL, "DMIC Clock"}, + + {"Left ADC", "MIC Switch", "MIC1"}, + {"Left ADC", "HSMIC Switch", "HSMIC1"}, + {"Right ADC", "MIC Switch", "MIC2"}, + {"Right ADC", "HSMIC Switch", "HSMIC2"}, + + {"ADCL", NULL, "Left ADC"}, + {"ADCR", NULL, "Right ADC"}, + + {"AIFTX", NULL, "MICBIAS"}, + {"AIFTX", NULL, "ADCL"}, + {"AIFTX", NULL, "ADCR"}, + {"AIFTX", NULL, "DMIC1 Enable"}, + {"AIFTX", NULL, "DMIC2 Enable"}, + {"AIFTX", NULL, "DMIC3 Enable"}, + {"AIFTX", NULL, "DMIC4 Enable"}, + + {"AIFTX", NULL, "System Clock"}, + {"AIFRX", NULL, "System Clock"}, + + {"DACL", NULL, "AIFRX"}, + {"DACL", NULL, "DACL Clock"}, + {"DACR", NULL, "AIFRX"}, + {"DACR", NULL, "DACR Clock"}, + + {"DACL Mux", "DACL", "DACL"}, + {"DACL Mux", "DACR", "DACR"}, + {"DACR Mux", "DACL", "DACL"}, + {"DACR Mux", "DACR", "DACR"}, + + {"Output DACL", NULL, "DACL Mux"}, + {"Output DACR", NULL, "DACR Mux"}, + + {"ClassD", NULL, "Output DACL"}, + {"ClassD", NULL, "Output DACR"}, + + {"Left Headphone", "DAC Left Switch", "Output DACL"}, + {"Left Headphone", "DAC Right Switch", "Output DACR"}, + {"Right Headphone", "DAC Left Switch", "Output DACL"}, + {"Right Headphone", "DAC Right Switch", "Output DACR"}, + + {"Charge Pump", NULL, "Left Headphone"}, + {"Charge Pump", NULL, "Right Headphone"}, + {"Output Driver L", NULL, "Charge Pump"}, + {"Output Driver R", NULL, "Charge Pump"}, + {"Main Driver L", NULL, "Output Driver L"}, + {"Main Driver R", NULL, "Output Driver R"}, + {"Class G", NULL, "Main Driver L"}, + {"Class G", NULL, "Main Driver R"}, + {"HP Boost Driver", NULL, "Class G"}, + + {"SPKOUTL", NULL, "ClassD"}, + {"SPKOUTR", NULL, "ClassD"}, + {"HPOL", NULL, "HP Boost Driver"}, + {"HPOR", NULL, "HP Boost Driver"}, +}; + +static bool nau8824_is_jack_inserted(struct nau8824 *nau8824) +{ + struct snd_soc_jack *jack = nau8824->jack; + bool insert = FALSE; + + if (nau8824->irq && jack) + insert = jack->status & SND_JACK_HEADPHONE; + + return insert; +} + +static void nau8824_int_status_clear_all(struct regmap *regmap) +{ + int active_irq, clear_irq, i; + + /* Reset the intrruption status from rightmost bit if the corres- + * ponding irq event occurs. + */ + regmap_read(regmap, NAU8824_REG_IRQ, &active_irq); + for (i = 0; i < NAU8824_REG_DATA_LEN; i++) { + clear_irq = (0x1 << i); + if (active_irq & clear_irq) + regmap_write(regmap, + NAU8824_REG_CLEAR_INT_REG, clear_irq); + } +} + +static void nau8824_eject_jack(struct nau8824 *nau8824) +{ + struct snd_soc_dapm_context *dapm = nau8824->dapm; + struct regmap *regmap = nau8824->regmap; + + /* Clear all interruption status */ + nau8824_int_status_clear_all(regmap); + + snd_soc_dapm_disable_pin(dapm, "SAR"); + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + snd_soc_dapm_sync(dapm); + + /* Enable the insertion interruption, disable the ejection + * interruption, and then bypass de-bounce circuit. + */ + regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING, + NAU8824_IRQ_KEY_RELEASE_DIS | NAU8824_IRQ_KEY_SHORT_PRESS_DIS | + NAU8824_IRQ_EJECT_DIS | NAU8824_IRQ_INSERT_DIS, + NAU8824_IRQ_KEY_RELEASE_DIS | NAU8824_IRQ_KEY_SHORT_PRESS_DIS | + NAU8824_IRQ_EJECT_DIS); + regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING_1, + NAU8824_IRQ_INSERT_EN | NAU8824_IRQ_EJECT_EN, + NAU8824_IRQ_INSERT_EN); + regmap_update_bits(regmap, NAU8824_REG_ENA_CTRL, + NAU8824_JD_SLEEP_MODE, NAU8824_JD_SLEEP_MODE); + + /* Close clock for jack type detection at manual mode */ + nau8824_config_sysclk(nau8824, NAU8824_CLK_DIS, 0); +} + +static void nau8824_jdet_work(struct work_struct *work) +{ + struct nau8824 *nau8824 = container_of( + work, struct nau8824, jdet_work); + struct snd_soc_dapm_context *dapm = nau8824->dapm; + struct regmap *regmap = nau8824->regmap; + int adc_value, event = 0, event_mask = 0; + + snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); + snd_soc_dapm_force_enable_pin(dapm, "SAR"); + snd_soc_dapm_sync(dapm); + + msleep(100); + + regmap_read(regmap, NAU8824_REG_SAR_ADC_DATA_OUT, &adc_value); + adc_value = adc_value & NAU8824_SAR_ADC_DATA_MASK; + dev_dbg(nau8824->dev, "SAR ADC data 0x%02x\n", adc_value); + if (adc_value < HEADSET_SARADC_THD) { + event |= SND_JACK_HEADPHONE; + + snd_soc_dapm_disable_pin(dapm, "SAR"); + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + snd_soc_dapm_sync(dapm); + } else { + event |= SND_JACK_HEADSET; + } + event_mask |= SND_JACK_HEADSET; + snd_soc_jack_report(nau8824->jack, event, event_mask); + + nau8824_sema_release(nau8824); +} + +static void nau8824_setup_auto_irq(struct nau8824 *nau8824) +{ + struct regmap *regmap = nau8824->regmap; + + /* Enable jack ejection, short key press and release interruption. */ + regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING_1, + NAU8824_IRQ_INSERT_EN | NAU8824_IRQ_EJECT_EN, + NAU8824_IRQ_EJECT_EN); + regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING, + NAU8824_IRQ_EJECT_DIS | NAU8824_IRQ_KEY_RELEASE_DIS | + NAU8824_IRQ_KEY_SHORT_PRESS_DIS, 0); + /* Enable internal VCO needed for interruptions */ + nau8824_config_sysclk(nau8824, NAU8824_CLK_INTERNAL, 0); + regmap_update_bits(regmap, NAU8824_REG_ENA_CTRL, + NAU8824_JD_SLEEP_MODE, 0); +} + +static int nau8824_button_decode(int value) +{ + int buttons = 0; + + /* The chip supports up to 8 buttons, but ALSA defines + * only 6 buttons. + */ + if (value & BIT(0)) + buttons |= SND_JACK_BTN_0; + if (value & BIT(1)) + buttons |= SND_JACK_BTN_1; + if (value & BIT(2)) + buttons |= SND_JACK_BTN_2; + if (value & BIT(3)) + buttons |= SND_JACK_BTN_3; + if (value & BIT(4)) + buttons |= SND_JACK_BTN_4; + if (value & BIT(5)) + buttons |= SND_JACK_BTN_5; + + return buttons; +} + +#define NAU8824_BUTTONS (SND_JACK_BTN_0 | SND_JACK_BTN_1 | \ + SND_JACK_BTN_2 | SND_JACK_BTN_3) + +static irqreturn_t nau8824_interrupt(int irq, void *data) +{ + struct nau8824 *nau8824 = (struct nau8824 *)data; + struct regmap *regmap = nau8824->regmap; + int active_irq, clear_irq = 0, event = 0, event_mask = 0; + + if (regmap_read(regmap, NAU8824_REG_IRQ, &active_irq)) { + dev_err(nau8824->dev, "failed to read irq status\n"); + return IRQ_NONE; + } + dev_dbg(nau8824->dev, "IRQ %x\n", active_irq); + + if (active_irq & NAU8824_JACK_EJECTION_DETECTED) { + nau8824_eject_jack(nau8824); + event_mask |= SND_JACK_HEADSET; + clear_irq = NAU8824_JACK_EJECTION_DETECTED; + /* release semaphore held after resume, + * and cancel jack detection + */ + nau8824_sema_release(nau8824); + cancel_work_sync(&nau8824->jdet_work); + } else if (active_irq & NAU8824_KEY_SHORT_PRESS_IRQ) { + int key_status, button_pressed; + + regmap_read(regmap, NAU8824_REG_CLEAR_INT_REG, + &key_status); + + /* lower 8 bits of the register are for pressed keys */ + button_pressed = nau8824_button_decode(key_status); + + event |= button_pressed; + dev_dbg(nau8824->dev, "button %x pressed\n", event); + event_mask |= NAU8824_BUTTONS; + clear_irq = NAU8824_KEY_SHORT_PRESS_IRQ; + } else if (active_irq & NAU8824_KEY_RELEASE_IRQ) { + event_mask = NAU8824_BUTTONS; + clear_irq = NAU8824_KEY_RELEASE_IRQ; + } else if (active_irq & NAU8824_JACK_INSERTION_DETECTED) { + /* Turn off insertion interruption at manual mode */ + regmap_update_bits(regmap, + NAU8824_REG_INTERRUPT_SETTING, + NAU8824_IRQ_INSERT_DIS, + NAU8824_IRQ_INSERT_DIS); + regmap_update_bits(regmap, + NAU8824_REG_INTERRUPT_SETTING_1, + NAU8824_IRQ_INSERT_EN, 0); + /* detect microphone and jack type */ + cancel_work_sync(&nau8824->jdet_work); + schedule_work(&nau8824->jdet_work); + + /* Enable interruption for jack type detection at audo + * mode which can detect microphone and jack type. + */ + nau8824_setup_auto_irq(nau8824); + } + + if (!clear_irq) + clear_irq = active_irq; + /* clears the rightmost interruption */ + regmap_write(regmap, NAU8824_REG_CLEAR_INT_REG, clear_irq); + + if (event_mask) + snd_soc_jack_report(nau8824->jack, event, event_mask); + + return IRQ_HANDLED; +} + +static int nau8824_clock_check(struct nau8824 *nau8824, + int stream, int rate, int osr) +{ + int osrate; + + if (stream == SNDRV_PCM_STREAM_PLAYBACK) { + if (osr >= ARRAY_SIZE(osr_dac_sel)) + return -EINVAL; + osrate = osr_dac_sel[osr].osr; + } else { + if (osr >= ARRAY_SIZE(osr_adc_sel)) + return -EINVAL; + osrate = osr_adc_sel[osr].osr; + } + + if (!osrate || rate * osr > CLK_DA_AD_MAX) { + dev_err(nau8824->dev, "exceed the maximum frequency of CLK_ADC or CLK_DAC\n"); + return -EINVAL; + } + + return 0; +} + +static int nau8824_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + unsigned int val_len = 0, osr, ctrl_val, bclk_fs, bclk_div; + + nau8824_sema_acquire(nau8824, HZ); + + /* CLK_DAC or CLK_ADC = OSR * FS + * DAC or ADC clock frequency is defined as Over Sampling Rate (OSR) + * multiplied by the audio sample rate (Fs). Note that the OSR and Fs + * values must be selected such that the maximum frequency is less + * than 6.144 MHz. + */ + nau8824->fs = params_rate(params); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + regmap_read(nau8824->regmap, + NAU8824_REG_DAC_FILTER_CTRL_1, &osr); + osr &= NAU8824_DAC_OVERSAMPLE_MASK; + if (nau8824_clock_check(nau8824, substream->stream, + nau8824->fs, osr)) + return -EINVAL; + regmap_update_bits(nau8824->regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_DAC_SRC_MASK, + osr_dac_sel[osr].clk_src << NAU8824_CLK_DAC_SRC_SFT); + } else { + regmap_read(nau8824->regmap, + NAU8824_REG_ADC_FILTER_CTRL, &osr); + osr &= NAU8824_ADC_SYNC_DOWN_MASK; + if (nau8824_clock_check(nau8824, substream->stream, + nau8824->fs, osr)) + return -EINVAL; + regmap_update_bits(nau8824->regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_ADC_SRC_MASK, + osr_adc_sel[osr].clk_src << NAU8824_CLK_ADC_SRC_SFT); + } + + /* make BCLK and LRC divde configuration if the codec as master. */ + regmap_read(nau8824->regmap, + NAU8824_REG_PORT0_I2S_PCM_CTRL_2, &ctrl_val); + if (ctrl_val & NAU8824_I2S_MS_MASTER) { + /* get the bclk and fs ratio */ + bclk_fs = snd_soc_params_to_bclk(params) / nau8824->fs; + if (bclk_fs <= 32) + bclk_div = 0x3; + else if (bclk_fs <= 64) + bclk_div = 0x2; + else if (bclk_fs <= 128) + bclk_div = 0x1; + else if (bclk_fs <= 256) + bclk_div = 0; + else + return -EINVAL; + regmap_update_bits(nau8824->regmap, + NAU8824_REG_PORT0_I2S_PCM_CTRL_2, + NAU8824_I2S_LRC_DIV_MASK | NAU8824_I2S_BLK_DIV_MASK, + (bclk_div << NAU8824_I2S_LRC_DIV_SFT) | bclk_div); + } + + switch (params_width(params)) { + case 16: + val_len |= NAU8824_I2S_DL_16; + break; + case 20: + val_len |= NAU8824_I2S_DL_20; + break; + case 24: + val_len |= NAU8824_I2S_DL_24; + break; + case 32: + val_len |= NAU8824_I2S_DL_32; + break; + default: + return -EINVAL; + } + + regmap_update_bits(nau8824->regmap, NAU8824_REG_PORT0_I2S_PCM_CTRL_1, + NAU8824_I2S_DL_MASK, val_len); + + nau8824_sema_release(nau8824); + + return 0; +} + +static int nau8824_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) +{ + struct snd_soc_codec *codec = dai->codec; + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + unsigned int ctrl1_val = 0, ctrl2_val = 0; + + nau8824_sema_acquire(nau8824, HZ); + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + ctrl2_val |= NAU8824_I2S_MS_MASTER; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + ctrl1_val |= NAU8824_I2S_BP_INV; + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + ctrl1_val |= NAU8824_I2S_DF_I2S; + break; + case SND_SOC_DAIFMT_LEFT_J: + ctrl1_val |= NAU8824_I2S_DF_LEFT; + break; + case SND_SOC_DAIFMT_RIGHT_J: + ctrl1_val |= NAU8824_I2S_DF_RIGTH; + break; + case SND_SOC_DAIFMT_DSP_A: + ctrl1_val |= NAU8824_I2S_DF_PCM_AB; + break; + case SND_SOC_DAIFMT_DSP_B: + ctrl1_val |= NAU8824_I2S_DF_PCM_AB; + ctrl1_val |= NAU8824_I2S_PCMB_EN; + break; + default: + return -EINVAL; + } + + regmap_update_bits(nau8824->regmap, NAU8824_REG_PORT0_I2S_PCM_CTRL_1, + NAU8824_I2S_DF_MASK | NAU8824_I2S_BP_MASK | + NAU8824_I2S_PCMB_EN, ctrl1_val); + regmap_update_bits(nau8824->regmap, NAU8824_REG_PORT0_I2S_PCM_CTRL_2, + NAU8824_I2S_MS_MASK, ctrl2_val); + + nau8824_sema_release(nau8824); + + return 0; +} + +/** + * nau8824_calc_fll_param - Calculate FLL parameters. + * @fll_in: external clock provided to codec. + * @fs: sampling rate. + * @fll_param: Pointer to structure of FLL parameters. + * + * Calculate FLL parameters to configure codec. + * + * Returns 0 for success or negative error code. + */ +static int nau8824_calc_fll_param(unsigned int fll_in, + unsigned int fs, struct nau8824_fll *fll_param) +{ + u64 fvco, fvco_max; + unsigned int fref, i, fvco_sel; + + /* Ensure the reference clock frequency (FREF) is <= 13.5MHz by dividing + * freq_in by 1, 2, 4, or 8 using FLL pre-scalar. + * FREF = freq_in / NAU8824_FLL_REF_DIV_MASK + */ + for (i = 0; i < ARRAY_SIZE(fll_pre_scalar); i++) { + fref = fll_in / fll_pre_scalar[i].param; + if (fref <= NAU_FREF_MAX) + break; + } + if (i == ARRAY_SIZE(fll_pre_scalar)) + return -EINVAL; + fll_param->clk_ref_div = fll_pre_scalar[i].val; + + /* Choose the FLL ratio based on FREF */ + for (i = 0; i < ARRAY_SIZE(fll_ratio); i++) { + if (fref >= fll_ratio[i].param) + break; + } + if (i == ARRAY_SIZE(fll_ratio)) + return -EINVAL; + fll_param->ratio = fll_ratio[i].val; + + /* Calculate the frequency of DCO (FDCO) given freq_out = 256 * Fs. + * FDCO must be within the 90MHz - 124MHz or the FFL cannot be + * guaranteed across the full range of operation. + * FDCO = freq_out * 2 * mclk_src_scaling + */ + fvco_max = 0; + fvco_sel = ARRAY_SIZE(mclk_src_scaling); + for (i = 0; i < ARRAY_SIZE(mclk_src_scaling); i++) { + fvco = 256 * fs * 2 * mclk_src_scaling[i].param; + if (fvco > NAU_FVCO_MIN && fvco < NAU_FVCO_MAX && + fvco_max < fvco) { + fvco_max = fvco; + fvco_sel = i; + } + } + if (ARRAY_SIZE(mclk_src_scaling) == fvco_sel) + return -EINVAL; + fll_param->mclk_src = mclk_src_scaling[fvco_sel].val; + + /* Calculate the FLL 10-bit integer input and the FLL 16-bit fractional + * input based on FDCO, FREF and FLL ratio. + */ + fvco = div_u64(fvco_max << 16, fref * fll_param->ratio); + fll_param->fll_int = (fvco >> 16) & 0x3FF; + fll_param->fll_frac = fvco & 0xFFFF; + return 0; +} + +static void nau8824_fll_apply(struct regmap *regmap, + struct nau8824_fll *fll_param) +{ + regmap_update_bits(regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_SRC_MASK | NAU8824_CLK_MCLK_SRC_MASK, + NAU8824_CLK_SRC_MCLK | fll_param->mclk_src); + regmap_update_bits(regmap, NAU8824_REG_FLL1, + NAU8824_FLL_RATIO_MASK, fll_param->ratio); + /* FLL 16-bit fractional input */ + regmap_write(regmap, NAU8824_REG_FLL2, fll_param->fll_frac); + /* FLL 10-bit integer input */ + regmap_update_bits(regmap, NAU8824_REG_FLL3, + NAU8824_FLL_INTEGER_MASK, fll_param->fll_int); + /* FLL pre-scaler */ + regmap_update_bits(regmap, NAU8824_REG_FLL4, + NAU8824_FLL_REF_DIV_MASK, + fll_param->clk_ref_div << NAU8824_FLL_REF_DIV_SFT); + /* select divided VCO input */ + regmap_update_bits(regmap, NAU8824_REG_FLL5, + NAU8824_FLL_CLK_SW_MASK, NAU8824_FLL_CLK_SW_REF); + /* Disable free-running mode */ + regmap_update_bits(regmap, + NAU8824_REG_FLL6, NAU8824_DCO_EN, 0); + if (fll_param->fll_frac) { + regmap_update_bits(regmap, NAU8824_REG_FLL5, + NAU8824_FLL_PDB_DAC_EN | NAU8824_FLL_LOOP_FTR_EN | + NAU8824_FLL_FTR_SW_MASK, + NAU8824_FLL_PDB_DAC_EN | NAU8824_FLL_LOOP_FTR_EN | + NAU8824_FLL_FTR_SW_FILTER); + regmap_update_bits(regmap, NAU8824_REG_FLL6, + NAU8824_SDM_EN, NAU8824_SDM_EN); + } else { + regmap_update_bits(regmap, NAU8824_REG_FLL5, + NAU8824_FLL_PDB_DAC_EN | NAU8824_FLL_LOOP_FTR_EN | + NAU8824_FLL_FTR_SW_MASK, NAU8824_FLL_FTR_SW_ACCU); + regmap_update_bits(regmap, + NAU8824_REG_FLL6, NAU8824_SDM_EN, 0); + } +} + +/* freq_out must be 256*Fs in order to achieve the best performance */ +static int nau8824_set_pll(struct snd_soc_codec *codec, int pll_id, int source, + unsigned int freq_in, unsigned int freq_out) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + struct nau8824_fll fll_param; + int ret, fs; + + fs = freq_out / 256; + ret = nau8824_calc_fll_param(freq_in, fs, &fll_param); + if (ret < 0) { + dev_err(nau8824->dev, "Unsupported input clock %d\n", freq_in); + return ret; + } + dev_dbg(nau8824->dev, "mclk_src=%x ratio=%x fll_frac=%x fll_int=%x clk_ref_div=%x\n", + fll_param.mclk_src, fll_param.ratio, fll_param.fll_frac, + fll_param.fll_int, fll_param.clk_ref_div); + + nau8824_fll_apply(nau8824->regmap, &fll_param); + mdelay(2); + regmap_update_bits(nau8824->regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_SRC_MASK, NAU8824_CLK_SRC_VCO); + + return 0; +} + +static int nau8824_config_sysclk(struct nau8824 *nau8824, + int clk_id, unsigned int freq) +{ + struct regmap *regmap = nau8824->regmap; + + switch (clk_id) { + case NAU8824_CLK_DIS: + regmap_update_bits(regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_SRC_MASK, NAU8824_CLK_SRC_MCLK); + regmap_update_bits(regmap, NAU8824_REG_FLL6, + NAU8824_DCO_EN, 0); + break; + + case NAU8824_CLK_MCLK: + nau8824_sema_acquire(nau8824, HZ); + regmap_update_bits(regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_SRC_MASK, NAU8824_CLK_SRC_MCLK); + regmap_update_bits(regmap, NAU8824_REG_FLL6, + NAU8824_DCO_EN, 0); + nau8824_sema_release(nau8824); + break; + + case NAU8824_CLK_INTERNAL: + regmap_update_bits(regmap, NAU8824_REG_FLL6, + NAU8824_DCO_EN, NAU8824_DCO_EN); + regmap_update_bits(regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_SRC_MASK, NAU8824_CLK_SRC_VCO); + break; + + case NAU8824_CLK_FLL_MCLK: + nau8824_sema_acquire(nau8824, HZ); + regmap_update_bits(regmap, NAU8824_REG_FLL3, + NAU8824_FLL_CLK_SRC_MASK, NAU8824_FLL_CLK_SRC_MCLK); + nau8824_sema_release(nau8824); + break; + + case NAU8824_CLK_FLL_BLK: + nau8824_sema_acquire(nau8824, HZ); + regmap_update_bits(regmap, NAU8824_REG_FLL3, + NAU8824_FLL_CLK_SRC_MASK, NAU8824_FLL_CLK_SRC_BLK); + nau8824_sema_release(nau8824); + break; + + case NAU8824_CLK_FLL_FS: + nau8824_sema_acquire(nau8824, HZ); + regmap_update_bits(regmap, NAU8824_REG_FLL3, + NAU8824_FLL_CLK_SRC_MASK, NAU8824_FLL_CLK_SRC_FS); + nau8824_sema_release(nau8824); + break; + + default: + dev_err(nau8824->dev, "Invalid clock id (%d)\n", clk_id); + return -EINVAL; + } + + dev_dbg(nau8824->dev, "Sysclk is %dHz and clock id is %d\n", freq, + clk_id); + + return 0; +} + +static int nau8824_set_sysclk(struct snd_soc_codec *codec, + int clk_id, int source, unsigned int freq, int dir) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + return nau8824_config_sysclk(nau8824, clk_id, freq); +} + +static void nau8824_resume_setup(struct nau8824 *nau8824) +{ + nau8824_config_sysclk(nau8824, NAU8824_CLK_DIS, 0); + if (nau8824->irq) { + /* Clear all interruption status */ + nau8824_int_status_clear_all(nau8824->regmap); + /* Enable jack detection at sleep mode, insertion detection, + * and ejection detection. + */ + regmap_update_bits(nau8824->regmap, NAU8824_REG_ENA_CTRL, + NAU8824_JD_SLEEP_MODE, NAU8824_JD_SLEEP_MODE); + regmap_update_bits(nau8824->regmap, + NAU8824_REG_INTERRUPT_SETTING_1, + NAU8824_IRQ_EJECT_EN | NAU8824_IRQ_INSERT_EN, + NAU8824_IRQ_EJECT_EN | NAU8824_IRQ_INSERT_EN); + regmap_update_bits(nau8824->regmap, + NAU8824_REG_INTERRUPT_SETTING, + NAU8824_IRQ_EJECT_DIS | NAU8824_IRQ_INSERT_DIS, 0); + } +} + +static int nau8824_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + switch (level) { + case SND_SOC_BIAS_ON: + break; + + case SND_SOC_BIAS_PREPARE: + break; + + case SND_SOC_BIAS_STANDBY: + if (snd_soc_codec_get_bias_level(codec) == SND_SOC_BIAS_OFF) { + /* Setup codec configuration after resume */ + nau8824_resume_setup(nau8824); + } + break; + + case SND_SOC_BIAS_OFF: + regmap_update_bits(nau8824->regmap, + NAU8824_REG_INTERRUPT_SETTING, 0x3ff, 0x3ff); + regmap_update_bits(nau8824->regmap, + NAU8824_REG_INTERRUPT_SETTING_1, + NAU8824_IRQ_EJECT_EN | NAU8824_IRQ_INSERT_EN, 0); + break; + } + + return 0; +} + +static int nau8824_codec_probe(struct snd_soc_codec *codec) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec); + + nau8824->dapm = dapm; + + return 0; +} + +static int __maybe_unused nau8824_suspend(struct snd_soc_codec *codec) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + if (nau8824->irq) { + disable_irq(nau8824->irq); + snd_soc_codec_force_bias_level(codec, SND_SOC_BIAS_OFF); + } + regcache_cache_only(nau8824->regmap, true); + regcache_mark_dirty(nau8824->regmap); + + return 0; +} + +static int __maybe_unused nau8824_resume(struct snd_soc_codec *codec) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + + regcache_cache_only(nau8824->regmap, false); + regcache_sync(nau8824->regmap); + if (nau8824->irq) { + /* Hold semaphore to postpone playback happening + * until jack detection done. + */ + nau8824_sema_acquire(nau8824, 0); + enable_irq(nau8824->irq); + } + + return 0; +} + +static struct snd_soc_codec_driver nau8824_codec_driver = { + .probe = nau8824_codec_probe, + .set_sysclk = nau8824_set_sysclk, + .set_pll = nau8824_set_pll, + .set_bias_level = nau8824_set_bias_level, + .suspend = nau8824_suspend, + .resume = nau8824_resume, + .suspend_bias_off = true, + + .component_driver = { + .controls = nau8824_snd_controls, + .num_controls = ARRAY_SIZE(nau8824_snd_controls), + .dapm_widgets = nau8824_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(nau8824_dapm_widgets), + .dapm_routes = nau8824_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(nau8824_dapm_routes), + }, +}; + +static const struct snd_soc_dai_ops nau8824_dai_ops = { + .hw_params = nau8824_hw_params, + .set_fmt = nau8824_set_fmt, +}; + +#define NAU8824_RATES SNDRV_PCM_RATE_8000_192000 +#define NAU8824_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE \ + | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S32_LE) + +static struct snd_soc_dai_driver nau8824_dai = { + .name = NAU8824_CODEC_DAI, + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = NAU8824_RATES, + .formats = NAU8824_FORMATS, + }, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = NAU8824_RATES, + .formats = NAU8824_FORMATS, + }, + .ops = &nau8824_dai_ops, +}; + +static const struct regmap_config nau8824_regmap_config = { + .val_bits = NAU8824_REG_ADDR_LEN, + .reg_bits = NAU8824_REG_DATA_LEN, + + .max_register = NAU8824_REG_MAX, + .readable_reg = nau8824_readable_reg, + .writeable_reg = nau8824_writeable_reg, + .volatile_reg = nau8824_volatile_reg, + + .cache_type = REGCACHE_RBTREE, + .reg_defaults = nau8824_reg_defaults, + .num_reg_defaults = ARRAY_SIZE(nau8824_reg_defaults), +}; + +/** + * nau8824_enable_jack_detect - Specify a jack for event reporting + * + * @component: component to register the jack with + * @jack: jack to use to report headset and button events on + * + * After this function has been called the headset insert/remove and button + * events will be routed to the given jack. Jack can be null to stop + * reporting. + */ +int nau8824_enable_jack_detect(struct snd_soc_codec *codec, + struct snd_soc_jack *jack) +{ + struct nau8824 *nau8824 = snd_soc_codec_get_drvdata(codec); + int ret; + + nau8824->jack = jack; + /* Initiate jack detection work queue */ + INIT_WORK(&nau8824->jdet_work, nau8824_jdet_work); + ret = devm_request_threaded_irq(nau8824->dev, nau8824->irq, NULL, + nau8824_interrupt, IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "nau8824", nau8824); + if (ret) { + dev_err(nau8824->dev, "Cannot request irq %d (%d)\n", + nau8824->irq, ret); + } + + return ret; +} +EXPORT_SYMBOL_GPL(nau8824_enable_jack_detect); + +static void nau8824_reset_chip(struct regmap *regmap) +{ + regmap_write(regmap, NAU8824_REG_RESET, 0x00); + regmap_write(regmap, NAU8824_REG_RESET, 0x00); +} + +static void nau8824_setup_buttons(struct nau8824 *nau8824) +{ + struct regmap *regmap = nau8824->regmap; + + regmap_update_bits(regmap, NAU8824_REG_SAR_ADC, + NAU8824_SAR_TRACKING_GAIN_MASK, + nau8824->sar_voltage << NAU8824_SAR_TRACKING_GAIN_SFT); + regmap_update_bits(regmap, NAU8824_REG_SAR_ADC, + NAU8824_SAR_COMPARE_TIME_MASK, + nau8824->sar_compare_time << NAU8824_SAR_COMPARE_TIME_SFT); + regmap_update_bits(regmap, NAU8824_REG_SAR_ADC, + NAU8824_SAR_SAMPLING_TIME_MASK, + nau8824->sar_sampling_time << NAU8824_SAR_SAMPLING_TIME_SFT); + + regmap_update_bits(regmap, NAU8824_REG_VDET_COEFFICIENT, + NAU8824_LEVELS_NR_MASK, + (nau8824->sar_threshold_num - 1) << NAU8824_LEVELS_NR_SFT); + regmap_update_bits(regmap, NAU8824_REG_VDET_COEFFICIENT, + NAU8824_HYSTERESIS_MASK, + nau8824->sar_hysteresis << NAU8824_HYSTERESIS_SFT); + regmap_update_bits(regmap, NAU8824_REG_VDET_COEFFICIENT, + NAU8824_SHORTKEY_DEBOUNCE_MASK, + nau8824->key_debounce << NAU8824_SHORTKEY_DEBOUNCE_SFT); + + regmap_write(regmap, NAU8824_REG_VDET_THRESHOLD_1, + (nau8824->sar_threshold[0] << 8) | nau8824->sar_threshold[1]); + regmap_write(regmap, NAU8824_REG_VDET_THRESHOLD_2, + (nau8824->sar_threshold[2] << 8) | nau8824->sar_threshold[3]); + regmap_write(regmap, NAU8824_REG_VDET_THRESHOLD_3, + (nau8824->sar_threshold[4] << 8) | nau8824->sar_threshold[5]); + regmap_write(regmap, NAU8824_REG_VDET_THRESHOLD_4, + (nau8824->sar_threshold[6] << 8) | nau8824->sar_threshold[7]); +} + +static void nau8824_init_regs(struct nau8824 *nau8824) +{ + struct regmap *regmap = nau8824->regmap; + + /* Enable Bias/VMID/VMID Tieoff */ + regmap_update_bits(regmap, NAU8824_REG_BIAS_ADJ, + NAU8824_VMID | NAU8824_VMID_SEL_MASK, NAU8824_VMID | + (nau8824->vref_impedance << NAU8824_VMID_SEL_SFT)); + regmap_update_bits(regmap, NAU8824_REG_BOOST, + NAU8824_GLOBAL_BIAS_EN, NAU8824_GLOBAL_BIAS_EN); + mdelay(2); + regmap_update_bits(regmap, NAU8824_REG_MIC_BIAS, + NAU8824_MICBIAS_VOLTAGE_MASK, nau8824->micbias_voltage); + /* Disable Boost Driver, Automatic Short circuit protection enable */ + regmap_update_bits(regmap, NAU8824_REG_BOOST, + NAU8824_PRECHARGE_DIS | NAU8824_HP_BOOST_DIS | + NAU8824_HP_BOOST_G_DIS | NAU8824_SHORT_SHUTDOWN_EN, + NAU8824_PRECHARGE_DIS | NAU8824_HP_BOOST_DIS | + NAU8824_HP_BOOST_G_DIS | NAU8824_SHORT_SHUTDOWN_EN); + /* Scaling for ADC and DAC clock */ + regmap_update_bits(regmap, NAU8824_REG_CLK_DIVIDER, + NAU8824_CLK_ADC_SRC_MASK | NAU8824_CLK_DAC_SRC_MASK, + (0x1 << NAU8824_CLK_ADC_SRC_SFT) | + (0x1 << NAU8824_CLK_DAC_SRC_SFT)); + regmap_update_bits(regmap, NAU8824_REG_DAC_MUTE_CTRL, + NAU8824_DAC_ZC_EN, NAU8824_DAC_ZC_EN); + regmap_update_bits(regmap, NAU8824_REG_ENA_CTRL, + NAU8824_DAC_CH1_EN | NAU8824_DAC_CH0_EN | + NAU8824_ADC_CH0_EN | NAU8824_ADC_CH1_EN | + NAU8824_ADC_CH2_EN | NAU8824_ADC_CH3_EN, + NAU8824_DAC_CH1_EN | NAU8824_DAC_CH0_EN | + NAU8824_ADC_CH0_EN | NAU8824_ADC_CH1_EN | + NAU8824_ADC_CH2_EN | NAU8824_ADC_CH3_EN); + regmap_update_bits(regmap, NAU8824_REG_CLK_GATING_ENA, + NAU8824_CLK_ADC_CH23_EN | NAU8824_CLK_ADC_CH01_EN | + NAU8824_CLK_DAC_CH1_EN | NAU8824_CLK_DAC_CH0_EN | + NAU8824_CLK_I2S_EN | NAU8824_CLK_GAIN_EN | + NAU8824_CLK_SAR_EN | NAU8824_CLK_DMIC_CH23_EN, + NAU8824_CLK_ADC_CH23_EN | NAU8824_CLK_ADC_CH01_EN | + NAU8824_CLK_DAC_CH1_EN | NAU8824_CLK_DAC_CH0_EN | + NAU8824_CLK_I2S_EN | NAU8824_CLK_GAIN_EN | + NAU8824_CLK_SAR_EN | NAU8824_CLK_DMIC_CH23_EN); + /* Class G timer 64ms */ + regmap_update_bits(regmap, NAU8824_REG_CLASSG, + NAU8824_CLASSG_TIMER_MASK, + 0x20 << NAU8824_CLASSG_TIMER_SFT); + regmap_update_bits(regmap, NAU8824_REG_TRIM_SETTINGS, + NAU8824_DRV_CURR_INC, NAU8824_DRV_CURR_INC); + /* Disable DACR/L power */ + regmap_update_bits(regmap, NAU8824_REG_CHARGE_PUMP_CONTROL, + NAU8824_SPKR_PULL_DOWN | NAU8824_SPKL_PULL_DOWN | + NAU8824_POWER_DOWN_DACR | NAU8824_POWER_DOWN_DACL, + NAU8824_SPKR_PULL_DOWN | NAU8824_SPKL_PULL_DOWN | + NAU8824_POWER_DOWN_DACR | NAU8824_POWER_DOWN_DACL); + /* Enable TESTDAC. This sets the analog DAC inputs to a '0' input + * signal to avoid any glitches due to power up transients in both + * the analog and digital DAC circuit. + */ + regmap_update_bits(regmap, NAU8824_REG_ENABLE_LO, + NAU8824_TEST_DAC_EN, NAU8824_TEST_DAC_EN); + /* Config L/R channel */ + regmap_update_bits(regmap, NAU8824_REG_DAC_CH0_DGAIN_CTRL, + NAU8824_DAC_CH0_SEL_MASK, NAU8824_DAC_CH0_SEL_I2S0); + regmap_update_bits(regmap, NAU8824_REG_DAC_CH1_DGAIN_CTRL, + NAU8824_DAC_CH1_SEL_MASK, NAU8824_DAC_CH1_SEL_I2S1); + regmap_update_bits(regmap, NAU8824_REG_ENABLE_LO, + NAU8824_DACR_HPR_EN | NAU8824_DACL_HPL_EN, + NAU8824_DACR_HPR_EN | NAU8824_DACL_HPL_EN); + /* Default oversampling/decimations settings are unusable + * (audible hiss). Set it to something better. + */ + regmap_update_bits(regmap, NAU8824_REG_ADC_FILTER_CTRL, + NAU8824_ADC_SYNC_DOWN_MASK, NAU8824_ADC_SYNC_DOWN_64); + regmap_update_bits(regmap, NAU8824_REG_DAC_FILTER_CTRL_1, + NAU8824_DAC_CICCLP_OFF | NAU8824_DAC_OVERSAMPLE_MASK, + NAU8824_DAC_CICCLP_OFF | NAU8824_DAC_OVERSAMPLE_64); + /* DAC clock delay 2ns, VREF */ + regmap_update_bits(regmap, NAU8824_REG_RDAC, + NAU8824_RDAC_CLK_DELAY_MASK | NAU8824_RDAC_VREF_MASK, + (0x2 << NAU8824_RDAC_CLK_DELAY_SFT) | + (0x3 << NAU8824_RDAC_VREF_SFT)); + /* PGA input mode selection */ + regmap_update_bits(regmap, NAU8824_REG_FEPGA, + NAU8824_FEPGA_MODEL_SHORT_EN | NAU8824_FEPGA_MODER_SHORT_EN, + NAU8824_FEPGA_MODEL_SHORT_EN | NAU8824_FEPGA_MODER_SHORT_EN); + /* Digital microphone control */ + regmap_update_bits(regmap, NAU8824_REG_ANALOG_CONTROL_1, + NAU8824_DMIC_CLK_DRV_STRG | NAU8824_DMIC_CLK_SLEW_FAST, + NAU8824_DMIC_CLK_DRV_STRG | NAU8824_DMIC_CLK_SLEW_FAST); + regmap_update_bits(regmap, NAU8824_REG_JACK_DET_CTRL, + NAU8824_JACK_LOGIC, + /* jkdet_polarity - 1 is for active-low */ + nau8824->jkdet_polarity ? 0 : NAU8824_JACK_LOGIC); + regmap_update_bits(regmap, + NAU8824_REG_JACK_DET_CTRL, NAU8824_JACK_EJECT_DT_MASK, + (nau8824->jack_eject_debounce << NAU8824_JACK_EJECT_DT_SFT)); + if (nau8824->sar_threshold_num) + nau8824_setup_buttons(nau8824); +} + +static int nau8824_setup_irq(struct nau8824 *nau8824) +{ + /* Disable interruption before codec initiation done */ + regmap_update_bits(nau8824->regmap, NAU8824_REG_ENA_CTRL, + NAU8824_JD_SLEEP_MODE, NAU8824_JD_SLEEP_MODE); + regmap_update_bits(nau8824->regmap, + NAU8824_REG_INTERRUPT_SETTING, 0x3ff, 0x3ff); + regmap_update_bits(nau8824->regmap, NAU8824_REG_INTERRUPT_SETTING_1, + NAU8824_IRQ_EJECT_EN | NAU8824_IRQ_INSERT_EN, 0); + + return 0; +} + +static void nau8824_print_device_properties(struct nau8824 *nau8824) +{ + struct device *dev = nau8824->dev; + int i; + + dev_dbg(dev, "jkdet-polarity: %d\n", nau8824->jkdet_polarity); + dev_dbg(dev, "micbias-voltage: %d\n", nau8824->micbias_voltage); + dev_dbg(dev, "vref-impedance: %d\n", nau8824->vref_impedance); + + dev_dbg(dev, "sar-threshold-num: %d\n", nau8824->sar_threshold_num); + for (i = 0; i < nau8824->sar_threshold_num; i++) + dev_dbg(dev, "sar-threshold[%d]=%x\n", i, + nau8824->sar_threshold[i]); + + dev_dbg(dev, "sar-hysteresis: %d\n", nau8824->sar_hysteresis); + dev_dbg(dev, "sar-voltage: %d\n", nau8824->sar_voltage); + dev_dbg(dev, "sar-compare-time: %d\n", nau8824->sar_compare_time); + dev_dbg(dev, "sar-sampling-time: %d\n", nau8824->sar_sampling_time); + dev_dbg(dev, "short-key-debounce: %d\n", nau8824->key_debounce); + dev_dbg(dev, "jack-eject-debounce: %d\n", + nau8824->jack_eject_debounce); +} + +static int nau8824_read_device_properties(struct device *dev, + struct nau8824 *nau8824) { + int ret; + + ret = device_property_read_u32(dev, "nuvoton,jkdet-polarity", + &nau8824->jkdet_polarity); + if (ret) + nau8824->jkdet_polarity = 1; + ret = device_property_read_u32(dev, "nuvoton,micbias-voltage", + &nau8824->micbias_voltage); + if (ret) + nau8824->micbias_voltage = 6; + ret = device_property_read_u32(dev, "nuvoton,vref-impedance", + &nau8824->vref_impedance); + if (ret) + nau8824->vref_impedance = 2; + ret = device_property_read_u32(dev, "nuvoton,sar-threshold-num", + &nau8824->sar_threshold_num); + if (ret) + nau8824->sar_threshold_num = 4; + ret = device_property_read_u32_array(dev, "nuvoton,sar-threshold", + nau8824->sar_threshold, nau8824->sar_threshold_num); + if (ret) { + nau8824->sar_threshold[0] = 0x0a; + nau8824->sar_threshold[1] = 0x14; + nau8824->sar_threshold[2] = 0x26; + nau8824->sar_threshold[3] = 0x73; + } + ret = device_property_read_u32(dev, "nuvoton,sar-hysteresis", + &nau8824->sar_hysteresis); + if (ret) + nau8824->sar_hysteresis = 0; + ret = device_property_read_u32(dev, "nuvoton,sar-voltage", + &nau8824->sar_voltage); + if (ret) + nau8824->sar_voltage = 6; + ret = device_property_read_u32(dev, "nuvoton,sar-compare-time", + &nau8824->sar_compare_time); + if (ret) + nau8824->sar_compare_time = 1; + ret = device_property_read_u32(dev, "nuvoton,sar-sampling-time", + &nau8824->sar_sampling_time); + if (ret) + nau8824->sar_sampling_time = 1; + ret = device_property_read_u32(dev, "nuvoton,short-key-debounce", + &nau8824->key_debounce); + if (ret) + nau8824->key_debounce = 0; + ret = device_property_read_u32(dev, "nuvoton,jack-eject-debounce", + &nau8824->jack_eject_debounce); + if (ret) + nau8824->jack_eject_debounce = 1; + + return 0; +} + +static int nau8824_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct device *dev = &i2c->dev; + struct nau8824 *nau8824 = dev_get_platdata(dev); + int ret, value; + + if (!nau8824) { + nau8824 = devm_kzalloc(dev, sizeof(*nau8824), GFP_KERNEL); + if (!nau8824) + return -ENOMEM; + ret = nau8824_read_device_properties(dev, nau8824); + if (ret) + return ret; + } + i2c_set_clientdata(i2c, nau8824); + + nau8824->regmap = devm_regmap_init_i2c(i2c, &nau8824_regmap_config); + if (IS_ERR(nau8824->regmap)) + return PTR_ERR(nau8824->regmap); + nau8824->dev = dev; + nau8824->irq = i2c->irq; + sema_init(&nau8824->jd_sem, 1); + + nau8824_print_device_properties(nau8824); + + ret = regmap_read(nau8824->regmap, NAU8824_REG_I2C_DEVICE_ID, &value); + if (ret < 0) { + dev_err(dev, "Failed to read device id from the NAU8824: %d\n", + ret); + return ret; + } + nau8824_reset_chip(nau8824->regmap); + nau8824_init_regs(nau8824); + + if (i2c->irq) + nau8824_setup_irq(nau8824); + + return snd_soc_register_codec(dev, + &nau8824_codec_driver, &nau8824_dai, 1); +} + + +static int nau8824_i2c_remove(struct i2c_client *client) +{ + snd_soc_unregister_codec(&client->dev); + return 0; +} + +static const struct i2c_device_id nau8824_i2c_ids[] = { + { "nau8824", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, nau8824_i2c_ids); + +#ifdef CONFIG_OF +static const struct of_device_id nau8824_of_ids[] = { + { .compatible = "nuvoton,nau8824", }, + {} +}; +MODULE_DEVICE_TABLE(of, nau8824_of_ids); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id nau8824_acpi_match[] = { + { "10508824", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, nau8824_acpi_match); +#endif + +static struct i2c_driver nau8824_i2c_driver = { + .driver = { + .name = "nau8824", + .of_match_table = of_match_ptr(nau8824_of_ids), + .acpi_match_table = ACPI_PTR(nau8824_acpi_match), + }, + .probe = nau8824_i2c_probe, + .remove = nau8824_i2c_remove, + .id_table = nau8824_i2c_ids, +}; +module_i2c_driver(nau8824_i2c_driver); + + +MODULE_DESCRIPTION("ASoC NAU88L24 driver"); +MODULE_AUTHOR("John Hsu "); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/nau8824.h b/sound/soc/codecs/nau8824.h new file mode 100644 index 0000000000000000000000000000000000000000..87ac9a382aede5ab898d1a0861751c9d63e70a48 --- /dev/null +++ b/sound/soc/codecs/nau8824.h @@ -0,0 +1,466 @@ +/* + * NAU88L24 ALSA SoC audio driver + * + * Copyright 2016 Nuvoton Technology Corp. + * Author: John Hsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __NAU8824_H__ +#define __NAU8824_H__ + +#define NAU8824_REG_RESET 0x00 +#define NAU8824_REG_ENA_CTRL 0x01 +#define NAU8824_REG_CLK_GATING_ENA 0x02 +#define NAU8824_REG_CLK_DIVIDER 0x03 +#define NAU8824_REG_FLL1 0x04 +#define NAU8824_REG_FLL2 0x05 +#define NAU8824_REG_FLL3 0x06 +#define NAU8824_REG_FLL4 0x07 +#define NAU8824_REG_FLL5 0x08 +#define NAU8824_REG_FLL6 0x09 +#define NAU8824_REG_FLL_VCO_RSV 0x0A +#define NAU8824_REG_JACK_DET_CTRL 0x0D +#define NAU8824_REG_INTERRUPT_SETTING_1 0x0F +#define NAU8824_REG_IRQ 0x10 +#define NAU8824_REG_CLEAR_INT_REG 0x11 +#define NAU8824_REG_INTERRUPT_SETTING 0x12 +#define NAU8824_REG_SAR_ADC 0x13 +#define NAU8824_REG_VDET_COEFFICIENT 0x14 +#define NAU8824_REG_VDET_THRESHOLD_1 0x15 +#define NAU8824_REG_VDET_THRESHOLD_2 0x16 +#define NAU8824_REG_VDET_THRESHOLD_3 0x17 +#define NAU8824_REG_VDET_THRESHOLD_4 0x18 +#define NAU8824_REG_GPIO_SEL 0x1A +#define NAU8824_REG_PORT0_I2S_PCM_CTRL_1 0x1C +#define NAU8824_REG_PORT0_I2S_PCM_CTRL_2 0x1D +#define NAU8824_REG_PORT0_LEFT_TIME_SLOT 0x1E +#define NAU8824_REG_PORT0_RIGHT_TIME_SLOT 0x1F +#define NAU8824_REG_TDM_CTRL 0x20 +#define NAU8824_REG_ADC_HPF_FILTER 0x23 +#define NAU8824_REG_ADC_FILTER_CTRL 0x24 +#define NAU8824_REG_DAC_FILTER_CTRL_1 0x25 +#define NAU8824_REG_DAC_FILTER_CTRL_2 0x26 +#define NAU8824_REG_NOTCH_FILTER_1 0x27 +#define NAU8824_REG_NOTCH_FILTER_2 0x28 +#define NAU8824_REG_EQ1_LOW 0x29 +#define NAU8824_REG_EQ2_EQ3 0x2A +#define NAU8824_REG_EQ4_EQ5 0x2B +#define NAU8824_REG_ADC_CH0_DGAIN_CTRL 0x2D +#define NAU8824_REG_ADC_CH1_DGAIN_CTRL 0x2E +#define NAU8824_REG_ADC_CH2_DGAIN_CTRL 0x2F +#define NAU8824_REG_ADC_CH3_DGAIN_CTRL 0x30 +#define NAU8824_REG_DAC_MUTE_CTRL 0x31 +#define NAU8824_REG_DAC_CH0_DGAIN_CTRL 0x32 +#define NAU8824_REG_DAC_CH1_DGAIN_CTRL 0x33 +#define NAU8824_REG_ADC_TO_DAC_ST 0x34 +#define NAU8824_REG_DRC_KNEE_IP12_ADC_CH01 0x38 +#define NAU8824_REG_DRC_KNEE_IP34_ADC_CH01 0x39 +#define NAU8824_REG_DRC_SLOPE_ADC_CH01 0x3A +#define NAU8824_REG_DRC_ATKDCY_ADC_CH01 0x3B +#define NAU8824_REG_DRC_KNEE_IP12_ADC_CH23 0x3C +#define NAU8824_REG_DRC_KNEE_IP34_ADC_CH23 0x3D +#define NAU8824_REG_DRC_SLOPE_ADC_CH23 0x3E +#define NAU8824_REG_DRC_ATKDCY_ADC_CH23 0x3F +#define NAU8824_REG_DRC_GAINL_ADC0 0x40 +#define NAU8824_REG_DRC_GAINL_ADC1 0x41 +#define NAU8824_REG_DRC_GAINL_ADC2 0x42 +#define NAU8824_REG_DRC_GAINL_ADC3 0x43 +#define NAU8824_REG_DRC_KNEE_IP12_DAC 0x45 +#define NAU8824_REG_DRC_KNEE_IP34_DAC 0x46 +#define NAU8824_REG_DRC_SLOPE_DAC 0x47 +#define NAU8824_REG_DRC_ATKDCY_DAC 0x48 +#define NAU8824_REG_DRC_GAIN_DAC_CH0 0x49 +#define NAU8824_REG_DRC_GAIN_DAC_CH1 0x4A +#define NAU8824_REG_MODE 0x4C +#define NAU8824_REG_MODE1 0x4D +#define NAU8824_REG_MODE2 0x4E +#define NAU8824_REG_CLASSG 0x50 +#define NAU8824_REG_OTP_EFUSE 0x51 +#define NAU8824_REG_OTPDOUT_1 0x53 +#define NAU8824_REG_OTPDOUT_2 0x54 +#define NAU8824_REG_MISC_CTRL 0x55 +#define NAU8824_REG_I2C_TIMEOUT 0x56 +#define NAU8824_REG_TEST_MODE 0x57 +#define NAU8824_REG_I2C_DEVICE_ID 0x58 +#define NAU8824_REG_SAR_ADC_DATA_OUT 0x59 +#define NAU8824_REG_BIAS_ADJ 0x66 +#define NAU8824_REG_PGA_GAIN 0x67 +#define NAU8824_REG_TRIM_SETTINGS 0x68 +#define NAU8824_REG_ANALOG_CONTROL_1 0x69 +#define NAU8824_REG_ANALOG_CONTROL_2 0x6A +#define NAU8824_REG_ENABLE_LO 0x6B +#define NAU8824_REG_GAIN_LO 0x6C +#define NAU8824_REG_CLASSD_GAIN_1 0x6D +#define NAU8824_REG_CLASSD_GAIN_2 0x6E +#define NAU8824_REG_ANALOG_ADC_1 0x71 +#define NAU8824_REG_ANALOG_ADC_2 0x72 +#define NAU8824_REG_RDAC 0x73 +#define NAU8824_REG_MIC_BIAS 0x74 +#define NAU8824_REG_HS_VOLUME_CONTROL 0x75 +#define NAU8824_REG_BOOST 0x76 +#define NAU8824_REG_FEPGA 0x77 +#define NAU8824_REG_FEPGA_II 0x78 +#define NAU8824_REG_FEPGA_SE 0x79 +#define NAU8824_REG_FEPGA_ATTENUATION 0x7A +#define NAU8824_REG_ATT_PORT0 0x7B +#define NAU8824_REG_ATT_PORT1 0x7C +#define NAU8824_REG_POWER_UP_CONTROL 0x7F +#define NAU8824_REG_CHARGE_PUMP_CONTROL 0x80 +#define NAU8824_REG_CHARGE_PUMP_INPUT 0x81 +#define NAU8824_REG_MAX NAU8824_REG_CHARGE_PUMP_INPUT +/* 16-bit control register address, and 16-bits control register data */ +#define NAU8824_REG_ADDR_LEN 16 +#define NAU8824_REG_DATA_LEN 16 + + +/* ENA_CTRL (0x1) */ +#define NAU8824_DMIC_LCH_EDGE_CH23 (0x1 << 12) +#define NAU8824_DMIC_LCH_EDGE_CH01 (0x1 << 11) +#define NAU8824_JD_SLEEP_MODE (0x1 << 10) +#define NAU8824_ADC_CH3_DMIC_SFT 9 +#define NAU8824_ADC_CH3_DMIC_EN (0x1 << NAU8824_ADC_CH3_DMIC_SFT) +#define NAU8824_ADC_CH2_DMIC_SFT 8 +#define NAU8824_ADC_CH2_DMIC_EN (0x1 << NAU8824_ADC_CH2_DMIC_SFT) +#define NAU8824_ADC_CH1_DMIC_SFT 7 +#define NAU8824_ADC_CH1_DMIC_EN (0x1 << NAU8824_ADC_CH1_DMIC_SFT) +#define NAU8824_ADC_CH0_DMIC_SFT 6 +#define NAU8824_ADC_CH0_DMIC_EN (0x1 << NAU8824_ADC_CH0_DMIC_SFT) +#define NAU8824_DAC_CH1_EN (0x1 << 5) +#define NAU8824_DAC_CH0_EN (0x1 << 4) +#define NAU8824_ADC_CH3_EN (0x1 << 3) +#define NAU8824_ADC_CH2_EN (0x1 << 2) +#define NAU8824_ADC_CH1_EN (0x1 << 1) +#define NAU8824_ADC_CH0_EN 0x1 + +/* CLK_GATING_ENA (0x02) */ +#define NAU8824_CLK_ADC_CH23_EN (0x1 << 15) +#define NAU8824_CLK_ADC_CH01_EN (0x1 << 14) +#define NAU8824_CLK_DAC_CH1_EN (0x1 << 13) +#define NAU8824_CLK_DAC_CH0_EN (0x1 << 12) +#define NAU8824_CLK_I2S_EN (0x1 << 7) +#define NAU8824_CLK_GAIN_EN (0x1 << 5) +#define NAU8824_CLK_SAR_EN (0x1 << 3) +#define NAU8824_CLK_DMIC_CH23_EN (0x1 << 1) + +/* CLK_DIVIDER (0x3) */ +#define NAU8824_CLK_SRC_SFT 15 +#define NAU8824_CLK_SRC_MASK (1 << NAU8824_CLK_SRC_SFT) +#define NAU8824_CLK_SRC_VCO (1 << NAU8824_CLK_SRC_SFT) +#define NAU8824_CLK_SRC_MCLK (0 << NAU8824_CLK_SRC_SFT) +#define NAU8824_CLK_MCLK_SRC_MASK (0xf << 0) +#define NAU8824_CLK_DMIC_SRC_SFT 10 +#define NAU8824_CLK_DMIC_SRC_MASK (0x7 << NAU8824_CLK_DMIC_SRC_SFT) +#define NAU8824_CLK_ADC_SRC_SFT 6 +#define NAU8824_CLK_ADC_SRC_MASK (0x3 << NAU8824_CLK_ADC_SRC_SFT) +#define NAU8824_CLK_DAC_SRC_SFT 4 +#define NAU8824_CLK_DAC_SRC_MASK (0x3 << NAU8824_CLK_DAC_SRC_SFT) + +/* FLL1 (0x04) */ +#define NAU8824_FLL_RATIO_MASK (0x7f << 0) + +/* FLL3 (0x06) */ +#define NAU8824_FLL_INTEGER_MASK (0x3ff << 0) +#define NAU8824_FLL_CLK_SRC_SFT 10 +#define NAU8824_FLL_CLK_SRC_MASK (0x3 << NAU8824_FLL_CLK_SRC_SFT) +#define NAU8824_FLL_CLK_SRC_MCLK (0 << NAU8824_FLL_CLK_SRC_SFT) +#define NAU8824_FLL_CLK_SRC_BLK (0x2 << NAU8824_FLL_CLK_SRC_SFT) +#define NAU8824_FLL_CLK_SRC_FS (0x3 << NAU8824_FLL_CLK_SRC_SFT) + +/* FLL4 (0x07) */ +#define NAU8824_FLL_REF_DIV_SFT 10 +#define NAU8824_FLL_REF_DIV_MASK (0x3 << NAU8824_FLL_REF_DIV_SFT) + +/* FLL5 (0x08) */ +#define NAU8824_FLL_PDB_DAC_EN (0x1 << 15) +#define NAU8824_FLL_LOOP_FTR_EN (0x1 << 14) +#define NAU8824_FLL_CLK_SW_MASK (0x1 << 13) +#define NAU8824_FLL_CLK_SW_N2 (0x1 << 13) +#define NAU8824_FLL_CLK_SW_REF (0x0 << 13) +#define NAU8824_FLL_FTR_SW_MASK (0x1 << 12) +#define NAU8824_FLL_FTR_SW_ACCU (0x1 << 12) +#define NAU8824_FLL_FTR_SW_FILTER (0x0 << 12) + +/* FLL6 (0x9) */ +#define NAU8824_DCO_EN (0x1 << 15) +#define NAU8824_SDM_EN (0x1 << 14) + +/* IRQ (0x10) */ +#define NAU8824_SHORT_CIRCUIT_IRQ (0x1 << 7) +#define NAU8824_IMPEDANCE_MEAS_IRQ (0x1 << 6) +#define NAU8824_KEY_RELEASE_IRQ (0x1 << 5) +#define NAU8824_KEY_LONG_PRESS_IRQ (0x1 << 4) +#define NAU8824_KEY_SHORT_PRESS_IRQ (0x1 << 3) +#define NAU8824_JACK_EJECTION_DETECTED (0x1 << 1) +#define NAU8824_JACK_INSERTION_DETECTED 0x1 + +/* JACK_DET_CTRL (0x0D) */ +#define NAU8824_JACK_EJECT_DT_SFT 2 +#define NAU8824_JACK_EJECT_DT_MASK (0x3 << NAU8824_JACK_EJECT_DT_SFT) +#define NAU8824_JACK_LOGIC 0x1 + + +/* INTERRUPT_SETTING_1 (0x0F) */ +#define NAU8824_IRQ_EJECT_EN (0x1 << 9) +#define NAU8824_IRQ_INSERT_EN (0x1 << 8) + +/* INTERRUPT_SETTING (0x12) */ +#define NAU8824_IRQ_KEY_RELEASE_DIS (0x1 << 5) +#define NAU8824_IRQ_KEY_SHORT_PRESS_DIS (0x1 << 3) +#define NAU8824_IRQ_EJECT_DIS (0x1 << 1) +#define NAU8824_IRQ_INSERT_DIS 0x1 + +/* SAR_ADC (0x13) */ +#define NAU8824_SAR_ADC_EN_SFT 12 +#define NAU8824_SAR_TRACKING_GAIN_SFT 8 +#define NAU8824_SAR_TRACKING_GAIN_MASK (0x7 << NAU8824_SAR_TRACKING_GAIN_SFT) +#define NAU8824_SAR_COMPARE_TIME_SFT 2 +#define NAU8824_SAR_COMPARE_TIME_MASK (3 << 2) +#define NAU8824_SAR_SAMPLING_TIME_SFT 0 +#define NAU8824_SAR_SAMPLING_TIME_MASK (3 << 0) + +/* VDET_COEFFICIENT (0x14) */ +#define NAU8824_SHORTKEY_DEBOUNCE_SFT 12 +#define NAU8824_SHORTKEY_DEBOUNCE_MASK (0x3 << NAU8824_SHORTKEY_DEBOUNCE_SFT) +#define NAU8824_LEVELS_NR_SFT 8 +#define NAU8824_LEVELS_NR_MASK (0x7 << 8) +#define NAU8824_HYSTERESIS_SFT 0 +#define NAU8824_HYSTERESIS_MASK 0xf + +/* PORT0_I2S_PCM_CTRL_1 (0x1C) */ +#define NAU8824_I2S_BP_SFT 7 +#define NAU8824_I2S_BP_MASK (1 << NAU8824_I2S_BP_SFT) +#define NAU8824_I2S_BP_INV (1 << NAU8824_I2S_BP_SFT) +#define NAU8824_I2S_PCMB_SFT 6 +#define NAU8824_I2S_PCMB_EN (1 << NAU8824_I2S_PCMB_SFT) +#define NAU8824_I2S_DL_SFT 2 +#define NAU8824_I2S_DL_MASK (0x3 << NAU8824_I2S_DL_SFT) +#define NAU8824_I2S_DL_16 (0 << NAU8824_I2S_DL_SFT) +#define NAU8824_I2S_DL_20 (1 << NAU8824_I2S_DL_SFT) +#define NAU8824_I2S_DL_24 (2 << NAU8824_I2S_DL_SFT) +#define NAU8824_I2S_DL_32 (3 << NAU8824_I2S_DL_SFT) +#define NAU8824_I2S_DF_MASK 0x3 +#define NAU8824_I2S_DF_RIGTH 0 +#define NAU8824_I2S_DF_LEFT 1 +#define NAU8824_I2S_DF_I2S 2 +#define NAU8824_I2S_DF_PCM_AB 3 + + +/* PORT0_I2S_PCM_CTRL_2 (0x1D) */ +#define NAU8824_I2S_LRC_DIV_SFT 12 +#define NAU8824_I2S_LRC_DIV_MASK (0x3 << NAU8824_I2S_LRC_DIV_SFT) +#define NAU8824_I2S_MS_SFT 3 +#define NAU8824_I2S_MS_MASK (1 << NAU8824_I2S_MS_SFT) +#define NAU8824_I2S_MS_MASTER (1 << NAU8824_I2S_MS_SFT) +#define NAU8824_I2S_MS_SLAVE (0 << NAU8824_I2S_MS_SFT) +#define NAU8824_I2S_BLK_DIV_MASK 0x7 + +/* ADC_FILTER_CTRL (0x24) */ +#define NAU8824_ADC_SYNC_DOWN_MASK 0x3 +#define NAU8824_ADC_SYNC_DOWN_32 0 +#define NAU8824_ADC_SYNC_DOWN_64 1 +#define NAU8824_ADC_SYNC_DOWN_128 2 +#define NAU8824_ADC_SYNC_DOWN_256 3 + +/* DAC_FILTER_CTRL_1 (0x25) */ +#define NAU8824_DAC_CICCLP_OFF (0x1 << 7) +#define NAU8824_DAC_OVERSAMPLE_MASK 0x7 +#define NAU8824_DAC_OVERSAMPLE_64 0 +#define NAU8824_DAC_OVERSAMPLE_256 1 +#define NAU8824_DAC_OVERSAMPLE_128 2 +#define NAU8824_DAC_OVERSAMPLE_32 4 + +/* DAC_MUTE_CTRL (0x31) */ +#define NAU8824_DAC_CH01_MIX 0x3 +#define NAU8824_DAC_ZC_EN (0x1 << 11) + +/* DAC_CH0_DGAIN_CTRL (0x32) */ +#define NAU8824_DAC_CH0_SEL_SFT 9 +#define NAU8824_DAC_CH0_SEL_MASK (0x1 << NAU8824_DAC_CH0_SEL_SFT) +#define NAU8824_DAC_CH0_SEL_I2S0 (0x0 << NAU8824_DAC_CH0_SEL_SFT) +#define NAU8824_DAC_CH0_SEL_I2S1 (0x1 << NAU8824_DAC_CH0_SEL_SFT) +#define NAU8824_DAC_CH0_VOL_MASK 0x1ff + +/* DAC_CH1_DGAIN_CTRL (0x33) */ +#define NAU8824_DAC_CH1_SEL_SFT 9 +#define NAU8824_DAC_CH1_SEL_MASK (0x1 << NAU8824_DAC_CH1_SEL_SFT) +#define NAU8824_DAC_CH1_SEL_I2S0 (0x0 << NAU8824_DAC_CH1_SEL_SFT) +#define NAU8824_DAC_CH1_SEL_I2S1 (0x1 << NAU8824_DAC_CH1_SEL_SFT) +#define NAU8824_DAC_CH1_VOL_MASK 0x1ff + +/* CLASSG (0x50) */ +#define NAU8824_CLASSG_TIMER_SFT 8 +#define NAU8824_CLASSG_TIMER_MASK (0x3f << NAU8824_CLASSG_TIMER_SFT) +#define NAU8824_CLASSG_LDAC_EN_SFT 2 +#define NAU8824_CLASSG_RDAC_EN_SFT 1 +#define NAU8824_CLASSG_EN_SFT 0 + +/* SAR_ADC_DATA_OUT (0x59) */ +#define NAU8824_SAR_ADC_DATA_MASK 0xff + +/* BIAS_ADJ (0x66) */ +#define NAU8824_VMID (1 << 6) +#define NAU8824_VMID_SEL_SFT 4 +#define NAU8824_VMID_SEL_MASK (3 << NAU8824_VMID_SEL_SFT) +#define NAU8824_DMIC2_EN_SFT 3 +#define NAU8824_DMIC1_EN_SFT 2 + +/* TRIM_SETTINGS (0x68) */ +#define NAU8824_DRV_CURR_INC (1 << 15) + +/* ANALOG_CONTROL_1 (0x69) */ +#define NAU8824_DMIC_CLK_DRV_STRG (1 << 3) +#define NAU8824_DMIC_CLK_SLEW_FAST (0x7) + +/* ANALOG_CONTROL_2 (0x6A) */ +#define NAU8824_CLASSD_CLAMP_DIS_SFT 3 +#define NAU8824_CLASSD_CLAMP_DIS (0x1 << NAU8824_CLASSD_CLAMP_DIS_SFT) + +/* ENABLE_LO (0x6B) */ +#define NAU8824_TEST_DAC_SFT 14 +#define NAU8824_TEST_DAC_EN (0x3 << NAU8824_TEST_DAC_SFT) +#define NAU8824_DACL_HPR_EN_SFT 3 +#define NAU8824_DACL_HPR_EN (0x1 << NAU8824_DACL_HPR_EN_SFT) +#define NAU8824_DACR_HPR_EN_SFT 2 +#define NAU8824_DACR_HPR_EN (0x1 << NAU8824_DACR_HPR_EN_SFT) +#define NAU8824_DACR_HPL_EN_SFT 1 +#define NAU8824_DACR_HPL_EN (0x1 << NAU8824_DACR_HPL_EN_SFT) +#define NAU8824_DACL_HPL_EN_SFT 0 +#define NAU8824_DACL_HPL_EN 0x1 + +/* CLASSD_GAIN_1 (0x6D) */ +#define NAU8824_CLASSD_GAIN_1R_SFT 8 +#define NAU8824_CLASSD_GAIN_1R_MASK (0x1f << NAU8824_CLASSD_GAIN_1R_SFT) +#define NAU8824_CLASSD_EN_SFT 7 +#define NAU8824_CLASSD_EN (0x1 << NAU8824_CLASSD_EN_SFT) +#define NAU8824_CLASSD_GAIN_1L_MASK 0x1f + +/* CLASSD_GAIN_2 (0x6E) */ +#define NAU8824_CLASSD_GAIN_2R_SFT 8 +#define NAU8824_CLASSD_GAIN_2R_MASK (0x1f << NAU8824_CLASSD_GAIN_1R_SFT) +#define NAU8824_CLASSD_EN_SFT 7 +#define NAU8824_CLASSD_EN (0x1 << NAU8824_CLASSD_EN_SFT) +#define NAU8824_CLASSD_GAIN_2L_MASK 0x1f + +/* ANALOG_ADC_2 (0x72) */ +#define NAU8824_ADCR_EN_SFT 7 +#define NAU8824_ADCL_EN_SFT 6 + +/* RDAC (0x73) */ +#define NAU8824_DACR_EN_SFT 13 +#define NAU8824_DACL_EN_SFT 12 +#define NAU8824_DACR_CLK_SFT 9 +#define NAU8824_DACL_CLK_SFT 8 +#define NAU8824_RDAC_CLK_DELAY_SFT 4 +#define NAU8824_RDAC_CLK_DELAY_MASK (0x7 << NAU8824_RDAC_CLK_DELAY_SFT) +#define NAU8824_RDAC_VREF_SFT 2 +#define NAU8824_RDAC_VREF_MASK (0x3 << NAU8824_RDAC_VREF_SFT) + +/* MIC_BIAS (0x74) */ +#define NAU8824_MICBIAS_JKSLV (1 << 14) +#define NAU8824_MICBIAS_JKR2 (1 << 12) +#define NAU8824_MICBIAS_POWERUP_SFT 8 +#define NAU8824_MICBIAS_VOLTAGE_SFT 0 +#define NAU8824_MICBIAS_VOLTAGE_MASK 0x7 + +/* BOOST (0x76) */ +#define NAU8824_PRECHARGE_DIS (0x1 << 13) +#define NAU8824_GLOBAL_BIAS_EN (0x1 << 12) +#define NAU8824_HP_BOOST_DIS_SFT 9 +#define NAU8824_HP_BOOST_DIS (0x1 << NAU8824_HP_BOOST_DIS_SFT) +#define NAU8824_HP_BOOST_G_DIS_SFT 8 +#define NAU8824_HP_BOOST_G_DIS (0x1 << NAU8824_HP_BOOST_G_DIS_SFT) +#define NAU8824_SHORT_SHUTDOWN_DIG_EN (1 << 7) +#define NAU8824_SHORT_SHUTDOWN_EN (1 << 6) + +/* FEPGA (0x77) */ +#define NAU8824_FEPGA_MODER_SHORT_SFT 7 +#define NAU8824_FEPGA_MODER_SHORT_EN (0x1 << NAU8824_FEPGA_MODER_SHORT_SFT) +#define NAU8824_FEPGA_MODER_MIC2_SFT 5 +#define NAU8824_FEPGA_MODER_MIC2_EN (0x1 << NAU8824_FEPGA_MODER_MIC2_SFT) +#define NAU8824_FEPGA_MODER_HSMIC_SFT 4 +#define NAU8824_FEPGA_MODER_HSMIC_EN (0x1 << NAU8824_FEPGA_MODER_HSMIC_SFT) +#define NAU8824_FEPGA_MODEL_SHORT_SFT 3 +#define NAU8824_FEPGA_MODEL_SHORT_EN (0x1 << NAU8824_FEPGA_MODEL_SHORT_SFT) +#define NAU8824_FEPGA_MODEL_MIC1_SFT 1 +#define NAU8824_FEPGA_MODEL_MIC1_EN (0x1 << NAU8824_FEPGA_MODEL_MIC1_SFT) +#define NAU8824_FEPGA_MODEL_HSMIC_SFT 0 +#define NAU8824_FEPGA_MODEL_HSMIC_EN (0x1 << NAU8824_FEPGA_MODEL_HSMIC_SFT) + +/* FEPGA_II (0x78) */ +#define NAU8824_FEPGA_GAINR_SFT 5 +#define NAU8824_FEPGA_GAINR_MASK (0x1f << NAU8824_FEPGA_GAINR_SFT) +#define NAU8824_FEPGA_GAINL_SFT 0 +#define NAU8824_FEPGA_GAINL_MASK 0x1f + +/* CHARGE_PUMP_CONTROL (0x80) */ +#define NAU8824_JAMNODCLOW (0x1 << 15) +#define NAU8824_SPKR_PULL_DOWN (0x1 << 13) +#define NAU8824_SPKL_PULL_DOWN (0x1 << 12) +#define NAU8824_POWER_DOWN_DACR (0x1 << 9) +#define NAU8824_POWER_DOWN_DACL (0x1 << 8) +#define NAU8824_CHARGE_PUMP_EN_SFT 5 +#define NAU8824_CHARGE_PUMP_EN (0x1 << NAU8824_CHARGE_PUMP_EN_SFT) + + +#define NAU8824_CODEC_DAI "nau8824-hifi" + +/* System Clock Source */ +enum { + NAU8824_CLK_DIS, + NAU8824_CLK_MCLK, + NAU8824_CLK_INTERNAL, + NAU8824_CLK_FLL_MCLK, + NAU8824_CLK_FLL_BLK, + NAU8824_CLK_FLL_FS, +}; + +struct nau8824 { + struct device *dev; + struct regmap *regmap; + struct snd_soc_dapm_context *dapm; + struct snd_soc_jack *jack; + struct work_struct jdet_work; + struct semaphore jd_sem; + int fs; + int irq; + int micbias_voltage; + int vref_impedance; + int jkdet_polarity; + int sar_threshold_num; + int sar_threshold[8]; + int sar_hysteresis; + int sar_voltage; + int sar_compare_time; + int sar_sampling_time; + int key_debounce; + int jack_eject_debounce; +}; + +struct nau8824_fll { + int mclk_src; + int ratio; + int fll_frac; + int fll_int; + int clk_ref_div; +}; + +struct nau8824_fll_attr { + unsigned int param; + unsigned int val; +}; + +struct nau8824_osr_attr { + unsigned int osr; + unsigned int clk_src; +}; + + +int nau8824_enable_jack_detect(struct snd_soc_codec *codec, + struct snd_soc_jack *jack); + +#endif /* _NAU8824_H */ + diff --git a/sound/soc/codecs/rt286.c b/sound/soc/codecs/rt286.c index 9c365a7f758dbb9f1227c726f148b19672402f7c..7899a2cdeb42f46c5d76cd051319a4b547b1ed04 100644 --- a/sound/soc/codecs/rt286.c +++ b/sound/soc/codecs/rt286.c @@ -1108,6 +1108,13 @@ static const struct dmi_system_id force_combo_jack_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Kabylake Client platform") } }, + { + .ident = "Thinkpad Helix 2nd", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Helix 2nd") + } + }, { } }; diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index b281a46d769dc25165a7bb6073ec72d4a302fafd..f91221b1ddf0fced68a2815eaaf71f5983fea868 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -1084,13 +1084,28 @@ static int rt5514_parse_dt(struct rt5514_priv *rt5514, struct device *dev) return 0; } +static __maybe_unused int rt5514_i2c_resume(struct device *dev) +{ + struct rt5514_priv *rt5514 = dev_get_drvdata(dev); + unsigned int val; + + /* + * Add a bogus read to avoid rt5514's confusion after s2r in case it + * saw glitches on the i2c lines and thought the other side sent a + * start bit. + */ + regmap_read(rt5514->regmap, RT5514_VENDOR_ID2, &val); + + return 0; +} + static int rt5514_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct rt5514_platform_data *pdata = dev_get_platdata(&i2c->dev); struct rt5514_priv *rt5514; int ret; - unsigned int val; + unsigned int val = ~0; rt5514 = devm_kzalloc(&i2c->dev, sizeof(struct rt5514_priv), GFP_KERNEL); @@ -1120,8 +1135,16 @@ static int rt5514_i2c_probe(struct i2c_client *i2c, return ret; } - regmap_read(rt5514->regmap, RT5514_VENDOR_ID2, &val); - if (val != RT5514_DEVICE_ID) { + /* + * The rt5514 can get confused if the i2c lines glitch together, as + * can happen at bootup as regulators are turned off and on. If it's + * in this glitched state the first i2c read will fail, so we'll give + * it one change to retry. + */ + ret = regmap_read(rt5514->regmap, RT5514_VENDOR_ID2, &val); + if (ret || val != RT5514_DEVICE_ID) + ret = regmap_read(rt5514->regmap, RT5514_VENDOR_ID2, &val); + if (ret || val != RT5514_DEVICE_ID) { dev_err(&i2c->dev, "Device with ID register %x is not rt5514\n", val); return -ENODEV; @@ -1149,10 +1172,15 @@ static int rt5514_i2c_remove(struct i2c_client *i2c) return 0; } -struct i2c_driver rt5514_i2c_driver = { +static const struct dev_pm_ops rt5514_i2_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(NULL, rt5514_i2c_resume) +}; + +static struct i2c_driver rt5514_i2c_driver = { .driver = { .name = "rt5514", .of_match_table = of_match_ptr(rt5514_of_match), + .pm = &rt5514_i2_pm_ops, }, .probe = rt5514_i2c_probe, .remove = rt5514_i2c_remove, diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index e149f3ce540154252fa882000b020da691d4d3db..87844a45886a5f965d04abaeb670e28ab32efdee 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3542,6 +3542,15 @@ static const struct i2c_device_id rt5645_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, rt5645_i2c_id); +#ifdef CONFIG_OF +static const struct of_device_id rt5645_of_match[] = { + { .compatible = "realtek,rt5645", }, + { .compatible = "realtek,rt5650", }, + { } +}; +MODULE_DEVICE_TABLE(of, rt5645_of_match); +#endif + #ifdef CONFIG_ACPI static const struct acpi_device_id rt5645_acpi_match[] = { { "10EC5645", 0 }, @@ -3912,6 +3921,7 @@ static void rt5645_i2c_shutdown(struct i2c_client *i2c) static struct i2c_driver rt5645_i2c_driver = { .driver = { .name = "rt5645", + .of_match_table = of_match_ptr(rt5645_of_match), .acpi_match_table = ACPI_PTR(rt5645_acpi_match), }, .probe = rt5645_i2c_probe, diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 476135ec57268cf6863e9792d5fb027c6383e190..8cd22307f5b6e6ab40e82399a19086977817b440 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -1139,7 +1139,8 @@ static void rt5665_enable_push_button_irq(struct snd_soc_codec *codec, bool enable) { if (enable) { - snd_soc_write(codec, RT5665_4BTN_IL_CMD_1, 0x000b); + snd_soc_write(codec, RT5665_4BTN_IL_CMD_1, 0x0003); + snd_soc_update_bits(codec, RT5665_SAR_IL_CMD_9, 0x1, 0x1); snd_soc_write(codec, RT5665_IL_CMD_1, 0x0048); snd_soc_update_bits(codec, RT5665_4BTN_IL_CMD_2, RT5665_4BTN_IL_MASK | RT5665_4BTN_IL_RST_MASK, @@ -1192,10 +1193,13 @@ static int rt5665_headset_detect(struct snd_soc_codec *codec, int jack_insert) } regmap_update_bits(rt5665->regmap, RT5665_EJD_CTRL_1, - 0x180, 0x180); + 0x1a0, 0x120); regmap_write(rt5665->regmap, RT5665_EJD_CTRL_3, 0x3424); + regmap_write(rt5665->regmap, RT5665_IL_CMD_1, 0x0048); regmap_write(rt5665->regmap, RT5665_SAR_IL_CMD_1, 0xa291); + usleep_range(10000, 15000); + rt5665->sar_adc_value = snd_soc_read(rt5665->codec, RT5665_SAR_IL_CMD_4) & 0x7ff; @@ -1256,8 +1260,8 @@ static void rt5665_jd_check_handler(struct work_struct *work) } } -int rt5665_set_jack_detect(struct snd_soc_codec *codec, - struct snd_soc_jack *hs_jack) +static int rt5665_set_jack_detect(struct snd_soc_codec *codec, + struct snd_soc_jack *hs_jack, void *data) { struct rt5665_priv *rt5665 = snd_soc_codec_get_drvdata(codec); @@ -1284,7 +1288,6 @@ int rt5665_set_jack_detect(struct snd_soc_codec *codec, return 0; } -EXPORT_SYMBOL_GPL(rt5665_set_jack_detect); static void rt5665_jack_detect_handler(struct work_struct *work) { @@ -2600,6 +2603,55 @@ static int rt5655_set_verf(struct snd_soc_dapm_widget *w, return 0; } +static int rt5665_i2s_pin_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + unsigned int val1, val2, mask1, mask2 = 0; + + switch (w->shift) { + case RT5665_PWR_I2S2_1_BIT: + mask1 = RT5665_GP2_PIN_MASK | RT5665_GP3_PIN_MASK | + RT5665_GP4_PIN_MASK | RT5665_GP5_PIN_MASK; + val1 = RT5665_GP2_PIN_BCLK2 | RT5665_GP3_PIN_LRCK2 | + RT5665_GP4_PIN_DACDAT2_1 | RT5665_GP5_PIN_ADCDAT2_1; + break; + case RT5665_PWR_I2S2_2_BIT: + mask1 = RT5665_GP2_PIN_MASK | RT5665_GP3_PIN_MASK | + RT5665_GP8_PIN_MASK; + val1 = RT5665_GP2_PIN_BCLK2 | RT5665_GP3_PIN_LRCK2 | + RT5665_GP8_PIN_DACDAT2_2; + mask2 = RT5665_GP9_PIN_MASK; + val2 = RT5665_GP9_PIN_ADCDAT2_2; + break; + case RT5665_PWR_I2S3_BIT: + mask1 = RT5665_GP6_PIN_MASK | RT5665_GP7_PIN_MASK | + RT5665_GP8_PIN_MASK; + val1 = RT5665_GP6_PIN_BCLK3 | RT5665_GP7_PIN_LRCK3 | + RT5665_GP8_PIN_DACDAT3; + mask2 = RT5665_GP9_PIN_MASK; + val2 = RT5665_GP9_PIN_ADCDAT3; + break; + } + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + snd_soc_update_bits(codec, RT5665_GPIO_CTRL_1, mask1, val1); + if (mask2) + snd_soc_update_bits(codec, RT5665_GPIO_CTRL_2, + mask2, val2); + break; + case SND_SOC_DAPM_POST_PMD: + snd_soc_update_bits(codec, RT5665_GPIO_CTRL_1, mask1, 0); + if (mask2) + snd_soc_update_bits(codec, RT5665_GPIO_CTRL_2, + mask2, 0); + break; + default: + return 0; + } + + return 0; +} static const struct snd_soc_dapm_widget rt5665_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("LDO2", RT5665_PWR_ANLG_3, RT5665_PWR_LDO2_BIT, 0, @@ -2852,11 +2904,14 @@ static const struct snd_soc_dapm_widget rt5665_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("I2S1_2", RT5665_PWR_DIG_1, RT5665_PWR_I2S1_2_BIT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("I2S2_1", RT5665_PWR_DIG_1, RT5665_PWR_I2S2_1_BIT, - 0, NULL, 0), + 0, rt5665_i2s_pin_event, SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_SUPPLY("I2S2_2", RT5665_PWR_DIG_1, RT5665_PWR_I2S2_2_BIT, - 0, NULL, 0), + 0, rt5665_i2s_pin_event, SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_SUPPLY("I2S3", RT5665_PWR_DIG_1, RT5665_PWR_I2S3_BIT, - 0, NULL, 0), + 0, rt5665_i2s_pin_event, SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_PGA("IF1 DAC1", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("IF1 DAC2", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("IF1 DAC3", SND_SOC_NOPM, 0, 0, NULL, 0), @@ -3963,12 +4018,68 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = { {"PDMR", NULL, "PDM R Playback"}, }; +static int rt5665_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, + unsigned int rx_mask, int slots, int slot_width) +{ + struct snd_soc_codec *codec = dai->codec; + unsigned int val = 0; + + if (rx_mask || tx_mask) + val |= RT5665_I2S1_MODE_TDM; + + switch (slots) { + case 4: + val |= RT5665_TDM_IN_CH_4; + val |= RT5665_TDM_OUT_CH_4; + break; + case 6: + val |= RT5665_TDM_IN_CH_6; + val |= RT5665_TDM_OUT_CH_6; + break; + case 8: + val |= RT5665_TDM_IN_CH_8; + val |= RT5665_TDM_OUT_CH_8; + break; + case 2: + break; + default: + return -EINVAL; + } + + switch (slot_width) { + case 20: + val |= RT5665_TDM_IN_LEN_20; + val |= RT5665_TDM_OUT_LEN_20; + break; + case 24: + val |= RT5665_TDM_IN_LEN_24; + val |= RT5665_TDM_OUT_LEN_24; + break; + case 32: + val |= RT5665_TDM_IN_LEN_32; + val |= RT5665_TDM_OUT_LEN_32; + break; + case 16: + break; + default: + return -EINVAL; + } + + snd_soc_update_bits(codec, RT5665_TDM_CTRL_1, + RT5665_I2S1_MODE_MASK | RT5665_TDM_IN_CH_MASK | + RT5665_TDM_OUT_CH_MASK | RT5665_TDM_IN_LEN_MASK | + RT5665_TDM_OUT_LEN_MASK, val); + + return 0; +} + + static int rt5665_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { struct snd_soc_codec *codec = dai->codec; struct rt5665_priv *rt5665 = snd_soc_codec_get_drvdata(codec); - unsigned int val_len = 0, val_clk, mask_clk, val_bits = 0x0100; + unsigned int val_len = 0, val_clk, reg_clk, mask_clk, val_bits = 0x0100; int pre_div, frame_size; rt5665->lrck[dai->id] = params_rate(params); @@ -4009,6 +4120,10 @@ static int rt5665_hw_params(struct snd_pcm_substream *substream, switch (dai->id) { case RT5665_AIF1_1: case RT5665_AIF1_2: + if (params_channels(params) > 2) + rt5665_set_tdm_slot(dai, 0xf, 0xf, + params_channels(params), params_width(params)); + reg_clk = RT5665_ADDA_CLK_1; mask_clk = RT5665_I2S_PD1_MASK; val_clk = pre_div << RT5665_I2S_PD1_SFT; snd_soc_update_bits(codec, RT5665_I2S1_SDP, @@ -4016,12 +4131,14 @@ static int rt5665_hw_params(struct snd_pcm_substream *substream, break; case RT5665_AIF2_1: case RT5665_AIF2_2: + reg_clk = RT5665_ADDA_CLK_2; mask_clk = RT5665_I2S_PD2_MASK; val_clk = pre_div << RT5665_I2S_PD2_SFT; snd_soc_update_bits(codec, RT5665_I2S2_SDP, RT5665_I2S_DL_MASK, val_len); break; case RT5665_AIF3: + reg_clk = RT5665_ADDA_CLK_2; mask_clk = RT5665_I2S_PD3_MASK; val_clk = pre_div << RT5665_I2S_PD3_SFT; snd_soc_update_bits(codec, RT5665_I2S3_SDP, @@ -4032,7 +4149,7 @@ static int rt5665_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - snd_soc_update_bits(codec, RT5665_ADDA_CLK_1, mask_clk, val_clk); + snd_soc_update_bits(codec, reg_clk, mask_clk, val_clk); snd_soc_update_bits(codec, RT5665_STO1_DAC_SIL_DET, 0x3700, val_bits); switch (rt5665->lrck[dai->id]) { @@ -4125,10 +4242,9 @@ static int rt5665_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) return 0; } -static int rt5665_set_dai_sysclk(struct snd_soc_dai *dai, - int clk_id, unsigned int freq, int dir) +static int rt5665_set_codec_sysclk(struct snd_soc_codec *codec, int clk_id, + int source, unsigned int freq, int dir) { - struct snd_soc_codec *codec = dai->codec; struct rt5665_priv *rt5665 = snd_soc_codec_get_drvdata(codec); unsigned int reg_val = 0; @@ -4154,20 +4270,20 @@ static int rt5665_set_dai_sysclk(struct snd_soc_dai *dai, rt5665->sysclk = freq; rt5665->sysclk_src = clk_id; - dev_dbg(dai->dev, "Sysclk is %dHz and clock id is %d\n", freq, clk_id); + dev_dbg(codec->dev, "Sysclk is %dHz and clock id is %d\n", freq, clk_id); return 0; } -static int rt5665_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int Source, - unsigned int freq_in, unsigned int freq_out) +static int rt5665_set_codec_pll(struct snd_soc_codec *codec, int pll_id, + int source, unsigned int freq_in, + unsigned int freq_out) { - struct snd_soc_codec *codec = dai->codec; struct rt5665_priv *rt5665 = snd_soc_codec_get_drvdata(codec); struct rl6231_pll_code pll_code; int ret; - if (Source == rt5665->pll_src && freq_in == rt5665->pll_in && + if (source == rt5665->pll_src && freq_in == rt5665->pll_in && freq_out == rt5665->pll_out) return 0; @@ -4181,7 +4297,7 @@ static int rt5665_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int Source, return 0; } - switch (Source) { + switch (source) { case RT5665_PLL1_S_MCLK: snd_soc_update_bits(codec, RT5665_GLB_CLK, RT5665_PLL1_SRC_MASK, RT5665_PLL1_SRC_MCLK); @@ -4199,7 +4315,7 @@ static int rt5665_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int Source, RT5665_PLL1_SRC_MASK, RT5665_PLL1_SRC_BCLK3); break; default: - dev_err(codec->dev, "Unknown PLL Source %d\n", Source); + dev_err(codec->dev, "Unknown PLL Source %d\n", source); return -EINVAL; } @@ -4221,62 +4337,7 @@ static int rt5665_set_dai_pll(struct snd_soc_dai *dai, int pll_id, int Source, rt5665->pll_in = freq_in; rt5665->pll_out = freq_out; - rt5665->pll_src = Source; - - return 0; -} - -static int rt5665_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, - unsigned int rx_mask, int slots, int slot_width) -{ - struct snd_soc_codec *codec = dai->codec; - unsigned int val = 0; - - if (rx_mask || tx_mask) - val |= RT5665_I2S1_MODE_TDM; - - switch (slots) { - case 4: - val |= RT5665_TDM_IN_CH_4; - val |= RT5665_TDM_OUT_CH_4; - break; - case 6: - val |= RT5665_TDM_IN_CH_6; - val |= RT5665_TDM_OUT_CH_6; - break; - case 8: - val |= RT5665_TDM_IN_CH_8; - val |= RT5665_TDM_OUT_CH_8; - break; - case 2: - break; - default: - return -EINVAL; - } - - switch (slot_width) { - case 20: - val |= RT5665_TDM_IN_LEN_20; - val |= RT5665_TDM_OUT_LEN_20; - break; - case 24: - val |= RT5665_TDM_IN_LEN_24; - val |= RT5665_TDM_OUT_LEN_24; - break; - case 32: - val |= RT5665_TDM_IN_LEN_32; - val |= RT5665_TDM_OUT_LEN_32; - break; - case 16: - break; - default: - return -EINVAL; - } - - snd_soc_update_bits(codec, RT5665_TDM_CTRL_1, - RT5665_I2S1_MODE_MASK | RT5665_TDM_IN_CH_MASK | - RT5665_TDM_OUT_CH_MASK | RT5665_TDM_IN_LEN_MASK | - RT5665_TDM_OUT_LEN_MASK, val); + rt5665->pll_src = source; return 0; } @@ -4393,9 +4454,7 @@ static int rt5665_resume(struct snd_soc_codec *codec) static const struct snd_soc_dai_ops rt5665_aif_dai_ops = { .hw_params = rt5665_hw_params, .set_fmt = rt5665_set_dai_fmt, - .set_sysclk = rt5665_set_dai_sysclk, .set_tdm_slot = rt5665_set_tdm_slot, - .set_pll = rt5665_set_dai_pll, .set_bclk_ratio = rt5665_set_bclk_ratio, }; @@ -4504,7 +4563,10 @@ static struct snd_soc_codec_driver soc_codec_dev_rt5665 = { .num_dapm_widgets = ARRAY_SIZE(rt5665_dapm_widgets), .dapm_routes = rt5665_dapm_routes, .num_dapm_routes = ARRAY_SIZE(rt5665_dapm_routes), - } + }, + .set_sysclk = rt5665_set_codec_sysclk, + .set_pll = rt5665_set_codec_pll, + .set_jack = rt5665_set_jack_detect, }; @@ -4783,7 +4845,7 @@ static int rt5665_i2c_probe(struct i2c_client *i2c, regmap_write(rt5665->regmap, RT5665_HP_LOGIC_CTRL_2, 0x0002); regmap_update_bits(rt5665->regmap, RT5665_EJD_CTRL_1, - 0xf000 | RT5665_VREF_POW_MASK, 0xd000 | RT5665_VREF_POW_REG); + 0xf000 | RT5665_VREF_POW_MASK, 0xe000 | RT5665_VREF_POW_REG); /* Work around for pow_pump */ regmap_update_bits(rt5665->regmap, RT5665_STO1_DAC_SIL_DET, RT5665_DEB_STO_DAC_MASK, RT5665_DEB_80_MS); diff --git a/sound/soc/codecs/rt5665.h b/sound/soc/codecs/rt5665.h index a30f5e6d062882724230e2bf2f2d1157ee823a9b..1db5c6a62a8e0c872e24fc6474031cbdfb72ab80 100644 --- a/sound/soc/codecs/rt5665.h +++ b/sound/soc/codecs/rt5665.h @@ -1984,7 +1984,5 @@ enum { int rt5665_sel_asrc_clk_src(struct snd_soc_codec *codec, unsigned int filter_mask, unsigned int clk_src); -int rt5665_set_jack_detect(struct snd_soc_codec *codec, - struct snd_soc_jack *hs_jack); #endif /* __RT5665_H__ */ diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 17d20b99f0418728d5a73e293e18508c31f2d7b0..e27c5a4a0a152333205d6a59b6b239e2a81e225b 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -2835,6 +2835,27 @@ static const struct dmi_system_id dmi_platform_intel_braswell[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Wyse 3040"), }, }, + { + .ident = "Lenovo Thinkpad Tablet 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 10"), + }, + }, + { + .ident = "Lenovo Thinkpad Tablet 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Tablet B"), + }, + }, + { + .ident = "Lenovo Thinkpad Tablet 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Miix 2 10"), + }, + }, {} }; diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index abc802a5a479013b0c1995a46790ec009f87a111..65ac4518ad06096e1ca8d04c539fc19ddf1b8f15 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -5035,6 +5035,12 @@ static const struct i2c_device_id rt5677_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, rt5677_i2c_id); +static const struct of_device_id rt5677_of_match[] = { + { .compatible = "realtek,rt5677", }, + { } +}; +MODULE_DEVICE_TABLE(of, rt5677_of_match); + static const struct acpi_gpio_params plug_det_gpio = { RT5677_GPIO_PLUG_DET, 0, false }; static const struct acpi_gpio_params mic_present_gpio = { RT5677_GPIO_MIC_PRESENT_L, 0, false }; static const struct acpi_gpio_params headphone_enable_gpio = { RT5677_GPIO_HP_AMP_SHDN_L, 0, false }; @@ -5294,6 +5300,7 @@ static int rt5677_i2c_remove(struct i2c_client *i2c) static struct i2c_driver rt5677_i2c_driver = { .driver = { .name = "rt5677", + .of_match_table = rt5677_of_match, }, .probe = rt5677_i2c_probe, .remove = rt5677_i2c_remove, diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 1589325855bc10ca41c1180de3e06b745d32ee19..5a2702edeb7709b765d744c6db8f11992c36a1ff 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -99,6 +99,13 @@ enum sgtl5000_micbias_resistor { SGTL5000_MICBIAS_8K = 8, }; +enum { + I2S_LRCLK_STRENGTH_DISABLE, + I2S_LRCLK_STRENGTH_LOW, + I2S_LRCLK_STRENGTH_MEDIUM, + I2S_LRCLK_STRENGTH_HIGH, +}; + /* sgtl5000 private structure in codec */ struct sgtl5000_priv { int sysclk; /* sysclk rate */ @@ -111,6 +118,7 @@ struct sgtl5000_priv { int revision; u8 micbias_resistor; u8 micbias_voltage; + u8 lrclk_strength; }; /* @@ -1089,6 +1097,7 @@ static int sgtl5000_enable_regulators(struct i2c_client *client) static int sgtl5000_probe(struct snd_soc_codec *codec) { int ret; + u16 reg; struct sgtl5000_priv *sgtl5000 = snd_soc_codec_get_drvdata(codec); /* power up sgtl5000 */ @@ -1118,7 +1127,8 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) SGTL5000_DAC_MUTE_RIGHT | SGTL5000_DAC_MUTE_LEFT); - snd_soc_write(codec, SGTL5000_CHIP_PAD_STRENGTH, 0x015f); + reg = ((sgtl5000->lrclk_strength) << SGTL5000_PAD_I2S_LRCLK_SHIFT | 0x5f); + snd_soc_write(codec, SGTL5000_CHIP_PAD_STRENGTH, reg); snd_soc_write(codec, SGTL5000_CHIP_ANA_CTRL, SGTL5000_HP_ZCD_EN | @@ -1347,6 +1357,13 @@ static int sgtl5000_i2c_probe(struct i2c_client *client, } } + sgtl5000->lrclk_strength = I2S_LRCLK_STRENGTH_LOW; + if (!of_property_read_u32(np, "lrclk-strength", &value)) { + if (value > I2S_LRCLK_STRENGTH_HIGH) + value = I2S_LRCLK_STRENGTH_LOW; + sgtl5000->lrclk_strength = value; + } + /* Ensure sgtl5000 will start with sane register values */ sgtl5000_fill_defaults(client); diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c index 2bb5a11c9ba19d1d81310558e2b78f550d3eafc9..a622623e8558a44090a0ec4869f2aff3f70aa524 100644 --- a/sound/soc/codecs/ssm4567.c +++ b/sound/soc/codecs/ssm4567.c @@ -485,6 +485,14 @@ static const struct i2c_device_id ssm4567_i2c_ids[] = { }; MODULE_DEVICE_TABLE(i2c, ssm4567_i2c_ids); +#ifdef CONFIG_OF +static const struct of_device_id ssm4567_of_match[] = { + { .compatible = "adi,ssm4567", }, + { } +}; +MODULE_DEVICE_TABLE(of, ssm4567_of_match); +#endif + #ifdef CONFIG_ACPI static const struct acpi_device_id ssm4567_acpi_match[] = { @@ -498,6 +506,7 @@ MODULE_DEVICE_TABLE(acpi, ssm4567_acpi_match); static struct i2c_driver ssm4567_driver = { .driver = { .name = "ssm4567", + .of_match_table = of_match_ptr(ssm4567_of_match), .acpi_match_table = ACPI_PTR(ssm4567_acpi_match), }, .probe = ssm4567_i2c_probe, diff --git a/sound/soc/codecs/sta529.c b/sound/soc/codecs/sta529.c index d4b384e4b2664cf8b080ba5271fef8bcce0df5e3..660734359bf3c174577e9363a34cf066e4bd0f3b 100644 --- a/sound/soc/codecs/sta529.c +++ b/sound/soc/codecs/sta529.c @@ -375,9 +375,16 @@ static const struct i2c_device_id sta529_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, sta529_i2c_id); +static const struct of_device_id sta529_of_match[] = { + { .compatible = "st,sta529", }, + { } +}; +MODULE_DEVICE_TABLE(of, sta529_of_match); + static struct i2c_driver sta529_i2c_driver = { .driver = { .name = "sta529", + .of_match_table = sta529_of_match, }, .probe = sta529_i2c_probe, .remove = sta529_i2c_remove, diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c index baf455e8c2f7b038b0883e43184d12e99c46cfb7..8840f72f3c4ad7f2a2b5f8bcb0cb54745d8dfbf1 100644 --- a/sound/soc/codecs/tas2552.c +++ b/sound/soc/codecs/tas2552.c @@ -611,7 +611,7 @@ static int tas2552_codec_probe(struct snd_soc_codec *codec) regulator_bulk_disable(ARRAY_SIZE(tas2552->supplies), tas2552->supplies); - return -EIO; + return ret; } static int tas2552_codec_remove(struct snd_soc_codec *codec) @@ -637,7 +637,7 @@ static int tas2552_suspend(struct snd_soc_codec *codec) if (ret != 0) dev_err(codec->dev, "Failed to disable supplies: %d\n", ret); - return 0; + return ret; } static int tas2552_resume(struct snd_soc_codec *codec) @@ -653,7 +653,7 @@ static int tas2552_resume(struct snd_soc_codec *codec) ret); } - return 0; + return ret; } #else #define tas2552_suspend NULL diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c index 410cae0f2060474313872d6b9110bf05d3368e43..628a8eeaab689eb85b6d3dba54ec5a98aefe6da0 100644 --- a/sound/soc/codecs/tlv320aic23.c +++ b/sound/soc/codecs/tlv320aic23.c @@ -174,10 +174,9 @@ static const struct snd_soc_dapm_route tlv320aic23_intercon[] = { {"ROUT", NULL, "Output Mixer"}, /* Inputs */ - {"Line Input", "NULL", "LLINEIN"}, - {"Line Input", "NULL", "RLINEIN"}, - - {"Mic Input", "NULL", "MICIN"}, + {"Line Input", NULL, "LLINEIN"}, + {"Line Input", NULL, "RLINEIN"}, + {"Mic Input", NULL, "MICIN"}, /* input mux */ {"Capture Source", "Line", "Line Input"}, diff --git a/sound/soc/codecs/twl6040.c b/sound/soc/codecs/twl6040.c index 748036e851ea4d129266ca1e4d97b38e387c4eed..2b6ad09e08868323e3e9bef25bd26b33bb5e2949 100644 --- a/sound/soc/codecs/twl6040.c +++ b/sound/soc/codecs/twl6040.c @@ -606,6 +606,14 @@ static const struct snd_kcontrol_new twl6040_snd_controls[] = { twl6040_headset_power_get_enum, twl6040_headset_power_put_enum), + /* Left HS PDM data routed to Right HSDAC */ + SOC_SINGLE("Headset Mono to Stereo Playback Switch", + TWL6040_REG_HSRCTL, 7, 1, 0), + + /* Left HF PDM data routed to Right HFDAC */ + SOC_SINGLE("Handsfree Mono to Stereo Playback Switch", + TWL6040_REG_HFRCTL, 5, 1, 0), + SOC_ENUM_EXT("PLL Selection", twl6040_power_mode_enum, twl6040_pll_get_enum, twl6040_pll_put_enum), }; diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index 2918fdb95e58581555fe7ef3ae3c6019040b86b3..61cdc79840e70743229f5919bfc5aa3293f38270 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -791,9 +791,16 @@ static const struct i2c_device_id uda1380_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, uda1380_i2c_id); +static const struct of_device_id uda1380_of_match[] = { + { .compatible = "nxp,uda1380", }, + { } +}; +MODULE_DEVICE_TABLE(of, uda1380_of_match); + static struct i2c_driver uda1380_i2c_driver = { .driver = { .name = "uda1380-codec", + .of_match_table = uda1380_of_match, }, .probe = uda1380_i2c_probe, .remove = uda1380_i2c_remove, diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index 560575000cc5e5aef666570c78e1f8df714ad718..138a84efdd54cb6047b18645c51377ab2c110570 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -2014,7 +2014,7 @@ static void wm5100_micd_irq(struct wm5100_priv *wm5100) ret = regmap_read(wm5100->regmap, WM5100_MIC_DETECT_3, &val); if (ret != 0) { - dev_err(wm5100->dev, "Failed to read micropone status: %d\n", + dev_err(wm5100->dev, "Failed to read microphone status: %d\n", ret); return; } diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 6e887c2c42b1a33808a3bc513dcf5f4b1c481de1..237eeb9a8b97fe726f7535ce46f458852a3626b5 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -115,10 +116,19 @@ static const struct reg_default wm8903_reg_defaults[] = { { 172, 0x0000 }, /* R172 - Analogue Output Bias 0 */ }; +#define WM8903_NUM_SUPPLIES 4 +static const char *wm8903_supply_names[WM8903_NUM_SUPPLIES] = { + "AVDD", + "CPVDD", + "DBVDD", + "DCVDD", +}; + struct wm8903_priv { struct wm8903_platform_data *pdata; struct device *dev; struct regmap *regmap; + struct regulator_bulk_data supplies[WM8903_NUM_SUPPLIES]; int sysclk; int irq; @@ -2030,6 +2040,23 @@ static int wm8903_i2c_probe(struct i2c_client *i2c, pdata = wm8903->pdata; + for (i = 0; i < ARRAY_SIZE(wm8903->supplies); i++) + wm8903->supplies[i].supply = wm8903_supply_names[i]; + + ret = devm_regulator_bulk_get(&i2c->dev, ARRAY_SIZE(wm8903->supplies), + wm8903->supplies); + if (ret != 0) { + dev_err(&i2c->dev, "Failed to request supplies: %d\n", ret); + return ret; + } + + ret = regulator_bulk_enable(ARRAY_SIZE(wm8903->supplies), + wm8903->supplies); + if (ret != 0) { + dev_err(&i2c->dev, "Failed to enable supplies: %d\n", ret); + return ret; + } + ret = regmap_read(wm8903->regmap, WM8903_SW_RESET_AND_ID, &val); if (ret != 0) { dev_err(&i2c->dev, "Failed to read chip ID: %d\n", ret); @@ -2160,6 +2187,8 @@ static int wm8903_i2c_probe(struct i2c_client *i2c, return 0; err: + regulator_bulk_disable(ARRAY_SIZE(wm8903->supplies), + wm8903->supplies); return ret; } @@ -2167,6 +2196,8 @@ static int wm8903_i2c_remove(struct i2c_client *client) { struct wm8903_priv *wm8903 = i2c_get_clientdata(client); + regulator_bulk_disable(ARRAY_SIZE(wm8903->supplies), + wm8903->supplies); if (client->irq) free_irq(client->irq, wm8903); wm8903_free_gpio(wm8903); diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 3bf081a7e450993324bb435887aedd23d0bc578f..9ed4557009542b8773806875dbcbc63ee3a8cac5 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -604,12 +604,150 @@ static const int bclk_divs[] = { 120, 160, 220, 240, 320, 320, 320 }; +/** + * wm8960_configure_sysclk - checks if there is a sysclk frequency available + * The sysclk must be chosen such that: + * - sysclk = MCLK / sysclk_divs + * - lrclk = sysclk / dac_divs + * - 10 * bclk = sysclk / bclk_divs + * + * If we cannot find an exact match for (sysclk, lrclk, bclk) + * triplet, we relax the bclk such that bclk is chosen as the + * closest available frequency greater than expected bclk. + * + * @wm8960_priv: wm8960 codec private data + * @mclk: MCLK used to derive sysclk + * @sysclk_idx: sysclk_divs index for found sysclk + * @dac_idx: dac_divs index for found lrclk + * @bclk_idx: bclk_divs index for found bclk + * + * Returns: + * -1, in case no sysclk frequency available found + * >=0, in case we could derive bclk and lrclk from sysclk using + * (@sysclk_idx, @dac_idx, @bclk_idx) dividers + */ +static +int wm8960_configure_sysclk(struct wm8960_priv *wm8960, int mclk, + int *sysclk_idx, int *dac_idx, int *bclk_idx) +{ + int sysclk, bclk, lrclk; + int i, j, k; + int diff, closest = mclk; + + /* marker for no match */ + *bclk_idx = -1; + + bclk = wm8960->bclk; + lrclk = wm8960->lrclk; + + /* check if the sysclk frequency is available. */ + for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) { + if (sysclk_divs[i] == -1) + continue; + sysclk = mclk / sysclk_divs[i]; + for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) { + if (sysclk != dac_divs[j] * lrclk) + continue; + for (k = 0; k < ARRAY_SIZE(bclk_divs); ++k) { + diff = sysclk - bclk * bclk_divs[k] / 10; + if (diff == 0) { + *sysclk_idx = i; + *dac_idx = j; + *bclk_idx = k; + break; + } + if (diff > 0 && closest > diff) { + *sysclk_idx = i; + *dac_idx = j; + *bclk_idx = k; + closest = diff; + } + } + if (k != ARRAY_SIZE(bclk_divs)) + break; + } + if (j != ARRAY_SIZE(dac_divs)) + break; + } + return *bclk_idx; +} + +/** + * wm8960_configure_pll - checks if there is a PLL out frequency available + * The PLL out frequency must be chosen such that: + * - sysclk = lrclk * dac_divs + * - freq_out = sysclk * sysclk_divs + * - 10 * sysclk = bclk * bclk_divs + * + * If we cannot find an exact match for (sysclk, lrclk, bclk) + * triplet, we relax the bclk such that bclk is chosen as the + * closest available frequency greater than expected bclk. + * + * @codec: codec structure + * @freq_in: input frequency used to derive freq out via PLL + * @sysclk_idx: sysclk_divs index for found sysclk + * @dac_idx: dac_divs index for found lrclk + * @bclk_idx: bclk_divs index for found bclk + * + * Returns: + * < 0, in case no PLL frequency out available was found + * >=0, in case we could derive bclk, lrclk, sysclk from PLL out using + * (@sysclk_idx, @dac_idx, @bclk_idx) dividers + */ +static +int wm8960_configure_pll(struct snd_soc_codec *codec, int freq_in, + int *sysclk_idx, int *dac_idx, int *bclk_idx) +{ + struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec); + int sysclk, bclk, lrclk, freq_out; + int diff, closest, best_freq_out; + int i, j, k; + + bclk = wm8960->bclk; + lrclk = wm8960->lrclk; + closest = freq_in; + + best_freq_out = -EINVAL; + *sysclk_idx = *dac_idx = *bclk_idx = -1; + + for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) { + if (sysclk_divs[i] == -1) + continue; + for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) { + sysclk = lrclk * dac_divs[j]; + freq_out = sysclk * sysclk_divs[i]; + + for (k = 0; k < ARRAY_SIZE(bclk_divs); ++k) { + if (!is_pll_freq_available(freq_in, freq_out)) + continue; + + diff = sysclk - bclk * bclk_divs[k] / 10; + if (diff == 0) { + *sysclk_idx = i; + *dac_idx = j; + *bclk_idx = k; + return freq_out; + } + if (diff > 0 && closest > diff) { + *sysclk_idx = i; + *dac_idx = j; + *bclk_idx = k; + closest = diff; + best_freq_out = freq_out; + } + } + } + } + + return best_freq_out; +} static int wm8960_configure_clocking(struct snd_soc_codec *codec) { struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec); - int sysclk, bclk, lrclk, freq_out, freq_in; + int freq_out, freq_in; u16 iface1 = snd_soc_read(codec, WM8960_IFACE1); int i, j, k; + int ret; if (!(iface1 & (1<<6))) { dev_dbg(codec->dev, @@ -623,8 +761,6 @@ static int wm8960_configure_clocking(struct snd_soc_codec *codec) } freq_in = wm8960->freq_in; - bclk = wm8960->bclk; - lrclk = wm8960->lrclk; /* * If it's sysclk auto mode, check if the MCLK can provide sysclk or * not. If MCLK can provide sysclk, using MCLK to provide sysclk @@ -643,60 +779,21 @@ static int wm8960_configure_clocking(struct snd_soc_codec *codec) } if (wm8960->clk_id != WM8960_SYSCLK_PLL) { - /* check if the sysclk frequency is available. */ - for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) { - if (sysclk_divs[i] == -1) - continue; - sysclk = freq_out / sysclk_divs[i]; - for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) { - if (sysclk != dac_divs[j] * lrclk) - continue; - for (k = 0; k < ARRAY_SIZE(bclk_divs); ++k) - if (sysclk == bclk * bclk_divs[k] / 10) - break; - if (k != ARRAY_SIZE(bclk_divs)) - break; - } - if (j != ARRAY_SIZE(dac_divs)) - break; - } - - if (i != ARRAY_SIZE(sysclk_divs)) { + ret = wm8960_configure_sysclk(wm8960, freq_out, &i, &j, &k); + if (ret >= 0) { goto configure_clock; } else if (wm8960->clk_id != WM8960_SYSCLK_AUTO) { dev_err(codec->dev, "failed to configure clock\n"); return -EINVAL; } } - /* get a available pll out frequency and set pll */ - for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) { - if (sysclk_divs[i] == -1) - continue; - for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) { - sysclk = lrclk * dac_divs[j]; - freq_out = sysclk * sysclk_divs[i]; - for (k = 0; k < ARRAY_SIZE(bclk_divs); ++k) { - if (sysclk == bclk * bclk_divs[k] / 10 && - is_pll_freq_available(freq_in, freq_out)) { - wm8960_set_pll(codec, - freq_in, freq_out); - break; - } else { - continue; - } - } - if (k != ARRAY_SIZE(bclk_divs)) - break; - } - if (j != ARRAY_SIZE(dac_divs)) - break; - } - - if (i == ARRAY_SIZE(sysclk_divs)) { - dev_err(codec->dev, "failed to configure clock\n"); - return -EINVAL; + freq_out = wm8960_configure_pll(codec, freq_in, &i, &j, &k); + if (freq_out < 0) { + dev_err(codec->dev, "failed to configure clock via PLL\n"); + return freq_out; } + wm8960_set_pll(codec, freq_in, freq_out); configure_clock: /* configure sysclk clock */ diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c index 90b2d418ef606247da30944b82190b92bb1e9db1..cf761e2d754692b4157eb41b1ee8544c8fde6879 100644 --- a/sound/soc/codecs/wm8978.c +++ b/sound/soc/codecs/wm8978.c @@ -1071,9 +1071,16 @@ static const struct i2c_device_id wm8978_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, wm8978_i2c_id); +static const struct of_device_id wm8978_of_match[] = { + { .compatible = "wlf,wm8978", }, + { } +}; +MODULE_DEVICE_TABLE(of, wm8978_of_match); + static struct i2c_driver wm8978_i2c_driver = { .driver = { .name = "wm8978", + .of_match_table = wm8978_of_match, }, .probe = wm8978_i2c_probe, .remove = wm8978_i2c_remove, diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index bbdb72f73df19ddf954daab19d3deb222208dc7c..20695b691aff7a8e34c255d92ace0283018b3b26 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -112,17 +112,22 @@ #define ADSP1_CLK_SEL_SHIFT 0 /* CLK_SEL_ENA */ #define ADSP1_CLK_SEL_WIDTH 3 /* CLK_SEL_ENA */ -#define ADSP2_CONTROL 0x0 -#define ADSP2_CLOCKING 0x1 -#define ADSP2_STATUS1 0x4 -#define ADSP2_WDMA_CONFIG_1 0x30 -#define ADSP2_WDMA_CONFIG_2 0x31 -#define ADSP2_RDMA_CONFIG_1 0x34 - -#define ADSP2_SCRATCH0 0x40 -#define ADSP2_SCRATCH1 0x41 -#define ADSP2_SCRATCH2 0x42 -#define ADSP2_SCRATCH3 0x43 +#define ADSP2_CONTROL 0x0 +#define ADSP2_CLOCKING 0x1 +#define ADSP2V2_CLOCKING 0x2 +#define ADSP2_STATUS1 0x4 +#define ADSP2_WDMA_CONFIG_1 0x30 +#define ADSP2_WDMA_CONFIG_2 0x31 +#define ADSP2V2_WDMA_CONFIG_2 0x32 +#define ADSP2_RDMA_CONFIG_1 0x34 + +#define ADSP2_SCRATCH0 0x40 +#define ADSP2_SCRATCH1 0x41 +#define ADSP2_SCRATCH2 0x42 +#define ADSP2_SCRATCH3 0x43 + +#define ADSP2V2_SCRATCH0_1 0x40 +#define ADSP2V2_SCRATCH2_3 0x42 /* * ADSP2 Control @@ -152,6 +157,17 @@ #define ADSP2_CLK_SEL_SHIFT 0 /* CLK_SEL_ENA */ #define ADSP2_CLK_SEL_WIDTH 3 /* CLK_SEL_ENA */ +/* + * ADSP2V2 clocking + */ +#define ADSP2V2_CLK_SEL_MASK 0x70000 /* CLK_SEL_ENA */ +#define ADSP2V2_CLK_SEL_SHIFT 16 /* CLK_SEL_ENA */ +#define ADSP2V2_CLK_SEL_WIDTH 3 /* CLK_SEL_ENA */ + +#define ADSP2V2_RATE_MASK 0x7800 /* DSP_RATE */ +#define ADSP2V2_RATE_SHIFT 11 /* DSP_RATE */ +#define ADSP2V2_RATE_WIDTH 4 /* DSP_RATE */ + /* * ADSP2 Status 1 */ @@ -160,6 +176,37 @@ #define ADSP2_RAM_RDY_SHIFT 0 #define ADSP2_RAM_RDY_WIDTH 1 +/* + * ADSP2 Lock support + */ +#define ADSP2_LOCK_CODE_0 0x5555 +#define ADSP2_LOCK_CODE_1 0xAAAA + +#define ADSP2_WATCHDOG 0x0A +#define ADSP2_BUS_ERR_ADDR 0x52 +#define ADSP2_REGION_LOCK_STATUS 0x64 +#define ADSP2_LOCK_REGION_1_LOCK_REGION_0 0x66 +#define ADSP2_LOCK_REGION_3_LOCK_REGION_2 0x68 +#define ADSP2_LOCK_REGION_5_LOCK_REGION_4 0x6A +#define ADSP2_LOCK_REGION_7_LOCK_REGION_6 0x6C +#define ADSP2_LOCK_REGION_9_LOCK_REGION_8 0x6E +#define ADSP2_LOCK_REGION_CTRL 0x7A +#define ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR 0x7C + +#define ADSP2_REGION_LOCK_ERR_MASK 0x8000 +#define ADSP2_SLAVE_ERR_MASK 0x4000 +#define ADSP2_WDT_TIMEOUT_STS_MASK 0x2000 +#define ADSP2_CTRL_ERR_PAUSE_ENA 0x0002 +#define ADSP2_CTRL_ERR_EINT 0x0001 + +#define ADSP2_BUS_ERR_ADDR_MASK 0x00FFFFFF +#define ADSP2_XMEM_ERR_ADDR_MASK 0x0000FFFF +#define ADSP2_PMEM_ERR_ADDR_MASK 0x7FFF0000 +#define ADSP2_PMEM_ERR_ADDR_SHIFT 16 +#define ADSP2_WDT_ENA_MASK 0xFFFFFFFD + +#define ADSP2_LOCK_REGION_SHIFT 16 + #define ADSP_MAX_STD_CTRL_SIZE 512 #define WM_ADSP_ACKED_CTL_TIMEOUT_MS 100 @@ -683,6 +730,9 @@ static const struct soc_enum wm_adsp_fw_enum[] = { SOC_ENUM_SINGLE(0, 1, ARRAY_SIZE(wm_adsp_fw_text), wm_adsp_fw_text), SOC_ENUM_SINGLE(0, 2, ARRAY_SIZE(wm_adsp_fw_text), wm_adsp_fw_text), SOC_ENUM_SINGLE(0, 3, ARRAY_SIZE(wm_adsp_fw_text), wm_adsp_fw_text), + SOC_ENUM_SINGLE(0, 4, ARRAY_SIZE(wm_adsp_fw_text), wm_adsp_fw_text), + SOC_ENUM_SINGLE(0, 5, ARRAY_SIZE(wm_adsp_fw_text), wm_adsp_fw_text), + SOC_ENUM_SINGLE(0, 6, ARRAY_SIZE(wm_adsp_fw_text), wm_adsp_fw_text), }; const struct snd_kcontrol_new wm_adsp_fw_controls[] = { @@ -694,6 +744,12 @@ const struct snd_kcontrol_new wm_adsp_fw_controls[] = { wm_adsp_fw_get, wm_adsp_fw_put), SOC_ENUM_EXT("DSP4 Firmware", wm_adsp_fw_enum[3], wm_adsp_fw_get, wm_adsp_fw_put), + SOC_ENUM_EXT("DSP5 Firmware", wm_adsp_fw_enum[4], + wm_adsp_fw_get, wm_adsp_fw_put), + SOC_ENUM_EXT("DSP6 Firmware", wm_adsp_fw_enum[5], + wm_adsp_fw_get, wm_adsp_fw_put), + SOC_ENUM_EXT("DSP7 Firmware", wm_adsp_fw_enum[6], + wm_adsp_fw_get, wm_adsp_fw_put), }; EXPORT_SYMBOL_GPL(wm_adsp_fw_controls); @@ -750,6 +806,29 @@ static void wm_adsp2_show_fw_status(struct wm_adsp *dsp) be16_to_cpu(scratch[3])); } +static void wm_adsp2v2_show_fw_status(struct wm_adsp *dsp) +{ + u32 scratch[2]; + int ret; + + ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1, + scratch, sizeof(scratch)); + + if (ret) { + adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret); + return; + } + + scratch[0] = be32_to_cpu(scratch[0]); + scratch[1] = be32_to_cpu(scratch[1]); + + adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n", + scratch[0] & 0xFFFF, + scratch[0] >> 16, + scratch[1] & 0xFFFF, + scratch[1] >> 16); +} + static inline struct wm_coeff_ctl *bytes_ext_to_ctl(struct soc_bytes_ext *ext) { return container_of(ext, struct wm_coeff_ctl, bytes_ext); @@ -2435,10 +2514,17 @@ static int wm_adsp2_ena(struct wm_adsp *dsp) unsigned int val; int ret, count; - ret = regmap_update_bits_async(dsp->regmap, dsp->base + ADSP2_CONTROL, - ADSP2_SYS_ENA, ADSP2_SYS_ENA); - if (ret != 0) - return ret; + switch (dsp->rev) { + case 0: + ret = regmap_update_bits_async(dsp->regmap, + dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, ADSP2_SYS_ENA); + if (ret != 0) + return ret; + break; + default: + break; + } /* Wait for the RAM to start, should be near instantaneous */ for (count = 0; count < 10; ++count) { @@ -2497,11 +2583,17 @@ static void wm_adsp2_boot_work(struct work_struct *work) if (ret != 0) goto err_ena; - /* Turn DSP back off until we are ready to run */ - ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, - ADSP2_SYS_ENA, 0); - if (ret != 0) - goto err_ena; + switch (dsp->rev) { + case 0: + /* Turn DSP back off until we are ready to run */ + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, 0); + if (ret != 0) + goto err_ena; + break; + default: + break; + } dsp->booted = true; @@ -2523,12 +2615,21 @@ static void wm_adsp2_set_dspclk(struct wm_adsp *dsp, unsigned int freq) { int ret; - ret = regmap_update_bits_async(dsp->regmap, - dsp->base + ADSP2_CLOCKING, - ADSP2_CLK_SEL_MASK, - freq << ADSP2_CLK_SEL_SHIFT); - if (ret != 0) - adsp_err(dsp, "Failed to set clock rate: %d\n", ret); + switch (dsp->rev) { + case 0: + ret = regmap_update_bits_async(dsp->regmap, + dsp->base + ADSP2_CLOCKING, + ADSP2_CLK_SEL_MASK, + freq << ADSP2_CLK_SEL_SHIFT); + if (ret) { + adsp_err(dsp, "Failed to set clock rate: %d\n", ret); + return; + } + break; + default: + /* clock is handled by parent codec driver */ + break; + } } int wm_adsp2_preloader_get(struct snd_kcontrol *kcontrol, @@ -2568,6 +2669,18 @@ int wm_adsp2_preloader_put(struct snd_kcontrol *kcontrol, } EXPORT_SYMBOL_GPL(wm_adsp2_preloader_put); +static void wm_adsp_stop_watchdog(struct wm_adsp *dsp) +{ + switch (dsp->rev) { + case 0: + case 1: + return; + default: + regmap_update_bits(dsp->regmap, dsp->base + ADSP2_WATCHDOG, + ADSP2_WDT_ENA_MASK, 0); + } +} + int wm_adsp2_early_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event, unsigned int freq) @@ -2640,6 +2753,8 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, if (ret != 0) goto err; + wm_adsp2_lock(dsp, dsp->lock_regions); + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, ADSP2_CORE_ENA | ADSP2_START, @@ -2663,23 +2778,49 @@ int wm_adsp2_event(struct snd_soc_dapm_widget *w, /* Tell the firmware to cleanup */ wm_adsp_signal_event_controls(dsp, WM_ADSP_FW_EVENT_SHUTDOWN); + wm_adsp_stop_watchdog(dsp); + /* Log firmware state, it can be useful for analysis */ - wm_adsp2_show_fw_status(dsp); + switch (dsp->rev) { + case 0: + wm_adsp2_show_fw_status(dsp); + break; + default: + wm_adsp2v2_show_fw_status(dsp); + break; + } mutex_lock(&dsp->pwr_lock); dsp->running = false; - regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + regmap_update_bits(dsp->regmap, + dsp->base + ADSP2_CONTROL, ADSP2_CORE_ENA | ADSP2_START, 0); /* Make sure DMAs are quiesced */ - regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0); - regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0); - regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_2, 0); - - regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, - ADSP2_SYS_ENA, 0); + switch (dsp->rev) { + case 0: + regmap_write(dsp->regmap, + dsp->base + ADSP2_RDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, + dsp->base + ADSP2_WDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, + dsp->base + ADSP2_WDMA_CONFIG_2, 0); + + regmap_update_bits(dsp->regmap, + dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, 0); + break; + default: + regmap_write(dsp->regmap, + dsp->base + ADSP2_RDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, + dsp->base + ADSP2_WDMA_CONFIG_1, 0); + regmap_write(dsp->regmap, + dsp->base + ADSP2V2_WDMA_CONFIG_2, 0); + break; + } if (wm_adsp_fw[dsp->fw].num_caps != 0) wm_adsp_buffer_free(dsp); @@ -2732,15 +2873,22 @@ int wm_adsp2_init(struct wm_adsp *dsp) { int ret; - /* - * Disable the DSP memory by default when in reset for a small - * power saving. - */ - ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, - ADSP2_MEM_ENA, 0); - if (ret != 0) { - adsp_err(dsp, "Failed to clear memory retention: %d\n", ret); - return ret; + switch (dsp->rev) { + case 0: + /* + * Disable the DSP memory by default when in reset for a small + * power saving. + */ + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_MEM_ENA, 0); + if (ret) { + adsp_err(dsp, + "Failed to clear memory retention: %d\n", ret); + return ret; + } + break; + default: + break; } INIT_LIST_HEAD(&dsp->alg_regions); @@ -3523,4 +3671,94 @@ int wm_adsp_compr_copy(struct snd_compr_stream *stream, char __user *buf, } EXPORT_SYMBOL_GPL(wm_adsp_compr_copy); +int wm_adsp2_lock(struct wm_adsp *dsp, unsigned int lock_regions) +{ + struct regmap *regmap = dsp->regmap; + unsigned int code0, code1, lock_reg; + + if (!(lock_regions & WM_ADSP2_REGION_ALL)) + return 0; + + lock_regions &= WM_ADSP2_REGION_ALL; + lock_reg = dsp->base + ADSP2_LOCK_REGION_1_LOCK_REGION_0; + + while (lock_regions) { + code0 = code1 = 0; + if (lock_regions & BIT(0)) { + code0 = ADSP2_LOCK_CODE_0; + code1 = ADSP2_LOCK_CODE_1; + } + if (lock_regions & BIT(1)) { + code0 |= ADSP2_LOCK_CODE_0 << ADSP2_LOCK_REGION_SHIFT; + code1 |= ADSP2_LOCK_CODE_1 << ADSP2_LOCK_REGION_SHIFT; + } + regmap_write(regmap, lock_reg, code0); + regmap_write(regmap, lock_reg, code1); + lock_regions >>= 2; + lock_reg += 2; + } + + return 0; +} +EXPORT_SYMBOL_GPL(wm_adsp2_lock); + +irqreturn_t wm_adsp2_bus_error(struct wm_adsp *dsp) +{ + unsigned int val; + struct regmap *regmap = dsp->regmap; + int ret = 0; + + ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val); + if (ret) { + adsp_err(dsp, + "Failed to read Region Lock Ctrl register: %d\n", ret); + return IRQ_HANDLED; + } + + if (val & ADSP2_WDT_TIMEOUT_STS_MASK) { + adsp_err(dsp, "watchdog timeout error\n"); + wm_adsp_stop_watchdog(dsp); + } + + if (val & (ADSP2_SLAVE_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) { + if (val & ADSP2_SLAVE_ERR_MASK) + adsp_err(dsp, "bus error: slave error\n"); + else + adsp_err(dsp, "bus error: region lock error\n"); + + ret = regmap_read(regmap, dsp->base + ADSP2_BUS_ERR_ADDR, &val); + if (ret) { + adsp_err(dsp, + "Failed to read Bus Err Addr register: %d\n", + ret); + return IRQ_HANDLED; + } + + adsp_err(dsp, "bus error address = 0x%x\n", + val & ADSP2_BUS_ERR_ADDR_MASK); + + ret = regmap_read(regmap, + dsp->base + ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR, + &val); + if (ret) { + adsp_err(dsp, + "Failed to read Pmem Xmem Err Addr register: %d\n", + ret); + return IRQ_HANDLED; + } + + adsp_err(dsp, "xmem error address = 0x%x\n", + val & ADSP2_XMEM_ERR_ADDR_MASK); + adsp_err(dsp, "pmem error address = 0x%x\n", + (val & ADSP2_PMEM_ERR_ADDR_MASK) >> + ADSP2_PMEM_ERR_ADDR_SHIFT); + } + + regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, + ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(wm_adsp2_bus_error); + MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index 3706b11053a331856e7049bc880870f3849fabc8..41cc11c19b837cbcdd815c18794d025e2156abfd 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -23,6 +23,23 @@ #define WM_ADSP_COMPR_OK 0 #define WM_ADSP_COMPR_VOICE_TRIGGER 1 +#define WM_ADSP2_REGION_0 BIT(0) +#define WM_ADSP2_REGION_1 BIT(1) +#define WM_ADSP2_REGION_2 BIT(2) +#define WM_ADSP2_REGION_3 BIT(3) +#define WM_ADSP2_REGION_4 BIT(4) +#define WM_ADSP2_REGION_5 BIT(5) +#define WM_ADSP2_REGION_6 BIT(6) +#define WM_ADSP2_REGION_7 BIT(7) +#define WM_ADSP2_REGION_8 BIT(8) +#define WM_ADSP2_REGION_9 BIT(9) +#define WM_ADSP2_REGION_1_9 (WM_ADSP2_REGION_1 | \ + WM_ADSP2_REGION_2 | WM_ADSP2_REGION_3 | \ + WM_ADSP2_REGION_4 | WM_ADSP2_REGION_5 | \ + WM_ADSP2_REGION_6 | WM_ADSP2_REGION_7 | \ + WM_ADSP2_REGION_8 | WM_ADSP2_REGION_9) +#define WM_ADSP2_REGION_ALL (WM_ADSP2_REGION_0 | WM_ADSP2_REGION_1_9) + struct wm_adsp_region { int type; unsigned int base; @@ -40,6 +57,7 @@ struct wm_adsp_compr_buf; struct wm_adsp { const char *part; + int rev; int num; int type; struct device *dev; @@ -75,6 +93,8 @@ struct wm_adsp { struct mutex pwr_lock; + unsigned int lock_regions; + #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_root; char *wmfw_file_name; @@ -113,6 +133,10 @@ int wm_adsp1_event(struct snd_soc_dapm_widget *w, int wm_adsp2_early_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event, unsigned int freq); + +int wm_adsp2_lock(struct wm_adsp *adsp, unsigned int regions); +irqreturn_t wm_adsp2_bus_error(struct wm_adsp *adsp); + int wm_adsp2_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event); diff --git a/sound/soc/dwc/Kconfig b/sound/soc/dwc/Kconfig index c297efe43861821e7313790d12c6613812e2b80c..c6fd95fa5ca69e1fda59e3a903857fe1c2137cad 100644 --- a/sound/soc/dwc/Kconfig +++ b/sound/soc/dwc/Kconfig @@ -8,10 +8,10 @@ config SND_DESIGNWARE_I2S maximum of 8 channels each for play and record. config SND_DESIGNWARE_PCM - tristate "PCM PIO extension for I2S driver" + bool "PCM PIO extension for I2S driver" depends on SND_DESIGNWARE_I2S help - Say Y, M or N if you want to add a custom ALSA extension that registers + Say Y or N if you want to add a custom ALSA extension that registers a PCM and uses PIO to transfer data. This functionality is specially suited for I2S devices that don't have diff --git a/sound/soc/dwc/Makefile b/sound/soc/dwc/Makefile index 38f1ca31c5fa021c717775070bb140d42759e165..3e24c0ff95fb8d3b4d7ca580f91ee8ce92e35faa 100644 --- a/sound/soc/dwc/Makefile +++ b/sound/soc/dwc/Makefile @@ -1,5 +1,5 @@ # SYNOPSYS Platform Support obj-$(CONFIG_SND_DESIGNWARE_I2S) += designware_i2s.o -ifdef CONFIG_SND_DESIGNWARE_PCM -obj-$(CONFIG_SND_DESIGNWARE_I2S) += designware_pcm.o -endif + +designware_i2s-y := dwc-i2s.o +designware_i2s-$(CONFIG_SND_DESIGNWARE_PCM) += dwc-pcm.o diff --git a/sound/soc/dwc/designware_pcm.c b/sound/soc/dwc/designware_pcm.c deleted file mode 100644 index 459ec861e6b6c84c04a7597dde059131523d6654..0000000000000000000000000000000000000000 --- a/sound/soc/dwc/designware_pcm.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * ALSA SoC Synopsys PIO PCM for I2S driver - * - * sound/soc/dwc/designware_pcm.c - * - * Copyright (C) 2016 Synopsys - * Jose Abreu - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include -#include -#include -#include "local.h" - -#define BUFFER_BYTES_MAX (3 * 2 * 8 * PERIOD_BYTES_MIN) -#define PERIOD_BYTES_MIN 4096 -#define PERIODS_MIN 2 - -#define dw_pcm_tx_fn(sample_bits) \ -static unsigned int dw_pcm_tx_##sample_bits(struct dw_i2s_dev *dev, \ - struct snd_pcm_runtime *runtime, unsigned int tx_ptr, \ - bool *period_elapsed) \ -{ \ - const u##sample_bits (*p)[2] = (void *)runtime->dma_area; \ - unsigned int period_pos = tx_ptr % runtime->period_size; \ - int i; \ -\ - for (i = 0; i < dev->fifo_th; i++) { \ - iowrite32(p[tx_ptr][0], dev->i2s_base + LRBR_LTHR(0)); \ - iowrite32(p[tx_ptr][1], dev->i2s_base + RRBR_RTHR(0)); \ - period_pos++; \ - if (++tx_ptr >= runtime->buffer_size) \ - tx_ptr = 0; \ - } \ - *period_elapsed = period_pos >= runtime->period_size; \ - return tx_ptr; \ -} - -#define dw_pcm_rx_fn(sample_bits) \ -static unsigned int dw_pcm_rx_##sample_bits(struct dw_i2s_dev *dev, \ - struct snd_pcm_runtime *runtime, unsigned int rx_ptr, \ - bool *period_elapsed) \ -{ \ - u##sample_bits (*p)[2] = (void *)runtime->dma_area; \ - unsigned int period_pos = rx_ptr % runtime->period_size; \ - int i; \ -\ - for (i = 0; i < dev->fifo_th; i++) { \ - p[rx_ptr][0] = ioread32(dev->i2s_base + LRBR_LTHR(0)); \ - p[rx_ptr][1] = ioread32(dev->i2s_base + RRBR_RTHR(0)); \ - period_pos++; \ - if (++rx_ptr >= runtime->buffer_size) \ - rx_ptr = 0; \ - } \ - *period_elapsed = period_pos >= runtime->period_size; \ - return rx_ptr; \ -} - -dw_pcm_tx_fn(16); -dw_pcm_tx_fn(32); -dw_pcm_rx_fn(16); -dw_pcm_rx_fn(32); - -#undef dw_pcm_tx_fn -#undef dw_pcm_rx_fn - -static const struct snd_pcm_hardware dw_pcm_hardware = { - .info = SNDRV_PCM_INFO_INTERLEAVED | - SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_BLOCK_TRANSFER, - .rates = SNDRV_PCM_RATE_32000 | - SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000, - .rate_min = 32000, - .rate_max = 48000, - .formats = SNDRV_PCM_FMTBIT_S16_LE | - SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S32_LE, - .channels_min = 2, - .channels_max = 2, - .buffer_bytes_max = BUFFER_BYTES_MAX, - .period_bytes_min = PERIOD_BYTES_MIN, - .period_bytes_max = BUFFER_BYTES_MAX / PERIODS_MIN, - .periods_min = PERIODS_MIN, - .periods_max = BUFFER_BYTES_MAX / PERIOD_BYTES_MIN, - .fifo_size = 16, -}; - -static void dw_pcm_transfer(struct dw_i2s_dev *dev, bool push) -{ - struct snd_pcm_substream *substream; - bool active, period_elapsed; - - rcu_read_lock(); - if (push) - substream = rcu_dereference(dev->tx_substream); - else - substream = rcu_dereference(dev->rx_substream); - active = substream && snd_pcm_running(substream); - if (active) { - unsigned int ptr; - unsigned int new_ptr; - - if (push) { - ptr = READ_ONCE(dev->tx_ptr); - new_ptr = dev->tx_fn(dev, substream->runtime, ptr, - &period_elapsed); - cmpxchg(&dev->tx_ptr, ptr, new_ptr); - } else { - ptr = READ_ONCE(dev->rx_ptr); - new_ptr = dev->rx_fn(dev, substream->runtime, ptr, - &period_elapsed); - cmpxchg(&dev->rx_ptr, ptr, new_ptr); - } - - if (period_elapsed) - snd_pcm_period_elapsed(substream); - } - rcu_read_unlock(); -} - -void dw_pcm_push_tx(struct dw_i2s_dev *dev) -{ - dw_pcm_transfer(dev, true); -} -EXPORT_SYMBOL_GPL(dw_pcm_push_tx); - -void dw_pcm_pop_rx(struct dw_i2s_dev *dev) -{ - dw_pcm_transfer(dev, false); -} -EXPORT_SYMBOL_GPL(dw_pcm_pop_rx); - -static int dw_pcm_open(struct snd_pcm_substream *substream) -{ - struct snd_pcm_runtime *runtime = substream->runtime; - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(rtd->cpu_dai); - - snd_soc_set_runtime_hwparams(substream, &dw_pcm_hardware); - snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); - runtime->private_data = dev; - - return 0; -} - -static int dw_pcm_close(struct snd_pcm_substream *substream) -{ - synchronize_rcu(); - return 0; -} - -static int dw_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *hw_params) -{ - struct snd_pcm_runtime *runtime = substream->runtime; - struct dw_i2s_dev *dev = runtime->private_data; - int ret; - - switch (params_channels(hw_params)) { - case 2: - break; - default: - dev_err(dev->dev, "invalid channels number\n"); - return -EINVAL; - } - - switch (params_format(hw_params)) { - case SNDRV_PCM_FORMAT_S16_LE: - dev->tx_fn = dw_pcm_tx_16; - dev->rx_fn = dw_pcm_rx_16; - break; - case SNDRV_PCM_FORMAT_S24_LE: - case SNDRV_PCM_FORMAT_S32_LE: - dev->tx_fn = dw_pcm_tx_32; - dev->rx_fn = dw_pcm_rx_32; - break; - default: - dev_err(dev->dev, "invalid format\n"); - return -EINVAL; - } - - ret = snd_pcm_lib_malloc_pages(substream, - params_buffer_bytes(hw_params)); - if (ret < 0) - return ret; - else - return 0; -} - -static int dw_pcm_hw_free(struct snd_pcm_substream *substream) -{ - return snd_pcm_lib_free_pages(substream); -} - -static int dw_pcm_trigger(struct snd_pcm_substream *substream, int cmd) -{ - struct snd_pcm_runtime *runtime = substream->runtime; - struct dw_i2s_dev *dev = runtime->private_data; - int ret = 0; - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - WRITE_ONCE(dev->tx_ptr, 0); - rcu_assign_pointer(dev->tx_substream, substream); - } else { - WRITE_ONCE(dev->rx_ptr, 0); - rcu_assign_pointer(dev->rx_substream, substream); - } - break; - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - rcu_assign_pointer(dev->tx_substream, NULL); - else - rcu_assign_pointer(dev->rx_substream, NULL); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static snd_pcm_uframes_t dw_pcm_pointer(struct snd_pcm_substream *substream) -{ - struct snd_pcm_runtime *runtime = substream->runtime; - struct dw_i2s_dev *dev = runtime->private_data; - snd_pcm_uframes_t pos; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - pos = READ_ONCE(dev->tx_ptr); - else - pos = READ_ONCE(dev->rx_ptr); - - return pos < runtime->buffer_size ? pos : 0; -} - -static int dw_pcm_new(struct snd_soc_pcm_runtime *rtd) -{ - size_t size = dw_pcm_hardware.buffer_bytes_max; - - return snd_pcm_lib_preallocate_pages_for_all(rtd->pcm, - SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_KERNEL), size, size); -} - -static void dw_pcm_free(struct snd_pcm *pcm) -{ - snd_pcm_lib_preallocate_free_for_all(pcm); -} - -static const struct snd_pcm_ops dw_pcm_ops = { - .open = dw_pcm_open, - .close = dw_pcm_close, - .ioctl = snd_pcm_lib_ioctl, - .hw_params = dw_pcm_hw_params, - .hw_free = dw_pcm_hw_free, - .trigger = dw_pcm_trigger, - .pointer = dw_pcm_pointer, -}; - -static const struct snd_soc_platform_driver dw_pcm_platform = { - .pcm_new = dw_pcm_new, - .pcm_free = dw_pcm_free, - .ops = &dw_pcm_ops, -}; - -int dw_pcm_register(struct platform_device *pdev) -{ - return devm_snd_soc_register_platform(&pdev->dev, &dw_pcm_platform); -} -EXPORT_SYMBOL_GPL(dw_pcm_register); diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/dwc-i2s.c similarity index 100% rename from sound/soc/dwc/designware_i2s.c rename to sound/soc/dwc/dwc-i2s.c diff --git a/sound/soc/dwc/dwc-pcm.c b/sound/soc/dwc/dwc-pcm.c new file mode 100644 index 0000000000000000000000000000000000000000..406fd867117b84dcdbefa4381b511338e6e12285 --- /dev/null +++ b/sound/soc/dwc/dwc-pcm.c @@ -0,0 +1,281 @@ +/* + * ALSA SoC Synopsys PIO PCM for I2S driver + * + * sound/soc/dwc/designware_pcm.c + * + * Copyright (C) 2016 Synopsys + * Jose Abreu + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include "local.h" + +#define BUFFER_BYTES_MAX (3 * 2 * 8 * PERIOD_BYTES_MIN) +#define PERIOD_BYTES_MIN 4096 +#define PERIODS_MIN 2 + +#define dw_pcm_tx_fn(sample_bits) \ +static unsigned int dw_pcm_tx_##sample_bits(struct dw_i2s_dev *dev, \ + struct snd_pcm_runtime *runtime, unsigned int tx_ptr, \ + bool *period_elapsed) \ +{ \ + const u##sample_bits (*p)[2] = (void *)runtime->dma_area; \ + unsigned int period_pos = tx_ptr % runtime->period_size; \ + int i; \ +\ + for (i = 0; i < dev->fifo_th; i++) { \ + iowrite32(p[tx_ptr][0], dev->i2s_base + LRBR_LTHR(0)); \ + iowrite32(p[tx_ptr][1], dev->i2s_base + RRBR_RTHR(0)); \ + period_pos++; \ + if (++tx_ptr >= runtime->buffer_size) \ + tx_ptr = 0; \ + } \ + *period_elapsed = period_pos >= runtime->period_size; \ + return tx_ptr; \ +} + +#define dw_pcm_rx_fn(sample_bits) \ +static unsigned int dw_pcm_rx_##sample_bits(struct dw_i2s_dev *dev, \ + struct snd_pcm_runtime *runtime, unsigned int rx_ptr, \ + bool *period_elapsed) \ +{ \ + u##sample_bits (*p)[2] = (void *)runtime->dma_area; \ + unsigned int period_pos = rx_ptr % runtime->period_size; \ + int i; \ +\ + for (i = 0; i < dev->fifo_th; i++) { \ + p[rx_ptr][0] = ioread32(dev->i2s_base + LRBR_LTHR(0)); \ + p[rx_ptr][1] = ioread32(dev->i2s_base + RRBR_RTHR(0)); \ + period_pos++; \ + if (++rx_ptr >= runtime->buffer_size) \ + rx_ptr = 0; \ + } \ + *period_elapsed = period_pos >= runtime->period_size; \ + return rx_ptr; \ +} + +dw_pcm_tx_fn(16); +dw_pcm_tx_fn(32); +dw_pcm_rx_fn(16); +dw_pcm_rx_fn(32); + +#undef dw_pcm_tx_fn +#undef dw_pcm_rx_fn + +static const struct snd_pcm_hardware dw_pcm_hardware = { + .info = SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_BLOCK_TRANSFER, + .rates = SNDRV_PCM_RATE_32000 | + SNDRV_PCM_RATE_44100 | + SNDRV_PCM_RATE_48000, + .rate_min = 32000, + .rate_max = 48000, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S24_LE | + SNDRV_PCM_FMTBIT_S32_LE, + .channels_min = 2, + .channels_max = 2, + .buffer_bytes_max = BUFFER_BYTES_MAX, + .period_bytes_min = PERIOD_BYTES_MIN, + .period_bytes_max = BUFFER_BYTES_MAX / PERIODS_MIN, + .periods_min = PERIODS_MIN, + .periods_max = BUFFER_BYTES_MAX / PERIOD_BYTES_MIN, + .fifo_size = 16, +}; + +static void dw_pcm_transfer(struct dw_i2s_dev *dev, bool push) +{ + struct snd_pcm_substream *substream; + bool active, period_elapsed; + + rcu_read_lock(); + if (push) + substream = rcu_dereference(dev->tx_substream); + else + substream = rcu_dereference(dev->rx_substream); + active = substream && snd_pcm_running(substream); + if (active) { + unsigned int ptr; + unsigned int new_ptr; + + if (push) { + ptr = READ_ONCE(dev->tx_ptr); + new_ptr = dev->tx_fn(dev, substream->runtime, ptr, + &period_elapsed); + cmpxchg(&dev->tx_ptr, ptr, new_ptr); + } else { + ptr = READ_ONCE(dev->rx_ptr); + new_ptr = dev->rx_fn(dev, substream->runtime, ptr, + &period_elapsed); + cmpxchg(&dev->rx_ptr, ptr, new_ptr); + } + + if (period_elapsed) + snd_pcm_period_elapsed(substream); + } + rcu_read_unlock(); +} + +void dw_pcm_push_tx(struct dw_i2s_dev *dev) +{ + dw_pcm_transfer(dev, true); +} + +void dw_pcm_pop_rx(struct dw_i2s_dev *dev) +{ + dw_pcm_transfer(dev, false); +} + +static int dw_pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(rtd->cpu_dai); + + snd_soc_set_runtime_hwparams(substream, &dw_pcm_hardware); + snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); + runtime->private_data = dev; + + return 0; +} + +static int dw_pcm_close(struct snd_pcm_substream *substream) +{ + synchronize_rcu(); + return 0; +} + +static int dw_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct dw_i2s_dev *dev = runtime->private_data; + int ret; + + switch (params_channels(hw_params)) { + case 2: + break; + default: + dev_err(dev->dev, "invalid channels number\n"); + return -EINVAL; + } + + switch (params_format(hw_params)) { + case SNDRV_PCM_FORMAT_S16_LE: + dev->tx_fn = dw_pcm_tx_16; + dev->rx_fn = dw_pcm_rx_16; + break; + case SNDRV_PCM_FORMAT_S24_LE: + case SNDRV_PCM_FORMAT_S32_LE: + dev->tx_fn = dw_pcm_tx_32; + dev->rx_fn = dw_pcm_rx_32; + break; + default: + dev_err(dev->dev, "invalid format\n"); + return -EINVAL; + } + + ret = snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(hw_params)); + if (ret < 0) + return ret; + else + return 0; +} + +static int dw_pcm_hw_free(struct snd_pcm_substream *substream) +{ + return snd_pcm_lib_free_pages(substream); +} + +static int dw_pcm_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct dw_i2s_dev *dev = runtime->private_data; + int ret = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + WRITE_ONCE(dev->tx_ptr, 0); + rcu_assign_pointer(dev->tx_substream, substream); + } else { + WRITE_ONCE(dev->rx_ptr, 0); + rcu_assign_pointer(dev->rx_substream, substream); + } + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + rcu_assign_pointer(dev->tx_substream, NULL); + else + rcu_assign_pointer(dev->rx_substream, NULL); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static snd_pcm_uframes_t dw_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct dw_i2s_dev *dev = runtime->private_data; + snd_pcm_uframes_t pos; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + pos = READ_ONCE(dev->tx_ptr); + else + pos = READ_ONCE(dev->rx_ptr); + + return pos < runtime->buffer_size ? pos : 0; +} + +static int dw_pcm_new(struct snd_soc_pcm_runtime *rtd) +{ + size_t size = dw_pcm_hardware.buffer_bytes_max; + + return snd_pcm_lib_preallocate_pages_for_all(rtd->pcm, + SNDRV_DMA_TYPE_CONTINUOUS, + snd_dma_continuous_data(GFP_KERNEL), size, size); +} + +static void dw_pcm_free(struct snd_pcm *pcm) +{ + snd_pcm_lib_preallocate_free_for_all(pcm); +} + +static const struct snd_pcm_ops dw_pcm_ops = { + .open = dw_pcm_open, + .close = dw_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = dw_pcm_hw_params, + .hw_free = dw_pcm_hw_free, + .trigger = dw_pcm_trigger, + .pointer = dw_pcm_pointer, +}; + +static const struct snd_soc_platform_driver dw_pcm_platform = { + .pcm_new = dw_pcm_new, + .pcm_free = dw_pcm_free, + .ops = &dw_pcm_ops, +}; + +int dw_pcm_register(struct platform_device *pdev) +{ + return devm_snd_soc_register_platform(&pdev->dev, &dw_pcm_platform); +} diff --git a/sound/soc/fsl/eukrea-tlv320.c b/sound/soc/fsl/eukrea-tlv320.c index 883087f2b092b1028e279b3cf54ce87ae46954dd..84ef6385736cd5df845aa2d675caaac67ddef1de 100644 --- a/sound/soc/fsl/eukrea-tlv320.c +++ b/sound/soc/fsl/eukrea-tlv320.c @@ -64,7 +64,7 @@ static int eukrea_tlv320_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops eukrea_tlv320_snd_ops = { +static const struct snd_soc_ops eukrea_tlv320_snd_ops = { .hw_params = eukrea_tlv320_hw_params, }; diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index dc30d780f8742437d9fd6ef3db7cd0ada76c9013..282d841840b1cfbea8d1ca97c3b764d21101d15c 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -76,7 +76,7 @@ static int fsl_asrc_dma_prepare_and_submit(struct snd_pcm_substream *substream) pair->dma_chan[!dir], runtime->dma_addr, snd_pcm_lib_buffer_bytes(substream), snd_pcm_lib_period_bytes(substream), - dir == OUT ? DMA_TO_DEVICE : DMA_FROM_DEVICE, flags); + dir == OUT ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, flags); if (!pair->desc[!dir]) { dev_err(dev, "failed to prepare slave DMA for Front-End\n"); return -ENOMEM; diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 38bfd46f4ad8c3898d376d94d9f5bd010e285139..809a069d490b1a2ba91eaaca599b387f0d496b40 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -19,7 +19,6 @@ #include "fsl_esai.h" #include "imx-pcm.h" -#define FSL_ESAI_RATES SNDRV_PCM_RATE_8000_192000 #define FSL_ESAI_FORMATS (SNDRV_PCM_FMTBIT_S8 | \ SNDRV_PCM_FMTBIT_S16_LE | \ SNDRV_PCM_FMTBIT_S20_3LE | \ @@ -647,14 +646,14 @@ static struct snd_soc_dai_driver fsl_esai_dai = { .stream_name = "CPU-Playback", .channels_min = 1, .channels_max = 12, - .rates = FSL_ESAI_RATES, + .rates = SNDRV_PCM_RATE_8000_192000, .formats = FSL_ESAI_FORMATS, }, .capture = { .stream_name = "CPU-Capture", .channels_min = 1, .channels_max = 8, - .rates = FSL_ESAI_RATES, + .rates = SNDRV_PCM_RATE_8000_192000, .formats = FSL_ESAI_FORMATS, }, .ops = &fsl_esai_dai_ops, diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index fde08660b63be270f95436424e32e443bdb8a0de..173cb8496641353b63177acba147278541c36370 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -54,16 +55,6 @@ #include "fsl_ssi.h" #include "imx-pcm.h" -/** - * FSLSSI_I2S_RATES: sample rates supported by the I2S - * - * This driver currently only supports the SSI running in I2S slave mode, - * which means the codec determines the sample rate. Therefore, we tell - * ALSA that we support all rates and let the codec driver decide what rates - * are really supported. - */ -#define FSLSSI_I2S_RATES SNDRV_PCM_RATE_CONTINUOUS - /** * FSLSSI_I2S_FORMATS: audio formats supported by the SSI * @@ -1212,14 +1203,14 @@ static struct snd_soc_dai_driver fsl_ssi_dai_template = { .stream_name = "CPU-Playback", .channels_min = 1, .channels_max = 32, - .rates = FSLSSI_I2S_RATES, + .rates = SNDRV_PCM_RATE_CONTINUOUS, .formats = FSLSSI_I2S_FORMATS, }, .capture = { .stream_name = "CPU-Capture", .channels_min = 1, .channels_max = 32, - .rates = FSLSSI_I2S_RATES, + .rates = SNDRV_PCM_RATE_CONTINUOUS, .formats = FSLSSI_I2S_FORMATS, }, .ops = &fsl_ssi_dai_ops, @@ -1325,14 +1316,10 @@ static struct snd_ac97_bus_ops fsl_ssi_ac97_ops = { */ static void make_lowercase(char *s) { - char *p = s; - char c; - - while ((c = *p)) { - if ((c >= 'A') && (c <= 'Z')) - *p = c + ('a' - 'A'); - p++; - } + if (!s) + return; + for (; *s; s++) + *s = tolower(*s); } static int fsl_ssi_imx_probe(struct platform_device *pdev, diff --git a/sound/soc/fsl/imx-mc13783.c b/sound/soc/fsl/imx-mc13783.c index bb0459018b45faa695c557371887d57d3fce4005..9d19b808f634b81cb0f82e0125077733ce2262b7 100644 --- a/sound/soc/fsl/imx-mc13783.c +++ b/sound/soc/fsl/imx-mc13783.c @@ -48,7 +48,7 @@ static int imx_mc13783_hifi_hw_params(struct snd_pcm_substream *substream, return snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 16); } -static struct snd_soc_ops imx_mc13783_hifi_ops = { +static const struct snd_soc_ops imx_mc13783_hifi_ops = { .hw_params = imx_mc13783_hifi_hw_params, }; diff --git a/sound/soc/fsl/imx-pcm-dma.c b/sound/soc/fsl/imx-pcm-dma.c index f3d3d1ffa84e4257029495785ca88e72cb013533..314814ddd2b0a406799058df39611c1dafd8935e 100644 --- a/sound/soc/fsl/imx-pcm-dma.c +++ b/sound/soc/fsl/imx-pcm-dma.c @@ -33,48 +33,20 @@ static bool filter(struct dma_chan *chan, void *param) return true; } -static const struct snd_pcm_hardware imx_pcm_hardware = { - .info = SNDRV_PCM_INFO_INTERLEAVED | - SNDRV_PCM_INFO_BLOCK_TRANSFER | - SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE | - SNDRV_PCM_INFO_RESUME, - .buffer_bytes_max = IMX_DEFAULT_DMABUF_SIZE, - .period_bytes_min = 128, - .period_bytes_max = 65535, /* Limited by SDMA engine */ - .periods_min = 2, - .periods_max = 255, - .fifo_size = 0, -}; - static const struct snd_dmaengine_pcm_config imx_dmaengine_pcm_config = { - .pcm_hardware = &imx_pcm_hardware, .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, .compat_filter_fn = filter, - .prealloc_buffer_size = IMX_DEFAULT_DMABUF_SIZE, }; int imx_pcm_dma_init(struct platform_device *pdev, size_t size) { struct snd_dmaengine_pcm_config *config; - struct snd_pcm_hardware *pcm_hardware; config = devm_kzalloc(&pdev->dev, sizeof(struct snd_dmaengine_pcm_config), GFP_KERNEL); if (!config) return -ENOMEM; *config = imx_dmaengine_pcm_config; - if (size) - config->prealloc_buffer_size = size; - - pcm_hardware = devm_kzalloc(&pdev->dev, - sizeof(struct snd_pcm_hardware), GFP_KERNEL); - *pcm_hardware = imx_pcm_hardware; - if (size) - pcm_hardware->buffer_bytes_max = size; - - config->pcm_hardware = pcm_hardware; return devm_snd_dmaengine_pcm_register(&pdev->dev, config, diff --git a/sound/soc/fsl/imx-pcm-fiq.c b/sound/soc/fsl/imx-pcm-fiq.c index dac6688540dc32ff966b56d05a3a6556d344e57c..92410f7ca1fafab6dcf6132808c758eea49f67c1 100644 --- a/sound/soc/fsl/imx-pcm-fiq.c +++ b/sound/soc/fsl/imx-pcm-fiq.c @@ -282,7 +282,7 @@ static int imx_pcm_new(struct snd_soc_pcm_runtime *rtd) return 0; } -static int ssi_irq = 0; +static int ssi_irq; static int imx_pcm_fiq_new(struct snd_soc_pcm_runtime *rtd) { diff --git a/sound/soc/fsl/imx-wm8962.c b/sound/soc/fsl/imx-wm8962.c index 1b60958e208000389d1fd1f347652c7d37abb3e7..206b898e554ce4530013a72ac78e107b06c3a95e 100644 --- a/sound/soc/fsl/imx-wm8962.c +++ b/sound/soc/fsl/imx-wm8962.c @@ -33,14 +33,14 @@ struct imx_wm8962_data { struct snd_soc_card card; char codec_dai_name[DAI_NAME_SIZE]; char platform_name[DAI_NAME_SIZE]; - struct clk *codec_clk; unsigned int clk_frequency; }; struct imx_priv { struct platform_device *pdev; + int sample_rate; + snd_pcm_format_t sample_format; }; -static struct imx_priv card_priv; static const struct snd_soc_dapm_widget imx_wm8962_dapm_widgets[] = { SND_SOC_DAPM_HP("Headphone Jack", NULL), @@ -49,14 +49,14 @@ static const struct snd_soc_dapm_widget imx_wm8962_dapm_widgets[] = { SND_SOC_DAPM_MIC("DMIC", NULL), }; -static int sample_rate = 44100; -static snd_pcm_format_t sample_format = SNDRV_PCM_FORMAT_S16_LE; - static int imx_hifi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { - sample_rate = params_rate(params); - sample_format = params_format(params); + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct imx_priv *priv = snd_soc_card_get_drvdata(rtd->card); + + priv->sample_rate = params_rate(params); + priv->sample_format = params_format(params); return 0; } @@ -71,7 +71,7 @@ static int imx_wm8962_set_bias_level(struct snd_soc_card *card, { struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; - struct imx_priv *priv = &card_priv; + struct imx_priv *priv = snd_soc_card_get_drvdata(card); struct imx_wm8962_data *data = snd_soc_card_get_drvdata(card); struct device *dev = &priv->pdev->dev; unsigned int pll_out; @@ -85,10 +85,10 @@ static int imx_wm8962_set_bias_level(struct snd_soc_card *card, switch (level) { case SND_SOC_BIAS_PREPARE: if (dapm->bias_level == SND_SOC_BIAS_STANDBY) { - if (sample_format == SNDRV_PCM_FORMAT_S24_LE) - pll_out = sample_rate * 384; + if (priv->sample_format == SNDRV_PCM_FORMAT_S24_LE) + pll_out = priv->sample_rate * 384; else - pll_out = sample_rate * 256; + pll_out = priv->sample_rate * 256; ret = snd_soc_dai_set_pll(codec_dai, WM8962_FLL, WM8962_FLL_MCLK, data->clk_frequency, @@ -140,7 +140,7 @@ static int imx_wm8962_late_probe(struct snd_soc_card *card) { struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; - struct imx_priv *priv = &card_priv; + struct imx_priv *priv = snd_soc_card_get_drvdata(card); struct imx_wm8962_data *data = snd_soc_card_get_drvdata(card); struct device *dev = &priv->pdev->dev; int ret; @@ -160,13 +160,20 @@ static int imx_wm8962_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct device_node *ssi_np, *codec_np; struct platform_device *ssi_pdev; - struct imx_priv *priv = &card_priv; struct i2c_client *codec_dev; struct imx_wm8962_data *data; + struct imx_priv *priv; + struct clk *codec_clk; int int_port, ext_port; int ret; + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + priv->pdev = pdev; + priv->sample_rate = 44100; + priv->sample_format = SNDRV_PCM_FORMAT_S16_LE; ret = of_property_read_u32(np, "mux-int-port", &int_port); if (ret) { @@ -231,19 +238,15 @@ static int imx_wm8962_probe(struct platform_device *pdev) goto fail; } - data->codec_clk = devm_clk_get(&codec_dev->dev, NULL); - if (IS_ERR(data->codec_clk)) { - ret = PTR_ERR(data->codec_clk); + codec_clk = clk_get(&codec_dev->dev, NULL); + if (IS_ERR(codec_clk)) { + ret = PTR_ERR(codec_clk); dev_err(&codec_dev->dev, "failed to get codec clk: %d\n", ret); goto fail; } - data->clk_frequency = clk_get_rate(data->codec_clk); - ret = clk_prepare_enable(data->codec_clk); - if (ret) { - dev_err(&codec_dev->dev, "failed to enable codec clk: %d\n", ret); - goto fail; - } + data->clk_frequency = clk_get_rate(codec_clk); + clk_put(codec_clk); data->dai.name = "HiFi"; data->dai.stream_name = "HiFi"; @@ -258,10 +261,10 @@ static int imx_wm8962_probe(struct platform_device *pdev) data->card.dev = &pdev->dev; ret = snd_soc_of_parse_card_name(&data->card, "model"); if (ret) - goto clk_fail; + goto fail; ret = snd_soc_of_parse_audio_routing(&data->card, "audio-routing"); if (ret) - goto clk_fail; + goto fail; data->card.num_links = 1; data->card.owner = THIS_MODULE; data->card.dai_link = &data->dai; @@ -277,16 +280,9 @@ static int imx_wm8962_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, &data->card); if (ret) { dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret); - goto clk_fail; + goto fail; } - of_node_put(ssi_np); - of_node_put(codec_np); - - return 0; - -clk_fail: - clk_disable_unprepare(data->codec_clk); fail: of_node_put(ssi_np); of_node_put(codec_np); @@ -294,17 +290,6 @@ static int imx_wm8962_probe(struct platform_device *pdev) return ret; } -static int imx_wm8962_remove(struct platform_device *pdev) -{ - struct snd_soc_card *card = platform_get_drvdata(pdev); - struct imx_wm8962_data *data = snd_soc_card_get_drvdata(card); - - if (!IS_ERR(data->codec_clk)) - clk_disable_unprepare(data->codec_clk); - - return 0; -} - static const struct of_device_id imx_wm8962_dt_ids[] = { { .compatible = "fsl,imx-audio-wm8962", }, { /* sentinel */ } @@ -318,7 +303,6 @@ static struct platform_driver imx_wm8962_driver = { .of_match_table = imx_wm8962_dt_ids, }, .probe = imx_wm8962_probe, - .remove = imx_wm8962_remove, }; module_platform_driver(imx_wm8962_driver); diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index ddf49f30b23fc9a2faa3934c81c3712c5b2cc627..a639b52c16f6ea52be1048f5be5a814c8a908999 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -174,7 +174,7 @@ static int mpc8610_hpcd_machine_remove(struct snd_soc_card *card) /** * mpc8610_hpcd_ops: ASoC machine driver operations */ -static struct snd_soc_ops mpc8610_hpcd_ops = { +static const struct snd_soc_ops mpc8610_hpcd_ops = { .startup = mpc8610_hpcd_startup, }; diff --git a/sound/soc/fsl/mx27vis-aic32x4.c b/sound/soc/fsl/mx27vis-aic32x4.c index 198eeb3f3f7a05b570f7a1e280b7228ce7cb0706..d7ec3d20065c9d15a974b75804927658446837d4 100644 --- a/sound/soc/fsl/mx27vis-aic32x4.c +++ b/sound/soc/fsl/mx27vis-aic32x4.c @@ -73,7 +73,7 @@ static int mx27vis_aic32x4_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops mx27vis_aic32x4_snd_ops = { +static const struct snd_soc_ops mx27vis_aic32x4_snd_ops = { .hw_params = mx27vis_aic32x4_hw_params, }; diff --git a/sound/soc/fsl/p1022_ds.c b/sound/soc/fsl/p1022_ds.c index a1f780ecadf54d4e679d39aac2803e24c8be39ec..41c623c55c166df26d08765fbcd8692596307aa4 100644 --- a/sound/soc/fsl/p1022_ds.c +++ b/sound/soc/fsl/p1022_ds.c @@ -184,7 +184,7 @@ static int p1022_ds_machine_remove(struct snd_soc_card *card) /** * p1022_ds_ops: ASoC machine driver operations */ -static struct snd_soc_ops p1022_ds_ops = { +static const struct snd_soc_ops p1022_ds_ops = { .startup = p1022_ds_startup, }; diff --git a/sound/soc/fsl/p1022_rdk.c b/sound/soc/fsl/p1022_rdk.c index d4d88a8cb9c066296e1254445704880f3e91b34e..4afbdd610bfada7bcf71c768134dc20b99c3d444 100644 --- a/sound/soc/fsl/p1022_rdk.c +++ b/sound/soc/fsl/p1022_rdk.c @@ -188,7 +188,7 @@ static int p1022_rdk_machine_remove(struct snd_soc_card *card) /** * p1022_rdk_ops: ASoC machine driver operations */ -static struct snd_soc_ops p1022_rdk_ops = { +static const struct snd_soc_ops p1022_rdk_ops = { .startup = p1022_rdk_startup, }; diff --git a/sound/soc/fsl/phycore-ac97.c b/sound/soc/fsl/phycore-ac97.c index ae403c29688f51605af5c2f29a72fe70e9ac00e0..66fb6c4614d209b0c7dbaca9c7a147f5f4bb6aac 100644 --- a/sound/soc/fsl/phycore-ac97.c +++ b/sound/soc/fsl/phycore-ac97.c @@ -23,7 +23,7 @@ static struct snd_soc_card imx_phycore; -static struct snd_soc_ops imx_phycore_hifi_ops = { +static const struct snd_soc_ops imx_phycore_hifi_ops = { }; static struct snd_soc_dai_link imx_phycore_dai_ac97[] = { diff --git a/sound/soc/fsl/wm1133-ev1.c b/sound/soc/fsl/wm1133-ev1.c index b454972dce3521a48cb0762651bc3fad0a234690..cdaf16367b4714afa74e601ab80448cfaf947211 100644 --- a/sound/soc/fsl/wm1133-ev1.c +++ b/sound/soc/fsl/wm1133-ev1.c @@ -139,7 +139,7 @@ static int wm1133_ev1_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops wm1133_ev1_ops = { +static const struct snd_soc_ops wm1133_ev1_ops = { .hw_params = wm1133_ev1_hw_params, }; diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 85b4f18065143cd0ad82f347d6e0959c9ad84e79..bc136d2bd7cdeb68b5ca7a24db94a193ede323b3 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -40,9 +40,10 @@ struct simple_card_data { struct snd_soc_dai_link *dai_link; }; -#define simple_priv_to_dev(priv) ((priv)->snd_card.dev) -#define simple_priv_to_link(priv, i) ((priv)->snd_card.dai_link + (i)) +#define simple_priv_to_card(priv) (&(priv)->snd_card) #define simple_priv_to_props(priv, i) ((priv)->dai_props + (i)) +#define simple_priv_to_dev(priv) (simple_priv_to_card(priv)->dev) +#define simple_priv_to_link(priv, i) (simple_priv_to_card(priv)->dai_link + (i)) #define DAI "sound-dai" #define CELL "#sound-dai-cells" @@ -201,7 +202,7 @@ static int asoc_simple_card_dai_init(struct snd_soc_pcm_runtime *rtd) if (ret < 0) return ret; - ret = asoc_simple_card_init_mic(rtd->card, &priv->hp_jack, PREFIX); + ret = asoc_simple_card_init_mic(rtd->card, &priv->mic_jack, PREFIX); if (ret < 0) return ret; @@ -323,6 +324,7 @@ static int asoc_simple_card_parse_aux_devs(struct device_node *node, { struct device *dev = simple_priv_to_dev(priv); struct device_node *aux_node; + struct snd_soc_card *card = simple_priv_to_card(priv); int i, n, len; if (!of_find_property(node, PREFIX "aux-devs", &len)) @@ -332,19 +334,19 @@ static int asoc_simple_card_parse_aux_devs(struct device_node *node, if (n <= 0) return -EINVAL; - priv->snd_card.aux_dev = devm_kzalloc(dev, - n * sizeof(*priv->snd_card.aux_dev), GFP_KERNEL); - if (!priv->snd_card.aux_dev) + card->aux_dev = devm_kzalloc(dev, + n * sizeof(*card->aux_dev), GFP_KERNEL); + if (!card->aux_dev) return -ENOMEM; for (i = 0; i < n; i++) { aux_node = of_parse_phandle(node, PREFIX "aux-devs", i); if (!aux_node) return -EINVAL; - priv->snd_card.aux_dev[i].codec_of_node = aux_node; + card->aux_dev[i].codec_of_node = aux_node; } - priv->snd_card.num_aux_devs = n; + card->num_aux_devs = n; return 0; } @@ -352,6 +354,7 @@ static int asoc_simple_card_parse_of(struct device_node *node, struct simple_card_data *priv) { struct device *dev = simple_priv_to_dev(priv); + struct snd_soc_card *card = simple_priv_to_card(priv); struct device_node *dai_link; int ret; @@ -362,7 +365,7 @@ static int asoc_simple_card_parse_of(struct device_node *node, /* The off-codec widgets */ if (of_property_read_bool(node, PREFIX "widgets")) { - ret = snd_soc_of_parse_audio_simple_widgets(&priv->snd_card, + ret = snd_soc_of_parse_audio_simple_widgets(card, PREFIX "widgets"); if (ret) goto card_parse_end; @@ -370,7 +373,7 @@ static int asoc_simple_card_parse_of(struct device_node *node, /* DAPM routes */ if (of_property_read_bool(node, PREFIX "routing")) { - ret = snd_soc_of_parse_audio_routing(&priv->snd_card, + ret = snd_soc_of_parse_audio_routing(card, PREFIX "routing"); if (ret) goto card_parse_end; @@ -401,7 +404,7 @@ static int asoc_simple_card_parse_of(struct device_node *node, goto card_parse_end; } - ret = asoc_simple_card_parse_card_name(&priv->snd_card, PREFIX); + ret = asoc_simple_card_parse_card_name(card, PREFIX); if (ret < 0) goto card_parse_end; @@ -418,8 +421,9 @@ static int asoc_simple_card_probe(struct platform_device *pdev) struct simple_card_data *priv; struct snd_soc_dai_link *dai_link; struct simple_dai_props *dai_props; - struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct snd_soc_card *card; int num, ret; /* Get the number of DAI links */ @@ -442,10 +446,11 @@ static int asoc_simple_card_probe(struct platform_device *pdev) priv->dai_link = dai_link; /* Init snd_soc_card */ - priv->snd_card.owner = THIS_MODULE; - priv->snd_card.dev = dev; - priv->snd_card.dai_link = priv->dai_link; - priv->snd_card.num_links = num; + card = simple_priv_to_card(priv); + card->owner = THIS_MODULE; + card->dev = dev; + card->dai_link = priv->dai_link; + card->num_links = num; if (np && of_device_is_available(np)) { @@ -474,7 +479,7 @@ static int asoc_simple_card_probe(struct platform_device *pdev) return -EINVAL; } - priv->snd_card.name = (cinfo->card) ? cinfo->card : cinfo->name; + card->name = (cinfo->card) ? cinfo->card : cinfo->name; dai_link->name = cinfo->name; dai_link->stream_name = cinfo->name; dai_link->platform_name = cinfo->platform; @@ -489,13 +494,13 @@ static int asoc_simple_card_probe(struct platform_device *pdev) sizeof(priv->dai_props->codec_dai)); } - snd_soc_card_set_drvdata(&priv->snd_card, priv); + snd_soc_card_set_drvdata(card, priv); - ret = devm_snd_soc_register_card(&pdev->dev, &priv->snd_card); + ret = devm_snd_soc_register_card(dev, card); if (ret >= 0) return ret; err: - asoc_simple_card_clean_reference(&priv->snd_card); + asoc_simple_card_clean_reference(card); return ret; } diff --git a/sound/soc/generic/simple-scu-card.c b/sound/soc/generic/simple-scu-card.c index 308ff4c11a8d21293ddb5776e5e3e0eb900165f0..dcbcab230d1b0e0f08c87ba21b5439c39b90c088 100644 --- a/sound/soc/generic/simple-scu-card.c +++ b/sound/soc/generic/simple-scu-card.c @@ -31,9 +31,10 @@ struct simple_card_data { u32 convert_channels; }; -#define simple_priv_to_dev(priv) ((priv)->snd_card.dev) -#define simple_priv_to_link(priv, i) ((priv)->snd_card.dai_link + (i)) +#define simple_priv_to_card(priv) (&(priv)->snd_card) #define simple_priv_to_props(priv, i) ((priv)->dai_props + (i)) +#define simple_priv_to_dev(priv) (simple_priv_to_card(priv)->dev) +#define simple_priv_to_link(priv, i) (simple_priv_to_card(priv)->dai_link + (i)) #define DAI "sound-dai" #define CELL "#sound-dai-cells" @@ -109,6 +110,7 @@ static int asoc_simple_card_dai_link_of(struct device_node *np, struct device *dev = simple_priv_to_dev(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, idx); struct asoc_simple_dai *dai_props = simple_priv_to_props(priv, idx); + struct snd_soc_card *card = simple_priv_to_card(priv); int ret; if (is_fe) { @@ -163,7 +165,7 @@ static int asoc_simple_card_dai_link_of(struct device_node *np, if (ret < 0) return ret; - snd_soc_of_parse_audio_prefix(&priv->snd_card, + snd_soc_of_parse_audio_prefix(card, &priv->codec_conf, dai_link->codec_of_node, PREFIX "prefix"); @@ -201,6 +203,7 @@ static int asoc_simple_card_parse_of(struct device_node *node, { struct device *dev = simple_priv_to_dev(priv); struct device_node *np; + struct snd_soc_card *card = simple_priv_to_card(priv); unsigned int daifmt = 0; bool is_fe; int ret, i; @@ -208,7 +211,7 @@ static int asoc_simple_card_parse_of(struct device_node *node, if (!node) return -EINVAL; - ret = snd_soc_of_parse_audio_routing(&priv->snd_card, PREFIX "routing"); + ret = snd_soc_of_parse_audio_routing(card, PREFIX "routing"); if (ret < 0) return ret; @@ -239,12 +242,12 @@ static int asoc_simple_card_parse_of(struct device_node *node, i++; } - ret = asoc_simple_card_parse_card_name(&priv->snd_card, PREFIX); + ret = asoc_simple_card_parse_card_name(card, PREFIX); if (ret < 0) return ret; dev_dbg(dev, "New card: %s\n", - priv->snd_card.name ? priv->snd_card.name : ""); + card->name ? card->name : ""); dev_dbg(dev, "convert_rate %d\n", priv->convert_rate); dev_dbg(dev, "convert_channels %d\n", priv->convert_channels); @@ -256,8 +259,9 @@ static int asoc_simple_card_probe(struct platform_device *pdev) struct simple_card_data *priv; struct snd_soc_dai_link *dai_link; struct asoc_simple_dai *dai_props; + struct snd_soc_card *card; struct device *dev = &pdev->dev; - struct device_node *np = pdev->dev.of_node; + struct device_node *np = dev->of_node; int num, ret; /* Allocate the private data */ @@ -276,12 +280,13 @@ static int asoc_simple_card_probe(struct platform_device *pdev) priv->dai_link = dai_link; /* Init snd_soc_card */ - priv->snd_card.owner = THIS_MODULE; - priv->snd_card.dev = dev; - priv->snd_card.dai_link = priv->dai_link; - priv->snd_card.num_links = num; - priv->snd_card.codec_conf = &priv->codec_conf; - priv->snd_card.num_configs = 1; + card = simple_priv_to_card(priv); + card->owner = THIS_MODULE; + card->dev = dev; + card->dai_link = priv->dai_link; + card->num_links = num; + card->codec_conf = &priv->codec_conf; + card->num_configs = 1; ret = asoc_simple_card_parse_of(np, priv); if (ret < 0) { @@ -290,13 +295,13 @@ static int asoc_simple_card_probe(struct platform_device *pdev) goto err; } - snd_soc_card_set_drvdata(&priv->snd_card, priv); + snd_soc_card_set_drvdata(card, priv); - ret = devm_snd_soc_register_card(&pdev->dev, &priv->snd_card); + ret = devm_snd_soc_register_card(dev, card); if (ret >= 0) return ret; err: - asoc_simple_card_clean_reference(&priv->snd_card); + asoc_simple_card_clean_reference(card); return ret; } diff --git a/sound/soc/hisilicon/Kconfig b/sound/soc/hisilicon/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..4356d5a1d338f67cadf4d8ae408abf0f43ecae71 --- /dev/null +++ b/sound/soc/hisilicon/Kconfig @@ -0,0 +1,5 @@ +config SND_I2S_HI6210_I2S + tristate "Hisilicon I2S controller" + select SND_SOC_GENERIC_DMAENGINE_PCM + help + Hisilicon I2S diff --git a/sound/soc/hisilicon/Makefile b/sound/soc/hisilicon/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e8095e2af91a50e29748b5cee23bbc550a8f94ac --- /dev/null +++ b/sound/soc/hisilicon/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SND_I2S_HI6210_I2S) += hi6210-i2s.o diff --git a/sound/soc/hisilicon/hi6210-i2s.c b/sound/soc/hisilicon/hi6210-i2s.c new file mode 100644 index 0000000000000000000000000000000000000000..45163e5202f546faec545fb65fd0e60b1decc74b --- /dev/null +++ b/sound/soc/hisilicon/hi6210-i2s.c @@ -0,0 +1,618 @@ +/* + * linux/sound/soc/m8m/hi6210_i2s.c - I2S IP driver + * + * Copyright (C) 2015 Linaro, Ltd + * Author: Andy Green + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver only deals with S2 interface (BT) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hi6210-i2s.h" + +struct hi6210_i2s { + struct device *dev; + struct reset_control *rc; + struct clk *clk[8]; + int clocks; + struct snd_soc_dai_driver dai; + void __iomem *base; + struct regmap *sysctrl; + phys_addr_t base_phys; + struct snd_dmaengine_dai_dma_data dma_data[2]; + int clk_rate; + spinlock_t lock; + int rate; + int format; + u8 bits; + u8 channels; + u8 id; + u8 channel_length; + u8 use; + u32 master:1; + u32 status:1; +}; + +#define SC_PERIPH_CLKEN1 0x210 +#define SC_PERIPH_CLKDIS1 0x214 + +#define SC_PERIPH_CLKEN3 0x230 +#define SC_PERIPH_CLKDIS3 0x234 + +#define SC_PERIPH_CLKEN12 0x270 +#define SC_PERIPH_CLKDIS12 0x274 + +#define SC_PERIPH_RSTEN1 0x310 +#define SC_PERIPH_RSTDIS1 0x314 +#define SC_PERIPH_RSTSTAT1 0x318 + +#define SC_PERIPH_RSTEN2 0x320 +#define SC_PERIPH_RSTDIS2 0x324 +#define SC_PERIPH_RSTSTAT2 0x328 + +#define SOC_PMCTRL_BBPPLLALIAS 0x48 + +enum { + CLK_DACODEC, + CLK_I2S_BASE, +}; + +static inline void hi6210_write_reg(struct hi6210_i2s *i2s, int reg, u32 val) +{ + writel(val, i2s->base + reg); +} + +static inline u32 hi6210_read_reg(struct hi6210_i2s *i2s, int reg) +{ + return readl(i2s->base + reg); +} + +int hi6210_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *cpu_dai) +{ + struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev); + int ret, n; + u32 val; + + /* deassert reset on ABB */ + regmap_read(i2s->sysctrl, SC_PERIPH_RSTSTAT2, &val); + if (val & BIT(4)) + regmap_write(i2s->sysctrl, SC_PERIPH_RSTDIS2, BIT(4)); + + for (n = 0; n < i2s->clocks; n++) { + ret = clk_prepare_enable(i2s->clk[n]); + if (ret) { + while (n--) + clk_disable_unprepare(i2s->clk[n]); + return ret; + } + } + + ret = clk_set_rate(i2s->clk[CLK_I2S_BASE], 49152000); + if (ret) { + dev_err(i2s->dev, "%s: setting 49.152MHz base rate failed %d\n", + __func__, ret); + return ret; + } + + /* enable clock before frequency division */ + regmap_write(i2s->sysctrl, SC_PERIPH_CLKEN12, BIT(9)); + + /* enable codec working clock / == "codec bus clock" */ + regmap_write(i2s->sysctrl, SC_PERIPH_CLKEN1, BIT(5)); + + /* deassert reset on codec / interface clock / working clock */ + regmap_write(i2s->sysctrl, SC_PERIPH_RSTEN1, BIT(5)); + regmap_write(i2s->sysctrl, SC_PERIPH_RSTDIS1, BIT(5)); + + /* not interested in i2s irqs */ + val = hi6210_read_reg(i2s, HII2S_CODEC_IRQ_MASK); + val |= 0x3f; + hi6210_write_reg(i2s, HII2S_CODEC_IRQ_MASK, val); + + + /* reset the stereo downlink fifo */ + val = hi6210_read_reg(i2s, HII2S_APB_AFIFO_CFG_1); + val |= (BIT(5) | BIT(4)); + hi6210_write_reg(i2s, HII2S_APB_AFIFO_CFG_1, val); + + val = hi6210_read_reg(i2s, HII2S_APB_AFIFO_CFG_1); + val &= ~(BIT(5) | BIT(4)); + hi6210_write_reg(i2s, HII2S_APB_AFIFO_CFG_1, val); + + + val = hi6210_read_reg(i2s, HII2S_SW_RST_N); + val &= ~(HII2S_SW_RST_N__ST_DL_WORDLEN_MASK << + HII2S_SW_RST_N__ST_DL_WORDLEN_SHIFT); + val |= (HII2S_BITS_16 << HII2S_SW_RST_N__ST_DL_WORDLEN_SHIFT); + hi6210_write_reg(i2s, HII2S_SW_RST_N, val); + + val = hi6210_read_reg(i2s, HII2S_MISC_CFG); + /* mux 11/12 = APB not i2s */ + val &= ~HII2S_MISC_CFG__ST_DL_TEST_SEL; + /* BT R ch 0 = mixer op of DACR ch */ + val &= ~HII2S_MISC_CFG__S2_DOUT_RIGHT_SEL; + val &= ~HII2S_MISC_CFG__S2_DOUT_TEST_SEL; + + val |= HII2S_MISC_CFG__S2_DOUT_RIGHT_SEL; + /* BT L ch = 1 = mux 7 = "mixer output of DACL */ + val |= HII2S_MISC_CFG__S2_DOUT_TEST_SEL; + hi6210_write_reg(i2s, HII2S_MISC_CFG, val); + + val = hi6210_read_reg(i2s, HII2S_SW_RST_N); + val |= HII2S_SW_RST_N__SW_RST_N; + hi6210_write_reg(i2s, HII2S_SW_RST_N, val); + + return 0; +} +void hi6210_i2s_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *cpu_dai) +{ + struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev); + int n; + + for (n = 0; n < i2s->clocks; n++) + clk_disable_unprepare(i2s->clk[n]); + + regmap_write(i2s->sysctrl, SC_PERIPH_RSTEN1, BIT(5)); +} + +static void hi6210_i2s_txctrl(struct snd_soc_dai *cpu_dai, int on) +{ + struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev); + u32 val; + + spin_lock(&i2s->lock); + if (on) { + /* enable S2 TX */ + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val |= HII2S_I2S_CFG__S2_IF_TX_EN; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + } else { + /* disable S2 TX */ + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val &= ~HII2S_I2S_CFG__S2_IF_TX_EN; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + } + spin_unlock(&i2s->lock); +} + +static void hi6210_i2s_rxctrl(struct snd_soc_dai *cpu_dai, int on) +{ + struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev); + u32 val; + + spin_lock(&i2s->lock); + if (on) { + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val |= HII2S_I2S_CFG__S2_IF_RX_EN; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + } else { + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val &= ~HII2S_I2S_CFG__S2_IF_RX_EN; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + } + spin_unlock(&i2s->lock); +} + +static int hi6210_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) +{ + struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev); + + /* + * We don't actually set the hardware until the hw_params + * call, but we need to validate the user input here. + */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + case SND_SOC_DAIFMT_LEFT_J: + case SND_SOC_DAIFMT_RIGHT_J: + break; + default: + return -EINVAL; + } + + i2s->format = fmt; + i2s->master = (i2s->format & SND_SOC_DAIFMT_MASTER_MASK) == + SND_SOC_DAIFMT_CBS_CFS; + + return 0; +} + +static int hi6210_i2s_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *cpu_dai) +{ + struct hi6210_i2s *i2s = dev_get_drvdata(cpu_dai->dev); + u32 bits = 0, rate = 0, signed_data = 0, fmt = 0; + u32 val; + struct snd_dmaengine_dai_dma_data *dma_data; + + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_U16_LE: + signed_data = HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT; + /* fallthru */ + case SNDRV_PCM_FORMAT_S16_LE: + bits = HII2S_BITS_16; + break; + case SNDRV_PCM_FORMAT_U24_LE: + signed_data = HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT; + /* fallthru */ + case SNDRV_PCM_FORMAT_S24_LE: + bits = HII2S_BITS_24; + break; + default: + dev_err(cpu_dai->dev, "Bad format\n"); + return -EINVAL; + } + + + switch (params_rate(params)) { + case 8000: + rate = HII2S_FS_RATE_8KHZ; + break; + case 16000: + rate = HII2S_FS_RATE_16KHZ; + break; + case 32000: + rate = HII2S_FS_RATE_32KHZ; + break; + case 48000: + rate = HII2S_FS_RATE_48KHZ; + break; + case 96000: + rate = HII2S_FS_RATE_96KHZ; + break; + case 192000: + rate = HII2S_FS_RATE_192KHZ; + break; + default: + dev_err(cpu_dai->dev, "Bad rate: %d\n", params_rate(params)); + return -EINVAL; + } + + if (!(params_channels(params))) { + dev_err(cpu_dai->dev, "Bad channels\n"); + return -EINVAL; + } + + dma_data = snd_soc_dai_get_dma_data(cpu_dai, substream); + + switch (bits) { + case HII2S_BITS_24: + i2s->bits = 32; + dma_data->addr_width = 3; + break; + default: + i2s->bits = 16; + dma_data->addr_width = 2; + break; + } + i2s->rate = params_rate(params); + i2s->channels = params_channels(params); + i2s->channel_length = i2s->channels * i2s->bits; + + val = hi6210_read_reg(i2s, HII2S_ST_DL_FIFO_TH_CFG); + val &= ~((HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_MASK << + HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_SHIFT) | + (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_MASK << + HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_SHIFT) | + (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_MASK << + HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_SHIFT) | + (HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_MASK << + HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_SHIFT)); + val |= ((16 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_SHIFT) | + (30 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_SHIFT) | + (16 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_SHIFT) | + (30 << HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_SHIFT)); + hi6210_write_reg(i2s, HII2S_ST_DL_FIFO_TH_CFG, val); + + + val = hi6210_read_reg(i2s, HII2S_IF_CLK_EN_CFG); + val |= (BIT(19) | BIT(18) | BIT(17) | + HII2S_IF_CLK_EN_CFG__S2_IF_CLK_EN | + HII2S_IF_CLK_EN_CFG__S2_OL_MIXER_EN | + HII2S_IF_CLK_EN_CFG__S2_OL_SRC_EN | + HII2S_IF_CLK_EN_CFG__ST_DL_R_EN | + HII2S_IF_CLK_EN_CFG__ST_DL_L_EN); + hi6210_write_reg(i2s, HII2S_IF_CLK_EN_CFG, val); + + + val = hi6210_read_reg(i2s, HII2S_DIG_FILTER_CLK_EN_CFG); + val &= ~(HII2S_DIG_FILTER_CLK_EN_CFG__DACR_SDM_EN | + HII2S_DIG_FILTER_CLK_EN_CFG__DACR_HBF2I_EN | + HII2S_DIG_FILTER_CLK_EN_CFG__DACR_AGC_EN | + HII2S_DIG_FILTER_CLK_EN_CFG__DACL_SDM_EN | + HII2S_DIG_FILTER_CLK_EN_CFG__DACL_HBF2I_EN | + HII2S_DIG_FILTER_CLK_EN_CFG__DACL_AGC_EN); + val |= (HII2S_DIG_FILTER_CLK_EN_CFG__DACR_MIXER_EN | + HII2S_DIG_FILTER_CLK_EN_CFG__DACL_MIXER_EN); + hi6210_write_reg(i2s, HII2S_DIG_FILTER_CLK_EN_CFG, val); + + + val = hi6210_read_reg(i2s, HII2S_DIG_FILTER_MODULE_CFG); + val &= ~(HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN2_MUTE | + HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN2_MUTE); + hi6210_write_reg(i2s, HII2S_DIG_FILTER_MODULE_CFG, val); + + val = hi6210_read_reg(i2s, HII2S_MUX_TOP_MODULE_CFG); + val &= ~(HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN1_MUTE | + HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN2_MUTE | + HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN1_MUTE | + HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN2_MUTE); + hi6210_write_reg(i2s, HII2S_MUX_TOP_MODULE_CFG, val); + + + switch (i2s->format & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + i2s->master = false; + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val |= HII2S_I2S_CFG__S2_MST_SLV; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + break; + case SND_SOC_DAIFMT_CBS_CFS: + i2s->master = true; + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val &= ~HII2S_I2S_CFG__S2_MST_SLV; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + break; + default: + WARN_ONCE(1, "Invalid i2s->fmt MASTER_MASK. This shouldn't happen\n"); + return -EINVAL; + } + + switch (i2s->format & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + fmt = HII2S_FORMAT_I2S; + break; + case SND_SOC_DAIFMT_LEFT_J: + fmt = HII2S_FORMAT_LEFT_JUST; + break; + case SND_SOC_DAIFMT_RIGHT_J: + fmt = HII2S_FORMAT_RIGHT_JUST; + break; + default: + WARN_ONCE(1, "Invalid i2s->fmt FORMAT_MASK. This shouldn't happen\n"); + return -EINVAL; + } + + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val &= ~(HII2S_I2S_CFG__S2_FUNC_MODE_MASK << + HII2S_I2S_CFG__S2_FUNC_MODE_SHIFT); + val |= fmt << HII2S_I2S_CFG__S2_FUNC_MODE_SHIFT; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + + + val = hi6210_read_reg(i2s, HII2S_CLK_SEL); + val &= ~(HII2S_CLK_SEL__I2S_BT_FM_SEL | /* BT gets the I2S */ + HII2S_CLK_SEL__EXT_12_288MHZ_SEL); + hi6210_write_reg(i2s, HII2S_CLK_SEL, val); + + dma_data->maxburst = 2; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + dma_data->addr = i2s->base_phys + HII2S_ST_DL_CHANNEL; + else + dma_data->addr = i2s->base_phys + HII2S_STEREO_UPLINK_CHANNEL; + + switch (i2s->channels) { + case 1: + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val |= HII2S_I2S_CFG__S2_FRAME_MODE; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + break; + default: + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val &= ~HII2S_I2S_CFG__S2_FRAME_MODE; + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + break; + } + + /* clear loopback, set signed type and word length */ + val = hi6210_read_reg(i2s, HII2S_I2S_CFG); + val &= ~HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT; + val &= ~(HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_MASK << + HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_SHIFT); + val &= ~(HII2S_I2S_CFG__S2_DIRECT_LOOP_MASK << + HII2S_I2S_CFG__S2_DIRECT_LOOP_SHIFT); + val |= signed_data; + val |= (bits << HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_SHIFT); + hi6210_write_reg(i2s, HII2S_I2S_CFG, val); + + + if (!i2s->master) + return 0; + + /* set DAC and related units to correct rate */ + val = hi6210_read_reg(i2s, HII2S_FS_CFG); + val &= ~(HII2S_FS_CFG__FS_S2_MASK << HII2S_FS_CFG__FS_S2_SHIFT); + val &= ~(HII2S_FS_CFG__FS_DACLR_MASK << HII2S_FS_CFG__FS_DACLR_SHIFT); + val &= ~(HII2S_FS_CFG__FS_ST_DL_R_MASK << + HII2S_FS_CFG__FS_ST_DL_R_SHIFT); + val &= ~(HII2S_FS_CFG__FS_ST_DL_L_MASK << + HII2S_FS_CFG__FS_ST_DL_L_SHIFT); + val |= (rate << HII2S_FS_CFG__FS_S2_SHIFT); + val |= (rate << HII2S_FS_CFG__FS_DACLR_SHIFT); + val |= (rate << HII2S_FS_CFG__FS_ST_DL_R_SHIFT); + val |= (rate << HII2S_FS_CFG__FS_ST_DL_L_SHIFT); + hi6210_write_reg(i2s, HII2S_FS_CFG, val); + + return 0; +} + +static int hi6210_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *cpu_dai) +{ + pr_debug("%s\n", __func__); + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + hi6210_i2s_rxctrl(cpu_dai, 1); + else + hi6210_i2s_txctrl(cpu_dai, 1); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + hi6210_i2s_rxctrl(cpu_dai, 0); + else + hi6210_i2s_txctrl(cpu_dai, 0); + break; + default: + dev_err(cpu_dai->dev, "uknown cmd\n"); + return -EINVAL; + } + return 0; +} + +static int hi6210_i2s_dai_probe(struct snd_soc_dai *dai) +{ + struct hi6210_i2s *i2s = snd_soc_dai_get_drvdata(dai); + + snd_soc_dai_init_dma_data(dai, + &i2s->dma_data[SNDRV_PCM_STREAM_PLAYBACK], + &i2s->dma_data[SNDRV_PCM_STREAM_CAPTURE]); + + return 0; +} + + +static struct snd_soc_dai_ops hi6210_i2s_dai_ops = { + .trigger = hi6210_i2s_trigger, + .hw_params = hi6210_i2s_hw_params, + .set_fmt = hi6210_i2s_set_fmt, + .startup = hi6210_i2s_startup, + .shutdown = hi6210_i2s_shutdown, +}; + +struct snd_soc_dai_driver hi6210_i2s_dai_init = { + .probe = hi6210_i2s_dai_probe, + .playback = { + .channels_min = 2, + .channels_max = 2, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_U16_LE, + .rates = SNDRV_PCM_RATE_48000, + }, + .capture = { + .channels_min = 2, + .channels_max = 2, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_U16_LE, + .rates = SNDRV_PCM_RATE_48000, + }, + .ops = &hi6210_i2s_dai_ops, +}; + +static const struct snd_soc_component_driver hi6210_i2s_i2s_comp = { + .name = "hi6210_i2s-i2s", +}; + +static int hi6210_i2s_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct hi6210_i2s *i2s; + struct resource *res; + int ret; + + i2s = devm_kzalloc(&pdev->dev, sizeof(*i2s), GFP_KERNEL); + if (!i2s) + return -ENOMEM; + + i2s->dev = dev; + spin_lock_init(&i2s->lock); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + i2s->base = devm_ioremap_resource(dev, res); + if (IS_ERR(i2s->base)) + return PTR_ERR(i2s->base); + + i2s->base_phys = (phys_addr_t)res->start; + i2s->dai = hi6210_i2s_dai_init; + + dev_set_drvdata(&pdev->dev, i2s); + + i2s->sysctrl = syscon_regmap_lookup_by_phandle(node, + "hisilicon,sysctrl-syscon"); + if (IS_ERR(i2s->sysctrl)) + return PTR_ERR(i2s->sysctrl); + + i2s->clk[CLK_DACODEC] = devm_clk_get(&pdev->dev, "dacodec"); + if (IS_ERR_OR_NULL(i2s->clk[CLK_DACODEC])) + return PTR_ERR(i2s->clk[CLK_DACODEC]); + i2s->clocks++; + + i2s->clk[CLK_I2S_BASE] = devm_clk_get(&pdev->dev, "i2s-base"); + if (IS_ERR_OR_NULL(i2s->clk[CLK_I2S_BASE])) + return PTR_ERR(i2s->clk[CLK_I2S_BASE]); + i2s->clocks++; + + ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + if (ret) + return ret; + + ret = devm_snd_soc_register_component(&pdev->dev, &hi6210_i2s_i2s_comp, + &i2s->dai, 1); + return ret; +} + +static const struct of_device_id hi6210_i2s_dt_ids[] = { + { .compatible = "hisilicon,hi6210-i2s" }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, hi6210_i2s_dt_ids); + +static struct platform_driver hi6210_i2s_driver = { + .probe = hi6210_i2s_probe, + .driver = { + .name = "hi6210_i2s", + .of_match_table = hi6210_i2s_dt_ids, + }, +}; + +module_platform_driver(hi6210_i2s_driver); + +MODULE_DESCRIPTION("Hisilicon HI6210 I2S driver"); +MODULE_AUTHOR("Andy Green "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/hisilicon/hi6210-i2s.h b/sound/soc/hisilicon/hi6210-i2s.h new file mode 100644 index 0000000000000000000000000000000000000000..85cecc4939a008992b06161af5379e9e7e237169 --- /dev/null +++ b/sound/soc/hisilicon/hi6210-i2s.h @@ -0,0 +1,276 @@ +/* + * linux/sound/soc/hisilicon/hi6210-i2s.h + * + * Copyright (C) 2015 Linaro, Ltd + * Author: Andy Green + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Note at least on 6220, S2 == BT, S1 == Digital FM Radio IF + */ + +#ifndef _HI6210_I2S_H +#define _HI6210_I2S_H + +#define HII2S_SW_RST_N 0 + +#define HII2S_SW_RST_N__STEREO_UPLINK_WORDLEN_SHIFT 28 +#define HII2S_SW_RST_N__STEREO_UPLINK_WORDLEN_MASK 3 +#define HII2S_SW_RST_N__THIRDMD_UPLINK_WORDLEN_SHIFT 26 +#define HII2S_SW_RST_N__THIRDMD_UPLINK_WORDLEN_MASK 3 +#define HII2S_SW_RST_N__VOICE_UPLINK_WORDLEN_SHIFT 24 +#define HII2S_SW_RST_N__VOICE_UPLINK_WORDLEN_MASK 3 +#define HII2S_SW_RST_N__ST_DL_WORDLEN_SHIFT 20 +#define HII2S_SW_RST_N__ST_DL_WORDLEN_MASK 3 +#define HII2S_SW_RST_N__THIRDMD_DLINK_WORDLEN_SHIFT 18 +#define HII2S_SW_RST_N__THIRDMD_DLINK_WORDLEN_MASK 3 +#define HII2S_SW_RST_N__VOICE_DLINK_WORDLEN_SHIFT 16 +#define HII2S_SW_RST_N__VOICE_DLINK_WORDLEN_MASK 3 + +#define HII2S_SW_RST_N__SW_RST_N BIT(0) + +enum hi6210_bits { + HII2S_BITS_16, + HII2S_BITS_18, + HII2S_BITS_20, + HII2S_BITS_24, +}; + + +#define HII2S_IF_CLK_EN_CFG 4 + +#define HII2S_IF_CLK_EN_CFG__THIRDMD_UPLINK_EN BIT(25) +#define HII2S_IF_CLK_EN_CFG__THIRDMD_DLINK_EN BIT(24) +#define HII2S_IF_CLK_EN_CFG__S3_IF_CLK_EN BIT(20) +#define HII2S_IF_CLK_EN_CFG__S2_IF_CLK_EN BIT(16) +#define HII2S_IF_CLK_EN_CFG__S2_OL_MIXER_EN BIT(15) +#define HII2S_IF_CLK_EN_CFG__S2_OL_SRC_EN BIT(14) +#define HII2S_IF_CLK_EN_CFG__S2_IR_PGA_EN BIT(13) +#define HII2S_IF_CLK_EN_CFG__S2_IL_PGA_EN BIT(12) +#define HII2S_IF_CLK_EN_CFG__S1_IR_PGA_EN BIT(10) +#define HII2S_IF_CLK_EN_CFG__S1_IL_PGA_EN BIT(9) +#define HII2S_IF_CLK_EN_CFG__S1_IF_CLK_EN BIT(8) +#define HII2S_IF_CLK_EN_CFG__VOICE_DLINK_SRC_EN BIT(7) +#define HII2S_IF_CLK_EN_CFG__VOICE_DLINK_EN BIT(6) +#define HII2S_IF_CLK_EN_CFG__ST_DL_R_EN BIT(5) +#define HII2S_IF_CLK_EN_CFG__ST_DL_L_EN BIT(4) +#define HII2S_IF_CLK_EN_CFG__VOICE_UPLINK_R_EN BIT(3) +#define HII2S_IF_CLK_EN_CFG__VOICE_UPLINK_L_EN BIT(2) +#define HII2S_IF_CLK_EN_CFG__STEREO_UPLINK_R_EN BIT(1) +#define HII2S_IF_CLK_EN_CFG__STEREO_UPLINK_L_EN BIT(0) + +#define HII2S_DIG_FILTER_CLK_EN_CFG 8 +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_SDM_EN BIT(30) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_HBF2I_EN BIT(28) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_MIXER_EN BIT(25) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACR_AGC_EN BIT(24) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_SDM_EN BIT(22) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_HBF2I_EN BIT(20) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_MIXER_EN BIT(17) +#define HII2S_DIG_FILTER_CLK_EN_CFG__DACL_AGC_EN BIT(16) + +#define HII2S_FS_CFG 0xc + +#define HII2S_FS_CFG__FS_S2_SHIFT 28 +#define HII2S_FS_CFG__FS_S2_MASK 7 +#define HII2S_FS_CFG__FS_S1_SHIFT 24 +#define HII2S_FS_CFG__FS_S1_MASK 7 +#define HII2S_FS_CFG__FS_ADCLR_SHIFT 20 +#define HII2S_FS_CFG__FS_ADCLR_MASK 7 +#define HII2S_FS_CFG__FS_DACLR_SHIFT 16 +#define HII2S_FS_CFG__FS_DACLR_MASK 7 +#define HII2S_FS_CFG__FS_ST_DL_R_SHIFT 8 +#define HII2S_FS_CFG__FS_ST_DL_R_MASK 7 +#define HII2S_FS_CFG__FS_ST_DL_L_SHIFT 4 +#define HII2S_FS_CFG__FS_ST_DL_L_MASK 7 +#define HII2S_FS_CFG__FS_VOICE_DLINK_SHIFT 0 +#define HII2S_FS_CFG__FS_VOICE_DLINK_MASK 7 + +enum hi6210_i2s_rates { + HII2S_FS_RATE_8KHZ = 0, + HII2S_FS_RATE_16KHZ = 1, + HII2S_FS_RATE_32KHZ = 2, + HII2S_FS_RATE_48KHZ = 4, + HII2S_FS_RATE_96KHZ = 5, + HII2S_FS_RATE_192KHZ = 6, +}; + +#define HII2S_I2S_CFG 0x10 + +#define HII2S_I2S_CFG__S2_IF_TX_EN BIT(31) +#define HII2S_I2S_CFG__S2_IF_RX_EN BIT(30) +#define HII2S_I2S_CFG__S2_FRAME_MODE BIT(29) +#define HII2S_I2S_CFG__S2_MST_SLV BIT(28) +#define HII2S_I2S_CFG__S2_LRCK_MODE BIT(27) +#define HII2S_I2S_CFG__S2_CHNNL_MODE BIT(26) +#define HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_SHIFT 24 +#define HII2S_I2S_CFG__S2_CODEC_IO_WORDLENGTH_MASK 3 +#define HII2S_I2S_CFG__S2_DIRECT_LOOP_SHIFT 22 +#define HII2S_I2S_CFG__S2_DIRECT_LOOP_MASK 3 +#define HII2S_I2S_CFG__S2_TX_CLK_SEL BIT(21) +#define HII2S_I2S_CFG__S2_RX_CLK_SEL BIT(20) +#define HII2S_I2S_CFG__S2_CODEC_DATA_FORMAT BIT(19) +#define HII2S_I2S_CFG__S2_FUNC_MODE_SHIFT 16 +#define HII2S_I2S_CFG__S2_FUNC_MODE_MASK 7 +#define HII2S_I2S_CFG__S1_IF_TX_EN BIT(15) +#define HII2S_I2S_CFG__S1_IF_RX_EN BIT(14) +#define HII2S_I2S_CFG__S1_FRAME_MODE BIT(13) +#define HII2S_I2S_CFG__S1_MST_SLV BIT(12) +#define HII2S_I2S_CFG__S1_LRCK_MODE BIT(11) +#define HII2S_I2S_CFG__S1_CHNNL_MODE BIT(10) +#define HII2S_I2S_CFG__S1_CODEC_IO_WORDLENGTH_SHIFT 8 +#define HII2S_I2S_CFG__S1_CODEC_IO_WORDLENGTH_MASK 3 +#define HII2S_I2S_CFG__S1_DIRECT_LOOP_SHIFT 6 +#define HII2S_I2S_CFG__S1_DIRECT_LOOP_MASK 3 +#define HII2S_I2S_CFG__S1_TX_CLK_SEL BIT(5) +#define HII2S_I2S_CFG__S1_RX_CLK_SEL BIT(4) +#define HII2S_I2S_CFG__S1_CODEC_DATA_FORMAT BIT(3) +#define HII2S_I2S_CFG__S1_FUNC_MODE_SHIFT 0 +#define HII2S_I2S_CFG__S1_FUNC_MODE_MASK 7 + +enum hi6210_i2s_formats { + HII2S_FORMAT_I2S, + HII2S_FORMAT_PCM_STD, + HII2S_FORMAT_PCM_USER, + HII2S_FORMAT_LEFT_JUST, + HII2S_FORMAT_RIGHT_JUST, +}; + +#define HII2S_DIG_FILTER_MODULE_CFG 0x14 + +#define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_GAIN_SHIFT 28 +#define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_GAIN_MASK 3 +#define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN4_MUTE BIT(27) +#define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN3_MUTE BIT(26) +#define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN2_MUTE BIT(25) +#define HII2S_DIG_FILTER_MODULE_CFG__DACR_MIXER_IN1_MUTE BIT(24) +#define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_GAIN_SHIFT 20 +#define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_GAIN_MASK 3 +#define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN4_MUTE BIT(19) +#define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN3_MUTE BIT(18) +#define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN2_MUTE BIT(17) +#define HII2S_DIG_FILTER_MODULE_CFG__DACL_MIXER_IN1_MUTE BIT(16) +#define HII2S_DIG_FILTER_MODULE_CFG__SW_DACR_SDM_DITHER BIT(9) +#define HII2S_DIG_FILTER_MODULE_CFG__SW_DACL_SDM_DITHER BIT(8) +#define HII2S_DIG_FILTER_MODULE_CFG__LM_CODEC_DAC2ADC_SHIFT 4 +#define HII2S_DIG_FILTER_MODULE_CFG__LM_CODEC_DAC2ADC_MASK 7 +#define HII2S_DIG_FILTER_MODULE_CFG__RM_CODEC_DAC2ADC_SHIFT 0 +#define HII2S_DIG_FILTER_MODULE_CFG__RM_CODEC_DAC2ADC_MASK 7 + +enum hi6210_gains { + HII2S_GAIN_100PC, + HII2S_GAIN_50PC, + HII2S_GAIN_25PC, +}; + +#define HII2S_MUX_TOP_MODULE_CFG 0x18 + +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_GAIN_SHIFT 14 +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_GAIN_MASK 3 +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN2_MUTE BIT(13) +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_MIXER_IN1_MUTE BIT(12) +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_GAIN_SHIFT 10 +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_GAIN_MASK 3 +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN2_MUTE BIT(9) +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_MIXER_IN1_MUTE BIT(8) +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_SRC_RDY BIT(6) +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_SRC_MODE_SHIFT 4 +#define HII2S_MUX_TOP_MODULE_CFG__S2_OL_SRC_MODE_MASK 3 +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_SRC_RDY BIT(3) +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_SRC_MODE_SHIFT 0 +#define HII2S_MUX_TOP_MODULE_CFG__VOICE_DLINK_SRC_MODE_MASK 7 + +enum hi6210_s2_src_mode { + HII2S_S2_SRC_MODE_3, + HII2S_S2_SRC_MODE_12, + HII2S_S2_SRC_MODE_6, + HII2S_S2_SRC_MODE_2, +}; + +enum hi6210_voice_dlink_src_mode { + HII2S_VOICE_DL_SRC_MODE_12 = 1, + HII2S_VOICE_DL_SRC_MODE_6, + HII2S_VOICE_DL_SRC_MODE_2, + HII2S_VOICE_DL_SRC_MODE_3, +}; + +#define HII2S_ADC_PGA_CFG 0x1c +#define HII2S_S1_INPUT_PGA_CFG 0x20 +#define HII2S_S2_INPUT_PGA_CFG 0x24 +#define HII2S_ST_DL_PGA_CFG 0x28 +#define HII2S_VOICE_SIDETONE_DLINK_PGA_CFG 0x2c +#define HII2S_APB_AFIFO_CFG_1 0x30 +#define HII2S_APB_AFIFO_CFG_2 0x34 +#define HII2S_ST_DL_FIFO_TH_CFG 0x38 + +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_SHIFT 24 +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AEMPTY_MASK 0x1f +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_SHIFT 16 +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_R_AFULL_MASK 0x1f +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_SHIFT 8 +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AEMPTY_MASK 0x1f +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_SHIFT 0 +#define HII2S_ST_DL_FIFO_TH_CFG__ST_DL_L_AFULL_MASK 0x1f + +#define HII2S_STEREO_UPLINK_FIFO_TH_CFG 0x3c +#define HII2S_VOICE_UPLINK_FIFO_TH_CFG 0x40 +#define HII2S_CODEC_IRQ_MASK 0x44 +#define HII2S_CODEC_IRQ 0x48 +#define HII2S_DACL_AGC_CFG_1 0x4c +#define HII2S_DACL_AGC_CFG_2 0x50 +#define HII2S_DACR_AGC_CFG_1 0x54 +#define HII2S_DACR_AGC_CFG_2 0x58 +#define HII2S_DMIC_SIF_CFG 0x5c +#define HII2S_MISC_CFG 0x60 + +#define HII2S_MISC_CFG__THIRDMD_DLINK_TEST_SEL BIT(17) +#define HII2S_MISC_CFG__THIRDMD_DLINK_DIN_SEL BIT(16) +#define HII2S_MISC_CFG__S3_DOUT_RIGHT_SEL BIT(14) +#define HII2S_MISC_CFG__S3_DOUT_LEFT_SEL BIT(13) +#define HII2S_MISC_CFG__S3_DIN_TEST_SEL BIT(12) +#define HII2S_MISC_CFG__VOICE_DLINK_SRC_UP_DOUT_VLD_SEL BIT(8) +#define HII2S_MISC_CFG__VOICE_DLINK_TEST_SEL BIT(7) +#define HII2S_MISC_CFG__VOICE_DLINK_DIN_SEL BIT(6) +#define HII2S_MISC_CFG__ST_DL_TEST_SEL BIT(4) +#define HII2S_MISC_CFG__S2_DOUT_RIGHT_SEL BIT(3) +#define HII2S_MISC_CFG__S2_DOUT_TEST_SEL BIT(2) +#define HII2S_MISC_CFG__S1_DOUT_TEST_SEL BIT(1) +#define HII2S_MISC_CFG__S2_DOUT_LEFT_SEL BIT(0) + +#define HII2S_S2_SRC_CFG 0x64 +#define HII2S_MEM_CFG 0x68 +#define HII2S_THIRDMD_PCM_PGA_CFG 0x6c +#define HII2S_THIRD_MODEM_FIFO_TH 0x70 +#define HII2S_S3_ANTI_FREQ_JITTER_TX_INC_CNT 0x74 +#define HII2S_S3_ANTI_FREQ_JITTER_TX_DEC_CNT 0x78 +#define HII2S_S3_ANTI_FREQ_JITTER_RX_INC_CNT 0x7c +#define HII2S_S3_ANTI_FREQ_JITTER_RX_DEC_CNT 0x80 +#define HII2S_ANTI_FREQ_JITTER_EN 0x84 +#define HII2S_CLK_SEL 0x88 + +/* 0 = BT owns the i2s */ +#define HII2S_CLK_SEL__I2S_BT_FM_SEL BIT(0) +/* 0 = internal source, 1 = ext */ +#define HII2S_CLK_SEL__EXT_12_288MHZ_SEL BIT(1) + + +#define HII2S_THIRDMD_DLINK_CHANNEL 0xe8 +#define HII2S_THIRDMD_ULINK_CHANNEL 0xec +#define HII2S_VOICE_DLINK_CHANNEL 0xf0 + +/* shovel data in here for playback */ +#define HII2S_ST_DL_CHANNEL 0xf4 +#define HII2S_STEREO_UPLINK_CHANNEL 0xf8 +#define HII2S_VOICE_UPLINK_CHANNEL 0xfc + +#endif/* _HI6210_I2S_H */ diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index 526855ad479e3c52358522764f5735fe205bada3..67968ef3bbda48528c4bac723252b0cab2684cbb 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -202,6 +202,30 @@ config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH platforms with MAX98090 audio codec it also can support TI jack chip as aux device. If unsure select "N". +config SND_SOC_INTEL_BYT_CHT_DA7213_MACH + tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with DA7212/7213 codec" + depends on X86_INTEL_LPSS && I2C && ACPI + select SND_SOC_DA7213 + select SND_SST_ATOM_HIFI2_PLATFORM + select SND_SST_IPC_ACPI + select SND_SOC_INTEL_SST_MATCH if ACPI + help + This adds support for ASoC machine driver for Intel(R) Baytrail & CherryTrail + platforms with DA7212/7213 audio codec. + If unsure select "N". + +config SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH + tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail platform with no codec (MinnowBoard MAX, Up)" + depends on X86_INTEL_LPSS && I2C && ACPI + select SND_SST_ATOM_HIFI2_PLATFORM + select SND_SST_IPC_ACPI + select SND_SOC_INTEL_SST_MATCH if ACPI + help + This adds support for ASoC machine driver for the MinnowBoard Max or + Up boards and provides access to I2S signals on the Low-Speed + connector + If unsure select "N". + config SND_SOC_INTEL_SKYLAKE tristate select SND_HDA_EXT_CORE diff --git a/sound/soc/intel/atom/sst/sst_acpi.c b/sound/soc/intel/atom/sst/sst_acpi.c index 747c0f393d2dfd52c000ca3ef911e625a26ba0be..dd250b8b26f24c2d09012f9a7a434ae661b5bf49 100644 --- a/sound/soc/intel/atom/sst/sst_acpi.c +++ b/sound/soc/intel/atom/sst/sst_acpi.c @@ -420,7 +420,21 @@ static const struct dmi_system_id byt_table[] = { .callback = byt_thinkpad10_quirk_cb, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "20C3001VHH"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 10"), + }, + }, + { + .callback = byt_thinkpad10_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Tablet B"), + }, + }, + { + .callback = byt_thinkpad10_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Miix 2 10"), }, }, { } @@ -480,12 +494,23 @@ static struct sst_acpi_mach sst_acpi_bytcr[] = { &byt_rvp_platform_data }, {"10EC5651", "bytcr_rt5651", "intel/fw_sst_0f28.bin", "bytcr_rt5651", NULL, &byt_rvp_platform_data }, + {"DLGS7212", "bytcht_da7213", "intel/fw_sst_0f28.bin", "bytcht_da7213", NULL, + &byt_rvp_platform_data }, + {"DLGS7213", "bytcht_da7213", "intel/fw_sst_0f28.bin", "bytcht_da7213", NULL, + &byt_rvp_platform_data }, /* some Baytrail platforms rely on RT5645, use CHT machine driver */ {"10EC5645", "cht-bsw-rt5645", "intel/fw_sst_0f28.bin", "cht-bsw", NULL, &byt_rvp_platform_data }, {"10EC5648", "cht-bsw-rt5645", "intel/fw_sst_0f28.bin", "cht-bsw", NULL, &byt_rvp_platform_data }, - +#if IS_ENABLED(CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH) + /* + * This is always last in the table so that it is selected only when + * enabled explicitly and there is no codec-related information in SSDT + */ + {"80860F28", "bytcht_nocodec", "intel/fw_sst_0f28.bin", "bytcht_nocodec", NULL, + &byt_rvp_platform_data }, +#endif {}, }; @@ -504,6 +529,10 @@ static struct sst_acpi_mach sst_acpi_chv[] = { {"193C9890", "cht-bsw-max98090", "intel/fw_sst_22a8.bin", "cht-bsw", NULL, &chv_platform_data }, + {"DLGS7212", "bytcht_da7213", "intel/fw_sst_22a8.bin", "bytcht_da7213", NULL, + &chv_platform_data }, + {"DLGS7213", "bytcht_da7213", "intel/fw_sst_22a8.bin", "bytcht_da7213", NULL, + &chv_platform_data }, /* some CHT-T platforms rely on RT5640, use Baytrail machine driver */ {"10EC5640", "bytcr_rt5640", "intel/fw_sst_22a8.bin", "bytcr_rt5640", cht_quirk, &chv_platform_data }, @@ -512,6 +541,14 @@ static struct sst_acpi_mach sst_acpi_chv[] = { /* some CHT-T platforms rely on RT5651, use Baytrail machine driver */ {"10EC5651", "bytcr_rt5651", "intel/fw_sst_22a8.bin", "bytcr_rt5651", NULL, &chv_platform_data }, +#if IS_ENABLED(CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH) + /* + * This is always last in the table so that it is selected only when + * enabled explicitly and there is no codec-related information in SSDT + */ + {"808622A8", "bytcht_nocodec", "intel/fw_sst_22a8.bin", "bytcht_nocodec", NULL, + &chv_platform_data }, +#endif {}, }; diff --git a/sound/soc/intel/atom/sst/sst_ipc.c b/sound/soc/intel/atom/sst/sst_ipc.c index 14c2d9d1818010fd8fcb1192a79873808b7107b2..20b01e02ed8f8aed51d9a6ec01b37d5599726bbf 100644 --- a/sound/soc/intel/atom/sst/sst_ipc.c +++ b/sound/soc/intel/atom/sst/sst_ipc.c @@ -236,7 +236,9 @@ static void process_fw_init(struct intel_sst_drv *sst_drv_ctx, retval = init->result; goto ret; } - dev_info(sst_drv_ctx->dev, "FW Version %02x.%02x.%02x.%02x\n", + if (memcmp(&sst_drv_ctx->fw_version, &init->fw_version, + sizeof(init->fw_version))) + dev_info(sst_drv_ctx->dev, "FW Version %02x.%02x.%02x.%02x\n", init->fw_version.type, init->fw_version.major, init->fw_version.minor, init->fw_version.build); dev_dbg(sst_drv_ctx->dev, "Build date %s Time %s\n", diff --git a/sound/soc/intel/boards/Makefile b/sound/soc/intel/boards/Makefile index 5639f10774e6ef444370027e16161b2f78a71adc..56896e09445df13f099956fd41852173ca6817cd 100644 --- a/sound/soc/intel/boards/Makefile +++ b/sound/soc/intel/boards/Makefile @@ -10,6 +10,8 @@ snd-soc-sst-bytcr-rt5651-objs := bytcr_rt5651.o snd-soc-sst-cht-bsw-rt5672-objs := cht_bsw_rt5672.o snd-soc-sst-cht-bsw-rt5645-objs := cht_bsw_rt5645.o snd-soc-sst-cht-bsw-max98090_ti-objs := cht_bsw_max98090_ti.o +snd-soc-sst-byt-cht-da7213-objs := bytcht_da7213.o +snd-soc-sst-byt-cht-nocodec-objs := bytcht_nocodec.o snd-soc-skl_rt286-objs := skl_rt286.o snd-skl_nau88l25_max98357a-objs := skl_nau88l25_max98357a.o snd-soc-skl_nau88l25_ssm4567-objs := skl_nau88l25_ssm4567.o @@ -26,6 +28,8 @@ obj-$(CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH) += snd-soc-sst-bytcr-rt5651.o obj-$(CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH) += snd-soc-sst-cht-bsw-rt5672.o obj-$(CONFIG_SND_SOC_INTEL_CHT_BSW_RT5645_MACH) += snd-soc-sst-cht-bsw-rt5645.o obj-$(CONFIG_SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH) += snd-soc-sst-cht-bsw-max98090_ti.o +obj-$(CONFIG_SND_SOC_INTEL_BYT_CHT_DA7213_MACH) += snd-soc-sst-byt-cht-da7213.o +obj-$(CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH) += snd-soc-sst-byt-cht-nocodec.o obj-$(CONFIG_SND_SOC_INTEL_SKL_RT286_MACH) += snd-soc-skl_rt286.o obj-$(CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH) += snd-skl_nau88l25_max98357a.o obj-$(CONFIG_SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH) += snd-soc-skl_nau88l25_ssm4567.o diff --git a/sound/soc/intel/boards/bdw-rt5677.c b/sound/soc/intel/boards/bdw-rt5677.c index 53c6b4cbb1e1494519f7c5b76b2051b7dcfc8cb7..14d9693c1641b460159f9b13ba0f0e8298825e69 100644 --- a/sound/soc/intel/boards/bdw-rt5677.c +++ b/sound/soc/intel/boards/bdw-rt5677.c @@ -193,13 +193,12 @@ static int bdw_rt5677_init(struct snd_soc_pcm_runtime *rtd) RT5677_CLK_SEL_I2S1_ASRC); /* Request rt5677 GPIO for headphone amp control */ - bdw_rt5677->gpio_hp_en = devm_gpiod_get_index(codec->dev, - "headphone-enable", 0, 0); + bdw_rt5677->gpio_hp_en = devm_gpiod_get(codec->dev, "headphone-enable", + GPIOD_OUT_LOW); if (IS_ERR(bdw_rt5677->gpio_hp_en)) { dev_err(codec->dev, "Can't find HP_AMP_SHDN_L gpio\n"); return PTR_ERR(bdw_rt5677->gpio_hp_en); } - gpiod_direction_output(bdw_rt5677->gpio_hp_en, 0); /* Create and initialize headphone jack */ if (!snd_soc_card_jack_new(rtd->card, "Headphone Jack", diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c index faf865bb176540c8b17d57df4a495aacc0000a98..6dcbbcefc25b446d5191362101f0bc206414cf56 100644 --- a/sound/soc/intel/boards/broadwell.c +++ b/sound/soc/intel/boards/broadwell.c @@ -269,9 +269,6 @@ static struct snd_soc_card broadwell_rt286 = { static int broadwell_audio_probe(struct platform_device *pdev) { broadwell_rt286.dev = &pdev->dev; - - snd_soc_set_dmi_name(&broadwell_rt286, NULL); - return devm_snd_soc_register_card(&pdev->dev, &broadwell_rt286); } diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c index 2cda06cde4d13823f97e90ebfa5d242cd780dd4f..3a8c4d954a917ba28ef46caaa1f4c3a18109da34 100644 --- a/sound/soc/intel/boards/bxt_da7219_max98357a.c +++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c @@ -55,6 +55,54 @@ enum { BXT_DPCM_AUDIO_HDMI3_PB, }; +static inline struct snd_soc_dai *bxt_get_codec_dai(struct snd_soc_card *card) +{ + struct snd_soc_pcm_runtime *rtd; + + list_for_each_entry(rtd, &card->rtd_list, list) { + + if (!strncmp(rtd->codec_dai->name, BXT_DIALOG_CODEC_DAI, + strlen(BXT_DIALOG_CODEC_DAI))) + return rtd->codec_dai; + } + + return NULL; +} + +static int platform_clock_control(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + int ret = 0; + struct snd_soc_dapm_context *dapm = w->dapm; + struct snd_soc_card *card = dapm->card; + struct snd_soc_dai *codec_dai; + + codec_dai = bxt_get_codec_dai(card); + if (!codec_dai) { + dev_err(card->dev, "Codec dai not found; Unable to set/unset codec pll\n"); + return -EIO; + } + + if (SND_SOC_DAPM_EVENT_OFF(event)) { + ret = snd_soc_dai_set_pll(codec_dai, 0, + DA7219_SYSCLK_MCLK, 0, 0); + if (ret) + dev_err(card->dev, "failed to stop PLL: %d\n", ret); + } else if(SND_SOC_DAPM_EVENT_ON(event)) { + ret = snd_soc_dai_set_sysclk(codec_dai, + DA7219_CLKSRC_MCLK, 19200000, SND_SOC_CLOCK_IN); + if (ret) + dev_err(card->dev, "can't set codec sysclk configuration\n"); + + ret = snd_soc_dai_set_pll(codec_dai, 0, + DA7219_SYSCLK_PLL_SRM, 0, DA7219_PLL_FREQ_OUT_98304); + if (ret) + dev_err(card->dev, "failed to start PLL: %d\n", ret); + } + + return ret; +} + static const struct snd_kcontrol_new broxton_controls[] = { SOC_DAPM_PIN_SWITCH("Headphone Jack"), SOC_DAPM_PIN_SWITCH("Headset Mic"), @@ -69,6 +117,8 @@ static const struct snd_soc_dapm_widget broxton_widgets[] = { SND_SOC_DAPM_SPK("HDMI1", NULL), SND_SOC_DAPM_SPK("HDMI2", NULL), SND_SOC_DAPM_SPK("HDMI3", NULL), + SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, + platform_clock_control, SND_SOC_DAPM_POST_PMD|SND_SOC_DAPM_PRE_PMU), }; static const struct snd_soc_dapm_route broxton_map[] = { @@ -109,6 +159,9 @@ static const struct snd_soc_dapm_route broxton_map[] = { /* DMIC */ {"dmic01_hifi", NULL, "DMIC01 Rx"}, {"DMIC01 Rx", NULL, "DMIC AIF"}, + + { "Headphone Jack", NULL, "Platform Clock" }, + { "Headset Mic", NULL, "Platform Clock" }, }; static int broxton_ssp_fixup(struct snd_soc_pcm_runtime *rtd, @@ -243,49 +296,6 @@ static const struct snd_soc_ops broxton_da7219_fe_ops = { .startup = bxt_fe_startup, }; -static int broxton_da7219_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai = rtd->codec_dai; - int ret; - - ret = snd_soc_dai_set_sysclk(codec_dai, - DA7219_CLKSRC_MCLK, 19200000, SND_SOC_CLOCK_IN); - if (ret < 0) - dev_err(codec_dai->dev, "can't set codec sysclk configuration\n"); - - ret = snd_soc_dai_set_pll(codec_dai, 0, - DA7219_SYSCLK_PLL_SRM, 0, DA7219_PLL_FREQ_OUT_98304); - if (ret < 0) { - dev_err(codec_dai->dev, "failed to start PLL: %d\n", ret); - return -EIO; - } - - return ret; -} - -static int broxton_da7219_hw_free(struct snd_pcm_substream *substream) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai = rtd->codec_dai; - int ret; - - ret = snd_soc_dai_set_pll(codec_dai, 0, - DA7219_SYSCLK_MCLK, 0, 0); - if (ret < 0) { - dev_err(codec_dai->dev, "failed to stop PLL: %d\n", ret); - return -EIO; - } - - return ret; -} - -static const struct snd_soc_ops broxton_da7219_ops = { - .hw_params = broxton_da7219_hw_params, - .hw_free = broxton_da7219_hw_free, -}; - static int broxton_dmic_fixup(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params) { @@ -467,7 +477,6 @@ static struct snd_soc_dai_link broxton_dais[] = { SND_SOC_DAIFMT_CBS_CFS, .ignore_pmdown_time = 1, .be_hw_params_fixup = broxton_ssp_fixup, - .ops = &broxton_da7219_ops, .dpcm_playback = 1, .dpcm_capture = 1, }, diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c index 176c080a98187080c59881147c5c0617089ce936..1a68d043c803331afeb4c8acabacb3844816e02e 100644 --- a/sound/soc/intel/boards/bxt_rt298.c +++ b/sound/soc/intel/boards/bxt_rt298.c @@ -274,12 +274,15 @@ static int bxt_fe_startup(struct snd_pcm_substream *substream) * on this platform for PCM device we support: * 48Khz * stereo + * 16-bit audio */ runtime->hw.channels_max = 2; snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, &constraints_channels); + runtime->hw.formats = SNDRV_PCM_FMTBIT_S16_LE; + snd_pcm_hw_constraint_msbits(runtime, 0, 16, 16); snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &constraints_rates); diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c new file mode 100644 index 0000000000000000000000000000000000000000..18873e23f404b1aec4f5ef05a8483887529fc309 --- /dev/null +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -0,0 +1,283 @@ +/* + * bytcht-da7213.c - ASoc Machine driver for Intel Baytrail and + * Cherrytrail-based platforms, with Dialog DA7213 codec + * + * Copyright (C) 2017 Intel Corporation + * Author: Pierre-Louis Bossart + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../codecs/da7213.h" +#include "../atom/sst-atom-controls.h" +#include "../common/sst-acpi.h" + +static const struct snd_kcontrol_new controls[] = { + SOC_DAPM_PIN_SWITCH("Headphone Jack"), + SOC_DAPM_PIN_SWITCH("Headset Mic"), + SOC_DAPM_PIN_SWITCH("Mic"), + SOC_DAPM_PIN_SWITCH("Aux In"), +}; + +static const struct snd_soc_dapm_widget dapm_widgets[] = { + SND_SOC_DAPM_HP("Headphone Jack", NULL), + SND_SOC_DAPM_MIC("Headset Mic", NULL), + SND_SOC_DAPM_MIC("Mic", NULL), + SND_SOC_DAPM_LINE("Aux In", NULL), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + {"Headphone Jack", NULL, "HPL"}, + {"Headphone Jack", NULL, "HPR"}, + + {"AUXL", NULL, "Aux In"}, + {"AUXR", NULL, "Aux In"}, + + /* Assume Mic1 is linked to Headset and Mic2 to on-board mic */ + {"MIC1", NULL, "Headset Mic"}, + {"MIC2", NULL, "Mic"}, + + /* SOC-codec link */ + {"ssp2 Tx", NULL, "codec_out0"}, + {"ssp2 Tx", NULL, "codec_out1"}, + {"codec_in0", NULL, "ssp2 Rx"}, + {"codec_in1", NULL, "ssp2 Rx"}, + + {"Playback", NULL, "ssp2 Tx"}, + {"ssp2 Rx", NULL, "Capture"}, +}; + +static int codec_fixup(struct snd_soc_pcm_runtime *rtd, + struct snd_pcm_hw_params *params) +{ + int ret; + struct snd_interval *rate = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_RATE); + struct snd_interval *channels = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_CHANNELS); + + /* The DSP will convert the FE rate to 48k, stereo, 24bits */ + rate->min = rate->max = 48000; + channels->min = channels->max = 2; + + /* set SSP2 to 24-bit */ + params_set_format(params, SNDRV_PCM_FORMAT_S24_LE); + + /* + * Default mode for SSP configuration is TDM 4 slot, override config + * with explicit setting to I2S 2ch 24-bit. The word length is set with + * dai_set_tdm_slot() since there is no other API exposed + */ + ret = snd_soc_dai_set_fmt(rtd->cpu_dai, + SND_SOC_DAIFMT_I2S | + SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBS_CFS); + if (ret < 0) { + dev_err(rtd->dev, "can't set format to I2S, err %d\n", ret); + return ret; + } + + ret = snd_soc_dai_set_tdm_slot(rtd->cpu_dai, 0x3, 0x3, 2, 24); + if (ret < 0) { + dev_err(rtd->dev, "can't set I2S config, err %d\n", ret); + return ret; + } + + return 0; +} + +static int aif1_startup(struct snd_pcm_substream *substream) +{ + return snd_pcm_hw_constraint_single(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, 48000); +} + +static int aif1_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai = rtd->codec_dai; + int ret; + + ret = snd_soc_dai_set_sysclk(codec_dai, DA7213_CLKSRC_MCLK, + 19200000, SND_SOC_CLOCK_IN); + if (ret < 0) + dev_err(codec_dai->dev, "can't set codec sysclk configuration\n"); + + ret = snd_soc_dai_set_pll(codec_dai, 0, + DA7213_SYSCLK_PLL_SRM, 0, DA7213_PLL_FREQ_OUT_98304000); + if (ret < 0) { + dev_err(codec_dai->dev, "failed to start PLL: %d\n", ret); + return -EIO; + } + + return ret; +} + +static int aif1_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai = rtd->codec_dai; + int ret; + + ret = snd_soc_dai_set_pll(codec_dai, 0, + DA7213_SYSCLK_MCLK, 0, 0); + if (ret < 0) { + dev_err(codec_dai->dev, "failed to stop PLL: %d\n", ret); + return -EIO; + } + + return ret; +} + +static const struct snd_soc_ops aif1_ops = { + .startup = aif1_startup, +}; + +static const struct snd_soc_ops ssp2_ops = { + .hw_params = aif1_hw_params, + .hw_free = aif1_hw_free, + +}; + +static struct snd_soc_dai_link dailink[] = { + [MERR_DPCM_AUDIO] = { + .name = "Audio Port", + .stream_name = "Audio", + .cpu_dai_name = "media-cpu-dai", + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .platform_name = "sst-mfld-platform", + .nonatomic = true, + .dynamic = 1, + .dpcm_playback = 1, + .dpcm_capture = 1, + .ops = &aif1_ops, + }, + [MERR_DPCM_DEEP_BUFFER] = { + .name = "Deep-Buffer Audio Port", + .stream_name = "Deep-Buffer Audio", + .cpu_dai_name = "deepbuffer-cpu-dai", + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .platform_name = "sst-mfld-platform", + .nonatomic = true, + .dynamic = 1, + .dpcm_playback = 1, + .ops = &aif1_ops, + }, + [MERR_DPCM_COMPR] = { + .name = "Compressed Port", + .stream_name = "Compress", + .cpu_dai_name = "compress-cpu-dai", + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .platform_name = "sst-mfld-platform", + }, + /* CODEC<->CODEC link */ + /* back ends */ + { + .name = "SSP2-Codec", + .id = 1, + .cpu_dai_name = "ssp2-port", + .platform_name = "sst-mfld-platform", + .no_pcm = 1, + .codec_dai_name = "da7213-hifi", + .codec_name = "i2c-DLGS7213:00", + .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF + | SND_SOC_DAIFMT_CBS_CFS, + .be_hw_params_fixup = codec_fixup, + .nonatomic = true, + .dpcm_playback = 1, + .dpcm_capture = 1, + .ops = &ssp2_ops, + }, +}; + +/* SoC card */ +static struct snd_soc_card bytcht_da7213_card = { + .name = "bytcht-da7213", + .owner = THIS_MODULE, + .dai_link = dailink, + .num_links = ARRAY_SIZE(dailink), + .controls = controls, + .num_controls = ARRAY_SIZE(controls), + .dapm_widgets = dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(dapm_widgets), + .dapm_routes = audio_map, + .num_dapm_routes = ARRAY_SIZE(audio_map), +}; + +static char codec_name[16]; /* i2c-:00 with HID being 8 chars */ + +static int bytcht_da7213_probe(struct platform_device *pdev) +{ + int ret_val = 0; + int i; + struct snd_soc_card *card; + struct sst_acpi_mach *mach; + const char *i2c_name = NULL; + int dai_index = 0; + + mach = (&pdev->dev)->platform_data; + card = &bytcht_da7213_card; + card->dev = &pdev->dev; + + /* fix index of codec dai */ + dai_index = MERR_DPCM_COMPR + 1; + for (i = 0; i < ARRAY_SIZE(dailink); i++) { + if (!strcmp(dailink[i].codec_name, "i2c-DLGS7213:00")) { + dai_index = i; + break; + } + } + + /* fixup codec name based on HID */ + i2c_name = sst_acpi_find_name_from_hid(mach->id); + if (i2c_name != NULL) { + snprintf(codec_name, sizeof(codec_name), + "%s%s", "i2c-", i2c_name); + dailink[dai_index].codec_name = codec_name; + } + + ret_val = devm_snd_soc_register_card(&pdev->dev, card); + if (ret_val) { + dev_err(&pdev->dev, + "snd_soc_register_card failed %d\n", ret_val); + return ret_val; + } + platform_set_drvdata(pdev, card); + return ret_val; +} + +static struct platform_driver bytcht_da7213_driver = { + .driver = { + .name = "bytcht_da7213", + }, + .probe = bytcht_da7213_probe, +}; +module_platform_driver(bytcht_da7213_driver); + +MODULE_DESCRIPTION("ASoC Intel(R) Baytrail/Cherrytrail+DA7213 Machine driver"); +MODULE_AUTHOR("Pierre-Louis Bossart"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:bytcht_da7213"); diff --git a/sound/soc/intel/boards/bytcht_nocodec.c b/sound/soc/intel/boards/bytcht_nocodec.c new file mode 100644 index 0000000000000000000000000000000000000000..89853eeaaf9dd4f20bdcd61acc10e43c3eb29839 --- /dev/null +++ b/sound/soc/intel/boards/bytcht_nocodec.c @@ -0,0 +1,208 @@ +/* + * bytcht_nocodec.c - ASoc Machine driver for MinnowBoard Max and Up + * to make I2S signals observable on the Low-Speed connector. Audio codec + * is not managed by ASoC/DAPM + * + * Copyright (C) 2015-2017 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include "../atom/sst-atom-controls.h" + +static const struct snd_soc_dapm_widget widgets[] = { + SND_SOC_DAPM_MIC("Mic", NULL), + SND_SOC_DAPM_SPK("Speaker", NULL), +}; + +static const struct snd_kcontrol_new controls[] = { + SOC_DAPM_PIN_SWITCH("Mic"), + SOC_DAPM_PIN_SWITCH("Speaker"), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + {"ssp2 Tx", NULL, "codec_out0"}, + {"ssp2 Tx", NULL, "codec_out1"}, + {"codec_in0", NULL, "ssp2 Rx"}, + {"codec_in1", NULL, "ssp2 Rx"}, + + {"ssp2 Rx", NULL, "Mic"}, + {"Speaker", NULL, "ssp2 Tx"}, +}; + +static int codec_fixup(struct snd_soc_pcm_runtime *rtd, + struct snd_pcm_hw_params *params) +{ + struct snd_interval *rate = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_RATE); + struct snd_interval *channels = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_CHANNELS); + int ret; + + /* The DSP will convert the FE rate to 48k, stereo, 24bits */ + rate->min = rate->max = 48000; + channels->min = channels->max = 2; + + /* set SSP2 to 24-bit */ + params_set_format(params, SNDRV_PCM_FORMAT_S24_LE); + + /* + * Default mode for SSP configuration is TDM 4 slot, override config + * with explicit setting to I2S 2ch 24-bit. The word length is set with + * dai_set_tdm_slot() since there is no other API exposed + */ + ret = snd_soc_dai_set_fmt(rtd->cpu_dai, + SND_SOC_DAIFMT_I2S | + SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBS_CFS); + + if (ret < 0) { + dev_err(rtd->dev, "can't set format to I2S, err %d\n", ret); + return ret; + } + + ret = snd_soc_dai_set_tdm_slot(rtd->cpu_dai, 0x3, 0x3, 2, 24); + if (ret < 0) { + dev_err(rtd->dev, "can't set I2S config, err %d\n", ret); + return ret; + } + + return 0; +} + +static unsigned int rates_48000[] = { + 48000, +}; + +static struct snd_pcm_hw_constraint_list constraints_48000 = { + .count = ARRAY_SIZE(rates_48000), + .list = rates_48000, +}; + +static int aif1_startup(struct snd_pcm_substream *substream) +{ + return snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + &constraints_48000); +} + +static struct snd_soc_ops aif1_ops = { + .startup = aif1_startup, +}; + +static struct snd_soc_dai_link dais[] = { + [MERR_DPCM_AUDIO] = { + .name = "Audio Port", + .stream_name = "Audio", + .cpu_dai_name = "media-cpu-dai", + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .platform_name = "sst-mfld-platform", + .ignore_suspend = 1, + .nonatomic = true, + .dynamic = 1, + .dpcm_playback = 1, + .dpcm_capture = 1, + .ops = &aif1_ops, + }, + [MERR_DPCM_DEEP_BUFFER] = { + .name = "Deep-Buffer Audio Port", + .stream_name = "Deep-Buffer Audio", + .cpu_dai_name = "deepbuffer-cpu-dai", + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .platform_name = "sst-mfld-platform", + .ignore_suspend = 1, + .nonatomic = true, + .dynamic = 1, + .dpcm_playback = 1, + .ops = &aif1_ops, + }, + [MERR_DPCM_COMPR] = { + .name = "Compressed Port", + .stream_name = "Compress", + .cpu_dai_name = "compress-cpu-dai", + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .platform_name = "sst-mfld-platform", + }, + /* CODEC<->CODEC link */ + /* back ends */ + { + .name = "SSP2-LowSpeed Connector", + .id = 1, + .cpu_dai_name = "ssp2-port", + .platform_name = "sst-mfld-platform", + .no_pcm = 1, + .codec_dai_name = "snd-soc-dummy-dai", + .codec_name = "snd-soc-dummy", + .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF + | SND_SOC_DAIFMT_CBS_CFS, + .be_hw_params_fixup = codec_fixup, + .ignore_suspend = 1, + .nonatomic = true, + .dpcm_playback = 1, + .dpcm_capture = 1, + }, +}; + +/* SoC card */ +static struct snd_soc_card bytcht_nocodec_card = { + .name = "bytcht-nocodec", + .owner = THIS_MODULE, + .dai_link = dais, + .num_links = ARRAY_SIZE(dais), + .dapm_widgets = widgets, + .num_dapm_widgets = ARRAY_SIZE(widgets), + .dapm_routes = audio_map, + .num_dapm_routes = ARRAY_SIZE(audio_map), + .controls = controls, + .num_controls = ARRAY_SIZE(controls), + .fully_routed = true, +}; + +static int snd_bytcht_nocodec_mc_probe(struct platform_device *pdev) +{ + int ret_val = 0; + + /* register the soc card */ + bytcht_nocodec_card.dev = &pdev->dev; + + ret_val = devm_snd_soc_register_card(&pdev->dev, &bytcht_nocodec_card); + + if (ret_val) { + dev_err(&pdev->dev, "devm_snd_soc_register_card failed %d\n", + ret_val); + return ret_val; + } + platform_set_drvdata(pdev, &bytcht_nocodec_card); + return ret_val; +} + +static struct platform_driver snd_bytcht_nocodec_mc_driver = { + .driver = { + .name = "bytcht_nocodec", + }, + .probe = snd_bytcht_nocodec_mc_probe, +}; +module_platform_driver(snd_bytcht_nocodec_mc_driver); + +MODULE_DESCRIPTION("ASoC Intel(R) Baytrail/Cherrytrail Nocodec Machine driver"); +MODULE_AUTHOR("Pierre-Louis Bossart "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:bytcht_nocodec"); diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 5c7219fb3aa86738a49cff98bf6f16693f7a0192..4a76b099a508954c4cd3e6b3a3bfdfda1755b0fe 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -56,35 +57,88 @@ enum { struct byt_rt5640_private { struct clk *mclk; }; +static bool is_bytcr; static unsigned long byt_rt5640_quirk = BYT_RT5640_MCLK_EN; +static unsigned int quirk_override; +module_param_named(quirk, quirk_override, uint, 0444); +MODULE_PARM_DESC(quirk, "Board-specific quirk override"); static void log_quirks(struct device *dev) { - if (BYT_RT5640_MAP(byt_rt5640_quirk) == BYT_RT5640_DMIC1_MAP) - dev_info(dev, "quirk DMIC1_MAP enabled"); - if (BYT_RT5640_MAP(byt_rt5640_quirk) == BYT_RT5640_DMIC2_MAP) - dev_info(dev, "quirk DMIC2_MAP enabled"); - if (BYT_RT5640_MAP(byt_rt5640_quirk) == BYT_RT5640_IN1_MAP) - dev_info(dev, "quirk IN1_MAP enabled"); - if (BYT_RT5640_MAP(byt_rt5640_quirk) == BYT_RT5640_IN3_MAP) - dev_info(dev, "quirk IN3_MAP enabled"); - if (byt_rt5640_quirk & BYT_RT5640_DMIC_EN) - dev_info(dev, "quirk DMIC enabled"); + int map; + bool has_dmic = false; + bool has_mclk = false; + bool has_ssp0 = false; + bool has_ssp0_aif1 = false; + bool has_ssp0_aif2 = false; + bool has_ssp2_aif2 = false; + + map = BYT_RT5640_MAP(byt_rt5640_quirk); + switch (map) { + case BYT_RT5640_DMIC1_MAP: + dev_info(dev, "quirk DMIC1_MAP enabled\n"); + has_dmic = true; + break; + case BYT_RT5640_DMIC2_MAP: + dev_info(dev, "quirk DMIC2_MAP enabled\n"); + has_dmic = true; + break; + case BYT_RT5640_IN1_MAP: + dev_info(dev, "quirk IN1_MAP enabled\n"); + break; + case BYT_RT5640_IN3_MAP: + dev_info(dev, "quirk IN3_MAP enabled\n"); + break; + default: + dev_err(dev, "quirk map 0x%x is not supported, microphone input will not work\n", map); + break; + } + if (byt_rt5640_quirk & BYT_RT5640_DMIC_EN) { + if (has_dmic) + dev_info(dev, "quirk DMIC enabled\n"); + else + dev_err(dev, "quirk DMIC enabled but no DMIC input set, will be ignored\n"); + } if (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER) - dev_info(dev, "quirk MONO_SPEAKER enabled"); - if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC) - dev_info(dev, "quirk DIFF_MIC enabled"); - if (byt_rt5640_quirk & BYT_RT5640_SSP2_AIF2) - dev_info(dev, "quirk SSP2_AIF2 enabled"); - if (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF1) - dev_info(dev, "quirk SSP0_AIF1 enabled"); - if (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF2) - dev_info(dev, "quirk SSP0_AIF2 enabled"); - if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN) - dev_info(dev, "quirk MCLK_EN enabled"); - if (byt_rt5640_quirk & BYT_RT5640_MCLK_25MHZ) - dev_info(dev, "quirk MCLK_25MHZ enabled"); + dev_info(dev, "quirk MONO_SPEAKER enabled\n"); + if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC) { + if (!has_dmic) + dev_info(dev, "quirk DIFF_MIC enabled\n"); + else + dev_info(dev, "quirk DIFF_MIC enabled but DMIC input selected, will be ignored\n"); + } + if (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF1) { + dev_info(dev, "quirk SSP0_AIF1 enabled\n"); + has_ssp0 = true; + has_ssp0_aif1 = true; + } + if (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF2) { + dev_info(dev, "quirk SSP0_AIF2 enabled\n"); + has_ssp0 = true; + has_ssp0_aif2 = true; + } + if (byt_rt5640_quirk & BYT_RT5640_SSP2_AIF2) { + dev_info(dev, "quirk SSP2_AIF2 enabled\n"); + has_ssp2_aif2 = true; + } + if (is_bytcr && !has_ssp0) + dev_err(dev, "Invalid routing, bytcr detected but no SSP0-based quirk, audio cannot work with SSP2 on bytcr\n"); + if (has_ssp0_aif1 && has_ssp0_aif2) + dev_err(dev, "Invalid routing, SSP0 cannot be connected to both AIF1 and AIF2\n"); + if (has_ssp0 && has_ssp2_aif2) + dev_err(dev, "Invalid routing, cannot have both SSP0 and SSP2 connected to codec\n"); + + if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN) { + dev_info(dev, "quirk MCLK_EN enabled\n"); + has_mclk = true; + } + if (byt_rt5640_quirk & BYT_RT5640_MCLK_25MHZ) { + if (has_mclk) + dev_info(dev, "quirk MCLK_25MHZ enabled\n"); + else + dev_err(dev, "quirk MCLK_25MHZ enabled but quirk MCLK not selected, will be ignored\n"); + } } @@ -128,7 +182,7 @@ static int platform_clock_control(struct snd_soc_dapm_widget *w, ret = clk_prepare_enable(priv->mclk); if (ret < 0) { dev_err(card->dev, - "could not configure MCLK state"); + "could not configure MCLK state\n"); return ret; } } @@ -621,7 +675,7 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = { .codec_dai_name = "snd-soc-dummy-dai", .codec_name = "snd-soc-dummy", .platform_name = "sst-mfld-platform", - .ignore_suspend = 1, + .nonatomic = true, .dynamic = 1, .dpcm_playback = 1, .dpcm_capture = 1, @@ -634,7 +688,6 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = { .codec_dai_name = "snd-soc-dummy-dai", .codec_name = "snd-soc-dummy", .platform_name = "sst-mfld-platform", - .ignore_suspend = 1, .nonatomic = true, .dynamic = 1, .dpcm_playback = 1, @@ -661,6 +714,7 @@ static struct snd_soc_dai_link byt_rt5640_dais[] = { | SND_SOC_DAIFMT_CBS_CFS, .be_hw_params_fixup = byt_rt5640_codec_fixup, .ignore_suspend = 1, + .nonatomic = true, .dpcm_playback = 1, .dpcm_capture = 1, .init = byt_rt5640_init, @@ -710,8 +764,8 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) int i; int dai_index; struct byt_rt5640_private *priv; - bool is_bytcr = false; + is_bytcr = false; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_ATOMIC); if (!priv) return -ENOMEM; @@ -806,6 +860,11 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) /* check quirks before creating card */ dmi_check_system(byt_rt5640_quirk_table); + if (quirk_override) { + dev_info(&pdev->dev, "Overriding quirk 0x%x => 0x%x\n", + (unsigned int)byt_rt5640_quirk, quirk_override); + byt_rt5640_quirk = quirk_override; + } log_quirks(&pdev->dev); if ((byt_rt5640_quirk & BYT_RT5640_SSP2_AIF2) || diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index 3186f015939fb5fce3e8393fd40463362464dbf8..8164bec63bf15b874da49a4406ae9f6124cd776c 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -235,7 +235,6 @@ static struct snd_soc_dai_link byt_rt5651_dais[] = { .codec_dai_name = "snd-soc-dummy-dai", .codec_name = "snd-soc-dummy", .platform_name = "sst-mfld-platform", - .ignore_suspend = 1, .nonatomic = true, .dynamic = 1, .dpcm_playback = 1, @@ -249,7 +248,6 @@ static struct snd_soc_dai_link byt_rt5651_dais[] = { .codec_dai_name = "snd-soc-dummy-dai", .codec_name = "snd-soc-dummy", .platform_name = "sst-mfld-platform", - .ignore_suspend = 1, .nonatomic = true, .dynamic = 1, .dpcm_playback = 1, diff --git a/sound/soc/intel/haswell/sst-haswell-ipc.c b/sound/soc/intel/haswell/sst-haswell-ipc.c index a3459d1682a6bd6bb6c9ab17abb5b04633dbadfc..d33bdaf92c57c8ab9fb7648f51a1ccc185f9e324 100644 --- a/sound/soc/intel/haswell/sst-haswell-ipc.c +++ b/sound/soc/intel/haswell/sst-haswell-ipc.c @@ -2000,10 +2000,8 @@ int sst_hsw_module_set_param(struct sst_hsw *hsw, u32 param_size, char *param) { int ret; - unsigned char *data = NULL; u32 header = 0; u32 payload_size = 0, transfer_parameter_size = 0; - dma_addr_t dma_addr = 0; struct sst_hsw_transfer_parameter *parameter; struct device *dev = hsw->dev; @@ -2047,10 +2045,6 @@ int sst_hsw_module_set_param(struct sst_hsw *hsw, kfree(parameter); - if (data) - dma_free_coherent(hsw->dsp->dma_dev, - param_size, (void *)data, dma_addr); - return ret; } diff --git a/sound/soc/intel/skylake/bxt-sst.c b/sound/soc/intel/skylake/bxt-sst.c index 15a063a403ccc28762abedb3e0124a6b1e70b78f..f5e7dbb1ba39e073fc1ee7212bb558962a85c987 100644 --- a/sound/soc/intel/skylake/bxt-sst.c +++ b/sound/soc/intel/skylake/bxt-sst.c @@ -25,7 +25,8 @@ #include "skl-sst-ipc.h" #define BXT_BASEFW_TIMEOUT 3000 -#define BXT_INIT_TIMEOUT 500 +#define BXT_INIT_TIMEOUT 300 +#define BXT_ROM_INIT_TIMEOUT 70 #define BXT_IPC_PURGE_FW 0x01004000 #define BXT_ROM_INIT 0x5 @@ -45,6 +46,8 @@ /* Delay before scheduling D0i3 entry */ #define BXT_D0I3_DELAY 5000 +#define BXT_FW_ROM_INIT_RETRY 3 + static unsigned int bxt_get_errorcode(struct sst_dsp *ctx) { return sst_dsp_shim_read(ctx, BXT_ADSP_ERROR_CODE); @@ -55,29 +58,15 @@ bxt_load_library(struct sst_dsp *ctx, struct skl_lib_info *linfo, int lib_count) { struct snd_dma_buffer dmab; struct skl_sst *skl = ctx->thread_context; - const struct firmware *fw = NULL; struct firmware stripped_fw; int ret = 0, i, dma_id, stream_tag; /* library indices start from 1 to N. 0 represents base FW */ for (i = 1; i < lib_count; i++) { - ret = request_firmware(&fw, linfo[i].name, ctx->dev); - if (ret < 0) { - dev_err(ctx->dev, "Request lib %s failed:%d\n", - linfo[i].name, ret); - return ret; - } - - if (skl->is_first_boot) { - ret = snd_skl_parse_uuids(ctx, fw, + ret = skl_prepare_lib_load(skl, &skl->lib_info[i], &stripped_fw, BXT_ADSP_FW_BIN_HDR_OFFSET, i); - if (ret < 0) - goto load_library_failed; - } - - stripped_fw.data = fw->data; - stripped_fw.size = fw->size; - skl_dsp_strip_extended_manifest(&stripped_fw); + if (ret < 0) + goto load_library_failed; stream_tag = ctx->dsp_ops.prepare(ctx->dev, 0x40, stripped_fw.size, &dmab); @@ -92,21 +81,19 @@ bxt_load_library(struct sst_dsp *ctx, struct skl_lib_info *linfo, int lib_count) memcpy(dmab.area, stripped_fw.data, stripped_fw.size); ctx->dsp_ops.trigger(ctx->dev, true, stream_tag); - ret = skl_sst_ipc_load_library(&skl->ipc, dma_id, i); + ret = skl_sst_ipc_load_library(&skl->ipc, dma_id, i, true); if (ret < 0) dev_err(ctx->dev, "IPC Load Lib for %s fail: %d\n", linfo[i].name, ret); ctx->dsp_ops.trigger(ctx->dev, false, stream_tag); ctx->dsp_ops.cleanup(ctx->dev, &dmab, stream_tag); - release_firmware(fw); - fw = NULL; } return ret; load_library_failed: - release_firmware(fw); + skl_release_library(linfo, lib_count); return ret; } @@ -156,7 +143,7 @@ static int sst_bxt_prepare_fw(struct sst_dsp *ctx, SKL_ADSP_REG_HIPCIE_DONE, BXT_INIT_TIMEOUT, "HIPCIE Done"); if (ret < 0) { - dev_err(ctx->dev, "Timout for Purge Request%d\n", ret); + dev_err(ctx->dev, "Timeout for Purge Request%d\n", ret); goto base_fw_load_failed; } @@ -173,7 +160,7 @@ static int sst_bxt_prepare_fw(struct sst_dsp *ctx, /* Step 7: Wait for ROM init */ ret = sst_dsp_register_poll(ctx, BXT_ADSP_FW_STATUS, SKL_FW_STS_MASK, - SKL_FW_INIT, BXT_INIT_TIMEOUT, "ROM Load"); + SKL_FW_INIT, BXT_ROM_INIT_TIMEOUT, "ROM Load"); if (ret < 0) { dev_err(ctx->dev, "Timeout for ROM init, ret:%d\n", ret); goto base_fw_load_failed; @@ -206,18 +193,16 @@ static int bxt_load_base_firmware(struct sst_dsp *ctx) { struct firmware stripped_fw; struct skl_sst *skl = ctx->thread_context; - int ret; + int ret, i; - ret = request_firmware(&ctx->fw, ctx->fw_name, ctx->dev); - if (ret < 0) { - dev_err(ctx->dev, "Request firmware failed %d\n", ret); - goto sst_load_base_firmware_failed; + if (ctx->fw == NULL) { + ret = request_firmware(&ctx->fw, ctx->fw_name, ctx->dev); + if (ret < 0) { + dev_err(ctx->dev, "Request firmware failed %d\n", ret); + return ret; + } } - /* check for extended manifest */ - if (ctx->fw == NULL) - goto sst_load_base_firmware_failed; - /* prase uuids on first boot */ if (skl->is_first_boot) { ret = snd_skl_parse_uuids(ctx, ctx->fw, BXT_ADSP_FW_BIN_HDR_OFFSET, 0); @@ -229,18 +214,20 @@ static int bxt_load_base_firmware(struct sst_dsp *ctx) stripped_fw.size = ctx->fw->size; skl_dsp_strip_extended_manifest(&stripped_fw); - ret = sst_bxt_prepare_fw(ctx, stripped_fw.data, stripped_fw.size); - /* Retry Enabling core and ROM load. Retry seemed to help */ - if (ret < 0) { + + for (i = 0; i < BXT_FW_ROM_INIT_RETRY; i++) { ret = sst_bxt_prepare_fw(ctx, stripped_fw.data, stripped_fw.size); - if (ret < 0) { - dev_err(ctx->dev, "Error code=0x%x: FW status=0x%x\n", + if (ret == 0) + break; + } + + if (ret < 0) { + dev_err(ctx->dev, "Error code=0x%x: FW status=0x%x\n", sst_dsp_shim_read(ctx, BXT_ADSP_ERROR_CODE), sst_dsp_shim_read(ctx, BXT_ADSP_FW_STATUS)); - dev_err(ctx->dev, "Core En/ROM load fail:%d\n", ret); - goto sst_load_base_firmware_failed; - } + dev_err(ctx->dev, "Core En/ROM load fail:%d\n", ret); + goto sst_load_base_firmware_failed; } ret = sst_transfer_fw_host_dma(ctx); @@ -265,8 +252,11 @@ static int bxt_load_base_firmware(struct sst_dsp *ctx) } } + return ret; + sst_load_base_firmware_failed: release_firmware(ctx->fw); + ctx->fw = NULL; return ret; } @@ -428,6 +418,7 @@ static int bxt_set_dsp_D0(struct sst_dsp *ctx, unsigned int core_id) return ret; } } + skl->cores.state[core_id] = SKL_DSP_RUNNING; return ret; } @@ -514,11 +505,22 @@ static int bxt_set_dsp_D3(struct sst_dsp *ctx, unsigned int core_id) ret = skl_ipc_set_dx(&skl->ipc, BXT_INSTANCE_ID, BXT_BASE_FW_MODULE_ID, &dx); - if (ret < 0) + if (ret < 0) { dev_err(ctx->dev, "Failed to set DSP to D3:core id = %d;Continue reset\n", core_id); + /* + * In case of D3 failure, re-download the firmware, so set + * fw_loaded to false. + */ + skl->fw_loaded = false; + } + if (core_id == SKL_DSP_CORE0_ID) { + /* disable Interrupt */ + skl_ipc_op_int_disable(ctx); + skl_ipc_int_disable(ctx); + } ret = skl_dsp_disable_core(ctx, core_mask); if (ret < 0) { dev_err(ctx->dev, "Failed to disable core %d\n", ret); @@ -560,23 +562,14 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, struct sst_dsp *sst; int ret; - skl = devm_kzalloc(dev, sizeof(*skl), GFP_KERNEL); - if (skl == NULL) - return -ENOMEM; - - skl->dev = dev; - skl_dev.thread_context = skl; - INIT_LIST_HEAD(&skl->uuid_list); - - skl->dsp = skl_dsp_ctx_init(dev, &skl_dev, irq); - if (!skl->dsp) { - dev_err(skl->dev, "skl_dsp_ctx_init failed\n"); - return -ENODEV; + ret = skl_sst_ctx_init(dev, irq, fw_name, dsp_ops, dsp, &skl_dev); + if (ret < 0) { + dev_err(dev, "%s: no device\n", __func__); + return ret; } + skl = *dsp; sst = skl->dsp; - sst->fw_name = fw_name; - sst->dsp_ops = dsp_ops; sst->fw_ops = bxt_fw_ops; sst->addr.lpe = mmio_base; sst->addr.shim = mmio_base; @@ -584,24 +577,15 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, sst_dsp_mailbox_init(sst, (BXT_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ), SKL_ADSP_W0_UP_SZ, BXT_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ); - INIT_LIST_HEAD(&sst->module_list); - ret = skl_ipc_init(dev, skl); - if (ret) - return ret; - /* set the D0i3 check */ skl->ipc.ops.check_dsp_lp_on = skl_ipc_check_D0i0; skl->cores.count = 2; skl->boot_complete = false; init_waitqueue_head(&skl->boot_wait); - skl->is_first_boot = true; INIT_DELAYED_WORK(&skl->d0i3.work, bxt_set_dsp_D0i3); skl->d0i3.state = SKL_DSP_D0I3_NONE; - if (dsp) - *dsp = skl; - return 0; } EXPORT_SYMBOL_GPL(bxt_sst_dsp_init); @@ -635,6 +619,10 @@ EXPORT_SYMBOL_GPL(bxt_sst_init_fw); void bxt_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx) { + + skl_release_library(ctx->lib_info, ctx->lib_count); + if (ctx->dsp->fw) + release_firmware(ctx->dsp->fw); skl_freeup_uuid_list(ctx); skl_ipc_free(&ctx->ipc); ctx->dsp->cl_dev.ops.cl_cleanup_controller(ctx->dsp); diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c index e66870474f1061f2cbe14b5c1014197a033acad6..ab1adc0c9cc3296e1f6d28d270916f36f8b41dd8 100644 --- a/sound/soc/intel/skylake/skl-messages.c +++ b/sound/soc/intel/skylake/skl-messages.c @@ -58,7 +58,7 @@ static int skl_free_dma_buf(struct device *dev, struct snd_dma_buffer *dmab) #define NOTIFICATION_MASK 0xf /* disable notfication for underruns/overruns from firmware module */ -static void skl_dsp_enable_notification(struct skl_sst *ctx, bool enable) +void skl_dsp_enable_notification(struct skl_sst *ctx, bool enable) { struct notification_mask mask; struct skl_ipc_large_config_msg msg = {0}; @@ -209,7 +209,7 @@ static const struct skl_dsp_ops dsp_ops[] = { { .id = 0x9d71, .loader_ops = skl_get_loader_ops, - .init = skl_sst_dsp_init, + .init = kbl_sst_dsp_init, .init_fw = skl_sst_init_fw, .cleanup = skl_sst_dsp_cleanup }, @@ -274,6 +274,7 @@ int skl_init_dsp(struct skl *skl) if (ret < 0) return ret; + skl->skl_sst->dsp_ops = ops; dev_dbg(bus->dev, "dsp registration status=%d\n", ret); return ret; @@ -284,16 +285,11 @@ int skl_free_dsp(struct skl *skl) struct hdac_ext_bus *ebus = &skl->ebus; struct hdac_bus *bus = ebus_to_hbus(ebus); struct skl_sst *ctx = skl->skl_sst; - const struct skl_dsp_ops *ops; /* disable ppcap interrupt */ snd_hdac_ext_bus_ppcap_int_enable(&skl->ebus, false); - ops = skl_get_dsp_ops(skl->pci->device); - if (!ops) - return -EIO; - - ops->cleanup(bus->dev, ctx); + ctx->dsp_ops->cleanup(bus->dev, ctx); if (ctx->dsp->addr.lpe) iounmap(ctx->dsp->addr.lpe); @@ -866,7 +862,7 @@ static void skl_clear_module_state(struct skl_module_pin *mpin, int max, } if (!found) - mcfg->m_state = SKL_MODULE_UNINIT; + mcfg->m_state = SKL_MODULE_INIT_DONE; return; } @@ -1098,7 +1094,7 @@ int skl_delete_pipe(struct skl_sst *ctx, struct skl_pipe *pipe) dev_dbg(ctx->dev, "%s: pipe = %d\n", __func__, pipe->ppl_id); /* If pipe is started, do stop the pipe in FW. */ - if (pipe->state > SKL_PIPE_STARTED) { + if (pipe->state >= SKL_PIPE_STARTED) { ret = skl_set_pipe_state(ctx, pipe, PPL_PAUSED); if (ret < 0) { dev_err(ctx->dev, "Failed to stop pipeline\n"); diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 7eb9c419dc7f0a432c0ba0acf7894d41e621aea3..e3f06672fd6df3268be67eb5790edcf91fa27585 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -24,8 +24,6 @@ static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45, 0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53}; -#define DSDT_NHLT_PATH "\\_SB.PCI0.HDAS" - struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) { acpi_handle handle; @@ -33,8 +31,9 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) struct nhlt_resource_desc *nhlt_ptr = NULL; struct nhlt_acpi_table *nhlt_table = NULL; - if (ACPI_FAILURE(acpi_get_handle(NULL, DSDT_NHLT_PATH, &handle))) { - dev_err(dev, "Requested NHLT device not found\n"); + handle = ACPI_HANDLE(dev); + if (!handle) { + dev_err(dev, "Didn't find ACPI_HANDLE\n"); return NULL; } diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index e12520e142ff5306e926e644e95bf3f81c5d2903..e91bbcffc856514d0c8eaa7af4719b0a4d684b10 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include "skl.h" @@ -155,7 +156,7 @@ int skl_pcm_host_dma_prepare(struct device *dev, struct skl_pipe_params *params) snd_hdac_ext_stream_decouple(ebus, stream, true); format_val = snd_hdac_calc_stream_format(params->s_freq, - params->ch, params->format, 32, 0); + params->ch, params->format, params->host_bps, 0); dev_dbg(dev, "format_val=%d, rate=%d, ch=%d, format=%d\n", format_val, params->s_freq, params->ch, params->format); @@ -190,8 +191,8 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params) stream = stream_to_hdac_ext_stream(hstream); snd_hdac_ext_stream_decouple(ebus, stream, true); - format_val = snd_hdac_calc_stream_format(params->s_freq, - params->ch, params->format, 24, 0); + format_val = snd_hdac_calc_stream_format(params->s_freq, params->ch, + params->format, params->link_bps, 0); dev_dbg(dev, "format_val=%d, rate=%d, ch=%d, format=%d\n", format_val, params->s_freq, params->ch, params->format); @@ -262,23 +263,6 @@ static int skl_pcm_open(struct snd_pcm_substream *substream, return 0; } -static int skl_be_prepare(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct skl *skl = get_skl_ctx(dai->dev); - struct skl_sst *ctx = skl->skl_sst; - struct skl_module_cfg *mconfig; - - if (dai->playback_widget->power || dai->capture_widget->power) - return 0; - - mconfig = skl_tplg_be_get_cpr_module(dai, substream->stream); - if (mconfig == NULL) - return -EINVAL; - - return skl_dsp_set_dma_control(ctx, mconfig); -} - static int skl_pcm_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -326,6 +310,11 @@ static int skl_pcm_hw_params(struct snd_pcm_substream *substream, p_params.host_dma_id = dma_id; p_params.stream = substream->stream; p_params.format = params_format(params); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + p_params.host_bps = dai->driver->playback.sig_bits; + else + p_params.host_bps = dai->driver->capture.sig_bits; + m_cfg = skl_tplg_fe_get_cpr_module(dai, p_params.stream); if (m_cfg) @@ -564,6 +553,11 @@ static int skl_link_hw_params(struct snd_pcm_substream *substream, p_params.link_index = link->index; p_params.format = params_format(params); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + p_params.link_bps = codec_dai->driver->playback.sig_bits; + else + p_params.link_bps = codec_dai->driver->capture.sig_bits; + return skl_tplg_be_update_params(dai, &p_params); } @@ -649,7 +643,6 @@ static struct snd_soc_dai_ops skl_dmic_dai_ops = { static struct snd_soc_dai_ops skl_be_ssp_dai_ops = { .hw_params = skl_be_hw_params, - .prepare = skl_be_prepare, }; static struct snd_soc_dai_ops skl_link_dai_ops = { @@ -670,6 +663,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_8000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE, + .sig_bits = 32, }, .capture = { .stream_name = "System Capture", @@ -677,6 +671,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { .channels_max = HDA_STEREO, .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_16000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE, + .sig_bits = 32, }, }, { @@ -688,6 +683,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { .channels_max = HDA_QUAD, .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_16000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE, + .sig_bits = 32, }, }, { @@ -699,6 +695,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { .channels_max = HDA_STEREO, .rates = SNDRV_PCM_RATE_48000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE, + .sig_bits = 32, }, }, { @@ -710,6 +707,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { .channels_max = HDA_STEREO, .rates = SNDRV_PCM_RATE_48000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE, + .sig_bits = 32, }, }, { @@ -721,6 +719,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { .channels_max = HDA_QUAD, .rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_16000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE, + .sig_bits = 32, }, }, { @@ -736,6 +735,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { SNDRV_PCM_RATE_192000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE, + .sig_bits = 32, }, }, { @@ -751,6 +751,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { SNDRV_PCM_RATE_192000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE, + .sig_bits = 32, }, }, { @@ -766,6 +767,7 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { SNDRV_PCM_RATE_192000, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE, + .sig_bits = 32, }, }, @@ -949,14 +951,12 @@ static struct snd_soc_dai_driver skl_platform_dai[] = { static int skl_platform_open(struct snd_pcm_substream *substream) { - struct snd_pcm_runtime *runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai_link *dai_link = rtd->dai_link; dev_dbg(rtd->cpu_dai->dev, "In %s:%s\n", __func__, dai_link->cpu_dai_name); - runtime = substream->runtime; snd_soc_set_runtime_hwparams(substream, &azx_pcm_hw); return 0; @@ -1062,13 +1062,31 @@ static snd_pcm_uframes_t skl_platform_pcm_pointer * HAD space reflects the actual data that is transferred. * Use the position buffer for capture, as DPIB write gets * completed earlier than the actual data written to the DDR. + * + * For capture stream following workaround is required to fix the + * incorrect position reporting. + * + * 1. Wait for 20us before reading the DMA position in buffer once + * the interrupt is generated for stream completion as update happens + * on the HDA frame boundary i.e. 20.833uSec. + * 2. Read DPIB register to flush the DMA position value. This dummy + * read is required to flush DMA position value. + * 3. Read the DMA Position-in-Buffer. This value now will be equal to + * or greater than period boundary. */ - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { pos = readl(ebus->bus.remap_addr + AZX_REG_VS_SDXDPIB_XBASE + (AZX_REG_VS_SDXDPIB_XINTERVAL * hdac_stream(hstream)->index)); - else + } else { + udelay(20); + readl(ebus->bus.remap_addr + + AZX_REG_VS_SDXDPIB_XBASE + + (AZX_REG_VS_SDXDPIB_XINTERVAL * + hdac_stream(hstream)->index)); pos = snd_hdac_stream_get_pos_posbuf(hdac_stream(hstream)); + } if (pos >= hdac_stream(hstream)->bufsize) pos = 0; @@ -1165,7 +1183,7 @@ static int skl_pcm_new(struct snd_soc_pcm_runtime *rtd) snd_dma_pci_data(skl->pci), size, MAX_PREALLOC_SIZE); if (retval) { - dev_err(dai->dev, "dma buffer allocationf fail\n"); + dev_err(dai->dev, "dma buffer allocation fail\n"); return retval; } } @@ -1173,29 +1191,52 @@ static int skl_pcm_new(struct snd_soc_pcm_runtime *rtd) return retval; } +static int skl_get_module_info(struct skl *skl, struct skl_module_cfg *mconfig) +{ + struct skl_sst *ctx = skl->skl_sst; + struct uuid_module *module; + uuid_le *uuid_mod; + + uuid_mod = (uuid_le *)mconfig->guid; + + if (list_empty(&ctx->uuid_list)) { + dev_err(ctx->dev, "Module list is empty\n"); + return -EIO; + } + + list_for_each_entry(module, &ctx->uuid_list, list) { + if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) { + mconfig->id.module_id = module->id; + mconfig->is_loadable = module->is_loadable; + return 0; + } + } + + return -EIO; +} + static int skl_populate_modules(struct skl *skl) { struct skl_pipeline *p; struct skl_pipe_module *m; struct snd_soc_dapm_widget *w; struct skl_module_cfg *mconfig; - int ret; + int ret = 0; list_for_each_entry(p, &skl->ppl_list, node) { list_for_each_entry(m, &p->pipe->w_list, node) { - w = m->w; mconfig = w->priv; - ret = snd_skl_get_module_info(skl->skl_sst, mconfig); + ret = skl_get_module_info(skl, mconfig); if (ret < 0) { dev_err(skl->skl_sst->dev, - "query module info failed:%d\n", ret); - goto err; + "query module info failed\n"); + return ret; } } } -err: + return ret; } @@ -1232,6 +1273,7 @@ static int skl_platform_soc_probe(struct snd_soc_platform *platform) } skl_populate_modules(skl); skl->skl_sst->update_d0i3c = skl_update_d0i3c; + skl_dsp_enable_notification(skl->skl_sst, false); } pm_runtime_mark_last_busy(platform->dev); pm_runtime_put_autosuspend(platform->dev); @@ -1256,6 +1298,7 @@ int skl_platform_register(struct device *dev) struct skl *skl = ebus_to_skl(ebus); INIT_LIST_HEAD(&skl->ppl_list); + INIT_LIST_HEAD(&skl->bind_list); ret = snd_soc_register_platform(dev, &skl_platform_drv); if (ret) { @@ -1276,6 +1319,17 @@ int skl_platform_register(struct device *dev) int skl_platform_unregister(struct device *dev) { + struct hdac_ext_bus *ebus = dev_get_drvdata(dev); + struct skl *skl = ebus_to_skl(ebus); + struct skl_module_deferred_bind *modules, *tmp; + + if (!list_empty(&skl->bind_list)) { + list_for_each_entry_safe(modules, tmp, &skl->bind_list, node) { + list_del(&modules->node); + kfree(modules); + } + } + snd_soc_unregister_component(dev); snd_soc_unregister_platform(dev); return 0; diff --git a/sound/soc/intel/skylake/skl-sst-cldma.c b/sound/soc/intel/skylake/skl-sst-cldma.c index c9f6d87381dbe97e6d5ed3d5b5c6423aa646aa92..d2b1d60fec021cdf00e5e264f50c111d9f163fa8 100644 --- a/sound/soc/intel/skylake/skl-sst-cldma.c +++ b/sound/soc/intel/skylake/skl-sst-cldma.c @@ -164,7 +164,7 @@ static void skl_cldma_cleanup(struct sst_dsp *ctx) ctx->dsp_ops.free_dma_buf(ctx->dev, &ctx->cl_dev.dmab_bdl); } -static int skl_cldma_wait_interruptible(struct sst_dsp *ctx) +int skl_cldma_wait_interruptible(struct sst_dsp *ctx) { int ret = 0; @@ -243,9 +243,14 @@ static void skl_cldma_fill_buffer(struct sst_dsp *ctx, unsigned int size, * 2. Polling on fw register to identify if data left to transferred doesn't * fill the ring buffer. Caller takes care of polling the required status * register to identify the transfer status. + * 3. if wait flag is set, waits for DBL interrupt to copy the next chunk till + * bytes_left is 0. + * if wait flag is not set, doesn't wait for BDL interrupt. after ccopying + * the first chunk return the no of bytes_left to be copied. */ static int -skl_cldma_copy_to_buf(struct sst_dsp *ctx, const void *bin, u32 total_size) +skl_cldma_copy_to_buf(struct sst_dsp *ctx, const void *bin, + u32 total_size, bool wait) { int ret = 0; bool start = true; @@ -272,13 +277,14 @@ skl_cldma_copy_to_buf(struct sst_dsp *ctx, const void *bin, u32 total_size) size = ctx->cl_dev.bufsize; skl_cldma_fill_buffer(ctx, size, curr_pos, true, start); - start = false; - ret = skl_cldma_wait_interruptible(ctx); - if (ret < 0) { - skl_cldma_stop(ctx); - return ret; + if (wait) { + start = false; + ret = skl_cldma_wait_interruptible(ctx); + if (ret < 0) { + skl_cldma_stop(ctx); + return ret; + } } - } else { skl_cldma_int_disable(ctx); @@ -298,9 +304,11 @@ skl_cldma_copy_to_buf(struct sst_dsp *ctx, const void *bin, u32 total_size) } bytes_left -= size; curr_pos = curr_pos + size; + if (!wait) + return bytes_left; } - return ret; + return bytes_left; } void skl_cldma_process_intr(struct sst_dsp *ctx) diff --git a/sound/soc/intel/skylake/skl-sst-cldma.h b/sound/soc/intel/skylake/skl-sst-cldma.h index 99e4c86b6358594941e06de887e2750b874ce305..5b730a1a0ae47c15594a890607cdf89a30416dc5 100644 --- a/sound/soc/intel/skylake/skl-sst-cldma.h +++ b/sound/soc/intel/skylake/skl-sst-cldma.h @@ -213,7 +213,7 @@ struct skl_cl_dev_ops { void (*cl_trigger)(struct sst_dsp *ctx, bool enable); void (*cl_cleanup_controller)(struct sst_dsp *ctx); int (*cl_copy_to_dmabuf)(struct sst_dsp *ctx, - const void *bin, u32 size); + const void *bin, u32 size, bool wait); void (*cl_stop_dma)(struct sst_dsp *ctx); }; diff --git a/sound/soc/intel/skylake/skl-sst-dsp.c b/sound/soc/intel/skylake/skl-sst-dsp.c index c3deefab65d6f80a6efa2c3c58fa119d99e3925e..08332723c70029fa49fa3b991b68b358cc1d4158 100644 --- a/sound/soc/intel/skylake/skl-sst-dsp.c +++ b/sound/soc/intel/skylake/skl-sst-dsp.c @@ -355,12 +355,13 @@ int skl_dsp_get_core(struct sst_dsp *ctx, unsigned int core_id) ret = ctx->fw_ops.set_state_D0(ctx, core_id); if (ret < 0) { dev_err(ctx->dev, "unable to get core%d\n", core_id); - return ret; + goto out; } } skl->cores.usage_count[core_id]++; +out: dev_dbg(ctx->dev, "core id %d state %d usage_count %d\n", core_id, skl->cores.state[core_id], skl->cores.usage_count[core_id]); @@ -379,7 +380,8 @@ int skl_dsp_put_core(struct sst_dsp *ctx, unsigned int core_id) return -EINVAL; } - if (--skl->cores.usage_count[core_id] == 0) { + if ((--skl->cores.usage_count[core_id] == 0) && + (skl->cores.state[core_id] != SKL_DSP_RESET)) { ret = ctx->fw_ops.set_state_D3(ctx, core_id); if (ret < 0) { dev_err(ctx->dev, "unable to put core %d: %d\n", diff --git a/sound/soc/intel/skylake/skl-sst-dsp.h b/sound/soc/intel/skylake/skl-sst-dsp.h index 849410d0823e96ec4cd658263cb0aaeb254a5397..eba20d37ba8c077d483fce9b673831f3ff832f30 100644 --- a/sound/soc/intel/skylake/skl-sst-dsp.h +++ b/sound/soc/intel/skylake/skl-sst-dsp.h @@ -17,13 +17,15 @@ #define __SKL_SST_DSP_H__ #include +#include +#include #include #include "skl-sst-cldma.h" -#include "skl-topology.h" struct sst_dsp; struct skl_sst; struct sst_dsp_device; +struct skl_lib_info; /* Intel HD Audio General DSP Registers */ #define SKL_ADSP_GEN_BASE 0x0 @@ -144,7 +146,7 @@ struct skl_dsp_fw_ops { int (*load_fw)(struct sst_dsp *ctx); /* FW module parser/loader */ int (*load_library)(struct sst_dsp *ctx, - struct skl_lib_info *linfo, int count); + struct skl_lib_info *linfo, int lib_count); int (*parse_fw)(struct sst_dsp *ctx); int (*set_state_D0)(struct sst_dsp *ctx, unsigned int core_id); int (*set_state_D3)(struct sst_dsp *ctx, unsigned int core_id); @@ -172,6 +174,19 @@ struct skl_dsp_loader_ops { int stream_tag); }; +#define MAX_INSTANCE_BUFF 2 + +struct uuid_module { + uuid_le uuid; + int id; + int is_loadable; + int max_instance; + u64 pvt_id[MAX_INSTANCE_BUFF]; + int *instance_id; + + struct list_head list; +}; + struct skl_load_module_info { u16 mod_id; const struct firmware *fw; @@ -186,6 +201,7 @@ struct skl_module_table { void skl_cldma_process_intr(struct sst_dsp *ctx); void skl_cldma_int_disable(struct sst_dsp *ctx); int skl_cldma_prepare(struct sst_dsp *ctx); +int skl_cldma_wait_interruptible(struct sst_dsp *ctx); void skl_dsp_set_state_locked(struct sst_dsp *ctx, int state); struct sst_dsp *skl_dsp_ctx_init(struct device *dev, @@ -214,6 +230,9 @@ int skl_dsp_boot(struct sst_dsp *ctx); int skl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, const char *fw_name, struct skl_dsp_loader_ops dsp_ops, struct skl_sst **dsp); +int kbl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, + const char *fw_name, struct skl_dsp_loader_ops dsp_ops, + struct skl_sst **dsp); int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, const char *fw_name, struct skl_dsp_loader_ops dsp_ops, struct skl_sst **dsp); @@ -222,17 +241,22 @@ int bxt_sst_init_fw(struct device *dev, struct skl_sst *ctx); void skl_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx); void bxt_sst_dsp_cleanup(struct device *dev, struct skl_sst *ctx); -int snd_skl_get_module_info(struct skl_sst *ctx, - struct skl_module_cfg *mconfig); int snd_skl_parse_uuids(struct sst_dsp *ctx, const struct firmware *fw, unsigned int offset, int index); -int skl_get_pvt_id(struct skl_sst *ctx, - struct skl_module_cfg *mconfig); -int skl_put_pvt_id(struct skl_sst *ctx, - struct skl_module_cfg *mconfig); +int skl_get_pvt_id(struct skl_sst *ctx, uuid_le *uuid_mod, int instance_id); +int skl_put_pvt_id(struct skl_sst *ctx, uuid_le *uuid_mod, int *pvt_id); int skl_get_pvt_instance_id_map(struct skl_sst *ctx, int module_id, int instance_id); void skl_freeup_uuid_list(struct skl_sst *ctx); int skl_dsp_strip_extended_manifest(struct firmware *fw); +void skl_dsp_enable_notification(struct skl_sst *ctx, bool enable); +int skl_sst_ctx_init(struct device *dev, int irq, const char *fw_name, + struct skl_dsp_loader_ops dsp_ops, struct skl_sst **dsp, + struct sst_dsp_device *skl_dev); +int skl_prepare_lib_load(struct skl_sst *skl, struct skl_lib_info *linfo, + struct firmware *stripped_fw, + unsigned int hdr_offset, int index); +void skl_release_library(struct skl_lib_info *linfo, int lib_count); + #endif /*__SKL_SST_DSP_H__*/ diff --git a/sound/soc/intel/skylake/skl-sst-ipc.c b/sound/soc/intel/skylake/skl-sst-ipc.c index e1391dfbc9e919960ef0e2a4cb11e4ce20b4c42f..498b15345b1a657d608a3fcff773dae206e819b0 100644 --- a/sound/soc/intel/skylake/skl-sst-ipc.c +++ b/sound/soc/intel/skylake/skl-sst-ipc.c @@ -34,6 +34,11 @@ #define IPC_GLB_REPLY_STATUS_MASK ((0x1 << IPC_GLB_REPLY_STATUS_SHIFT) - 1) #define IPC_GLB_REPLY_STATUS(x) ((x) << IPC_GLB_REPLY_STATUS_SHIFT) +#define IPC_GLB_REPLY_TYPE_SHIFT 29 +#define IPC_GLB_REPLY_TYPE_MASK 0x1F +#define IPC_GLB_REPLY_TYPE(x) (((x) >> IPC_GLB_REPLY_TYPE_SHIFT) \ + & IPC_GLB_RPLY_TYPE_MASK) + #define IPC_TIMEOUT_MSECS 3000 #define IPC_EMPTY_LIST_SIZE 8 @@ -387,51 +392,77 @@ static int skl_ipc_process_notification(struct sst_generic_ipc *ipc, return 0; } +static int skl_ipc_set_reply_error_code(u32 reply) +{ + switch (reply) { + case IPC_GLB_REPLY_OUT_OF_MEMORY: + return -ENOMEM; + + case IPC_GLB_REPLY_BUSY: + return -EBUSY; + + default: + return -EINVAL; + } +} + static void skl_ipc_process_reply(struct sst_generic_ipc *ipc, struct skl_ipc_header header) { struct ipc_message *msg; u32 reply = header.primary & IPC_GLB_REPLY_STATUS_MASK; u64 *ipc_header = (u64 *)(&header); + struct skl_sst *skl = container_of(ipc, struct skl_sst, ipc); + unsigned long flags; + spin_lock_irqsave(&ipc->dsp->spinlock, flags); msg = skl_ipc_reply_get_msg(ipc, *ipc_header); + spin_unlock_irqrestore(&ipc->dsp->spinlock, flags); if (msg == NULL) { dev_dbg(ipc->dev, "ipc: rx list is empty\n"); return; } /* first process the header */ - switch (reply) { - case IPC_GLB_REPLY_SUCCESS: + if (reply == IPC_GLB_REPLY_SUCCESS) { dev_dbg(ipc->dev, "ipc FW reply %x: success\n", header.primary); /* copy the rx data from the mailbox */ sst_dsp_inbox_read(ipc->dsp, msg->rx_data, msg->rx_size); - break; - - case IPC_GLB_REPLY_OUT_OF_MEMORY: - dev_err(ipc->dev, "ipc fw reply: %x: no memory\n", header.primary); - msg->errno = -ENOMEM; - break; - - case IPC_GLB_REPLY_BUSY: - dev_err(ipc->dev, "ipc fw reply: %x: Busy\n", header.primary); - msg->errno = -EBUSY; - break; + switch (IPC_GLB_NOTIFY_MSG_TYPE(header.primary)) { + case IPC_GLB_LOAD_MULTIPLE_MODS: + case IPC_GLB_LOAD_LIBRARY: + skl->mod_load_complete = true; + skl->mod_load_status = true; + wake_up(&skl->mod_load_wait); + break; - default: - dev_err(ipc->dev, "Unknown ipc reply: 0x%x\n", reply); - msg->errno = -EINVAL; - break; - } + default: + break; - if (reply != IPC_GLB_REPLY_SUCCESS) { + } + } else { + msg->errno = skl_ipc_set_reply_error_code(reply); dev_err(ipc->dev, "ipc FW reply: reply=%d\n", reply); dev_err(ipc->dev, "FW Error Code: %u\n", ipc->dsp->fw_ops.get_fw_errcode(ipc->dsp)); + switch (IPC_GLB_NOTIFY_MSG_TYPE(header.primary)) { + case IPC_GLB_LOAD_MULTIPLE_MODS: + case IPC_GLB_LOAD_LIBRARY: + skl->mod_load_complete = true; + skl->mod_load_status = false; + wake_up(&skl->mod_load_wait); + break; + + default: + break; + + } } + spin_lock_irqsave(&ipc->dsp->spinlock, flags); list_del(&msg->list); sst_ipc_tx_msg_reply_complete(ipc, msg); + spin_unlock_irqrestore(&ipc->dsp->spinlock, flags); } irqreturn_t skl_dsp_irq_thread_handler(int irq, void *context) @@ -811,8 +842,8 @@ int skl_ipc_load_modules(struct sst_generic_ipc *ipc, header.primary |= IPC_GLB_TYPE(IPC_GLB_LOAD_MULTIPLE_MODS); header.primary |= IPC_LOAD_MODULE_CNT(module_cnt); - ret = sst_ipc_tx_message_wait(ipc, *ipc_header, data, - (sizeof(u16) * module_cnt), NULL, 0); + ret = sst_ipc_tx_message_nowait(ipc, *ipc_header, data, + (sizeof(u16) * module_cnt)); if (ret < 0) dev_err(ipc->dev, "ipc: load modules failed :%d\n", ret); @@ -947,7 +978,7 @@ int skl_ipc_get_large_config(struct sst_generic_ipc *ipc, EXPORT_SYMBOL_GPL(skl_ipc_get_large_config); int skl_sst_ipc_load_library(struct sst_generic_ipc *ipc, - u8 dma_id, u8 table_id) + u8 dma_id, u8 table_id, bool wait) { struct skl_ipc_header header = {0}; u64 *ipc_header = (u64 *)(&header); @@ -959,7 +990,11 @@ int skl_sst_ipc_load_library(struct sst_generic_ipc *ipc, header.primary |= IPC_MOD_INSTANCE_ID(table_id); header.primary |= IPC_MOD_ID(dma_id); - ret = sst_ipc_tx_message_wait(ipc, *ipc_header, NULL, 0, NULL, 0); + if (wait) + ret = sst_ipc_tx_message_wait(ipc, *ipc_header, + NULL, 0, NULL, 0); + else + ret = sst_ipc_tx_message_nowait(ipc, *ipc_header, NULL, 0); if (ret < 0) dev_err(ipc->dev, "ipc: load lib failed\n"); diff --git a/sound/soc/intel/skylake/skl-sst-ipc.h b/sound/soc/intel/skylake/skl-sst-ipc.h index 9660ace379ababf63d9ab29ab5941b7e3000c03d..e057da2713c6cbfe188ff0165c17a0026c7d2476 100644 --- a/sound/soc/intel/skylake/skl-sst-ipc.h +++ b/sound/soc/intel/skylake/skl-sst-ipc.h @@ -69,6 +69,14 @@ struct skl_d0i3_data { struct delayed_work work; }; +#define SKL_LIB_NAME_LENGTH 128 +#define SKL_MAX_LIB 16 + +struct skl_lib_info { + char name[SKL_LIB_NAME_LENGTH]; + const struct firmware *fw; +}; + struct skl_sst { struct device *dev; struct sst_dsp *dsp; @@ -77,6 +85,11 @@ struct skl_sst { wait_queue_head_t boot_wait; bool boot_complete; + /* module load */ + wait_queue_head_t mod_load_wait; + bool mod_load_complete; + bool mod_load_status; + /* IPC messaging */ struct sst_generic_ipc ipc; @@ -105,6 +118,8 @@ struct skl_sst { void (*update_d0i3c)(struct device *dev, bool enable); struct skl_d0i3_data d0i3; + + const struct skl_dsp_ops *dsp_ops; }; struct skl_ipc_init_instance_msg { @@ -182,7 +197,7 @@ int skl_ipc_get_large_config(struct sst_generic_ipc *ipc, struct skl_ipc_large_config_msg *msg, u32 *param); int skl_sst_ipc_load_library(struct sst_generic_ipc *ipc, - u8 dma_id, u8 table_id); + u8 dma_id, u8 table_id, bool wait); int skl_ipc_set_d0ix(struct sst_generic_ipc *ipc, struct skl_ipc_d0ix_msg *msg); diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index ea162fbf68e5b3038f99f1c7ef0ba24590e22c41..81ee251881b448bca09791246f4ab6a2cfc29871 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -94,19 +94,6 @@ struct adsp_fw_hdr { u32 load_offset; } __packed; -#define MAX_INSTANCE_BUFF 2 - -struct uuid_module { - uuid_le uuid; - int id; - int is_loadable; - int max_instance; - u64 pvt_id[MAX_INSTANCE_BUFF]; - int *instance_id; - - struct list_head list; -}; - struct skl_ext_manifest_hdr { u32 id; u32 len; @@ -115,32 +102,6 @@ struct skl_ext_manifest_hdr { u32 entries; }; -int snd_skl_get_module_info(struct skl_sst *ctx, - struct skl_module_cfg *mconfig) -{ - struct uuid_module *module; - uuid_le *uuid_mod; - - uuid_mod = (uuid_le *)mconfig->guid; - - if (list_empty(&ctx->uuid_list)) { - dev_err(ctx->dev, "Module list is empty\n"); - return -EINVAL; - } - - list_for_each_entry(module, &ctx->uuid_list, list) { - if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) { - mconfig->id.module_id = module->id; - mconfig->is_loadable = module->is_loadable; - - return 0; - } - } - - return -EINVAL; -} -EXPORT_SYMBOL_GPL(snd_skl_get_module_info); - static int skl_get_pvtid_map(struct uuid_module *module, int instance_id) { int pvt_id; @@ -222,21 +183,18 @@ static inline int skl_pvtid_128(struct uuid_module *module) * This generates a 128 bit private unique id for a module TYPE so that * module instance is unique */ -int skl_get_pvt_id(struct skl_sst *ctx, struct skl_module_cfg *mconfig) +int skl_get_pvt_id(struct skl_sst *ctx, uuid_le *uuid_mod, int instance_id) { struct uuid_module *module; - uuid_le *uuid_mod; int pvt_id; - uuid_mod = (uuid_le *)mconfig->guid; - list_for_each_entry(module, &ctx->uuid_list, list) { if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) { pvt_id = skl_pvtid_128(module); if (pvt_id >= 0) { - module->instance_id[pvt_id] = - mconfig->id.instance_id; + module->instance_id[pvt_id] = instance_id; + return pvt_id; } } @@ -254,23 +212,21 @@ EXPORT_SYMBOL_GPL(skl_get_pvt_id); * * This frees a 128 bit private unique id previously generated */ -int skl_put_pvt_id(struct skl_sst *ctx, struct skl_module_cfg *mconfig) +int skl_put_pvt_id(struct skl_sst *ctx, uuid_le *uuid_mod, int *pvt_id) { int i; - uuid_le *uuid_mod; struct uuid_module *module; - uuid_mod = (uuid_le *)mconfig->guid; list_for_each_entry(module, &ctx->uuid_list, list) { if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) { - if (mconfig->id.pvt_id != 0) - i = (mconfig->id.pvt_id) / 64; + if (*pvt_id != 0) + i = (*pvt_id) / 64; else i = 0; - module->pvt_id[i] &= ~(1 << (mconfig->id.pvt_id)); - mconfig->id.pvt_id = -1; + module->pvt_id[i] &= ~(1 << (*pvt_id)); + *pvt_id = -1; return 0; } } @@ -405,3 +361,83 @@ int skl_dsp_strip_extended_manifest(struct firmware *fw) return 0; } + +int skl_sst_ctx_init(struct device *dev, int irq, const char *fw_name, + struct skl_dsp_loader_ops dsp_ops, struct skl_sst **dsp, + struct sst_dsp_device *skl_dev) +{ + struct skl_sst *skl; + struct sst_dsp *sst; + int ret; + + skl = devm_kzalloc(dev, sizeof(*skl), GFP_KERNEL); + if (skl == NULL) + return -ENOMEM; + + skl->dev = dev; + skl_dev->thread_context = skl; + INIT_LIST_HEAD(&skl->uuid_list); + skl->dsp = skl_dsp_ctx_init(dev, skl_dev, irq); + if (!skl->dsp) { + dev_err(skl->dev, "%s: no device\n", __func__); + return -ENODEV; + } + + sst = skl->dsp; + sst->fw_name = fw_name; + sst->dsp_ops = dsp_ops; + init_waitqueue_head(&skl->mod_load_wait); + INIT_LIST_HEAD(&sst->module_list); + ret = skl_ipc_init(dev, skl); + if (ret) + return ret; + + skl->is_first_boot = true; + if (dsp) + *dsp = skl; + + return ret; +} + +int skl_prepare_lib_load(struct skl_sst *skl, struct skl_lib_info *linfo, + struct firmware *stripped_fw, + unsigned int hdr_offset, int index) +{ + int ret; + struct sst_dsp *dsp = skl->dsp; + + if (linfo->fw == NULL) { + ret = request_firmware(&linfo->fw, linfo->name, + skl->dev); + if (ret < 0) { + dev_err(skl->dev, "Request lib %s failed:%d\n", + linfo->name, ret); + return ret; + } + } + + if (skl->is_first_boot) { + ret = snd_skl_parse_uuids(dsp, linfo->fw, hdr_offset, index); + if (ret < 0) + return ret; + } + + stripped_fw->data = linfo->fw->data; + stripped_fw->size = linfo->fw->size; + skl_dsp_strip_extended_manifest(stripped_fw); + + return 0; +} + +void skl_release_library(struct skl_lib_info *linfo, int lib_count) +{ + int i; + + /* library indices start from 1 to N. 0 represents base FW */ + for (i = 1; i < lib_count; i++) { + if (linfo[i].fw) { + release_firmware(linfo[i].fw); + linfo[i].fw = NULL; + } + } +} diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c index b30bd384c8d38338d50994e9a92673aba123824d..155e456b7a3ac1e3aa6fba6a9d5ed7a931dff3f8 100644 --- a/sound/soc/intel/skylake/skl-sst.c +++ b/sound/soc/intel/skylake/skl-sst.c @@ -52,7 +52,8 @@ static int skl_transfer_firmware(struct sst_dsp *ctx, { int ret = 0; - ret = ctx->cl_dev.ops.cl_copy_to_dmabuf(ctx, basefw, base_fw_size); + ret = ctx->cl_dev.ops.cl_copy_to_dmabuf(ctx, basefw, base_fw_size, + true); if (ret < 0) return ret; @@ -178,6 +179,18 @@ static int skl_set_dsp_D0(struct sst_dsp *ctx, unsigned int core_id) dev_err(ctx->dev, "unable to load firmware\n"); return ret; } + + /* load libs as they are also lost on D3 */ + if (skl->lib_count > 1) { + ret = ctx->fw_ops.load_library(ctx, skl->lib_info, + skl->lib_count); + if (ret < 0) { + dev_err(ctx->dev, "reload libs failed: %d\n", + ret); + return ret; + } + + } } /* @@ -203,7 +216,7 @@ static int skl_set_dsp_D0(struct sst_dsp *ctx, unsigned int core_id) skl->cores.state[core_id] = SKL_DSP_RUNNING; - return ret; + return 0; } static int skl_set_dsp_D3(struct sst_dsp *ctx, unsigned int core_id) @@ -323,27 +336,85 @@ static struct skl_module_table *skl_module_get_from_id( return NULL; } -static int skl_transfer_module(struct sst_dsp *ctx, - struct skl_load_module_info *module) +static int skl_transfer_module(struct sst_dsp *ctx, const void *data, + u32 size, u16 mod_id, u8 table_id, bool is_module) { - int ret; + int ret, bytes_left, curr_pos; struct skl_sst *skl = ctx->thread_context; + skl->mod_load_complete = false; - ret = ctx->cl_dev.ops.cl_copy_to_dmabuf(ctx, module->fw->data, - module->fw->size); - if (ret < 0) - return ret; + bytes_left = ctx->cl_dev.ops.cl_copy_to_dmabuf(ctx, data, size, false); + if (bytes_left < 0) + return bytes_left; - ret = skl_ipc_load_modules(&skl->ipc, SKL_NUM_MODULES, - (void *)&module->mod_id); - if (ret < 0) - dev_err(ctx->dev, "Failed to Load module: %d\n", ret); + /* check is_module flag to load module or library */ + if (is_module) + ret = skl_ipc_load_modules(&skl->ipc, SKL_NUM_MODULES, &mod_id); + else + ret = skl_sst_ipc_load_library(&skl->ipc, 0, table_id, false); + + if (ret < 0) { + dev_err(ctx->dev, "Failed to Load %s with err %d\n", + is_module ? "module" : "lib", ret); + goto out; + } + + /* + * if bytes_left > 0 then wait for BDL complete interrupt and + * copy the next chunk till bytes_left is 0. if bytes_left is + * is zero, then wait for load module IPC reply + */ + while (bytes_left > 0) { + curr_pos = size - bytes_left; + + ret = skl_cldma_wait_interruptible(ctx); + if (ret < 0) + goto out; + + bytes_left = ctx->cl_dev.ops.cl_copy_to_dmabuf(ctx, + data + curr_pos, + bytes_left, false); + } + ret = wait_event_timeout(skl->mod_load_wait, skl->mod_load_complete, + msecs_to_jiffies(SKL_IPC_BOOT_MSECS)); + if (ret == 0 || !skl->mod_load_status) { + dev_err(ctx->dev, "Module Load failed\n"); + ret = -EIO; + } + +out: ctx->cl_dev.ops.cl_stop_dma(ctx); return ret; } +static int +kbl_load_library(struct sst_dsp *ctx, struct skl_lib_info *linfo, int lib_count) +{ + struct skl_sst *skl = ctx->thread_context; + struct firmware stripped_fw; + int ret, i; + + /* library indices start from 1 to N. 0 represents base FW */ + for (i = 1; i < lib_count; i++) { + ret = skl_prepare_lib_load(skl, &skl->lib_info[i], &stripped_fw, + SKL_ADSP_FW_BIN_HDR_OFFSET, i); + if (ret < 0) + goto load_library_failed; + ret = skl_transfer_module(ctx, stripped_fw.data, + stripped_fw.size, 0, i, false); + if (ret < 0) + goto load_library_failed; + } + + return 0; + +load_library_failed: + skl_release_library(linfo, lib_count); + return ret; +} + static int skl_load_module(struct sst_dsp *ctx, u16 mod_id, u8 *guid) { struct skl_module_table *module_entry = NULL; @@ -365,7 +436,9 @@ static int skl_load_module(struct sst_dsp *ctx, u16 mod_id, u8 *guid) } if (!module_entry->usage_cnt) { - ret = skl_transfer_module(ctx, module_entry->mod_info); + ret = skl_transfer_module(ctx, module_entry->mod_info->fw->data, + module_entry->mod_info->fw->size, + mod_id, 0, true); if (ret < 0) { dev_err(ctx->dev, "Failed to Load module\n"); return ret; @@ -388,6 +461,11 @@ static int skl_unload_module(struct sst_dsp *ctx, u16 mod_id) dev_err(ctx->dev, "Module bad usage cnt!:%d\n", usage_cnt); return -EIO; } + + /* if module is used by others return, no need to unload */ + if (usage_cnt > 0) + return 0; + ret = skl_ipc_unload_modules(&skl->ipc, SKL_NUM_MODULES, &mod_id); if (ret < 0) { @@ -434,6 +512,16 @@ static struct skl_dsp_fw_ops skl_fw_ops = { .unload_mod = skl_unload_module, }; +static struct skl_dsp_fw_ops kbl_fw_ops = { + .set_state_D0 = skl_set_dsp_D0, + .set_state_D3 = skl_set_dsp_D3, + .load_fw = skl_load_base_firmware, + .get_fw_errcode = skl_get_errorcode, + .load_library = kbl_load_library, + .load_mod = skl_load_module, + .unload_mod = skl_unload_module, +}; + static struct sst_ops skl_ops = { .irq_handler = skl_dsp_sst_interrupt, .write = sst_shim32_write, @@ -455,45 +543,47 @@ int skl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, struct sst_dsp *sst; int ret; - skl = devm_kzalloc(dev, sizeof(*skl), GFP_KERNEL); - if (skl == NULL) - return -ENOMEM; - - skl->dev = dev; - skl_dev.thread_context = skl; - INIT_LIST_HEAD(&skl->uuid_list); - - skl->dsp = skl_dsp_ctx_init(dev, &skl_dev, irq); - if (!skl->dsp) { - dev_err(skl->dev, "%s: no device\n", __func__); - return -ENODEV; + ret = skl_sst_ctx_init(dev, irq, fw_name, dsp_ops, dsp, &skl_dev); + if (ret < 0) { + dev_err(dev, "%s: no device\n", __func__); + return ret; } + skl = *dsp; sst = skl->dsp; - - sst->fw_name = fw_name; sst->addr.lpe = mmio_base; sst->addr.shim = mmio_base; sst_dsp_mailbox_init(sst, (SKL_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ), SKL_ADSP_W0_UP_SZ, SKL_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ); - INIT_LIST_HEAD(&sst->module_list); - sst->dsp_ops = dsp_ops; sst->fw_ops = skl_fw_ops; - ret = skl_ipc_init(dev, skl); - if (ret) + skl->cores.count = 2; + + return 0; +} +EXPORT_SYMBOL_GPL(skl_sst_dsp_init); + +int kbl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, + const char *fw_name, struct skl_dsp_loader_ops dsp_ops, + struct skl_sst **dsp) +{ + struct sst_dsp *sst; + int ret; + + ret = skl_sst_dsp_init(dev, mmio_base, irq, fw_name, dsp_ops, dsp); + if (ret < 0) { + dev_err(dev, "%s: Init failed %d\n", __func__, ret); return ret; + } - skl->cores.count = 2; - skl->is_first_boot = true; + sst = (*dsp)->dsp; + sst->fw_ops = kbl_fw_ops; - if (dsp) - *dsp = skl; + return 0; - return ret; } -EXPORT_SYMBOL_GPL(skl_sst_dsp_init); +EXPORT_SYMBOL_GPL(kbl_sst_dsp_init); int skl_sst_init_fw(struct device *dev, struct skl_sst *ctx) { @@ -507,6 +597,15 @@ int skl_sst_init_fw(struct device *dev, struct skl_sst *ctx) } skl_dsp_init_core_state(sst); + + if (ctx->lib_count > 1) { + ret = sst->fw_ops.load_library(sst, ctx->lib_info, + ctx->lib_count); + if (ret < 0) { + dev_err(dev, "Load Library failed : %x\n", ret); + return ret; + } + } ctx->is_first_boot = false; return 0; diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 2dbfb1b24ef4a629ecdac8004630fc66aeb529fc..64a0f8ed33e135eb5c0af683624afa3ebdb616b9 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -299,8 +299,6 @@ static void skl_tplg_update_buffer_size(struct skl_sst *ctx, { int multiplier = 1; struct skl_module_fmt *in_fmt, *out_fmt; - int in_rate, out_rate; - /* Since fixups is applied to pin 0 only, ibs, obs needs * change for pin 0 only @@ -311,22 +309,12 @@ static void skl_tplg_update_buffer_size(struct skl_sst *ctx, if (mcfg->m_type == SKL_MODULE_TYPE_SRCINT) multiplier = 5; - if (in_fmt->s_freq % 1000) - in_rate = (in_fmt->s_freq / 1000) + 1; - else - in_rate = (in_fmt->s_freq / 1000); - - mcfg->ibs = in_rate * (mcfg->in_fmt->channels) * - (mcfg->in_fmt->bit_depth >> 3) * + mcfg->ibs = DIV_ROUND_UP(in_fmt->s_freq, 1000) * + in_fmt->channels * (in_fmt->bit_depth >> 3) * multiplier; - if (mcfg->out_fmt->s_freq % 1000) - out_rate = (mcfg->out_fmt->s_freq / 1000) + 1; - else - out_rate = (mcfg->out_fmt->s_freq / 1000); - - mcfg->obs = out_rate * (mcfg->out_fmt->channels) * - (mcfg->out_fmt->bit_depth >> 3) * + mcfg->obs = DIV_ROUND_UP(out_fmt->s_freq, 1000) * + out_fmt->channels * (out_fmt->bit_depth >> 3) * multiplier; } @@ -551,6 +539,7 @@ skl_tplg_init_pipe_modules(struct skl *skl, struct skl_pipe *pipe) int ret = 0; list_for_each_entry(w_module, &pipe->w_list, node) { + uuid_le *uuid_mod; w = w_module->w; mconfig = w->priv; @@ -588,13 +577,15 @@ skl_tplg_init_pipe_modules(struct skl *skl, struct skl_pipe *pipe) * FE/BE params */ skl_tplg_update_module_params(w, ctx); - mconfig->id.pvt_id = skl_get_pvt_id(ctx, mconfig); + uuid_mod = (uuid_le *)mconfig->guid; + mconfig->id.pvt_id = skl_get_pvt_id(ctx, uuid_mod, + mconfig->id.instance_id); if (mconfig->id.pvt_id < 0) return ret; skl_tplg_set_module_init_data(w); ret = skl_init_module(ctx, mconfig); if (ret < 0) { - skl_put_pvt_id(ctx, mconfig); + skl_put_pvt_id(ctx, uuid_mod, &mconfig->id.pvt_id); return ret; } skl_tplg_alloc_pipe_mcps(skl, mconfig); @@ -614,7 +605,9 @@ static int skl_tplg_unload_pipe_modules(struct skl_sst *ctx, struct skl_module_cfg *mconfig = NULL; list_for_each_entry(w_module, &pipe->w_list, node) { + uuid_le *uuid_mod; mconfig = w_module->w->priv; + uuid_mod = (uuid_le *)mconfig->guid; if (mconfig->is_loadable && ctx->dsp->fw_ops.unload_mod && mconfig->m_state > SKL_MODULE_UNINIT) { @@ -623,7 +616,7 @@ static int skl_tplg_unload_pipe_modules(struct skl_sst *ctx, if (ret < 0) return -EIO; } - skl_put_pvt_id(ctx, mconfig); + skl_put_pvt_id(ctx, uuid_mod, &mconfig->id.pvt_id); } /* no modules to unload in this path, so return */ @@ -645,8 +638,9 @@ static int skl_tplg_mixer_dapm_pre_pmu_event(struct snd_soc_dapm_widget *w, struct skl_module_cfg *mconfig = w->priv; struct skl_pipe_module *w_module; struct skl_pipe *s_pipe = mconfig->pipe; - struct skl_module_cfg *src_module = NULL, *dst_module; + struct skl_module_cfg *src_module = NULL, *dst_module, *module; struct skl_sst *ctx = skl->skl_sst; + struct skl_module_deferred_bind *modules; /* check resource available */ if (!skl_is_pipe_mcps_avail(skl, mconfig)) @@ -687,29 +681,48 @@ static int skl_tplg_mixer_dapm_pre_pmu_event(struct snd_soc_dapm_widget *w, src_module = dst_module; } + /* + * When the destination module is initialized, check for these modules + * in deferred bind list. If found, bind them. + */ + list_for_each_entry(w_module, &s_pipe->w_list, node) { + if (list_empty(&skl->bind_list)) + break; + + list_for_each_entry(modules, &skl->bind_list, node) { + module = w_module->w->priv; + if (modules->dst == module) + skl_bind_modules(ctx, modules->src, + modules->dst); + } + } + return 0; } -static int skl_fill_sink_instance_id(struct skl_sst *ctx, - struct skl_algo_data *alg_data) +static int skl_fill_sink_instance_id(struct skl_sst *ctx, u32 *params, + int size, struct skl_module_cfg *mcfg) { - struct skl_kpb_params *params = (struct skl_kpb_params *)alg_data->params; - struct skl_mod_inst_map *inst; int i, pvt_id; - inst = params->map; + if (mcfg->m_type == SKL_MODULE_TYPE_KPB) { + struct skl_kpb_params *kpb_params = + (struct skl_kpb_params *)params; + struct skl_mod_inst_map *inst = kpb_params->map; - for (i = 0; i < params->num_modules; i++) { - pvt_id = skl_get_pvt_instance_id_map(ctx, - inst->mod_id, inst->inst_id); - if (pvt_id < 0) - return -EINVAL; - inst->inst_id = pvt_id; - inst++; + for (i = 0; i < kpb_params->num_modules; i++) { + pvt_id = skl_get_pvt_instance_id_map(ctx, inst->mod_id, + inst->inst_id); + if (pvt_id < 0) + return -EINVAL; + + inst->inst_id = pvt_id; + inst++; + } } + return 0; } - /* * Some modules require params to be set after the module is bound to * all pins connected. @@ -726,6 +739,7 @@ static int skl_tplg_set_module_bind_params(struct snd_soc_dapm_widget *w, struct soc_bytes_ext *sb; struct skl_algo_data *bc; struct skl_specific_cfg *sp_cfg; + u32 *params; /* * check all out/in pins are in bind state. @@ -758,11 +772,18 @@ static int skl_tplg_set_module_bind_params(struct snd_soc_dapm_widget *w, bc = (struct skl_algo_data *)sb->dobj.private; if (bc->set_params == SKL_PARAM_BIND) { - if (mconfig->m_type == SKL_MODULE_TYPE_KPB) - skl_fill_sink_instance_id(ctx, bc); - ret = skl_set_module_params(ctx, - (u32 *)bc->params, bc->max, - bc->param_id, mconfig); + params = kzalloc(bc->max, GFP_KERNEL); + if (!params) + return -ENOMEM; + + memcpy(params, bc->params, bc->max); + skl_fill_sink_instance_id(ctx, params, bc->max, + mconfig); + + ret = skl_set_module_params(ctx, params, + bc->max, bc->param_id, mconfig); + kfree(params); + if (ret < 0) return ret; } @@ -772,6 +793,44 @@ static int skl_tplg_set_module_bind_params(struct snd_soc_dapm_widget *w, return 0; } + +static int skl_tplg_module_add_deferred_bind(struct skl *skl, + struct skl_module_cfg *src, struct skl_module_cfg *dst) +{ + struct skl_module_deferred_bind *m_list, *modules; + int i; + + /* only supported for module with static pin connection */ + for (i = 0; i < dst->max_in_queue; i++) { + struct skl_module_pin *pin = &dst->m_in_pin[i]; + + if (pin->is_dynamic) + continue; + + if ((pin->id.module_id == src->id.module_id) && + (pin->id.instance_id == src->id.instance_id)) { + + if (!list_empty(&skl->bind_list)) { + list_for_each_entry(modules, &skl->bind_list, node) { + if (modules->src == src && modules->dst == dst) + return 0; + } + } + + m_list = kzalloc(sizeof(*m_list), GFP_KERNEL); + if (!m_list) + return -ENOMEM; + + m_list->src = src; + m_list->dst = dst; + + list_add(&m_list->node, &skl->bind_list); + } + } + + return 0; +} + static int skl_tplg_bind_sinks(struct snd_soc_dapm_widget *w, struct skl *skl, struct snd_soc_dapm_widget *src_w, @@ -806,6 +865,28 @@ static int skl_tplg_bind_sinks(struct snd_soc_dapm_widget *w, sink = p->sink; sink_mconfig = sink->priv; + /* + * Modules other than PGA leaf can be connected + * directly or via switch to a module in another + * pipeline. EX: reference path + * when the path is enabled, the dst module that needs + * to be bound may not be initialized. if the module is + * not initialized, add these modules in the deferred + * bind list and when the dst module is initialised, + * bind this module to the dst_module in deferred list. + */ + if (((src_mconfig->m_state == SKL_MODULE_INIT_DONE) + && (sink_mconfig->m_state == SKL_MODULE_UNINIT))) { + + ret = skl_tplg_module_add_deferred_bind(skl, + src_mconfig, sink_mconfig); + + if (ret < 0) + return ret; + + } + + if (src_mconfig->m_state == SKL_MODULE_UNINIT || sink_mconfig->m_state == SKL_MODULE_UNINIT) continue; @@ -985,15 +1066,6 @@ static int skl_tplg_mixer_dapm_pre_pmd_event(struct snd_soc_dapm_widget *w, src_mconfig = sink_mconfig->m_in_pin[i].tgt_mcfg; if (!src_mconfig) continue; - /* - * If path_found == 1, that means pmd for source - * pipe has not occurred, source is connected to - * some other sink. so its responsibility of sink - * to unbind itself from source. - */ - ret = skl_stop_pipe(ctx, src_mconfig->pipe); - if (ret < 0) - return ret; ret = skl_unbind_modules(ctx, src_mconfig, sink_mconfig); @@ -1019,6 +1091,7 @@ static int skl_tplg_mixer_dapm_post_pmd_event(struct snd_soc_dapm_widget *w, struct skl_module_cfg *src_module = NULL, *dst_module; struct skl_sst *ctx = skl->skl_sst; struct skl_pipe *s_pipe = mconfig->pipe; + struct skl_module_deferred_bind *modules, *tmp; if (s_pipe->state == SKL_PIPE_INVALID) return -EINVAL; @@ -1026,6 +1099,35 @@ static int skl_tplg_mixer_dapm_post_pmd_event(struct snd_soc_dapm_widget *w, skl_tplg_free_pipe_mcps(skl, mconfig); skl_tplg_free_pipe_mem(skl, mconfig); + list_for_each_entry(w_module, &s_pipe->w_list, node) { + if (list_empty(&skl->bind_list)) + break; + + src_module = w_module->w->priv; + + list_for_each_entry_safe(modules, tmp, &skl->bind_list, node) { + /* + * When the destination module is deleted, Unbind the + * modules from deferred bind list. + */ + if (modules->dst == src_module) { + skl_unbind_modules(ctx, modules->src, + modules->dst); + } + + /* + * When the source module is deleted, remove this entry + * from the deferred bind list. + */ + if (modules->src == src_module) { + list_del(&modules->node); + modules->src = NULL; + modules->dst = NULL; + kfree(modules); + } + } + } + list_for_each_entry(w_module, &s_pipe->w_list, node) { dst_module = w_module->w->priv; @@ -1042,6 +1144,11 @@ static int skl_tplg_mixer_dapm_post_pmd_event(struct snd_soc_dapm_widget *w, skl_delete_pipe(ctx, mconfig->pipe); + list_for_each_entry(w_module, &s_pipe->w_list, node) { + src_module = w_module->w->priv; + src_module->m_state = SKL_MODULE_UNINIT; + } + return skl_tplg_unload_pipe_modules(ctx, s_pipe); } @@ -1082,36 +1189,6 @@ static int skl_tplg_pga_dapm_post_pmd_event(struct snd_soc_dapm_widget *w, return ret; } -/* - * In modelling, we assume there will be ONLY one mixer in a pipeline. If - * mixer is not required then it is treated as static mixer aka vmixer with - * a hard path to source module - * So we don't need to check if source is started or not as hard path puts - * dependency on each other - */ -static int skl_tplg_vmixer_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *k, int event) -{ - struct snd_soc_dapm_context *dapm = w->dapm; - struct skl *skl = get_skl_ctx(dapm->dev); - - switch (event) { - case SND_SOC_DAPM_PRE_PMU: - return skl_tplg_mixer_dapm_pre_pmu_event(w, skl); - - case SND_SOC_DAPM_POST_PMU: - return skl_tplg_mixer_dapm_post_pmu_event(w, skl); - - case SND_SOC_DAPM_PRE_PMD: - return skl_tplg_mixer_dapm_pre_pmd_event(w, skl); - - case SND_SOC_DAPM_POST_PMD: - return skl_tplg_mixer_dapm_post_pmd_event(w, skl); - } - - return 0; -} - /* * In modelling, we assume there will be ONLY one mixer in a pipeline. If a * second one is required that is created as another pipe entity. @@ -1252,10 +1329,12 @@ static void skl_tplg_fill_dma_id(struct skl_module_cfg *mcfg, case SKL_DEVICE_HDALINK: pipe->p_params->link_dma_id = params->link_dma_id; pipe->p_params->link_index = params->link_index; + pipe->p_params->link_bps = params->link_bps; break; case SKL_DEVICE_HDAHOST: pipe->p_params->host_dma_id = params->host_dma_id; + pipe->p_params->host_bps = params->host_bps; break; default: @@ -1578,7 +1657,7 @@ int skl_tplg_be_update_params(struct snd_soc_dai *dai, static const struct snd_soc_tplg_widget_events skl_tplg_widget_ops[] = { {SKL_MIXER_EVENT, skl_tplg_mixer_event}, - {SKL_VMIXER_EVENT, skl_tplg_vmixer_event}, + {SKL_VMIXER_EVENT, skl_tplg_mixer_event}, {SKL_PGA_EVENT, skl_tplg_pga_event}, }; @@ -1632,7 +1711,7 @@ static int skl_tplg_add_pipe(struct device *dev, list_for_each_entry(ppl, &skl->ppl_list, node) { if (ppl->pipe->ppl_id == tkn_elem->value) { mconfig->pipe = ppl->pipe; - return EEXIST; + return -EEXIST; } } @@ -1924,11 +2003,13 @@ static int skl_tplg_get_token(struct device *dev, ret = skl_tplg_add_pipe(dev, mconfig, skl, tkn_elem); - if (ret < 0) + if (ret < 0) { + if (ret == -EEXIST) { + is_pipe_exists = 1; + break; + } return is_pipe_exists; - - if (ret == EEXIST) - is_pipe_exists = 1; + } break; @@ -2421,7 +2502,7 @@ static int skl_tplg_get_manifest_tkn(struct device *dev, if (ret < 0) return ret; - tkn_count += ret; + tkn_count = ret; tuple_size += tkn_count * sizeof(struct snd_soc_tplg_vendor_string_elem); diff --git a/sound/soc/intel/skylake/skl-topology.h b/sound/soc/intel/skylake/skl-topology.h index fefab0e99a3bea56b1afc9c0135cb67ca91a83eb..cc64d6bdb4f6c4859621619daa3a9077a96a8dc7 100644 --- a/sound/soc/intel/skylake/skl-topology.h +++ b/sound/soc/intel/skylake/skl-topology.h @@ -257,6 +257,8 @@ struct skl_pipe_params { snd_pcm_format_t format; int link_index; int stream; + unsigned int host_bps; + unsigned int link_bps; }; struct skl_pipe { @@ -334,17 +336,10 @@ struct skl_pipeline { struct list_head node; }; -#define SKL_LIB_NAME_LENGTH 128 -#define SKL_MAX_LIB 16 - -struct skl_lib_info { - char name[SKL_LIB_NAME_LENGTH]; - const struct firmware *fw; -}; - -struct skl_manifest { - u32 lib_count; - struct skl_lib_info lib[SKL_MAX_LIB]; +struct skl_module_deferred_bind { + struct skl_module_cfg *src; + struct skl_module_cfg *dst; + struct list_head node; }; static inline struct skl *get_skl_ctx(struct device *dev) diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index 0c57d4eaae3ae3bf488860f6133acdf9e7e30271..4c9b5781282bb149e8d749b32f2a1bd5c2987338 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -410,7 +410,7 @@ static int skl_free(struct hdac_ext_bus *ebus) struct skl *skl = ebus_to_skl(ebus); struct hdac_bus *bus = ebus_to_hbus(ebus); - skl->init_failed = 1; /* to be sure */ + skl->init_done = 0; /* to be sure */ snd_hdac_ext_stop_streams(ebus); @@ -428,8 +428,10 @@ static int skl_free(struct hdac_ext_bus *ebus) snd_hdac_ext_bus_exit(ebus); + cancel_work_sync(&skl->probe_work); if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) snd_hdac_i915_exit(&ebus->bus); + return 0; } @@ -512,7 +514,7 @@ static int probe_codec(struct hdac_ext_bus *ebus, int addr) struct hdac_bus *bus = ebus_to_hbus(ebus); unsigned int cmd = (addr << 28) | (AC_NODE_ROOT << 20) | (AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID; - unsigned int res; + unsigned int res = -1; mutex_lock(&bus->cmd_mutex); snd_hdac_bus_send_cmd(bus, cmd); @@ -566,6 +568,84 @@ static const struct hdac_bus_ops bus_core_ops = { .get_response = snd_hdac_bus_get_response, }; +static int skl_i915_init(struct hdac_bus *bus) +{ + int err; + + /* + * The HDMI codec is in GPU so we need to ensure that it is powered + * up and ready for probe + */ + err = snd_hdac_i915_init(bus); + if (err < 0) + return err; + + err = snd_hdac_display_power(bus, true); + if (err < 0) + dev_err(bus->dev, "Cannot turn on display power on i915\n"); + + return err; +} + +static void skl_probe_work(struct work_struct *work) +{ + struct skl *skl = container_of(work, struct skl, probe_work); + struct hdac_ext_bus *ebus = &skl->ebus; + struct hdac_bus *bus = ebus_to_hbus(ebus); + struct hdac_ext_link *hlink = NULL; + int err; + + if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) { + err = skl_i915_init(bus); + if (err < 0) + return; + } + + err = skl_init_chip(bus, true); + if (err < 0) { + dev_err(bus->dev, "Init chip failed with err: %d\n", err); + goto out_err; + } + + /* codec detection */ + if (!bus->codec_mask) + dev_info(bus->dev, "no hda codecs found!\n"); + + /* create codec instances */ + err = skl_codec_create(ebus); + if (err < 0) + goto out_err; + + if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) { + err = snd_hdac_display_power(bus, false); + if (err < 0) { + dev_err(bus->dev, "Cannot turn off display power on i915\n"); + return; + } + } + + /* register platform dai and controls */ + err = skl_platform_register(bus->dev); + if (err < 0) + return; + /* + * we are done probing so decrement link counts + */ + list_for_each_entry(hlink, &ebus->hlink_list, list) + snd_hdac_ext_bus_link_put(ebus, hlink); + + /* configure PM */ + pm_runtime_put_noidle(bus->dev); + pm_runtime_allow(bus->dev); + skl->init_done = 1; + + return; + +out_err: + if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) + err = snd_hdac_display_power(bus, false); +} + /* * constructor */ @@ -593,6 +673,7 @@ static int skl_create(struct pci_dev *pci, snd_hdac_ext_bus_init(ebus, &pci->dev, &bus_core_ops, io_ops); ebus->bus.use_posbuf = 1; skl->pci = pci; + INIT_WORK(&skl->probe_work, skl_probe_work); ebus->bus.bdl_pos_adj = 0; @@ -601,27 +682,6 @@ static int skl_create(struct pci_dev *pci, return 0; } -static int skl_i915_init(struct hdac_bus *bus) -{ - int err; - - /* - * The HDMI codec is in GPU so we need to ensure that it is powered - * up and ready for probe - */ - err = snd_hdac_i915_init(bus); - if (err < 0) - return err; - - err = snd_hdac_display_power(bus, true); - if (err < 0) { - dev_err(bus->dev, "Cannot turn on display power on i915\n"); - return err; - } - - return err; -} - static int skl_first_init(struct hdac_ext_bus *ebus) { struct skl *skl = ebus_to_skl(ebus); @@ -684,20 +744,7 @@ static int skl_first_init(struct hdac_ext_bus *ebus) /* initialize chip */ skl_init_pci(skl); - if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) { - err = skl_i915_init(bus); - if (err < 0) - return err; - } - - skl_init_chip(bus, true); - - /* codec detection */ - if (!bus->codec_mask) { - dev_info(bus->dev, "no hda codecs found!\n"); - } - - return 0; + return skl_init_chip(bus, true); } static int skl_probe(struct pci_dev *pci, @@ -706,7 +753,6 @@ static int skl_probe(struct pci_dev *pci, struct skl *skl; struct hdac_ext_bus *ebus = NULL; struct hdac_bus *bus = NULL; - struct hdac_ext_link *hlink = NULL; int err; /* we use ext core ops, so provide NULL for ops here */ @@ -729,7 +775,7 @@ static int skl_probe(struct pci_dev *pci, if (skl->nhlt == NULL) { err = -ENODEV; - goto out_display_power_off; + goto out_free; } err = skl_nhlt_create_sysfs(skl); @@ -760,56 +806,24 @@ static int skl_probe(struct pci_dev *pci, if (bus->mlcap) snd_hdac_ext_bus_get_ml_capabilities(ebus); + snd_hdac_bus_stop_chip(bus); + /* create device for soc dmic */ err = skl_dmic_device_register(skl); if (err < 0) goto out_dsp_free; - /* register platform dai and controls */ - err = skl_platform_register(bus->dev); - if (err < 0) - goto out_dmic_free; - - /* create codec instances */ - err = skl_codec_create(ebus); - if (err < 0) - goto out_unregister; - - if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) { - err = snd_hdac_display_power(bus, false); - if (err < 0) { - dev_err(bus->dev, "Cannot turn off display power on i915\n"); - return err; - } - } - - /* - * we are done probling so decrement link counts - */ - list_for_each_entry(hlink, &ebus->hlink_list, list) - snd_hdac_ext_bus_link_put(ebus, hlink); - - /* configure PM */ - pm_runtime_put_noidle(bus->dev); - pm_runtime_allow(bus->dev); + schedule_work(&skl->probe_work); return 0; -out_unregister: - skl_platform_unregister(bus->dev); -out_dmic_free: - skl_dmic_device_unregister(skl); out_dsp_free: skl_free_dsp(skl); out_mach_free: skl_machine_device_unregister(skl); out_nhlt_free: skl_nhlt_free(skl->nhlt); -out_display_power_off: - if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) - snd_hdac_display_power(bus, false); out_free: - skl->init_failed = 1; skl_free(ebus); return err; @@ -828,7 +842,7 @@ static void skl_shutdown(struct pci_dev *pci) skl = ebus_to_skl(ebus); - if (skl->init_failed) + if (!skl->init_done) return; snd_hdac_ext_stop_streams(ebus); diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h index bbef77d2b917a9e080e0cd67b99fbaa796b79812..2a630fcb7f088c1d548f06de31933ca662eafca2 100644 --- a/sound/soc/intel/skylake/skl.h +++ b/sound/soc/intel/skylake/skl.h @@ -27,27 +27,6 @@ #define SKL_SUSPEND_DELAY 2000 -/* Vendor Specific Registers */ -#define AZX_REG_VS_EM1 0x1000 -#define AZX_REG_VS_INRC 0x1004 -#define AZX_REG_VS_OUTRC 0x1008 -#define AZX_REG_VS_FIFOTRK 0x100C -#define AZX_REG_VS_FIFOTRK2 0x1010 -#define AZX_REG_VS_EM2 0x1030 -#define AZX_REG_VS_EM3L 0x1038 -#define AZX_REG_VS_EM3U 0x103C -#define AZX_REG_VS_EM4L 0x1040 -#define AZX_REG_VS_EM4U 0x1044 -#define AZX_REG_VS_LTRC 0x1048 -#define AZX_REG_VS_D0I3C 0x104A -#define AZX_REG_VS_PCE 0x104B -#define AZX_REG_VS_L2MAGC 0x1050 -#define AZX_REG_VS_L2LAHPT 0x1054 -#define AZX_REG_VS_SDXDPIB_XBASE 0x1084 -#define AZX_REG_VS_SDXDPIB_XINTERVAL 0x20 -#define AZX_REG_VS_SDXEFIFOS_XBASE 0x1094 -#define AZX_REG_VS_SDXEFIFOS_XINTERVAL 0x20 - #define AZX_PCIREG_PGCTL 0x44 #define AZX_PGCTL_LSRMD_MASK (1 << 4) #define AZX_PCIREG_CGCTL 0x48 @@ -67,7 +46,7 @@ struct skl { struct hdac_ext_bus ebus; struct pci_dev *pci; - unsigned int init_failed:1; /* delayed init failed */ + unsigned int init_done:1; /* delayed init status */ struct platform_device *dmic_dev; struct platform_device *i2s_dev; struct snd_soc_platform *platform; @@ -77,6 +56,7 @@ struct skl { struct skl_dsp_resource resource; struct list_head ppl_list; + struct list_head bind_list; const char *fw_name; char tplg_name[64]; @@ -84,6 +64,8 @@ struct skl { const struct firmware *tplg; int supend_active; + + struct work_struct probe_work; }; #define skl_to_ebus(s) (&(s)->ebus) diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index d7013bde6f45fc7ed82db8ea0e92ae81226705f1..5c68797f36c4c487ad7612870cb07f7dd505a3b8 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -22,6 +22,16 @@ config SND_SOC_MT2701_CS42448 Select Y if you have such device. If unsure select "N". +config SND_SOC_MT2701_WM8960 + tristate "ASoc Audio driver for MT2701 with WM8960 codec" + depends on SND_SOC_MT2701 && I2C + select SND_SOC_WM8960 + help + This adds ASoC driver for Mediatek MT2701 boards + with the WM8960 codecs. + Select Y if you have such device. + If unsure select "N". + config SND_SOC_MT8173 tristate "ASoC support for Mediatek MT8173 chip" depends on ARCH_MEDIATEK diff --git a/sound/soc/mediatek/mt2701/Makefile b/sound/soc/mediatek/mt2701/Makefile index 31c3d04d49427d1a447985317e7bda6d5bd8c70d..c91deb6aca2195eefc8f3d441c06d91388273d9d 100644 --- a/sound/soc/mediatek/mt2701/Makefile +++ b/sound/soc/mediatek/mt2701/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_SND_SOC_MT2701) += snd-soc-mt2701-afe.o # machine driver obj-$(CONFIG_SND_SOC_MT2701_CS42448) += mt2701-cs42448.o +obj-$(CONFIG_SND_SOC_MT2701_WM8960) += mt2701-wm8960.o diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index c7fa3e6634639f8498ced1f873e984b252a415e1..bc5d4db94de6947ffc53b80e8ad3a8a3b4561ae7 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -603,6 +603,22 @@ static struct snd_soc_dai_ops mt2701_btmrg_ops = { static struct snd_soc_dai_driver mt2701_afe_pcm_dais[] = { /* FE DAIs: memory intefaces to CPU */ + { + .name = "PCMO0", + .id = MT2701_MEMIF_DL1, + .suspend = mtk_afe_dai_suspend, + .resume = mtk_afe_dai_resume, + .playback = { + .stream_name = "DL1", + .channels_min = 1, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_8000_192000, + .formats = (SNDRV_PCM_FMTBIT_S16_LE + | SNDRV_PCM_FMTBIT_S24_LE + | SNDRV_PCM_FMTBIT_S32_LE) + }, + .ops = &mt2701_single_memif_dai_ops, + }, { .name = "PCM_multi", .id = MT2701_MEMIF_DLM, diff --git a/sound/soc/mediatek/mt2701/mt2701-cs42448.c b/sound/soc/mediatek/mt2701/mt2701-cs42448.c index 1e7e8d43fd8a31732fc43a85a3393b23a7e5df27..aa5b31b121e322dd7d85fa4007841f2d7a977481 100644 --- a/sound/soc/mediatek/mt2701/mt2701-cs42448.c +++ b/sound/soc/mediatek/mt2701/mt2701-cs42448.c @@ -129,7 +129,7 @@ static int mt2701_cs42448_fe_ops_startup(struct snd_pcm_substream *substream) return 0; } -static struct snd_soc_ops mt2701_cs42448_48k_fe_ops = { +static const struct snd_soc_ops mt2701_cs42448_48k_fe_ops = { .startup = mt2701_cs42448_fe_ops_startup, }; diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c new file mode 100644 index 0000000000000000000000000000000000000000..a08ce2323bdc003b85ccda00ac42c51cd5419ce5 --- /dev/null +++ b/sound/soc/mediatek/mt2701/mt2701-wm8960.c @@ -0,0 +1,176 @@ +/* + * mt2701-wm8960.c -- MT2701 WM8960 ALSA SoC machine driver + * + * Copyright (c) 2017 MediaTek Inc. + * Author: Ryder Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +#include "mt2701-afe-common.h" + +static const struct snd_soc_dapm_widget mt2701_wm8960_widgets[] = { + SND_SOC_DAPM_HP("Headphone", NULL), + SND_SOC_DAPM_MIC("AMIC", NULL), +}; + +static const struct snd_kcontrol_new mt2701_wm8960_controls[] = { + SOC_DAPM_PIN_SWITCH("Headphone"), + SOC_DAPM_PIN_SWITCH("AMIC"), +}; + +static int mt2701_wm8960_be_ops_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai = rtd->codec_dai; + struct snd_soc_dai *cpu_dai = rtd->cpu_dai; + unsigned int mclk_rate; + unsigned int rate = params_rate(params); + unsigned int div_mclk_over_bck = rate > 192000 ? 2 : 4; + unsigned int div_bck_over_lrck = 64; + + mclk_rate = rate * div_bck_over_lrck * div_mclk_over_bck; + + snd_soc_dai_set_sysclk(cpu_dai, 0, mclk_rate, SND_SOC_CLOCK_OUT); + snd_soc_dai_set_sysclk(codec_dai, 0, mclk_rate, SND_SOC_CLOCK_IN); + + return 0; +} + +static struct snd_soc_ops mt2701_wm8960_be_ops = { + .hw_params = mt2701_wm8960_be_ops_hw_params +}; + +static struct snd_soc_dai_link mt2701_wm8960_dai_links[] = { + /* FE */ + { + .name = "wm8960-playback", + .stream_name = "wm8960-playback", + .cpu_dai_name = "PCMO0", + .codec_name = "snd-soc-dummy", + .codec_dai_name = "snd-soc-dummy-dai", + .trigger = {SND_SOC_DPCM_TRIGGER_POST, + SND_SOC_DPCM_TRIGGER_POST}, + .dynamic = 1, + .dpcm_playback = 1, + }, + { + .name = "wm8960-capture", + .stream_name = "wm8960-capture", + .cpu_dai_name = "PCM0", + .codec_name = "snd-soc-dummy", + .codec_dai_name = "snd-soc-dummy-dai", + .trigger = {SND_SOC_DPCM_TRIGGER_POST, + SND_SOC_DPCM_TRIGGER_POST}, + .dynamic = 1, + .dpcm_capture = 1, + }, + /* BE */ + { + .name = "wm8960-codec", + .cpu_dai_name = "I2S0", + .no_pcm = 1, + .codec_dai_name = "wm8960-hifi", + .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBS_CFS + | SND_SOC_DAIFMT_GATED, + .ops = &mt2701_wm8960_be_ops, + .dpcm_playback = 1, + .dpcm_capture = 1, + }, +}; + +static struct snd_soc_card mt2701_wm8960_card = { + .name = "mt2701-wm8960", + .owner = THIS_MODULE, + .dai_link = mt2701_wm8960_dai_links, + .num_links = ARRAY_SIZE(mt2701_wm8960_dai_links), + .controls = mt2701_wm8960_controls, + .num_controls = ARRAY_SIZE(mt2701_wm8960_controls), + .dapm_widgets = mt2701_wm8960_widgets, + .num_dapm_widgets = ARRAY_SIZE(mt2701_wm8960_widgets), +}; + +static int mt2701_wm8960_machine_probe(struct platform_device *pdev) +{ + struct snd_soc_card *card = &mt2701_wm8960_card; + struct device_node *platform_node, *codec_node; + int ret, i; + + platform_node = of_parse_phandle(pdev->dev.of_node, + "mediatek,platform", 0); + if (!platform_node) { + dev_err(&pdev->dev, "Property 'platform' missing or invalid\n"); + return -EINVAL; + } + for (i = 0; i < card->num_links; i++) { + if (mt2701_wm8960_dai_links[i].platform_name) + continue; + mt2701_wm8960_dai_links[i].platform_of_node = platform_node; + } + + card->dev = &pdev->dev; + + codec_node = of_parse_phandle(pdev->dev.of_node, + "mediatek,audio-codec", 0); + if (!codec_node) { + dev_err(&pdev->dev, + "Property 'audio-codec' missing or invalid\n"); + return -EINVAL; + } + for (i = 0; i < card->num_links; i++) { + if (mt2701_wm8960_dai_links[i].codec_name) + continue; + mt2701_wm8960_dai_links[i].codec_of_node = codec_node; + } + + ret = snd_soc_of_parse_audio_routing(card, "audio-routing"); + if (ret) { + dev_err(&pdev->dev, "failed to parse audio-routing: %d\n", ret); + return ret; + } + + ret = devm_snd_soc_register_card(&pdev->dev, card); + if (ret) + dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", + __func__, ret); + + return ret; +} + +#ifdef CONFIG_OF +static const struct of_device_id mt2701_wm8960_machine_dt_match[] = { + {.compatible = "mediatek,mt2701-wm8960-machine",}, + {} +}; +#endif + +static struct platform_driver mt2701_wm8960_machine = { + .driver = { + .name = "mt2701-wm8960", + .owner = THIS_MODULE, +#ifdef CONFIG_OF + .of_match_table = mt2701_wm8960_machine_dt_match, +#endif + }, + .probe = mt2701_wm8960_machine_probe, +}; + +module_platform_driver(mt2701_wm8960_machine); + +/* Module information */ +MODULE_DESCRIPTION("MT2701 WM8960 ALSA SoC machine driver"); +MODULE_AUTHOR("Ryder Lee "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("mt2701 wm8960 soc card"); + diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index 46c8e6ae00b4046f189e2b2569d9344a013fe61f..e0c2b23ec711840bedaac375489dabcc1a50b2f5 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -67,7 +67,7 @@ static int mt8173_max98090_hw_params(struct snd_pcm_substream *substream, SND_SOC_CLOCK_IN); } -static struct snd_soc_ops mt8173_max98090_ops = { +static const struct snd_soc_ops mt8173_max98090_ops = { .hw_params = mt8173_max98090_hw_params, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c index 467f7049a28863fca5f3750fb27bf1ad9369e0c3..5e383eb456a4b22d1adc6aca0d65efa7b53a350c 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c @@ -75,7 +75,7 @@ static int mt8173_rt5650_rt5514_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops mt8173_rt5650_rt5514_ops = { +static const struct snd_soc_ops mt8173_rt5650_rt5514_ops = { .hw_params = mt8173_rt5650_rt5514_hw_params, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 1b8b2a77884505515460a477309bb658312aed2a..fed1f15a39c24003885afe273d340a98c8599756 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -79,7 +79,7 @@ static int mt8173_rt5650_rt5676_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops mt8173_rt5650_rt5676_ops = { +static const struct snd_soc_ops mt8173_rt5650_rt5676_ops = { .hw_params = mt8173_rt5650_rt5676_hw_params, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index ba65f4157a7e0ef6a60e5406e2570cfb1aaa14d9..a78470839b65e0edcd5d69dd599a6b398dc340a0 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -105,7 +105,7 @@ static int mt8173_rt5650_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops mt8173_rt5650_ops = { +static const struct snd_soc_ops mt8173_rt5650_ops = { .hw_params = mt8173_rt5650_hw_params, }; diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c index 25a33e9d417a7ddcb4d99f7366f66f65232aee2e..d5651026ec104def855f6be79c84e55719344e07 100644 --- a/sound/soc/omap/am3517evm.c +++ b/sound/soc/omap/am3517evm.c @@ -49,7 +49,7 @@ static int am3517evm_hw_params(struct snd_pcm_substream *substream, return ret; } -static struct snd_soc_ops am3517evm_ops = { +static const struct snd_soc_ops am3517evm_ops = { .hw_params = am3517evm_hw_params, }; diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index fdecb70431745bf23c7113bf7abcede1cd5306ef..71e5f31fa3068d0925d65b50f262f9b5c3fe4733 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -124,7 +124,7 @@ static int n810_hw_params(struct snd_pcm_substream *substream, return err; } -static struct snd_soc_ops n810_ops = { +static const struct snd_soc_ops n810_ops = { .startup = n810_startup, .hw_params = n810_hw_params, .shutdown = n810_shutdown, diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c index 89fe95e877db0d533163a0ec9f8918d4702c0194..614b18d2f631b80c61e32eaca0b139c816fcb5d5 100644 --- a/sound/soc/omap/omap-abe-twl6040.c +++ b/sound/soc/omap/omap-abe-twl6040.c @@ -70,7 +70,7 @@ static int omap_abe_hw_params(struct snd_pcm_substream *substream, return ret; } -static struct snd_soc_ops omap_abe_ops = { +static const struct snd_soc_ops omap_abe_ops = { .hw_params = omap_abe_hw_params, }; diff --git a/sound/soc/omap/omap-twl4030.c b/sound/soc/omap/omap-twl4030.c index 743131473056f5116e745db22ff17fe003c0d979..a24b0dedabb9b20501f98247ea1a0dc311f52d7b 100644 --- a/sound/soc/omap/omap-twl4030.c +++ b/sound/soc/omap/omap-twl4030.c @@ -73,7 +73,7 @@ static int omap_twl4030_hw_params(struct snd_pcm_substream *substream, return snd_soc_runtime_set_dai_fmt(rtd, fmt); } -static struct snd_soc_ops omap_twl4030_ops = { +static const struct snd_soc_ops omap_twl4030_ops = { .hw_params = omap_twl4030_hw_params, }; diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c index 732e749a1f8ed54d2ec7e3f619afb393f1d8a40d..4e3de712159c500edf72aa169df910b0456f49f1 100644 --- a/sound/soc/omap/omap3pandora.c +++ b/sound/soc/omap/omap3pandora.c @@ -184,7 +184,7 @@ static int omap3pandora_in_init(struct snd_soc_pcm_runtime *rtd) return 0; } -static struct snd_soc_ops omap3pandora_ops = { +static const struct snd_soc_ops omap3pandora_ops = { .hw_params = omap3pandora_hw_params, }; diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c index aa4053bf6710f88cd50d9d3225754152e7caa435..e4096779ca0528275b27bbabfaed1583a26cb396 100644 --- a/sound/soc/omap/osk5912.c +++ b/sound/soc/omap/osk5912.c @@ -68,7 +68,7 @@ static int osk_hw_params(struct snd_pcm_substream *substream, return err; } -static struct snd_soc_ops osk_ops = { +static const struct snd_soc_ops osk_ops = { .startup = osk_startup, .hw_params = osk_hw_params, .shutdown = osk_shutdown, diff --git a/sound/soc/omap/rx51.c b/sound/soc/omap/rx51.c index a76845748a103447e039ee4db94cf3f8abe10c93..3aeb65feaea1de397a7dfee59a0b5d036a22ebd5 100644 --- a/sound/soc/omap/rx51.c +++ b/sound/soc/omap/rx51.c @@ -123,7 +123,7 @@ static int rx51_hw_params(struct snd_pcm_substream *substream, SND_SOC_CLOCK_IN); } -static struct snd_soc_ops rx51_ops = { +static const struct snd_soc_ops rx51_ops = { .startup = rx51_startup, .hw_params = rx51_hw_params, }; @@ -433,10 +433,9 @@ static int rx51_soc_probe(struct platform_device *pdev) } pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (pdata == NULL) { - dev_err(card->dev, "failed to create private data\n"); + if (pdata == NULL) return -ENOMEM; - } + snd_soc_card_set_drvdata(card, pdata); pdata->tvout_selection_gpio = devm_gpiod_get(card->dev, diff --git a/sound/soc/pxa/brownstone.c b/sound/soc/pxa/brownstone.c index b6cb9950f05d5126ff7a4aeb731e16ab44448e99..9a3f5b7997208125136d03a72a159c0777d17a9c 100644 --- a/sound/soc/pxa/brownstone.c +++ b/sound/soc/pxa/brownstone.c @@ -74,7 +74,7 @@ static int brownstone_wm8994_hw_params(struct snd_pcm_substream *substream, } /* machine stream operations */ -static struct snd_soc_ops brownstone_ops = { +static const struct snd_soc_ops brownstone_ops = { .hw_params = brownstone_wm8994_hw_params, }; diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index 311774e9ca469197e1aec91523ecbb8e9135ee0f..054e0d65db9d7764c9b1d043f79d6c51e74941fb 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -154,7 +154,7 @@ static int corgi_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops corgi_ops = { +static const struct snd_soc_ops corgi_ops = { .startup = corgi_startup, .hw_params = corgi_hw_params, .shutdown = corgi_shutdown, diff --git a/sound/soc/pxa/e750_wm9705.c b/sound/soc/pxa/e750_wm9705.c index fdcd94adee7c8ef7ec36b10888e1740f8bcfa150..82bcbbb1841b999a9be91129925f7f10257a1529 100644 --- a/sound/soc/pxa/e750_wm9705.c +++ b/sound/soc/pxa/e750_wm9705.c @@ -81,7 +81,7 @@ static struct snd_soc_dai_link e750_dai[] = { .name = "AC97 Aux", .stream_name = "AC97 Aux", .cpu_dai_name = "pxa2xx-ac97-aux", - .codec_dai_name ="wm9705-aux", + .codec_dai_name = "wm9705-aux", .platform_name = "pxa-pcm-audio", .codec_name = "wm9705-codec", }, diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c index 2df714f70ec0098377d3a72cf123f362024b5bca..1ed8aa2348f168ceebdd2bce62ae78b897fe13ed 100644 --- a/sound/soc/pxa/e800_wm9712.c +++ b/sound/soc/pxa/e800_wm9712.c @@ -81,7 +81,7 @@ static struct snd_soc_dai_link e800_dai[] = { .name = "AC97 Aux", .stream_name = "AC97 Aux", .cpu_dai_name = "pxa2xx-ac97-aux", - .codec_dai_name ="wm9712-aux", + .codec_dai_name = "wm9712-aux", .platform_name = "pxa-pcm-audio", .codec_name = "wm9712-codec", }, diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c index 6f2020f6c8d36a15582e8b2ac8b8c7004ceb745b..e046770ce70ef07e53cade44b22d3c798f115872 100644 --- a/sound/soc/pxa/em-x270.c +++ b/sound/soc/pxa/em-x270.c @@ -43,7 +43,7 @@ static struct snd_soc_dai_link em_x270_dai[] = { .name = "AC97 Aux", .stream_name = "AC97 Aux", .cpu_dai_name = "pxa2xx-ac97-aux", - .codec_dai_name ="wm9712-aux", + .codec_dai_name = "wm9712-aux", .platform_name = "pxa-pcm-audio", .codec_name = "wm9712-codec", }, diff --git a/sound/soc/pxa/hx4700.c b/sound/soc/pxa/hx4700.c index 85483049b916db5fef5fee39b97e4fb7fbcc71e8..a9ac881c2e143acef4e655e0056269ab813ff32b 100644 --- a/sound/soc/pxa/hx4700.c +++ b/sound/soc/pxa/hx4700.c @@ -79,7 +79,7 @@ static int hx4700_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops hx4700_ops = { +static const struct snd_soc_ops hx4700_ops = { .hw_params = hx4700_hw_params, }; diff --git a/sound/soc/pxa/imote2.c b/sound/soc/pxa/imote2.c index 9d0e40771ef5b01de482aeb9daef67c15f30297f..78475376f971db9a63d36a33555b30fcf02da106 100644 --- a/sound/soc/pxa/imote2.c +++ b/sound/soc/pxa/imote2.c @@ -42,7 +42,7 @@ static int imote2_asoc_hw_params(struct snd_pcm_substream *substream, return ret; } -static struct snd_soc_ops imote2_asoc_ops = { +static const struct snd_soc_ops imote2_asoc_ops = { .hw_params = imote2_asoc_hw_params, }; diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c index 2d4d4455fe8769a0eee091fd1bdc2cd3cdc711fe..2fc012b06c439bcba72b436e271e1ed26abafb36 100644 --- a/sound/soc/pxa/magician.c +++ b/sound/soc/pxa/magician.c @@ -255,12 +255,12 @@ static int magician_capture_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops magician_capture_ops = { +static const struct snd_soc_ops magician_capture_ops = { .startup = magician_startup, .hw_params = magician_capture_hw_params, }; -static struct snd_soc_ops magician_playback_ops = { +static const struct snd_soc_ops magician_playback_ops = { .startup = magician_startup, .hw_params = magician_playback_hw_params, }; diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c index 8760a668788509a6af520bbf60444b2582911417..c4c6fbedc723ad78220d990756539e656f1d3329 100644 --- a/sound/soc/pxa/mioa701_wm9713.c +++ b/sound/soc/pxa/mioa701_wm9713.c @@ -157,7 +157,7 @@ static struct snd_soc_dai_link mioa701_dai[] = { .name = "AC97 Aux", .stream_name = "AC97 Aux", .cpu_dai_name = "pxa2xx-ac97-aux", - .codec_dai_name ="wm9713-aux", + .codec_dai_name = "wm9713-aux", .codec_name = "wm9713-codec", .platform_name = "pxa-pcm-audio", .ops = &mioa701_ops, diff --git a/sound/soc/pxa/mmp-pcm.c b/sound/soc/pxa/mmp-pcm.c index 96df9b2d8fc47f9d2305dc322a764542d1ff477a..5b5f1a442891ab8b2f9afbfb4fedcb4218bc0dac 100644 --- a/sound/soc/pxa/mmp-pcm.c +++ b/sound/soc/pxa/mmp-pcm.c @@ -166,7 +166,6 @@ static void mmp_pcm_free_dma_buffers(struct snd_pcm *pcm) buf->area = NULL; } - return; } static int mmp_pcm_preallocate_dma_buffer(struct snd_pcm_substream *substream, diff --git a/sound/soc/pxa/mmp-sspa.c b/sound/soc/pxa/mmp-sspa.c index ca8b23f8c525e7b0c1f510ca8f647d607f537b12..9cc35012e6e5904f39673da1e5c116f068dc90c5 100644 --- a/sound/soc/pxa/mmp-sspa.c +++ b/sound/soc/pxa/mmp-sspa.c @@ -119,7 +119,6 @@ static void mmp_sspa_shutdown(struct snd_pcm_substream *substream, clk_disable(priv->sspa->clk); clk_disable(priv->sysclk); - return; } /* diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c index a879aba0691fe7d20f2e6be6e39c08a632c20dd4..b6693f32fc0273a0c9fb6d9c2042941eb90be510 100644 --- a/sound/soc/pxa/poodle.c +++ b/sound/soc/pxa/poodle.c @@ -129,7 +129,7 @@ static int poodle_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops poodle_ops = { +static const struct snd_soc_ops poodle_ops = { .startup = poodle_startup, .hw_params = poodle_hw_params, .shutdown = poodle_shutdown, diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 3cad990dad2cd35e8334ff54ea14632c0319dd8c..0291c7cb64eb1edd631a153a82fe979691aafa8e 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -354,6 +354,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id, if (ssp->type == PXA3xx_SSP) { u32 val; u64 tmp = 19968; + tmp *= 1000000; do_div(tmp, freq_out); val = tmp; @@ -590,13 +591,13 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, if ((pxa_ssp_get_scr(ssp) == 4) && (width == 16)) { /* This is a special case where the bitclk is 64fs - * and we're not dealing with 2*32 bits of audio - * samples. - * - * The SSP values used for that are all found out by - * trying and failing a lot; some of the registers - * needed for that mode are only available on PXA3xx. - */ + * and we're not dealing with 2*32 bits of audio + * samples. + * + * The SSP values used for that are all found out by + * trying and failing a lot; some of the registers + * needed for that mode are only available on PXA3xx. + */ if (ssp->type != PXA3xx_SSP) return -EINVAL; diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index 2e2fb1838ec2531ba7a6c113b6c051349ff2f961..f49bf02e5ec2426f8462fc195746bd268a53cde6 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -140,9 +140,8 @@ static int pxa2xx_ac97_mic_startup(struct snd_pcm_substream *substream, { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) return -ENODEV; - else - snd_soc_dai_set_dma_data(cpu_dai, substream, - &pxa2xx_ac97_pcm_mic_mono_in); + snd_soc_dai_set_dma_data(cpu_dai, substream, + &pxa2xx_ac97_pcm_mic_mono_in); return 0; } diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c index 0389cf7b4b1ea57169bb33ab8208937c63ef3cc2..3fb60baf6eab0f53d8d524b880091e316607552c 100644 --- a/sound/soc/pxa/pxa2xx-i2s.c +++ b/sound/soc/pxa/pxa2xx-i2s.c @@ -46,10 +46,10 @@ #define SACR0_STRF (1 << 5) /* FIFO Select for EFWR Special Function */ #define SACR0_EFWR (1 << 4) /* Enable EFWR Function */ #define SACR0_RST (1 << 3) /* FIFO, i2s Register Reset */ -#define SACR0_BCKD (1 << 2) /* Bit Clock Direction */ +#define SACR0_BCKD (1 << 2) /* Bit Clock Direction */ #define SACR0_ENB (1 << 0) /* Enable I2S Link */ #define SACR1_ENLBF (1 << 5) /* Enable Loopback */ -#define SACR1_DRPL (1 << 4) /* Disable Replaying Function */ +#define SACR1_DRPL (1 << 4) /* Disable Replaying Function */ #define SACR1_DREC (1 << 3) /* Disable Recording Function */ #define SACR1_AMSL (1 << 0) /* Specify Alternate Mode */ @@ -60,7 +60,7 @@ #define SASR0_TFS (1 << 3) /* Tx FIFO Service Request */ #define SASR0_BSY (1 << 2) /* I2S Busy */ #define SASR0_RNE (1 << 1) /* Rx FIFO Not Empty */ -#define SASR0_TNF (1 << 0) /* Tx FIFO Not Empty */ +#define SASR0_TNF (1 << 0) /* Tx FIFO Not Empty */ #define SAICR_ROR (1 << 6) /* Clear Rx FIFO Overrun Interrupt */ #define SAICR_TUR (1 << 5) /* Clear Tx FIFO Underrun Interrupt */ @@ -119,7 +119,7 @@ static int pxa_i2s_wait(void) int i; /* flush the Rx FIFO */ - for(i = 0; i < 16; i++) + for (i = 0; i < 16; i++) SADR; return 0; } diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c index 410d48b93031c0da2a5ca560bca918618062d65e..b51d7a0755d5c7868769c803fa16cbc61a36a81f 100644 --- a/sound/soc/pxa/pxa2xx-pcm.c +++ b/sound/soc/pxa/pxa2xx-pcm.c @@ -85,7 +85,7 @@ static int pxa2xx_soc_pcm_new(struct snd_soc_pcm_runtime *rtd) } static struct snd_soc_platform_driver pxa2xx_soc_platform = { - .ops = &pxa2xx_pcm_ops, + .ops = &pxa2xx_pcm_ops, .pcm_new = pxa2xx_soc_pcm_new, .pcm_free = pxa2xx_pcm_free_dma_buffers, }; diff --git a/sound/soc/pxa/raumfeld.c b/sound/soc/pxa/raumfeld.c index 552b763005ed34d14ae46ebb82cb71bafa6c6c40..111a907c4eb985c22d5580dd9d09d00fe6459b00 100644 --- a/sound/soc/pxa/raumfeld.c +++ b/sound/soc/pxa/raumfeld.c @@ -132,7 +132,7 @@ static int raumfeld_cs4270_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops raumfeld_cs4270_ops = { +static const struct snd_soc_ops raumfeld_cs4270_ops = { .startup = raumfeld_cs4270_startup, .shutdown = raumfeld_cs4270_shutdown, .hw_params = raumfeld_cs4270_hw_params, @@ -228,14 +228,12 @@ static struct snd_soc_ops raumfeld_ak4104_ops = { .codec_name = "spi0.0", \ } -static struct snd_soc_dai_link snd_soc_raumfeld_connector_dai[] = -{ +static struct snd_soc_dai_link snd_soc_raumfeld_connector_dai[] = { DAI_LINK_CS4270, DAI_LINK_AK4104, }; -static struct snd_soc_dai_link snd_soc_raumfeld_speaker_dai[] = -{ +static struct snd_soc_dai_link snd_soc_raumfeld_speaker_dai[] = { DAI_LINK_CS4270, }; diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index 07d77cddac601be5b53e7801f516667e19c806a7..1671da648e95985ce97db086082ce43e1662c5c5 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -156,7 +156,7 @@ static int spitz_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops spitz_ops = { +static const struct snd_soc_ops spitz_ops = { .startup = spitz_startup, .hw_params = spitz_hw_params, }; @@ -230,8 +230,8 @@ static const struct snd_soc_dapm_route spitz_audio_map[] = { {"Headset Jack", NULL, "ROUT1"}, /* ext speaker connected to LOUT2, ROUT2 */ - {"Ext Spk", NULL , "ROUT2"}, - {"Ext Spk", NULL , "LOUT2"}, + {"Ext Spk", NULL, "ROUT2"}, + {"Ext Spk", NULL, "LOUT2"}, /* mic is connected to input 1 - with bias */ {"LINPUT1", NULL, "Mic Bias"}, diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index e022b2a777f6cfb01da1ba5a6ba95891482b77e3..ae9c12e1ea2aeccd1941e5a6e2997ed367b2833e 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -85,7 +85,7 @@ static int tosa_startup(struct snd_pcm_substream *substream) return 0; } -static struct snd_soc_ops tosa_ops = { +static const struct snd_soc_ops tosa_ops = { .startup = tosa_startup, }; @@ -133,7 +133,7 @@ static int tosa_set_spk(struct snd_kcontrol *kcontrol, static int tosa_hp_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { - gpio_set_value(TOSA_GPIO_L_MUTE, SND_SOC_DAPM_EVENT_ON(event) ? 1 :0); + gpio_set_value(TOSA_GPIO_L_MUTE, SND_SOC_DAPM_EVENT_ON(event) ? 1 : 0); return 0; } diff --git a/sound/soc/pxa/z2.c b/sound/soc/pxa/z2.c index 990b1aa6d7f67388e8fad6af6b6b4ec8c909b986..5b0eccd2b4ddee37412ed68eb46e4370c50683db 100644 --- a/sound/soc/pxa/z2.c +++ b/sound/soc/pxa/z2.c @@ -119,8 +119,8 @@ static const struct snd_soc_dapm_route z2_audio_map[] = { {"Headphone Jack", NULL, "ROUT1"}, /* ext speaker connected to LOUT2, ROUT2 */ - {"Ext Spk", NULL , "ROUT2"}, - {"Ext Spk", NULL , "LOUT2"}, + {"Ext Spk", NULL, "ROUT2"}, + {"Ext Spk", NULL, "LOUT2"}, /* mic is connected to R input 2 - with bias */ {"RINPUT2", NULL, "Mic Bias"}, @@ -152,7 +152,7 @@ static int z2_wm8750_init(struct snd_soc_pcm_runtime *rtd) return ret; } -static struct snd_soc_ops z2_ops = { +static const struct snd_soc_ops z2_ops = { .hw_params = z2_hw_params, }; diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c index 6fbcdf02c88dbc292ddeb6ea3c9d0accaed384ab..ba468e560dd257bdbd460288141dff75c0babaf6 100644 --- a/sound/soc/pxa/zylonite.c +++ b/sound/soc/pxa/zylonite.c @@ -132,7 +132,7 @@ static int zylonite_voice_hw_params(struct snd_pcm_substream *substream, return 0; } -static struct snd_soc_ops zylonite_voice_ops = { +static const struct snd_soc_ops zylonite_voice_ops = { .hw_params = zylonite_voice_hw_params, }; diff --git a/sound/soc/qcom/lpass-apq8016.c b/sound/soc/qcom/lpass-apq8016.c index 8aed72be32249ab0680b15bdb9e4bfbf1213693b..8a74844d99e2da9f7b18a4854d17bd7d2bf1feff 100644 --- a/sound/soc/qcom/lpass-apq8016.c +++ b/sound/soc/qcom/lpass-apq8016.c @@ -231,6 +231,18 @@ static struct lpass_variant apq8016_data = { .wrdma_channels = 2, .dai_driver = apq8016_lpass_cpu_dai_driver, .num_dai = ARRAY_SIZE(apq8016_lpass_cpu_dai_driver), + .dai_osr_clk_names = (const char *[]) { + "mi2s-osr-clk0", + "mi2s-osr-clk1", + "mi2s-osr-clk2", + "mi2s-osr-clk3", + }, + .dai_bit_clk_names = (const char *[]) { + "mi2s-bit-clk0", + "mi2s-bit-clk1", + "mi2s-bit-clk2", + "mi2s-bit-clk3", + }, .init = apq8016_lpass_init, .exit = apq8016_lpass_exit, .alloc_dma_channel = apq8016_lpass_alloc_dma_channel, diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 5202a584e0c6fcb021708ddd65844dd5e57dc7e2..292b103abada9541ed6a499f614e0d3121ee4fd0 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -429,7 +429,6 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) struct lpass_variant *variant; struct device *dev = &pdev->dev; const struct of_device_id *match; - char clk_name[16]; int ret, i, dai_id; dsp_of_node = of_parse_phandle(pdev->dev.of_node, "qcom,adsp", 0); @@ -477,31 +476,24 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) for (i = 0; i < variant->num_dai; i++) { dai_id = variant->dai_driver[i].id; - if (variant->num_dai > 1) - sprintf(clk_name, "mi2s-osr-clk%d", i); - else - sprintf(clk_name, "mi2s-osr-clk"); - drvdata->mi2s_osr_clk[dai_id] = devm_clk_get(&pdev->dev, - clk_name); + variant->dai_osr_clk_names[i]); if (IS_ERR(drvdata->mi2s_osr_clk[dai_id])) { dev_warn(&pdev->dev, - "error getting optional mi2s-osr-clk: %ld\n", + "%s() error getting optional %s: %ld\n", + __func__, + variant->dai_osr_clk_names[i], PTR_ERR(drvdata->mi2s_osr_clk[dai_id])); drvdata->mi2s_osr_clk[dai_id] = NULL; } - if (variant->num_dai > 1) - sprintf(clk_name, "mi2s-bit-clk%d", i); - else - sprintf(clk_name, "mi2s-bit-clk"); - drvdata->mi2s_bit_clk[dai_id] = devm_clk_get(&pdev->dev, - clk_name); + variant->dai_bit_clk_names[i]); if (IS_ERR(drvdata->mi2s_bit_clk[dai_id])) { dev_err(&pdev->dev, - "error getting mi2s-bit-clk: %ld\n", + "error getting %s: %ld\n", + variant->dai_bit_clk_names[i], PTR_ERR(drvdata->mi2s_bit_clk[dai_id])); return PTR_ERR(drvdata->mi2s_bit_clk[dai_id]); } diff --git a/sound/soc/qcom/lpass-ipq806x.c b/sound/soc/qcom/lpass-ipq806x.c index 608c1a92af8a801d8364343540d8cc85cdc6f640..ca1e1f2d27877a190f57d2c470236c680cccef80 100644 --- a/sound/soc/qcom/lpass-ipq806x.c +++ b/sound/soc/qcom/lpass-ipq806x.c @@ -92,6 +92,12 @@ static struct lpass_variant ipq806x_data = { .wrdma_channels = 4, .dai_driver = &ipq806x_lpass_cpu_dai_driver, .num_dai = 1, + .dai_osr_clk_names = (const char *[]) { + "mi2s-osr-clk", + }, + .dai_bit_clk_names = (const char *[]) { + "mi2s-bit-clk", + }, .alloc_dma_channel = ipq806x_lpass_alloc_dma_channel, .free_dma_channel = ipq806x_lpass_free_dma_channel, }; diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index 9b031352ea3c24708a46e188813169393c998866..b848db2d6c3dcf870c0c8001685ad7f2c25d4fbb 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -91,6 +91,8 @@ struct lpass_variant { /* SOC specific dais */ struct snd_soc_dai_driver *dai_driver; int num_dai; + const char * const *dai_osr_clk_names; + const char * const *dai_bit_clk_names; }; /* register the platform driver from the CPU DAI driver */ diff --git a/sound/soc/rockchip/rk3288_hdmi_analog.c b/sound/soc/rockchip/rk3288_hdmi_analog.c index b60abf322ce1c5e05c467322bc9523f12ed80d61..dbc53e48c52c5bfc0f1aaf4cfa550706a0936ad7 100644 --- a/sound/soc/rockchip/rk3288_hdmi_analog.c +++ b/sound/soc/rockchip/rk3288_hdmi_analog.c @@ -93,6 +93,9 @@ static int rk_hw_params(struct snd_pcm_substream *substream, case 96000: mclk = 12288000; break; + case 192000: + mclk = 24576000; + break; case 11025: case 22050: case 44100: diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index f1f1d7959a1b986020beb3665f233404c73b2ab4..0520f5afd7cca759ea6ac1dee55cc322e7533ddd 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -185,6 +185,14 @@ config SND_SOC_SNOW Say Y if you want to add audio support for various Snow boards based on Exynos5 series of SoCs. +config SND_SOC_ODROID + tristate "Audio support for Odroid XU3/XU4" + depends on SND_SOC_SAMSUNG && I2C + select SND_SOC_MAX98090 + select SND_SAMSUNG_I2S + help + Say Y here to enable audio support for the Odroid XU3/XU4. + config SND_SOC_ARNDALE_RT5631_ALC5631 tristate "Audio support for RT5631(ALC5631) on Arndale Board" depends on I2C diff --git a/sound/soc/samsung/Makefile b/sound/soc/samsung/Makefile index b5df5e2e3d9467111630d8a80ba73b06ec7a4681..b6c2ee35833306d66ac3dc483f3aadbc2feb8565 100644 --- a/sound/soc/samsung/Makefile +++ b/sound/soc/samsung/Makefile @@ -40,6 +40,7 @@ snd-soc-tobermory-objs := tobermory.o snd-soc-lowland-objs := lowland.o snd-soc-littlemill-objs := littlemill.o snd-soc-bells-objs := bells.o +snd-soc-odroid-objs := odroid.o snd-soc-arndale-rt5631-objs := arndale_rt5631.o snd-soc-tm2-wm5110-objs := tm2_wm5110.o @@ -62,5 +63,6 @@ obj-$(CONFIG_SND_SOC_TOBERMORY) += snd-soc-tobermory.o obj-$(CONFIG_SND_SOC_LOWLAND) += snd-soc-lowland.o obj-$(CONFIG_SND_SOC_LITTLEMILL) += snd-soc-littlemill.o obj-$(CONFIG_SND_SOC_BELLS) += snd-soc-bells.o +obj-$(CONFIG_SND_SOC_ODROID) += snd-soc-odroid.o obj-$(CONFIG_SND_SOC_ARNDALE_RT5631_ALC5631) += snd-soc-arndale-rt5631.o obj-$(CONFIG_SND_SOC_SAMSUNG_TM2_WM5110) += snd-soc-tm2-wm5110.o diff --git a/sound/soc/samsung/bells.c b/sound/soc/samsung/bells.c index 3dd246fa0059db268bb5d035829c85f78315f65e..34deba461ae18b7218b55fb78709da5886ea4974 100644 --- a/sound/soc/samsung/bells.c +++ b/sound/soc/samsung/bells.c @@ -446,7 +446,6 @@ static struct snd_soc_card bells_cards[] = { }, }; - static int bells_probe(struct platform_device *pdev) { int ret; diff --git a/sound/soc/samsung/i2s-regs.h b/sound/soc/samsung/i2s-regs.h index 9170c311d66e6f217a522b77eb45ec502ac4175c..fe69140054944c230c0b5197dc3d18b4c02dab58 100644 --- a/sound/soc/samsung/i2s-regs.h +++ b/sound/soc/samsung/i2s-regs.h @@ -160,5 +160,3 @@ #define I2SSIZE_SHIFT (16) #endif /* __SND_SOC_SAMSUNG_I2S_REGS_H */ - - diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index 52a47ed292a4739d485c40a977099b186a649281..af3ba4d4ccc58106e5c42d89d1a2ffe631959157 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -1242,7 +1242,6 @@ static int samsung_i2s_probe(struct platform_device *pdev) i2s_dai_data = (struct samsung_i2s_dai_data *) platform_get_device_id(pdev)->driver_data; - pri_dai = i2s_alloc_dai(pdev, false); if (!pri_dai) { dev_err(&pdev->dev, "Unable to alloc I2S_pri\n"); diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c new file mode 100644 index 0000000000000000000000000000000000000000..0c0b00e40646657b4592d2b646f183d76d456442 --- /dev/null +++ b/sound/soc/samsung/odroid.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include "i2s.h" +#include "i2s-regs.h" + +struct odroid_priv { + struct snd_soc_card card; + struct snd_soc_dai_link dai_link; + + struct clk *pll; + struct clk *rclk; +}; + +static int odroid_card_startup(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + + snd_pcm_hw_constraint_single(runtime, SNDRV_PCM_HW_PARAM_CHANNELS, 2); + return 0; +} + +static int odroid_card_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct odroid_priv *priv = snd_soc_card_get_drvdata(rtd->card); + unsigned int pll_freq, rclk_freq; + int ret; + + switch (params_rate(params)) { + case 32000: + case 64000: + pll_freq = 131072000U; + break; + case 44100: + case 88200: + case 176400: + pll_freq = 180633600U; + break; + case 48000: + case 96000: + case 192000: + pll_freq = 196608000U; + break; + default: + return -EINVAL; + } + + ret = clk_set_rate(priv->pll, pll_freq + 1); + if (ret < 0) + return ret; + + rclk_freq = params_rate(params) * 256 * 4; + + ret = clk_set_rate(priv->rclk, rclk_freq); + if (ret < 0) + return ret; + + if (rtd->num_codecs > 1) { + struct snd_soc_dai *codec_dai = rtd->codec_dais[1]; + + ret = snd_soc_dai_set_sysclk(codec_dai, 0, rclk_freq, + SND_SOC_CLOCK_IN); + if (ret < 0) + return ret; + } + + return 0; +} + +static const struct snd_soc_ops odroid_card_ops = { + .startup = odroid_card_startup, + .hw_params = odroid_card_hw_params, +}; + +static void odroid_put_codec_of_nodes(struct snd_soc_dai_link *link) +{ + struct snd_soc_dai_link_component *component = link->codecs; + int i; + + for (i = 0; i < link->num_codecs; i++, component++) { + if (!component->of_node) + break; + of_node_put(component->of_node); + } +} + +static int odroid_audio_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *cpu, *codec; + struct odroid_priv *priv; + struct snd_soc_dai_link *link; + struct snd_soc_card *card; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + card = &priv->card; + card->dev = dev; + + card->owner = THIS_MODULE; + card->fully_routed = true; + + snd_soc_card_set_drvdata(card, priv); + + priv->pll = devm_clk_get(dev, "epll"); + if (IS_ERR(priv->pll)) + return PTR_ERR(priv->pll); + + priv->rclk = devm_clk_get(dev, "i2s_rclk"); + if (IS_ERR(priv->rclk)) + return PTR_ERR(priv->rclk); + + ret = snd_soc_of_parse_card_name(card, "model"); + if (ret < 0) + return ret; + + if (of_property_read_bool(dev->of_node, "samsung,audio-widgets")) { + ret = snd_soc_of_parse_audio_simple_widgets(card, + "samsung,audio-widgets"); + if (ret < 0) + return ret; + } + + if (of_property_read_bool(dev->of_node, "samsung,audio-routing")) { + ret = snd_soc_of_parse_audio_routing(card, + "samsung,audio-routing"); + if (ret < 0) + return ret; + } + + link = &priv->dai_link; + + link->ops = &odroid_card_ops; + link->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBS_CFS; + + card->dai_link = &priv->dai_link; + card->num_links = 1; + + cpu = of_get_child_by_name(dev->of_node, "cpu"); + codec = of_get_child_by_name(dev->of_node, "codec"); + + link->cpu_of_node = of_parse_phandle(cpu, "sound-dai", 0); + if (!link->cpu_of_node) { + dev_err(dev, "Failed parsing cpu/sound-dai property\n"); + return -EINVAL; + } + + ret = snd_soc_of_get_dai_link_codecs(dev, codec, link); + if (ret < 0) + goto err_put_codec_n; + + link->platform_of_node = link->cpu_of_node; + + link->name = "Primary"; + link->stream_name = link->name; + + ret = devm_snd_soc_register_card(dev, card); + if (ret < 0) { + dev_err(dev, "snd_soc_register_card() failed: %d\n", ret); + goto err_put_i2s_n; + } + + return 0; + +err_put_i2s_n: + of_node_put(link->cpu_of_node); +err_put_codec_n: + odroid_put_codec_of_nodes(link); + return ret; +} + +static int odroid_audio_remove(struct platform_device *pdev) +{ + struct odroid_priv *priv = platform_get_drvdata(pdev); + + of_node_put(priv->dai_link.cpu_of_node); + odroid_put_codec_of_nodes(&priv->dai_link); + + return 0; +} + +static const struct of_device_id odroid_audio_of_match[] = { + { .compatible = "samsung,odroid-xu3-audio" }, + { .compatible = "samsung,odroid-xu4-audio"}, + { }, +}; +MODULE_DEVICE_TABLE(of, odroid_audio_of_match); + +static struct platform_driver odroid_audio_driver = { + .driver = { + .name = "odroid-audio", + .of_match_table = odroid_audio_of_match, + .pm = &snd_soc_pm_ops, + }, + .probe = odroid_audio_probe, + .remove = odroid_audio_remove, +}; +module_platform_driver(odroid_audio_driver); + +MODULE_AUTHOR("Sylwester Nawrocki "); +MODULE_DESCRIPTION("Odroid XU3/XU4 audio support"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c index 644f186fd35c4b62a5fa016407385352ae10d066..8f42deaa184be0a759cf226958cc7904bd311270 100644 --- a/sound/soc/samsung/s3c-i2s-v2.c +++ b/sound/soc/samsung/s3c-i2s-v2.c @@ -72,7 +72,6 @@ static inline void dbg_showcon(const char *fn, u32 con) } #endif - /* Turn on or off the transmission path. */ static void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) { diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 85a33ac0a5c443f86159480d9c74d02711cb0c8b..d3b0dc145a560c8a35ddc4320810039f481db5aa 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -43,6 +43,7 @@ struct rsnd_adg { }; #define LRCLK_ASYNC (1 << 0) +#define AUDIO_OUT_48 (1 << 1) #define adg_mode_flags(adg) (adg->flags) #define for_each_rsnd_clk(pos, adg, i) \ @@ -364,7 +365,10 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate) rsnd_adg_set_ssi_clk(ssi_mod, data); - if (!(adg_mode_flags(adg) & LRCLK_ASYNC)) { + if (adg_mode_flags(adg) & LRCLK_ASYNC) { + if (adg_mode_flags(adg) & AUDIO_OUT_48) + ckr = 0x80000000; + } else { if (0 == (rate % 8000)) ckr = 0x80000000; } @@ -427,11 +431,14 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, struct clk *clk; struct device *dev = rsnd_priv_to_dev(priv); struct device_node *np = dev->of_node; + struct property *prop; u32 ckr, rbgx, rbga, rbgb; - u32 rate, req_rate = 0, div; + u32 rate, div; +#define REQ_SIZE 2 + u32 req_rate[REQ_SIZE] = {}; uint32_t count = 0; unsigned long req_48kHz_rate, req_441kHz_rate; - int i; + int i, req_size; const char *parent_clk_name = NULL; static const char * const clkout_name[] = { [CLKOUT] = "audio_clkout", @@ -446,19 +453,32 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, [CLKI] = 0x2, }; - of_property_read_u32(np, "#clock-cells", &count); + ckr = 0; + rbga = 2; /* default 1/6 */ + rbgb = 2; /* default 1/6 */ /* * ADG supports BRRA/BRRB output only * this means all clkout0/1/2/3 will be same rate */ - of_property_read_u32(np, "clock-frequency", &req_rate); + prop = of_find_property(np, "clock-frequency", NULL); + if (!prop) + goto rsnd_adg_get_clkout_end; + + req_size = prop->length / sizeof(u32); + + of_property_read_u32_array(np, "clock-frequency", req_rate, req_size); req_48kHz_rate = 0; req_441kHz_rate = 0; - if (0 == (req_rate % 44100)) - req_441kHz_rate = req_rate; - if (0 == (req_rate % 48000)) - req_48kHz_rate = req_rate; + for (i = 0; i < req_size; i++) { + if (0 == (req_rate[i] % 44100)) + req_441kHz_rate = req_rate[i]; + if (0 == (req_rate[i] % 48000)) + req_48kHz_rate = req_rate[i]; + } + + if (req_rate[0] % 48000 == 0) + adg->flags = AUDIO_OUT_48; /* * This driver is assuming that AUDIO_CLKA/AUDIO_CLKB/AUDIO_CLKC @@ -469,9 +489,6 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, * rsnd_adg_ssi_clk_try_start() * rsnd_ssi_master_clk_start() */ - ckr = 0; - rbga = 2; /* default 1/6 */ - rbgb = 2; /* default 1/6 */ adg->rbga_rate_for_441khz = 0; adg->rbgb_rate_for_48khz = 0; for_each_rsnd_clk(clk, adg, i) { @@ -490,7 +507,8 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, rbga = rbgx; adg->rbga_rate_for_441khz = rate / div; ckr |= brg_table[i] << 20; - if (req_441kHz_rate) + if (req_441kHz_rate && + !(adg_mode_flags(adg) & AUDIO_OUT_48)) parent_clk_name = __clk_get_name(clk); } } @@ -505,10 +523,9 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, rbgb = rbgx; adg->rbgb_rate_for_48khz = rate / div; ckr |= brg_table[i] << 16; - if (req_48kHz_rate) { + if (req_48kHz_rate && + (adg_mode_flags(adg) & AUDIO_OUT_48)) parent_clk_name = __clk_get_name(clk); - ckr |= 0x80000000; - } } } } @@ -518,12 +535,13 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, * this means all clkout0/1/2/3 will be * same rate */ + of_property_read_u32(np, "#clock-cells", &count); /* * for clkout */ if (!count) { clk = clk_register_fixed_rate(dev, clkout_name[CLKOUT], - parent_clk_name, 0, req_rate); + parent_clk_name, 0, req_rate[0]); if (!IS_ERR(clk)) { adg->clkout[CLKOUT] = clk; of_clk_add_provider(np, of_clk_src_simple_get, clk); @@ -536,19 +554,18 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, for (i = 0; i < CLKOUTMAX; i++) { clk = clk_register_fixed_rate(dev, clkout_name[i], parent_clk_name, 0, - req_rate); - if (!IS_ERR(clk)) { - adg->onecell.clks = adg->clkout; - adg->onecell.clk_num = CLKOUTMAX; - + req_rate[0]); + adg->clkout[i] = ERR_PTR(-ENOENT); + if (!IS_ERR(clk)) adg->clkout[i] = clk; - - of_clk_add_provider(np, of_clk_src_onecell_get, - &adg->onecell); - } } + adg->onecell.clks = adg->clkout; + adg->onecell.clk_num = CLKOUTMAX; + of_clk_add_provider(np, of_clk_src_onecell_get, + &adg->onecell); } +rsnd_adg_get_clkout_end: adg->ckr = ckr; adg->rbga = rbga; adg->rbgb = rbgb; @@ -564,6 +581,7 @@ int rsnd_adg_probe(struct rsnd_priv *priv) struct rsnd_adg *adg; struct device *dev = rsnd_priv_to_dev(priv); struct device_node *np = dev->of_node; + int ret; adg = devm_kzalloc(dev, sizeof(*adg), GFP_KERNEL); if (!adg) { @@ -571,8 +589,10 @@ int rsnd_adg_probe(struct rsnd_priv *priv) return -ENOMEM; } - rsnd_mod_init(priv, &adg->mod, &adg_ops, + ret = rsnd_mod_init(priv, &adg->mod, &adg_ops, NULL, NULL, 0, 0); + if (ret) + return ret; rsnd_adg_get_clkin(priv, adg); rsnd_adg_get_clkout(priv, adg); @@ -589,5 +609,10 @@ int rsnd_adg_probe(struct rsnd_priv *priv) void rsnd_adg_remove(struct rsnd_priv *priv) { + struct device *dev = rsnd_priv_to_dev(priv); + struct device_node *np = dev->of_node; + + of_clk_del_provider(np); + rsnd_adg_clk_disable(priv); } diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c index 7d92a24b7cfa558afbb8331401c974c59d5f1ae5..d879c010cf03c4607ebdab3c854a582d816ddf62 100644 --- a/sound/soc/sh/rcar/cmd.c +++ b/sound/soc/sh/rcar/cmd.c @@ -89,6 +89,7 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, dev_dbg(dev, "ctu/mix path = 0x%08x", data); rsnd_mod_write(mod, CMD_ROUTE_SLCT, data); + rsnd_mod_write(mod, CMD_BUSIF_MODE, rsnd_get_busif_shift(io, mod) | 1); rsnd_mod_write(mod, CMD_BUSIF_DALIGN, rsnd_get_dalign(mod, io)); rsnd_adg_set_cmd_timsel_gen2(mod, io); diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 47b370cb2d3b3aeb53f59f6198793f891890c0f8..8c1f4e2e0c4fb8c3ac09a641b4b8a928defb871c 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -96,7 +96,7 @@ #include #include "rsnd.h" -#define RSND_RATES SNDRV_PCM_RATE_8000_96000 +#define RSND_RATES SNDRV_PCM_RATE_8000_192000 #define RSND_FMTS (SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S16_LE) static const struct of_device_id rsnd_of_match[] = { @@ -110,7 +110,6 @@ MODULE_DEVICE_TABLE(of, rsnd_of_match); /* * rsnd_mod functions */ -#ifdef DEBUG void rsnd_mod_make_sure(struct rsnd_mod *mod, enum rsnd_mod_type type) { if (mod->type != type) { @@ -121,7 +120,6 @@ void rsnd_mod_make_sure(struct rsnd_mod *mod, enum rsnd_mod_type type) rsnd_mod_name(mod), rsnd_mod_id(mod)); } } -#endif char *rsnd_mod_name(struct rsnd_mod *mod) { @@ -345,6 +343,57 @@ u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io) return 0x76543210; } +u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod) +{ + enum rsnd_mod_type playback_mods[] = { + RSND_MOD_SRC, + RSND_MOD_CMD, + RSND_MOD_SSIU, + }; + enum rsnd_mod_type capture_mods[] = { + RSND_MOD_CMD, + RSND_MOD_SRC, + RSND_MOD_SSIU, + }; + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + struct rsnd_mod *tmod = NULL; + enum rsnd_mod_type *mods = + rsnd_io_is_play(io) ? + playback_mods : capture_mods; + int i; + + /* + * This is needed for 24bit data + * We need to shift 8bit + * + * Linux 24bit data is located as 0x00****** + * HW 24bit data is located as 0x******00 + * + */ + switch (runtime->sample_bits) { + case 16: + return 0; + case 32: + break; + } + + for (i = 0; i < ARRAY_SIZE(playback_mods); i++) { + tmod = rsnd_io_to_mod(io, mods[i]); + if (tmod) + break; + } + + if (tmod != mod) + return 0; + + if (rsnd_io_is_play(io)) + return (0 << 20) | /* shift to Left */ + (8 << 16); /* 8bit */ + else + return (1 << 20) | /* shift to Right */ + (8 << 16); /* 8bit */ +} + /* * rsnd_dai functions */ @@ -674,12 +723,10 @@ static int rsnd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) /* set clock inversion */ switch (fmt & SND_SOC_DAIFMT_INV_MASK) { case SND_SOC_DAIFMT_NB_IF: - rdai->bit_clk_inv = rdai->bit_clk_inv; rdai->frm_clk_inv = !rdai->frm_clk_inv; break; case SND_SOC_DAIFMT_IB_NF: rdai->bit_clk_inv = !rdai->bit_clk_inv; - rdai->frm_clk_inv = rdai->frm_clk_inv; break; case SND_SOC_DAIFMT_IB_IF: rdai->bit_clk_inv = !rdai->bit_clk_inv; @@ -1002,13 +1049,30 @@ static int rsnd_kctrl_put(struct snd_kcontrol *kctrl, return change; } -static int __rsnd_kctrl_new(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - struct rsnd_kctrl_cfg *cfg, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod)) +struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg) +{ + cfg->cfg.val = cfg->val; + + return &cfg->cfg; +} + +struct rsnd_kctrl_cfg *rsnd_kctrl_init_s(struct rsnd_kctrl_cfg_s *cfg) +{ + cfg->cfg.val = &cfg->val; + + return &cfg->cfg; +} + +int rsnd_kctrl_new(struct rsnd_mod *mod, + struct rsnd_dai_stream *io, + struct snd_soc_pcm_runtime *rtd, + const unsigned char *name, + void (*update)(struct rsnd_dai_stream *io, + struct rsnd_mod *mod), + struct rsnd_kctrl_cfg *cfg, + const char * const *texts, + int size, + u32 max) { struct snd_card *card = rtd->card->snd_card; struct snd_kcontrol *kctrl; @@ -1023,6 +1087,9 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod, }; int ret; + if (size > RSND_MAX_CHANNELS) + return -EINVAL; + kctrl = snd_ctl_new1(&knew, mod); if (!kctrl) return -ENOMEM; @@ -1031,74 +1098,17 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod, if (ret < 0) return ret; - cfg->update = update; - cfg->card = card; - cfg->kctrl = kctrl; - cfg->io = io; + cfg->texts = texts; + cfg->max = max; + cfg->size = size; + cfg->update = update; + cfg->card = card; + cfg->kctrl = kctrl; + cfg->io = io; return 0; } -void _rsnd_kctrl_remove(struct rsnd_kctrl_cfg *cfg) -{ - if (cfg->card && cfg->kctrl) - snd_ctl_remove(cfg->card, cfg->kctrl); - - cfg->card = NULL; - cfg->kctrl = NULL; -} - -int rsnd_kctrl_new_m(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod), - struct rsnd_kctrl_cfg_m *_cfg, - int ch_size, - u32 max) -{ - if (ch_size > RSND_MAX_CHANNELS) - return -EINVAL; - - _cfg->cfg.max = max; - _cfg->cfg.size = ch_size; - _cfg->cfg.val = _cfg->val; - return __rsnd_kctrl_new(mod, io, rtd, name, &_cfg->cfg, update); -} - -int rsnd_kctrl_new_s(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod), - struct rsnd_kctrl_cfg_s *_cfg, - u32 max) -{ - _cfg->cfg.max = max; - _cfg->cfg.size = 1; - _cfg->cfg.val = &_cfg->val; - return __rsnd_kctrl_new(mod, io, rtd, name, &_cfg->cfg, update); -} - -int rsnd_kctrl_new_e(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - struct rsnd_kctrl_cfg_s *_cfg, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod), - const char * const *texts, - u32 max) -{ - _cfg->cfg.max = max; - _cfg->cfg.size = 1; - _cfg->cfg.val = &_cfg->val; - _cfg->cfg.texts = texts; - return __rsnd_kctrl_new(mod, io, rtd, name, &_cfg->cfg, update); -} - /* * snd_soc_platform */ diff --git a/sound/soc/sh/rcar/dvc.c b/sound/soc/sh/rcar/dvc.c index cf8f59cdd8d7fc47ec1ad5d40519cd26bbc1e9bc..463de8360985c5518f90f6f102a2ccccd560ff0b 100644 --- a/sound/soc/sh/rcar/dvc.c +++ b/sound/soc/sh/rcar/dvc.c @@ -218,21 +218,6 @@ static int rsnd_dvc_probe_(struct rsnd_mod *mod, return rsnd_cmd_attach(io, rsnd_mod_id(mod)); } -static int rsnd_dvc_remove_(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct rsnd_priv *priv) -{ - struct rsnd_dvc *dvc = rsnd_mod_to_dvc(mod); - - rsnd_kctrl_remove(dvc->volume); - rsnd_kctrl_remove(dvc->mute); - rsnd_kctrl_remove(dvc->ren); - rsnd_kctrl_remove(dvc->rup); - rsnd_kctrl_remove(dvc->rdown); - - return 0; -} - static int rsnd_dvc_init(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -300,18 +285,18 @@ static int rsnd_dvc_pcm_new(struct rsnd_mod *mod, ret = rsnd_kctrl_new_e(mod, io, rtd, is_play ? "DVC Out Ramp Up Rate" : "DVC In Ramp Up Rate", - &dvc->rup, rsnd_dvc_volume_update, - dvc_ramp_rate, ARRAY_SIZE(dvc_ramp_rate)); + &dvc->rup, + dvc_ramp_rate); if (ret < 0) return ret; ret = rsnd_kctrl_new_e(mod, io, rtd, is_play ? "DVC Out Ramp Down Rate" : "DVC In Ramp Down Rate", - &dvc->rdown, rsnd_dvc_volume_update, - dvc_ramp_rate, ARRAY_SIZE(dvc_ramp_rate)); + &dvc->rdown, + dvc_ramp_rate); if (ret < 0) return ret; @@ -332,7 +317,6 @@ static struct rsnd_mod_ops rsnd_dvc_ops = { .name = DVC_NAME, .dma_req = rsnd_dvc_dma_req, .probe = rsnd_dvc_probe_, - .remove = rsnd_dvc_remove_, .init = rsnd_dvc_init, .quit = rsnd_dvc_quit, .pcm_new = rsnd_dvc_pcm_new, diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c index 63b6d3c28021024b1f06278c5c4f217a394faf8c..4b0980728e13ec75f18ac07135ab4971290b5310 100644 --- a/sound/soc/sh/rcar/gen.c +++ b/sound/soc/sh/rcar/gen.c @@ -236,6 +236,7 @@ static int rsnd_gen2_probe(struct rsnd_priv *priv) RSND_GEN_M_REG(SRC_ROUTE_MODE0, 0xc, 0x20), RSND_GEN_M_REG(SRC_CTRL, 0x10, 0x20), RSND_GEN_M_REG(SRC_INT_ENABLE0, 0x18, 0x20), + RSND_GEN_M_REG(CMD_BUSIF_MODE, 0x184, 0x20), RSND_GEN_M_REG(CMD_BUSIF_DALIGN,0x188, 0x20), RSND_GEN_M_REG(CMD_ROUTE_SLCT, 0x18c, 0x20), RSND_GEN_M_REG(CMD_CTRL, 0x190, 0x20), diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 7410ec0174db7db8c172c60c62f20f91c247df8d..323af41ecfcb8ffea222f25fbe6c524968f29fd5 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -73,6 +73,7 @@ enum rsnd_reg { RSND_REG_SCU_SYS_INT_EN0, RSND_REG_SCU_SYS_INT_EN1, RSND_REG_CMD_CTRL, + RSND_REG_CMD_BUSIF_MODE, RSND_REG_CMD_BUSIF_DALIGN, RSND_REG_CMD_ROUTE_SLCT, RSND_REG_CMDOUT_TIMSEL, @@ -204,6 +205,7 @@ void rsnd_bset(struct rsnd_priv *priv, struct rsnd_mod *mod, enum rsnd_reg reg, u32 mask, u32 data); u32 rsnd_get_adinr_bit(struct rsnd_mod *mod, struct rsnd_dai_stream *io); u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io); +u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod); /* * R-Car DMA @@ -611,35 +613,30 @@ struct rsnd_kctrl_cfg_s { u32 val; }; -void _rsnd_kctrl_remove(struct rsnd_kctrl_cfg *cfg); -#define rsnd_kctrl_remove(_cfg) _rsnd_kctrl_remove(&((_cfg).cfg)) - -int rsnd_kctrl_new_m(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod), - struct rsnd_kctrl_cfg_m *_cfg, - int ch_size, - u32 max); -int rsnd_kctrl_new_s(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod), - struct rsnd_kctrl_cfg_s *_cfg, - u32 max); -int rsnd_kctrl_new_e(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - struct snd_soc_pcm_runtime *rtd, - const unsigned char *name, - struct rsnd_kctrl_cfg_s *_cfg, - void (*update)(struct rsnd_dai_stream *io, - struct rsnd_mod *mod), - const char * const *texts, - u32 max); +struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg); +struct rsnd_kctrl_cfg *rsnd_kctrl_init_s(struct rsnd_kctrl_cfg_s *cfg); +int rsnd_kctrl_new(struct rsnd_mod *mod, + struct rsnd_dai_stream *io, + struct snd_soc_pcm_runtime *rtd, + const unsigned char *name, + void (*update)(struct rsnd_dai_stream *io, + struct rsnd_mod *mod), + struct rsnd_kctrl_cfg *cfg, + const char * const *texts, + int size, + u32 max); + +#define rsnd_kctrl_new_m(mod, io, rtd, name, update, cfg, size, max) \ + rsnd_kctrl_new(mod, io, rtd, name, update, rsnd_kctrl_init_m(cfg), \ + NULL, size, max) + +#define rsnd_kctrl_new_s(mod, io, rtd, name, update, cfg, max) \ + rsnd_kctrl_new(mod, io, rtd, name, update, rsnd_kctrl_init_s(cfg), \ + NULL, 1, max) + +#define rsnd_kctrl_new_e(mod, io, rtd, name, update, cfg, texts) \ + rsnd_kctrl_new(mod, io, rtd, name, update, rsnd_kctrl_init_s(cfg), \ + texts, 1, ARRAY_SIZE(texts)) /* * R-Car SSI @@ -732,8 +729,8 @@ void rsnd_cmd_remove(struct rsnd_priv *priv); int rsnd_cmd_attach(struct rsnd_dai_stream *io, int id); struct rsnd_mod *rsnd_cmd_mod_get(struct rsnd_priv *priv, int id); -#ifdef DEBUG void rsnd_mod_make_sure(struct rsnd_mod *mod, enum rsnd_mod_type type); +#ifdef DEBUG #define rsnd_mod_confirm_ssi(mssi) rsnd_mod_make_sure(mssi, RSND_MOD_SSI) #define rsnd_mod_confirm_src(msrc) rsnd_mod_make_sure(msrc, RSND_MOD_SRC) #define rsnd_mod_confirm_dvc(mdvc) rsnd_mod_make_sure(mdvc, RSND_MOD_DVC) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 42db48db09ba9a36abfbc807469e038ad86d3eaf..76a477a3ccb5d88e18fd8398d9ad2b2616a99f48 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -167,6 +167,7 @@ static int rsnd_src_hw_params(struct rsnd_mod *mod, * dpcm_fe_dai_hw_params() * dpcm_be_dai_hw_params() */ + src->convert_rate = 0; if (fe->dai_link->dynamic) { int stream = substream->stream; struct snd_soc_dpcm *dpcm; @@ -189,11 +190,13 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, struct rsnd_priv *priv = rsnd_mod_to_priv(mod); struct device *dev = rsnd_priv_to_dev(priv); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + int is_play = rsnd_io_is_play(io); int use_src = 0; u32 fin, fout; u32 ifscr, fsrate, adinr; u32 cr, route; u32 bsdsr, bsisr; + u32 i_busif, o_busif, tmp; uint ratio; if (!runtime) @@ -269,6 +272,11 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, break; } + /* BUSIF_MODE */ + tmp = rsnd_get_busif_shift(io, mod); + i_busif = ( is_play ? tmp : 0) | 1; + o_busif = (!is_play ? tmp : 0) | 1; + rsnd_mod_write(mod, SRC_ROUTE_MODE0, route); rsnd_mod_write(mod, SRC_SRCIR, 1); /* initialize */ @@ -280,8 +288,9 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_mod_write(mod, SRC_BSISR, bsisr); rsnd_mod_write(mod, SRC_SRCIR, 0); /* cancel initialize */ - rsnd_mod_write(mod, SRC_I_BUSIF_MODE, 1); - rsnd_mod_write(mod, SRC_O_BUSIF_MODE, 1); + rsnd_mod_write(mod, SRC_I_BUSIF_MODE, i_busif); + rsnd_mod_write(mod, SRC_O_BUSIF_MODE, o_busif); + rsnd_mod_write(mod, SRC_BUSIF_DALIGN, rsnd_get_dalign(mod, io)); rsnd_adg_set_src_timesel_gen2(mod, io, fin, fout); @@ -414,8 +423,6 @@ static int rsnd_src_quit(struct rsnd_mod *mod, rsnd_mod_power_off(mod); - src->convert_rate = 0; - /* reset sync convert_rate */ src->sync.val = 0; diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 411bda2387adbcdab2dc510706986f5ac92b3cc1..91e5c07911b4a5b14364becf64c568d7a61cc1c4 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -227,6 +227,15 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, */ for (j = 0; j < ARRAY_SIZE(ssi_clk_mul_table); j++) { + /* + * It will set SSIWSR.CONT here, but SSICR.CKDV = 000 + * with it is not allowed. (SSIWSR.WS_MODE with + * SSICR.CKDV = 000 is not allowed either). + * Skip it. See SSICR.CKDV + */ + if (j == 0) + continue; + /* * this driver is assuming that * system word is 32bit x chan @@ -293,7 +302,7 @@ static void rsnd_ssi_config_init(struct rsnd_mod *mod, * always use 32bit system word. * see also rsnd_ssi_master_clk_enable() */ - cr_own = FORCE | SWL_32 | PDTA; + cr_own = FORCE | SWL_32; if (rdai->bit_clk_inv) cr_own |= SCKP; @@ -541,6 +550,13 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); u32 *buf = (u32 *)(runtime->dma_area + rsnd_dai_pointer_offset(io, 0)); + int shift = 0; + + switch (runtime->sample_bits) { + case 32: + shift = 8; + break; + } /* * 8/16/32 data can be assesse to TDR/RDR register @@ -548,9 +564,9 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, * see rsnd_ssi_init() */ if (rsnd_io_is_play(io)) - rsnd_mod_write(mod, SSITDR, *buf); + rsnd_mod_write(mod, SSITDR, (*buf) << shift); else - *buf = rsnd_mod_read(mod, SSIRDR); + *buf = (rsnd_mod_read(mod, SSIRDR) >> shift); elapsed = rsnd_dai_pointer_update(io, sizeof(*buf)); } @@ -700,6 +716,11 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, struct rsnd_priv *priv) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + struct rsnd_mod *ssi_parent_mod = rsnd_io_to_mod_ssip(io); + + /* Do nothing for SSI parent mod */ + if (ssi_parent_mod == mod) + return 0; /* PIO will request IRQ again */ free_irq(ssi->irq, mod); diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 14fafdaf1395f9737191df18599ee58fc4f858fd..512d238b79e2895f13a4b1b7be3e145859a65280 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -144,7 +144,8 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, (rsnd_io_is_play(io) ? rsnd_runtime_channel_after_ctu(io) : rsnd_runtime_channel_original(io))); - rsnd_mod_write(mod, SSI_BUSIF_MODE, 1); + rsnd_mod_write(mod, SSI_BUSIF_MODE, + rsnd_get_busif_shift(io, mod) | 1); rsnd_mod_write(mod, SSI_BUSIF_DALIGN, rsnd_get_dalign(mod, io)); } diff --git a/sound/soc/sirf/sirf-audio-port.c b/sound/soc/sirf/sirf-audio-port.c index 3f2cce03275c3b17a66adf9d4c761802006b89b8..be066de74aaad451cf05ac6077c15b4050a539d9 100644 --- a/sound/soc/sirf/sirf-audio-port.c +++ b/sound/soc/sirf/sirf-audio-port.c @@ -19,6 +19,7 @@ struct sirf_audio_port { static int sirf_audio_port_dai_probe(struct snd_soc_dai *dai) { struct sirf_audio_port *port = snd_soc_dai_get_drvdata(dai); + snd_soc_dai_init_dma_data(dai, &port->playback_dma_data, &port->capture_dma_data); return 0; diff --git a/sound/soc/sirf/sirf-audio.c b/sound/soc/sirf/sirf-audio.c index 94ea152e036218e46452c97425b41dd5a468794a..f2bc50790f763cbfe537ea7316e072102c401d0f 100644 --- a/sound/soc/sirf/sirf-audio.c +++ b/sound/soc/sirf/sirf-audio.c @@ -27,6 +27,7 @@ static int sirf_audio_hp_event(struct snd_soc_dapm_widget *w, struct snd_soc_card *card = dapm->card; struct sirf_audio_card *sirf_audio_card = snd_soc_card_get_drvdata(card); int on = !SND_SOC_DAPM_EVENT_OFF(event); + if (gpio_is_valid(sirf_audio_card->gpio_hp_pa)) gpio_set_value(sirf_audio_card->gpio_hp_pa, on); return 0; diff --git a/sound/soc/sirf/sirf-usp.c b/sound/soc/sirf/sirf-usp.c index 45fc06c0e0e551f00ef6f89717a8dbab2dcdbd91..77e7dcf969d0ce2e642f881c497d534bb7029df5 100644 --- a/sound/soc/sirf/sirf-usp.c +++ b/sound/soc/sirf/sirf-usp.c @@ -71,6 +71,7 @@ static void sirf_usp_rx_disable(struct sirf_usp *usp) static int sirf_usp_pcm_dai_probe(struct snd_soc_dai *dai) { struct sirf_usp *usp = snd_soc_dai_get_drvdata(dai); + snd_soc_dai_init_dma_data(dai, &usp->playback_dma_data, &usp->capture_dma_data); return 0; @@ -294,6 +295,7 @@ static struct snd_soc_dai_driver sirf_usp_pcm_dai = { static int sirf_usp_pcm_runtime_suspend(struct device *dev) { struct sirf_usp *usp = dev_get_drvdata(dev); + clk_disable_unprepare(usp->clk); return 0; } @@ -302,6 +304,7 @@ static int sirf_usp_pcm_runtime_resume(struct device *dev) { struct sirf_usp *usp = dev_get_drvdata(dev); int ret; + ret = clk_prepare_enable(usp->clk); if (ret) { dev_err(dev, "clk_enable failed: %d\n", ret); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 2722bb0c557310d97816cfa7857b24d7c9bd4948..754e3ef8d7ae1b8b188c3e52986f2c306fb7b763 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -936,7 +936,7 @@ static struct snd_soc_component *soc_find_component( * * @dlc: name of the DAI and optional component info to match * - * This function will search all regsitered components and their DAIs to + * This function will search all registered components and their DAIs to * find the DAI of the same name. The component's of_node and name * should also match if being specified. * @@ -1776,7 +1776,6 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num) } component->init = aux_dev->init; - component->auxiliary = 1; list_add(&component->card_aux_list, &card->aux_comp_list); return 0; @@ -1788,14 +1787,13 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num) static int soc_probe_aux_devices(struct snd_soc_card *card) { - struct snd_soc_component *comp, *tmp; + struct snd_soc_component *comp; int order; int ret; for (order = SND_SOC_COMP_ORDER_FIRST; order <= SND_SOC_COMP_ORDER_LAST; order++) { - list_for_each_entry_safe(comp, tmp, &card->aux_comp_list, - card_aux_list) { + list_for_each_entry(comp, &card->aux_comp_list, card_aux_list) { if (comp->driver->probe_order == order) { ret = soc_probe_component(card, comp); if (ret < 0) { @@ -1804,7 +1802,6 @@ static int soc_probe_aux_devices(struct snd_soc_card *card) comp->name, ret); return ret; } - list_del(&comp->card_aux_list); } } } @@ -1820,14 +1817,12 @@ static void soc_remove_aux_devices(struct snd_soc_card *card) for (order = SND_SOC_COMP_ORDER_FIRST; order <= SND_SOC_COMP_ORDER_LAST; order++) { list_for_each_entry_safe(comp, _comp, - &card->component_dev_list, card_list) { - - if (!comp->auxiliary) - continue; + &card->aux_comp_list, card_aux_list) { if (comp->driver->remove_order == order) { soc_remove_component(comp); - comp->auxiliary = 0; + /* remove it from the card's aux_comp_list */ + list_del(&comp->card_aux_list); } } } @@ -1918,6 +1913,7 @@ int snd_soc_runtime_set_dai_fmt(struct snd_soc_pcm_runtime *rtd, EXPORT_SYMBOL_GPL(snd_soc_runtime_set_dai_fmt); +#ifdef CONFIG_DMI /* Trim special characters, and replace '-' with '_' since '-' is used to * separate different DMI fields in the card long name. Only number and * alphabet characters and a few separator characters are kept. @@ -2049,6 +2045,7 @@ int snd_soc_set_dmi_name(struct snd_soc_card *card, const char *flavour) return 0; } EXPORT_SYMBOL_GPL(snd_soc_set_dmi_name); +#endif /* CONFIG_DMI */ static int snd_soc_instantiate_card(struct snd_soc_card *card) { @@ -2190,6 +2187,9 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) snd_soc_dapm_add_routes(&card->dapm, card->of_dapm_routes, card->num_of_dapm_routes); + /* try to set some sane longname if DMI is available */ + snd_soc_set_dmi_name(card, NULL); + snprintf(card->snd_card->shortname, sizeof(card->snd_card->shortname), "%s", card->name); snprintf(card->snd_card->longname, sizeof(card->snd_card->longname), @@ -2286,6 +2286,9 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card) list_for_each_entry(rtd, &card->rtd_list, list) flush_delayed_work(&rtd->delayed_work); + /* free the ALSA card at first; this syncs with pending operations */ + snd_card_free(card->snd_card); + /* remove and free each DAI */ soc_remove_dai_links(card); soc_remove_pcm_runtimes(card); @@ -2300,9 +2303,7 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card) if (card->remove) card->remove(card); - snd_card_free(card->snd_card); return 0; - } /* removes a socdev */ @@ -3139,7 +3140,7 @@ static int snd_soc_component_initialize(struct snd_soc_component *component, component->suspend = component->driver->suspend; component->resume = component->driver->resume; component->pcm_new = component->driver->pcm_new; - component->pcm_free= component->driver->pcm_free; + component->pcm_free = component->driver->pcm_free; dapm = &component->dapm; dapm->dev = dev; @@ -3240,6 +3241,11 @@ static void snd_soc_component_cleanup(struct snd_soc_component *component) static void snd_soc_component_del_unlocked(struct snd_soc_component *component) { + struct snd_soc_card *card = component->card; + + if (card) + snd_soc_unregister_card(card); + list_del(&component->list); } diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index fbaa1bb41102f45920f478008070ee53cdbbb6b4..7daf21fee355b22c0a522d51c3de723fd6b7a1fd 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -19,8 +19,27 @@ #include #include #include +#include #include +/** + * snd_soc_codec_set_jack - configure codec jack. + * @codec: CODEC + * @jack: structure to use for the jack + * @data: can be used if codec driver need extra data for configuring jack + * + * Configures and enables jack detection function. + */ +int snd_soc_codec_set_jack(struct snd_soc_codec *codec, + struct snd_soc_jack *jack, void *data) +{ + if (codec->driver->set_jack) + return codec->driver->set_jack(codec, jack, data); + else + return -EINVAL; +} +EXPORT_SYMBOL_GPL(snd_soc_codec_set_jack); + /** * snd_soc_card_jack_new - Create a new jack * @card: ASoC card @@ -293,6 +312,27 @@ static void gpio_work(struct work_struct *work) snd_soc_jack_gpio_detect(gpio); } +static int snd_soc_jack_pm_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct snd_soc_jack_gpio *gpio = + container_of(nb, struct snd_soc_jack_gpio, pm_notifier); + + switch (action) { + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + /* + * Use workqueue so we do not have to care about running + * concurrently with work triggered by the interrupt handler. + */ + queue_delayed_work(system_power_efficient_wq, &gpio->work, 0); + break; + } + + return NOTIFY_DONE; +} + /** * snd_soc_jack_add_gpios - Associate GPIO pins with an ASoC jack * @@ -369,6 +409,13 @@ int snd_soc_jack_add_gpios(struct snd_soc_jack *jack, int count, i, ret); } + /* + * Register PM notifier so we do not miss state transitions + * happening while system is asleep. + */ + gpios[i].pm_notifier.notifier_call = snd_soc_jack_pm_notifier; + register_pm_notifier(&gpios[i].pm_notifier); + /* Expose GPIO value over sysfs for diagnostic purposes */ gpiod_export(gpios[i].desc, false); @@ -428,6 +475,7 @@ void snd_soc_jack_free_gpios(struct snd_soc_jack *jack, int count, for (i = 0; i < count; i++) { gpiod_unexport(gpios[i].desc); + unregister_pm_notifier(&gpios[i].pm_notifier); free_irq(gpiod_to_irq(gpios[i].desc), &gpios[i]); cancel_delayed_work_sync(&gpios[i].work); gpiod_put(gpios[i].desc); diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 3e9b1c0bb1ce3cb1864e1825ade56720803df289..002772e3ba2cb5f193ef35b4b96c8ee6eadc46d1 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -495,12 +495,13 @@ static void remove_widget(struct snd_soc_component *comp, struct snd_kcontrol *kcontrol = w->kcontrols[i]; struct soc_enum *se = (struct soc_enum *)kcontrol->private_value; + int j; snd_ctl_remove(card, kcontrol); kfree(se->dobj.control.dvalues); - for (i = 0; i < se->items; i++) - kfree(se->dobj.control.dtexts[i]); + for (j = 0; j < se->items; j++) + kfree(se->dobj.control.dtexts[j]); kfree(se); } @@ -933,6 +934,7 @@ static int soc_tplg_denum_create_texts(struct soc_enum *se, } } + se->texts = (const char * const *)se->dobj.control.dtexts; return 0; err: diff --git a/sound/soc/sti/uniperif.h b/sound/soc/sti/uniperif.h index d487dd2ef016fee0f25d6a5705feb98a0f26356a..cfcb0ea9d99d8981abc2425d15f3e7148e0dc7a6 100644 --- a/sound/soc/sti/uniperif.h +++ b/sound/soc/sti/uniperif.h @@ -1299,6 +1299,7 @@ struct uniperif { int ver; /* IP version, used by register access macros */ struct regmap_field *clk_sel; struct regmap_field *valid_sel; + spinlock_t irq_lock; /* use to prevent race condition with IRQ */ /* capabilities */ const struct snd_pcm_hardware *hw; diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index 60ae31a303ab001e5724518248f59ef12033570b..d8b6936e544e3c9e42444130f9087dc31be73357 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -65,10 +65,13 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) unsigned int status; unsigned int tmp; - if (player->state == UNIPERIF_STATE_STOPPED) { - /* Unexpected IRQ: do nothing */ - return IRQ_NONE; - } + spin_lock(&player->irq_lock); + if (!player->substream) + goto irq_spin_unlock; + + snd_pcm_stream_lock(player->substream); + if (player->state == UNIPERIF_STATE_STOPPED) + goto stream_unlock; /* Get interrupt status & clear them immediately */ status = GET_UNIPERIF_ITS(player); @@ -88,9 +91,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_FIFO_ERROR(player); /* Stop the player */ - snd_pcm_stream_lock(player->substream); snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); - snd_pcm_stream_unlock(player->substream); } ret = IRQ_HANDLED; @@ -104,9 +105,7 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) SET_UNIPERIF_ITM_BCLR_DMA_ERROR(player); /* Stop the player */ - snd_pcm_stream_lock(player->substream); snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); - snd_pcm_stream_unlock(player->substream); ret = IRQ_HANDLED; } @@ -116,7 +115,8 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) if (!player->underflow_enabled) { dev_err(player->dev, "unexpected Underflow recovering\n"); - return -EPERM; + ret = -EPERM; + goto stream_unlock; } /* Read the underflow recovery duration */ tmp = GET_UNIPERIF_STATUS_1_UNDERFLOW_DURATION(player); @@ -138,13 +138,16 @@ static irqreturn_t uni_player_irq_handler(int irq, void *dev_id) dev_err(player->dev, "Underflow recovery failed\n"); /* Stop the player */ - snd_pcm_stream_lock(player->substream); snd_pcm_stop(player->substream, SNDRV_PCM_STATE_XRUN); - snd_pcm_stream_unlock(player->substream); ret = IRQ_HANDLED; } +stream_unlock: + snd_pcm_stream_unlock(player->substream); +irq_spin_unlock: + spin_unlock(&player->irq_lock); + return ret; } @@ -588,6 +591,7 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol, struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct uniperif *player = priv->dai_data.uni; struct snd_aes_iec958 *iec958 = &player->stream_settings.iec958; + unsigned long flags; mutex_lock(&player->ctrl_lock); iec958->status[0] = ucontrol->value.iec958.status[0]; @@ -596,12 +600,14 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol, iec958->status[3] = ucontrol->value.iec958.status[3]; mutex_unlock(&player->ctrl_lock); + spin_lock_irqsave(&player->irq_lock, flags); if (player->substream && player->substream->runtime) uni_player_set_channel_status(player, player->substream->runtime); else uni_player_set_channel_status(player, NULL); + spin_unlock_irqrestore(&player->irq_lock, flags); return 0; } @@ -686,9 +692,12 @@ static int uni_player_startup(struct snd_pcm_substream *substream, { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct uniperif *player = priv->dai_data.uni; + unsigned long flags; int ret; + spin_lock_irqsave(&player->irq_lock, flags); player->substream = substream; + spin_unlock_irqrestore(&player->irq_lock, flags); player->clk_adj = 0; @@ -986,12 +995,15 @@ static void uni_player_shutdown(struct snd_pcm_substream *substream, { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct uniperif *player = priv->dai_data.uni; + unsigned long flags; + spin_lock_irqsave(&player->irq_lock, flags); if (player->state != UNIPERIF_STATE_STOPPED) /* Stop the player */ uni_player_stop(player); player->substream = NULL; + spin_unlock_irqrestore(&player->irq_lock, flags); } static int uni_player_parse_dt_audio_glue(struct platform_device *pdev, @@ -1062,7 +1074,7 @@ int uni_player_init(struct platform_device *pdev, player->clk = of_clk_get(pdev->dev.of_node, 0); if (IS_ERR(player->clk)) { dev_err(player->dev, "Failed to get clock\n"); - ret = PTR_ERR(player->clk); + return PTR_ERR(player->clk); } /* Select the frequency synthesizer clock */ @@ -1096,6 +1108,7 @@ int uni_player_init(struct platform_device *pdev, } mutex_init(&player->ctrl_lock); + spin_lock_init(&player->irq_lock); /* Ensure that disabled by default */ SET_UNIPERIF_CONFIG_BACK_STALL_REQ_DISABLE(player); diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 93a8df6ed880ea8cc32b62f94ab0b44befbe06ef..ee0055e608529195fda9b76a4226ebf752ead6e3 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -46,10 +46,15 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id) struct uniperif *reader = dev_id; unsigned int status; + spin_lock(&reader->irq_lock); + if (!reader->substream) + goto irq_spin_unlock; + + snd_pcm_stream_lock(reader->substream); if (reader->state == UNIPERIF_STATE_STOPPED) { /* Unexpected IRQ: do nothing */ dev_warn(reader->dev, "unexpected IRQ\n"); - return IRQ_HANDLED; + goto stream_unlock; } /* Get interrupt status & clear them immediately */ @@ -60,13 +65,16 @@ static irqreturn_t uni_reader_irq_handler(int irq, void *dev_id) if (unlikely(status & UNIPERIF_ITS_FIFO_ERROR_MASK(reader))) { dev_err(reader->dev, "FIFO error detected\n"); - snd_pcm_stream_lock(reader->substream); snd_pcm_stop(reader->substream, SNDRV_PCM_STATE_XRUN); - snd_pcm_stream_unlock(reader->substream); - return IRQ_HANDLED; + ret = IRQ_HANDLED; } +stream_unlock: + snd_pcm_stream_unlock(reader->substream); +irq_spin_unlock: + spin_unlock(&reader->irq_lock); + return ret; } @@ -347,9 +355,12 @@ static int uni_reader_startup(struct snd_pcm_substream *substream, { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct uniperif *reader = priv->dai_data.uni; + unsigned long flags; int ret; + spin_lock_irqsave(&reader->irq_lock, flags); reader->substream = substream; + spin_unlock_irqrestore(&reader->irq_lock, flags); if (!UNIPERIF_TYPE_IS_TDM(reader)) return 0; @@ -375,12 +386,15 @@ static void uni_reader_shutdown(struct snd_pcm_substream *substream, { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct uniperif *reader = priv->dai_data.uni; + unsigned long flags; + spin_lock_irqsave(&reader->irq_lock, flags); if (reader->state != UNIPERIF_STATE_STOPPED) { /* Stop the reader */ uni_reader_stop(reader); } reader->substream = NULL; + spin_unlock_irqrestore(&reader->irq_lock, flags); } static const struct snd_soc_dai_ops uni_reader_dai_ops = { @@ -415,6 +429,8 @@ int uni_reader_init(struct platform_device *pdev, return -EBUSY; } + spin_lock_init(&reader->irq_lock); + return 0; } EXPORT_SYMBOL_GPL(uni_reader_init); diff --git a/sound/soc/stm/Kconfig b/sound/soc/stm/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..972970f0890aa442dbfdbff6bf4d8ba6663bae42 --- /dev/null +++ b/sound/soc/stm/Kconfig @@ -0,0 +1,8 @@ +menuconfig SND_SOC_STM32 + tristate "STMicroelectronics STM32 SOC audio support" + depends on ARCH_STM32 || COMPILE_TEST + depends on SND_SOC + select SND_SOC_GENERIC_DMAENGINE_PCM + select REGMAP_MMIO + help + Say Y if you want to enable ASoC-support for STM32 diff --git a/sound/soc/stm/Makefile b/sound/soc/stm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e466a47596983aebc1a83ed9f366d752a985126e --- /dev/null +++ b/sound/soc/stm/Makefile @@ -0,0 +1,6 @@ +# SAI +snd-soc-stm32-sai-sub-objs := stm32_sai_sub.o +obj-$(CONFIG_SND_SOC_STM32) += snd-soc-stm32-sai-sub.o + +snd-soc-stm32-sai-objs := stm32_sai.o +obj-$(CONFIG_SND_SOC_STM32) += snd-soc-stm32-sai.o diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c new file mode 100644 index 0000000000000000000000000000000000000000..2a27a26bf7a152913d6cc99c868ad213abb8a844 --- /dev/null +++ b/sound/soc/stm/stm32_sai.c @@ -0,0 +1,115 @@ +/* + * STM32 ALSA SoC Digital Audio Interface (SAI) driver. + * + * Copyright (C) 2016, STMicroelectronics - All Rights Reserved + * Author(s): Olivier Moysan for STMicroelectronics. + * + * License terms: GPL V2.0. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "stm32_sai.h" + +static const struct of_device_id stm32_sai_ids[] = { + { .compatible = "st,stm32f4-sai", .data = (void *)SAI_STM32F4 }, + {} +}; + +static int stm32_sai_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct stm32_sai_data *sai; + struct reset_control *rst; + struct resource *res; + void __iomem *base; + const struct of_device_id *of_id; + + sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL); + if (!sai) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + of_id = of_match_device(stm32_sai_ids, &pdev->dev); + if (of_id) + sai->version = (enum stm32_sai_version)of_id->data; + else + return -EINVAL; + + sai->clk_x8k = devm_clk_get(&pdev->dev, "x8k"); + if (IS_ERR(sai->clk_x8k)) { + dev_err(&pdev->dev, "missing x8k parent clock\n"); + return PTR_ERR(sai->clk_x8k); + } + + sai->clk_x11k = devm_clk_get(&pdev->dev, "x11k"); + if (IS_ERR(sai->clk_x11k)) { + dev_err(&pdev->dev, "missing x11k parent clock\n"); + return PTR_ERR(sai->clk_x11k); + } + + /* init irqs */ + sai->irq = platform_get_irq(pdev, 0); + if (sai->irq < 0) { + dev_err(&pdev->dev, "no irq for node %s\n", pdev->name); + return sai->irq; + } + + /* reset */ + rst = reset_control_get(&pdev->dev, NULL); + if (!IS_ERR(rst)) { + reset_control_assert(rst); + udelay(2); + reset_control_deassert(rst); + } + + sai->pdev = pdev; + platform_set_drvdata(pdev, sai); + + return of_platform_populate(np, NULL, NULL, &pdev->dev); +} + +static int stm32_sai_remove(struct platform_device *pdev) +{ + of_platform_depopulate(&pdev->dev); + + return 0; +} + +MODULE_DEVICE_TABLE(of, stm32_sai_ids); + +static struct platform_driver stm32_sai_driver = { + .driver = { + .name = "st,stm32-sai", + .of_match_table = stm32_sai_ids, + }, + .probe = stm32_sai_probe, + .remove = stm32_sai_remove, +}; + +module_platform_driver(stm32_sai_driver); + +MODULE_DESCRIPTION("STM32 Soc SAI Interface"); +MODULE_AUTHOR("Olivier Moysan, "); +MODULE_ALIAS("platform:st,stm32-sai"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/stm/stm32_sai.h b/sound/soc/stm/stm32_sai.h new file mode 100644 index 0000000000000000000000000000000000000000..a801fda5066ffe6960e716b2b575bb2b547ef68d --- /dev/null +++ b/sound/soc/stm/stm32_sai.h @@ -0,0 +1,200 @@ +/* + * STM32 ALSA SoC Digital Audio Interface (SAI) driver. + * + * Copyright (C) 2016, STMicroelectronics - All Rights Reserved + * Author(s): Olivier Moysan for STMicroelectronics. + * + * License terms: GPL V2.0. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + */ + +/******************** SAI Register Map **************************************/ + +/* common register */ +#define STM_SAI_GCR 0x00 + +/* Sub-block A&B registers offsets, relative to A&B sub-block addresses */ +#define STM_SAI_CR1_REGX 0x00 /* A offset: 0x04. B offset: 0x24 */ +#define STM_SAI_CR2_REGX 0x04 +#define STM_SAI_FRCR_REGX 0x08 +#define STM_SAI_SLOTR_REGX 0x0C +#define STM_SAI_IMR_REGX 0x10 +#define STM_SAI_SR_REGX 0x14 +#define STM_SAI_CLRFR_REGX 0x18 +#define STM_SAI_DR_REGX 0x1C + +/******************** Bit definition for SAI_GCR register *******************/ +#define SAI_GCR_SYNCIN_SHIFT 0 +#define SAI_GCR_SYNCIN_MASK GENMASK(1, SAI_GCR_SYNCIN_SHIFT) +#define SAI_GCR_SYNCIN_SET(x) ((x) << SAI_GCR_SYNCIN_SHIFT) + +#define SAI_GCR_SYNCOUT_SHIFT 4 +#define SAI_GCR_SYNCOUT_MASK GENMASK(5, SAI_GCR_SYNCOUT_SHIFT) +#define SAI_GCR_SYNCOUT_SET(x) ((x) << SAI_GCR_SYNCOUT_SHIFT) + +/******************* Bit definition for SAI_XCR1 register *******************/ +#define SAI_XCR1_RX_TX_SHIFT 0 +#define SAI_XCR1_RX_TX BIT(SAI_XCR1_RX_TX_SHIFT) +#define SAI_XCR1_SLAVE_SHIFT 1 +#define SAI_XCR1_SLAVE BIT(SAI_XCR1_SLAVE_SHIFT) + +#define SAI_XCR1_PRTCFG_SHIFT 2 +#define SAI_XCR1_PRTCFG_MASK GENMASK(3, SAI_XCR1_PRTCFG_SHIFT) +#define SAI_XCR1_PRTCFG_SET(x) ((x) << SAI_XCR1_PRTCFG_SHIFT) + +#define SAI_XCR1_DS_SHIFT 5 +#define SAI_XCR1_DS_MASK GENMASK(7, SAI_XCR1_DS_SHIFT) +#define SAI_XCR1_DS_SET(x) ((x) << SAI_XCR1_DS_SHIFT) + +#define SAI_XCR1_LSBFIRST_SHIFT 8 +#define SAI_XCR1_LSBFIRST BIT(SAI_XCR1_LSBFIRST_SHIFT) +#define SAI_XCR1_CKSTR_SHIFT 9 +#define SAI_XCR1_CKSTR BIT(SAI_XCR1_CKSTR_SHIFT) + +#define SAI_XCR1_SYNCEN_SHIFT 10 +#define SAI_XCR1_SYNCEN_MASK GENMASK(11, SAI_XCR1_SYNCEN_SHIFT) +#define SAI_XCR1_SYNCEN_SET(x) ((x) << SAI_XCR1_SYNCEN_SHIFT) + +#define SAI_XCR1_MONO_SHIFT 12 +#define SAI_XCR1_MONO BIT(SAI_XCR1_MONO_SHIFT) +#define SAI_XCR1_OUTDRIV_SHIFT 13 +#define SAI_XCR1_OUTDRIV BIT(SAI_XCR1_OUTDRIV_SHIFT) +#define SAI_XCR1_SAIEN_SHIFT 16 +#define SAI_XCR1_SAIEN BIT(SAI_XCR1_SAIEN_SHIFT) +#define SAI_XCR1_DMAEN_SHIFT 17 +#define SAI_XCR1_DMAEN BIT(SAI_XCR1_DMAEN_SHIFT) +#define SAI_XCR1_NODIV_SHIFT 19 +#define SAI_XCR1_NODIV BIT(SAI_XCR1_NODIV_SHIFT) + +#define SAI_XCR1_MCKDIV_SHIFT 20 +#define SAI_XCR1_MCKDIV_WIDTH 4 +#define SAI_XCR1_MCKDIV_MASK GENMASK(24, SAI_XCR1_MCKDIV_SHIFT) +#define SAI_XCR1_MCKDIV_SET(x) ((x) << SAI_XCR1_MCKDIV_SHIFT) +#define SAI_XCR1_MCKDIV_MAX ((1 << SAI_XCR1_MCKDIV_WIDTH) - 1) + +#define SAI_XCR1_OSR_SHIFT 26 +#define SAI_XCR1_OSR BIT(SAI_XCR1_OSR_SHIFT) + +/******************* Bit definition for SAI_XCR2 register *******************/ +#define SAI_XCR2_FTH_SHIFT 0 +#define SAI_XCR2_FTH_MASK GENMASK(2, SAI_XCR2_FTH_SHIFT) +#define SAI_XCR2_FTH_SET(x) ((x) << SAI_XCR2_FTH_SHIFT) + +#define SAI_XCR2_FFLUSH_SHIFT 3 +#define SAI_XCR2_FFLUSH BIT(SAI_XCR2_FFLUSH_SHIFT) +#define SAI_XCR2_TRIS_SHIFT 4 +#define SAI_XCR2_TRIS BIT(SAI_XCR2_TRIS_SHIFT) +#define SAI_XCR2_MUTE_SHIFT 5 +#define SAI_XCR2_MUTE BIT(SAI_XCR2_MUTE_SHIFT) +#define SAI_XCR2_MUTEVAL_SHIFT 6 +#define SAI_XCR2_MUTEVAL BIT(SAI_XCR2_MUTEVAL_SHIFT) + +#define SAI_XCR2_MUTECNT_SHIFT 7 +#define SAI_XCR2_MUTECNT_MASK GENMASK(12, SAI_XCR2_MUTECNT_SHIFT) +#define SAI_XCR2_MUTECNT_SET(x) ((x) << SAI_XCR2_MUTECNT_SHIFT) + +#define SAI_XCR2_CPL_SHIFT 13 +#define SAI_XCR2_CPL BIT(SAI_XCR2_CPL_SHIFT) + +#define SAI_XCR2_COMP_SHIFT 14 +#define SAI_XCR2_COMP_MASK GENMASK(15, SAI_XCR2_COMP_SHIFT) +#define SAI_XCR2_COMP_SET(x) ((x) << SAI_XCR2_COMP_SHIFT) + +/****************** Bit definition for SAI_XFRCR register *******************/ +#define SAI_XFRCR_FRL_SHIFT 0 +#define SAI_XFRCR_FRL_MASK GENMASK(7, SAI_XFRCR_FRL_SHIFT) +#define SAI_XFRCR_FRL_SET(x) ((x) << SAI_XFRCR_FRL_SHIFT) + +#define SAI_XFRCR_FSALL_SHIFT 8 +#define SAI_XFRCR_FSALL_MASK GENMASK(14, SAI_XFRCR_FSALL_SHIFT) +#define SAI_XFRCR_FSALL_SET(x) ((x) << SAI_XFRCR_FSALL_SHIFT) + +#define SAI_XFRCR_FSDEF_SHIFT 16 +#define SAI_XFRCR_FSDEF BIT(SAI_XFRCR_FSDEF_SHIFT) +#define SAI_XFRCR_FSPOL_SHIFT 17 +#define SAI_XFRCR_FSPOL BIT(SAI_XFRCR_FSPOL_SHIFT) +#define SAI_XFRCR_FSOFF_SHIFT 18 +#define SAI_XFRCR_FSOFF BIT(SAI_XFRCR_FSOFF_SHIFT) + +/****************** Bit definition for SAI_XSLOTR register ******************/ + +#define SAI_XSLOTR_FBOFF_SHIFT 0 +#define SAI_XSLOTR_FBOFF_MASK GENMASK(4, SAI_XSLOTR_FBOFF_SHIFT) +#define SAI_XSLOTR_FBOFF_SET(x) ((x) << SAI_XSLOTR_FBOFF_SHIFT) + +#define SAI_XSLOTR_SLOTSZ_SHIFT 6 +#define SAI_XSLOTR_SLOTSZ_MASK GENMASK(7, SAI_XSLOTR_SLOTSZ_SHIFT) +#define SAI_XSLOTR_SLOTSZ_SET(x) ((x) << SAI_XSLOTR_SLOTSZ_SHIFT) + +#define SAI_XSLOTR_NBSLOT_SHIFT 8 +#define SAI_XSLOTR_NBSLOT_MASK GENMASK(11, SAI_XSLOTR_NBSLOT_SHIFT) +#define SAI_XSLOTR_NBSLOT_SET(x) ((x) << SAI_XSLOTR_NBSLOT_SHIFT) + +#define SAI_XSLOTR_SLOTEN_SHIFT 16 +#define SAI_XSLOTR_SLOTEN_WIDTH 16 +#define SAI_XSLOTR_SLOTEN_MASK GENMASK(31, SAI_XSLOTR_SLOTEN_SHIFT) +#define SAI_XSLOTR_SLOTEN_SET(x) ((x) << SAI_XSLOTR_SLOTEN_SHIFT) + +/******************* Bit definition for SAI_XIMR register *******************/ +#define SAI_XIMR_OVRUDRIE BIT(0) +#define SAI_XIMR_MUTEDETIE BIT(1) +#define SAI_XIMR_WCKCFGIE BIT(2) +#define SAI_XIMR_FREQIE BIT(3) +#define SAI_XIMR_CNRDYIE BIT(4) +#define SAI_XIMR_AFSDETIE BIT(5) +#define SAI_XIMR_LFSDETIE BIT(6) + +#define SAI_XIMR_SHIFT 0 +#define SAI_XIMR_MASK GENMASK(6, SAI_XIMR_SHIFT) + +/******************** Bit definition for SAI_XSR register *******************/ +#define SAI_XSR_OVRUDR BIT(0) +#define SAI_XSR_MUTEDET BIT(1) +#define SAI_XSR_WCKCFG BIT(2) +#define SAI_XSR_FREQ BIT(3) +#define SAI_XSR_CNRDY BIT(4) +#define SAI_XSR_AFSDET BIT(5) +#define SAI_XSR_LFSDET BIT(6) + +#define SAI_XSR_SHIFT 0 +#define SAI_XSR_MASK GENMASK(6, SAI_XSR_SHIFT) + +/****************** Bit definition for SAI_XCLRFR register ******************/ +#define SAI_XCLRFR_COVRUDR BIT(0) +#define SAI_XCLRFR_CMUTEDET BIT(1) +#define SAI_XCLRFR_CWCKCFG BIT(2) +#define SAI_XCLRFR_CFREQ BIT(3) +#define SAI_XCLRFR_CCNRDY BIT(4) +#define SAI_XCLRFR_CAFSDET BIT(5) +#define SAI_XCLRFR_CLFSDET BIT(6) + +#define SAI_XCLRFR_SHIFT 0 +#define SAI_XCLRFR_MASK GENMASK(6, SAI_XCLRFR_SHIFT) + +enum stm32_sai_version { + SAI_STM32F4 +}; + +/** + * struct stm32_sai_data - private data of SAI instance driver + * @pdev: device data pointer + * @clk_x8k: SAI parent clock for sampling frequencies multiple of 8kHz + * @clk_x11k: SAI parent clock for sampling frequencies multiple of 11kHz + * @version: SOC version + * @irq: SAI interrupt line + */ +struct stm32_sai_data { + struct platform_device *pdev; + struct clk *clk_x8k; + struct clk *clk_x11k; + int version; + int irq; +}; diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c new file mode 100644 index 0000000000000000000000000000000000000000..ae4706ca265be06c2b14a9786b8432083048268e --- /dev/null +++ b/sound/soc/stm/stm32_sai_sub.c @@ -0,0 +1,884 @@ +/* + * STM32 ALSA SoC Digital Audio Interface (SAI) driver. + * + * Copyright (C) 2016, STMicroelectronics - All Rights Reserved + * Author(s): Olivier Moysan for STMicroelectronics. + * + * License terms: GPL V2.0. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "stm32_sai.h" + +#define SAI_FREE_PROTOCOL 0x0 + +#define SAI_SLOT_SIZE_AUTO 0x0 +#define SAI_SLOT_SIZE_16 0x1 +#define SAI_SLOT_SIZE_32 0x2 + +#define SAI_DATASIZE_8 0x2 +#define SAI_DATASIZE_10 0x3 +#define SAI_DATASIZE_16 0x4 +#define SAI_DATASIZE_20 0x5 +#define SAI_DATASIZE_24 0x6 +#define SAI_DATASIZE_32 0x7 + +#define STM_SAI_FIFO_SIZE 8 +#define STM_SAI_DAI_NAME_SIZE 15 + +#define STM_SAI_IS_PLAYBACK(ip) ((ip)->dir == SNDRV_PCM_STREAM_PLAYBACK) +#define STM_SAI_IS_CAPTURE(ip) ((ip)->dir == SNDRV_PCM_STREAM_CAPTURE) + +#define STM_SAI_A_ID 0x0 +#define STM_SAI_B_ID 0x1 + +#define STM_SAI_BLOCK_NAME(x) (((x)->id == STM_SAI_A_ID) ? "A" : "B") + +/** + * struct stm32_sai_sub_data - private data of SAI sub block (block A or B) + * @pdev: device data pointer + * @regmap: SAI register map pointer + * @dma_params: dma configuration data for rx or tx channel + * @cpu_dai_drv: DAI driver data pointer + * @cpu_dai: DAI runtime data pointer + * @substream: PCM substream data pointer + * @pdata: SAI block parent data pointer + * @sai_ck: kernel clock feeding the SAI clock generator + * @phys_addr: SAI registers physical base address + * @mclk_rate: SAI block master clock frequency (Hz). set at init + * @id: SAI sub block id corresponding to sub-block A or B + * @dir: SAI block direction (playback or capture). set at init + * @master: SAI block mode flag. (true=master, false=slave) set at init + * @fmt: SAI block format. relevant only for custom protocols. set at init + * @sync: SAI block synchronization mode. (none, internal or external) + * @fs_length: frame synchronization length. depends on protocol settings + * @slots: rx or tx slot number + * @slot_width: rx or tx slot width in bits + * @slot_mask: rx or tx active slots mask. set at init or at runtime + * @data_size: PCM data width. corresponds to PCM substream width. + */ +struct stm32_sai_sub_data { + struct platform_device *pdev; + struct regmap *regmap; + struct snd_dmaengine_dai_dma_data dma_params; + struct snd_soc_dai_driver *cpu_dai_drv; + struct snd_soc_dai *cpu_dai; + struct snd_pcm_substream *substream; + struct stm32_sai_data *pdata; + struct clk *sai_ck; + dma_addr_t phys_addr; + unsigned int mclk_rate; + unsigned int id; + int dir; + bool master; + int fmt; + int sync; + int fs_length; + int slots; + int slot_width; + int slot_mask; + int data_size; +}; + +enum stm32_sai_fifo_th { + STM_SAI_FIFO_TH_EMPTY, + STM_SAI_FIFO_TH_QUARTER, + STM_SAI_FIFO_TH_HALF, + STM_SAI_FIFO_TH_3_QUARTER, + STM_SAI_FIFO_TH_FULL, +}; + +static bool stm32_sai_sub_readable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case STM_SAI_CR1_REGX: + case STM_SAI_CR2_REGX: + case STM_SAI_FRCR_REGX: + case STM_SAI_SLOTR_REGX: + case STM_SAI_IMR_REGX: + case STM_SAI_SR_REGX: + case STM_SAI_CLRFR_REGX: + case STM_SAI_DR_REGX: + return true; + default: + return false; + } +} + +static bool stm32_sai_sub_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case STM_SAI_DR_REGX: + return true; + default: + return false; + } +} + +static bool stm32_sai_sub_writeable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case STM_SAI_CR1_REGX: + case STM_SAI_CR2_REGX: + case STM_SAI_FRCR_REGX: + case STM_SAI_SLOTR_REGX: + case STM_SAI_IMR_REGX: + case STM_SAI_SR_REGX: + case STM_SAI_CLRFR_REGX: + case STM_SAI_DR_REGX: + return true; + default: + return false; + } +} + +static const struct regmap_config stm32_sai_sub_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = STM_SAI_DR_REGX, + .readable_reg = stm32_sai_sub_readable_reg, + .volatile_reg = stm32_sai_sub_volatile_reg, + .writeable_reg = stm32_sai_sub_writeable_reg, + .fast_io = true, +}; + +static irqreturn_t stm32_sai_isr(int irq, void *devid) +{ + struct stm32_sai_sub_data *sai = (struct stm32_sai_sub_data *)devid; + struct snd_pcm_substream *substream = sai->substream; + struct platform_device *pdev = sai->pdev; + unsigned int sr, imr, flags; + snd_pcm_state_t status = SNDRV_PCM_STATE_RUNNING; + + regmap_read(sai->regmap, STM_SAI_IMR_REGX, &imr); + regmap_read(sai->regmap, STM_SAI_SR_REGX, &sr); + + flags = sr & imr; + if (!flags) + return IRQ_NONE; + + regmap_update_bits(sai->regmap, STM_SAI_CLRFR_REGX, SAI_XCLRFR_MASK, + SAI_XCLRFR_MASK); + + if (flags & SAI_XIMR_OVRUDRIE) { + dev_err(&pdev->dev, "IT %s\n", + STM_SAI_IS_PLAYBACK(sai) ? "underrun" : "overrun"); + status = SNDRV_PCM_STATE_XRUN; + } + + if (flags & SAI_XIMR_MUTEDETIE) + dev_dbg(&pdev->dev, "IT mute detected\n"); + + if (flags & SAI_XIMR_WCKCFGIE) { + dev_err(&pdev->dev, "IT wrong clock configuration\n"); + status = SNDRV_PCM_STATE_DISCONNECTED; + } + + if (flags & SAI_XIMR_CNRDYIE) + dev_warn(&pdev->dev, "IT Codec not ready\n"); + + if (flags & SAI_XIMR_AFSDETIE) { + dev_warn(&pdev->dev, "IT Anticipated frame synchro\n"); + status = SNDRV_PCM_STATE_XRUN; + } + + if (flags & SAI_XIMR_LFSDETIE) { + dev_warn(&pdev->dev, "IT Late frame synchro\n"); + status = SNDRV_PCM_STATE_XRUN; + } + + if (status != SNDRV_PCM_STATE_RUNNING) { + snd_pcm_stream_lock(substream); + snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); + } + + return IRQ_HANDLED; +} + +static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai, + int clk_id, unsigned int freq, int dir) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + + if ((dir == SND_SOC_CLOCK_OUT) && sai->master) { + sai->mclk_rate = freq; + dev_dbg(cpu_dai->dev, "SAI MCLK frequency is %uHz\n", freq); + } + + return 0; +} + +static int stm32_sai_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, + u32 rx_mask, int slots, int slot_width) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int slotr, slotr_mask, slot_size; + + dev_dbg(cpu_dai->dev, "masks tx/rx:%#x/%#x, slots:%d, width:%d\n", + tx_mask, rx_mask, slots, slot_width); + + switch (slot_width) { + case 16: + slot_size = SAI_SLOT_SIZE_16; + break; + case 32: + slot_size = SAI_SLOT_SIZE_32; + break; + default: + slot_size = SAI_SLOT_SIZE_AUTO; + break; + } + + slotr = SAI_XSLOTR_SLOTSZ_SET(slot_size) | + SAI_XSLOTR_NBSLOT_SET(slots - 1); + slotr_mask = SAI_XSLOTR_SLOTSZ_MASK | SAI_XSLOTR_NBSLOT_MASK; + + /* tx/rx mask set in machine init, if slot number defined in DT */ + if (STM_SAI_IS_PLAYBACK(sai)) { + sai->slot_mask = tx_mask; + slotr |= SAI_XSLOTR_SLOTEN_SET(tx_mask); + } + + if (STM_SAI_IS_CAPTURE(sai)) { + sai->slot_mask = rx_mask; + slotr |= SAI_XSLOTR_SLOTEN_SET(rx_mask); + } + + slotr_mask |= SAI_XSLOTR_SLOTEN_MASK; + + regmap_update_bits(sai->regmap, STM_SAI_SLOTR_REGX, slotr_mask, slotr); + + sai->slot_width = slot_width; + sai->slots = slots; + + return 0; +} + +static int stm32_sai_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int cr1 = 0, frcr = 0; + int cr1_mask = 0, frcr_mask = 0; + int ret; + + dev_dbg(cpu_dai->dev, "fmt %x\n", fmt); + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + /* SCK active high for all protocols */ + case SND_SOC_DAIFMT_I2S: + cr1 |= SAI_XCR1_CKSTR; + frcr |= SAI_XFRCR_FSOFF | SAI_XFRCR_FSDEF; + break; + /* Left justified */ + case SND_SOC_DAIFMT_MSB: + frcr |= SAI_XFRCR_FSPOL | SAI_XFRCR_FSDEF; + break; + /* Right justified */ + case SND_SOC_DAIFMT_LSB: + frcr |= SAI_XFRCR_FSPOL | SAI_XFRCR_FSDEF; + break; + case SND_SOC_DAIFMT_DSP_A: + frcr |= SAI_XFRCR_FSPOL | SAI_XFRCR_FSOFF; + break; + case SND_SOC_DAIFMT_DSP_B: + frcr |= SAI_XFRCR_FSPOL; + break; + default: + dev_err(cpu_dai->dev, "Unsupported protocol %#x\n", + fmt & SND_SOC_DAIFMT_FORMAT_MASK); + return -EINVAL; + } + + cr1_mask |= SAI_XCR1_PRTCFG_MASK | SAI_XCR1_CKSTR; + frcr_mask |= SAI_XFRCR_FSPOL | SAI_XFRCR_FSOFF | + SAI_XFRCR_FSDEF; + + /* DAI clock strobing. Invert setting previously set */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + cr1 ^= SAI_XCR1_CKSTR; + break; + case SND_SOC_DAIFMT_NB_IF: + frcr ^= SAI_XFRCR_FSPOL; + break; + case SND_SOC_DAIFMT_IB_IF: + /* Invert fs & sck */ + cr1 ^= SAI_XCR1_CKSTR; + frcr ^= SAI_XFRCR_FSPOL; + break; + default: + dev_err(cpu_dai->dev, "Unsupported strobing %#x\n", + fmt & SND_SOC_DAIFMT_INV_MASK); + return -EINVAL; + } + cr1_mask |= SAI_XCR1_CKSTR; + frcr_mask |= SAI_XFRCR_FSPOL; + + regmap_update_bits(sai->regmap, STM_SAI_FRCR_REGX, frcr_mask, frcr); + + /* DAI clock master masks */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + /* codec is master */ + cr1 |= SAI_XCR1_SLAVE; + sai->master = false; + break; + case SND_SOC_DAIFMT_CBS_CFS: + sai->master = true; + break; + default: + dev_err(cpu_dai->dev, "Unsupported mode %#x\n", + fmt & SND_SOC_DAIFMT_MASTER_MASK); + return -EINVAL; + } + cr1_mask |= SAI_XCR1_SLAVE; + + ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, cr1_mask, cr1); + if (ret < 0) { + dev_err(cpu_dai->dev, "Failed to update CR1 register\n"); + return ret; + } + + sai->fmt = fmt; + + return 0; +} + +static int stm32_sai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int imr, cr2, ret; + + sai->substream = substream; + + ret = clk_prepare_enable(sai->sai_ck); + if (ret < 0) { + dev_err(cpu_dai->dev, "failed to enable clock: %d\n", ret); + return ret; + } + + /* Enable ITs */ + regmap_update_bits(sai->regmap, STM_SAI_SR_REGX, + SAI_XSR_MASK, (unsigned int)~SAI_XSR_MASK); + + regmap_update_bits(sai->regmap, STM_SAI_CLRFR_REGX, + SAI_XCLRFR_MASK, SAI_XCLRFR_MASK); + + imr = SAI_XIMR_OVRUDRIE; + if (STM_SAI_IS_CAPTURE(sai)) { + regmap_read(sai->regmap, STM_SAI_CR2_REGX, &cr2); + if (cr2 & SAI_XCR2_MUTECNT_MASK) + imr |= SAI_XIMR_MUTEDETIE; + } + + if (sai->master) + imr |= SAI_XIMR_WCKCFGIE; + else + imr |= SAI_XIMR_AFSDETIE | SAI_XIMR_LFSDETIE; + + regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, + SAI_XIMR_MASK, imr); + + return 0; +} + +static int stm32_sai_set_config(struct snd_soc_dai *cpu_dai, + struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int cr1, cr1_mask, ret; + int fth = STM_SAI_FIFO_TH_HALF; + + /* FIFO config */ + regmap_update_bits(sai->regmap, STM_SAI_CR2_REGX, + SAI_XCR2_FFLUSH | SAI_XCR2_FTH_MASK, + SAI_XCR2_FFLUSH | SAI_XCR2_FTH_SET(fth)); + + /* Mode, data format and channel config */ + cr1 = SAI_XCR1_PRTCFG_SET(SAI_FREE_PROTOCOL); + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S8: + cr1 |= SAI_XCR1_DS_SET(SAI_DATASIZE_8); + break; + case SNDRV_PCM_FORMAT_S16_LE: + cr1 |= SAI_XCR1_DS_SET(SAI_DATASIZE_16); + break; + case SNDRV_PCM_FORMAT_S32_LE: + cr1 |= SAI_XCR1_DS_SET(SAI_DATASIZE_32); + break; + default: + dev_err(cpu_dai->dev, "Data format not supported"); + return -EINVAL; + } + cr1_mask = SAI_XCR1_DS_MASK | SAI_XCR1_PRTCFG_MASK; + + cr1_mask |= SAI_XCR1_RX_TX; + if (STM_SAI_IS_CAPTURE(sai)) + cr1 |= SAI_XCR1_RX_TX; + + cr1_mask |= SAI_XCR1_MONO; + if ((sai->slots == 2) && (params_channels(params) == 1)) + cr1 |= SAI_XCR1_MONO; + + ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, cr1_mask, cr1); + if (ret < 0) { + dev_err(cpu_dai->dev, "Failed to update CR1 register\n"); + return ret; + } + + /* DMA config */ + sai->dma_params.maxburst = STM_SAI_FIFO_SIZE * fth / sizeof(u32); + snd_soc_dai_set_dma_data(cpu_dai, substream, (void *)&sai->dma_params); + + return 0; +} + +static int stm32_sai_set_slots(struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int slotr, slot_sz; + + regmap_read(sai->regmap, STM_SAI_SLOTR_REGX, &slotr); + + /* + * If SLOTSZ is set to auto in SLOTR, align slot width on data size + * By default slot width = data size, if not forced from DT + */ + slot_sz = slotr & SAI_XSLOTR_SLOTSZ_MASK; + if (slot_sz == SAI_XSLOTR_SLOTSZ_SET(SAI_SLOT_SIZE_AUTO)) + sai->slot_width = sai->data_size; + + if (sai->slot_width < sai->data_size) { + dev_err(cpu_dai->dev, + "Data size %d larger than slot width\n", + sai->data_size); + return -EINVAL; + } + + /* Slot number is set to 2, if not specified in DT */ + if (!sai->slots) + sai->slots = 2; + + /* The number of slots in the audio frame is equal to NBSLOT[3:0] + 1*/ + regmap_update_bits(sai->regmap, STM_SAI_SLOTR_REGX, + SAI_XSLOTR_NBSLOT_MASK, + SAI_XSLOTR_NBSLOT_SET((sai->slots - 1))); + + /* Set default slots mask if not already set from DT */ + if (!(slotr & SAI_XSLOTR_SLOTEN_MASK)) { + sai->slot_mask = (1 << sai->slots) - 1; + regmap_update_bits(sai->regmap, + STM_SAI_SLOTR_REGX, SAI_XSLOTR_SLOTEN_MASK, + SAI_XSLOTR_SLOTEN_SET(sai->slot_mask)); + } + + dev_dbg(cpu_dai->dev, "slots %d, slot width %d\n", + sai->slots, sai->slot_width); + + return 0; +} + +static void stm32_sai_set_frame(struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int fs_active, offset, format; + int frcr, frcr_mask; + + format = sai->fmt & SND_SOC_DAIFMT_FORMAT_MASK; + sai->fs_length = sai->slot_width * sai->slots; + + fs_active = sai->fs_length / 2; + if ((format == SND_SOC_DAIFMT_DSP_A) || + (format == SND_SOC_DAIFMT_DSP_B)) + fs_active = 1; + + frcr = SAI_XFRCR_FRL_SET((sai->fs_length - 1)); + frcr |= SAI_XFRCR_FSALL_SET((fs_active - 1)); + frcr_mask = SAI_XFRCR_FRL_MASK | SAI_XFRCR_FSALL_MASK; + + dev_dbg(cpu_dai->dev, "frame length %d, frame active %d\n", + sai->fs_length, fs_active); + + regmap_update_bits(sai->regmap, STM_SAI_FRCR_REGX, frcr_mask, frcr); + + if ((sai->fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_LSB) { + offset = sai->slot_width - sai->data_size; + + regmap_update_bits(sai->regmap, STM_SAI_SLOTR_REGX, + SAI_XSLOTR_FBOFF_MASK, + SAI_XSLOTR_FBOFF_SET(offset)); + } +} + +static int stm32_sai_configure_clock(struct snd_soc_dai *cpu_dai, + struct snd_pcm_hw_params *params) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int cr1, mask, div = 0; + int sai_clk_rate, ret; + + if (!sai->mclk_rate) { + dev_err(cpu_dai->dev, "Mclk rate is null\n"); + return -EINVAL; + } + + if (!(params_rate(params) % 11025)) + clk_set_parent(sai->sai_ck, sai->pdata->clk_x11k); + else + clk_set_parent(sai->sai_ck, sai->pdata->clk_x8k); + sai_clk_rate = clk_get_rate(sai->sai_ck); + + /* + * mclk_rate = 256 * fs + * MCKDIV = 0 if sai_ck < 3/2 * mclk_rate + * MCKDIV = sai_ck / (2 * mclk_rate) otherwise + */ + if (2 * sai_clk_rate >= 3 * sai->mclk_rate) + div = DIV_ROUND_CLOSEST(sai_clk_rate, 2 * sai->mclk_rate); + + if (div > SAI_XCR1_MCKDIV_MAX) { + dev_err(cpu_dai->dev, "Divider %d out of range\n", div); + return -EINVAL; + } + dev_dbg(cpu_dai->dev, "SAI clock %d, divider %d\n", sai_clk_rate, div); + + mask = SAI_XCR1_MCKDIV_MASK; + cr1 = SAI_XCR1_MCKDIV_SET(div); + ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, mask, cr1); + if (ret < 0) { + dev_err(cpu_dai->dev, "Failed to update CR1 register\n"); + return ret; + } + + return 0; +} + +static int stm32_sai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int ret; + + sai->data_size = params_width(params); + + ret = stm32_sai_set_slots(cpu_dai); + if (ret < 0) + return ret; + stm32_sai_set_frame(cpu_dai); + + ret = stm32_sai_set_config(cpu_dai, substream, params); + if (ret) + return ret; + + if (sai->master) + ret = stm32_sai_configure_clock(cpu_dai, params); + + return ret; +} + +static int stm32_sai_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + int ret; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + dev_dbg(cpu_dai->dev, "Enable DMA and SAI\n"); + + regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, + SAI_XCR1_DMAEN, SAI_XCR1_DMAEN); + + /* Enable SAI */ + ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, + SAI_XCR1_SAIEN, SAI_XCR1_SAIEN); + if (ret < 0) + dev_err(cpu_dai->dev, "Failed to update CR1 register\n"); + break; + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_STOP: + dev_dbg(cpu_dai->dev, "Disable DMA and SAI\n"); + + regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, + SAI_XCR1_DMAEN, + (unsigned int)~SAI_XCR1_DMAEN); + + ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, + SAI_XCR1_SAIEN, + (unsigned int)~SAI_XCR1_SAIEN); + if (ret < 0) + dev_err(cpu_dai->dev, "Failed to update CR1 register\n"); + break; + default: + return -EINVAL; + } + + return ret; +} + +static void stm32_sai_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = snd_soc_dai_get_drvdata(cpu_dai); + + regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0); + + clk_disable_unprepare(sai->sai_ck); + sai->substream = NULL; +} + +static int stm32_sai_dai_probe(struct snd_soc_dai *cpu_dai) +{ + struct stm32_sai_sub_data *sai = dev_get_drvdata(cpu_dai->dev); + + sai->dma_params.addr = (dma_addr_t)(sai->phys_addr + STM_SAI_DR_REGX); + sai->dma_params.maxburst = 1; + /* Buswidth will be set by framework at runtime */ + sai->dma_params.addr_width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + if (STM_SAI_IS_PLAYBACK(sai)) + snd_soc_dai_init_dma_data(cpu_dai, &sai->dma_params, NULL); + else + snd_soc_dai_init_dma_data(cpu_dai, NULL, &sai->dma_params); + + return 0; +} + +static const struct snd_soc_dai_ops stm32_sai_pcm_dai_ops = { + .set_sysclk = stm32_sai_set_sysclk, + .set_fmt = stm32_sai_set_dai_fmt, + .set_tdm_slot = stm32_sai_set_dai_tdm_slot, + .startup = stm32_sai_startup, + .hw_params = stm32_sai_hw_params, + .trigger = stm32_sai_trigger, + .shutdown = stm32_sai_shutdown, +}; + +static const struct snd_pcm_hardware stm32_sai_pcm_hw = { + .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP, + .buffer_bytes_max = 8 * PAGE_SIZE, + .period_bytes_min = 1024, /* 5ms at 48kHz */ + .period_bytes_max = PAGE_SIZE, + .periods_min = 2, + .periods_max = 8, +}; + +static struct snd_soc_dai_driver stm32_sai_playback_dai[] = { +{ + .probe = stm32_sai_dai_probe, + .id = 1, /* avoid call to fmt_single_name() */ + .playback = { + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + /* DMA does not support 24 bits transfers */ + .formats = + SNDRV_PCM_FMTBIT_S8 | + SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S32_LE, + }, + .ops = &stm32_sai_pcm_dai_ops, + } +}; + +static struct snd_soc_dai_driver stm32_sai_capture_dai[] = { +{ + .probe = stm32_sai_dai_probe, + .id = 1, /* avoid call to fmt_single_name() */ + .capture = { + .channels_min = 1, + .channels_max = 2, + .rate_min = 8000, + .rate_max = 192000, + .rates = SNDRV_PCM_RATE_CONTINUOUS, + /* DMA does not support 24 bits transfers */ + .formats = + SNDRV_PCM_FMTBIT_S8 | + SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S32_LE, + }, + .ops = &stm32_sai_pcm_dai_ops, + } +}; + +static const struct snd_dmaengine_pcm_config stm32_sai_pcm_config = { + .pcm_hardware = &stm32_sai_pcm_hw, + .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, +}; + +static const struct snd_soc_component_driver stm32_component = { + .name = "stm32-sai", +}; + +static const struct of_device_id stm32_sai_sub_ids[] = { + { .compatible = "st,stm32-sai-sub-a", + .data = (void *)STM_SAI_A_ID}, + { .compatible = "st,stm32-sai-sub-b", + .data = (void *)STM_SAI_B_ID}, + {} +}; +MODULE_DEVICE_TABLE(of, stm32_sai_sub_ids); + +static int stm32_sai_sub_parse_of(struct platform_device *pdev, + struct stm32_sai_sub_data *sai) +{ + struct device_node *np = pdev->dev.of_node; + struct resource *res; + void __iomem *base; + + if (!np) + return -ENODEV; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + dev_err(&pdev->dev, "res %pr\n", res); + + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + sai->phys_addr = res->start; + sai->regmap = devm_regmap_init_mmio(&pdev->dev, base, + &stm32_sai_sub_regmap_config); + + /* Get direction property */ + if (of_property_match_string(np, "dma-names", "tx") >= 0) { + sai->dir = SNDRV_PCM_STREAM_PLAYBACK; + } else if (of_property_match_string(np, "dma-names", "rx") >= 0) { + sai->dir = SNDRV_PCM_STREAM_CAPTURE; + } else { + dev_err(&pdev->dev, "Unsupported direction\n"); + return -EINVAL; + } + + sai->sai_ck = devm_clk_get(&pdev->dev, "sai_ck"); + if (IS_ERR(sai->sai_ck)) { + dev_err(&pdev->dev, "missing kernel clock sai_ck\n"); + return PTR_ERR(sai->sai_ck); + } + + return 0; +} + +static int stm32_sai_sub_dais_init(struct platform_device *pdev, + struct stm32_sai_sub_data *sai) +{ + sai->cpu_dai_drv = devm_kzalloc(&pdev->dev, + sizeof(struct snd_soc_dai_driver), + GFP_KERNEL); + if (!sai->cpu_dai_drv) + return -ENOMEM; + + sai->cpu_dai_drv->name = dev_name(&pdev->dev); + if (STM_SAI_IS_PLAYBACK(sai)) { + memcpy(sai->cpu_dai_drv, &stm32_sai_playback_dai, + sizeof(stm32_sai_playback_dai)); + sai->cpu_dai_drv->playback.stream_name = sai->cpu_dai_drv->name; + } else { + memcpy(sai->cpu_dai_drv, &stm32_sai_capture_dai, + sizeof(stm32_sai_capture_dai)); + sai->cpu_dai_drv->capture.stream_name = sai->cpu_dai_drv->name; + } + + return 0; +} + +static int stm32_sai_sub_probe(struct platform_device *pdev) +{ + struct stm32_sai_sub_data *sai; + const struct of_device_id *of_id; + int ret; + + sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL); + if (!sai) + return -ENOMEM; + + of_id = of_match_device(stm32_sai_sub_ids, &pdev->dev); + if (!of_id) + return -EINVAL; + sai->id = (uintptr_t)of_id->data; + + sai->pdev = pdev; + platform_set_drvdata(pdev, sai); + + sai->pdata = dev_get_drvdata(pdev->dev.parent); + if (!sai->pdata) { + dev_err(&pdev->dev, "Parent device data not available\n"); + return -EINVAL; + } + + ret = stm32_sai_sub_parse_of(pdev, sai); + if (ret) + return ret; + + ret = stm32_sai_sub_dais_init(pdev, sai); + if (ret) + return ret; + + ret = devm_request_irq(&pdev->dev, sai->pdata->irq, stm32_sai_isr, + IRQF_SHARED, dev_name(&pdev->dev), sai); + if (ret) { + dev_err(&pdev->dev, "irq request returned %d\n", ret); + return ret; + } + + ret = devm_snd_soc_register_component(&pdev->dev, &stm32_component, + sai->cpu_dai_drv, 1); + if (ret) + return ret; + + ret = devm_snd_dmaengine_pcm_register(&pdev->dev, + &stm32_sai_pcm_config, 0); + if (ret) { + dev_err(&pdev->dev, "could not register pcm dma\n"); + return ret; + } + + return 0; +} + +static struct platform_driver stm32_sai_sub_driver = { + .driver = { + .name = "st,stm32-sai-sub", + .of_match_table = stm32_sai_sub_ids, + }, + .probe = stm32_sai_sub_probe, +}; + +module_platform_driver(stm32_sai_sub_driver); + +MODULE_DESCRIPTION("STM32 Soc SAI sub-block Interface"); +MODULE_AUTHOR("Olivier Moysan, "); +MODULE_ALIAS("platform:st,stm32-sai-sub"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/sunxi/sun8i-codec-analog.c b/sound/soc/sunxi/sun8i-codec-analog.c index 72331332b72ee94a4f9d5a268bc2ef9b94ad278f..6c17c99c2c8dac2ebf2ceb2670d9d47419942163 100644 --- a/sound/soc/sunxi/sun8i-codec-analog.c +++ b/sound/soc/sunxi/sun8i-codec-analog.c @@ -252,24 +252,15 @@ static const DECLARE_TLV_DB_RANGE(sun8i_codec_mic_gain_scale, ); static const struct snd_kcontrol_new sun8i_codec_common_controls[] = { - /* Mixer pre-gains */ - SOC_SINGLE_TLV("Line In Playback Volume", SUN8I_ADDA_LINEIN_GCTRL, - SUN8I_ADDA_LINEIN_GCTRL_LINEING, - 0x7, 0, sun8i_codec_out_mixer_pregain_scale), + /* Mixer pre-gain */ SOC_SINGLE_TLV("Mic1 Playback Volume", SUN8I_ADDA_MICIN_GCTRL, SUN8I_ADDA_MICIN_GCTRL_MIC1G, 0x7, 0, sun8i_codec_out_mixer_pregain_scale), - SOC_SINGLE_TLV("Mic2 Playback Volume", - SUN8I_ADDA_MICIN_GCTRL, SUN8I_ADDA_MICIN_GCTRL_MIC2G, - 0x7, 0, sun8i_codec_out_mixer_pregain_scale), - /* Microphone Amp boost gains */ + /* Microphone Amp boost gain */ SOC_SINGLE_TLV("Mic1 Boost Volume", SUN8I_ADDA_MIC1G_MICBIAS_CTRL, SUN8I_ADDA_MIC1G_MICBIAS_CTRL_MIC1BOOST, 0x7, 0, sun8i_codec_mic_gain_scale), - SOC_SINGLE_TLV("Mic2 Boost Volume", SUN8I_ADDA_MIC2G_CTRL, - SUN8I_ADDA_MIC2G_CTRL_MIC2BOOST, 0x7, 0, - sun8i_codec_mic_gain_scale), /* ADC */ SOC_SINGLE_TLV("ADC Gain Capture Volume", SUN8I_ADDA_ADC_AP_EN, @@ -295,12 +286,8 @@ static const struct snd_soc_dapm_widget sun8i_codec_common_widgets[] = { * stream widgets at the card level. */ - /* Line In */ - SND_SOC_DAPM_INPUT("LINEIN"), - - /* Microphone inputs */ + /* Microphone input */ SND_SOC_DAPM_INPUT("MIC1"), - SND_SOC_DAPM_INPUT("MIC2"), /* Microphone Bias */ SND_SOC_DAPM_SUPPLY("MBIAS", SUN8I_ADDA_MIC1G_MICBIAS_CTRL, @@ -310,8 +297,6 @@ static const struct snd_soc_dapm_widget sun8i_codec_common_widgets[] = { /* Mic input path */ SND_SOC_DAPM_PGA("Mic1 Amplifier", SUN8I_ADDA_MIC1G_MICBIAS_CTRL, SUN8I_ADDA_MIC1G_MICBIAS_CTRL_MIC1AMPEN, 0, NULL, 0), - SND_SOC_DAPM_PGA("Mic2 Amplifier", SUN8I_ADDA_MIC2G_CTRL, - SUN8I_ADDA_MIC2G_CTRL_MIC2AMPEN, 0, NULL, 0), /* Mixers */ SND_SOC_DAPM_MIXER("Left Mixer", SUN8I_ADDA_DAC_PA_SRC, @@ -335,35 +320,26 @@ static const struct snd_soc_dapm_widget sun8i_codec_common_widgets[] = { static const struct snd_soc_dapm_route sun8i_codec_common_routes[] = { /* Microphone Routes */ { "Mic1 Amplifier", NULL, "MIC1"}, - { "Mic2 Amplifier", NULL, "MIC2"}, /* Left Mixer Routes */ { "Left Mixer", "DAC Playback Switch", "Left DAC" }, { "Left Mixer", "DAC Reversed Playback Switch", "Right DAC" }, - { "Left Mixer", "Line In Playback Switch", "LINEIN" }, { "Left Mixer", "Mic1 Playback Switch", "Mic1 Amplifier" }, - { "Left Mixer", "Mic2 Playback Switch", "Mic2 Amplifier" }, /* Right Mixer Routes */ { "Right Mixer", "DAC Playback Switch", "Right DAC" }, { "Right Mixer", "DAC Reversed Playback Switch", "Left DAC" }, - { "Right Mixer", "Line In Playback Switch", "LINEIN" }, { "Right Mixer", "Mic1 Playback Switch", "Mic1 Amplifier" }, - { "Right Mixer", "Mic2 Playback Switch", "Mic2 Amplifier" }, /* Left ADC Mixer Routes */ { "Left ADC Mixer", "Mixer Capture Switch", "Left Mixer" }, { "Left ADC Mixer", "Mixer Reversed Capture Switch", "Right Mixer" }, - { "Left ADC Mixer", "Line In Capture Switch", "LINEIN" }, { "Left ADC Mixer", "Mic1 Capture Switch", "Mic1 Amplifier" }, - { "Left ADC Mixer", "Mic2 Capture Switch", "Mic2 Amplifier" }, /* Right ADC Mixer Routes */ { "Right ADC Mixer", "Mixer Capture Switch", "Right Mixer" }, { "Right ADC Mixer", "Mixer Reversed Capture Switch", "Left Mixer" }, - { "Right ADC Mixer", "Line In Capture Switch", "LINEIN" }, { "Right ADC Mixer", "Mic1 Capture Switch", "Mic1 Amplifier" }, - { "Right ADC Mixer", "Mic2 Capture Switch", "Mic2 Amplifier" }, /* ADC Routes */ { "Left ADC", NULL, "Left ADC Mixer" }, @@ -498,6 +474,61 @@ static int sun8i_codec_add_hmic(struct snd_soc_component *cmpnt) return ret; } +/* line in specific controls, widgets and rines */ +static const struct snd_kcontrol_new sun8i_codec_linein_controls[] = { + /* Mixer pre-gain */ + SOC_SINGLE_TLV("Line In Playback Volume", SUN8I_ADDA_LINEIN_GCTRL, + SUN8I_ADDA_LINEIN_GCTRL_LINEING, + 0x7, 0, sun8i_codec_out_mixer_pregain_scale), +}; + +static const struct snd_soc_dapm_widget sun8i_codec_linein_widgets[] = { + /* Line input */ + SND_SOC_DAPM_INPUT("LINEIN"), +}; + +static const struct snd_soc_dapm_route sun8i_codec_linein_routes[] = { + { "Left Mixer", "Line In Playback Switch", "LINEIN" }, + + { "Right Mixer", "Line In Playback Switch", "LINEIN" }, + + { "Left ADC Mixer", "Line In Capture Switch", "LINEIN" }, + + { "Right ADC Mixer", "Line In Capture Switch", "LINEIN" }, +}; + +static int sun8i_codec_add_linein(struct snd_soc_component *cmpnt) +{ + struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cmpnt); + struct device *dev = cmpnt->dev; + int ret; + + ret = snd_soc_add_component_controls(cmpnt, + sun8i_codec_linein_controls, + ARRAY_SIZE(sun8i_codec_linein_controls)); + if (ret) { + dev_err(dev, "Failed to add Line In controls: %d\n", ret); + return ret; + } + + ret = snd_soc_dapm_new_controls(dapm, sun8i_codec_linein_widgets, + ARRAY_SIZE(sun8i_codec_linein_widgets)); + if (ret) { + dev_err(dev, "Failed to add Line In DAPM widgets: %d\n", ret); + return ret; + } + + ret = snd_soc_dapm_add_routes(dapm, sun8i_codec_linein_routes, + ARRAY_SIZE(sun8i_codec_linein_routes)); + if (ret) { + dev_err(dev, "Failed to add Line In DAPM routes: %d\n", ret); + return ret; + } + + return 0; +} + + /* line out specific controls, widgets and routes */ static const DECLARE_TLV_DB_RANGE(sun8i_codec_lineout_vol_scale, 0, 1, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, 0, 1), @@ -578,19 +609,90 @@ static int sun8i_codec_add_lineout(struct snd_soc_component *cmpnt) return 0; } +/* mic2 specific controls, widgets and routes */ +static const struct snd_kcontrol_new sun8i_codec_mic2_controls[] = { + /* Mixer pre-gain */ + SOC_SINGLE_TLV("Mic2 Playback Volume", + SUN8I_ADDA_MICIN_GCTRL, SUN8I_ADDA_MICIN_GCTRL_MIC2G, + 0x7, 0, sun8i_codec_out_mixer_pregain_scale), + + /* Microphone Amp boost gain */ + SOC_SINGLE_TLV("Mic2 Boost Volume", SUN8I_ADDA_MIC2G_CTRL, + SUN8I_ADDA_MIC2G_CTRL_MIC2BOOST, 0x7, 0, + sun8i_codec_mic_gain_scale), +}; + +static const struct snd_soc_dapm_widget sun8i_codec_mic2_widgets[] = { + /* Microphone input */ + SND_SOC_DAPM_INPUT("MIC2"), + + /* Mic input path */ + SND_SOC_DAPM_PGA("Mic2 Amplifier", SUN8I_ADDA_MIC2G_CTRL, + SUN8I_ADDA_MIC2G_CTRL_MIC2AMPEN, 0, NULL, 0), +}; + +static const struct snd_soc_dapm_route sun8i_codec_mic2_routes[] = { + { "Mic2 Amplifier", NULL, "MIC2"}, + + { "Left Mixer", "Mic2 Playback Switch", "Mic2 Amplifier" }, + + { "Right Mixer", "Mic2 Playback Switch", "Mic2 Amplifier" }, + + { "Left ADC Mixer", "Mic2 Capture Switch", "Mic2 Amplifier" }, + + { "Right ADC Mixer", "Mic2 Capture Switch", "Mic2 Amplifier" }, +}; + +static int sun8i_codec_add_mic2(struct snd_soc_component *cmpnt) +{ + struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cmpnt); + struct device *dev = cmpnt->dev; + int ret; + + ret = snd_soc_add_component_controls(cmpnt, + sun8i_codec_mic2_controls, + ARRAY_SIZE(sun8i_codec_mic2_controls)); + if (ret) { + dev_err(dev, "Failed to add MIC2 controls: %d\n", ret); + return ret; + } + + ret = snd_soc_dapm_new_controls(dapm, sun8i_codec_mic2_widgets, + ARRAY_SIZE(sun8i_codec_mic2_widgets)); + if (ret) { + dev_err(dev, "Failed to add MIC2 DAPM widgets: %d\n", ret); + return ret; + } + + ret = snd_soc_dapm_add_routes(dapm, sun8i_codec_mic2_routes, + ARRAY_SIZE(sun8i_codec_mic2_routes)); + if (ret) { + dev_err(dev, "Failed to add MIC2 DAPM routes: %d\n", ret); + return ret; + } + + return 0; +} + struct sun8i_codec_analog_quirks { bool has_headphone; bool has_hmic; + bool has_linein; bool has_lineout; + bool has_mic2; }; static const struct sun8i_codec_analog_quirks sun8i_a23_quirks = { .has_headphone = true, .has_hmic = true, + .has_linein = true, + .has_mic2 = true, }; static const struct sun8i_codec_analog_quirks sun8i_h3_quirks = { + .has_linein = true, .has_lineout = true, + .has_mic2 = true, }; static int sun8i_codec_analog_cmpnt_probe(struct snd_soc_component *cmpnt) @@ -620,12 +722,24 @@ static int sun8i_codec_analog_cmpnt_probe(struct snd_soc_component *cmpnt) return ret; } + if (quirks->has_linein) { + ret = sun8i_codec_add_linein(cmpnt); + if (ret) + return ret; + } + if (quirks->has_lineout) { ret = sun8i_codec_add_lineout(cmpnt); if (ret) return ret; } + if (quirks->has_mic2) { + ret = sun8i_codec_add_mic2(cmpnt); + if (ret) + return ret; + } + return 0; } diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 7527ba29a5a0ea6eb9c6498d8d0293ad4aead18a..5723c3404f6bd6a896aed90b8b37d143de26c075 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -290,12 +290,10 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), /* DAC Mixers */ - SND_SOC_DAPM_MIXER("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0, - sun8i_dac_mixer_controls, - ARRAY_SIZE(sun8i_dac_mixer_controls)), - SND_SOC_DAPM_MIXER("Right Digital DAC Mixer", SND_SOC_NOPM, 0, 0, - sun8i_dac_mixer_controls, - ARRAY_SIZE(sun8i_dac_mixer_controls)), + SOC_MIXER_ARRAY("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0, + sun8i_dac_mixer_controls), + SOC_MIXER_ARRAY("Right Digital DAC Mixer", SND_SOC_NOPM, 0, 0, + sun8i_dac_mixer_controls), /* Clocks */ SND_SOC_DAPM_SUPPLY("MODCLK AFI1", SUN8I_MOD_CLK_ENA, diff --git a/sound/soc/tegra/tegra20_ac97.c b/sound/soc/tegra/tegra20_ac97.c index a68368edab9c5df89a32e3678f7c9a65988b5463..affad46bf1887d08b78fb89394168ca36bef2fcc 100644 --- a/sound/soc/tegra/tegra20_ac97.c +++ b/sound/soc/tegra/tegra20_ac97.c @@ -318,7 +318,6 @@ static int tegra20_ac97_platform_probe(struct platform_device *pdev) ac97 = devm_kzalloc(&pdev->dev, sizeof(struct tegra20_ac97), GFP_KERNEL); if (!ac97) { - dev_err(&pdev->dev, "Can't allocate tegra20_ac97\n"); ret = -ENOMEM; goto err; } diff --git a/sound/soc/tegra/tegra20_das.c b/sound/soc/tegra/tegra20_das.c index 89add13c31cfc08259bd97d91555b70886117b77..4024e3abbeed0ef1a144017341116bb7d81c81d7 100644 --- a/sound/soc/tegra/tegra20_das.c +++ b/sound/soc/tegra/tegra20_das.c @@ -41,6 +41,7 @@ static inline void tegra20_das_write(u32 reg, u32 val) static inline u32 tegra20_das_read(u32 reg) { u32 val; + regmap_read(das->regmap, reg, &val); return val; } @@ -142,7 +143,6 @@ static int tegra20_das_probe(struct platform_device *pdev) das = devm_kzalloc(&pdev->dev, sizeof(struct tegra20_das), GFP_KERNEL); if (!das) { - dev_err(&pdev->dev, "Can't allocate tegra20_das\n"); ret = -ENOMEM; goto err; } diff --git a/sound/soc/tegra/tegra20_i2s.c b/sound/soc/tegra/tegra20_i2s.c index 14106fa82bca3ed821c2a2ad9977c2856e37bbf8..26253c2849e7945acaf69f767d9f23dbaac6850b 100644 --- a/sound/soc/tegra/tegra20_i2s.c +++ b/sound/soc/tegra/tegra20_i2s.c @@ -345,7 +345,6 @@ static int tegra20_i2s_platform_probe(struct platform_device *pdev) i2s = devm_kzalloc(&pdev->dev, sizeof(struct tegra20_i2s), GFP_KERNEL); if (!i2s) { - dev_err(&pdev->dev, "Can't allocate tegra20_i2s\n"); ret = -ENOMEM; goto err; } diff --git a/sound/soc/tegra/tegra20_spdif.c b/sound/soc/tegra/tegra20_spdif.c index a0c3640572b98a80a48601093d3848d2341a8509..767c0491e11abecf2545f574ac28ff6cdc2e0e29 100644 --- a/sound/soc/tegra/tegra20_spdif.c +++ b/sound/soc/tegra/tegra20_spdif.c @@ -271,10 +271,9 @@ static int tegra20_spdif_platform_probe(struct platform_device *pdev) spdif = devm_kzalloc(&pdev->dev, sizeof(struct tegra20_spdif), GFP_KERNEL); - if (!spdif) { - dev_err(&pdev->dev, "Can't allocate tegra20_spdif\n"); + if (!spdif) return -ENOMEM; - } + dev_set_drvdata(&pdev->dev, spdif); spdif->clk_spdif_out = devm_clk_get(&pdev->dev, "spdif_out"); diff --git a/sound/soc/tegra/tegra30_ahub.c b/sound/soc/tegra/tegra30_ahub.c index fef3b9a21a667304ca94579c5511c6ea3f58c073..8c10ae7982bad992cc94f60db565ee3791f7bc3a 100644 --- a/sound/soc/tegra/tegra30_ahub.c +++ b/sound/soc/tegra/tegra30_ahub.c @@ -41,6 +41,7 @@ static inline void tegra30_apbif_write(u32 reg, u32 val) static inline u32 tegra30_apbif_read(u32 reg) { u32 val; + regmap_read(ahub->regmap_apbif, reg, &val); return val; } @@ -560,10 +561,8 @@ static int tegra30_ahub_probe(struct platform_device *pdev) ahub = devm_kzalloc(&pdev->dev, sizeof(struct tegra30_ahub), GFP_KERNEL); - if (!ahub) { - dev_err(&pdev->dev, "Can't allocate tegra30_ahub\n"); + if (!ahub) return -ENOMEM; - } dev_set_drvdata(&pdev->dev, ahub); ahub->soc_data = soc_data; diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c index 8e55583aa104e1c847b2c64700b90a564378ddf4..b2b279c96029d90e5039701b5d8e2c955db2d43c 100644 --- a/sound/soc/tegra/tegra30_i2s.c +++ b/sound/soc/tegra/tegra30_i2s.c @@ -385,7 +385,6 @@ static int tegra30_i2s_platform_probe(struct platform_device *pdev) i2s = devm_kzalloc(&pdev->dev, sizeof(struct tegra30_i2s), GFP_KERNEL); if (!i2s) { - dev_err(&pdev->dev, "Can't allocate tegra30_i2s\n"); ret = -ENOMEM; goto err; } diff --git a/sound/soc/tegra/tegra_alc5632.c b/sound/soc/tegra/tegra_alc5632.c index eead6e7f205bedb88e992f55d6d7ab1de9bb75e2..0509902512ccd38a02376c0dc41a603d75f9a572 100644 --- a/sound/soc/tegra/tegra_alc5632.c +++ b/sound/soc/tegra/tegra_alc5632.c @@ -169,10 +169,8 @@ static int tegra_alc5632_probe(struct platform_device *pdev) alc5632 = devm_kzalloc(&pdev->dev, sizeof(struct tegra_alc5632), GFP_KERNEL); - if (!alc5632) { - dev_err(&pdev->dev, "Can't allocate tegra_alc5632\n"); + if (!alc5632) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/tegra_max98090.c b/sound/soc/tegra/tegra_max98090.c index a403db6d563e7cdaa2f0248212a40a3404b79d05..c34a54d6e8121b9a6ba6c2b996112908eac25778 100644 --- a/sound/soc/tegra/tegra_max98090.c +++ b/sound/soc/tegra/tegra_max98090.c @@ -225,10 +225,8 @@ static int tegra_max98090_probe(struct platform_device *pdev) machine = devm_kzalloc(&pdev->dev, sizeof(struct tegra_max98090), GFP_KERNEL); - if (!machine) { - dev_err(&pdev->dev, "Can't allocate tegra_max98090\n"); + if (!machine) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/tegra_rt5640.c b/sound/soc/tegra/tegra_rt5640.c index 25b9fc03ba6232a821c6730643e123bf8cb76c77..93a356802345aeefd00c4171fd55032c9802e15c 100644 --- a/sound/soc/tegra/tegra_rt5640.c +++ b/sound/soc/tegra/tegra_rt5640.c @@ -170,10 +170,8 @@ static int tegra_rt5640_probe(struct platform_device *pdev) machine = devm_kzalloc(&pdev->dev, sizeof(struct tegra_rt5640), GFP_KERNEL); - if (!machine) { - dev_err(&pdev->dev, "Can't allocate tegra_rt5640\n"); + if (!machine) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/tegra_sgtl5000.c b/sound/soc/tegra/tegra_sgtl5000.c index 4bbab098f50b92131c0c47f029a2edff8d16a6b1..6dda01f6998349d6d6f80d629af08480392bf776 100644 --- a/sound/soc/tegra/tegra_sgtl5000.c +++ b/sound/soc/tegra/tegra_sgtl5000.c @@ -120,10 +120,8 @@ static int tegra_sgtl5000_driver_probe(struct platform_device *pdev) machine = devm_kzalloc(&pdev->dev, sizeof(struct tegra_sgtl5000), GFP_KERNEL); - if (!machine) { - dev_err(&pdev->dev, "Can't allocate tegra_sgtl5000 struct\n"); + if (!machine) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/tegra_wm8753.c b/sound/soc/tegra/tegra_wm8753.c index bdedd1028569a68362c8bdeaf036fe0931d8b43a..d0ab0026a4cd879e906d388979d0f02af52d0c56 100644 --- a/sound/soc/tegra/tegra_wm8753.c +++ b/sound/soc/tegra/tegra_wm8753.c @@ -128,10 +128,8 @@ static int tegra_wm8753_driver_probe(struct platform_device *pdev) machine = devm_kzalloc(&pdev->dev, sizeof(struct tegra_wm8753), GFP_KERNEL); - if (!machine) { - dev_err(&pdev->dev, "Can't allocate tegra_wm8753 struct\n"); + if (!machine) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/tegra_wm8903.c b/sound/soc/tegra/tegra_wm8903.c index 2013e9c4bba0cfe32572997094243008a5c01b4c..dbfb49298ae8bea177237482f9d8dc58169bdf1e 100644 --- a/sound/soc/tegra/tegra_wm8903.c +++ b/sound/soc/tegra/tegra_wm8903.c @@ -248,10 +248,8 @@ static int tegra_wm8903_driver_probe(struct platform_device *pdev) machine = devm_kzalloc(&pdev->dev, sizeof(struct tegra_wm8903), GFP_KERNEL); - if (!machine) { - dev_err(&pdev->dev, "Can't allocate tegra_wm8903 struct\n"); + if (!machine) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/tegra_wm9712.c b/sound/soc/tegra/tegra_wm9712.c index 6492f8143ff1f172d3bcd5007b3ea1049cdc9e2f..c9cd2243262715ecfd929b44c6e7c99dee1d7aac 100644 --- a/sound/soc/tegra/tegra_wm9712.c +++ b/sound/soc/tegra/tegra_wm9712.c @@ -77,10 +77,8 @@ static int tegra_wm9712_driver_probe(struct platform_device *pdev) machine = devm_kzalloc(&pdev->dev, sizeof(struct tegra_wm9712), GFP_KERNEL); - if (!machine) { - dev_err(&pdev->dev, "Can't allocate tegra_wm9712 struct\n"); + if (!machine) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/tegra/trimslice.c b/sound/soc/tegra/trimslice.c index 870f84ab50054ece92d3b42786ec629727d4c044..c9dcad9bb93123b6b901ad3a3db33bfad0f3b19e 100644 --- a/sound/soc/tegra/trimslice.c +++ b/sound/soc/tegra/trimslice.c @@ -123,10 +123,8 @@ static int tegra_snd_trimslice_probe(struct platform_device *pdev) trimslice = devm_kzalloc(&pdev->dev, sizeof(struct tegra_trimslice), GFP_KERNEL); - if (!trimslice) { - dev_err(&pdev->dev, "Can't allocate tegra_trimslice\n"); + if (!trimslice) return -ENOMEM; - } card->dev = &pdev->dev; platform_set_drvdata(pdev, card); diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c index a8f705bb60dc087a733e74031a777d91e20b09fe..7912bf09dc4d70c66f19316c6c8f2b5a0d60357c 100644 --- a/sound/soc/txx9/txx9aclc.c +++ b/sound/soc/txx9/txx9aclc.c @@ -206,7 +206,7 @@ static void txx9aclc_dma_tasklet(unsigned long data) static int txx9aclc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { struct txx9aclc_dmadata *dmadata = substream->runtime->private_data; - struct txx9aclc_plat_drvdata *drvdata =txx9aclc_drvdata; + struct txx9aclc_plat_drvdata *drvdata = txx9aclc_drvdata; void __iomem *base = drvdata->base; unsigned long flags; int ret = 0; @@ -340,7 +340,7 @@ static bool filter(struct dma_chan *chan, void *param) static int txx9aclc_dma_init(struct txx9aclc_soc_device *dev, struct txx9aclc_dmadata *dmadata) { - struct txx9aclc_plat_drvdata *drvdata =txx9aclc_drvdata; + struct txx9aclc_plat_drvdata *drvdata = txx9aclc_drvdata; struct txx9dmac_slave *ds = &dmadata->dma_slave; dma_cap_mask_t mask; @@ -392,6 +392,7 @@ static int txx9aclc_pcm_remove(struct snd_soc_platform *platform) for (i = 0; i < 2; i++) { struct txx9aclc_dmadata *dmadata = &dev->dmadata[i]; struct dma_chan *chan = dmadata->dma_chan; + if (chan) { dmadata->frag_count = -1; dmaengine_terminate_all(chan); diff --git a/sound/soc/ux500/mop500.c b/sound/soc/ux500/mop500.c index ba9fc099cf67b22997aad328f256c2b1150c86a1..b50f68a439cebaf5d28ac38331e165b31e72ead2 100644 --- a/sound/soc/ux500/mop500.c +++ b/sound/soc/ux500/mop500.c @@ -33,7 +33,6 @@ static struct snd_soc_dai_link mop500_dai_links[] = { .stream_name = "ab8500_0", .cpu_dai_name = "ux500-msp-i2s.1", .codec_dai_name = "ab8500-codec-dai.0", - .platform_name = "ux500-msp-i2s.1", .codec_name = "ab8500-codec.0", .init = mop500_ab8500_machine_init, .ops = mop500_ab8500_ops, @@ -43,7 +42,6 @@ static struct snd_soc_dai_link mop500_dai_links[] = { .stream_name = "ab8500_1", .cpu_dai_name = "ux500-msp-i2s.3", .codec_dai_name = "ab8500-codec-dai.1", - .platform_name = "ux500-msp-i2s.3", .codec_name = "ab8500-codec.0", .init = NULL, .ops = mop500_ab8500_ops, @@ -87,8 +85,6 @@ static int mop500_of_probe(struct platform_device *pdev, for (i = 0; i < 2; i++) { mop500_dai_links[i].cpu_of_node = msp_np[i]; mop500_dai_links[i].cpu_dai_name = NULL; - mop500_dai_links[i].platform_of_node = msp_np[i]; - mop500_dai_links[i].platform_name = NULL; mop500_dai_links[i].codec_of_node = codec_np; mop500_dai_links[i].codec_name = NULL; } diff --git a/sound/soc/ux500/ux500_msp_dai.c b/sound/soc/ux500/ux500_msp_dai.c index b343efd9be5ba7e9d39fa1cf28f30d41f78b82ce..ec5152aa3f6ed2a9b1eeeea37b967d1d689d2dad 100644 --- a/sound/soc/ux500/ux500_msp_dai.c +++ b/sound/soc/ux500/ux500_msp_dai.c @@ -133,6 +133,7 @@ static int setup_pcm_framing(struct snd_soc_dai *dai, unsigned int rate, struct ux500_msp_i2s_drvdata *drvdata = dev_get_drvdata(dai->dev); u32 frame_length = MSP_FRAME_LEN_1; + prot_desc->frame_width = 0; switch (drvdata->slots) { @@ -482,7 +483,8 @@ static int ux500_msp_dai_prepare(struct snd_pcm_substream *substream, if ((drvdata->fmt & SND_SOC_DAIFMT_MASTER_MASK) && (drvdata->msp->f_bitclk > 19200000)) { /* If the bit-clock is higher than 19.2MHz, Vape should be - * run in 100% OPP. Only when bit-clock is used (MSP master) */ + * run in 100% OPP. Only when bit-clock is used (MSP master) + */ prcmu_qos_update_requirement(PRCMU_QOS_APE_OPP, "ux500-msp-i2s", 100); drvdata->vape_opp_constraint = 1; diff --git a/sound/soc/ux500/ux500_msp_i2s.c b/sound/soc/ux500/ux500_msp_i2s.c index 959d7b4edf56a94d167d09ea07ca7a597a64bbce..bd5266aca0f128c855a20b0358377f9804f1437f 100644 --- a/sound/soc/ux500/ux500_msp_i2s.c +++ b/sound/soc/ux500/ux500_msp_i2s.c @@ -604,7 +604,6 @@ int ux500_msp_i2s_trigger(struct ux500_msp *msp, int cmd, int direction) break; default: return -EINVAL; - break; } return 0; diff --git a/sound/soc/zte/Kconfig b/sound/soc/zte/Kconfig index 6d8a90d363150e808620ae1a655e79d4766e08a8..75f67a5d23eadb158d496dcebc2f43d12baeae1f 100644 --- a/sound/soc/zte/Kconfig +++ b/sound/soc/zte/Kconfig @@ -15,3 +15,11 @@ config ZX_I2S help Say Y or M if you want to add support for codecs attached to the ZTE ZX I2S interface + +config ZX_TDM + tristate "ZTE ZX TDM Driver Support" + depends on COMMON_CLK + select SND_SOC_GENERIC_DMAENGINE_PCM + help + Say Y or M if you want to add support for codecs attached to the + ZTE ZX TDM interface diff --git a/sound/soc/zte/Makefile b/sound/soc/zte/Makefile index 77768f5fd10c5f7d6c39f801f1e98bf3e88de189..1fc841acdfddd4c6785c937f5bb824e8b94b5d36 100644 --- a/sound/soc/zte/Makefile +++ b/sound/soc/zte/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_ZX_SPDIF) += zx-spdif.o obj-$(CONFIG_ZX_I2S) += zx-i2s.o +obj-$(CONFIG_ZX_TDM) += zx-tdm.o diff --git a/sound/soc/zte/zx-tdm.c b/sound/soc/zte/zx-tdm.c new file mode 100644 index 0000000000000000000000000000000000000000..bd632cc503b3a3e6aed20f8e1076cdda62020ff5 --- /dev/null +++ b/sound/soc/zte/zx-tdm.c @@ -0,0 +1,461 @@ +/* + * ZTE's TDM driver + * + * Copyright (C) 2017 ZTE Ltd + * + * Author: Baoyou Xie + * + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define REG_TIMING_CTRL 0x04 +#define REG_TX_FIFO_CTRL 0x0C +#define REG_RX_FIFO_CTRL 0x10 +#define REG_INT_EN 0x1C +#define REG_INT_STATUS 0x20 +#define REG_DATABUF 0x24 +#define REG_TS_MASK0 0x44 +#define REG_PROCESS_CTRL 0x54 + +#define FIFO_CTRL_TX_RST BIT(0) +#define FIFO_CTRL_RX_RST BIT(0) +#define DEAGULT_FIFO_THRES GENMASK(4, 2) + +#define FIFO_CTRL_TX_DMA_EN BIT(1) +#define FIFO_CTRL_RX_DMA_EN BIT(1) + +#define TX_FIFO_RST_MASK BIT(0) +#define RX_FIFO_RST_MASK BIT(0) + +#define FIFOCTRL_TX_FIFO_RST BIT(0) +#define FIFOCTRL_RX_FIFO_RST BIT(0) + +#define TXTH_MASK GENMASK(5, 2) +#define RXTH_MASK GENMASK(5, 2) + +#define FIFOCTRL_THRESHOLD(x) ((x) << 2) + +#define TIMING_MS_MASK BIT(1) +/* + * 00: 8 clk cycles every timeslot + * 01: 16 clk cycles every timeslot + * 10: 32 clk cycles every timeslot + */ +#define TIMING_SYNC_WIDTH_MASK GENMASK(6, 5) +#define TIMING_WIDTH_SHIFT 5 +#define TIMING_DEFAULT_WIDTH 0 +#define TIMING_TS_WIDTH(x) ((x) << TIMING_WIDTH_SHIFT) +#define TIMING_WIDTH_FACTOR 8 + +#define TIMING_MASTER_MODE BIT(21) +#define TIMING_LSB_FIRST BIT(20) +#define TIMING_TS_NUM(x) (((x) - 1) << 7) +#define TIMING_CLK_SEL_MASK GENMASK(2, 0) +#define TIMING_CLK_SEL_DEF BIT(2) + +#define PROCESS_TX_EN BIT(0) +#define PROCESS_RX_EN BIT(1) +#define PROCESS_TDM_EN BIT(2) +#define PROCESS_DISABLE_ALL 0 + +#define INT_DISABLE_ALL 0 +#define INT_STATUS_MASK GENMASK(6, 0) + +struct zx_tdm_info { + struct snd_dmaengine_dai_dma_data dma_playback; + struct snd_dmaengine_dai_dma_data dma_capture; + resource_size_t phy_addr; + void __iomem *regbase; + struct clk *dai_wclk; + struct clk *dai_pclk; + int master; + struct device *dev; +}; + +static inline u32 zx_tdm_readl(struct zx_tdm_info *tdm, u16 reg) +{ + return readl_relaxed(tdm->regbase + reg); +} + +static inline void zx_tdm_writel(struct zx_tdm_info *tdm, u16 reg, u32 val) +{ + writel_relaxed(val, tdm->regbase + reg); +} + +static void zx_tdm_tx_en(struct zx_tdm_info *tdm, bool on) +{ + unsigned long val; + + val = zx_tdm_readl(tdm, REG_PROCESS_CTRL); + if (on) + val |= PROCESS_TX_EN | PROCESS_TDM_EN; + else + val &= ~(PROCESS_TX_EN | PROCESS_TDM_EN); + zx_tdm_writel(tdm, REG_PROCESS_CTRL, val); +} + +static void zx_tdm_rx_en(struct zx_tdm_info *tdm, bool on) +{ + unsigned long val; + + val = zx_tdm_readl(tdm, REG_PROCESS_CTRL); + if (on) + val |= PROCESS_RX_EN | PROCESS_TDM_EN; + else + val &= ~(PROCESS_RX_EN | PROCESS_TDM_EN); + zx_tdm_writel(tdm, REG_PROCESS_CTRL, val); +} + +static void zx_tdm_tx_dma_en(struct zx_tdm_info *tdm, bool on) +{ + unsigned long val; + + val = zx_tdm_readl(tdm, REG_TX_FIFO_CTRL); + val |= FIFO_CTRL_TX_RST | DEAGULT_FIFO_THRES; + if (on) + val |= FIFO_CTRL_TX_DMA_EN; + else + val &= ~FIFO_CTRL_TX_DMA_EN; + zx_tdm_writel(tdm, REG_TX_FIFO_CTRL, val); +} + +static void zx_tdm_rx_dma_en(struct zx_tdm_info *tdm, bool on) +{ + unsigned long val; + + val = zx_tdm_readl(tdm, REG_RX_FIFO_CTRL); + val |= FIFO_CTRL_RX_RST | DEAGULT_FIFO_THRES; + if (on) + val |= FIFO_CTRL_RX_DMA_EN; + else + val &= ~FIFO_CTRL_RX_DMA_EN; + zx_tdm_writel(tdm, REG_RX_FIFO_CTRL, val); +} + +#define ZX_TDM_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000) + +#define ZX_TDM_FMTBIT \ + (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FORMAT_MU_LAW | \ + SNDRV_PCM_FORMAT_A_LAW) + +static int zx_tdm_dai_probe(struct snd_soc_dai *dai) +{ + struct zx_tdm_info *zx_tdm = dev_get_drvdata(dai->dev); + + snd_soc_dai_set_drvdata(dai, zx_tdm); + zx_tdm->dma_playback.addr = zx_tdm->phy_addr + REG_DATABUF; + zx_tdm->dma_playback.maxburst = 16; + zx_tdm->dma_capture.addr = zx_tdm->phy_addr + REG_DATABUF; + zx_tdm->dma_capture.maxburst = 16; + snd_soc_dai_init_dma_data(dai, &zx_tdm->dma_playback, + &zx_tdm->dma_capture); + return 0; +} + +static int zx_tdm_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) +{ + struct zx_tdm_info *tdm = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long val; + + val = zx_tdm_readl(tdm, REG_TIMING_CTRL); + val &= ~(TIMING_SYNC_WIDTH_MASK | TIMING_MS_MASK); + val |= TIMING_DEFAULT_WIDTH << TIMING_WIDTH_SHIFT; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + tdm->master = 1; + val |= TIMING_MASTER_MODE; + break; + case SND_SOC_DAIFMT_CBS_CFS: + tdm->master = 0; + val &= ~TIMING_MASTER_MODE; + break; + default: + dev_err(cpu_dai->dev, "Unknown master/slave format\n"); + return -EINVAL; + } + + + zx_tdm_writel(tdm, REG_TIMING_CTRL, val); + + return 0; +} + +static int zx_tdm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *socdai) +{ + struct zx_tdm_info *tdm = snd_soc_dai_get_drvdata(socdai); + struct snd_dmaengine_dai_dma_data *dma_data; + unsigned int ts_width = TIMING_DEFAULT_WIDTH; + unsigned int ch_num = 32; + unsigned int mask = 0; + unsigned int ret = 0; + unsigned long val; + + dma_data = snd_soc_dai_get_dma_data(socdai, substream); + dma_data->addr_width = ch_num >> 3; + + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_MU_LAW: + case SNDRV_PCM_FORMAT_A_LAW: + case SNDRV_PCM_FORMAT_S16_LE: + ts_width = 1; + break; + default: + ts_width = 0; + dev_err(socdai->dev, "Unknown data format\n"); + return -EINVAL; + } + + val = zx_tdm_readl(tdm, REG_TIMING_CTRL); + val |= TIMING_TS_WIDTH(ts_width) | TIMING_TS_NUM(1); + zx_tdm_writel(tdm, REG_TIMING_CTRL, val); + zx_tdm_writel(tdm, REG_TS_MASK0, mask); + + if (tdm->master) + ret = clk_set_rate(tdm->dai_wclk, + params_rate(params) * TIMING_WIDTH_FACTOR * ch_num); + + return ret; +} + +static int zx_tdm_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) +{ + int capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + struct zx_tdm_info *zx_tdm = dev_get_drvdata(dai->dev); + unsigned int val; + int ret = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + if (capture) { + val = zx_tdm_readl(zx_tdm, REG_RX_FIFO_CTRL); + val |= FIFOCTRL_RX_FIFO_RST; + zx_tdm_writel(zx_tdm, REG_RX_FIFO_CTRL, val); + + zx_tdm_rx_dma_en(zx_tdm, true); + } else { + val = zx_tdm_readl(zx_tdm, REG_TX_FIFO_CTRL); + val |= FIFOCTRL_TX_FIFO_RST; + zx_tdm_writel(zx_tdm, REG_TX_FIFO_CTRL, val); + + zx_tdm_tx_dma_en(zx_tdm, true); + } + break; + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + if (capture) + zx_tdm_rx_en(zx_tdm, true); + else + zx_tdm_tx_en(zx_tdm, true); + break; + case SNDRV_PCM_TRIGGER_STOP: + if (capture) + zx_tdm_rx_dma_en(zx_tdm, false); + else + zx_tdm_tx_dma_en(zx_tdm, false); + break; + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + if (capture) + zx_tdm_rx_en(zx_tdm, false); + else + zx_tdm_tx_en(zx_tdm, false); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int zx_tdm_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct zx_tdm_info *zx_tdm = dev_get_drvdata(dai->dev); + int ret; + + ret = clk_prepare_enable(zx_tdm->dai_wclk); + if (ret) + return ret; + + ret = clk_prepare_enable(zx_tdm->dai_pclk); + if (ret) { + clk_disable_unprepare(zx_tdm->dai_wclk); + return ret; + } + + return 0; +} + +static void zx_tdm_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct zx_tdm_info *zx_tdm = dev_get_drvdata(dai->dev); + + clk_disable_unprepare(zx_tdm->dai_pclk); + clk_disable_unprepare(zx_tdm->dai_wclk); +} + +static struct snd_soc_dai_ops zx_tdm_dai_ops = { + .trigger = zx_tdm_trigger, + .hw_params = zx_tdm_hw_params, + .set_fmt = zx_tdm_set_fmt, + .startup = zx_tdm_startup, + .shutdown = zx_tdm_shutdown, +}; + +static const struct snd_soc_component_driver zx_tdm_component = { + .name = "zx-tdm", +}; + +static void zx_tdm_init_state(struct zx_tdm_info *tdm) +{ + unsigned int val; + + zx_tdm_writel(tdm, REG_PROCESS_CTRL, PROCESS_DISABLE_ALL); + + val = zx_tdm_readl(tdm, REG_TIMING_CTRL); + val |= TIMING_LSB_FIRST; + val &= ~TIMING_CLK_SEL_MASK; + val |= TIMING_CLK_SEL_DEF; + zx_tdm_writel(tdm, REG_TIMING_CTRL, val); + + zx_tdm_writel(tdm, REG_INT_EN, INT_DISABLE_ALL); + /* + * write INT_STATUS register to clear it. + */ + zx_tdm_writel(tdm, REG_INT_STATUS, INT_STATUS_MASK); + zx_tdm_writel(tdm, REG_RX_FIFO_CTRL, FIFOCTRL_RX_FIFO_RST); + zx_tdm_writel(tdm, REG_TX_FIFO_CTRL, FIFOCTRL_TX_FIFO_RST); + + val = zx_tdm_readl(tdm, REG_RX_FIFO_CTRL); + val &= ~(RXTH_MASK | RX_FIFO_RST_MASK); + val |= FIFOCTRL_THRESHOLD(8); + zx_tdm_writel(tdm, REG_RX_FIFO_CTRL, val); + + val = zx_tdm_readl(tdm, REG_TX_FIFO_CTRL); + val &= ~(TXTH_MASK | TX_FIFO_RST_MASK); + val |= FIFOCTRL_THRESHOLD(8); + zx_tdm_writel(tdm, REG_TX_FIFO_CTRL, val); +} + +static struct snd_soc_dai_driver zx_tdm_dai = { + .name = "zx-tdm-dai", + .id = 0, + .probe = zx_tdm_dai_probe, + .playback = { + .channels_min = 1, + .channels_max = 4, + .rates = ZX_TDM_RATES, + .formats = ZX_TDM_FMTBIT, + }, + .capture = { + .channels_min = 1, + .channels_max = 4, + .rates = ZX_TDM_RATES, + .formats = ZX_TDM_FMTBIT, + }, + .ops = &zx_tdm_dai_ops, +}; + +static int zx_tdm_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct of_phandle_args out_args; + unsigned int dma_reg_offset; + struct zx_tdm_info *zx_tdm; + unsigned int dma_mask; + struct resource *res; + struct regmap *regmap_sysctrl; + int ret; + + zx_tdm = devm_kzalloc(&pdev->dev, sizeof(*zx_tdm), GFP_KERNEL); + if (!zx_tdm) + return -ENOMEM; + + zx_tdm->dev = dev; + + zx_tdm->dai_wclk = devm_clk_get(&pdev->dev, "wclk"); + if (IS_ERR(zx_tdm->dai_wclk)) { + dev_err(&pdev->dev, "Fail to get wclk\n"); + return PTR_ERR(zx_tdm->dai_wclk); + } + + zx_tdm->dai_pclk = devm_clk_get(&pdev->dev, "pclk"); + if (IS_ERR(zx_tdm->dai_pclk)) { + dev_err(&pdev->dev, "Fail to get pclk\n"); + return PTR_ERR(zx_tdm->dai_pclk); + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + zx_tdm->phy_addr = res->start; + zx_tdm->regbase = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(zx_tdm->regbase)) + return PTR_ERR(zx_tdm->regbase); + + ret = of_parse_phandle_with_fixed_args(pdev->dev.of_node, + "zte,tdm-dma-sysctrl", 2, 0, &out_args); + if (ret) { + dev_err(&pdev->dev, "Fail to get zte,tdm-dma-sysctrl\n"); + return ret; + } + + dma_reg_offset = out_args.args[0]; + dma_mask = out_args.args[1]; + regmap_sysctrl = syscon_node_to_regmap(out_args.np); + if (IS_ERR(regmap_sysctrl)) { + of_node_put(out_args.np); + return PTR_ERR(regmap_sysctrl); + } + + regmap_update_bits(regmap_sysctrl, dma_reg_offset, dma_mask, dma_mask); + of_node_put(out_args.np); + + zx_tdm_init_state(zx_tdm); + platform_set_drvdata(pdev, zx_tdm); + + ret = devm_snd_soc_register_component(&pdev->dev, &zx_tdm_component, + &zx_tdm_dai, 1); + if (ret) { + dev_err(&pdev->dev, "Register DAI failed: %d\n", ret); + return ret; + } + + ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + if (ret) + dev_err(&pdev->dev, "Register platform PCM failed: %d\n", ret); + + return ret; +} + +static const struct of_device_id zx_tdm_dt_ids[] = { + { .compatible = "zte,zx296718-tdm", }, + {} +}; +MODULE_DEVICE_TABLE(of, zx_tdm_dt_ids); + +static struct platform_driver tdm_driver = { + .probe = zx_tdm_probe, + .driver = { + .name = "zx-tdm", + .of_match_table = zx_tdm_dt_ids, + }, +}; +module_platform_driver(tdm_driver); + +MODULE_AUTHOR("Baoyou Xie "); +MODULE_DESCRIPTION("ZTE TDM DAI driver"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index ac75816ada7c31b133586693e5c73a41dc79348f..850fab4a8f3b5d4a17f51f9a4521fbd0cf624709 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -225,9 +225,9 @@ snd_emux_load_patch_seq_oss(struct snd_seq_oss_arg *arg, int format, else if (format == SNDRV_OSS_SOUNDFONT_PATCH) { struct soundfont_patch_info patch; if (count < (int)sizeof(patch)) - rc = -EINVAL; + return -EINVAL; if (copy_from_user(&patch, buf, sizeof(patch))) - rc = -EFAULT; + return -EFAULT; if (patch.type >= SNDRV_SFNT_LOAD_INFO && patch.type <= SNDRV_SFNT_PROBE_DATA) rc = snd_soundfont_load(emu->sflist, buf, count, SF_CLIENT_NO(p->chset.port)); diff --git a/sound/usb/card.c b/sound/usb/card.c index f36cb068dad331700c087ee765a5a2a82ac3e233..6640277a725b6565adfb9665077903ff51940c34 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -332,6 +332,7 @@ static int snd_usb_audio_dev_free(struct snd_device *device) static int snd_usb_audio_create(struct usb_interface *intf, struct usb_device *dev, int idx, const struct snd_usb_audio_quirk *quirk, + unsigned int usb_id, struct snd_usb_audio **rchip) { struct snd_card *card; @@ -381,8 +382,7 @@ static int snd_usb_audio_create(struct usb_interface *intf, atomic_set(&chip->usage_count, 0); atomic_set(&chip->shutdown, 0); - chip->usb_id = USB_ID(le16_to_cpu(dev->descriptor.idVendor), - le16_to_cpu(dev->descriptor.idProduct)); + chip->usb_id = usb_id; INIT_LIST_HEAD(&chip->pcm_list); INIT_LIST_HEAD(&chip->ep_list); INIT_LIST_HEAD(&chip->midi_list); @@ -569,7 +569,7 @@ static int usb_audio_probe(struct usb_interface *intf, (vid[i] == -1 || vid[i] == USB_ID_VENDOR(id)) && (pid[i] == -1 || pid[i] == USB_ID_PRODUCT(id))) { err = snd_usb_audio_create(intf, dev, i, quirk, - &chip); + id, &chip); if (err < 0) goto __error; chip->pm_intf = intf; diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index fab53f58d4471c171d427a42e5e3d23d03846be7..b3854f8c0c679f4d5da9b3ab00a644eacb5c99d9 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -430,7 +430,7 @@ static int snd_line6_control_playback_put(struct snd_kcontrol *kcontrol, } /* control definition */ -static struct snd_kcontrol_new line6_controls[] = { +static const struct snd_kcontrol_new line6_controls[] = { { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Playback Volume", diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c index 17aa616e61f53bd7b196422900308fefc16a7136..358224cc5638ebe1fda5c897588d7b7d10db57e0 100644 --- a/sound/usb/line6/pod.c +++ b/sound/usb/line6/pod.c @@ -380,7 +380,7 @@ static int snd_pod_control_monitor_put(struct snd_kcontrol *kcontrol, } /* control definition */ -static struct snd_kcontrol_new pod_control_monitor = { +static const struct snd_kcontrol_new pod_control_monitor = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitor Playback Volume", .index = 0, diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c index 8e22f430d70072293f9ffabad8bbea6e6834f038..ba7975c0d03d90e31674dbfb2a333d37343f0d96 100644 --- a/sound/usb/line6/toneport.c +++ b/sound/usb/line6/toneport.c @@ -250,7 +250,7 @@ static void toneport_start_pcm(unsigned long arg) } /* control definition */ -static struct snd_kcontrol_new toneport_control_monitor = { +static const struct snd_kcontrol_new toneport_control_monitor = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Monitor Playback Volume", .index = 0, @@ -261,7 +261,7 @@ static struct snd_kcontrol_new toneport_control_monitor = { }; /* source selector definition */ -static struct snd_kcontrol_new toneport_control_source = { +static const struct snd_kcontrol_new toneport_control_source = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "PCM Capture Source", .index = 0, diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 6e763bc8d7dbb5b7d4ba43ef3201050b726b089a..a35f414672373caa75a550be4b171bc20990f09f 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1922,7 +1922,7 @@ static int roland_load_put(struct snd_kcontrol *kcontrol, return changed; } -static struct snd_kcontrol_new roland_load_ctl = { +static const struct snd_kcontrol_new roland_load_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "MIDI Input Mode", .info = roland_load_info, diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 4703caea56b2f600417c8a961bd1d73ec1ab2e15..082736c539bc14eec73a1afad11e2a3cd5577d89 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1172,7 +1172,7 @@ static struct snd_kcontrol_new usb_feature_unit_ctl = { }; /* the read-only variant */ -static struct snd_kcontrol_new usb_feature_unit_ctl_ro = { +static const struct snd_kcontrol_new usb_feature_unit_ctl_ro = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later manually */ .info = mixer_ctl_feature_info, @@ -1745,7 +1745,7 @@ static int mixer_ctl_procunit_put(struct snd_kcontrol *kcontrol, } /* alsa control interface for processing/extension unit */ -static struct snd_kcontrol_new mixer_procunit_ctl = { +static const struct snd_kcontrol_new mixer_procunit_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later */ .info = mixer_ctl_feature_info, @@ -2033,7 +2033,7 @@ static int mixer_ctl_selector_put(struct snd_kcontrol *kcontrol, } /* alsa control interface for selector unit */ -static struct snd_kcontrol_new mixer_selectunit_ctl = { +static const struct snd_kcontrol_new mixer_selectunit_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", /* will be filled later */ .info = mixer_ctl_selector_info, diff --git a/sound/usb/mixer_scarlett.c b/sound/usb/mixer_scarlett.c index 7438e7c4a842da4aac34334f13575aa765e3c79d..c33e2378089d5936ac3466cc9c187fa62bb5a0ce 100644 --- a/sound/usb/mixer_scarlett.c +++ b/sound/usb/mixer_scarlett.c @@ -477,7 +477,7 @@ static int scarlett_ctl_meter_get(struct snd_kcontrol *kctl, return 0; } -static struct snd_kcontrol_new usb_scarlett_ctl_switch = { +static const struct snd_kcontrol_new usb_scarlett_ctl_switch = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", .info = scarlett_ctl_switch_info, @@ -487,7 +487,7 @@ static struct snd_kcontrol_new usb_scarlett_ctl_switch = { static const DECLARE_TLV_DB_SCALE(db_scale_scarlett_gain, -12800, 100, 0); -static struct snd_kcontrol_new usb_scarlett_ctl = { +static const struct snd_kcontrol_new usb_scarlett_ctl = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, @@ -499,7 +499,7 @@ static struct snd_kcontrol_new usb_scarlett_ctl = { .tlv = { .p = db_scale_scarlett_gain } }; -static struct snd_kcontrol_new usb_scarlett_ctl_master = { +static const struct snd_kcontrol_new usb_scarlett_ctl_master = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | SNDRV_CTL_ELEM_ACCESS_TLV_READ, @@ -511,7 +511,7 @@ static struct snd_kcontrol_new usb_scarlett_ctl_master = { .tlv = { .p = db_scale_scarlett_gain } }; -static struct snd_kcontrol_new usb_scarlett_ctl_enum = { +static const struct snd_kcontrol_new usb_scarlett_ctl_enum = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", .info = scarlett_ctl_enum_info, @@ -519,7 +519,7 @@ static struct snd_kcontrol_new usb_scarlett_ctl_enum = { .put = scarlett_ctl_enum_put, }; -static struct snd_kcontrol_new usb_scarlett_ctl_dynamic_enum = { +static const struct snd_kcontrol_new usb_scarlett_ctl_dynamic_enum = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "", .info = scarlett_ctl_enum_dynamic_info, @@ -527,7 +527,7 @@ static struct snd_kcontrol_new usb_scarlett_ctl_dynamic_enum = { .put = scarlett_ctl_enum_put, }; -static struct snd_kcontrol_new usb_scarlett_ctl_sync = { +static const struct snd_kcontrol_new usb_scarlett_ctl_sync = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, .name = "", diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index dc48eedea92e7aaaba64f4db4053fea1b73d0b77..26ed23b18b7774fd495f7ce90e51ec6dc8022e2d 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -698,16 +698,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; u8 meter_urb[64]; - char tmp[sizeof(mix_init_msg2)] = {0}; switch (kcontrol->private_value) { - case 0: - snd_us16x08_send_urb(chip, (char *)mix_init_msg1, - sizeof(mix_init_msg1)); + case 0: { + char tmp[sizeof(mix_init_msg1)]; + + memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1)); + snd_us16x08_send_urb(chip, tmp, 4); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value++; break; + } case 1: snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); @@ -718,15 +720,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, sizeof(meter_urb)); kcontrol->private_value++; break; - case 3: + case 3: { + char tmp[sizeof(mix_init_msg2)]; + memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2)); tmp[2] = snd_get_meter_comp_index(store); - snd_us16x08_send_urb(chip, tmp, sizeof(mix_init_msg2)); + snd_us16x08_send_urb(chip, tmp, 10); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value = 0; break; } + } for (set = 0; set < 6; set++) get_meter_levels_from_urb(set, store, meter_urb); @@ -1135,7 +1140,7 @@ static const struct snd_us16x08_control_params eq_controls[] = { .control_id = SND_US16X08_ID_EQLOWMIDWIDTH, .type = USB_MIXER_U8, .num_channels = 16, - .name = "EQ MidQLow Q", + .name = "EQ MidLow Q", }, { /* EQ mid high gain */ .kcontrol_new = &snd_us16x08_eq_gain_ctl, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 01eff6ce6401a3f8c6c8b51da93524f635bba86a..d7b0b0a3a2db55617a908e2fe4a8a2af90082e02 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1364,7 +1364,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, /* Amanero Combo384 USB interface with native DSD support */ case USB_ID(0x16d0, 0x071a): if (fp->altsetting == 2) { - switch (chip->dev->descriptor.bcdDevice) { + switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) { case 0x199: return SNDRV_PCM_FMTBIT_DSD_U32_LE; case 0x19b: diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index cf45bf1f7ee0b0a441f5a9258deaf203983f0f84..e118bdca983d435f323139ac4fa5b8fd9b47fae4 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -472,7 +472,7 @@ static int usb_stream_hwdep_new(struct snd_card *card) hw->ops.mmap = usb_stream_hwdep_mmap; hw->ops.poll = usb_stream_hwdep_poll; - sprintf(hw->name, "/proc/bus/usb/%03d/%03d/hwdeppcm", + sprintf(hw->name, "/dev/bus/usb/%03d/%03d/hwdeppcm", dev->bus->busnum, dev->devnum); return 0; } diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index 605e1047c01dfbc2dd025b9033f133aaef5fc5f4..f4b3cda412fcc6d887b997d9e4cf8795900daffd 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -258,7 +258,7 @@ int usX2Y_hwdep_new(struct snd_card *card, struct usb_device* device) hw->ops.mmap = snd_us428ctls_mmap; hw->ops.poll = snd_us428ctls_poll; hw->exclusive = 1; - sprintf(hw->name, "/proc/bus/usb/%03d/%03d", device->bus->busnum, device->devnum); + sprintf(hw->name, "/dev/bus/usb/%03d/%03d", device->bus->busnum, device->devnum); return 0; } diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c index f95164b91152da9e137594698bb90ca657b0c08f..d51c7fd7835bb1b8e54a521fb6e9a668781bdb5e 100644 --- a/sound/usb/usx2y/usx2yhwdeppcm.c +++ b/sound/usb/usx2y/usx2yhwdeppcm.c @@ -723,7 +723,7 @@ int usX2Y_hwdep_pcm_new(struct snd_card *card) hw->ops.release = snd_usX2Y_hwdep_pcm_release; hw->ops.mmap = snd_usX2Y_hwdep_pcm_mmap; hw->exclusive = 1; - sprintf(hw->name, "/proc/bus/usb/%03d/%03d/hwdeppcm", dev->bus->busnum, dev->devnum); + sprintf(hw->name, "/dev/bus/usb/%03d/%03d/hwdeppcm", dev->bus->busnum, dev->devnum); err = snd_pcm_new(card, NAME_ALLCAPS" hwdep Audio", 2, 1, 1, &pcm); if (err < 0) { diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index c505b019e09ce0795cc8ddd79cf1f8c6a18d32ff..b11d3920b9a521b95142481106d420ac75643b26 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -1809,10 +1809,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) pdata->notify_pending = false; spin_unlock_irq(&pdata->lpe_audio_slock); - /* runtime PM isn't enabled as default, since it won't save much on - * BYT/CHT devices; user who want the runtime PM should adjust the - * power/ontrol and power/autosuspend_delay_ms sysfs entries instead - */ pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_set_active(&pdev->dev); diff --git a/tools/Makefile b/tools/Makefile index 00caacd3ed928c5156e3ba5c83640d8b1d9103ae..c8a90d01dd8ede564d6363d6f1a61bfe6f1736e8 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -86,10 +86,13 @@ tmon: FORCE freefall: FORCE $(call descend,laptop/$@) +kvm_stat: FORCE + $(call descend,kvm/$@) + all: acpi cgroup cpupower gpio hv firewire lguest \ perf selftests turbostat usb \ virtio vm net x86_energy_perf_policy \ - tmon freefall objtool + tmon freefall objtool kvm_stat acpi_install: $(call descend,power/$(@:_install=),install) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index af05f8e0903e27baccddc00748bf0f75820c9d3d..5e3c673fa3f4435b027fbc634815be10c26cf39f 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -114,6 +116,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -181,11 +185,28 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 3051f86a9b5f4ab976568b266b41f47313f86b82..70eea2ecc6631cab3d718c3958cd0513a7f7e6a3 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { @@ -201,11 +205,28 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 -#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + +#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_ITS_SAVE_TABLES 1 +#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 +#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 3603b6f51b11beae38c1c1645086cafc807b07ab..07fbeb927834f3a96278414aedaa59ea580ae8de 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -29,6 +29,9 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_GUEST_DEBUG +/* Not always available, but if it is, this is the correct offset. */ +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + struct kvm_regs { __u64 pc; __u64 cr; @@ -413,6 +416,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -613,5 +636,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index a2ffec4139ad1cb8cebe816a9f0b3e261cd97d42..3dd2a1d308dd0b92c36c3c5ca5276fd19838f252 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -131,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kmo[16]; /* with MSA4 */ __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ - __u8 reserved[1824]; + __u8 kma[16]; /* with MSA8 */ + __u8 reserved[1808]; }; /* kvm attributes for crypto */ @@ -197,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h index 059e33e94260ab5cab2dcf2b95a46704619a1756..328eeceec709803b91fa11ab14860341828f6639 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -7,6 +7,8 @@ #define LOCK_PREFIX "\n\tlock; " +#include + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. @@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v) GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/arch/x86/include/asm/cmpxchg.h b/tools/arch/x86/include/asm/cmpxchg.h new file mode 100644 index 0000000000000000000000000000000000000000..f5253260f3cc1ea4a0c2cfecc1bbc6407003fa44 --- /dev/null +++ b/tools/arch/x86/include/asm/cmpxchg.h @@ -0,0 +1,89 @@ +#ifndef TOOLS_ASM_X86_CMPXCHG_H +#define TOOLS_ASM_X86_CMPXCHG_H + +#include + +/* + * Non-existant functions to indicate usage errors at link time + * (or compile-time if the compiler implements __compiletime_error(). + */ +extern void __cmpxchg_wrong_size(void) + __compiletime_error("Bad argument size for cmpxchg"); + +/* + * Constants for operation sizes. On 32-bit, the 64-bit size it set to + * -1 because sizeof will never return -1, thereby making those switch + * case statements guaranteeed dead code which the compiler will + * eliminate, and allowing the "missing symbol in the default case" to + * indicate a usage error. + */ +#define __X86_CASE_B 1 +#define __X86_CASE_W 2 +#define __X86_CASE_L 4 +#ifdef __x86_64__ +#define __X86_CASE_Q 8 +#else +#define __X86_CASE_Q -1 /* sizeof will never return -1 */ +#endif + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case __X86_CASE_B: \ + { \ + volatile u8 *__ptr = (volatile u8 *)(ptr); \ + asm volatile(lock "cmpxchgb %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "q" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_W: \ + { \ + volatile u16 *__ptr = (volatile u16 *)(ptr); \ + asm volatile(lock "cmpxchgw %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_L: \ + { \ + volatile u32 *__ptr = (volatile u32 *)(ptr); \ + asm volatile(lock "cmpxchgl %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + case __X86_CASE_Q: \ + { \ + volatile u64 *__ptr = (volatile u64 *)(ptr); \ + asm volatile(lock "cmpxchgq %2,%1" \ + : "=a" (__ret), "+m" (*__ptr) \ + : "r" (__new), "0" (__old) \ + : "memory"); \ + break; \ + } \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg(ptr, old, new, sizeof(*(ptr))) + + +#endif /* TOOLS_ASM_X86_CMPXCHG_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 293149a1c6a114999473a78199a186a5cf040d22..2701e5f8145bd250c3a35dc12cab5839e16ff30d 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -186,7 +186,8 @@ * * Reuse free bits when adding new feature flags! */ - +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ @@ -201,6 +202,8 @@ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ @@ -289,7 +292,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ @@ -321,5 +325,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 85599ad4d0247863cef655d02b9a4b3f83c77fb7..5dff775af7cd6456f7177d9ce5888ae78dc6bc10 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -36,6 +36,12 @@ # define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ +#ifdef CONFIG_X86_5LEVEL +# define DISABLE_LA57 0 +#else +# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -55,7 +61,7 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) #define DISABLED_MASK17 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index fac9a5c0abe94b233b72b35bca8c7a665847b694..d91ba04dd00709b7e549ced791759c12cf70d5d0 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -53,6 +53,12 @@ # define NEED_MOVBE 0 #endif +#ifdef CONFIG_X86_5LEVEL +# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) +#else +# define NEED_LA57 0 +#endif + #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -98,7 +104,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 0 +#define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK17 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 739c0c5940226d7af38d2ab4bc068f7772f1a8c7..c2824d02ba3762553b62a07709058102050e34da 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,9 @@ #include #include +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + #define DE_VECTOR 0 #define DB_VECTOR 1 #define BP_VECTOR 3 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 14458658e988bb6c9333e73701ea55e7b6525a92..690a2dcf407860cf54793f03e69d8761b06f068d 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -76,7 +76,11 @@ #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 @@ -90,6 +94,7 @@ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVLPG, "INVLPG" }, \ { EXIT_REASON_RDPMC, "RDPMC" }, \ { EXIT_REASON_RDTSC, "RDTSC" }, \ @@ -108,6 +113,8 @@ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ + { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ @@ -115,20 +122,24 @@ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ - { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ + { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ + { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ + { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ { EXIT_REASON_INVEPT, "INVEPT" }, \ + { EXIT_REASON_RDTSCP, "RDTSCP" }, \ { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_WBINVD, "WBINVD" }, \ + { EXIT_REASON_XSETBV, "XSETBV" }, \ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ - { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ - { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ - { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ - { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVVPID, "INVVPID" }, \ + { EXIT_REASON_RDRAND, "RDRAND" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_VMFUNC, "VMFUNC" }, \ + { EXIT_REASON_ENCLS, "ENCLS" }, \ + { EXIT_REASON_RDSEED, "RDSEED" }, \ + { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" } diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 49e6ebac7e73e33b0a03327cb65c95a29afc1c67..98dcc112b36349147dd9266dc758444a469f052f 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled) _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index e3fb5ecbdcb65cee24172baecbe7f87da959b640..523911f316ce5e65511962f0c13fe0032a06427a 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \ lzma \ get_cpuid \ bpf \ + sched_getcpu \ sdt # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b564a2eea03904636f41101dd485e441d2d4fe00..e35e4e5ad192e6126404b4f5ee52e397e2bb22db 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -48,21 +48,22 @@ FILES= \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ - test-jvmti.bin + test-jvmti.bin \ + test-sched_getcpu.bin FILES := $(addprefix $(OUTPUT),$(FILES)) -CC := $(CROSS_COMPILE)gcc -MD -CXX := $(CROSS_COMPILE)g++ -MD -PKG_CONFIG := $(CROSS_COMPILE)pkg-config +CC ?= $(CROSS_COMPILE)gcc +CXX ?= $(CROSS_COMPILE)g++ +PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config all: $(FILES) -__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) +__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 -__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) +__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 ############################### @@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin: $(OUTPUT)test-glibc.bin: $(BUILD) +$(OUTPUT)test-sched_getcpu.bin: + $(BUILD) + DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 @@ -171,7 +175,7 @@ $(OUTPUT)test-libperl.bin: $(BUILD) $(FLAGS_PERL_EMBED) $(OUTPUT)test-libpython.bin: - $(BUILD) + $(BUILD) $(FLAGS_PYTHON_EMBED) $(OUTPUT)test-libpython-version.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 699e43627397b5c3008cd74008016f4812550c77..cc6c7c01f4cadd5f8fe28ced947b9c80a2a0515d 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -117,6 +117,10 @@ # include "test-pthread-attr-setaffinity-np.c" #undef main +#define main main_test_sched_getcpu +# include "test-sched_getcpu.c" +#undef main + # if 0 /* * Disable libbabeltrace check for test-all, because the requested @@ -182,6 +186,7 @@ int main(int argc, char *argv[]) main_test_get_cpuid(); main_test_bpf(); main_test_libcrypto(); + main_test_sched_getcpu(); main_test_sdt(); return 0; diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index e04ab89a1013bdee0b4d0e1f887c00fa0f521aec..7598361ef1f10898ea73d944ae0b0c02c4725d99 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -9,6 +9,9 @@ # define __NR_bpf 321 # elif defined(__aarch64__) # define __NR_bpf 280 +# elif defined(__sparc__) +# define __NR_bpf 349 +# else # error __NR_bpf not defined. libbpf does not support your arch. # endif #endif @@ -26,6 +29,7 @@ int main(void) attr.log_size = 0; attr.log_level = 0; attr.kern_version = 0; + attr.prog_flags = 0; /* * Test existence of __NR_bpf and BPF_PROG_LOAD. diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c new file mode 100644 index 0000000000000000000000000000000000000000..9c6b4cbffb1cae2759a524b70d36cee1b8f9f02e --- /dev/null +++ b/tools/build/feature/test-sched_getcpu.c @@ -0,0 +1,9 @@ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include + +int main(void) +{ + return sched_getcpu(); +} diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index 4aa5369ffa4ef295a8841edd380c7f678de80893..d85968cb1bf235feb1352809ac9d7229c152d74a 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -101,9 +101,25 @@ function create_bond_cfg_redhat { echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn } +function del_eth_cfg_ubuntu { + local fn=$cfgdir/interfaces + local tmpfl=$(mktemp) + + local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1 + local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)' + + awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" $fn >$tmpfl + + cp $tmpfl $fn + + rm $tmpfl +} + function create_eth_cfg_ubuntu { local fn=$cfgdir/interfaces + del_eth_cfg_ubuntu $1 + echo $'\n'auto $1 >>$fn echo iface $1 inet manual >>$fn echo bond-master $2 >>$fn @@ -119,6 +135,8 @@ function create_eth_cfg_pri_ubuntu { function create_bond_cfg_ubuntu { local fn=$cfgdir/interfaces + del_eth_cfg_ubuntu $1 + echo $'\n'auto $1 >>$fn echo iface $1 inet dhcp >>$fn echo bond-mode active-backup >>$fn diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h index 2ba78c9f570108ac8284fa96c209ea07498adf09..5e9738f97bf38f471067c7e4e205ed1d6bb5bd78 100644 --- a/tools/include/asm-generic/atomic-gcc.h +++ b/tools/include/asm-generic/atomic-gcc.h @@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v) return __sync_sub_and_fetch(&v->counter, 1) == 0; } +#define cmpxchg(ptr, oldval, newval) \ + __sync_val_compare_and_swap(ptr, oldval, newval) + +static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval) +{ + return cmpxchg(&(v)->counter, oldval, newval); +} + #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 4e3d3d18ebab6271dec637a017620fe80666fd11..9f21fc2b092b834f3badabe0233ed4e8b509444b 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -3,4 +3,10 @@ #include +/* atomic_cmpxchg_relaxed */ +#ifndef atomic_cmpxchg_relaxed +#define atomic_cmpxchg_relaxed atomic_cmpxchg +#define atomic_cmpxchg_release atomic_cmpxchg +#endif /* atomic_cmpxchg_relaxed */ + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/bug.h b/tools/include/linux/bug.h new file mode 100644 index 0000000000000000000000000000000000000000..8e4a4f49135d402ebd73a0b5a586c207c2ed1a86 --- /dev/null +++ b/tools/include/linux/bug.h @@ -0,0 +1,10 @@ +#ifndef _TOOLS_PERF_LINUX_BUG_H +#define _TOOLS_PERF_LINUX_BUG_H + +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) + +#endif /* _TOOLS_PERF_LINUX_BUG_H */ diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 48af2f10a42d00942b5262391f3f576c9856c0e4..825d44f89a2901035cb78a138eabc7175fcf246b 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -12,3 +12,10 @@ #if GCC_VERSION >= 70000 && !defined(__CHECKER__) # define __fallthrough __attribute__ ((fallthrough)) #endif + +#if GCC_VERSION >= 40300 +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* GCC_VERSION >= 40300 */ + +/* &a[0] degrades to a pointer: a different type from an array */ +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 8de163b17c0d00011d33083d247936d8265465e7..23299d7e71602efd2bd317786ca5fc800b2ed4a2 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -5,6 +5,10 @@ #include #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif + /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") @@ -13,6 +17,11 @@ # define __always_inline inline __attribute__((always_inline)) #endif +/* Are two types/vars the same type (ignoring qualifiers)? */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 390d7c9685fd6107c83be2296ead9cb198b571a3..4ce25d43e8e305df8afcfc6fb1747a58fad20c1b 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -208,6 +208,16 @@ .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h index c65cc0aa2659186cf04f91b8376691563c6ae27d..251eabf2a05e91ea4197ffce4541613e2eb92b1d 100644 --- a/tools/include/linux/hashtable.h +++ b/tools/include/linux/hashtable.h @@ -13,10 +13,6 @@ #include #include -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - #define DEFINE_HASHTABLE(name, bits) \ struct hlist_head name[1 << (bits)] = \ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 28607db02bd3ed165cd535de415ffd3d4476753f..73ccc48126bb5f5d0032709af649638eb4dc1cf6 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -4,6 +4,11 @@ #include #include #include +#include + +#ifndef UINT_MAX +#define UINT_MAX (~0U) +#endif #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) @@ -72,6 +77,8 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h index d5677d39c1e4c88c254b0d55b9d0ca5ab505fd92..0325cefc2220b8de2815e88aaada80020b80060c 100644 --- a/tools/include/linux/log2.h +++ b/tools/include/linux/log2.h @@ -12,6 +12,9 @@ #ifndef _TOOLS_LINUX_LOG2_H #define _TOOLS_LINUX_LOG2_H +#include +#include + /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h new file mode 100644 index 0000000000000000000000000000000000000000..a0177c1f55b14c13f742fe38e71667ad4b16650a --- /dev/null +++ b/tools/include/linux/refcount.h @@ -0,0 +1,151 @@ +#ifndef _TOOLS_LINUX_REFCOUNT_H +#define _TOOLS_LINUX_REFCOUNT_H + +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + */ + +#include +#include + +#ifdef NDEBUG +#define REFCOUNT_WARN(cond, str) (void)(cond) +#define __refcount_check +#else +#define REFCOUNT_WARN(cond, str) BUG_ON(cond) +#define __refcount_check __must_check +#endif + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } + +static inline void refcount_set(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + */ +static inline __refcount_check +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +static inline void refcount_inc(refcount_t *r) +{ + REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} + +static inline __refcount_check +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + + +#endif /* _ATOMIC_LINUX_REFCOUNT_H */ diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index f436d2420a18575095e66b7bfbf15b0abbe83fe8..d62b56cf8c12eedcdda935bc1180c4b4b70270b9 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -18,4 +18,6 @@ extern size_t strlcpy(char *dest, const char *src, size_t size); char *str_error_r(int errnum, char *buf, size_t buflen); +int prefixcmp(const char *str, const char *prefix); + #endif /* _LINUX_STRING_H_ */ diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index c24b3e3ae29691d404ad36bc56bdea3497ea0de7..77a28a26a6709fe3fe90026acfd9936849824890 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -7,6 +7,7 @@ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ #include +#include struct page; struct kmem_cache; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0539a0ceef38155835552360667070552ebce641..94dfa9def355f7f52805bba5ade3a32c304f989a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,7 @@ enum bpf_cmd { BPF_OBJ_GET, BPF_PROG_ATTACH, BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, }; enum bpf_map_type { @@ -96,6 +97,8 @@ enum bpf_map_type { BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_MAP_TYPE_LPM_TRIE, + BPF_MAP_TYPE_ARRAY_OF_MAPS, + BPF_MAP_TYPE_HASH_OF_MAPS, }; enum bpf_prog_type { @@ -129,6 +132,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -152,6 +162,7 @@ union bpf_attr { __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ __u32 map_flags; /* prealloc or not */ + __u32 inner_map_fd; /* fd pointing to the inner map */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -173,6 +184,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -186,6 +198,17 @@ union bpf_attr { __u32 attach_type; __u32 attach_flags; }; + + struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ + __u32 prog_fd; + __u32 retval; + __u32 data_size_in; + __u32 data_size_out; + __aligned_u64 data_in; + __aligned_u64 data_out; + __u32 repeat; + __u32 duration; + } test; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -456,6 +479,17 @@ union bpf_attr { * Return: * > 0 length of the string including the trailing NUL on success * < 0 error + * + * u64 bpf_get_socket_cookie(skb) + * Get the cookie for the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: 8 Bytes non-decreasing number on success or 0 if the socket + * field is missing inside sk_buff + * + * u32 bpf_get_socket_uid(skb) + * Get the owner uid of the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: uid of the socket owner on success or overflowuid if failed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -503,7 +537,9 @@ union bpf_attr { FN(get_numa_node_id), \ FN(skb_change_head), \ FN(xdp_adjust_head), \ - FN(probe_read_str), + FN(probe_read_str), \ + FN(get_socket_cookie), \ + FN(get_socket_uid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -574,6 +610,7 @@ struct __sk_buff { __u32 tc_classid; __u32 data; __u32 data_end; + __u32 napi_id; }; struct bpf_tunnel_key { diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h new file mode 100644 index 0000000000000000000000000000000000000000..813afd6eee713e68fa4b88e6aabf6aecce1993e1 --- /dev/null +++ b/tools/include/uapi/linux/fcntl.h @@ -0,0 +1,72 @@ +#ifndef _UAPI_LINUX_FCNTL_H +#define _UAPI_LINUX_FCNTL_H + +#include + +#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) +#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) + +/* + * Cancel a blocking posix lock; internal use only until we expose an + * asynchronous lock api to userspace: + */ +#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5) + +/* Create a file descriptor with FD_CLOEXEC set. */ +#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6) + +/* + * Request nofications on a directory. + * See below for events that may be notified. + */ +#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2) + +/* + * Set and get of pipe page size array + */ +#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) + +/* + * Set/Get seals + */ +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +/* + * Types of seals + */ +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* (1U << 31) is reserved for signed error codes */ + +/* + * Types of directory notifications that may be requested. + */ +#define DN_ACCESS 0x00000001 /* File accessed */ +#define DN_MODIFY 0x00000002 /* File modified */ +#define DN_CREATE 0x00000004 /* File created */ +#define DN_DELETE 0x00000008 /* File removed */ +#define DN_RENAME 0x00000010 /* File renamed */ +#define DN_ATTRIB 0x00000020 /* File changed attibutes */ +#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ + +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ + +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + + +#endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c66a485a24ac81e324ea53ea17b405a3c73b520a..b1c0b187acfe57da3ece7e91325536edcc9cff0c 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events; /* wakeup every n events */ @@ -610,6 +611,23 @@ struct perf_event_header { __u16 size; }; +struct perf_ns_link_info { + __u64 dev; + __u64 ino; +}; + +enum { + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +880,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * u64 nr_namespaces; + * { u64 dev, inode; } [nr_namespaces]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -885,12 +915,14 @@ enum perf_callchain_context { */ #define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ #define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) #define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ #define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { @@ -902,6 +934,21 @@ union perf_mem_data_src { mem_rsvd:31; }; }; +#elif defined(__BIG_ENDIAN_BITFIELD) +union perf_mem_data_src { + __u64 val; + struct { + __u64 mem_rsvd:31, + mem_dtlb:7, /* tlb access */ + mem_lock:2, /* lock instr */ + mem_snoop:5, /* snoop mode */ + mem_lvl:14, /* memory hierarchy level */ + mem_op:5; /* type of opcode */ + }; +}; +#else +#error "Unknown endianness" +#endif /* type of opcode (load/store/prefetch,code) */ #define PERF_MEM_OP_NA 0x01 /* not available */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h new file mode 100644 index 0000000000000000000000000000000000000000..17b10304c393355da9ed2da743107a5c59748290 --- /dev/null +++ b/tools/include/uapi/linux/stat.h @@ -0,0 +1,173 @@ +#ifndef _UAPI_LINUX_STAT_H +#define _UAPI_LINUX_STAT_H + +#include + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) + +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 + +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) + +#define S_IRWXU 00700 +#define S_IRUSR 00400 +#define S_IWUSR 00200 +#define S_IXUSR 00100 + +#define S_IRWXG 00070 +#define S_IRGRP 00040 +#define S_IWGRP 00020 +#define S_IXGRP 00010 + +#define S_IRWXO 00007 +#define S_IROTH 00004 +#define S_IWOTH 00002 +#define S_IXOTH 00001 + +#endif + +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __u32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). + */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ + +/* + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + + +#endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 581278c5848877ea1a90cfdbf8045afd23298c0c..8f74ed8e72372994213d6cca31e4313edb6a2501 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -30,8 +30,8 @@ import fcntl import resource import struct import re +import subprocess from collections import defaultdict -from time import sleep VMX_EXIT_REASONS = { 'EXCEPTION_NMI': 0, @@ -225,6 +225,7 @@ IOCTL_NUMBERS = { 'RESET': 0x00002403, } + class Arch(object): """Encapsulates global architecture specific data. @@ -255,12 +256,14 @@ class Arch(object): return ArchX86(SVM_EXIT_REASONS) return + class ArchX86(Arch): def __init__(self, exit_reasons): self.sc_perf_evt_open = 298 self.ioctl_numbers = IOCTL_NUMBERS self.exit_reasons = exit_reasons + class ArchPPC(Arch): def __init__(self): self.sc_perf_evt_open = 319 @@ -275,12 +278,14 @@ class ArchPPC(Arch): self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 self.exit_reasons = {} + class ArchA64(Arch): def __init__(self): self.sc_perf_evt_open = 241 self.ioctl_numbers = IOCTL_NUMBERS self.exit_reasons = AARCH64_EXIT_REASONS + class ArchS390(Arch): def __init__(self): self.sc_perf_evt_open = 331 @@ -316,6 +321,61 @@ def parse_int_list(list_string): return integers +def get_pid_from_gname(gname): + """Fuzzy function to convert guest name to QEMU process pid. + + Returns a list of potential pids, can be empty if no match found. + Throws an exception on processing errors. + + """ + pids = [] + try: + child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'], + stdout=subprocess.PIPE) + except: + raise Exception + for line in child.stdout: + line = line.lstrip().split(' ', 1) + # perform a sanity check before calling the more expensive + # function to possibly extract the guest name + if ' -name ' in line[1] and gname == get_gname_from_pid(line[0]): + pids.append(int(line[0])) + child.stdout.close() + + return pids + + +def get_gname_from_pid(pid): + """Returns the guest name for a QEMU process pid. + + Extracts the guest name from the QEMU comma line by processing the '-name' + option. Will also handle names specified out of sequence. + + """ + name = '' + try: + line = open('/proc/{}/cmdline'.format(pid), 'rb').read().split('\0') + parms = line[line.index('-name') + 1].split(',') + while '' in parms: + # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in + # ['foo', '', 'bar'], which we revert here + idx = parms.index('') + parms[idx - 1] += ',' + parms[idx + 1] + del parms[idx:idx+2] + # the '-name' switch allows for two ways to specify the guest name, + # where the plain name overrides the name specified via 'guest=' + for arg in parms: + if '=' not in arg: + name = arg + break + if arg[:6] == 'guest=': + name = arg[6:] + except (ValueError, IOError, IndexError): + pass + + return name + + def get_online_cpus(): """Returns a list of cpu id integers.""" with open('/sys/devices/system/cpu/online') as cpu_list: @@ -342,6 +402,7 @@ def get_filters(): libc = ctypes.CDLL('libc.so.6', use_errno=True) syscall = libc.syscall + class perf_event_attr(ctypes.Structure): """Struct that holds the necessary data to set up a trace event. @@ -370,6 +431,7 @@ class perf_event_attr(ctypes.Structure): self.size = ctypes.sizeof(self) self.read_format = PERF_FORMAT_GROUP + def perf_event_open(attr, pid, cpu, group_fd, flags): """Wrapper for the sys_perf_evt_open() syscall. @@ -395,6 +457,7 @@ PERF_FORMAT_GROUP = 1 << 3 PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' + class Group(object): """Represents a perf event group.""" @@ -427,6 +490,7 @@ class Group(object): struct.unpack(read_format, os.read(self.events[0].fd, length)))) + class Event(object): """Represents a performance event and manages its life cycle.""" def __init__(self, name, group, trace_cpu, trace_pid, trace_point, @@ -510,6 +574,7 @@ class Event(object): """Resets the count of the trace event in the kernel.""" fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) + class TracepointProvider(object): """Data provider for the stats class. @@ -551,6 +616,7 @@ class TracepointProvider(object): def setup_traces(self): """Creates all event and group objects needed to be able to retrieve data.""" + fields = self.get_available_fields() if self._pid > 0: # Fetch list of all threads of the monitored pid, as qemu # starts a thread for each vcpu. @@ -561,7 +627,7 @@ class TracepointProvider(object): # The constant is needed as a buffer for python libs, std # streams and other files that the script opens. - newlim = len(groupids) * len(self._fields) + 50 + newlim = len(groupids) * len(fields) + 50 try: softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) @@ -577,7 +643,7 @@ class TracepointProvider(object): for groupid in groupids: group = Group() - for name in self._fields: + for name in fields: tracepoint = name tracefilter = None match = re.match(r'(.*)\((.*)\)', name) @@ -650,13 +716,23 @@ class TracepointProvider(object): ret[name] += val return ret + def reset(self): + """Reset all field counters""" + for group in self.group_leaders: + for event in group.events: + event.reset() + + class DebugfsProvider(object): """Provides data from the files that KVM creates in the kvm debugfs folder.""" def __init__(self): self._fields = self.get_available_fields() + self._baseline = {} self._pid = 0 self.do_read = True + self.paths = [] + self.reset() def get_available_fields(self): """"Returns a list of available fields. @@ -673,6 +749,7 @@ class DebugfsProvider(object): @fields.setter def fields(self, fields): self._fields = fields + self.reset() @property def pid(self): @@ -690,10 +767,11 @@ class DebugfsProvider(object): self.paths = filter(lambda x: "{}-".format(pid) in x, vms) else: - self.paths = [''] + self.paths = [] self.do_read = True + self.reset() - def read(self): + def read(self, reset=0): """Returns a dict with format:'file name / field -> current value'.""" results = {} @@ -701,10 +779,22 @@ class DebugfsProvider(object): if not self.do_read: return results - for path in self.paths: + paths = self.paths + if self._pid == 0: + paths = [] + for entry in os.walk(PATH_DEBUGFS_KVM): + for dir in entry[1]: + paths.append(dir) + for path in paths: for field in self._fields: - results[field] = results.get(field, 0) \ - + self.read_field(field, path) + value = self.read_field(field, path) + key = path + field + if reset: + self._baseline[key] = value + if self._baseline.get(key, -1) == -1: + self._baseline[key] = value + results[field] = (results.get(field, 0) + value - + self._baseline.get(key, 0)) return results @@ -718,6 +808,12 @@ class DebugfsProvider(object): except IOError: return 0 + def reset(self): + """Reset field counters""" + self._baseline = {} + self.read(1) + + class Stats(object): """Manages the data providers and the data they provide. @@ -753,14 +849,20 @@ class Stats(object): for provider in self.providers: provider.pid = self._pid_filter + def reset(self): + self.values = {} + for provider in self.providers: + provider.reset() + @property def fields_filter(self): return self._fields_filter @fields_filter.setter def fields_filter(self, fields_filter): - self._fields_filter = fields_filter - self.update_provider_filters() + if fields_filter != self._fields_filter: + self._fields_filter = fields_filter + self.update_provider_filters() @property def pid_filter(self): @@ -768,9 +870,10 @@ class Stats(object): @pid_filter.setter def pid_filter(self, pid): - self._pid_filter = pid - self.values = {} - self.update_provider_pid() + if pid != self._pid_filter: + self._pid_filter = pid + self.values = {} + self.update_provider_pid() def get(self): """Returns a dict with field -> (value, delta to last value) of all @@ -778,23 +881,26 @@ class Stats(object): for provider in self.providers: new = provider.read() for key in provider.fields: - oldval = self.values.get(key, (0, 0)) + oldval = self.values.get(key, (0, 0))[0] newval = new.get(key, 0) - newdelta = None - if oldval is not None: - newdelta = newval - oldval[0] + newdelta = newval - oldval self.values[key] = (newval, newdelta) return self.values LABEL_WIDTH = 40 NUMBER_WIDTH = 10 +DELAY_INITIAL = 0.25 +DELAY_REGULAR = 3.0 +MAX_GUEST_NAME_LEN = 48 +MAX_REGEX_LEN = 44 +DEFAULT_REGEX = r'^[^\(]*$' + class Tui(object): """Instruments curses to draw a nice text ui.""" def __init__(self, stats): self.stats = stats self.screen = None - self.drilldown = False self.update_drilldown() def __enter__(self): @@ -809,7 +915,14 @@ class Tui(object): # return from C start_color() is ignorable. try: curses.start_color() - except: + except curses.error: + pass + + # Hide cursor in extra statement as some monochrome terminals + # might support hiding but not colors. + try: + curses.curs_set(0) + except curses.error: pass curses.use_default_colors() @@ -827,36 +940,60 @@ class Tui(object): def update_drilldown(self): """Sets or removes a filter that only allows fields without braces.""" if not self.stats.fields_filter: - self.stats.fields_filter = r'^[^\(]*$' + self.stats.fields_filter = DEFAULT_REGEX - elif self.stats.fields_filter == r'^[^\(]*$': + elif self.stats.fields_filter == DEFAULT_REGEX: self.stats.fields_filter = None def update_pid(self, pid): """Propagates pid selection to stats object.""" self.stats.pid_filter = pid - def refresh(self, sleeptime): - """Refreshes on-screen data.""" + def refresh_header(self, pid=None): + """Refreshes the header.""" + if pid is None: + pid = self.stats.pid_filter self.screen.erase() - if self.stats.pid_filter > 0: - self.screen.addstr(0, 0, 'kvm statistics - pid {0}' - .format(self.stats.pid_filter), - curses.A_BOLD) + gname = get_gname_from_pid(pid) + if gname: + gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' + if len(gname) > MAX_GUEST_NAME_LEN + else gname)) + if pid > 0: + self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}' + .format(pid, gname), curses.A_BOLD) else: self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + if self.stats.fields_filter and self.stats.fields_filter \ + != DEFAULT_REGEX: + regex = self.stats.fields_filter + if len(regex) > MAX_REGEX_LEN: + regex = regex[:MAX_REGEX_LEN] + '...' + self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex)) self.screen.addstr(2, 1, 'Event') self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - len('Total'), 'Total') - self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 - + len('%Total'), '%Total') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 7 + 8 - len('Current'), 'Current') + self.screen.addstr(4, 1, 'Collecting data...') + self.screen.refresh() + + def refresh_body(self, sleeptime): row = 3 + self.screen.move(row, 0) + self.screen.clrtobot() stats = self.stats.get() + def sortkey(x): if stats[x][1]: return (-stats[x][1], -stats[x][0]) else: return (0, -stats[x][0]) + total = 0. + for val in stats.values(): + total += val[0] for key in sorted(stats.keys(), key=sortkey): if row >= self.screen.getmaxyx()[0]: @@ -869,6 +1006,8 @@ class Tui(object): col += LABEL_WIDTH self.screen.addstr(row, col, '%10d' % (values[0],)) col += NUMBER_WIDTH + self.screen.addstr(row, col, '%7.1f' % (values[0] * 100 / total,)) + col += 7 if values[1] is not None: self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) row += 1 @@ -893,20 +1032,24 @@ class Tui(object): regex = self.screen.getstr() curses.noecho() if len(regex) == 0: + self.stats.fields_filter = DEFAULT_REGEX + self.refresh_header() return try: re.compile(regex) self.stats.fields_filter = regex + self.refresh_header() return except re.error: continue - def show_vm_selection(self): + def show_vm_selection_by_pid(self): """Draws PID selection mask. Asks for a pid until a valid pid or 0 has been entered. """ + msg = '' while True: self.screen.erase() self.screen.addstr(0, 0, @@ -915,6 +1058,7 @@ class Tui(object): self.screen.addstr(1, 0, 'This might limit the shown data to the trace ' 'statistics.') + self.screen.addstr(5, 0, msg) curses.echo() self.screen.addstr(3, 0, "Pid [0 or pid]: ") @@ -922,60 +1066,128 @@ class Tui(object): curses.noecho() try: - pid = int(pid) - - if pid == 0: - self.update_pid(pid) - break - else: - if not os.path.isdir(os.path.join('/proc/', str(pid))): + if len(pid) > 0: + pid = int(pid) + if pid != 0 and not os.path.isdir(os.path.join('/proc/', + str(pid))): + msg = '"' + str(pid) + '": Not a running process' continue - else: - self.update_pid(pid) - break + else: + pid = 0 + self.refresh_header(pid) + self.update_pid(pid) + break except ValueError: + msg = '"' + str(pid) + '": Not a valid pid' continue + def show_vm_selection_by_guest_name(self): + """Draws guest selection mask. + + Asks for a guest name until a valid guest name or '' is entered. + + """ + msg = '' + while True: + self.screen.erase() + self.screen.addstr(0, 0, + 'Show statistics for specific guest.', + curses.A_BOLD) + self.screen.addstr(1, 0, + 'This might limit the shown data to the trace ' + 'statistics.') + self.screen.addstr(5, 0, msg) + curses.echo() + self.screen.addstr(3, 0, "Guest [ENTER or guest]: ") + gname = self.screen.getstr() + curses.noecho() + + if not gname: + self.refresh_header(0) + self.update_pid(0) + break + else: + pids = [] + try: + pids = get_pid_from_gname(gname) + except: + msg = '"' + gname + '": Internal error while searching, ' \ + 'use pid filter instead' + continue + if len(pids) == 0: + msg = '"' + gname + '": Not an active guest' + continue + if len(pids) > 1: + msg = '"' + gname + '": Multiple matches found, use pid ' \ + 'filter instead' + continue + self.refresh_header(pids[0]) + self.update_pid(pids[0]) + break + def show_stats(self): """Refreshes the screen and processes user input.""" - sleeptime = 0.25 + sleeptime = DELAY_INITIAL + self.refresh_header() while True: - self.refresh(sleeptime) + self.refresh_body(sleeptime) curses.halfdelay(int(sleeptime * 10)) - sleeptime = 3 + sleeptime = DELAY_REGULAR try: char = self.screen.getkey() if char == 'x': - self.drilldown = not self.drilldown + self.refresh_header() self.update_drilldown() + sleeptime = DELAY_INITIAL if char == 'q': break + if char == 'c': + self.stats.fields_filter = DEFAULT_REGEX + self.refresh_header(0) + self.update_pid(0) + sleeptime = DELAY_INITIAL if char == 'f': self.show_filter_selection() + sleeptime = DELAY_INITIAL + if char == 'g': + self.show_vm_selection_by_guest_name() + sleeptime = DELAY_INITIAL if char == 'p': - self.show_vm_selection() + self.show_vm_selection_by_pid() + sleeptime = DELAY_INITIAL + if char == 'r': + self.refresh_header() + self.stats.reset() + sleeptime = DELAY_INITIAL except KeyboardInterrupt: break except curses.error: continue + def batch(stats): """Prints statistics in a key, value format.""" - s = stats.get() - time.sleep(1) - s = stats.get() - for key in sorted(s.keys()): - values = s[key] - print '%-42s%10d%10d' % (key, values[0], values[1]) + try: + s = stats.get() + time.sleep(1) + s = stats.get() + for key in sorted(s.keys()): + values = s[key] + print '%-42s%10d%10d' % (key, values[0], values[1]) + except KeyboardInterrupt: + pass + def log(stats): """Prints statistics as reiterating key block, multiple value blocks.""" keys = sorted(stats.get().iterkeys()) + def banner(): for k in keys: print '%s' % k, print + def statline(): s = stats.get() for k in keys: @@ -984,11 +1196,15 @@ def log(stats): line = 0 banner_repeat = 20 while True: - time.sleep(1) - if line % banner_repeat == 0: - banner() - statline() - line += 1 + try: + time.sleep(1) + if line % banner_repeat == 0: + banner() + statline() + line += 1 + except KeyboardInterrupt: + break + def get_options(): """Returns processed program arguments.""" @@ -1009,6 +1225,16 @@ Requirements: CAP_SYS_ADMIN and perf events are used. - CAP_SYS_RESOURCE if the hard limit is not high enough to allow the large number of files that are possibly opened. + +Interactive Commands: + c clear filter + f filter by regular expression + g filter by guest name + p filter by PID + q quit + x toggle reporting of stats for individual child trace events + r reset stats +Press any other key to refresh statistics immediately. """ class PlainHelpFormatter(optparse.IndentedHelpFormatter): @@ -1018,6 +1244,22 @@ Requirements: else: return "" + def cb_guest_to_pid(option, opt, val, parser): + try: + pids = get_pid_from_gname(val) + except: + raise optparse.OptionValueError('Error while searching for guest ' + '"{}", use "-p" to specify a pid ' + 'instead'.format(val)) + if len(pids) == 0: + raise optparse.OptionValueError('No guest by the name "{}" ' + 'found'.format(val)) + if len(pids) > 1: + raise optparse.OptionValueError('Multiple processes found (pids: ' + '{}) - use "-p" to specify a pid ' + 'instead'.format(" ".join(pids))) + parser.values.pid = pids[0] + optparser = optparse.OptionParser(description=description_text, formatter=PlainHelpFormatter()) optparser.add_option('-1', '--once', '--batch', @@ -1051,15 +1293,24 @@ Requirements: help='fields to display (regex)', ) optparser.add_option('-p', '--pid', - action='store', - default=0, - type=int, - dest='pid', - help='restrict statistics to pid', - ) + action='store', + default=0, + type='int', + dest='pid', + help='restrict statistics to pid', + ) + optparser.add_option('-g', '--guest', + action='callback', + type='string', + dest='pid', + metavar='GUEST', + help='restrict statistics to guest by name', + callback=cb_guest_to_pid, + ) (options, _) = optparser.parse_args(sys.argv) return options + def get_providers(options): """Returns a list of data providers depending on the passed options.""" providers = [] @@ -1073,6 +1324,7 @@ def get_providers(options): return providers + def check_access(options): """Exits if the current user can't access all needed directories.""" if not os.path.exists('/sys/kernel/debug'): @@ -1086,8 +1338,8 @@ def check_access(options): "Also ensure, that the kvm modules are loaded.\n") sys.exit(1) - if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints - or not options.debugfs): + if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or + not options.debugfs): sys.stderr.write("Please enable CONFIG_TRACING in your kernel " "when using the option -t (default).\n" "If it is enabled, make {0} readable by the " @@ -1098,10 +1350,11 @@ def check_access(options): sys.stderr.write("Falling back to debugfs statistics!\n") options.debugfs = True - sleep(5) + time.sleep(5) return options + def main(): options = get_options() options = check_access(options) diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index b92a153d7115c00bfbcf20b54858f2a1d79477d0..109431bdc63c991fa355d89d8ef7057a285ecca5 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -18,11 +18,33 @@ state transitions such as guest mode entry and exit. This tool is useful for observing guest behavior from the host perspective. Often conclusions about performance or buggy behavior can be drawn from the output. +While running in regular mode, use any of the keys listed in section +'Interactive Commands' below. +Use batch and logging modes for scripting purposes. The set of KVM kernel module trace events may be specific to the kernel version or architecture. It is best to check the KVM kernel module source code for the meaning of events. +INTERACTIVE COMMANDS +-------------------- +[horizontal] +*c*:: clear filter + +*f*:: filter by regular expression + +*g*:: filter by guest name + +*p*:: filter by PID + +*q*:: quit + +*r*:: reset stats + +*x*:: toggle reporting of stats for child trace events + +Press any other key to refresh statistics immediately. + OPTIONS ------- -1:: @@ -46,6 +68,10 @@ OPTIONS --pid=:: limit statistics to one virtual machine (pid) +-g:: +--guest=:: + limit statistics to one virtual machine (guest name) + -f:: --fields=:: fields to display (regex) diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 5d19fdf80292c226769a91ccef519a47b3788b2b..897cd6f3f68720342f7b11ac6e20b4e6e027bad6 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -3339,7 +3339,7 @@ int main(int argc, char *argv[]) * simple, single region. */ boot->e820_entries = 1; - boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); + boot->e820_table[0] = ((struct e820_entry) { 0, mem, E820_TYPE_RAM }); /* * The boot header contains a command line pointer: we put the command * line after the boot header. diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 4b6bfc43cccf0e096c3c88e272afa1491fa69e97..809c7721cd2451a5e1a8e3bbe5a3994965106346 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep) return filename__read_str(path, buf, sizep); } +int sysfs__read_bool(const char *entry, bool *value) +{ + char *buf; + size_t size; + int ret; + + ret = sysfs__read_str(entry, &buf, &size); + if (ret < 0) + return ret; + + switch (buf[0]) { + case '1': + case 'y': + case 'Y': + *value = true; + break; + case '0': + case 'n': + case 'N': + *value = false; + break; + default: + ret = -1; + } + + free(buf); + + return ret; +} int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 6b332dc74498fc35a1f26b0488391e12435eb269..956c21127d1ef7d9817c0bbb8a9423ff4bdf1de8 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); int sysfs__read_str(const char *entry, char **buf, size_t *sizep); +int sysfs__read_bool(const char *entry, bool *value); #endif /* __API_FS__ */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 207c2eeddab064d7c304efed09653ba6e227d6e9..6e178987af8e3f45d63da349e38c39b4e4cd6d84 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -37,6 +37,8 @@ # define __NR_bpf 321 # elif defined(__aarch64__) # define __NR_bpf 280 +# elif defined(__sparc__) +# define __NR_bpf 349 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif @@ -69,6 +71,23 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } +int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, __u32 map_flags) +{ + union bpf_attr attr; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = 4; + attr.inner_map_fd = inner_map_fd; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz) @@ -98,6 +117,28 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; + attr.insns = ptr_to_u64(insns); + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = 2; + log_buf[0] = 0; + attr.kern_version = kern_version; + attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + + return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { @@ -192,3 +233,27 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } + +int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, + void *data_out, __u32 *size_out, __u32 *retval, + __u32 *duration) +{ + union bpf_attr attr; + int ret; + + bzero(&attr, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.data_in = ptr_to_u64(data); + attr.test.data_out = ptr_to_u64(data_out); + attr.test.data_size_in = size; + attr.test.repeat = repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + if (size_out) + *size_out = attr.test.data_size_out; + if (retval) + *retval = attr.test.retval; + if (duration) + *duration = attr.test.duration; + return ret; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 09c3dcac04963e6973d7a002e54b6a139af6212a..972bd8333eb72e982420abeac9f8ceff5a4ed1a3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -26,6 +26,8 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); +int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 @@ -33,6 +35,10 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -45,6 +51,8 @@ int bpf_obj_get(const char *pathname); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); - +int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, + void *data_out, __u32 *size_out, __u32 *retval, + __u32 *duration); #endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ac6eb863b2a40df00c4ead9f48c872ab67949c19..1a2c07eb7795bb4fb43e4a97bdc721d7cbc7f3b8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1618,8 +1618,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n) return fd; } -static void bpf_program__set_type(struct bpf_program *prog, - enum bpf_prog_type type) +void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { prog->type = type; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index b30394f9947a35356af870223664a4409f8e5fd6..32c7252f734e42f9895c4686c4236f4b259d99a3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -25,6 +25,7 @@ #include #include #include // for size_t +#include enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -185,6 +186,7 @@ int bpf_program__set_sched_cls(struct bpf_program *prog); int bpf_program__set_sched_act(struct bpf_program *prog); int bpf_program__set_xdp(struct bpf_program *prog); int bpf_program__set_perf_event(struct bpf_program *prog); +void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); bool bpf_program__is_socket_filter(struct bpf_program *prog); bool bpf_program__is_tracepoint(struct bpf_program *prog); diff --git a/tools/lib/string.c b/tools/lib/string.c index bd239bc1d557dbde447e8128ac7cd3163dc11a04..8e678af1c6ee479eae333cf5766598db5dce5208 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -87,3 +87,12 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size) } return ret; } + +int prefixcmp(const char *str, const char *prefix) +{ + for (; ; str++, prefix++) + if (!*prefix) + return 0; + else if (*str != *prefix) + return (unsigned char)*prefix - (unsigned char)*str; +} diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index e228c3cb37160b8fd5bda5e3a7c9e76f89ea4537..ba970a73d053f3c019a318dbb7fa14b9dbab9b40 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/lib/subcmd/help.h b/tools/lib/subcmd/help.h index e145a020780c0d5dc03c39b60205bb7d782dfcf7..9bd4223dc722013bbfd170d652064f5b056cc07d 100644 --- a/tools/lib/subcmd/help.h +++ b/tools/lib/subcmd/help.h @@ -2,6 +2,7 @@ #define __SUBCMD_HELP_H #include +#include struct cmdnames { size_t alloc; diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 6bc24025d05457098b504c3a1e9e5a1316b21817..359bfa77f39cdebac7df82876b227d1a63975c39 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index fc2e45d8aaf1d04c09d53fd4db6478d60d066895..8fa5f036eff08002ee802dc6014f6689d790a80f 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -79,13 +79,4 @@ static inline void astrcat(char **out, const char *add) free(tmp); } -static inline int prefixcmp(const char *str, const char *prefix) -{ - for (; ; str++, prefix++) - if (!*prefix) - return 0; - else if (*str != *prefix) - return (unsigned char)*prefix - (unsigned char)*str; -} - #endif /* __SUBCMD_UTIL_H */ diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 5e431077fcd6784ac9567a785bc6fb90a40bc7bb..d270ac00613d60612802e85cd71661a54c762f41 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -1,3 +1,4 @@ +#include #include "symbol/kallsyms.h" #include #include diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index 544b05a53b7057bbe9092e31f600065d47e3b24e..ad572e6cdbd0f47cfa947f18451a253977ff6c64 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -229,6 +229,7 @@ static void usage(void) { printf("Usage: bpf_jit_disasm [...]\n"); printf(" -o Also display related opcodes (default: off).\n"); + printf(" -O Write binary image of code to file, don't disassemble to stdout.\n"); printf(" -f Read last image dump from file or stdin (default: klog).\n"); printf(" -h Display this help.\n"); } @@ -238,12 +239,19 @@ int main(int argc, char **argv) unsigned int len, klen, opt, opcodes = 0; static uint8_t image[32768]; char *kbuff, *file = NULL; + char *ofile = NULL; + int ofd; + ssize_t nr; + uint8_t *pos; - while ((opt = getopt(argc, argv, "of:")) != -1) { + while ((opt = getopt(argc, argv, "of:O:")) != -1) { switch (opt) { case 'o': opcodes = 1; break; + case 'O': + ofile = optarg; + break; case 'f': file = optarg; break; @@ -263,11 +271,35 @@ int main(int argc, char **argv) } len = get_last_jit_image(kbuff, klen, image, sizeof(image)); - if (len > 0) - get_asm_insns(image, len, opcodes); - else + if (len <= 0) { fprintf(stderr, "No JIT image found!\n"); + goto done; + } + if (!ofile) { + get_asm_insns(image, len, opcodes); + goto done; + } + + ofd = open(ofile, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE); + if (ofd < 0) { + fprintf(stderr, "Could not open file %s for writing: ", ofile); + perror(NULL); + goto done; + } + pos = image; + do { + nr = write(ofd, pos, len); + if (nr < 0) { + fprintf(stderr, "Could not write data to %s: ", ofile); + perror(NULL); + goto done; + } + len -= nr; + pos += nr; + } while (len); + close(ofd); +done: put_log_buff(kbuff); return 0; } diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 066086dd59a8017e293993a50d2f432d47441cfe..5f66697fe1e09a81b9fa8c5cb37c54331cf5888e 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -36,8 +36,7 @@ #include "warn.h" #include - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#include #define STATE_FP_SAVED 0x1 #define STATE_FP_SETUP 0x2 @@ -193,7 +192,8 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "complete_and_exit", "kvm_spurious_fault", "__reiserfs_panic", - "lbug_with_loc" + "lbug_with_loc", + "fortify_panic", }; if (func->bind == STB_WEAK) diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 46c326db4f464e4bef4a073c93c52c61c4262e77..ecc5b1b5d15df6fddd1fe27d929f92f5111b8894 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -31,11 +31,10 @@ #include #include #include +#include #include "builtin.h" -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) - struct cmd_struct { const char *name; int (*fn)(int, const char **); diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c new file mode 100644 index 0000000000000000000000000000000000000000..ad54a58d7dda0e82f3bb46f95e6a5f9af8c9a42e --- /dev/null +++ b/tools/pci/pcitest.c @@ -0,0 +1,186 @@ +/** + * Userspace PCI Endpoint Test Module + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define BILLION 1E9 + +static char *result[] = { "NOT OKAY", "OKAY" }; + +struct pci_test { + char *device; + char barnum; + bool legacyirq; + unsigned int msinum; + bool read; + bool write; + bool copy; + unsigned long size; +}; + +static int run_test(struct pci_test *test) +{ + long ret; + int fd; + struct timespec start, end; + double time; + + fd = open(test->device, O_RDWR); + if (fd < 0) { + perror("can't open PCI Endpoint Test device"); + return fd; + } + + if (test->barnum >= 0 && test->barnum <= 5) { + ret = ioctl(fd, PCITEST_BAR, test->barnum); + fprintf(stdout, "BAR%d:\t\t", test->barnum); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + if (test->legacyirq) { + ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0); + fprintf(stdout, "LEGACY IRQ:\t"); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + if (test->msinum > 0 && test->msinum <= 32) { + ret = ioctl(fd, PCITEST_MSI, test->msinum); + fprintf(stdout, "MSI%d:\t\t", test->msinum); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + if (test->write) { + ret = ioctl(fd, PCITEST_WRITE, test->size); + fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + if (test->read) { + ret = ioctl(fd, PCITEST_READ, test->size); + fprintf(stdout, "READ (%7ld bytes):\t\t", test->size); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + if (test->copy) { + ret = ioctl(fd, PCITEST_COPY, test->size); + fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size); + if (ret < 0) + fprintf(stdout, "TEST FAILED\n"); + else + fprintf(stdout, "%s\n", result[ret]); + } + + fflush(stdout); +} + +int main(int argc, char **argv) +{ + int c; + struct pci_test *test; + + test = calloc(1, sizeof(*test)); + if (!test) { + perror("Fail to allocate memory for pci_test\n"); + return -ENOMEM; + } + + /* since '0' is a valid BAR number, initialize it to -1 */ + test->barnum = -1; + + /* set default size as 100KB */ + test->size = 0x19000; + + /* set default endpoint device */ + test->device = "/dev/pci-endpoint-test.0"; + + while ((c = getopt(argc, argv, "D:b:m:lrwcs:")) != EOF) + switch (c) { + case 'D': + test->device = optarg; + continue; + case 'b': + test->barnum = atoi(optarg); + if (test->barnum < 0 || test->barnum > 5) + goto usage; + continue; + case 'l': + test->legacyirq = true; + continue; + case 'm': + test->msinum = atoi(optarg); + if (test->msinum < 1 || test->msinum > 32) + goto usage; + continue; + case 'r': + test->read = true; + continue; + case 'w': + test->write = true; + continue; + case 'c': + test->copy = true; + continue; + case 's': + test->size = strtoul(optarg, NULL, 0); + continue; + case '?': + case 'h': + default: +usage: + fprintf(stderr, + "usage: %s [options]\n" + "Options:\n" + "\t-D PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n" + "\t-b BAR test (bar number between 0..5)\n" + "\t-m MSI test (msi number between 1..32)\n" + "\t-r Read buffer test\n" + "\t-w Write buffer test\n" + "\t-c Copy buffer test\n" + "\t-s Size of buffer {default: 100KB}\n", + argv[0]); + return -EINVAL; + } + + run_test(test); + return 0; +} diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh new file mode 100644 index 0000000000000000000000000000000000000000..5442bbea4c229f5b4f871faa12c625348af29d36 --- /dev/null +++ b/tools/pci/pcitest.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +echo "BAR tests" +echo + +bar=0 + +while [ $bar -lt 6 ] +do + pcitest -b $bar + bar=`expr $bar + 1` +done +echo + +echo "Interrupt tests" +echo + +pcitest -l +msi=1 + +while [ $msi -lt 33 ] +do + pcitest -m $msi + msi=`expr $msi + 1` +done +echo + +echo "Read Tests" +echo + +pcitest -r -s 1 +pcitest -r -s 1024 +pcitest -r -s 1025 +pcitest -r -s 1024000 +pcitest -r -s 1024001 +echo + +echo "Write Tests" +echo + +pcitest -w -s 1 +pcitest -w -s 1024 +pcitest -w -s 1025 +pcitest -w -s 1024000 +pcitest -w -s 1024001 +echo + +echo "Copy Tests" +echo + +pcitest -c -s 1 +pcitest -c -s 1024 +pcitest -c -s 1025 +pcitest -c -s 1024000 +pcitest -c -s 1024001 +echo diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 3db3db9278be6f52af6bb9935eb7698eb1fc2f79..643cc4ba6872511d7941fad057ac875f5382deb2 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -31,3 +31,5 @@ config.mak.autogen .config-detected util/intel-pt-decoder/inat-tables.c arch/*/include/generated/ +pmu-events/pmu-events.c +pmu-events/jevents diff --git a/tools/perf/Build b/tools/perf/Build index 9b79f8d7db50e3dafcbe200ad8f3b47230ead69b..bd8eeb60533cdee3311dd6277b728990e4e70042 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -50,5 +50,6 @@ libperf-y += util/ libperf-y += arch/ libperf-y += ui/ libperf-y += scripts/ +libperf-y += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 2da07e51e1190271dd0d74130baf4807b0b08031..82241423517051b1668564710063cd1797bcf327 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -76,7 +76,7 @@ REPORT OPTIONS -c:: --coalesce:: - Specify sorintg fields for single cacheline display. + Specify sorting fields for single cacheline display. Following fields are available: tid,pid,iaddr,dso (see COALESCE) @@ -106,7 +106,7 @@ REPORT OPTIONS -d:: --display:: - Siwtch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default. + Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default. C2C RECORD ---------- diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 2d96de6132a941dcd50d9808aafead39bc7df6ea..6e6a8b22c8594f42e9727c81a8e4b4adc8301e43 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -30,6 +30,24 @@ OPTIONS --verbose=:: Verbosity level. +-p:: +--pid=:: + Trace on existing process id (comma separated list). + +-a:: +--all-cpus:: + Force system-wide collection. Scripts run without a + normally use -a by default, while scripts run with a + normally don't - this option allows the latter to be run in + system-wide mode. + +-C:: +--cpu=:: + Only trace for the list of CPUs provided. Multiple CPUs can + be provided as a comma separated list with no space like: 0,1. + Ranges of CPUs are specified with -: 0-2. + Default is to trace on all online CPUs. + SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 41857cce5e86f4404929051d4daf0414386452d9..f709de54707b714feff11e3e0e7f28cbb0c9fb8d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob] +'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob] DESCRIPTION ----------- @@ -24,6 +24,10 @@ Don't print descriptions. --long-desc:: Print longer event descriptions. +--details:: +Print how named events are resolved internally into perf events, and also +any extra expressions computed by perf stat. + [[EVENT_MODIFIERS]] EVENT MODIFIERS @@ -240,6 +244,8 @@ To limit the list use: . 'pmu' to print the kernel supplied PMU events. +. 'sdt' to list all Statically Defined Tracepoint events. + . If none of the above is matched, it will apply the supplied glob to all events, printing the ones that match. diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index e6c9902c6d82b0e2535059c1a915356e84856589..165c2b1d43177b7df8870d85d7911a1a615ce7e7 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -240,9 +240,13 @@ Add a probe on schedule() function 12th line with recording cpu local variable: or ./perf probe --add='schedule:12 cpu' - this will add one or more probes which has the name start with "schedule". +Add one or more probes which has the name start with "schedule". - Add probes on lines in schedule() function which calls update_rq_clock(). + ./perf probe schedule* + or + ./perf probe --add='schedule*' + +Add probes on lines in schedule() function which calls update_rq_clock(). ./perf probe 'schedule;update_rq_clock*' or diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b16003ec14a743bcdcc8473798388b0849aa3523..b0e9e921d534c46d79da49fe0641013a64caebb6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -225,7 +225,7 @@ OPTIONS the libunwind or libdw library) should be used instead. Using the "lbr" method doesn't require any compiler options. It will produce call graphs from the hardware LBR registers. The - main limition is that it is only available on new Intel + main limitation is that it is only available on new Intel platforms, such as Haswell. It can only get user call chain. It doesn't work with branch stack sampling at the same time. @@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can displayed with the weight and local_weight sort keys. This currently works for TSX abort events and some memory events in precise mode on modern Intel CPUs. +--namespaces:: +Record events of type PERF_RECORD_NAMESPACES. + --transaction:: Record transaction flags for transaction related events. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index c04cc0647c16e6d8bbb458d655ab8ddbec3262c9..9fa84617181e47e00a93529b6a91fca3d3fbe9c2 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -72,7 +72,8 @@ OPTIONS --sort=:: Sort histogram entries by given key(s) - multiple keys can be specified in CSV format. Following sort keys are available: - pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight. + pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, + local_weight, cgroup_id. Each key has following meaning: @@ -80,6 +81,7 @@ OPTIONS - pid: command and tid of the task - dso: name of library or module executed at the time of sample - symbol: name of function executed at the time of sample + - symbol_size: size of function executed at the time of sample - parent: name of function matched to the parent regex filter. Unmatched entries are displayed as "[other]". - cpu: cpu number the task ran at the time of sample @@ -91,6 +93,7 @@ OPTIONS - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. - local_weight: Local weight version of the weight above. + - cgroup_id: ID derived from cgroup namespace device and inode numbers. - transaction: Transaction abort flags. - overhead: Overhead percentage of sample - overhead_sys: Overhead percentage of sample running in system mode @@ -172,11 +175,14 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. + If the keys starts with a prefix '+', then it will append the specified + field(s) to the default field order. For example: perf report -F +period,sample. + -p:: --parent=:: A regex filter to identify parent. The parent is a caller of this function and searched through the callchain, thus it requires callchain - information recorded. The pattern is in the exteneded regex format and + information recorded. The pattern is in the extended regex format and defaults to "\^sys_|^do_page_fault", see '--sort parent'. -x:: @@ -201,8 +207,8 @@ OPTIONS -g:: --call-graph=:: Display call chains using type, min percent threshold, print limit, - call order, sort key, optional branch and value. Note that ordering of - parameters is not fixed so any parement can be given in an arbitraty order. + call order, sort key, optional branch and value. Note that ordering + is not fixed so any parameter can be given in an arbitrary order. One exception is the print_limit which should be preceded by threshold. print_type can be either: @@ -229,6 +235,7 @@ OPTIONS sort_key can be: - function: compare on functions (default) - address: compare on individual code addresses + - srcline: compare on source filename and line number branch can be: - branch: include last branch information in callgraph when available. @@ -424,6 +431,10 @@ include::itrace.txt[] --hierarchy:: Enable hierarchical output. +--inline:: + If a callgraph address belongs to an inlined function, the inline stack + will be printed. Each entry is function name or file/line. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index d33deddb0146cc9500f29fe6f0b1e65d6f237b10..a092a2499e8f8f2003681369c140a19a84b9c401 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist' --migrations:: Show migration events. +-n:: +--next:: + Show next task. + -I:: --idle-hist:: Show idle-related events only. diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index dfbb506d2c349744399637c78148fe6446b124bc..142606c0ec9c1dcbb9329f23cb9b9bc65085c419 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -39,7 +39,7 @@ EVENT HANDLERS When perf script is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is -ignored (or passed to a 'trace_handled' function, see below) and the +ignored (or passed to a 'trace_unhandled' function, see below) and the next event is processed. Most of the event's field values are passed as arguments to the diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 54acba22155865a8fafc7629b9e3987c061fed47..51ec2d20068ad5df7b0a00a0983b5ec08d49979a 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -149,10 +149,8 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu, print "id=%d, args=%s\n" % \ (id, args), -def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs, - common_pid, common_comm): - print_header(event_name, common_cpu, common_secs, common_nsecs, - common_pid, common_comm) +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) def print_header(event_name, cpu, secs, nsecs, pid, comm): print "%-20s %5u %05u.%09u %8u %-20s " % \ @@ -321,7 +319,7 @@ So those are the essential steps in writing and running a script. The process can be generalized to any tracepoint or set of tracepoints you're interested in - basically find the tracepoint(s) you're interested in by looking at the list of available events shown by -'perf list' and/or look in /sys/kernel/debug/tracing events for +'perf list' and/or look in /sys/kernel/debug/tracing/events/ for detailed event and field info, record the corresponding trace data using 'perf record', passing it the list of interesting events, generate a skeleton script using 'perf script -g python' and modify the @@ -334,7 +332,7 @@ right place, you can have your script listed alongside the other scripts listed by the 'perf script -l' command e.g.: ---- -root@tropicana:~# perf script -l +# perf script -l List of available trace scripts: wakeup-latency system-wide min/max/avg wakeup latency rw-by-file r/w activity for a program, by file @@ -383,8 +381,6 @@ source tree: ---- # ls -al kernel-source/tools/perf/scripts/python - -root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python total 32 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. @@ -399,7 +395,7 @@ otherwise your script won't show up at run-time), 'perf script -l' should show a new entry for your script: ---- -root@tropicana:~# perf script -l +# perf script -l List of available trace scripts: wakeup-latency system-wide min/max/avg wakeup latency rw-by-file r/w activity for a program, by file @@ -437,7 +433,7 @@ EVENT HANDLERS When perf script is invoked using a trace script, a user-defined 'handler function' is called for each event in the trace. If there's no handler function defined for a given event type, the event is -ignored (or passed to a 'trace_handled' function, see below) and the +ignored (or passed to a 'trace_unhandled' function, see below) and the next event is processed. Most of the event's field values are passed as arguments to the @@ -532,7 +528,7 @@ can implement a set of optional functions: gives scripts a chance to do setup tasks: ---- -def trace_begin: +def trace_begin(): pass ---- @@ -541,7 +537,7 @@ def trace_begin: as display results: ---- -def trace_end: +def trace_end(): pass ---- @@ -550,8 +546,7 @@ def trace_end: of common arguments are passed into it: ---- -def trace_unhandled(event_name, context, common_cpu, common_secs, - common_nsecs, common_pid, common_comm): +def trace_unhandled(event_name, context, event_fields_dict): pass ---- diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4ed5f239ba7dee9fe2fc44854e3b8daa42091a97..3517e204a2b30754e430213c41b1d48e51d9b52c 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,7 +116,7 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, + srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -189,15 +189,20 @@ OPTIONS i.e., -F "" is not allowed. The brstack output includes branch related information with raw addresses using the - /v/v/v/v/ syntax in the following order: + /v/v/v/v/cycles syntax in the following order: FROM: branch source instruction TO : branch target instruction M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported X/- : X=branch inside a transactional region, -=not in transaction region or not supported A/- : A=TSX abort entry, -=not aborted region or not supported + cycles The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible. + When brstackinsn is specified the full assembler sequences of branch sequences for each sample + is printed. This is the full execution path leading to the sample. This is only supported when the + sample was recorded with perf record -b or -j any. + -k:: --vmlinux=:: vmlinux pathname @@ -248,6 +253,9 @@ OPTIONS --show-mmap-events Display mmap related events (e.g. MMAP, MMAP2). +--show-namespace-events + Display namespace events i.e. events of type PERF_RECORD_NAMESPACES. + --show-switch-events Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. @@ -299,6 +307,14 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. +--max-blocks:: + Set the maximum number of program blocks to print with brstackasm for + each sample. + +--inline:: + If a callgraph address belongs to an inlined function, the inline stack + will be printed. Each entry has function name and file/line. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index aecf2a87e7d60bf4a759f47d7c60aac1b12aec07..bd0e4417f2be63f892a870ec5ad60ae0fd5d9994 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -94,8 +94,7 @@ to activate system-wide monitoring. Default is to count on all CPUs. -A:: --no-aggr:: -Do not aggregate counts across all monitored CPUs in system-wide mode (-a). -This option is only valid in system-wide mode. +Do not aggregate counts across all monitored CPUs. -n:: --null:: @@ -237,6 +236,9 @@ To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. +--no-merge:: +Do not merge results from same PMUs. + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index afd728672b6fb488415ca4c58043baaf6b264226..c1e3288a2dfbcc80100a88cd8286fdc2a707c541 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -123,7 +123,8 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. major or all pagefaults. Default value is maj. --syscalls:: - Trace system calls. This options is enabled by default. + Trace system calls. This options is enabled by default, disable with + --no-syscalls. --call-graph [mode,type,min[,limit],order[,key][,branch]]:: Setup and enable call-graph (stack chain/backtrace) recording. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b664b18d3991b16f28168527320e5402b755fba3..de8b39dda7b828edf2d5acae5bd7ca6dae505369 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -11,8 +11,8 @@ All fields are in native-endian of the machine that generated the perf.data. When perf is writing to a pipe it uses a special version of the file format that does not rely on seeking to adjust data offsets. This -format is not described here. The pipe version can be converted to -normal perf.data with perf inject. +format is described in "Pipe-mode data" section. The pipe data version can be +augmented with additional events using perf inject. The file starts with a perf_header: @@ -270,7 +270,7 @@ When the event stream contains multiple events each event is identified by an ID. This can be either through the PERF_SAMPLE_ID or the PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is at a fixed offset from the event header, which allows reliable -parsing of the header. Relying on ID may be ambigious. +parsing of the header. Relying on ID may be ambiguous. IDENTIFIER is only supported by newer Linux kernels. Perf record specific events: @@ -288,7 +288,7 @@ struct attr_event { uint64_t id[]; }; - PERF_RECORD_HEADER_EVENT_TYPE = 65, /* depreceated */ + PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ #define MAX_EVENT_NAME 64 @@ -411,6 +411,21 @@ An array bound by the perf_file_section size. ids points to a array of uint64_t defining the ids for event attr attr. +Pipe-mode data + +Pipe-mode avoid seeks in the file by removing the perf_file_section and flags +from the struct perf_header. The trimmed header is: + +struct perf_pipe_file_header { + u64 magic; + u64 size; +}; + +The information about attrs, data, and event_types is instead in the +synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and +PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode. + + References: include/uapi/linux/perf_event.h diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 170b0289a7bcd3682b3c67249bb0eac2384b70d1..db0ca3063eaebc83d483465a2e0e9bfdaf5e4387 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -23,7 +23,7 @@ For memory address profiling, try: perf mem record / perf mem report For tracepoint events, try: perf report -s trace_fields To record callchains for each sample: perf record -g To record every process run by a user: perf record -u -Skip collecing build-id when recording: perf record -B +Skip collecting build-id when recording: perf record -B To change sampling frequency to 100 Hz: perf record -F 100 See assembly instructions with percentage: perf annotate If you prefer Intel style assembly, try: perf annotate -M intel diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 8672f835ae4eff2659ff314c40bf77d3730b7986..a29da46d180f8780507938f5183af24e189a1c21 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -12,6 +12,7 @@ tools/arch/sparc/include/asm/barrier_32.h tools/arch/sparc/include/asm/barrier_64.h tools/arch/tile/include/asm/barrier.h tools/arch/x86/include/asm/barrier.h +tools/arch/x86/include/asm/cmpxchg.h tools/arch/x86/include/asm/cpufeatures.h tools/arch/x86/include/asm/disabled-features.h tools/arch/x86/include/asm/required-features.h @@ -63,6 +64,7 @@ tools/include/linux/bitops.h tools/include/linux/compiler.h tools/include/linux/compiler-gcc.h tools/include/linux/coresight-pmu.h +tools/include/linux/bug.h tools/include/linux/filter.h tools/include/linux/hash.h tools/include/linux/kernel.h @@ -72,12 +74,15 @@ tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/bpf.h tools/include/uapi/linux/bpf_common.h +tools/include/uapi/linux/fcntl.h tools/include/uapi/linux/hw_breakpoint.h tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h +tools/include/uapi/linux/stat.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h +tools/include/linux/refcount.h tools/include/linux/string.h tools/include/linux/stringify.h tools/include/linux/types.h diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 27c9fbca7bd9c79eb703ad2d37d4280f9d286cc4..1f4fbc9a3292e06b6ae48c47297e795ccbc45809 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(srctree)/tools/scripts/Makefile.arch -$(call detected_var,ARCH) +$(call detected_var,SRCARCH) NO_PERF_REGS := 1 # Additional ARCH settings for ppc -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) NO_PERF_REGS := 0 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -ifeq ($(ARCH),x86) +ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated @@ -43,12 +43,12 @@ ifeq ($(ARCH),x86) NO_PERF_REGS := 0 endif -ifeq ($(ARCH),arm) +ifeq ($(SRCARCH),arm) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-arm endif -ifeq ($(ARCH),arm64) +ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -61,7 +61,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(ARCH),$(filter $(ARCH),x86 arm)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -115,9 +115,9 @@ endif FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf -FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config --include $(src-perf)/arch/$(ARCH)/Makefile +-include $(src-perf)/arch/$(SRCARCH)/Makefile ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET @@ -170,13 +170,20 @@ PYTHON2_CONFIG := \ override PYTHON_CONFIG := \ $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) -PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +grep-libs = $(filter -l%,$(1)) +strip-libs = $(filter-out -l%,$(1)) -PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) -PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) -ifeq ($(CC), clang) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) +ifdef PYTHON_CONFIG + PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) + PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) + PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil + PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) + ifeq ($(CC), clang) + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) + endif + FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) @@ -221,12 +228,12 @@ ifeq ($(DEBUG),0) endif INC_FLAGS += -I$(src-perf)/util/include -INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include +INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/uapi INC_FLAGS += -I$(srctree)/tools/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/ -INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/ +INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/ # $(obj-perf) for generated common-cmds.h # $(obj-perf)/util for generated bison/flex headers @@ -267,6 +274,7 @@ ifdef NO_LIBELF NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 + NO_JVMTI := 1 else ifeq ($(feature-libelf), 0) ifeq ($(feature-glibc), 1) @@ -276,7 +284,7 @@ else LIBC_SUPPORT := 1 endif ifeq ($(LIBC_SUPPORT),1) - msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel); + msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel); NO_LIBELF := 1 NO_DWARF := 1 @@ -284,6 +292,7 @@ else NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 + NO_JVMTI := 1 else ifneq ($(filter s% -static%,$(LDFLAGS),),) msg := $(error No static glibc found, please install glibc-static); @@ -317,6 +326,10 @@ ifdef NO_DWARF NO_LIBDW_DWARF_UNWIND := 1 endif +ifeq ($(feature-sched_getcpu), 1) + CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf @@ -342,7 +355,7 @@ ifndef NO_LIBELF ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); + msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled); NO_DWARF := 1 else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) @@ -367,7 +380,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_BPF_PROLOGUE $(call detected,CONFIG_BPF_PROLOGUE) else - msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); + msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset()); endif else msg := $(warning DWARF support is off, BPF prologue is disabled); @@ -393,7 +406,7 @@ ifdef PERF_HAVE_JITDUMP endif endif -ifeq ($(ARCH),powerpc) +ifeq ($(SRCARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif @@ -474,7 +487,7 @@ else endif ifndef NO_LOCAL_LIBUNWIND - ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) msg := $(warning No debug_frame support found in libunwind); @@ -550,8 +563,6 @@ ifndef NO_GTK2 endif endif -grep-libs = $(filter -l%,$(1)) -strip-libs = $(filter-out -l%,$(1)) ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL @@ -599,21 +610,9 @@ else $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev) else - PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) - - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) - PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) - PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC), clang) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif - FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) - ifneq ($(feature-libpython), 1) $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else - ifneq ($(feature-libpython-version), 1) $(warning Python 3 is not yet supported; please set) $(warning PYTHON and/or PYTHON_CONFIG appropriately.) @@ -741,7 +740,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq ($(ARCH), x86) + ifneq ($(SRCARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -770,7 +769,7 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - ifeq ($(ARCH),x86) + ifeq ($(SRCARCH),x86) ifeq ($(feature-get_cpuid), 0) msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); NO_AUXTRACE := 1 @@ -873,7 +872,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(ARCH)$(IS_64_BIT), x861) +ifeq ($(SRCARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 79fe31f20a17644e416642bb9d3a213c97286479..5008f51a08a2118ca34723a87373acd231afb7c3 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -226,7 +226,7 @@ endif ifeq ($(config),0) include $(srctree)/tools/scripts/Makefile.arch --include arch/$(ARCH)/Makefile +-include arch/$(SRCARCH)/Makefile endif # The FEATURE_DUMP_EXPORT holds location of the actual diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index 109eb75cf7de4e97c855fe52c3d6437f454b5c8e..d9b6af837c7d392fce6168d9ce5ebe2b9a6b57d5 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ libperf-y += common.o -libperf-y += $(ARCH)/ +libperf-y += $(SRCARCH)/ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index dfea6b6355254e0eac677dfc05dcce0463089f6e..29361d9b635a67ec323d4e6470b20b8e6ef3e385 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -33,6 +33,7 @@ #include "../../util/cs-etm.h" #include +#include #define ENABLE_SINK_MAX 128 #define CS_BUS_DEVICE_PATH "/bus/coresight/devices/" diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c index 33ec5b339da87fca6c49757e58816f076c738ba6..8bb176a37990501c8cce8dd834840b07b29ed503 100644 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ b/tools/perf/arch/arm/util/dwarf-regs.c @@ -9,6 +9,7 @@ */ #include +#include #include struct pt_regs_dwarfnum { @@ -16,10 +17,9 @@ struct pt_regs_dwarfnum { unsigned int dwarfnum; }; -#define STR(s) #s #define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} #define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%r##num), .dwarfnum = num} + {.name = __stringify(%r##num), .dwarfnum = num} #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} /* diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c index b4176c60117a58b8dc8a4adf7a181576a542f7b0..bacfa00fca398ed59c1caa901cfe77d0e510f8b1 100644 --- a/tools/perf/arch/arm/util/unwind-libdw.c +++ b/tools/perf/arch/arm/util/unwind-libdw.c @@ -1,6 +1,7 @@ #include #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" +#include "../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index 44eafd6f2d5008e62aa68b21a2b415d1db736a49..8f1908756cb69bd6b452aec963650b5bebe2ffea 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -50,7 +50,7 @@ static int arm64__annotate_init(struct arch *arch) arch->initialized = true; arch->priv = arm; arch->associate_instruction_ops = arm64__associate_instruction_ops; - arch->objdump.comment_char = ';'; + arch->objdump.comment_char = '/'; arch->objdump.skip_functions_char = '+'; return 0; diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c index 068b6189157b6eb9a11c58f7cbca1f4ca1348efb..cd764a9fd0983de57e02feca3967dd1e5e2504d2 100644 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ b/tools/perf/arch/arm64/util/dwarf-regs.c @@ -8,9 +8,12 @@ * published by the Free Software Foundation. */ +#include #include +#include #include #include /* for struct user_pt_regs */ +#include #include "util.h" struct pt_regs_dwarfnum { @@ -20,7 +23,7 @@ struct pt_regs_dwarfnum { #define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} #define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%x##num), .dwarfnum = num} + {.name = __stringify(%x##num), .dwarfnum = num} #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} #define DWARFNUM2OFFSET(index) \ (index * sizeof((struct user_pt_regs *)0)->regs[0]) diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index c116b713f7f773e296ff9485ff1f2ce6046c6add..b415dfdbcccabadfce60eebfa2e0fd7bcef37fcd 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -1,6 +1,6 @@ +#include #ifndef REMOTE_UNWIND_LIBUNWIND -#include #include #include "perf_regs.h" #include "../../util/unwind.h" diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 886dd2aaff0d81533468ad60db70abbd23465ec3..6b40e9f017404f87877668645f7afa8f28649788 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -4,6 +4,8 @@ #include "../util/util.h" #include "../util/debug.h" +#include "sane_ctype.h" + const char *const arm_triplets[] = { "arm-eabi-", "arm-linux-androideabi-", @@ -24,6 +26,7 @@ const char *const arm64_triplets[] = { const char *const powerpc_triplets[] = { "powerpc-unknown-linux-gnu-", + "powerpc-linux-gnu-", "powerpc64-unknown-linux-gnu-", "powerpc64-linux-gnu-", "powerpc64le-linux-gnu-", diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 41bdf9530d821d6b14d7bd1eb007e2ddda06a5a9..98ac87052a74c5cbdf2c954221d8ce4d3f9e9752 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "util.h" struct pt_regs_dwarfnum { @@ -24,10 +25,10 @@ struct pt_regs_dwarfnum { }; #define REG_DWARFNUM_NAME(r, num) \ - {.name = STR(%)STR(r), .dwarfnum = num, \ + {.name = __stringify(%)__stringify(r), .dwarfnum = num, \ .ptregs_offset = offsetof(struct pt_regs, r)} #define GPR_DWARFNUM_NAME(num) \ - {.name = STR(%gpr##num), .dwarfnum = num, \ + {.name = __stringify(%gpr##num), .dwarfnum = num, \ .ptregs_offset = offsetof(struct pt_regs, gpr[num])} #define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 74eee30398f807d559a33800d7d43094f525e19d..249723f0e6a9e4bc3b64df49df7add18c0b175e6 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -1,3 +1,4 @@ +#include #include "util/kvm-stat.h" #include "util/parse-events.h" #include "util/debug.h" diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index a3c3e1ce6807cefddeaffd1304f353218c8bc835..f860dc411f69a9236a9ef9e37a7f660c9e9062d4 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -1,5 +1,11 @@ +#include +#include +#include + #include "../../perf.h" +#include "../../util/util.h" #include "../../util/perf_regs.h" +#include "../../util/debug.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(r0, PERF_REG_POWERPC_R0), @@ -47,3 +53,109 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), SMPL_REG_END }; + +/* REG or %rREG */ +#define SDT_OP_REGEX1 "^(%r)?([1-2]?[0-9]|3[0-1])$" + +/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */ +#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$" + +static regex_t sdt_op_regex1, sdt_op_regex2; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED); + if (ret) + goto error; + + ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED); + if (ret) + goto free_regex1; + + initialized = 1; + return 0; + +free_regex1: + regfree(&sdt_op_regex1); +error: + pr_debug4("Regex compilation error.\n"); + return ret; +} + +/* + * Parse OP and convert it into uprobe format, which is, +/-NUM(%gprREG). + * Possible variants of OP are: + * Format Example + * ------------------------- + * NUM(REG) 48(18) + * -NUM(REG) -48(18) + * NUM(%rREG) 48(%r18) + * -NUM(%rREG) -48(%r18) + * REG 18 + * %rREG %r18 + * iNUM i0 + * i-NUM i-1 + * + * SDT marker arguments on Powerpc uses %rREG form with -mregnames flag + * and REG form with -mno-regnames. Here REG is general purpose register, + * which is in 0 to 31 range. + */ +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + int ret, new_len; + regmatch_t rm[5]; + char prefix; + + /* Constant argument. Uprobe does not support it */ + if (old_op[0] == 'i') { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) { + /* REG or %rREG --> %gprREG */ + + new_len = 5; /* % g p r NULL */ + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%%gpr%.*s", + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so); + } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) { + /* + * -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) --> + * +/-NUM(%gprREG) + */ + prefix = (rm[1].rm_so == -1) ? '+' : '-'; + + new_len = 8; /* +/- ( % g p r ) NULL */ + new_len += (int)(rm[2].rm_eo - rm[2].rm_so); + new_len += (int)(rm[4].rm_eo - rm[4].rm_so); + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix, + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, + (int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so); + } else { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + return SDT_ARG_VALID; +} diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 1030a6e504bb9fe2a8f7eca185a419332613d924..bf9a2594572c7ff3e94adfab36dce813476bb6a9 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -10,6 +10,7 @@ #include "symbol.h" #include "map.h" #include "probe-event.h" +#include "probe-file.h" #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) @@ -51,6 +52,18 @@ int arch__compare_symbol_names(const char *namea, const char *nameb) return strcmp(namea, nameb); } + +int arch__compare_symbol_names_n(const char *namea, const char *nameb, + unsigned int n) +{ + /* Skip over initial dot */ + if (*namea == '.') + namea++; + if (*nameb == '.') + nameb++; + + return strncmp(namea, nameb, n); +} #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 @@ -79,13 +92,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, * However, if the user specifies an offset, we fall back to using the * GEP since all userspace applications (objdump/readelf) show function * disassembly with offsets from the GEP. - * - * In addition, we shouldn't specify an offset for kretprobes. */ - if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) || - !map || !sym) + if (pev->point.offset || !map || !sym) return; + /* For kretprobes, add an offset only if the kernel supports it */ + if (!pev->uprobes && pev->point.retprobe) { +#ifdef HAVE_LIBELF_SUPPORT + if (!kretprobe_offset_is_supported()) +#endif + return; + } + lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym); if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c new file mode 100644 index 0000000000000000000000000000000000000000..745b4b1b8b21eefe3525144741d1d64c02a7a25d --- /dev/null +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -0,0 +1,30 @@ +static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + /* catch all kind of jumps */ + if (strchr(name, 'j') || + !strncmp(name, "bct", 3) || + !strncmp(name, "br", 2)) + ops = &jump_ops; + /* override call/returns */ + if (!strcmp(name, "bras") || + !strcmp(name, "brasl") || + !strcmp(name, "basr")) + ops = &call_ops; + if (!strcmp(name, "br")) + ops = &ret_ops; + + arch__associate_ins_ops(arch, name, ops); + return ops; +} + +static int s390__annotate_init(struct arch *arch) +{ + if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = s390__associate_ins_ops; + } + + return 0; +} diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index ed57df2e6d687d89ccf95d9aa03c263411de7fe9..d233e2eb959223174b9d6808016778361b338b0b 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -9,6 +9,7 @@ * as published by the Free Software Foundation. */ +#include #include "../../util/kvm-stat.h" #include diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index e93ef0b38db8e16a38f83e2e3f08dfb8d5fff4a0..5aef183e2f85c5f6c45e44d4e9a68c9ea62c0d74 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,6 +338,7 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free +332 common statx sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index 7f064eb371589aa887e112cddb54145d6268141b..f9713a71d77e8be3d7766ba91cf69556d401bedc 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -6,7 +6,10 @@ #include "evsel.h" #include "arch-tests.h" +#include #include +#include +#include #include static pid_t spawn(void) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 5c76cc83186a24ae0df5f3986a922dc75e4ff959..e3ae9cff2b6716617088c2baf3d962370df49b81 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -1,3 +1,5 @@ +#include +#include #include #include #include diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index cc1d865e31f1b7975578343761104db81398e8de..6aa3f2a38321e170c9cba5cca97d85e6364ffeda 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -13,6 +13,7 @@ * */ +#include #include #include "../../util/header.h" diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 5132775a044fd7b1acf2a6c814567f56c7e03b3b..af2bce7a2cd60d5fbe5beaf371e6cd189733a9e0 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 90fa2286edcf91d9e780c87fa87dd3f645acbccc..f630de0206a17248308c928618040a5b793db4ae 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index b63d4be655a24a678f69d6deea3b4ee87283b938..bf817beca0a822f498efa88ff64089319c2a27a3 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,3 +1,4 @@ +#include #include "../../util/kvm-stat.h" #include #include diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c5db14f36cc7a145257385b61cf9c39a02a592ce..f95edebfb7162daaef5a04d5363084d980565cb6 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -1,5 +1,11 @@ +#include +#include +#include + #include "../../perf.h" +#include "../../util/util.h" #include "../../util/perf_regs.h" +#include "../../util/debug.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), @@ -26,3 +32,224 @@ const struct sample_reg sample_reg_masks[] = { #endif SMPL_REG_END }; + +struct sdt_name_reg { + const char *sdt_name; + const char *uprobe_name; +}; +#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m} +#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL} + +static const struct sdt_name_reg sdt_reg_tbl[] = { + SDT_NAME_REG(eax, ax), + SDT_NAME_REG(rax, ax), + SDT_NAME_REG(al, ax), + SDT_NAME_REG(ah, ax), + SDT_NAME_REG(ebx, bx), + SDT_NAME_REG(rbx, bx), + SDT_NAME_REG(bl, bx), + SDT_NAME_REG(bh, bx), + SDT_NAME_REG(ecx, cx), + SDT_NAME_REG(rcx, cx), + SDT_NAME_REG(cl, cx), + SDT_NAME_REG(ch, cx), + SDT_NAME_REG(edx, dx), + SDT_NAME_REG(rdx, dx), + SDT_NAME_REG(dl, dx), + SDT_NAME_REG(dh, dx), + SDT_NAME_REG(esi, si), + SDT_NAME_REG(rsi, si), + SDT_NAME_REG(sil, si), + SDT_NAME_REG(edi, di), + SDT_NAME_REG(rdi, di), + SDT_NAME_REG(dil, di), + SDT_NAME_REG(ebp, bp), + SDT_NAME_REG(rbp, bp), + SDT_NAME_REG(bpl, bp), + SDT_NAME_REG(rsp, sp), + SDT_NAME_REG(esp, sp), + SDT_NAME_REG(spl, sp), + + /* rNN registers */ + SDT_NAME_REG(r8b, r8), + SDT_NAME_REG(r8w, r8), + SDT_NAME_REG(r8d, r8), + SDT_NAME_REG(r9b, r9), + SDT_NAME_REG(r9w, r9), + SDT_NAME_REG(r9d, r9), + SDT_NAME_REG(r10b, r10), + SDT_NAME_REG(r10w, r10), + SDT_NAME_REG(r10d, r10), + SDT_NAME_REG(r11b, r11), + SDT_NAME_REG(r11w, r11), + SDT_NAME_REG(r11d, r11), + SDT_NAME_REG(r12b, r12), + SDT_NAME_REG(r12w, r12), + SDT_NAME_REG(r12d, r12), + SDT_NAME_REG(r13b, r13), + SDT_NAME_REG(r13w, r13), + SDT_NAME_REG(r13d, r13), + SDT_NAME_REG(r14b, r14), + SDT_NAME_REG(r14w, r14), + SDT_NAME_REG(r14d, r14), + SDT_NAME_REG(r15b, r15), + SDT_NAME_REG(r15w, r15), + SDT_NAME_REG(r15d, r15), + SDT_NAME_REG_END, +}; + +/* + * Perf only supports OP which is in +/-NUM(REG) form. + * Here plus-minus sign, NUM and parenthesis are optional, + * only REG is mandatory. + * + * SDT events also supports indirect addressing mode with a + * symbol as offset, scaled mode and constants in OP. But + * perf does not support them yet. Below are few examples. + * + * OP with scaled mode: + * (%rax,%rsi,8) + * 10(%ras,%rsi,8) + * + * OP with indirect addressing mode: + * check_action(%rip) + * mp_+52(%rip) + * 44+mp_(%rip) + * + * OP with constant values: + * $0 + * $123 + * $-1 + */ +#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$" + +static regex_t sdt_op_regex; + +static int sdt_init_op_regex(void) +{ + static int initialized; + int ret = 0; + + if (initialized) + return 0; + + ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED); + if (ret < 0) { + pr_debug4("Regex compilation error.\n"); + return ret; + } + + initialized = 1; + return 0; +} + +/* + * Max x86 register name length is 5(ex: %r15d). So, 6th char + * should always contain NULL. This helps to find register name + * length using strlen, insted of maintaing one more variable. + */ +#define SDT_REG_NAME_SIZE 6 + +/* + * The uprobe parser does not support all gas register names; + * so, we have to replace them (ex. for x86_64: %rax -> %ax). + * Note: If register does not require renaming, just copy + * paste as it is, but don't leave it empty. + */ +static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg) +{ + int i = 0; + + for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) { + if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) { + strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name); + return; + } + } + + strncpy(uprobe_reg, sdt_reg, sdt_len); +} + +int arch_sdt_arg_parse_op(char *old_op, char **new_op) +{ + char new_reg[SDT_REG_NAME_SIZE] = {0}; + int new_len = 0, ret; + /* + * rm[0]: +/-NUM(REG) + * rm[1]: +/- + * rm[2]: NUM + * rm[3]: ( + * rm[4]: REG + * rm[5]: ) + */ + regmatch_t rm[6]; + /* + * Max prefix length is 2 as it may contains sign(+/-) + * and displacement 0 (Both sign and displacement 0 are + * optional so it may be empty). Use one more character + * to hold last NULL so that strlen can be used to find + * prefix length, instead of maintaing one more variable. + */ + char prefix[3] = {0}; + + ret = sdt_init_op_regex(); + if (ret < 0) + return ret; + + /* + * If unsupported OR does not match with regex OR + * register name too long, skip it. + */ + if (strchr(old_op, ',') || strchr(old_op, '$') || + regexec(&sdt_op_regex, old_op, 6, rm, 0) || + rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) { + pr_debug4("Skipping unsupported SDT argument: %s\n", old_op); + return SDT_ARG_SKIP; + } + + /* + * Prepare prefix. + * If SDT OP has parenthesis but does not provide + * displacement, add 0 for displacement. + * SDT Uprobe Prefix + * ----------------------------- + * +24(%rdi) +24(%di) + + * 24(%rdi) +24(%di) + + * %rdi %di + * (%rdi) +0(%di) +0 + * -80(%rbx) -80(%bx) - + */ + if (rm[3].rm_so != rm[3].rm_eo) { + if (rm[1].rm_so != rm[1].rm_eo) + prefix[0] = *(old_op + rm[1].rm_so); + else if (rm[2].rm_so != rm[2].rm_eo) + prefix[0] = '+'; + else + strncpy(prefix, "+0", 2); + } + + /* Rename register */ + sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so, + new_reg); + + /* Prepare final OP which should be valid for uprobe_events */ + new_len = strlen(prefix) + + (rm[2].rm_eo - rm[2].rm_so) + + (rm[3].rm_eo - rm[3].rm_so) + + strlen(new_reg) + + (rm[5].rm_eo - rm[5].rm_so) + + 1; /* NULL */ + + *new_op = zalloc(new_len); + if (!*new_op) + return -ENOMEM; + + scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s", + strlen(prefix), prefix, + (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so, + (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so, + strlen(new_reg), new_reg, + (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so); + + return SDT_ARG_VALID; +} diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c index c4b72176ca8361b024fbab0905422bca1a9657b0..38dc9bb2a7c90a7ee8cf8c5aca9078fecc546743 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -1,6 +1,7 @@ #include #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" +#include "../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 579a592990dd996bfd16fcf7214a30567a6b24f2..842ab2781cdc51196a9496ea2759f64a2f350103 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -25,17 +25,17 @@ # endif #endif -int bench_numa(int argc, const char **argv, const char *prefix); -int bench_sched_messaging(int argc, const char **argv, const char *prefix); -int bench_sched_pipe(int argc, const char **argv, const char *prefix); -int bench_mem_memcpy(int argc, const char **argv, const char *prefix); -int bench_mem_memset(int argc, const char **argv, const char *prefix); -int bench_futex_hash(int argc, const char **argv, const char *prefix); -int bench_futex_wake(int argc, const char **argv, const char *prefix); -int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); -int bench_futex_requeue(int argc, const char **argv, const char *prefix); +int bench_numa(int argc, const char **argv); +int bench_sched_messaging(int argc, const char **argv); +int bench_sched_pipe(int argc, const char **argv); +int bench_mem_memcpy(int argc, const char **argv); +int bench_mem_memset(int argc, const char **argv); +int bench_futex_hash(int argc, const char **argv); +int bench_futex_wake(int argc, const char **argv); +int bench_futex_wake_parallel(int argc, const char **argv); +int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ -int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); +int bench_futex_lock_pi(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index da04b8c5568a39cf509b9aa5df844641866bcfd1..fe16b310097f509042a87ec8d2ee26a0f445c199 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include #include #include @@ -113,8 +114,7 @@ static void print_summary(void) (int) runtime.tv_sec); } -int bench_futex_hash(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_hash(int argc, const char **argv) { int ret = 0; cpu_set_t cpu; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 91877777ec6e3a4052e49eca9cba4793fa551f73..73a1c44ea63c9c916f3b1b2e3fbfb82d8a93fb77 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -3,6 +3,7 @@ */ /* For the CLR_() macros */ +#include #include #include @@ -139,8 +140,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr) } } -int bench_futex_lock_pi(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_lock_pi(int argc, const char **argv) { int ret = 0; unsigned int i; diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 2b9705a8734cd6005fd3149943e2ff20eecf4bbe..41786cbea24c643373af4884ab2901ae3dc99669 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include #include #include @@ -108,8 +109,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_requeue(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_requeue(int argc, const char **argv) { int ret = 0; unsigned int i, j; diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 2c8fa67ad53767e43c7e91dc84279528268fcb3a..4ab12c8e016a7d8f8e90d47a25ff3a9c145b8171 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -8,6 +8,7 @@ */ /* For the CLR_() macros */ +#include #include #include @@ -196,8 +197,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_wake_parallel(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_wake_parallel(int argc, const char **argv) { int ret = 0; unsigned int i, j; diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index e246b1b8388a3fd3bfc7c9c398a90efc26d86c79..2fa49222ef8deeb0dbc0616d7bbc181bfea28cbf 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -9,6 +9,7 @@ */ /* For the CLR_() macros */ +#include #include #include @@ -114,8 +115,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_wake(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_futex_wake(int argc, const char **argv) { int ret = 0; unsigned int i, j; diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index b2e06d1190d0766694c97f7f0b808717442af604..e44fd32395305cc5e58042ec7a85fee2e9ef1748 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -88,13 +88,11 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak #ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP #include -static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, - size_t cpusetsize, - cpu_set_t *cpuset) +#include +static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused, + size_t cpusetsize __maybe_unused, + cpu_set_t *cpuset __maybe_unused) { - attr = attr; - cpusetsize = cpusetsize; - cpuset = cpuset; return 0; } #endif diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 52504a83b5a15a0649f45f936d72d97a0fe4159f..fbd732b54047976ec3abae8f54c630c6a6f78e2e 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -12,6 +12,7 @@ #include #include "../util/header.h" #include "../util/cloexec.h" +#include "../util/string2.h" #include "bench.h" #include "mem-memcpy-arch.h" #include "mem-memset-arch.h" @@ -284,7 +285,7 @@ static const char * const bench_mem_memcpy_usage[] = { NULL }; -int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_mem_memcpy(int argc, const char **argv) { struct bench_mem_info info = { .functions = memcpy_functions, @@ -358,7 +359,7 @@ static const struct function memset_functions[] = { { .name = NULL, } }; -int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_mem_memset(int argc, const char **argv) { struct bench_mem_info info = { .functions = memset_functions, diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 3083fc36282b564d3fc5da05864907c3ac222cf4..27de0c8c5c19a19e42f20c15f4d198cf31405857 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -4,6 +4,7 @@ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance */ +#include /* For the CLR_() macros */ #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -187,7 +189,8 @@ static const struct option options[] = { OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), - OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), + OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " + "convergence is reached when each process (all its threads) is running on a single NUMA node."), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), @@ -1766,7 +1769,7 @@ static int bench_all(void) return 0; } -int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_numa(int argc, const char **argv) { init_params(&p0, "main,", argc, argv); argc = parse_options(argc, argv, options, bench_numa_usage, 0); diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 6a111e775210f700de6e3b796b703675748a0b05..4f961e74535b9c0d9141ecd0df51c236f30d358d 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -260,8 +260,7 @@ static const char * const bench_sched_message_usage[] = { NULL }; -int bench_sched_messaging(int argc, const char **argv, - const char *prefix __maybe_unused) +int bench_sched_messaging(int argc, const char **argv) { unsigned int i, total_children; struct timeval start, stop, diff; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 2243f0150d76452da3502c0d5f3ead12b85de512..a152737370c59342866e003807a318bafd475cbb 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -76,7 +76,7 @@ static void *worker_thread(void *__tdata) return NULL; } -int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused) +int bench_sched_pipe(int argc, const char **argv) { struct thread_data threads[2], *td; int pipe_1[2], pipe_2[2]; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4f52d85f5ebc574daa91f29b1a3d24758c3d276d..7a5dc7e5c577270edaa3a36f7a916d7f3cc5a182 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -33,6 +33,7 @@ #include "util/block-range.h" #include +#include #include struct perf_annotate { @@ -383,7 +384,7 @@ static const char * const annotate_usage[] = { NULL }; -int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_annotate(int argc, const char **argv) { struct perf_annotate annotate = { .tool = { @@ -393,6 +394,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) .comm = perf_event__process_comm, .exit = perf_event__process_exit, .fork = perf_event__process_fork, + .namespaces = perf_event__process_namespaces, + .attr = perf_event__process_attr, + .build_id = perf_event__process_build_id, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index a1cddc6bbf0f178f44ee03b2a3729b85a385dbd4..445e62881254437d9f4d3067dc6f31b3b2233169 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -25,7 +25,7 @@ #include #include -typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix); +typedef int (*bench_fn_t)(int argc, const char **argv); struct bench { const char *name; @@ -155,7 +155,7 @@ static int bench_str2int(const char *str) * to something meaningful: */ static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn, - int argc, const char **argv, const char *prefix) + int argc, const char **argv) { int size; char *name; @@ -171,7 +171,7 @@ static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t f prctl(PR_SET_NAME, name); argv[0] = name; - ret = fn(argc, argv, prefix); + ret = fn(argc, argv); free(name); @@ -198,7 +198,7 @@ static void run_collection(struct collection *coll) fflush(stdout); argv[1] = bench->name; - run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL); + run_bench(coll->name, bench->name, bench->fn, 1, argv); printf("\n"); } } @@ -211,7 +211,7 @@ static void run_all_collections(void) run_collection(coll); } -int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_bench(int argc, const char **argv) { struct collection *coll; int ret = 0; @@ -270,7 +270,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); fflush(stdout); - ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1, prefix); + ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 30e2b2cb2421219de6cc168483f5e9144af8036c..9eba7f1add1f9706f2b8153fb33f3574b362d5ee 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "builtin.h" #include "perf.h" @@ -21,6 +22,7 @@ #include "util/build-id.h" #include "util/session.h" #include "util/symbol.h" +#include "util/time-utils.h" static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { @@ -47,19 +49,22 @@ static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) char to[PATH_MAX]; const char *name; u64 addr1 = 0, addr2 = 0; - int i; + int i, err = -1; scnprintf(from, sizeof(from), "%s/kallsyms", from_dir); scnprintf(to, sizeof(to), "%s/kallsyms", to_dir); for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { - addr1 = kallsyms__get_function_start(from, name); - if (addr1) + err = kallsyms__get_function_start(from, name, &addr1); + if (!err) break; } - if (name) - addr2 = kallsyms__get_function_start(to, name); + if (err) + return false; + + if (kallsyms__get_function_start(to, name, &addr2)) + return false; return addr1 == addr2; } @@ -276,8 +281,7 @@ static int build_id_cache__update_file(const char *filename) return err; } -int cmd_buildid_cache(int argc, const char **argv, - const char *prefix __maybe_unused) +int cmd_buildid_cache(int argc, const char **argv) { struct strlist *list; struct str_node *pos; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 5e914ee79eb3f0eb3a5e38db5f02e7b792b2ce2c..fdaca16e0c7488fc1c5fc1548f736b974a1ad95b 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -16,6 +16,7 @@ #include "util/session.h" #include "util/symbol.h" #include "util/data.h" +#include static int sysfs__fprintf_build_id(FILE *fp) { @@ -87,8 +88,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) return 0; } -int cmd_buildid_list(int argc, const char **argv, - const char *prefix __maybe_unused) +int cmd_buildid_list(int argc, const char **argv) { bool show_kernel = false; bool with_hits = false; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e2b21723bbf8acddac596ebf519ba5b7f82c84b3..620a467ee3043c5cadfedd02c87c0195bb681413 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -9,10 +9,13 @@ * Dick Fowles * Joe Mario */ +#include +#include #include #include #include #include +#include #include "util.h" #include "debug.h" #include "builtin.h" @@ -24,11 +27,13 @@ #include "tool.h" #include "data.h" #include "sort.h" +#include "event.h" #include "evlist.h" #include "evsel.h" #include #include "ui/browsers/hists.h" #include "evlist.h" +#include "thread.h" struct c2c_hists { struct hists hists; @@ -2334,7 +2339,7 @@ static int perf_c2c__hists_browse(struct hists *hists) static void perf_c2c_display(struct perf_session *session) { - if (c2c.use_stdio) + if (use_browser == 0) perf_c2c__hists_fprintf(stdout, session); else perf_c2c__hists_browse(&c2c.hists.hists); @@ -2536,7 +2541,7 @@ static int perf_c2c__report(int argc, const char **argv) OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), #endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, - "Use the stdio interface"), + "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, "Display full length of symbols"), OPT_BOOLEAN(0, "no-source", &no_source, @@ -2755,12 +2760,12 @@ static int perf_c2c__record(int argc, const char **argv) pr_debug("\n"); } - ret = cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv); free(rec_argv); return ret; } -int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_c2c(int argc, const char **argv) { argc = parse_options(argc, argv, c2c_options, c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 8c0d93b7c2f0316dc95d18d1b36bbb38b3fac191..80668fa7556ef5731b97c795c5a3a760134a9d8f 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -154,11 +154,12 @@ static int parse_config_arg(char *arg, char **var, char **value) return 0; } -int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_config(int argc, const char **argv) { int i, ret = 0; struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); + const char *config_filename; argc = parse_options(argc, argv, config_options, config_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -175,6 +176,11 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) else if (use_user_config) config_exclusive_filename = user_config; + if (!config_exclusive_filename) + config_filename = user_config; + else + config_filename = config_exclusive_filename; + /* * At only 'config' sub-command, individually use the config set * because of reinitializing with options config file location. @@ -192,13 +198,9 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) parse_options_usage(config_usage, config_options, "l", 1); } else { ret = show_config(set); - if (ret < 0) { - const char * config_filename = config_exclusive_filename; - if (!config_exclusive_filename) - config_filename = user_config; + if (ret < 0) pr_err("Nothing configured, " "please check your %s \n", config_filename); - } } break; default: @@ -221,13 +223,8 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) if (value == NULL) ret = show_spec_config(set, var); - else { - const char *config_filename = config_exclusive_filename; - - if (!config_exclusive_filename) - config_filename = user_config; + else ret = set_config(set, config_filename, var, value); - } free(arg); } } else diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 7ad6e17ac6b362cadb02e453d36ce5bb288ff2e1..0adb5f82335ad22534a6f7d91e4fc3b34086bb47 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -6,7 +6,7 @@ #include "data-convert.h" #include "data-convert-bt.h" -typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); +typedef int (*data_cmd_fn_t)(int argc, const char **argv); struct data_cmd { const char *name; @@ -50,8 +50,7 @@ static const char * const data_convert_usage[] = { NULL }; -static int cmd_data_convert(int argc, const char **argv, - const char *prefix __maybe_unused) +static int cmd_data_convert(int argc, const char **argv) { const char *to_ctf = NULL; struct perf_data_convert_opts opts = { @@ -98,7 +97,7 @@ static struct data_cmd data_cmds[] = { { .name = NULL, }, }; -int cmd_data(int argc, const char **argv, const char *prefix) +int cmd_data(int argc, const char **argv) { struct data_cmd *cmd; const char *cmdstr; @@ -118,7 +117,7 @@ int cmd_data(int argc, const char **argv, const char *prefix) if (strcmp(cmd->name, cmdstr)) continue; - return cmd->fn(argc, argv, prefix); + return cmd->fn(argc, argv); } pr_err("Unknown command: %s\n", cmdstr); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1b96a3122228f913af671d315ad0a71cedb6c11d..eec5df80f5a30460ad19962f62ad318e50f76e2f 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -19,6 +19,8 @@ #include "util/data.h" #include "util/config.h" +#include +#include #include #include @@ -364,6 +366,7 @@ static struct perf_tool tool = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }; @@ -1320,7 +1323,7 @@ static int diff__config(const char *var, const char *value, return 0; } -int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_diff(int argc, const char **argv) { int ret = hists__init(); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index e09c4287fe87435d200e20f9f216de9aa71129c7..6d210e40d611b00b5a5292ccc76d5f2881a51ee5 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -46,7 +46,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details return 0; } -int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_evlist(int argc, const char **argv) { struct perf_attr_details details = { .verbose = false, }; const struct option options[] = { diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index c3e643666c722cf743252a7f89b0b2b50d46d604..9e0b35cd0eeae3e7a072cbd0f11a99ee1685c97c 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -9,13 +9,18 @@ #include "builtin.h" #include "perf.h" +#include #include #include +#include +#include #include "debug.h" #include +#include #include "evlist.h" #include "target.h" +#include "cpumap.h" #include "thread_map.h" #include "util/config.h" @@ -50,11 +55,12 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused, done = true; } -static int write_tracing_file(const char *name, const char *val) +static int __write_tracing_file(const char *name, const char *val, bool append) { char *file; int fd, ret = -1; ssize_t size = strlen(val); + int flags = O_WRONLY; file = get_tracing_file(name); if (!file) { @@ -62,7 +68,12 @@ static int write_tracing_file(const char *name, const char *val) return -1; } - fd = open(file, O_WRONLY); + if (append) + flags |= O_APPEND; + else + flags |= O_TRUNC; + + fd = open(file, flags); if (fd < 0) { pr_debug("cannot open tracing file: %s\n", name); goto out; @@ -79,6 +90,18 @@ static int write_tracing_file(const char *name, const char *val) return ret; } +static int write_tracing_file(const char *name, const char *val) +{ + return __write_tracing_file(name, val, false); +} + +static int append_tracing_file(const char *name, const char *val) +{ + return __write_tracing_file(name, val, true); +} + +static int reset_tracing_cpu(void); + static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) { if (write_tracing_file("tracing_on", "0") < 0) @@ -90,14 +113,78 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused) if (write_tracing_file("set_ftrace_pid", " ") < 0) return -1; + if (reset_tracing_cpu() < 0) + return -1; + + return 0; +} + +static int set_tracing_pid(struct perf_ftrace *ftrace) +{ + int i; + char buf[16]; + + if (target__has_cpu(&ftrace->target)) + return 0; + + for (i = 0; i < thread_map__nr(ftrace->evlist->threads); i++) { + scnprintf(buf, sizeof(buf), "%d", + ftrace->evlist->threads->map[i]); + if (append_tracing_file("set_ftrace_pid", buf) < 0) + return -1; + } return 0; } +static int set_tracing_cpumask(struct cpu_map *cpumap) +{ + char *cpumask; + size_t mask_size; + int ret; + int last_cpu; + + last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); + mask_size = (last_cpu + 3) / 4 + 1; + mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ + + cpumask = malloc(mask_size); + if (cpumask == NULL) { + pr_debug("failed to allocate cpu mask\n"); + return -1; + } + + cpu_map__snprint_mask(cpumap, cpumask, mask_size); + + ret = write_tracing_file("tracing_cpumask", cpumask); + + free(cpumask); + return ret; +} + +static int set_tracing_cpu(struct perf_ftrace *ftrace) +{ + struct cpu_map *cpumap = ftrace->evlist->cpus; + + if (!target__has_cpu(&ftrace->target)) + return 0; + + return set_tracing_cpumask(cpumap); +} + +static int reset_tracing_cpu(void) +{ + struct cpu_map *cpumap = cpu_map__new(NULL); + int ret; + + ret = set_tracing_cpumask(cpumap); + cpu_map__put(cpumap); + return ret; +} + static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) { char *trace_file; int trace_fd; - char *trace_pid; char buf[4096]; struct pollfd pollfd = { .events = POLLIN, @@ -108,42 +195,43 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) return -1; } - if (argc < 1) - return -1; - signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); - reset_tracing_files(ftrace); + if (reset_tracing_files(ftrace) < 0) + goto out; /* reset ftrace buffer */ if (write_tracing_file("trace", "0") < 0) goto out; - if (perf_evlist__prepare_workload(ftrace->evlist, &ftrace->target, - argv, false, ftrace__workload_exec_failed_signal) < 0) + if (argc && perf_evlist__prepare_workload(ftrace->evlist, + &ftrace->target, argv, false, + ftrace__workload_exec_failed_signal) < 0) { goto out; + } - if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { - pr_err("failed to set current_tracer to %s\n", ftrace->tracer); - goto out; + if (set_tracing_pid(ftrace) < 0) { + pr_err("failed to set ftrace pid\n"); + goto out_reset; } - if (asprintf(&trace_pid, "%d", thread_map__pid(ftrace->evlist->threads, 0)) < 0) { - pr_err("failed to allocate pid string\n"); - goto out; + if (set_tracing_cpu(ftrace) < 0) { + pr_err("failed to set tracing cpumask\n"); + goto out_reset; } - if (write_tracing_file("set_ftrace_pid", trace_pid) < 0) { - pr_err("failed to set pid: %s\n", trace_pid); - goto out_free_pid; + if (write_tracing_file("current_tracer", ftrace->tracer) < 0) { + pr_err("failed to set current_tracer to %s\n", ftrace->tracer); + goto out_reset; } trace_file = get_tracing_file("trace_pipe"); if (!trace_file) { pr_err("failed to open trace_pipe\n"); - goto out_free_pid; + goto out_reset; } trace_fd = open(trace_file, O_RDONLY); @@ -152,7 +240,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (trace_fd < 0) { pr_err("failed to open trace_pipe\n"); - goto out_free_pid; + goto out_reset; } fcntl(trace_fd, F_SETFL, O_NONBLOCK); @@ -163,6 +251,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) goto out_close_fd; } + setup_pager(); + perf_evlist__start_workload(ftrace->evlist); while (!done) { @@ -191,11 +281,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) out_close_fd: close(trace_fd); -out_free_pid: - free(trace_pid); -out: +out_reset: reset_tracing_files(ftrace); - +out: return done ? 0 : -1; } @@ -219,7 +307,7 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) return -1; } -int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_ftrace(int argc, const char **argv) { int ret; struct perf_ftrace ftrace = { @@ -227,15 +315,21 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) .target = { .uid = UINT_MAX, }, }; const char * const ftrace_usage[] = { - "perf ftrace [] ", + "perf ftrace [] []", "perf ftrace [] -- []", NULL }; const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", "tracer to use: function_graph(default) or function"), + OPT_STRING('p', "pid", &ftrace.target.pid, "pid", + "trace on existing process id"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", + "list of cpus to monitor"), OPT_END() }; @@ -245,9 +339,18 @@ int cmd_ftrace(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, ftrace_options, ftrace_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc) + if (!argc && target__none(&ftrace.target)) usage_with_options(ftrace_usage, ftrace_options); + ret = target__validate(&ftrace.target); + if (ret) { + char errbuf[512]; + + target__strerror(&ftrace.target, ret, errbuf, 512); + pr_err("%s\n", errbuf); + return -EINVAL; + } + ftrace.evlist = perf_evlist__new(); if (ftrace.evlist == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index aed0d844e8c271426137abf3b622362913de2340..492f8e14ab09826f15f58023bb721206fbb9cc8f 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -12,16 +12,22 @@ #include #include #include "util/debug.h" +#include +#include +#include +#include +#include +#include static struct man_viewer_list { struct man_viewer_list *next; - char name[FLEX_ARRAY]; + char name[0]; } *man_viewer_list; static struct man_viewer_info_list { struct man_viewer_info_list *next; const char *info; - char name[FLEX_ARRAY]; + char name[0]; } *man_viewer_info_list; enum help_format { @@ -301,12 +307,6 @@ void list_common_cmds_help(void) } } -static int is_perf_command(const char *s) -{ - return is_in_cmdlist(&main_cmds, s) || - is_in_cmdlist(&other_cmds, s); -} - static const char *cmd_to_page(const char *perf_cmd) { char *s; @@ -418,7 +418,7 @@ static int show_html_page(const char *perf_cmd) return 0; } -int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_help(int argc, const char **argv) { bool show_all = false; enum help_format help_format = HELP_FORMAT_MAN; @@ -446,7 +446,6 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) "perf help [--all] [--man|--web|--info] [command]", NULL }; - const char *alias; int rc; load_command_list("perf-", &main_cmds, &other_cmds); @@ -472,12 +471,6 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) return 0; } - alias = alias_lookup(argv[0]); - if (alias && !is_perf_command(argv[0])) { - printf("`perf %s' is aliased to `%s'\n", argv[0], alias); - return 0; - } - switch (help_format) { case HELP_FORMAT_MAN: rc = show_man_page(argv[0]); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b9bc7e39833a47055608feffa1f5917aada33174..ea8db38eedd10a987534da2c81a1ca83cbb2109f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -18,10 +18,13 @@ #include "util/data.h" #include "util/auxtrace.h" #include "util/jit.h" +#include "util/thread.h" #include #include +#include +#include struct perf_inject { struct perf_tool tool; @@ -333,6 +336,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool, return err; } +static int perf_event__repipe_namespaces(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err = perf_event__process_namespaces(tool, event, sample, machine); + + perf_event__repipe(tool, event, sample, machine); + + return err; +} + static int perf_event__repipe_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -660,6 +675,7 @@ static int __cmd_inject(struct perf_inject *inject) session->itrace_synth_opts = &inject->itrace_synth_opts; inject->itrace_synth_opts.inject = true; inject->tool.comm = perf_event__repipe_comm; + inject->tool.namespaces = perf_event__repipe_namespaces; inject->tool.exit = perf_event__repipe_exit; inject->tool.id_index = perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; @@ -681,6 +697,8 @@ static int __cmd_inject(struct perf_inject *inject) lseek(fd, output_data_offset, SEEK_SET); ret = perf_session__process_events(session); + if (ret) + return ret; if (!file_out->is_pipe) { if (inject->build_ids) @@ -725,7 +743,7 @@ static int __cmd_inject(struct perf_inject *inject) return ret; } -int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_inject(int argc, const char **argv) { struct perf_inject inject = { .tool = { diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 224bfc454b4aa53d049e642dfc9feeadb1b37165..bcfb363112d3c999b7b09d2d99b771c04358fdfa 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -7,6 +7,7 @@ * * Released under the GPL v2. (and only v2, not any later version) */ +#include #include "builtin.h" #include #include @@ -43,7 +44,7 @@ static int __cmd_kallsyms(int argc, const char **argv) return 0; } -int cmd_kallsyms(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_kallsyms(int argc, const char **argv) { const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 6da8d083e4e596d821673ed7408c814723029aca..9409c9464667023c60841e3fb4e7a2364bc23167 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -20,11 +20,16 @@ #include "util/debug.h" +#include #include #include +#include +#include #include #include +#include "sane_ctype.h" + static int kmem_slab; static int kmem_page; @@ -964,6 +969,7 @@ static struct perf_tool perf_kmem = { .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; @@ -1865,7 +1871,7 @@ static int __cmd_record(int argc, const char **argv) for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) @@ -1884,7 +1890,7 @@ static int kmem_config(const char *var, const char *value, void *cb __maybe_unus return 0; } -int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_kmem(int argc, const char **argv) { const char * const default_slab_sort = "frag,hit,bytes"; const char * const default_page_sort = "bytes,hit"; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 08fa88f62a246f4f309410da45379aa21949f14e..f309c3773522dee5c5c8a515b1c278757fca58c6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -3,6 +3,7 @@ #include "util/evsel.h" #include "util/evlist.h" +#include "util/term.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -23,13 +24,33 @@ #ifdef HAVE_TIMERFD_SUPPORT #include #endif +#include +#include #include +#include +#include +#include #include #include +#include #include #include +static const char *get_filename_for_perf_kvm(void) +{ + const char *filename; + + if (perf_host && !perf_guest) + filename = strdup("perf.data.host"); + else if (!perf_host && perf_guest) + filename = strdup("perf.data.guest"); + else + filename = strdup("perf.data.kvm"); + + return filename; +} + #ifdef HAVE_KVM_STAT_SUPPORT #include "util/kvm-stat.h" @@ -1044,6 +1065,7 @@ static int read_events(struct perf_kvm_stat *kvm) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; struct perf_data_file file = { @@ -1208,7 +1230,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED); record_usage = kvm_stat_record_usage; - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } static int @@ -1348,6 +1370,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, kvm->tool.exit = perf_event__process_exit; kvm->tool.fork = perf_event__process_fork; kvm->tool.lost = process_lost_event; + kvm->tool.namespaces = perf_event__process_namespaces; kvm->tool.ordered_events = true; perf_tool__fill_defaults(&kvm->tool); @@ -1475,7 +1498,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv) #endif perf_stat: - return cmd_stat(argc, argv, NULL); + return cmd_stat(argc, argv); } #endif /* HAVE_KVM_STAT_SUPPORT */ @@ -1494,7 +1517,7 @@ static int __cmd_record(const char *file_name, int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } static int __cmd_report(const char *file_name, int argc, const char **argv) @@ -1512,7 +1535,7 @@ static int __cmd_report(const char *file_name, int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_report(i, rec_argv, NULL); + return cmd_report(i, rec_argv); } static int @@ -1531,10 +1554,10 @@ __cmd_buildid_list(const char *file_name, int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_buildid_list(i, rec_argv, NULL); + return cmd_buildid_list(i, rec_argv); } -int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_kvm(int argc, const char **argv) { const char *file_name = NULL; const struct option kvm_options[] = { @@ -1589,9 +1612,9 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) else if (!strncmp(argv[0], "rep", 3)) return __cmd_report(file_name, argc, argv); else if (!strncmp(argv[0], "diff", 4)) - return cmd_diff(argc, argv, NULL); + return cmd_diff(argc, argv); else if (!strncmp(argv[0], "top", 3)) - return cmd_top(argc, argv, NULL); + return cmd_top(argc, argv); else if (!strncmp(argv[0], "buildid-list", 12)) return __cmd_buildid_list(file_name, argc, argv); #ifdef HAVE_KVM_STAT_SUPPORT diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 3b9d98b5feef69182a0036b3c2f57d29a3364fc9..4bf2cb4d25aa6bdba591495974f65091078d0b85 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -18,8 +18,9 @@ #include static bool desc_flag = true; +static bool details_flag; -int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_list(int argc, const char **argv) { int i; bool raw_dump = false; @@ -30,6 +31,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) "Print extra event descriptions. --no-desc to not print."), OPT_BOOLEAN('v', "long-desc", &long_desc_flag, "Print longer event descriptions."), + OPT_BOOLEAN(0, "details", &details_flag, + "Print information on the perf event names and expressions used internally by events."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -50,7 +53,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, raw_dump, !desc_flag, long_desc_flag); + print_events(NULL, raw_dump, !desc_flag, long_desc_flag, + details_flag); return 0; } @@ -72,7 +76,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { @@ -80,7 +84,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) if (sep == NULL) { print_events(argv[i], raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, + details_flag); continue; } sep_idx = sep - argv[i]; @@ -103,7 +108,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump, !desc_flag, - long_desc_flag); + long_desc_flag, + details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); free(s); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index ce3bfb48b26f0f309b5c90cf49bb4870fbd76a4b..ff98652484a77efaa86a835b7c8f9a796ab36b50 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,3 +1,5 @@ +#include +#include #include "builtin.h" #include "perf.h" @@ -26,6 +28,7 @@ #include #include +#include static struct perf_session *session; @@ -858,6 +861,7 @@ static int __cmd_report(bool display_info) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }; struct perf_data_file file = { @@ -940,34 +944,36 @@ static int __cmd_record(int argc, const char **argv) BUG_ON(i != rec_argc); - ret = cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv); free(rec_argv); return ret; } -int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_lock(int argc, const char **argv) { - const struct option info_options[] = { - OPT_BOOLEAN('t', "threads", &info_threads, - "dump thread list in perf.data"), - OPT_BOOLEAN('m', "map", &info_map, - "map of lock instances (address:name table)"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_END() - }; const struct option lock_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_END() }; + + const struct option info_options[] = { + OPT_BOOLEAN('t', "threads", &info_threads, + "dump thread list in perf.data"), + OPT_BOOLEAN('m', "map", &info_map, + "map of lock instances (address:name table)"), + OPT_PARENT(lock_options) + }; + const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ - OPT_END() + OPT_PARENT(lock_options) }; + const char * const info_usage[] = { "perf lock info []", NULL @@ -1006,7 +1012,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) rc = __cmd_report(false); } else if (!strcmp(argv[0], "script")) { /* Aliased to 'perf script' */ - return cmd_script(argc, argv, prefix); + return cmd_script(argc, argv); } else if (!strcmp(argv[0], "info")) { if (argc) { argc = parse_options(argc, argv, diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 6114e07ca6131ca94ed1a6107fb69ae3ccbab145..e001c02907937792d373f5245be0e6489b3e5210 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include "builtin.h" #include "perf.h" @@ -8,6 +12,7 @@ #include "util/data.h" #include "util/mem-events.h" #include "util/debug.h" +#include "util/symbol.h" #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -129,7 +134,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) pr_debug("\n"); } - ret = cmd_record(i, rec_argv, NULL); + ret = cmd_record(i, rec_argv); free(rec_argv); return ret; } @@ -256,7 +261,7 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; - ret = cmd_report(i, rep_argv, NULL); + ret = cmd_report(i, rep_argv); free(rep_argv); return ret; } @@ -330,7 +335,7 @@ parse_mem_ops(const struct option *opt, const char *str, int unset) return ret; } -int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_mem(int argc, const char **argv) { struct stat st; struct perf_mem mem = { @@ -342,6 +347,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .lost = perf_event__process_lost, .fork = perf_event__process_fork, .build_id = perf_event__process_build_id, + .namespaces = perf_event__process_namespaces, .ordered_events = true, }, .input_name = "perf.data", diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1fcebc31a50810b18c50b1a39b28f90f350a6327..cf9f9e9c2fc00e81c88c43b53da36ace0d9efd5c 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -442,9 +442,9 @@ static int perf_del_probe_events(struct strfilter *filter) } if (ret == -ENOENT && ret2 == -ENOENT) - pr_debug("\"%s\" does not hit any event.\n", str); - /* Note that this is silently ignored */ - ret = 0; + pr_warning("\"%s\" does not hit any event.\n", str); + else + ret = 0; error: if (kfd >= 0) @@ -468,7 +468,7 @@ static int perf_del_probe_events(struct strfilter *filter) static int -__cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) +__cmd_probe(int argc, const char **argv) { const char * const probe_usage[] = { "perf probe [] 'PROBEDEF' ['PROBEDEF' ...]", @@ -486,7 +486,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INCR('v', "verbose", &verbose, "be more verbose (show parsed arguments, etc)"), OPT_BOOLEAN('q', "quiet", ¶ms.quiet, - "be quiet (do not show any mesages)"), + "be quiet (do not show any messages)"), OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT", "list up probe events", opt_set_filter_with_command, DEFAULT_LIST_FILTER), @@ -687,13 +687,13 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) return 0; } -int cmd_probe(int argc, const char **argv, const char *prefix) +int cmd_probe(int argc, const char **argv) { int ret; ret = init_params(); if (!ret) { - ret = __cmd_probe(argc, argv, prefix); + ret = __cmd_probe(argc, argv); cleanup_params(); } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index bc84a375295d7cb4920df6a4be1f91403bcdc04b..ee7d0a82ccd073cb52a49cea6b15e14d827436ad 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -38,11 +38,18 @@ #include "util/bpf-loader.h" #include "util/trigger.h" #include "util/perf-hooks.h" +#include "util/time-utils.h" +#include "util/units.h" #include "asm/bug.h" +#include +#include +#include #include #include +#include #include +#include #include #include @@ -876,6 +883,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGTERM, sig_handler); signal(SIGSEGV, sigsegv_handler); + if (rec->opts.record_namespaces) + tool->namespace_events = true; + if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { signal(SIGUSR2, snapshot_sig_handler); if (rec->opts.auxtrace_snapshot_mode) @@ -983,6 +993,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) */ if (forks) { union perf_event *event; + pid_t tgid; event = malloc(sizeof(event->comm) + machine->id_hdr_size); if (event == NULL) { @@ -996,10 +1007,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * cannot see a correct process name for those events. * Synthesize COMM event to prevent it. */ - perf_event__synthesize_comm(tool, event, - rec->evlist->workload.pid, - process_synthesized_event, - machine); + tgid = perf_event__synthesize_comm(tool, event, + rec->evlist->workload.pid, + process_synthesized_event, + machine); + free(event); + + if (tgid == -1) + goto out_child; + + event = malloc(sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (event == NULL) { + err = -ENOMEM; + goto out_child; + } + + /* + * Synthesize NAMESPACES event for the command specified. + */ + perf_event__synthesize_namespaces(tool, event, + rec->evlist->workload.pid, + tgid, process_synthesized_event, + machine); free(event); perf_evlist__start_workload(rec->evlist); @@ -1497,6 +1528,7 @@ static struct record record = { .fork = perf_event__process_fork, .exit = perf_event__process_exit, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .ordered_events = true, @@ -1611,6 +1643,8 @@ static struct option __record_options[] = { "opts", "AUX area tracing Snapshot Mode", ""), OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), + OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, + "Record namespaces events"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, @@ -1640,7 +1674,7 @@ static struct option __record_options[] = { struct option *record_options = __record_options; -int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_record(int argc, const char **argv) { int err; struct record *rec = &record; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0a88670e56f35f6d8c5397e80264001acaf35e94..22478ff2b706ad1b9fc15c6fe13e341b98af10e5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,7 +16,6 @@ #include #include "util/symbol.h" #include "util/callchain.h" -#include "util/strlist.h" #include "util/values.h" #include "perf.h" @@ -38,10 +37,18 @@ #include "arch/common.h" #include "util/time-utils.h" #include "util/auxtrace.h" +#include "util/units.h" #include +#include +#include +#include +#include #include #include +#include +#include +#include struct report { struct perf_tool tool; @@ -394,8 +401,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, fprintf(stdout, "\n\n"); } - if (sort_order == NULL && - parent_pattern == default_parent_pattern) + if (!quiet) fprintf(stdout, "#\n# (%s)\n#\n", help); if (rep->show_threads) { @@ -682,7 +688,7 @@ const char report_callchain_help[] = "Display call graph (stack chain/backtrace) CALLCHAIN_REPORT_HELP "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; -int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_report(int argc, const char **argv) { struct perf_session *session; struct itrace_synth_opts itrace_synth_opts = { .set = 0, }; @@ -701,6 +707,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, @@ -845,6 +852,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) stdio__config_color, "always"), OPT_STRING(0, "time", &report.time_str, "str", "Time span of interest (start,stop)"), + OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, + "Show inline function"), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index b94cf0de715ab9a2d6205c12053916c31d276a13..39996c53995a2f57b982536a09b420020cd9e256 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -22,16 +22,21 @@ #include "util/debug.h" +#include #include #include #include +#include +#include #include #include #include #include #include +#include "sane_ctype.h" + #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 #define COMM_LEN 20 @@ -221,6 +226,7 @@ struct perf_sched { unsigned int max_stack; bool show_cpu_visual; bool show_wakeups; + bool show_next; bool show_migrations; bool show_state; u64 skipped_samples; @@ -1897,14 +1903,18 @@ static char task_state_char(struct thread *thread, int state) } static void timehist_print_sample(struct perf_sched *sched, + struct perf_evsel *evsel, struct perf_sample *sample, struct addr_location *al, struct thread *thread, u64 t, int state) { struct thread_runtime *tr = thread__priv(thread); + const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); + const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); u32 max_cpus = sched->max_cpu + 1; char tstr[64]; + char nstr[30]; u64 wait_time; timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); @@ -1937,7 +1947,12 @@ static void timehist_print_sample(struct perf_sched *sched, if (sched->show_state) printf(" %5c ", task_state_char(thread, state)); - if (sched->show_wakeups) + if (sched->show_next) { + snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid); + printf(" %-*s", comm_width, nstr); + } + + if (sched->show_wakeups && !sched->show_next) printf(" %-*s", comm_width, ""); if (thread->tid == 0) @@ -2531,7 +2546,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, } if (!sched->summary_only) - timehist_print_sample(sched, sample, &al, thread, t, state); + timehist_print_sample(sched, evsel, sample, &al, thread, t, state); out: if (sched->hist_time.start == 0 && t >= ptime->start) @@ -3262,16 +3277,17 @@ static int __cmd_record(int argc, const char **argv) BUG_ON(i != rec_argc); - return cmd_record(i, rec_argv, NULL); + return cmd_record(i, rec_argv); } -int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_sched(int argc, const char **argv) { const char default_sort_order[] = "avg, max, switch, runtime"; struct perf_sched sched = { .tool = { .sample = perf_sched__process_tracepoint_sample, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .lost = perf_event__process_lost, .fork = perf_sched__process_fork_event, .ordered_events = true, @@ -3340,6 +3356,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('S', "with-summary", &sched.summary, "Show all syscalls and summary with statistics"), OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"), + OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"), OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"), OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"), OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"), @@ -3400,7 +3417,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) * Aliased to 'perf script' for now: */ if (!strcmp(argv[0], "script")) - return cmd_script(argc, argv, prefix); + return cmd_script(argc, argv); if (!strncmp(argv[0], "rec", 3)) { return __cmd_record(argc, argv); @@ -3437,10 +3454,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(timehist_usage, timehist_options); } - if (sched.show_wakeups && sched.summary_only) { - pr_err(" Error: -s and -w are mutually exclusive.\n"); + if ((sched.show_wakeups || sched.show_next) && + sched.summary_only) { + pr_err(" Error: -s and -[n|w] are mutually exclusive.\n"); parse_options_usage(timehist_usage, timehist_options, "s", true); - parse_options_usage(NULL, timehist_options, "w", true); + if (sched.show_wakeups) + parse_options_usage(NULL, timehist_options, "w", true); + if (sched.show_next) + parse_options_usage(NULL, timehist_options, "n", true); return -EINVAL; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c0783b4f7b6c650e2c35ffebfe1e2d7bb3e51e61..4761b0d7fcb5b2586e7fd86c1ed8b219d0cec987 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -21,13 +21,27 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "print_binary.h" #include +#include #include #include #include "asm/bug.h" #include "util/mem-events.h" +#include "util/dump-insn.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sane_ctype.h" static char const *script_name; static char const *generate_script_lang; @@ -42,6 +56,7 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; +static int max_blocks; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -69,6 +84,7 @@ enum perf_output_field { PERF_OUTPUT_CALLINDENT = 1U << 20, PERF_OUTPUT_INSN = 1U << 21, PERF_OUTPUT_INSNLEN = 1U << 22, + PERF_OUTPUT_BRSTACKINSN = 1U << 23, }; struct output_option { @@ -98,6 +114,7 @@ struct output_option { {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, {.str = "insn", .field = PERF_OUTPUT_INSN}, {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN}, + {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, }; /* default set to maintain compatibility with current format */ @@ -292,7 +309,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected. Hence, no address to lookup the source line number.\n"); return -EINVAL; } - + if (PRINT_FIELD(BRSTACKINSN) && + !(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) { + pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" + "Hint: run 'perf record -b ...'\n"); + return -EINVAL; + } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) @@ -546,6 +569,233 @@ static void print_sample_brstacksym(struct perf_sample *sample, } } +#define MAXBB 16384UL + +static int grab_bb(u8 *buffer, u64 start, u64 end, + struct machine *machine, struct thread *thread, + bool *is64bit, u8 *cpumode, bool last) +{ + long offset, len; + struct addr_location al; + bool kernel; + + if (!start || !end) + return 0; + + kernel = machine__kernel_ip(machine, start); + if (kernel) + *cpumode = PERF_RECORD_MISC_KERNEL; + else + *cpumode = PERF_RECORD_MISC_USER; + + /* + * Block overlaps between kernel and user. + * This can happen due to ring filtering + * On Intel CPUs the entry into the kernel is filtered, + * but the exit is not. Let the caller patch it up. + */ + if (kernel != machine__kernel_ip(machine, end)) { + printf("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", + start, end); + return -ENXIO; + } + + memset(&al, 0, sizeof(al)); + if (end - start > MAXBB - MAXINSN) { + if (last) + printf("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end); + else + printf("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start); + return 0; + } + + thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); + if (!al.map || !al.map->dso) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) { + printf("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); + return 0; + } + + /* Load maps to ensure dso->is_64_bit has been updated */ + map__load(al.map); + + offset = al.map->map_ip(al.map, start); + len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer, + end - start + MAXINSN); + + *is64bit = al.map->dso->is_64_bit; + if (len <= 0) + printf("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n", + start, end); + return len; +} + +static void print_jump(uint64_t ip, struct branch_entry *en, + struct perf_insn *x, u8 *inbuf, int len, + int insn) +{ + printf("\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", + ip, + dump_insn(x, ip, inbuf, len, NULL), + en->flags.predicted ? " PRED" : "", + en->flags.mispred ? " MISPRED" : "", + en->flags.in_tx ? " INTX" : "", + en->flags.abort ? " ABORT" : ""); + if (en->flags.cycles) { + printf(" %d cycles", en->flags.cycles); + if (insn) + printf(" %.2f IPC", (float)insn / en->flags.cycles); + } + putchar('\n'); +} + +static void print_ip_sym(struct thread *thread, u8 cpumode, int cpu, + uint64_t addr, struct symbol **lastsym, + struct perf_event_attr *attr) +{ + struct addr_location al; + int off; + + memset(&al, 0, sizeof(al)); + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); + if (!al.map) + thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + addr, &al); + if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) + return; + + al.cpu = cpu; + al.sym = NULL; + if (al.map) + al.sym = map__find_symbol(al.map, al.addr); + + if (!al.sym) + return; + + if (al.addr < al.sym->end) + off = al.addr - al.sym->start; + else + off = al.addr - al.map->start - al.sym->start; + printf("\t%s", al.sym->name); + if (off) + printf("%+d", off); + putchar(':'); + if (PRINT_FIELD(SRCLINE)) + map__fprintf_srcline(al.map, al.addr, "\t", stdout); + putchar('\n'); + *lastsym = al.sym; +} + +static void print_sample_brstackinsn(struct perf_sample *sample, + struct thread *thread, + struct perf_event_attr *attr, + struct machine *machine) +{ + struct branch_stack *br = sample->branch_stack; + u64 start, end; + int i, insn, len, nr, ilen; + struct perf_insn x; + u8 buffer[MAXBB]; + unsigned off; + struct symbol *lastsym = NULL; + + if (!(br && br->nr)) + return; + nr = br->nr; + if (max_blocks && nr > max_blocks + 1) + nr = max_blocks + 1; + + x.thread = thread; + x.cpu = sample->cpu; + + putchar('\n'); + + /* Handle first from jump, of which we don't know the entry. */ + len = grab_bb(buffer, br->entries[nr-1].from, + br->entries[nr-1].from, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len > 0) { + print_ip_sym(thread, x.cpumode, x.cpu, + br->entries[nr - 1].from, &lastsym, attr); + print_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + &x, buffer, len, 0); + } + + /* Print all blocks */ + for (i = nr - 2; i >= 0; i--) { + if (br->entries[i].from || br->entries[i].to) + pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, + br->entries[i].from, + br->entries[i].to); + start = br->entries[i + 1].to; + end = br->entries[i].from; + + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + /* Patch up missing kernel transfers due to ring filters */ + if (len == -ENXIO && i > 0) { + end = br->entries[--i].from; + pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); + } + if (len <= 0) + continue; + + insn = 0; + for (off = 0;; off += ilen) { + uint64_t ip = start + off; + + print_ip_sym(thread, x.cpumode, x.cpu, ip, &lastsym, attr); + if (ip == end) { + print_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn); + break; + } else { + printf("\t%016" PRIx64 "\t%s\n", ip, + dump_insn(&x, ip, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + insn++; + } + } + } + + /* + * Hit the branch? In this case we are already done, and the target + * has not been executed yet. + */ + if (br->entries[0].from == sample->ip) + return; + if (br->entries[0].flags.abort) + return; + + /* + * Print final block upto sample + */ + start = br->entries[0].to; + end = sample->ip; + len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true); + print_ip_sym(thread, x.cpumode, x.cpu, start, &lastsym, attr); + if (len <= 0) { + /* Print at least last IP if basic block did not work */ + len = grab_bb(buffer, sample->ip, sample->ip, + machine, thread, &x.is64bit, &x.cpumode, false); + if (len <= 0) + return; + + printf("\t%016" PRIx64 "\t%s\n", sample->ip, + dump_insn(&x, sample->ip, buffer, len, NULL)); + return; + } + for (off = 0; off <= end - start; off += ilen) { + printf("\t%016" PRIx64 "\t%s\n", start + off, + dump_insn(&x, start + off, buffer + off, len - off, &ilen)); + if (ilen == 0) + break; + } +} static void print_sample_addr(struct perf_sample *sample, struct thread *thread, @@ -632,7 +882,9 @@ static void print_sample_callindent(struct perf_sample *sample, } static void print_insn(struct perf_sample *sample, - struct perf_event_attr *attr) + struct perf_event_attr *attr, + struct thread *thread, + struct machine *machine) { if (PRINT_FIELD(INSNLEN)) printf(" ilen: %d", sample->insn_len); @@ -643,12 +895,15 @@ static void print_insn(struct perf_sample *sample, for (i = 0; i < sample->insn_len; i++) printf(" %02x", (unsigned char)sample->insn[i]); } + if (PRINT_FIELD(BRSTACKINSN)) + print_sample_brstackinsn(sample, thread, attr, machine); } static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct perf_event_attr *attr = &evsel->attr; bool print_srcline_last = false; @@ -689,7 +944,7 @@ static void print_sample_bts(struct perf_sample *sample, if (print_srcline_last) map__fprintf_srcline(al->map, al->addr, "\n ", stdout); - print_insn(sample, attr); + print_insn(sample, attr, thread, machine); printf("\n"); } @@ -830,6 +1085,7 @@ struct perf_script { bool show_task_events; bool show_mmap_events; bool show_switch_events; + bool show_namespace_events; bool allocated; struct cpu_map *cpus; struct thread_map *threads; @@ -871,7 +1127,8 @@ static size_t data_src__printf(u64 data_src) static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, - struct addr_location *al) + struct addr_location *al, + struct machine *machine) { struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; @@ -898,7 +1155,7 @@ static void process_event(struct perf_script *script, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al, machine); return; } @@ -936,7 +1193,7 @@ static void process_event(struct perf_script *script, if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); - print_insn(sample, attr); + print_insn(sample, attr, thread, machine); printf("\n"); } @@ -1046,7 +1303,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, sample, evsel, &al); + process_event(scr, sample, evsel, &al, machine); out_put: addr_location__put(&al); @@ -1118,6 +1375,41 @@ static int process_comm_event(struct perf_tool *tool, return ret; } +static int process_namespaces_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + int ret = -1; + + thread = machine__findnew_thread(machine, event->namespaces.pid, + event->namespaces.tid); + if (thread == NULL) { + pr_debug("problem processing NAMESPACES event, skipping it.\n"); + return -1; + } + + if (perf_event__process_namespaces(tool, event, sample, machine) < 0) + goto out; + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->namespaces.tid; + sample->pid = event->namespaces.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + ret = 0; +out: + thread__put(thread); + return ret; +} + static int process_fork_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -1293,6 +1585,8 @@ static int __cmd_script(struct perf_script *script) } if (script->show_switch_events) script->tool.context_switch = process_switch_event; + if (script->show_namespace_events) + script->tool.namespaces = process_namespaces_event; ret = perf_session__process_events(script->session); @@ -1427,7 +1721,7 @@ static int parse_scriptname(const struct option *opt __maybe_unused, static int parse_output_fields(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) { - char *tok; + char *tok, *strtok_saveptr = NULL; int i, imax = ARRAY_SIZE(all_output_options); int j; int rc = 0; @@ -1488,7 +1782,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused, } } - for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) { + for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) { for (i = 0; i < imax; ++i) { if (strcmp(tok, all_output_options[i].str) == 0) break; @@ -2078,7 +2372,7 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } -int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_script(int argc, const char **argv) { bool show_full_info = false; bool header = false; @@ -2097,6 +2391,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .mmap = perf_event__process_mmap, .mmap2 = perf_event__process_mmap2, .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, .exit = perf_event__process_exit, .fork = perf_event__process_fork, .attr = process_attr, @@ -2152,7 +2447,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen", parse_output_fields), + "bpf-output,callindent,insn,insnlen,brstackinsn", + parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2180,7 +2476,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the mmap events"), OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), + OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, + "Show namespace events (if recorded)"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_INTEGER(0, "max-blocks", &max_blocks, + "Maximum number of code blocks to dump with brstackinsn"), OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", @@ -2194,6 +2494,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Enable kernel symbol demangling"), OPT_STRING(0, "time", &script.time_str, "str", "Time span of interest (start,stop)"), + OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, + "Show inline function"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -2217,7 +2519,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) - return cmd_record(argc, argv, NULL); + return cmd_record(argc, argv); } if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 13b54999ad79ecd4f765d557bf57764486f1fac8..ad9324d1daf9f29a990a0d8f903563873ac91ef9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -64,14 +64,24 @@ #include "util/session.h" #include "util/tool.h" #include "util/group.h" +#include "util/string2.h" #include "asm/bug.h" #include #include +#include +#include #include #include +#include #include #include +#include +#include +#include +#include + +#include "sane_ctype.h" #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "" @@ -140,12 +150,14 @@ static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; static bool force_metric_only = false; +static bool no_merge = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; static const char *output_name; static int output_fd; +static int print_free_counters_hint; struct perf_stat { bool record; @@ -310,8 +322,12 @@ static int read_counter(struct perf_evsel *counter) struct perf_counts_values *count; count = perf_counts(counter->counts, cpu, thread); - if (perf_evsel__read(counter, cpu, thread, count)) + if (perf_evsel__read(counter, cpu, thread, count)) { + counter->counts->scaled = -1; + perf_counts(counter->counts, cpu, thread)->ena = 0; + perf_counts(counter->counts, cpu, thread)->run = 0; return -1; + } if (STAT_RECORD) { if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { @@ -336,12 +352,14 @@ static int read_counter(struct perf_evsel *counter) static void read_counters(void) { struct perf_evsel *counter; + int ret; evlist__for_each_entry(evsel_list, counter) { - if (read_counter(counter)) + ret = read_counter(counter); + if (ret) pr_debug("failed to read counter %s\n", counter->name); - if (perf_stat_process_counter(&stat_config, counter)) + if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); } } @@ -873,10 +891,7 @@ static void print_metric_csv(void *ctx, return; } snprintf(buf, sizeof(buf), fmt, val); - vals = buf; - while (isspace(*vals)) - vals++; - ends = vals; + ends = vals = ltrim(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; @@ -948,10 +963,7 @@ static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, return; unit = fixunit(tbuf, os->evsel, unit); snprintf(buf, sizeof buf, fmt, val); - vals = buf; - while (isspace(*vals)) - vals++; - ends = vals; + ends = vals = ltrim(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; @@ -1109,6 +1121,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep); + if (counter->supported) + print_free_counters_hint = 1; + fprintf(stat_config.output, "%-*s%s", csv_output ? 0 : unit_width, counter->unit, csv_sep); @@ -1140,6 +1155,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.print_metric = pm; out.new_line = nl; out.ctx = &os; + out.force_header = false; if (csv_output && !metric_only) { print_noise(counter, noise); @@ -1178,11 +1194,81 @@ static void aggr_update_shadow(void) } } +static void collect_all_aliases(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evsel_list->entries), node); + list_for_each_entry_continue (alias, &evsel_list->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + nsec_counter(alias) != nsec_counter(counter)) + break; + alias->merged_stat = true; + cb(alias, data, false); + } +} + +static bool collect_data(struct perf_evsel *counter, + void (*cb)(struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; + cb(counter, data, true); + if (!no_merge) + collect_all_aliases(counter, cb, data); + return true; +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, s2; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct perf_counts_values *counts; + + s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); + if (s2 != ad->id) + continue; + if (first) + ad->nr++; + counts = perf_counts(counter->counts, cpu, 0); + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (counts->ena == 0 || counts->run == 0 || + counter->counts->scaled == -1) { + ad->ena = 0; + ad->run = 0; + break; + } + ad->val += counts->val; + ad->ena += counts->ena; + ad->run += counts->run; + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; struct perf_evsel *counter; - int cpu, s, s2, id, nr; + int s, id, nr; double uval; u64 ena, run, val; bool first; @@ -1197,23 +1283,21 @@ static void print_aggr(char *prefix) * Without each counter has its own line. */ for (s = 0; s < aggr_map->nr; s++) { + struct aggr_data ad; if (prefix && metric_only) fprintf(output, "%s", prefix); - id = aggr_map->map[s]; + ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { - val = ena = run = 0; - nr = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); - if (s2 != id) - continue; - val += perf_counts(counter->counts, cpu, 0)->val; - ena += perf_counts(counter->counts, cpu, 0)->ena; - run += perf_counts(counter->counts, cpu, 0)->run; - nr++; - } + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(counter, aggr_cb, &ad)) + continue; + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; if (first && metric_only) { first = false; aggr_printout(counter, id, nr); @@ -1257,6 +1341,21 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) } } +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->priv; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -1264,23 +1363,31 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { FILE *output = stat_config.output; - struct perf_stat_evsel *ps = counter->priv; - double avg = avg_stats(&ps->res_stats[0]); double uval; - double avg_enabled, avg_running; + struct caggr_data cd = { .avg = 0.0 }; - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); + if (!collect_data(counter, counter_aggr_cb, &cd)) + return; if (prefix && !metric_only) fprintf(output, "%s", prefix); - uval = avg * counter->scale; - printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); + uval = cd.avg * counter->scale; + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); if (!metric_only) fprintf(output, "\n"); } +static void counter_cb(struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + /* * Print out the results of a single counter: * does not use aggregated count in system-wide @@ -1293,9 +1400,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; + struct aggr_data ad = { .cpu = cpu }; + + if (!collect_data(counter, counter_cb, &ad)) + return; + val = ad.val; + ena = ad.ena; + run = ad.run; if (prefix) fprintf(output, "%s", prefix); @@ -1380,6 +1491,7 @@ static void print_metric_headers(const char *prefix, bool no_indent) out.ctx = &os; out.print_metric = print_metric_header; out.new_line = new_line_metric; + out.force_header = true; os.evsel = counter; perf_stat__print_shadow_stats(counter, 0, 0, @@ -1466,6 +1578,7 @@ static void print_header(int argc, const char **argv) static void print_footer(void) { FILE *output = stat_config.output; + int n; if (!null_run) fprintf(output, "\n"); @@ -1477,6 +1590,15 @@ static void print_footer(void) avg_stats(&walltime_nsecs_stats)); } fprintf(output, "\n\n"); + + if (print_free_counters_hint && + sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && + n > 0) + fprintf(output, +"Some events weren't counted. Try disabling the NMI watchdog:\n" +" echo 0 > /proc/sys/kernel/nmi_watchdog\n" +" perf stat ...\n" +" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } static void print_counters(struct timespec *ts, int argc, const char **argv) @@ -1633,6 +1755,7 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), + OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", @@ -2339,7 +2462,36 @@ static int __cmd_report(int argc, const char **argv) return 0; } -int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) +static void setup_system_wide(int forks) +{ + /* + * Make system wide (-a) the default target if + * no target was specified and one of following + * conditions is met: + * + * - there's no workload specified + * - there is workload specified but all requested + * events are system wide events + */ + if (!target__none(&target)) + return; + + if (!forks) + target.system_wide = true; + else { + struct perf_evsel *counter; + + evlist__for_each_entry(evsel_list, counter) { + if (!counter->system_wide) + return; + } + + if (evsel_list->nr_entries) + target.system_wide = true; + } +} + +int cmd_stat(int argc, const char **argv) { const char * const stat_usage[] = { "perf stat [] []", @@ -2361,6 +2513,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__collect_metric_expr(evsel_list); perf_stat__init_shadow_stats(); if (csv_sep) { @@ -2445,9 +2598,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; - /* Make system wide (-a) the default target. */ - if (!argc && target__none(&target)) - target.system_wide = true; + setup_system_wide(argc); if (run_count < 0) { pr_err("Run count must be a positive number\n"); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e7eaa298d34a1ec020a123ade8296451ffe30b73..4e2e61695986350a93b04b322712267068974e9e 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -12,6 +12,8 @@ * of the License. */ +#include +#include #include #include "builtin.h" @@ -23,11 +25,12 @@ #include "util/cache.h" #include "util/evlist.h" #include "util/evsel.h" +#include #include #include #include "util/symbol.h" +#include "util/thread.h" #include "util/callchain.h" -#include "util/strlist.h" #include "perf.h" #include "util/header.h" @@ -1773,7 +1776,7 @@ static int timechart__io_record(int argc, const char **argv) for (i = 0; i < (unsigned int)argc; i++) *p++ = argv[i]; - return cmd_record(rec_argc, rec_argv, NULL); + return cmd_record(rec_argc, rec_argv); } @@ -1864,7 +1867,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar for (j = 0; j < (unsigned int)argc; j++) *p++ = argv[j]; - return cmd_record(rec_argc, rec_argv, NULL); + return cmd_record(rec_argc, rec_argv); } static int @@ -1917,8 +1920,7 @@ parse_time(const struct option *opt, const char *arg, int __maybe_unused unset) return 0; } -int cmd_timechart(int argc, const char **argv, - const char *prefix __maybe_unused) +int cmd_timechart(int argc, const char **argv) { struct timechart tchart = { .tool = { @@ -1933,6 +1935,11 @@ int cmd_timechart(int argc, const char **argv, .merge_dist = 1000, }; const char *output_name = "output.svg"; + const struct option timechart_common_options[] = { + OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), + OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"), + OPT_END() + }; const struct option timechart_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_STRING('o', "output", &output_name, "file", "output file name"), @@ -1940,9 +1947,6 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK(0, "highlight", NULL, "duration or task name", "highlight tasks. Pass duration in ns or process name.", parse_highlight), - OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), - OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, - "output processes data only"), OPT_CALLBACK('p', "process", NULL, "process", "process selector. Pass a pid or process name.", parse_process), @@ -1962,22 +1966,18 @@ int cmd_timechart(int argc, const char **argv, "merge events that are merge-dist us apart", parse_time), OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"), - OPT_END() + OPT_PARENT(timechart_common_options), }; const char * const timechart_subcommands[] = { "record", NULL }; const char *timechart_usage[] = { "perf timechart [] {record}", NULL }; - const struct option timechart_record_options[] = { - OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), - OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, - "output processes data only"), OPT_BOOLEAN('I', "io-only", &tchart.io_only, "record only IO data"), OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"), - OPT_END() + OPT_PARENT(timechart_common_options), }; const char * const timechart_record_usage[] = { "perf timechart record []", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ab9077915763f19a71b1b36a8b50135e6a0ffbb3..10b6362ca0bf7e0b6d5fce4697b15645ba816f02 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -27,19 +27,20 @@ #include "util/drv_configs.h" #include "util/evlist.h" #include "util/evsel.h" +#include "util/event.h" #include "util/machine.h" #include "util/session.h" #include "util/symbol.h" #include "util/thread.h" #include "util/thread_map.h" #include "util/top.h" -#include "util/util.h" #include #include #include "util/parse-events.h" #include "util/cpumap.h" #include "util/xyarray.h" #include "util/sort.h" +#include "util/term.h" #include "util/intlist.h" #include "util/parse-branch-options.h" #include "arch/common.h" @@ -58,6 +59,7 @@ #include #include #include +#include #include #include @@ -72,6 +74,8 @@ #include #include +#include "sane_ctype.h" + static volatile int done; #define HEADER_LINE_NR 5 @@ -1075,7 +1079,7 @@ parse_percent_limit(const struct option *opt, const char *arg, const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP "\n\t\t\t\tDefault: fp,graph,0.5,caller,function"; -int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_top(int argc, const char **argv) { char errbuf[BUFSIZ]; struct perf_top top = { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 256f1fac6f7e0069ebb047cf080fa55999dd0ae1..4b2a5d2981970baf86e4e458ef4ced256fc98c0e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -21,9 +21,11 @@ #include "builtin.h" #include "util/color.h" #include "util/debug.h" +#include "util/event.h" #include "util/evlist.h" #include #include "util/machine.h" +#include "util/path.h" #include "util/session.h" #include "util/thread.h" #include @@ -31,23 +33,33 @@ #include "util/intlist.h" #include "util/thread_map.h" #include "util/stat.h" +#include "trace/beauty/beauty.h" #include "trace-event.h" #include "util/parse-events.h" #include "util/bpf-loader.h" #include "callchain.h" +#include "print_binary.h" +#include "string2.h" #include "syscalltbl.h" #include "rb_resort.h" +#include +#include #include /* FIXME: Still needed for audit_errno_to_name */ +#include +#include #include #include #include #include #include +#include #include #include #include +#include "sane_ctype.h" + #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif @@ -267,15 +279,6 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void ({ struct syscall_tp *fields = evsel->priv; \ fields->name.pointer(&fields->name, sample); }) -struct syscall_arg { - unsigned long val; - struct thread *thread; - struct trace *trace; - void *parm; - u8 idx; - u8 mask; -}; - struct strarray { int offset; int nr_entries; @@ -678,6 +681,10 @@ static struct syscall_fmt { { .name = "mlockall", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mmap", .hexret = true, +/* The standard mmap maps to old_mmap on s390x */ +#if defined(__s390x__) + .alias = "old_mmap", +#endif .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [2] = SCA_MMAP_PROT, /* prot */ [3] = SCA_MMAP_FLAGS, /* flags */ }, }, @@ -771,6 +778,10 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "statfs", .errmsg = true, }, + { .name = "statx", .errmsg = true, + .arg_scnprintf = { [0] = SCA_FDAT, /* flags */ + [2] = SCA_STATX_FLAGS, /* flags */ + [3] = SCA_STATX_MASK, /* mask */ }, }, { .name = "swapoff", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "swapon", .errmsg = true, @@ -821,12 +832,21 @@ struct syscall { void **arg_parm; }; -static size_t fprintf_duration(unsigned long t, FILE *fp) +/* + * We need to have this 'calculated' boolean because in some cases we really + * don't know what is the duration of a syscall, for instance, when we start + * a session and some threads are waiting for a syscall to finish, say 'poll', + * in which case all we can do is to print "( ? ) for duration and for the + * start timestamp. + */ +static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) { double duration = (double)t / NSEC_PER_MSEC; size_t printed = fprintf(fp, "("); - if (duration >= 1.0) + if (!calculated) + printed += fprintf(fp, " ? "); + else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); @@ -1028,13 +1048,27 @@ static bool trace__filter_duration(struct trace *trace, double t) return t < (trace->duration_filter * NSEC_PER_MSEC); } -static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) { double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; return fprintf(fp, "%10.3f ", ts); } +/* + * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are + * using ttrace->entry_time for a thread that receives a sys_exit without + * first having received a sys_enter ("poll" issued before tracing session + * starts, lost sys_enter exit due to ring buffer overflow). + */ +static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) +{ + if (tstamp > 0) + return __trace__fprintf_tstamp(trace, tstamp, fp); + + return fprintf(fp, " ? "); +} + static bool done = false; static bool interrupted = false; @@ -1045,10 +1079,10 @@ static void sig_handler(int sig) } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, u64 tstamp, FILE *fp) + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) { size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, fp); + printed += fprintf_duration(duration, duration_calculated, fp); if (trace->multiple_threads) { if (trace->show_comm) @@ -1450,7 +1484,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp duration = sample->time - ttrace->entry_time; - printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1497,7 +1531,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { - trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { @@ -1545,6 +1579,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, { long ret; u64 duration = 0; + bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0; struct syscall *sc = trace__syscall_info(trace, evsel, id); @@ -1573,6 +1608,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, duration = sample->time - ttrace->entry_time; if (trace__filter_duration(trace, duration)) goto out; + duration_calculated = true; } else if (trace->duration_filter) goto out; @@ -1588,7 +1624,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (trace->summary_only) goto out; - trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output); + trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); if (ttrace->entry_pending) { fprintf(trace->output, "%-70s", ttrace->entry_str); @@ -1653,15 +1689,17 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ttrace = thread__priv(thread); if (!ttrace) - goto out; + goto out_put; filename_len = strlen(filename); + if (filename_len == 0) + goto out_put; if (ttrace->filename.namelen < filename_len) { char *f = realloc(ttrace->filename.name, filename_len + 1); if (f == NULL) - goto out; + goto out_put; ttrace->filename.namelen = filename_len; ttrace->filename.name = f; @@ -1671,12 +1709,12 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ttrace->filename.pending_open = true; if (!ttrace->filename.ptr) - goto out; + goto out_put; entry_str_len = strlen(ttrace->entry_str); remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ if (remaining_space <= 0) - goto out; + goto out_put; if (filename_len > (size_t)remaining_space) { filename += filename_len - remaining_space; @@ -1690,6 +1728,8 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, ttrace->filename.ptr = 0; ttrace->filename.entry_str_pos = 0; +out_put: + thread__put(thread); out: return 0; } @@ -1710,6 +1750,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs ttrace->runtime_ms += runtime_ms; trace->runtime_ms += runtime_ms; +out_put: thread__put(thread); return 0; @@ -1720,8 +1761,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs (pid_t)perf_evsel__intval(evsel, sample, "pid"), runtime, perf_evsel__intval(evsel, sample, "vruntime")); - thread__put(thread); - return 0; + goto out_put; } static void bpf_output__printer(enum binary_printer_ops op, @@ -1851,7 +1891,7 @@ static int trace__pgfault(struct trace *trace, thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); + trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); fprintf(trace->output, "%sfault [", evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? @@ -1920,7 +1960,7 @@ static int trace__process_sample(struct perf_tool *tool, thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); if (thread && thread__is_filtered(thread)) - return 0; + goto out; trace__set_base_time(trace, evsel, sample); @@ -1928,7 +1968,8 @@ static int trace__process_sample(struct perf_tool *tool, ++trace->nr_events; handler(trace, evsel, event, sample); } - +out: + thread__put(thread); return err; } @@ -1988,7 +2029,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv) for (i = 0; i < (unsigned int)argc; i++) rec_argv[j++] = argv[i]; - return cmd_record(j, rec_argv, NULL); + return cmd_record(j, rec_argv); } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); @@ -2415,8 +2456,9 @@ static int trace__replay(struct trace *trace) trace->tool.exit = perf_event__process_exit; trace->tool.fork = perf_event__process_fork; trace->tool.attr = perf_event__process_attr; - trace->tool.tracing_data = perf_event__process_tracing_data; + trace->tool.tracing_data = perf_event__process_tracing_data; trace->tool.build_id = perf_event__process_build_id; + trace->tool.namespaces = perf_event__process_namespaces; trace->tool.ordered_events = true; trace->tool.ordering_requires_timestamps = true; @@ -2785,7 +2827,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, return err; } -int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_trace(int argc, const char **argv) { const char *trace_usage[] = { "perf trace [] []", diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 9b10cda6b6dcca3ef7ffedb95aec802851401464..d25149456a2fc336b5d817c2d80c86d73cb06503 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -1,9 +1,9 @@ -#include "util/util.h" #include "builtin.h" #include "perf.h" +#include +#include -int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused, - const char *prefix __maybe_unused) +int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 036e1e35b1a8fa5ed64439cb5c73d30558ed71b4..d4d19fe3d050a86e2f0d868e1729a6c5fbb95cb8 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -2,46 +2,42 @@ #define BUILTIN_H #include "util/util.h" -#include "util/strbuf.h" extern const char perf_usage_string[]; extern const char perf_more_info_string[]; void list_common_cmds_help(void); const char *help_unknown_cmd(const char *cmd); -void prune_packed_objects(int); -int read_line_with_nul(char *buf, int size, FILE *file); -int check_pager_config(const char *cmd); -int cmd_annotate(int argc, const char **argv, const char *prefix); -int cmd_bench(int argc, const char **argv, const char *prefix); -int cmd_buildid_cache(int argc, const char **argv, const char *prefix); -int cmd_buildid_list(int argc, const char **argv, const char *prefix); -int cmd_config(int argc, const char **argv, const char *prefix); -int cmd_c2c(int argc, const char **argv, const char *prefix); -int cmd_diff(int argc, const char **argv, const char *prefix); -int cmd_evlist(int argc, const char **argv, const char *prefix); -int cmd_help(int argc, const char **argv, const char *prefix); -int cmd_sched(int argc, const char **argv, const char *prefix); -int cmd_kallsyms(int argc, const char **argv, const char *prefix); -int cmd_list(int argc, const char **argv, const char *prefix); -int cmd_record(int argc, const char **argv, const char *prefix); -int cmd_report(int argc, const char **argv, const char *prefix); -int cmd_stat(int argc, const char **argv, const char *prefix); -int cmd_timechart(int argc, const char **argv, const char *prefix); -int cmd_top(int argc, const char **argv, const char *prefix); -int cmd_script(int argc, const char **argv, const char *prefix); -int cmd_version(int argc, const char **argv, const char *prefix); -int cmd_probe(int argc, const char **argv, const char *prefix); -int cmd_kmem(int argc, const char **argv, const char *prefix); -int cmd_lock(int argc, const char **argv, const char *prefix); -int cmd_kvm(int argc, const char **argv, const char *prefix); -int cmd_test(int argc, const char **argv, const char *prefix); -int cmd_trace(int argc, const char **argv, const char *prefix); -int cmd_inject(int argc, const char **argv, const char *prefix); -int cmd_mem(int argc, const char **argv, const char *prefix); -int cmd_data(int argc, const char **argv, const char *prefix); -int cmd_ftrace(int argc, const char **argv, const char *prefix); +int cmd_annotate(int argc, const char **argv); +int cmd_bench(int argc, const char **argv); +int cmd_buildid_cache(int argc, const char **argv); +int cmd_buildid_list(int argc, const char **argv); +int cmd_config(int argc, const char **argv); +int cmd_c2c(int argc, const char **argv); +int cmd_diff(int argc, const char **argv); +int cmd_evlist(int argc, const char **argv); +int cmd_help(int argc, const char **argv); +int cmd_sched(int argc, const char **argv); +int cmd_kallsyms(int argc, const char **argv); +int cmd_list(int argc, const char **argv); +int cmd_record(int argc, const char **argv); +int cmd_report(int argc, const char **argv); +int cmd_stat(int argc, const char **argv); +int cmd_timechart(int argc, const char **argv); +int cmd_top(int argc, const char **argv); +int cmd_script(int argc, const char **argv); +int cmd_version(int argc, const char **argv); +int cmd_probe(int argc, const char **argv); +int cmd_kmem(int argc, const char **argv); +int cmd_lock(int argc, const char **argv); +int cmd_kvm(int argc, const char **argv); +int cmd_test(int argc, const char **argv); +int cmd_trace(int argc, const char **argv); +int cmd_inject(int argc, const char **argv); +int cmd_mem(int argc, const char **argv); +int cmd_data(int argc, const char **argv); +int cmd_ftrace(int argc, const char **argv); int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c747bfd7f14da88e5478692a00cb30d587852a08..83fe2202382eda0cd95eafc15a726cb9022527d8 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -1,7 +1,9 @@ #!/bin/sh HEADERS=' +include/uapi/linux/fcntl.h include/uapi/linux/perf_event.h +include/uapi/linux/stat.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index ac3efd396a727298802b49e6a24e530f696fcdf7..2d0caf20ff3a49dc11f8406959f46fd6d687c06b 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common perf-data mainporcelain common perf-diff mainporcelain common +perf-c2c mainporcelain common perf-config mainporcelain common perf-evlist mainporcelain common perf-ftrace mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 6d5479e03e0dbc080531f3037d5fe5a6644bc3c9..628a5e412cb14b00d9ead40bdcbb13db4c82bd75 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -17,11 +17,18 @@ #include #include "util/bpf-loader.h" #include "util/debug.h" +#include "util/event.h" #include #include +#include #include +#include #include #include +#include +#include +#include +#include const char perf_usage_string[] = "perf [--version] [--help] [OPTIONS] COMMAND [ARGS]"; @@ -34,7 +41,7 @@ const char *input_name; struct cmd_struct { const char *cmd; - int (*fn)(int, const char **, const char *); + int (*fn)(int, const char **); int option; }; @@ -88,7 +95,7 @@ static int pager_command_config(const char *var, const char *value, void *data) } /* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ -int check_pager_config(const char *cmd) +static int check_pager_config(const char *cmd) { int err; struct pager_config c; @@ -267,71 +274,6 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) return handled; } -static int handle_alias(int *argcp, const char ***argv) -{ - int envchanged = 0, ret = 0, saved_errno = errno; - int count, option_count; - const char **new_argv; - const char *alias_command; - char *alias_string; - - alias_command = (*argv)[0]; - alias_string = alias_lookup(alias_command); - if (alias_string) { - if (alias_string[0] == '!') { - if (*argcp > 1) { - struct strbuf buf; - - if (strbuf_init(&buf, PATH_MAX) < 0 || - strbuf_addstr(&buf, alias_string) < 0 || - sq_quote_argv(&buf, (*argv) + 1, - PATH_MAX) < 0) - die("Failed to allocate memory."); - free(alias_string); - alias_string = buf.buf; - } - ret = system(alias_string + 1); - if (ret >= 0 && WIFEXITED(ret) && - WEXITSTATUS(ret) != 127) - exit(WEXITSTATUS(ret)); - die("Failed to run '%s' when expanding alias '%s'", - alias_string + 1, alias_command); - } - count = split_cmdline(alias_string, &new_argv); - if (count < 0) - die("Bad alias.%s string", alias_command); - option_count = handle_options(&new_argv, &count, &envchanged); - if (envchanged) - die("alias '%s' changes environment variables\n" - "You can use '!perf' in the alias to do this.", - alias_command); - memmove(new_argv - option_count, new_argv, - count * sizeof(char *)); - new_argv -= option_count; - - if (count < 1) - die("empty alias for %s", alias_command); - - if (!strcmp(alias_command, new_argv[0])) - die("recursive alias: %s", alias_command); - - new_argv = realloc(new_argv, sizeof(char *) * - (count + *argcp + 1)); - /* insert after command name */ - memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp); - new_argv[count + *argcp] = NULL; - - *argv = new_argv; - *argcp += count - 1; - - ret = 1; - } - - errno = saved_errno; - - return ret; -} - #define RUN_SETUP (1<<0) #define USE_PAGER (1<<1) @@ -339,13 +281,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status; struct stat st; - const char *prefix; char sbuf[STRERR_BUFSIZE]; - prefix = NULL; - if (p->option & RUN_SETUP) - prefix = NULL; /* setup_perf_directory(); */ - if (use_browser == -1) use_browser = check_browser_config(p->cmd); @@ -356,7 +293,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) commit_pager_choice(); perf_env__set_cmdline(&perf_env, argc, argv); - status = p->fn(argc, argv, prefix); + status = p->fn(argc, argv); perf_config__exit(); exit_browser(status); perf_env__exit(&perf_env); @@ -397,16 +334,6 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; unsigned int i; - static const char ext[] = STRIP_EXTENSION; - - if (sizeof(ext) > 1) { - i = strlen(argv[0]) - strlen(ext); - if (i > 0 && !strcmp(argv[0] + i, ext)) { - char *argv0 = strdup(argv[0]); - argv[0] = cmd = argv0; - argv0[i] = '\0'; - } - } /* Turn "perf cmd --help" into "perf help cmd" */ if (argc > 1 && !strcmp(argv[1], "--help")) { @@ -448,7 +375,8 @@ static void execv_dashed_external(const char **argv) if (status != -ERR_RUN_COMMAND_EXEC) { if (IS_RUN_COMMAND_ERR(status)) { do_die: - die("unable to run '%s'", argv[0]); + pr_err("FATAL: unable to run '%s'", argv[0]); + status = -128; } exit(-status); } @@ -460,25 +388,12 @@ static void execv_dashed_external(const char **argv) static int run_argv(int *argcp, const char ***argv) { - int done_alias = 0; + /* See if it's an internal command */ + handle_internal_command(*argcp, *argv); - while (1) { - /* See if it's an internal command */ - handle_internal_command(*argcp, *argv); - - /* .. then try the external ones */ - execv_dashed_external(*argv); - - /* It could be an alias -- this works around the insanity - * of overriding "perf log" with "perf show" by having - * alias.log = show - */ - if (done_alias || !handle_alias(argcp, argv)) - break; - done_alias = 1; - } - - return done_alias; + /* .. then try the external ones */ + execv_dashed_external(*argv); + return 0; } static void pthread__block_sigwinch(void) @@ -566,7 +481,7 @@ int main(int argc, const char **argv) #ifdef HAVE_LIBAUDIT_SUPPORT setup_path(); argv[0] = "trace"; - return cmd_trace(argc, argv, NULL); + return cmd_trace(argc, argv); #else fprintf(stderr, "trace command not available: missing audit-libs devel package at build time.\n"); @@ -611,17 +526,12 @@ int main(int argc, const char **argv) while (1) { static int done_help; - int was_alias = run_argv(&argc, &argv); + + run_argv(&argc, &argv); if (errno != ENOENT) break; - if (was_alias) { - fprintf(stderr, "Expansion of alias '%s' failed; " - "'%s' is not a perf-command\n", - cmd, argv[0]); - goto out; - } if (!done_help) { cmd = argv[0] = help_unknown_cmd(cmd); done_help = 1; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1c27d947c2fe55c9f0d0390695e6b586d839ec0b..806c216a1078210a028167c26eb442cec8e219b9 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -50,6 +50,7 @@ struct record_opts { bool running_time; bool full_auxtrace; bool auxtrace_snapshot_mode; + bool record_namespaces; bool record_switch_events; bool all_kernel; bool all_user; diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 9213a1273697a801d6b75f4a8804424e68eb6ab7..999a4e8781621677821440e80ec8491689bbc6e3 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -2,7 +2,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o pmu-events-y += pmu-events.o -JDIR = pmu-events/arch/$(ARCH) +JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') # @@ -10,4 +10,4 @@ JSON = $(shell [ -d $(JDIR) ] && \ # directory and create tables in pmu-events.c. # $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS) - $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) + $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) diff --git a/tools/perf/pmu-events/arch/x86/broadwell/uncore.json b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json new file mode 100644 index 0000000000000000000000000000000000000000..28e1e159a3cb5a44dd323b064976e907cd574291 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json @@ -0,0 +1,278 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x41", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x44", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x48", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x11", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M", + "BriefDescription": "L3 Lookup read request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x21", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M", + "BriefDescription": "L3 Lookup write request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x81", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M", + "BriefDescription": "L3 Lookup any request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x18", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I", + "BriefDescription": "L3 Lookup read request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x88", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I", + "BriefDescription": "L3 Lookup any request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x1f", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI", + "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x2f", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI", + "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x8f", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI", + "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x86", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES", + "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x16", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES", + "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x26", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES", + "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x02", + "EventName": "UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT", + "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.", + "PublicDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.", + "Counter": "0,", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x02", + "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT", + "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode", + "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.;", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "NCU", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json index 076459c51d4e1d2cc4b91e04fb2b4efbb670ca3a..58ed6d33d1f47fcaa4b7961e0587ca4af55a9a0a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json index d17dc235f7346b7efa5826c39f74329de366e125..f4b0745cdbbfcc86163f7e52c8b61854c60d30de 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", - "EventName": "UNC_M_CLOCKTICKS", + "EventName": "UNC_M_DCLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", - "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_DCLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", - "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_DCLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", - "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_DCLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json index b44d43088bbb793fb407d5d99608f8eee5ceb0b1..dd1b95655d1d763c5dd1771a80ff15f3f28c5f58 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json index 076459c51d4e1d2cc4b91e04fb2b4efbb670ca3a..58ed6d33d1f47fcaa4b7961e0587ca4af55a9a0a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json index 39387f7909b265cebd52edd7967ef7d76fa3f6fa..824961318c1e5ffd8cff04c5edc34367a52afe55 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -10,7 +10,7 @@ { "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.DATA", + "EventName": "QPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x2", @@ -19,7 +19,7 @@ { "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", + "EventName": "QPI_CTL_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json index d17dc235f7346b7efa5826c39f74329de366e125..66eed399724cd5bc574bba1abb6c9b1f4bb62ac3 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json index b44d43088bbb793fb407d5d99608f8eee5ceb0b1..dd1b95655d1d763c5dd1771a80ff15f3f28c5f58 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/haswell/uncore.json b/tools/perf/pmu-events/arch/x86/haswell/uncore.json new file mode 100644 index 0000000000000000000000000000000000000000..3ef5c21fef56b7962dfffd1ab128b4a3bd07b37e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/haswell/uncore.json @@ -0,0 +1,374 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x21", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL", + "BriefDescription": "An external snoop misses in some processor core.", + "PublicDescription": "An external snoop misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x41", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x24", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL", + "BriefDescription": "An external snoop hits a non-modified line in some processor core.", + "PublicDescription": "An external snoop hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x44", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x84", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x28", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL", + "BriefDescription": "An external snoop hits a modified line in some processor core.", + "PublicDescription": "An external snoop hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x48", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x88", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x11", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M", + "BriefDescription": "L3 Lookup read request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x21", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M", + "BriefDescription": "L3 Lookup write request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x41", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_M", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x81", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M", + "BriefDescription": "L3 Lookup any request that access cache and found line in M-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x18", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I", + "BriefDescription": "L3 Lookup read request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x28", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_I", + "BriefDescription": "L3 Lookup write request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x48", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_I", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x88", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I", + "BriefDescription": "L3 Lookup any request that access cache and found line in I-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x1f", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI", + "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x2f", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI", + "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x4f", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x8f", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI", + "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x86", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES", + "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x46", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_ES", + "BriefDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x16", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES", + "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x26", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES", + "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x83", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.All", + "BriefDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory)", + "PublicDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory).", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "NCU", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json index 076459c51d4e1d2cc4b91e04fb2b4efbb670ca3a..58ed6d33d1f47fcaa4b7961e0587ca4af55a9a0a 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1,2,3", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1,2,3", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -212,7 +212,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -221,7 +221,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to local home agent. Derived from unc_h_requests.reads_local", + "BriefDescription": "read requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_LOCAL", @@ -230,7 +230,7 @@ "Unit": "HA" }, { - "BriefDescription": "read requests to remote home agent. Derived from unc_h_requests.reads_remote", + "BriefDescription": "read requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS_REMOTE", @@ -239,7 +239,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -248,7 +248,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to local home agent. Derived from unc_h_requests.writes_local", + "BriefDescription": "write requests to local home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_LOCAL", @@ -257,7 +257,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to remote home agent. Derived from unc_h_requests.writes_remote", + "BriefDescription": "write requests to remote home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES_REMOTE", @@ -266,7 +266,7 @@ "Unit": "HA" }, { - "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously). Derived from unc_h_snoop_resp.rspcnflct", + "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT", @@ -275,7 +275,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -285,7 +285,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -295,7 +295,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -305,7 +305,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json index 39387f7909b265cebd52edd7967ef7d76fa3f6fa..824961318c1e5ffd8cff04c5edc34367a52afe55 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -10,7 +10,7 @@ { "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.DATA", + "EventName": "QPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x2", @@ -19,7 +19,7 @@ { "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", "Counter": "0,1,2,3", - "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", + "EventName": "QPI_CTL_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "8Bytes", "UMask": "0x4", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json index d17dc235f7346b7efa5826c39f74329de366e125..66eed399724cd5bc574bba1abb6c9b1f4bb62ac3 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json @@ -3,7 +3,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -13,48 +13,51 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Pre-charges due to page misses. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Pre-charges due to page misses", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -63,7 +66,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for reads. Derived from unc_m_pre_count.rd", + "BriefDescription": "Pre-charge for reads", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.RD", @@ -72,7 +75,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Pre-charge for writes. Derived from unc_m_pre_count.wr", + "BriefDescription": "Pre-charge for writes", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.WR", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json index b44d43088bbb793fb407d5d99608f8eee5ceb0b1..dd1b95655d1d763c5dd1771a80ff15f3f28c5f58 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json @@ -1,83 +1,91 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C0 and C1. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C3. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "C6 and C7. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "External Prochot. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xA", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Thermal Strongest Upper Limit Cycles. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "OS Strongest Upper Limit Cycles. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Power Strongest Upper Limit Cycles. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x74", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json b/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json new file mode 100644 index 0000000000000000000000000000000000000000..42c70eed05a2f0b7a89214444e4d151c65924fe9 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json @@ -0,0 +1,314 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x01", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS", + "BriefDescription": "A snoop misses in some processor core.", + "PublicDescription": "A snoop misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x02", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL", + "BriefDescription": "A snoop invalidates a non-modified line in some processor core.", + "PublicDescription": "A snoop invalidates a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x04", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT", + "BriefDescription": "A snoop hits a non-modified line in some processor core.", + "PublicDescription": "A snoop hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x08", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM", + "BriefDescription": "A snoop hits a modified line in some processor core.", + "PublicDescription": "A snoop hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x10", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M", + "BriefDescription": "A snoop invalidates a modified line in some processor core.", + "PublicDescription": "A snoop invalidates a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x20", + "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x40", + "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x80", + "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x01", + "EventName": "UNC_CBO_CACHE_LOOKUP.M", + "BriefDescription": "LLC lookup request that access cache and found line in M-state.", + "PublicDescription": "LLC lookup request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x02", + "EventName": "UNC_CBO_CACHE_LOOKUP.E", + "BriefDescription": "LLC lookup request that access cache and found line in E-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x04", + "EventName": "UNC_CBO_CACHE_LOOKUP.S", + "BriefDescription": "LLC lookup request that access cache and found line in S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x08", + "EventName": "UNC_CBO_CACHE_LOOKUP.I", + "BriefDescription": "LLC lookup request that access cache and found line in I-state.", + "PublicDescription": "LLC lookup request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x10", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable read requests.", + "PublicDescription": "Filter on processor core initiated cacheable read requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x20", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable write requests.", + "PublicDescription": "Filter on processor core initiated cacheable write requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x40", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER", + "BriefDescription": "Filter on external snoop requests.", + "PublicDescription": "Filter on external snoop requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x80", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER", + "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.", + "PublicDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x80", + "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS", + "BriefDescription": "Counts the number of LLC evictions allocated.", + "PublicDescription": "Counts the number of LLC evictions allocated.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x83", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL", + "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "PublicDescription": "Number of requests allocated in Coherency Tracker.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL", + "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "10", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "Fixed", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x06", + "EventName": "UNC_CBO_CACHE_LOOKUP.ES", + "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json index 2efdc6772e0b6a3a081d9b10b4644102363394c3..2674105948331ca104395cee5bb659ad318757bf 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -237,7 +237,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory. Derived from unc_c_tor_occupancy.miss_local", + "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL", "PerPkg": "1", @@ -254,7 +254,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory. Derived from unc_c_tor_occupancy.miss_remote", + "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE", "PerPkg": "1", @@ -262,7 +262,7 @@ "Unit": "CBO" }, { - "BriefDescription": "Read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "Read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -271,7 +271,7 @@ "Unit": "HA" }, { - "BriefDescription": "Write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "Write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", @@ -280,7 +280,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache along with writeback to memory. Derived from unc_h_snoop_resp.rsp_fwd_wb", + "BriefDescription": "M line forwarded from remote cache along with writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB", @@ -290,7 +290,7 @@ "Unit": "HA" }, { - "BriefDescription": "M line forwarded from remote cache with no writeback to memory. Derived from unc_h_snoop_resp.rspifwd", + "BriefDescription": "M line forwarded from remote cache with no writeback to memory", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPIFWD", @@ -300,7 +300,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line response from remote cache. Derived from unc_h_snoop_resp.rsps", + "BriefDescription": "Shared line response from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPS", @@ -310,7 +310,7 @@ "Unit": "HA" }, { - "BriefDescription": "Shared line forwarded from remote cache. Derived from unc_h_snoop_resp.rspsfwd", + "BriefDescription": "Shared line forwarded from remote cache", "Counter": "0,1,2,3", "EventCode": "0x21", "EventName": "UNC_H_SNOOP_RESP.RSPSFWD", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json index d7e2fda1d695cabdcfeb0ad49aa9e85eefcc1fbc..b798a860bc81487873a07ea41fe5882b78b9afd4 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -8,25 +8,27 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where receiving QPI link is in half-width mode. Derived from unc_q_rxl0p_power_cycles", + "BriefDescription": "Cycles where receiving QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0x10", "EventName": "UNC_Q_RxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "rxl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where transmitting QPI link is in half-width mode. Derived from unc_q_txl0p_power_cycles", + "BriefDescription": "Cycles where transmitting QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_Q_TxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "txl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", + "BriefDescription": "Number of data flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.DATA", "PerPkg": "1", @@ -35,7 +37,7 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", + "BriefDescription": "Number of non data (control) flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json index ac4ad4d6357b00bf98adf866904cc7786eed2855..df4b43294fa00ccd11e0594da61ec7aaeb8c03a2 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Memory page activates for reads and writes. Derived from unc_m_act_count.rd", + "BriefDescription": "Memory page activates for reads and writes", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_M_ACT_COUNT.RD", @@ -13,7 +13,7 @@ "BriefDescription": "Read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0x3", @@ -23,48 +23,51 @@ "BriefDescription": "Write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "ScaleUnit": "64Bytes", "UMask": "0xC", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Memory page conflicts. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Memory page conflicts", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index dc2586db0dfce9ba5e0f2125f65d2cf37c835b40..d40498f2cb1e9a4f3e881a1145743934f0b21576 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -1,44 +1,48 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_BAND0_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_BAND1_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_BAND2_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_BAND3_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -49,6 +53,7 @@ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -59,6 +64,7 @@ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -69,6 +75,7 @@ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -79,90 +86,100 @@ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xa", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when thermal conditions are the upper limit on frequency. This is related to the THERMAL_THROTTLE CYCLES_ABOVE_TEMP event, which always counts cycles when we are above the thermal temperature. This event (STRONGEST_UPPER_LIMIT) is sampled at the output of the algorithm that determines the actual frequency, while THERMAL_THROTTLE looks at the input. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency. Derived from unc_p_freq_max_current_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x7", "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_current_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the system is changing frequency. This can not be filtered by thread ID. One can also use it with the occupancy counter that monitors number of threads in C0 to estimate the performance impact that frequency transitions had on the system. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventCode": "0x60", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -173,6 +190,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", "Filter": "filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -183,6 +201,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", "Filter": "filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -193,6 +212,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", "Filter": "filter_band2=3000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -203,6 +223,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", "Filter": "filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -213,6 +234,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", "Filter": "edge=1,filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -223,6 +245,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -233,6 +256,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band2=4000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -243,6 +267,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json index 2f23cf0129e7ad2a61c1c04d700bce0645a92126..3fa61d962607e6a48711cf4cd0ec286c4a87c31b 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json @@ -1,13 +1,13 @@ [ { - "BriefDescription": "Uncore cache clock ticks. Derived from unc_c_clockticks", + "BriefDescription": "Uncore cache clock ticks", "Counter": "0,1,2,3", "EventName": "UNC_C_CLOCKTICKS", "PerPkg": "1", "Unit": "CBO" }, { - "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch). Derived from unc_c_llc_lookup.any", + "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)", "Counter": "0,1", "EventCode": "0x34", "EventName": "UNC_C_LLC_LOOKUP.ANY", @@ -18,7 +18,7 @@ "Unit": "CBO" }, { - "BriefDescription": "M line evictions from LLC (writebacks to memory). Derived from unc_c_llc_victims.m_state", + "BriefDescription": "M line evictions from LLC (writebacks to memory)", "Counter": "0,1", "EventCode": "0x37", "EventName": "UNC_C_LLC_VICTIMS.M_STATE", @@ -171,11 +171,12 @@ "Unit": "CBO" }, { - "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth. Derived from unc_c_tor_occupancy.miss_all", + "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth", "EventCode": "0x36", "EventName": "UNC_C_TOR_OCCUPANCY.MISS_ALL", "Filter": "filter_opc=0x182", "MetricExpr": "(UNC_C_TOR_OCCUPANCY.MISS_ALL / UNC_C_CLOCKTICKS) * 100.", + "MetricName": "tor_occupancy.miss_all %", "PerPkg": "1", "UMask": "0xa", "Unit": "CBO" @@ -189,7 +190,7 @@ "Unit": "CBO" }, { - "BriefDescription": "read requests to home agent. Derived from unc_h_requests.reads", + "BriefDescription": "read requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.READS", @@ -198,7 +199,7 @@ "Unit": "HA" }, { - "BriefDescription": "write requests to home agent. Derived from unc_h_requests.writes", + "BriefDescription": "write requests to home agent", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_H_REQUESTS.WRITES", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json index 63351876eb57afeb65276c5225d76fbfce7f739d..1b53c0e609e33ad2a8a83133132a308b8a1d63f8 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events. Derived from unc_q_clockticks", + "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events", "Counter": "0,1,2,3", "EventCode": "0x14", "EventName": "UNC_Q_CLOCKTICKS", @@ -8,25 +8,27 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where receiving QPI link is in half-width mode. Derived from unc_q_rxl0p_power_cycles", + "BriefDescription": "Cycles where receiving QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0x10", "EventName": "UNC_Q_RxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "rxl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Cycles where transmitting QPI link is in half-width mode. Derived from unc_q_txl0p_power_cycles", + "BriefDescription": "Cycles where transmitting QPI link is in half-width mode", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_Q_TxL0P_POWER_CYCLES", "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.", + "MetricName": "txl0p_power_cycles %", "PerPkg": "1", "Unit": "QPI LL" }, { - "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data", + "BriefDescription": "Number of data flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.DATA", "PerPkg": "1", @@ -35,7 +37,7 @@ "Unit": "QPI LL" }, { - "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data", + "BriefDescription": "Number of non data (control) flits transmitted ", "Counter": "0,1,2,3", "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json index e2cf6daa7b37cdeb195193bce36da2df73545758..8551cebeba23b5a92949cd62dcbc455a7a8f52a7 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Memory page activates. Derived from unc_m_act_count", + "BriefDescription": "Memory page activates", "Counter": "0,1,2,3", "EventCode": "0x1", "EventName": "UNC_M_ACT_COUNT", @@ -11,7 +11,7 @@ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.RD", + "EventName": "LLC_MISSES.MEM_READ", "PerPkg": "1", "UMask": "0x3", "Unit": "iMC" @@ -20,47 +20,50 @@ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "UNC_M_CAS_COUNT.WR", + "EventName": "LLC_MISSES.MEM_WRITE", "PerPkg": "1", "UMask": "0xc", "Unit": "iMC" }, { - "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events. Derived from unc_m_clockticks", + "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events", "Counter": "0,1,2,3", "EventName": "UNC_M_CLOCKTICKS", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode. Derived from unc_m_power_channel_ppd", + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", "Counter": "0,1,2,3", "EventCode": "0x85", "EventName": "UNC_M_POWER_CHANNEL_PPD", "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles all ranks are in critical thermal throttle. Derived from unc_m_power_critical_throttle_cycles", + "BriefDescription": "Cycles all ranks are in critical thermal throttle", "Counter": "0,1,2,3", "EventCode": "0x86", "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_critical_throttle_cycles %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Cycles Memory is in self refresh power mode. Derived from unc_m_power_self_refresh", + "BriefDescription": "Cycles Memory is in self refresh power mode", "Counter": "0,1,2,3", "EventCode": "0x43", "EventName": "UNC_M_POWER_SELF_REFRESH", "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", "PerPkg": "1", "Unit": "iMC" }, { - "BriefDescription": "Memory page conflicts. Derived from unc_m_pre_count.page_miss", + "BriefDescription": "Memory page conflicts", "Counter": "0,1,2,3", "EventCode": "0x2", "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", @@ -69,7 +72,7 @@ "Unit": "iMC" }, { - "BriefDescription": "Occupancy counter for memory read queue. Derived from unc_m_rpq_occupancy", + "BriefDescription": "Occupancy counter for memory read queue", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_M_RPQ_OCCUPANCY", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index bbe36d547386ae7ee7ff9696874ffac82ebe0022..16034bfd06dd91ae575abaef2c219e24100cfa14 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -1,44 +1,48 @@ [ { - "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events. Derived from unc_p_clockticks", + "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events", "Counter": "0,1,2,3", "EventName": "UNC_P_CLOCKTICKS", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_BAND0_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_BAND1_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_BAND2_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles", + "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency", "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_BAND3_CYCLES", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -49,6 +53,7 @@ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band0_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -59,6 +64,7 @@ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band1_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -69,6 +75,7 @@ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band2_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -79,89 +86,99 @@ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS", "Filter": "edge=1", "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_band3_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c0", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0", "Filter": "occ_sel=1", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c0 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details. Derived from unc_p_power_state_occupancy.cores_c3", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3", "Filter": "occ_sel=2", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c3 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events . Derived from unc_p_power_state_occupancy.cores_c6", + "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ", "Counter": "0,1,2,3", "EventCode": "0x80", "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6", "Filter": "occ_sel=3", "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "power_state_occupancy.cores_c6 %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip. Derived from unc_p_prochot_external_cycles", + "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip", "Counter": "0,1,2,3", "EventCode": "0xa", "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES", "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "prochot_external_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency. Derived from unc_p_freq_max_limit_thermal_cycles", + "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_limit_thermal_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency. Derived from unc_p_freq_max_os_cycles", + "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x6", "EventName": "UNC_P_FREQ_MAX_OS_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_os_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency. Derived from unc_p_freq_max_power_cycles", + "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x5", "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_power_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency. Derived from unc_p_freq_max_current_cycles", + "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency", "Counter": "0,1,2,3", "EventCode": "0x7", "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES", "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_max_current_cycles %", "PerPkg": "1", "Unit": "PCU" }, { - "BriefDescription": "Cycles spent changing Frequency. Derived from unc_p_freq_trans_cycles", + "BriefDescription": "Cycles spent changing Frequency", "Counter": "0,1,2,3", "EventName": "UNC_P_FREQ_TRANS_CYCLES", "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_trans_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -172,6 +189,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", "Filter": "filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -182,6 +200,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", "Filter": "filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -192,6 +211,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", "Filter": "filter_band2=3000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -202,6 +222,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", "Filter": "filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -212,6 +233,7 @@ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", "Filter": "edge=1,filter_band0=1200", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -222,6 +244,7 @@ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band1=2000", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -232,6 +255,7 @@ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band2=4000", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" }, @@ -242,6 +266,7 @@ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", "Filter": "edge=1,filter_band3=4000", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", + "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", "Unit": "PCU" } diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 12181bb1da2a8a294da7487b00287413fb3d8c54..d1a12e584c1b8e1223dd66266dac6745247e0aff 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -17,6 +17,7 @@ GenuineIntel-6-3A,v18,ivybridge,core GenuineIntel-6-3E,v19,ivytown,core GenuineIntel-6-2D,v20,jaketown,core GenuineIntel-6-57,v9,knightslanding,core +GenuineIntel-6-85,v9,knightslanding,core GenuineIntel-6-1E,v2,nehalemep,core GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json b/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json new file mode 100644 index 0000000000000000000000000000000000000000..42c70eed05a2f0b7a89214444e4d151c65924fe9 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json @@ -0,0 +1,314 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x01", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS", + "BriefDescription": "A snoop misses in some processor core.", + "PublicDescription": "A snoop misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x02", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL", + "BriefDescription": "A snoop invalidates a non-modified line in some processor core.", + "PublicDescription": "A snoop invalidates a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x04", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT", + "BriefDescription": "A snoop hits a non-modified line in some processor core.", + "PublicDescription": "A snoop hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x08", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM", + "BriefDescription": "A snoop hits a modified line in some processor core.", + "PublicDescription": "A snoop hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x10", + "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M", + "BriefDescription": "A snoop invalidates a modified line in some processor core.", + "PublicDescription": "A snoop invalidates a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x20", + "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x40", + "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x80", + "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER", + "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x01", + "EventName": "UNC_CBO_CACHE_LOOKUP.M", + "BriefDescription": "LLC lookup request that access cache and found line in M-state.", + "PublicDescription": "LLC lookup request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x02", + "EventName": "UNC_CBO_CACHE_LOOKUP.E", + "BriefDescription": "LLC lookup request that access cache and found line in E-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x04", + "EventName": "UNC_CBO_CACHE_LOOKUP.S", + "BriefDescription": "LLC lookup request that access cache and found line in S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x08", + "EventName": "UNC_CBO_CACHE_LOOKUP.I", + "BriefDescription": "LLC lookup request that access cache and found line in I-state.", + "PublicDescription": "LLC lookup request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x10", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable read requests.", + "PublicDescription": "Filter on processor core initiated cacheable read requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x20", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER", + "BriefDescription": "Filter on processor core initiated cacheable write requests.", + "PublicDescription": "Filter on processor core initiated cacheable write requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x40", + "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER", + "BriefDescription": "Filter on external snoop requests.", + "PublicDescription": "Filter on external snoop requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x80", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER", + "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "PublicDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.", + "PublicDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x81", + "UMask": "0x80", + "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS", + "BriefDescription": "Counts the number of LLC evictions allocated.", + "PublicDescription": "Counts the number of LLC evictions allocated.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x83", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL", + "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "PublicDescription": "Number of requests allocated in Coherency Tracker.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL", + "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0,1", + "CounterMask": "10", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "ARB", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "Fixed", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x06", + "EventName": "UNC_CBO_CACHE_LOOKUP.ES", + "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/uncore.json b/tools/perf/pmu-events/arch/x86/skylake/uncore.json new file mode 100644 index 0000000000000000000000000000000000000000..dbc193252fb30d6e08bd704544bcb7cbd5337546 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylake/uncore.json @@ -0,0 +1,254 @@ +[ + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x41", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x81", + "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION", + "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x44", + "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x22", + "UMask": "0x48", + "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE", + "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x21", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M", + "BriefDescription": "L3 Lookup write request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x81", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M", + "BriefDescription": "L3 Lookup any request that access cache and found line in M-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x18", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I", + "BriefDescription": "L3 Lookup read request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x88", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I", + "BriefDescription": "L3 Lookup any request that access cache and found line in I-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x1f", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI", + "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x2f", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI", + "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x8f", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI", + "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x86", + "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES", + "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x16", + "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES", + "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "CBO", + "EventCode": "0x34", + "UMask": "0x26", + "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES", + "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state", + "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.", + "Counter": "0", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x02", + "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT", + "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode", + "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x81", + "UMask": "0x20", + "EventName": "UNC_ARB_TRK_REQUESTS.WRITES", + "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x84", + "UMask": "0x01", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.", + "Counter": "0,1", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "iMPH-U", + "EventCode": "0x80", + "UMask": "0x01", + "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST", + "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.;", + "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.", + "Counter": "0", + "CounterMask": "1", + "Invert": "0", + "EdgeDetect": "0" + }, + { + "Unit": "NCU", + "EventCode": "0x0", + "UMask": "0x01", + "EventName": "UNC_CLOCK.SOCKET", + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles", + "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "Counter": "FIXED", + "CounterMask": "0", + "Invert": "0", + "EdgeDetect": "0" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index eed09346a72afab1ede36b7b3b507f3dc5d62a84..baa073f3833475ac422e20d1ba080d18c11ed82b 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -195,6 +195,7 @@ static struct map { { "CBO", "uncore_cbox" }, { "QPI LL", "uncore_qpi" }, { "SBO", "uncore_sbox" }, + { "iMPH-U", "uncore_arb" }, {} }; @@ -291,7 +292,9 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg) + char *pmu, char *unit, char *perpkg, + char *metric_expr, + char *metric_name) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -315,6 +318,10 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.unit = \"%s\",\n", unit); if (perpkg) fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg); + if (metric_expr) + fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); + if (metric_name) + fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); fprintf(outfp, "},\n"); return 0; @@ -362,7 +369,9 @@ static char *real_event(const char *name, char *event) int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg), + char *pmu, char *unit, char *perpkg, + char *metric_expr, + char *metric_name), void *data) { int err = -EIO; @@ -388,6 +397,8 @@ int json_events(const char *fn, char *filter = NULL; char *perpkg = NULL; char *unit = NULL; + char *metric_expr = NULL; + char *metric_name = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -398,6 +409,7 @@ int json_events(const char *fn, for (j = 0; j < obj->size; j += 2) { jsmntok_t *field, *val; int nz; + char *s; field = tok + j; EXPECT(field->type == JSMN_STRING, tok + j, @@ -444,7 +456,6 @@ int json_events(const char *fn, NULL); } else if (json_streq(map, field, "Unit")) { const char *ppmu; - char *s; ppmu = field_to_perf(unit_to_pmu, map, val); if (ppmu) { @@ -458,12 +469,19 @@ int json_events(const char *fn, } addfield(map, &desc, ". ", "Unit: ", NULL); addfield(map, &desc, "", pmu, NULL); + addfield(map, &desc, "", " ", NULL); } else if (json_streq(map, field, "Filter")) { addfield(map, &filter, "", "", val); } else if (json_streq(map, field, "ScaleUnit")) { addfield(map, &unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { addfield(map, &perpkg, "", "", val); + } else if (json_streq(map, field, "MetricName")) { + addfield(map, &metric_name, "", "", val); + } else if (json_streq(map, field, "MetricExpr")) { + addfield(map, &metric_expr, "", "", val); + for (s = metric_expr; *s; s++) + *s = tolower(*s); } /* ignore unknown fields */ } @@ -488,7 +506,7 @@ int json_events(const char *fn, fixname(name); err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg); + pmu, unit, perpkg, metric_expr, metric_name); free(event); free(desc); free(name); @@ -498,6 +516,8 @@ int json_events(const char *fn, free(filter); free(perpkg); free(unit); + free(metric_expr); + free(metric_name); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 71e13de31092f2de94827573f4e4dc537df8b726..611fac01913de14335924f1b7e02a86263fa377e 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -5,7 +5,8 @@ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, - char *unit, char *perpkg), + char *unit, char *perpkg, char *metric_expr, + char *metric_name), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index c669a3cdb9f0279c96caa80cf68aab2862e202a9..569eab3688dd084236e6261d7511a31a40dc7117 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -13,6 +13,8 @@ struct pmu_event { const char *pmu; const char *unit; const char *perpkg; + const char *metric_expr; + const char *metric_name; }; /* diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1cb3d9b540e9eda133f4548efc9316ad0d014898..84222bdb8689203924fc8f6a75f0baac4531d82c 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -38,6 +38,7 @@ perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o perf-y += event-times.o +perf-y += expr.o perf-y += backward-ring-buffer.o perf-y += sdt.o perf-y += is_printable_array.o @@ -74,7 +75,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ -ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc)) +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 88dc51f4c27b2df8c5d8dc482a189b9831cb2832..0dd77494bb58e62620b0ad21843c95add77e5bfa 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -18,10 +18,16 @@ * permissions. All the event text files are stored there. */ +#include +#include #include #include #include #include +#include +#include +#include +#include #include "../perf.h" #include "util.h" #include diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 42e892b1e97981a19511d026f977c45feff0497b..50f6d7afee586320b7193b88dea7f521c28aa5f6 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -8,6 +8,7 @@ #include #include "tests.h" #include "debug.h" +#include #define NR_ITERS 111 diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index e7664fe3bd33739fd92be2579c30102e481e8f03..8ba2c4618fe90231d1157e8218bf10a2cb82f6a0 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -288,3 +288,17 @@ int test__bp_signal(int subtest __maybe_unused) return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? TEST_OK : TEST_FAIL; } + +bool test__bp_signal_is_supported(void) +{ +/* + * The powerpc so far does not have support to even create + * instruction breakpoint using the perf event interface. + * Once it's there we can release this. + */ +#ifdef __powerpc__ + return false; +#else + return true; +#endif +} diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 1a04fe77487dc54512b3764f8a89961364c9ad45..5876da126b584febb15b128fb28baac1b30baac9 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,10 +1,14 @@ +#include #include #include +#include +#include #include #include #include #include #include +#include #include #include #include "tests.h" diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 83c4669cbc5b9e30576321026dd38d68aa117fde..3ccfd58a8c3cf3e8b16cc513a67324b8f11eae7b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -3,8 +3,10 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include #include #include +#include #include "builtin.h" #include "hist.h" #include "intlist.h" @@ -13,6 +15,7 @@ #include "color.h" #include #include "symbol.h" +#include static bool dont_fork; @@ -43,6 +46,10 @@ static struct test generic_tests[] = { .desc = "Parse event definition strings", .func = test__parse_events, }, + { + .desc = "Simple expression parser", + .func = test__expr, + }, { .desc = "PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, @@ -90,10 +97,12 @@ static struct test generic_tests[] = { { .desc = "Breakpoint overflow signal handler", .func = test__bp_signal, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Breakpoint overflow sampling", .func = test__bp_signal_overflow, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Number of exit events of a simple workload", @@ -394,6 +403,11 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) if (!perf_test__matches(t, curr, argc, argv)) continue; + if (t->is_supported && !t->is_supported()) { + pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc); + continue; + } + pr_info("%2d: %-*s:", i, width, t->desc); if (intlist__find(skiplist, i)) { @@ -460,7 +474,7 @@ static int perf_test__list(int argc, const char **argv) return 0; } -int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) +int cmd_test(int argc, const char **argv) { const char *test_usage[] = { "perf test [] [{list |[|]}]", diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index f853e242a86c014ecfb80da3a6fa30f64295c963..c5bb2203f5a90850564397998a8af4b8cb3bfbc1 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -2,6 +2,7 @@ #include "debug.h" #include "util.h" #include "c++/clang-c.h" +#include static struct { int (*func)(void); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index d1f693041324a8a6670ff56081537c3ed3f528f0..94b7c7b02bdefbb33f2987677c11b3577ab062ec 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -1,9 +1,12 @@ +#include +#include #include +#include #include #include #include -#include #include +#include #include "parse-events.h" #include "evlist.h" @@ -16,6 +19,8 @@ #include "tests.h" +#include "sane_ctype.h" + #define BUFSZ 1024 #define READLEN 128 @@ -224,6 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, unsigned char buf2[BUFSZ]; size_t ret_len; u64 objdump_addr; + const char *objdump_name; + char decomp_name[KMOD_DECOMP_LEN]; int ret; pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); @@ -284,9 +291,25 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, state->done[state->done_cnt++] = al.map->start; } + objdump_name = al.map->dso->long_name; + if (dso__needs_decompress(al.map->dso)) { + if (dso__decompress_kmodule_path(al.map->dso, objdump_name, + decomp_name, + sizeof(decomp_name)) < 0) { + pr_debug("decompression failed\n"); + return -1; + } + + objdump_name = decomp_name; + } + /* Read the object code using objdump */ objdump_addr = map__rip_2objdump(al.map, al.addr); - ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len); + ret = read_via_objdump(objdump_name, objdump_addr, buf2, len); + + if (dso__needs_decompress(al.map->dso)) + unlink(objdump_name); + if (ret > 0) { /* * The kernel maps are inaccurate - assume objdump is right in diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index f168a85992d0ca3fec6c5abe4f2fd0efc3917224..4478773cdb9751b4f1f5a89f97c1808ad81d1bae 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -66,7 +66,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 2); TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); - TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1); + TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 13725e09ba22447ed97b498e6751a34c2a027cbf..8f08df5861cb36c7b3415dd706e63cff8ad1aa46 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -1,4 +1,6 @@ +#include #include +#include #include #include #include diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 1046491de4b295830f0a3ecea7616e018859d1aa..dfe5c89e2049f03fdb1305f8161928fa9f71f37e 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "tests.h" #include "debug.h" diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 19ef77bd6eb4152b539dc46638d4ef3a8db972e3..634f20c631d8f9981dc89499998e3315d7661243 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -1,5 +1,8 @@ #include +#include +#include #include +#include #include "tests.h" #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 60926a1f6fd7fcf128924759545d7f6620128406..d2bea6f780f8a1184c67c92d96727c7ddc715046 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -3,6 +3,8 @@ #include "parse-events.h" #include "tests.h" #include "debug.h" +#include +#include static int perf_evsel__roundtrip_cache_name_test(void) { diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c new file mode 100644 index 0000000000000000000000000000000000000000..6c6a3749aaf6bf5e7f54585e2872bcdcfc12c0a3 --- /dev/null +++ b/tools/perf/tests/expr.c @@ -0,0 +1,56 @@ +#include "util/debug.h" +#include "util/expr.h" +#include "tests.h" +#include + +static int test(struct parse_ctx *ctx, const char *e, double val2) +{ + double val; + + if (expr__parse(&val, ctx, &e)) + TEST_ASSERT_VAL("parse test failed", 0); + TEST_ASSERT_VAL("unexpected value", val == val2); + return 0; +} + +int test__expr(int subtest __maybe_unused) +{ + const char *p; + const char **other; + double val; + int ret; + struct parse_ctx ctx; + int num_other; + + expr__ctx_init(&ctx); + expr__add_id(&ctx, "FOO", 1); + expr__add_id(&ctx, "BAR", 2); + + ret = test(&ctx, "1+1", 2); + ret |= test(&ctx, "FOO+BAR", 3); + ret |= test(&ctx, "(BAR/2)%2", 1); + ret |= test(&ctx, "1 - -4", 5); + ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + + if (ret) + return ret; + + p = "FOO/0"; + ret = expr__parse(&val, &ctx, &p); + TEST_ASSERT_VAL("division by zero", ret == 1); + + p = "BAR/"; + ret = expr__parse(&val, &ctx, &p); + TEST_ASSERT_VAL("missing operand", ret == 1); + + TEST_ASSERT_VAL("find other", + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); + TEST_ASSERT_VAL("find other", num_other == 3); + TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR")); + TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); + TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); + TEST_ASSERT_VAL("find other", other[3] == NULL); + free((void *)other); + + return 0; +} diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 6b21746d6eec84b0bf443fc4cd05523be57f9878..00b8dc50f3dba8ffa1cbcc75938d15718741d75b 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -1,3 +1,4 @@ +#include #include "perf.h" #include "util/debug.h" #include "util/symbol.h" @@ -7,6 +8,7 @@ #include "util/machine.h" #include "util/thread.h" #include "tests/hists_common.h" +#include static struct { u32 pid; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 9fd54b79a7886266889f17f5865d69b46171cbf3..d549a9f2c41b30f4d35eae742dbdfcb584ee804d 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -1,5 +1,6 @@ #include "perf.h" #include "util/debug.h" +#include "util/event.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" @@ -9,6 +10,7 @@ #include "util/parse-events.h" #include "tests/tests.h" #include "tests/hists_common.h" +#include struct sample { u32 pid; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 62efb14f3a5a7eabb0dfbc7bae31e90276ef9c54..df9c91f49af19927ad48bc9c103aaf8bd43e0c40 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -3,12 +3,14 @@ #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" +#include "util/event.h" #include "util/evlist.h" #include "util/machine.h" #include "util/thread.h" #include "util/parse-events.h" #include "tests/tests.h" #include "tests/hists_common.h" +#include struct sample { u32 pid; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index eddc7407ff8a9341a2e19e859bf1cab8781e8eda..a26cbb79e988832af5eeddd4b9970ba37326a248 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -9,6 +9,8 @@ #include "thread.h" #include "parse-events.h" #include "hists_common.h" +#include +#include struct sample { u32 pid; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 63c5efaba1b5c611d3bb96da433d93415b3b87ca..06e5080182d31e879b25d2cabfc43c7a9a552097 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -1,5 +1,6 @@ #include "perf.h" #include "util/debug.h" +#include "util/event.h" #include "util/symbol.h" #include "util/sort.h" #include "util/evsel.h" @@ -9,6 +10,7 @@ #include "util/parse-events.h" #include "tests/tests.h" #include "tests/hists_common.h" +#include struct sample { u32 cpu; diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c index 42e13393e5028c2d1bfe515b990b0e79275e96c3..a5192f6a20d798990958adcfea8da970416183cd 100644 --- a/tools/perf/tests/is_printable_array.c +++ b/tools/perf/tests/is_printable_array.c @@ -1,7 +1,8 @@ #include +#include #include "tests.h" #include "debug.h" -#include "util.h" +#include "print_binary.h" int test__is_printable_array(int subtest __maybe_unused) { diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index 76f41f249944ccaa3af8de50256f8a24faf22f6b..6cd9e5107f7784ab4c340f80a03bc786b7c264b4 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -61,6 +61,7 @@ int test__kmod_path__parse(int subtest __maybe_unused) M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true); M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false); +#ifdef HAVE_ZLIB_SUPPORT /* path alloc_name alloc_ext kmod comp name ext */ T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz"); T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz"); @@ -96,6 +97,7 @@ int test__kmod_path__parse(int subtest __maybe_unused) M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true); M("x.ko.gz", PERF_RECORD_MISC_USER, false); +#endif /* path alloc_name alloc_ext kmod comp name ext */ T("[test_module]", true , true , true, false, "[test_module]", NULL); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 634bce9caebd343278e24f6c32ab3569f9bc003d..15c770856aacc6363c5b2618e51bb1356debbde1 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -1,3 +1,5 @@ +#include +#include /* For the CLR_() macros */ #include @@ -7,6 +9,7 @@ #include "cpumap.h" #include "tests.h" #include +#include /* * This test will generate random numbers of calls to some getpid syscalls, diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 0c5ce44f723fcbfd9c43b6c15377c27d2d503038..6ea4d8a5d26b15ff4b3e0a228c51850260263d13 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -11,6 +12,7 @@ #include "thread_map.h" #include "symbol.h" #include "thread.h" +#include "util.h" #define THREADS 4 diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index c8d9592eb142dd08ef4cf216dd67e6e6614996b7..1a74dd9fd06792001f111d3ca4b42ce484319704 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -1,8 +1,14 @@ +#include +#include /* For the CPU_* macros */ #include +#include +#include +#include #include #include +#include #include "evsel.h" #include "tests.h" #include "thread_map.h" diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index f52239fed361913a219dd563f17ef7b5103fb238..9788fac910950d63244f83d417a1e421edd98ee4 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -5,6 +5,7 @@ #include "thread_map.h" #include "tests.h" #include "debug.h" +#include #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index d7414128d7fe40d150dcb2424082dd858f6779d1..e44506e21ee779e33f4d8ffa7e6e6b4ebe40cc36 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -1,5 +1,10 @@ +#include +#include #include #include +#include +#include +#include #include "thread_map.h" #include "evsel.h" #include "debug.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 1dc8380144220bd2c37f5c6338ad9a38e1c641bf..7fad885491c5710acb1e1d82e290d5a34639f88e 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1,4 +1,3 @@ - #include "parse-events.h" #include "evsel.h" #include "evlist.h" @@ -6,8 +5,15 @@ #include "tests.h" #include "debug.h" #include "util.h" +#include +#include +#include +#include +#include +#include #include #include +#include #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index 65dcf48a92fbc2bbc7e89fc8a5243c1bcec3eead..c6207db09f12850f02123525a07c0c025e08a55d 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -1,3 +1,4 @@ +#include #include #include diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 87893f3ba5f1766cffa093ce44db50de45526279..d37cd9588cc0357577a513bca0afe9e600b0c58f 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -1,3 +1,5 @@ +#include +#include /* For the CLR_() macros */ #include diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 1e2ba26029301ffd85e1a1803eea8b8f40f134e6..a6d7aef3003056446f0b4507409d6672a63c986a 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -2,6 +2,8 @@ #include "pmu.h" #include "util.h" #include "tests.h" +#include +#include /* Simulated format definitions. */ static struct test_format { diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 5f23710b9fee62855de88f11f39e6054d12c9a27..bac5c3885b3be58180eb4fc258c72b9b0c8241be 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -1,4 +1,6 @@ #include +#include +#include #include #include "util.h" diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index f59d210e1bafbe21f5689587b718a8fb04f252cc..06eda675ae2c1ee3a59758c00285dc1a1f793230 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -1,6 +1,6 @@ +#include #include #include -#include #include #include #include "tests.h" @@ -43,7 +43,7 @@ static char *get_self_path(void) { char *buf = calloc(PATH_MAX, sizeof(char)); - if (buf && readlink("/proc/self/exe", buf, PATH_MAX) < 0) { + if (buf && readlink("/proc/self/exe", buf, PATH_MAX - 1) < 0) { pr_debug("Failed to get correct path of perf\n"); free(buf); return NULL; diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4c9fd046d57b1772422747fbf7deea8be036d68f..828494db4a190945dd3da7aa0d3ae3a987a52d36 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -1,3 +1,5 @@ +#include +#include #include #include #include diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 7ddbe267d0acbae827010e6408bde3faf575003e..65474fd80da7216df44bc2e221660b89f4b3a02d 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 01a5ba2788c604c8c6b6c6e9f0a0364444f90da0..cf00ebad2ef5ccefabee6324e004fc9b4d2e601b 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -4,6 +4,7 @@ #include "cpumap.h" #include "tests.h" +#include #include static int exited; @@ -82,7 +83,7 @@ int test__task_exit(int subtest __maybe_unused) evsel = perf_evlist__first(evlist); evsel->attr.task = 1; - evsel->attr.sample_freq = 0; + evsel->attr.sample_freq = 1; evsel->attr.inherit = 0; evsel->attr.watermark = 0; evsel->attr.wakeup_events = 1; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 1fa9b9d83aa51ba80beb9df29b48ee58be7f4619..577363809c9b1b54731f7e80b291278bf2764e78 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -34,6 +34,7 @@ struct test { int (*get_nr)(void); const char *(*get_desc)(int subtest); } subtest; + bool (*is_supported)(void); }; /* Tests */ @@ -62,6 +63,7 @@ int test__sample_parsing(int subtest); int test__keep_tracking(int subtest); int test__parse_no_sample_id_all(int subtest); int test__dwarf_unwind(int subtest); +int test__expr(int subtest); int test__hists_filter(int subtest); int test__mmap_thread_lookup(int subtest); int test__thread_mg_share(int subtest); @@ -98,6 +100,8 @@ const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(int subtest); +bool test__bp_signal_is_supported(void); + #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index f2d2e542d0ee77a8abf7bfc6074f0934da85b5c4..a63d6945807b3a0e7fe9247475068e258dbd9cdc 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -29,7 +29,7 @@ int test__thread_map(int subtest __maybe_unused) thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&map->refcnt) == 1); + refcount_read(&map->refcnt) == 1); thread_map__put(map); /* test dummy pid */ @@ -44,7 +44,7 @@ int test__thread_map(int subtest __maybe_unused) thread_map__comm(map, 0) && !strcmp(thread_map__comm(map, 0), "dummy")); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&map->refcnt) == 1); + refcount_read(&map->refcnt) == 1); thread_map__put(map); return 0; } @@ -71,7 +71,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, thread_map__comm(threads, 0) && !strcmp(thread_map__comm(threads, 0), NAME)); TEST_ASSERT_VAL("wrong refcnt", - atomic_read(&threads->refcnt) == 1); + refcount_read(&threads->refcnt) == 1); thread_map__put(threads); return 0; } diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c index 188b63140fc841f8c3111d20439b78d7f73133db..76686dd6f5ecfd235705bca7b39d7abdc237a150 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-mg-share.c @@ -43,7 +43,7 @@ int test__thread_mg_share(int subtest __maybe_unused) leader && t1 && t2 && t3 && other); mg = leader->mg; - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 4); /* test the map groups pointer is shared */ TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); @@ -71,25 +71,25 @@ int test__thread_mg_share(int subtest __maybe_unused) machine__remove_thread(machine, other_leader); other_mg = other->mg; - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 2); TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); /* release thread group */ thread__put(leader); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 3); thread__put(t1); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 2); thread__put(t2); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 1); thread__put(t3); /* release other group */ thread__put(other_leader); - TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1); + TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 1); thread__put(other); diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c index 623c2aa53c4aa37e109a595ed31d799bc2205564..44589de084b824319063df71ce0543f7f054088b 100644 --- a/tools/perf/tests/unit_number__scnprintf.c +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -1,7 +1,8 @@ +#include #include #include #include "tests.h" -#include "util.h" +#include "units.h" #include "debug.h" int test__unit_number__scnprint(int subtest __maybe_unused) diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 862b043e59243588671c75298b337d8cd4e29c3b..8456175fc23458091aaf787a465d3a7123fd5f71 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -1,5 +1,6 @@ #include #include +#include #include #include "map.h" #include "symbol.h" diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build new file mode 100644 index 0000000000000000000000000000000000000000..be95ac6ce84595817ddc267bc7cc77b434c26e81 --- /dev/null +++ b/tools/perf/trace/beauty/Build @@ -0,0 +1 @@ +libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h new file mode 100644 index 0000000000000000000000000000000000000000..cf50be3f17a4ff3acdd9506ce00f8cff7f280f5f --- /dev/null +++ b/tools/perf/trace/beauty/beauty.h @@ -0,0 +1,24 @@ +#ifndef _PERF_TRACE_BEAUTY_H +#define _PERF_TRACE_BEAUTY_H + +#include + +struct trace; +struct thread; + +struct syscall_arg { + unsigned long val; + struct thread *thread; + struct trace *trace; + void *parm; + u8 idx; + u8 mask; +}; + +size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags + +size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask + +#endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c index d3b0b1fab077a79f0e0db4f2880bff109ab775eb..fde8f2fc65589d54a457cabc8604b7389c5a985b 100644 --- a/tools/perf/trace/beauty/signum.c +++ b/tools/perf/trace/beauty/signum.c @@ -1,3 +1,4 @@ +#include static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) { diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c new file mode 100644 index 0000000000000000000000000000000000000000..5643b692af4cf60c6a791a148e1c35b30567d002 --- /dev/null +++ b/tools/perf/trace/beauty/statx.c @@ -0,0 +1,72 @@ +/* + * trace/beauty/statx.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include +#include +#include + +size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return scnprintf(bf, size, "SYNC_AS_STAT"); +#define P_FLAG(n) \ + if (flags & AT_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~AT_##n; \ + } + + P_FLAG(SYMLINK_NOFOLLOW); + P_FLAG(REMOVEDIR); + P_FLAG(SYMLINK_FOLLOW); + P_FLAG(NO_AUTOMOUNT); + P_FLAG(EMPTY_PATH); + P_FLAG(STATX_FORCE_SYNC); + P_FLAG(STATX_DONT_SYNC); + +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & STATX_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~STATX_##n; \ + } + + P_FLAG(TYPE); + P_FLAG(MODE); + P_FLAG(NLINK); + P_FLAG(UID); + P_FLAG(GID); + P_FLAG(ATIME); + P_FLAG(MTIME); + P_FLAG(CTIME); + P_FLAG(INO); + P_FLAG(SIZE); + P_FLAG(BLOCKS); + P_FLAG(BTIME); + +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 3eb3edb307a4814d1d11346f94117d3c006e87b7..a4d3762cd8250c0e799978d0454d52a04073b8f1 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,4 +1,5 @@ #include "../util.h" +#include "../string2.h" #include "../config.h" #include "../../perf.h" #include "libslang.h" @@ -13,6 +14,7 @@ #include "helpline.h" #include "keysyms.h" #include "../color.h" +#include "sane_ctype.h" static int ui_browser__percent_color(struct ui_browser *browser, double percent, bool current) @@ -579,7 +581,7 @@ static int ui_browser__color_config(const char *var, const char *value, break; *bg = '\0'; - while (isspace(*++bg)); + bg = ltrim(++bg); ui_browser__colorsets[i].bg = bg; ui_browser__colorsets[i].fg = fg; return 0; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ba36aac340bc7d6531eaeaace7ee40ee38851806..d990ad08a3c69f79fdffbb3e9405e8a1d109827d 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -9,7 +9,10 @@ #include "../../util/symbol.h" #include "../../util/evsel.h" #include "../../util/config.h" +#include #include +#include +#include struct disasm_line_samples { double percent; diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index edbeaaf31acea117e2d94fd7c3d11f17ae4dbfaa..e2c9390ff4c5244d8b0640844cc5c8bd957c963e 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c @@ -8,6 +8,8 @@ #include "util/header.h" #include "util/session.h" +#include + static void ui_browser__argv_write(struct ui_browser *browser, void *entry, int row) { diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fc4fb669ceee37404bdda0e2ba0652f4304c28f8..69f4570bd4f941bee8a67fc48741994ce41a127e 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1,7 +1,11 @@ +#include +#include +#include #include #include #include #include +#include #include "../../util/evsel.h" #include "../../util/evlist.h" @@ -10,6 +14,7 @@ #include "../../util/sort.h" #include "../../util/util.h" #include "../../util/top.h" +#include "../../util/thread.h" #include "../../arch/common.h" #include "../browsers/hists.h" @@ -18,6 +23,11 @@ #include "../ui.h" #include "map.h" #include "annotate.h" +#include "srcline.h" +#include "string2.h" +#include "units.h" + +#include "sane_ctype.h" extern void hist_browser__init_hpp(void); @@ -144,9 +154,60 @@ static void callchain_list__set_folding(struct callchain_list *cl, bool unfold) cl->unfolded = unfold ? cl->has_children : false; } +static struct inline_node *inline_node__create(struct map *map, u64 ip) +{ + struct dso *dso; + struct inline_node *node; + + if (map == NULL) + return NULL; + + dso = map->dso; + if (dso == NULL) + return NULL; + + if (dso->kernel != DSO_TYPE_USER) + return NULL; + + node = dso__parse_addr_inlines(dso, + map__rip_2objdump(map, ip)); + + return node; +} + +static int inline__count_rows(struct inline_node *node) +{ + struct inline_list *ilist; + int i = 0; + + if (node == NULL) + return 0; + + list_for_each_entry(ilist, &node->val, list) { + if ((ilist->filename != NULL) || (ilist->funcname != NULL)) + i++; + } + + return i; +} + +static int callchain_list__inline_rows(struct callchain_list *chain) +{ + struct inline_node *node; + int rows; + + node = inline_node__create(chain->ms.map, chain->ip); + if (node == NULL) + return 0; + + rows = inline__count_rows(node); + inline_node__delete(node); + return rows; +} + static int callchain_node__count_rows_rb_tree(struct callchain_node *node) { - int n = 0; + int n = 0, inline_rows; struct rb_node *nd; for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) { @@ -156,6 +217,13 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node) list_for_each_entry(chain, &child->val, list) { ++n; + + if (symbol_conf.inline_name) { + inline_rows = + callchain_list__inline_rows(chain); + n += inline_rows; + } + /* We need this because we may not have children */ folded_sign = callchain_list__folded(chain); if (folded_sign == '+') @@ -207,7 +275,7 @@ static int callchain_node__count_rows(struct callchain_node *node) { struct callchain_list *chain; bool unfolded = false; - int n = 0; + int n = 0, inline_rows; if (callchain_param.mode == CHAIN_FLAT) return callchain_node__count_flat_rows(node); @@ -216,6 +284,11 @@ static int callchain_node__count_rows(struct callchain_node *node) list_for_each_entry(chain, &node->val, list) { ++n; + if (symbol_conf.inline_name) { + inline_rows = callchain_list__inline_rows(chain); + n += inline_rows; + } + unfolded = chain->unfolded; } @@ -362,6 +435,19 @@ static void hist_entry__init_have_children(struct hist_entry *he) he->init_have_children = true; } +static void hist_entry_init_inline_node(struct hist_entry *he) +{ + if (he->inline_node) + return; + + he->inline_node = inline_node__create(he->ms.map, he->ip); + + if (he->inline_node == NULL) + return; + + he->has_children = true; +} + static bool hist_browser__toggle_fold(struct hist_browser *browser) { struct hist_entry *he = browser->he_selection; @@ -393,7 +479,12 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) if (he->unfolded) { if (he->leaf) - he->nr_rows = callchain__count_rows(&he->sorted_chain); + if (he->inline_node) + he->nr_rows = inline__count_rows( + he->inline_node); + else + he->nr_rows = callchain__count_rows( + &he->sorted_chain); else he->nr_rows = hierarchy_count_rows(browser, he, false); @@ -753,6 +844,71 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u #define LEVEL_OFFSET_STEP 3 +static int hist_browser__show_inline(struct hist_browser *browser, + struct inline_node *node, + unsigned short row, + int offset) +{ + struct inline_list *ilist; + char buf[1024]; + int color, width, first_row; + + first_row = row; + width = browser->b.width - (LEVEL_OFFSET_STEP + 2); + list_for_each_entry(ilist, &node->val, list) { + if ((ilist->filename != NULL) || (ilist->funcname != NULL)) { + color = HE_COLORSET_NORMAL; + if (ui_browser__is_current_entry(&browser->b, row)) + color = HE_COLORSET_SELECTED; + + if (callchain_param.key == CCKEY_ADDRESS || + callchain_param.key == CCKEY_SRCLINE) { + if (ilist->filename != NULL) + scnprintf(buf, sizeof(buf), + "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + scnprintf(buf, sizeof(buf), "??"); + } else if (ilist->funcname != NULL) + scnprintf(buf, sizeof(buf), "%s (inline)", + ilist->funcname); + else if (ilist->filename != NULL) + scnprintf(buf, sizeof(buf), + "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + scnprintf(buf, sizeof(buf), "??"); + + ui_browser__set_color(&browser->b, color); + hist_browser__gotorc(browser, row, 0); + ui_browser__write_nstring(&browser->b, " ", + LEVEL_OFFSET_STEP + offset); + ui_browser__write_nstring(&browser->b, buf, width); + row++; + } + } + + return row - first_row; +} + +static size_t show_inline_list(struct hist_browser *browser, struct map *map, + u64 ip, int row, int offset) +{ + struct inline_node *node; + int ret; + + node = inline_node__create(map, ip); + if (node == NULL) + return 0; + + ret = hist_browser__show_inline(browser, node, row, offset); + + inline_node__delete(node); + return ret; +} + static int hist_browser__show_callchain_list(struct hist_browser *browser, struct callchain_node *node, struct callchain_list *chain, @@ -764,6 +920,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, char bf[1024], *alloc_str; char buf[64], *alloc_str2; const char *str; + int inline_rows = 0, ret = 1; if (arg->row_offset != 0) { arg->row_offset--; @@ -801,10 +958,15 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, } print(browser, chain, str, offset, row, arg); - free(alloc_str); free(alloc_str2); - return 1; + + if (symbol_conf.inline_name) { + inline_rows = show_inline_list(browser, chain->ms.map, + chain->ip, row + 1, offset); + } + + return ret + inline_rows; } static bool check_percent_display(struct rb_node *node, u64 parent_total) @@ -1228,6 +1390,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, folded_sign = hist_entry__folded(entry); } + if (symbol_conf.inline_name && + (!entry->has_children)) { + hist_entry_init_inline_node(entry); + folded_sign = hist_entry__folded(entry); + } + if (row_offset == 0) { struct hpp_arg arg = { .b = &browser->b, @@ -1259,7 +1427,8 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (first) { - if (symbol_conf.use_callchain) { + if (symbol_conf.use_callchain || + symbol_conf.inline_name) { ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } @@ -1301,8 +1470,14 @@ static int hist_browser__show_entry(struct hist_browser *browser, .is_current_entry = current_entry, }; - printed += hist_browser__show_callchain(browser, entry, 1, row, - hist_browser__show_callchain_entry, &arg, + if (entry->inline_node) + printed += hist_browser__show_inline(browser, + entry->inline_node, row, 0); + else + printed += hist_browser__show_callchain(browser, + entry, 1, row, + hist_browser__show_callchain_entry, + &arg, hist_browser__check_output_full); } @@ -2308,7 +2483,7 @@ static int switch_data_file(void) return ret; memset(options, 0, sizeof(options)); - memset(options, 0, sizeof(abs_path)); + memset(abs_path, 0, sizeof(abs_path)); while ((dent = readdir(pwd_dir))) { char path[PATH_MAX]; diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 9ce142de536d0dcbb4017d2d80a40a9a668775c4..ffa5addf631d95fab3b1abcb8fcd6389aef45aba 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -11,6 +11,8 @@ #include "../keysyms.h" #include "map.h" +#include "sane_ctype.h" + struct map_browser { struct ui_browser b; struct map *map; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 8c9308ac30b7e77024170e441ef97be14ccb4865..e99ba86158d29b9ab637e66bac3182f7f429d88f 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -3,7 +3,8 @@ #include "util/annotate.h" #include "util/evsel.h" #include "ui/helpline.h" - +#include +#include enum { ANN_COL__PERCENT, diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index a4f02de7c1b54d426bb1b3f50b7fc2ba39ae5fa8..e24f83957705529bb1e62eda964840f4798fe430 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -4,7 +4,9 @@ #include "../sort.h" #include "../hist.h" #include "../helpline.h" +#include "../string2.h" #include "gtk.h" +#include #define MAX_COLUMNS 32 diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 5d632dca672aef2851778fb50be672e39c99bf50..ddb2c6fbdf919e8124ffb40eee47c801b43bd6c6 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -1,3 +1,4 @@ +#include #include #include @@ -209,6 +210,8 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, return 0; ret = b->callchain->max_depth - a->callchain->max_depth; + if (callchain_param.order == ORDER_CALLER) + ret = -ret; } return ret; } diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 50d13e58210f0d7dc4b59421914317604757ac10..caf1ce6f51527ba5e542caacfb26d06c37829674 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -4,12 +4,16 @@ #include "../util/cache.h" #include "../util/debug.h" #include "../util/hist.h" +#include "../util/util.h" pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; void *perf_gtk_handle; int use_browser = -1; +#define PERF_GTK_DSO "libperf-gtk.so" + #ifdef HAVE_GTK2_SUPPORT + static int setup_gtk_browser(void) { int (*perf_ui_init)(void); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 668f4aecf2e6dfd933deb963d50362d09caa9fd9..42e432bd2eb4b59a1f15f6af69c788c66a62f1af 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -4,7 +4,10 @@ #include "../../util/hist.h" #include "../../util/sort.h" #include "../../util/evsel.h" - +#include "../../util/srcline.h" +#include "../../util/string2.h" +#include "../../util/thread.h" +#include "../../util/sane_ctype.h" static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) { @@ -17,6 +20,67 @@ static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) return ret; } +static size_t inline__fprintf(struct map *map, u64 ip, int left_margin, + int depth, int depth_mask, FILE *fp) +{ + struct dso *dso; + struct inline_node *node; + struct inline_list *ilist; + int ret = 0, i; + + if (map == NULL) + return 0; + + dso = map->dso; + if (dso == NULL) + return 0; + + if (dso->kernel != DSO_TYPE_USER) + return 0; + + node = dso__parse_addr_inlines(dso, + map__rip_2objdump(map, ip)); + if (node == NULL) + return 0; + + list_for_each_entry(ilist, &node->val, list) { + if ((ilist->filename != NULL) || (ilist->funcname != NULL)) { + ret += callchain__fprintf_left_margin(fp, left_margin); + + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + ret += fprintf(fp, " "); + } + + if (callchain_param.key == CCKEY_ADDRESS || + callchain_param.key == CCKEY_SRCLINE) { + if (ilist->filename != NULL) + ret += fprintf(fp, "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + ret += fprintf(fp, "??"); + } else if (ilist->funcname != NULL) + ret += fprintf(fp, "%s (inline)", + ilist->funcname); + else if (ilist->filename != NULL) + ret += fprintf(fp, "%s:%d (inline)", + ilist->filename, + ilist->line_nr); + else + ret += fprintf(fp, "??"); + + ret += fprintf(fp, "\n"); + } + } + + inline_node__delete(node); + return ret; +} + static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, int left_margin) { @@ -78,6 +142,10 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, fputs(str, fp); fputc('\n', fp); free(alloc_str); + + if (symbol_conf.inline_name) + ret += inline__fprintf(chain->ms.map, chain->ip, + left_margin, depth, depth_mask, fp); return ret; } @@ -229,6 +297,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, if (!i++ && field_order == NULL && sort_order && !prefixcmp(sort_order, "sym")) continue; + if (!printed) { ret += callchain__fprintf_left_margin(fp, left_margin); ret += fprintf(fp, "|\n"); @@ -251,6 +320,13 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, if (++entries_printed == callchain_param.print_limit) break; + + if (symbol_conf.inline_name) + ret += inline__fprintf(chain->ms.map, + chain->ip, + left_margin, + 0, 0, + fp); } root = &cnode->rb_root; } @@ -529,6 +605,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, bool use_callchain) { int ret; + int callchain_ret = 0; + int inline_ret = 0; struct perf_hpp hpp = { .buf = bf, .size = size, @@ -547,7 +625,16 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); if (use_callchain) - ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); + callchain_ret = hist_entry_callchain__fprintf(he, total_period, + 0, fp); + + if (callchain_ret == 0 && symbol_conf.inline_name) { + inline_ret = inline__fprintf(he->ms.map, he->ip, 0, 0, 0, fp); + ret += inline_ret; + if (inline_ret > 0) + ret += fprintf(fp, "\n"); + } else + ret += callchain_ret; return ret; } diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 4ea2ba861fc2fe6624fff3b67e2c0663015ff91d..d9350a1da48b46bbfaad0170e6ccb60a3f6f135b 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef HAVE_BACKTRACE_SUPPORT #include #endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5da376bc1afca6733664f798e0e9d3050dab6e21..79dea95a7f688ca9fa9b16346b3fae25af797f5c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -libperf-y += alias.o libperf-y += annotate.o libperf-y += block-range.o libperf-y += build-id.o @@ -14,9 +13,11 @@ libperf-y += find_bit.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o +libperf-y += memswap.o libperf-y += parse-events.o libperf-y += perf_regs.o libperf-y += path.o +libperf-y += print_binary.o libperf-y += rbtree.o libperf-y += libstring.o libperf-y += bitmap.o @@ -42,6 +43,7 @@ libperf-y += pstack.o libperf-y += session.o libperf-$(CONFIG_AUDIT) += syscalltbl.o libperf-y += ordered-events.o +libperf-y += namespaces.o libperf-y += comm.o libperf-y += thread.o libperf-y += thread_map.o @@ -81,13 +83,16 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o +libperf-y += dump-insn.o libperf-y += parse-regs-options.o libperf-y += term.o libperf-y += help-unknown-cmd.o libperf-y += mem-events.o libperf-y += vsprintf.o libperf-y += drv_configs.o +libperf-y += units.o libperf-y += time-utils.o +libperf-y += expr-bison.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o @@ -140,6 +145,10 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ +$(OUTPUT)util/expr-bison.c: util/expr.y + $(call rule_mkdir) + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ + $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l @@ -152,6 +161,7 @@ CFLAGS_parse-events-flex.o += -w CFLAGS_pmu-flex.o += -w CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c deleted file mode 100644 index 6455471d9cd1e6dc8386919161a5255ec040f783..0000000000000000000000000000000000000000 --- a/tools/perf/util/alias.c +++ /dev/null @@ -1,78 +0,0 @@ -#include "cache.h" -#include "util.h" -#include "config.h" - -static const char *alias_key; -static char *alias_val; - -static int alias_lookup_cb(const char *k, const char *v, - void *cb __maybe_unused) -{ - if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { - if (!v) - return config_error_nonbool(k); - alias_val = strdup(v); - return 0; - } - return 0; -} - -char *alias_lookup(const char *alias) -{ - alias_key = alias; - alias_val = NULL; - perf_config(alias_lookup_cb, NULL); - return alias_val; -} - -int split_cmdline(char *cmdline, const char ***argv) -{ - int src, dst, count = 0, size = 16; - char quoted = 0; - - *argv = malloc(sizeof(char*) * size); - - /* split alias_string */ - (*argv)[count++] = cmdline; - for (src = dst = 0; cmdline[src];) { - char c = cmdline[src]; - if (!quoted && isspace(c)) { - cmdline[dst++] = 0; - while (cmdline[++src] - && isspace(cmdline[src])) - ; /* skip */ - if (count >= size) { - size += 16; - *argv = realloc(*argv, sizeof(char*) * size); - } - (*argv)[count++] = cmdline + dst; - } else if (!quoted && (c == '\'' || c == '"')) { - quoted = c; - src++; - } else if (c == quoted) { - quoted = 0; - src++; - } else { - if (c == '\\' && quoted != '\'') { - src++; - c = cmdline[src]; - if (!c) { - zfree(argv); - return error("cmdline ends with \\"); - } - } - cmdline[dst++] = c; - src++; - } - } - - cmdline[dst] = 0; - - if (quoted) { - zfree(argv); - return error("unclosed quote"); - } - - return count; -} - diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7aa57225cbf7971b08db04c27151494c071b163b..ddbd56df91878884a4de5da2a29aae991e0689a0 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -7,6 +7,8 @@ * Released under the GPL v2. (and only v2, not any later version) */ +#include +#include #include "util.h" #include "ui/ui.h" #include "sort.h" @@ -18,12 +20,16 @@ #include "annotate.h" #include "evsel.h" #include "block-range.h" +#include "string2.h" #include "arch/common.h" #include #include #include +#include #include +#include "sane_ctype.h" + const char *disassembler_style; const char *objdump_path; static regex_t file_lineno; @@ -108,6 +114,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/arm64/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" +#include "arch/s390/annotate/instructions.c" static struct arch architectures[] = { { @@ -132,6 +139,7 @@ static struct arch architectures[] = { }, { .name = "s390", + .init = s390__annotate_init, .objdump = { .comment_char = '#', }, @@ -231,10 +239,20 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op const char *s = strchr(ops->raw, '+'); const char *c = strchr(ops->raw, ','); - if (c++ != NULL) + /* + * skip over possible up to 2 operands to get to address, e.g.: + * tbnz w0, #26, ffff0000083cd190 + */ + if (c++ != NULL) { ops->target.addr = strtoull(c, NULL, 16); - else + if (!ops->target.addr) { + c = strchr(c, ','); + if (c++ != NULL) + ops->target.addr = strtoull(c, NULL, 16); + } + } else { ops->target.addr = strtoull(ops->raw, NULL, 16); + } if (s++ != NULL) { ops->target.offset = strtoull(s, NULL, 16); @@ -249,10 +267,27 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { + const char *c = strchr(ops->raw, ','); + if (!ops->target.addr || ops->target.offset < 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset); + if (c != NULL) { + const char *c2 = strchr(c + 1, ','); + + /* check for 3-op insn */ + if (c2 != NULL) + c = c2; + c++; + + /* mirror arch objdump's space-after-comma style */ + if (*c == ' ') + c++; + } + + return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + ins->name, c ? c - ops->raw : 0, ops->raw, + ops->target.offset); } static struct ins_ops jump_ops = { @@ -385,9 +420,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m if (comment == NULL) return 0; - while (comment[0] != '\0' && isspace(comment[0])) - ++comment; - + comment = ltrim(comment); comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); @@ -432,9 +465,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops if (comment == NULL) return 0; - while (comment[0] != '\0' && isspace(comment[0])) - ++comment; - + comment = ltrim(comment); comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); return 0; @@ -783,10 +814,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, str static int disasm_line__parse(char *line, const char **namep, char **rawp) { - char *name = line, tmp; - - while (isspace(name[0])) - ++name; + char tmp, *name = ltrim(line); if (name[0] == '\0') return -1; @@ -804,12 +832,7 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) goto out_free_name; (*rawp)[0] = tmp; - - if ((*rawp)[0] != '\0') { - (*rawp)++; - while (isspace((*rawp)[0])) - ++(*rawp); - } + *rawp = ltrim(*rawp); return 0; @@ -1154,7 +1177,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, { struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; - char *line = NULL, *parsed_line, *tmp, *tmp2, *c; + char *line = NULL, *parsed_line, *tmp, *tmp2; size_t line_len; s64 line_ip, offset = -1; regmatch_t match[2]; @@ -1165,32 +1188,16 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, if (!line) return -1; - while (line_len != 0 && isspace(line[line_len - 1])) - line[--line_len] = '\0'; - - c = strchr(line, '\n'); - if (c) - *c = 0; - line_ip = -1; - parsed_line = line; + parsed_line = rtrim(line); /* /filename:linenr ? Save line number and ignore. */ - if (regexec(&file_lineno, line, 2, match, 0) == 0) { - *line_nr = atoi(line + match[1].rm_so); + if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { + *line_nr = atoi(parsed_line + match[1].rm_so); return 0; } - /* - * Strip leading spaces: - */ - tmp = line; - while (*tmp) { - if (*tmp != ' ') - break; - tmp++; - } - + tmp = ltrim(parsed_line); if (*tmp) { /* * Parse hexa addresses followed by ':' @@ -1313,6 +1320,8 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil { char linkname[PATH_MAX]; char *build_id_filename; + char *build_id_path = NULL; + char *pos; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) @@ -1328,8 +1337,21 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil goto fallback; } + build_id_path = strdup(filename); + if (!build_id_path) + return -1; + + /* + * old style build-id cache has name of XX/XXXXXXX.. while + * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. + * extract the build-id part of dirname in the new style only. + */ + pos = strrchr(build_id_path, '/'); + if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) + dirname(build_id_path); + if (dso__is_kcore(dso) || - readlink(filename, linkname, sizeof(linkname)) < 0 || + readlink(build_id_path, linkname, sizeof(linkname)) < 0 || strstr(linkname, DSO__NAME_KALLSYMS) || access(filename, R_OK)) { fallback: @@ -1341,6 +1363,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil __symbol__join_symfs(filename, filename_size, dso->long_name); } + free(build_id_path); return 0; } @@ -1408,31 +1431,10 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na sizeof(symfs_filename)); } } else if (dso__needs_decompress(dso)) { - char tmp[PATH_MAX]; - struct kmod_path m; - int fd; - bool ret; - - if (kmod_path__parse_ext(&m, symfs_filename)) - goto out; - - snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); + char tmp[KMOD_DECOMP_LEN]; - fd = mkstemp(tmp); - if (fd < 0) { - free(m.ext); - goto out; - } - - ret = decompress_to_file(m.ext, symfs_filename, fd); - - if (ret) - pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename); - - free(m.ext); - close(fd); - - if (!ret) + if (dso__decompress_kmodule_path(dso, symfs_filename, + tmp, sizeof(tmp)) < 0) goto out; strcpy(symfs_filename, tmp); @@ -1441,7 +1443,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand", + " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? "-M " : "", disassembler_style ? disassembler_style : "", @@ -1488,6 +1490,12 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na nline = 0; while (!feof(file)) { + /* + * The source code line number (lineno) needs to be kept in + * accross calls to symbol__parse_objdump_line(), so that it + * can associate it with the instructions till the next one. + * See disasm_line__new() and struct disasm_line::line_nr. + */ if (symbol__parse_objdump_line(sym, map, arch, file, privsize, &lineno) < 0) break; @@ -1657,24 +1665,31 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, start = map__rip_2objdump(map, sym->start); for (i = 0; i < len; i++) { - u64 offset; + u64 offset, nr_samples; double percent_max = 0.0; src_line->nr_pcnt = nr_pcnt; for (k = 0; k < nr_pcnt; k++) { + double percent = 0.0; + h = annotation__histogram(notes, evidx + k); - src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum; + nr_samples = h->addr[i]; + if (h->sum) + percent = 100.0 * nr_samples / h->sum; - if (src_line->samples[k].percent > percent_max) - percent_max = src_line->samples[k].percent; + if (percent > percent_max) + percent_max = percent; + src_line->samples[k].percent = percent; + src_line->samples[k].nr = nr_samples; } if (percent_max <= 0.5) goto next; offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, false); + src_line->path = get_srcline(map->dso, offset, NULL, + false, true); insert_source_line(&tmp_root, src_line); next: diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 09776b5af991d3334fd35c1b9218999bdfda38da..948aa8e6fd394729bc67441823f64c17508756d6 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -98,7 +98,7 @@ struct cyc_hist { struct source_line_samples { double percent; double percent_sum; - double nr; + u64 nr; }; struct source_line { diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c5a6e0b124529e876cf9b2844b485b7177e6e4a2..0daf63b9ee3e1482cdc5fc5e96a968c4117e3a0c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -13,10 +13,10 @@ * */ +#include #include #include #include -#include #include #include #include @@ -46,7 +46,6 @@ #include "cpumap.h" #include "thread_map.h" #include "asm/bug.h" -#include "symbol/kallsyms.h" #include "auxtrace.h" #include @@ -59,6 +58,9 @@ #include "intel-pt.h" #include "intel-bts.h" +#include "sane_ctype.h" +#include "symbol/kallsyms.h" + int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, struct auxtrace_mmap_params *mp, void *userpg, int fd) @@ -1826,7 +1828,7 @@ static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) filt->addr = start; if (filt->range && !filt->size && !filt->sym_to) { filt->size = size; - no_size = !!size; + no_size = !size; } } @@ -1840,7 +1842,7 @@ static int addr_filter__resolve_kernel_syms(struct addr_filter *filt) if (err) return err; filt->size = start + size - filt->addr; - no_size = !!size; + no_size = !size; } /* The very last symbol in kallsyms does not imply a particular size */ diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 26fb1ee5746aaf7fa15db352f848d13bdd299736..9f0de72d58e26f07c479e53965cc106c365b3b70 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -17,6 +17,7 @@ #define __PERF_AUXTRACE_H #include +#include #include #include #include diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index bc6bc7062eb4ffed74457bff52a5ab5c2d885300..4bd2d1d882af1d6e9da7561e80d572a82258b5cd 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -9,7 +9,9 @@ #include #include #include +#include #include +#include #include "perf.h" #include "debug.h" #include "bpf-loader.h" @@ -17,6 +19,7 @@ #include "probe-event.h" #include "probe-finder.h" // for MAX_PROBES #include "parse-events.h" +#include "strfilter.h" #include "llvm-utils.h" #include "c++/clang-c.h" diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index f2b737b225f2e0233d9d73aa9944588bf1f4d64e..48863867878bc35413d6bf94e460da4e57976869 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -85,6 +85,8 @@ int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, char *buf, size_t size); #else +#include + static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, bool source __maybe_unused) diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 6cdbee119ceb3fa8f550368f9b93cc5b31aacbb0..1356220a9f1b6eee97e56c51132f394922225d36 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -12,6 +12,7 @@ #include "bpf-loader.h" #include "bpf-prologue.h" #include "probe-finder.h" +#include #include #include diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h index d94cbea128999aab4368404140b07a0755ba4439..ba564838375f7b50bfc8d057b7c69f42c7b0f5cb 100644 --- a/tools/perf/util/bpf-prologue.h +++ b/tools/perf/util/bpf-prologue.h @@ -18,6 +18,8 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, struct bpf_insn *new_prog, size_t *new_cnt, size_t cnt_space); #else +#include + static inline int bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, int nargs __maybe_unused, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e528c40739ccec0b60e392c5d815ccdbf79541db..e0148b081bdfbb7cead2c118a51ca81bd238e740 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -7,18 +7,26 @@ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo */ #include "util.h" +#include +#include #include +#include +#include #include "build-id.h" #include "event.h" #include "symbol.h" +#include "thread.h" #include #include "debug.h" #include "session.h" #include "tool.h" #include "header.h" #include "vdso.h" +#include "path.h" #include "probe-file.h" +#include "strlist.h" +#include "sane_ctype.h" static bool no_buildid_cache; @@ -182,13 +190,17 @@ char *build_id_cache__origname(const char *sbuild_id) char buf[PATH_MAX]; char *ret = NULL, *p; size_t offs = 5; /* == strlen("../..") */ + ssize_t len; linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; - if (readlink(linkname, buf, PATH_MAX) < 0) + len = readlink(linkname, buf, sizeof(buf) - 1); + if (len <= 0) goto out; + buf[len] = '\0'; + /* The link should be "../../" */ p = strrchr(buf, '/'); /* Cut off the "/" */ if (p && (p > buf + offs)) { @@ -266,51 +278,6 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return bf; } -bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) -{ - char *id_name = NULL, *ch; - struct stat sb; - char sbuild_id[SBUILD_ID_SIZE]; - - if (!dso->has_build_id) - goto err; - - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - id_name = build_id_cache__linkname(sbuild_id, NULL, 0); - if (!id_name) - goto err; - if (access(id_name, F_OK)) - goto err; - if (lstat(id_name, &sb) == -1) - goto err; - if ((size_t)sb.st_size > size - 1) - goto err; - if (readlink(id_name, bf, size - 1) < 0) - goto err; - - bf[sb.st_size] = '\0'; - - /* - * link should be: - * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 - */ - ch = strrchr(bf, '/'); - if (!ch) - goto err; - if (ch - 3 < bf) - goto err; - - free(id_name); - return strncmp(".ko", ch - 3, 3) == 0; -err: - pr_err("Invalid build id: %s\n", id_name ? : - dso->long_name ? : - dso->short_name ? : - "[unknown]"); - free(id_name); - return false; -} - #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -443,14 +410,14 @@ void disable_buildid_cache(void) } static bool lsdir_bid_head_filter(const char *name __maybe_unused, - struct dirent *d __maybe_unused) + struct dirent *d) { return (strlen(d->d_name) == 2) && isxdigit(d->d_name[0]) && isxdigit(d->d_name[1]); } static bool lsdir_bid_tail_filter(const char *name __maybe_unused, - struct dirent *d __maybe_unused) + struct dirent *d) { int i = 0; while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3) @@ -690,7 +657,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, err = 0; /* Update SDT cache : error is just warned */ - if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) + if (realname && build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index d27990610f9f4bfff8598bd71855f4b1f14f1a8c..96690a55c62c40394444a8f23df1cd03b840acfe 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -5,7 +5,6 @@ #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" -#include "strlist.h" #include extern struct perf_tool build_id__mark_dso_hit_ops; @@ -18,7 +17,6 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); -bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, @@ -34,6 +32,9 @@ char *build_id_cache__origname(const char *sbuild_id); char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size); char *build_id_cache__cachedir(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); + +struct strlist; + struct strlist *build_id_cache__list_all(bool validonly); char *build_id_cache__complement(const char *incomplete_sbuild_id); int build_id_cache__list_build_ids(const char *pathname, @@ -42,6 +43,10 @@ bool build_id_cache__cached(const char *sbuild_id); int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); int build_id_cache__remove_s(const char *sbuild_id); + +extern char buildid_dir[]; + +void set_buildid_dir(const char *dir); void disable_buildid_cache(void); #endif diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h index 0eadd792ab1f0b99a784d14eb71fae99a6f1214f..ccafcf72b37a84d02b6b5d93e50a58cecdebf7ed 100644 --- a/tools/perf/util/c++/clang-c.h +++ b/tools/perf/util/c++/clang-c.h @@ -20,6 +20,7 @@ extern int perf_clang__compile_bpf(const char *filename, size_t *p_obj_buf_sz); #else +#include static inline void perf_clang__init(void) { } static inline void perf_clang__cleanup(void) { } diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 512c0c83fbc6a9a9ed2a9c996de74ff6fb45eb62..0328f297a748380d7e128e69b204e42001c3e8c1 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -15,7 +15,6 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" -char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); #define alloc_nr(x) (((x)+16)*3/2) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index aba953421a0329d0de0f2adc5ce1a4599d0fe320..b4204b43ed58a83c9530ae3b75df4c01004b442e 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -9,6 +9,7 @@ * */ +#include #include #include #include @@ -23,6 +24,21 @@ #include "machine.h" #include "callchain.h" +#define CALLCHAIN_PARAM_DEFAULT \ + .mode = CHAIN_GRAPH_ABS, \ + .min_percent = 0.5, \ + .order = ORDER_CALLEE, \ + .key = CCKEY_FUNCTION, \ + .value = CCVAL_PERCENT, \ + +struct callchain_param callchain_param = { + CALLCHAIN_PARAM_DEFAULT +}; + +struct callchain_param callchain_param_default = { + CALLCHAIN_PARAM_DEFAULT +}; + __thread struct callchain_cursor callchain_cursor; int parse_callchain_record_opt(const char *arg, struct callchain_param *param) @@ -80,6 +96,10 @@ static int parse_callchain_sort_key(const char *value) callchain_param.key = CCKEY_ADDRESS; return 0; } + if (!strncmp(value, "srcline", strlen(value))) { + callchain_param.key = CCKEY_SRCLINE; + return 0; + } if (!strncmp(value, "branch", strlen(value))) { callchain_param.branch_callstack = 1; return 0; @@ -108,11 +128,37 @@ static int parse_callchain_value(const char *value) return -1; } +static int get_stack_size(const char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} + static int __parse_callchain_report_opt(const char *arg, bool allow_record_opt) { char *tok; - char *endptr; + char *endptr, *saveptr = NULL; bool minpcnt_set = false; bool record_opt_set = false; bool try_stack_size = false; @@ -123,7 +169,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) if (!arg) return 0; - while ((tok = strtok((char *)arg, ",")) != NULL) { + while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) { if (!strncmp(tok, "none", strlen(tok))) { callchain_param.mode = CHAIN_NONE; callchain_param.enabled = false; @@ -191,6 +237,68 @@ int parse_callchain_top_opt(const char *arg) return __parse_callchain_report_opt(arg, true); } +int parse_callchain_record(const char *arg, struct callchain_param *param) +{ + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? : (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph fp\n"); + break; + + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + const unsigned long default_stack_dump_size = 8192; + + ret = 0; + param->record_mode = CALLCHAIN_DWARF; + param->dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + param->dump_size = size; + } + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + param->record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; + } else { + pr_err("callchain: Unknown --call-graph option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + return ret; +} + int perf_callchain_config(const char *var, const char *value) { char *endptr; @@ -510,14 +618,56 @@ enum match_result { MATCH_GT, }; +static enum match_result match_chain_srcline(struct callchain_cursor_node *node, + struct callchain_list *cnode) +{ + char *left = NULL; + char *right = NULL; + enum match_result ret = MATCH_EQ; + int cmp; + + if (cnode->ms.map) + left = get_srcline(cnode->ms.map->dso, + map__rip_2objdump(cnode->ms.map, cnode->ip), + cnode->ms.sym, true, false); + if (node->map) + right = get_srcline(node->map->dso, + map__rip_2objdump(node->map, node->ip), + node->sym, true, false); + + if (left && right) + cmp = strcmp(left, right); + else if (!left && right) + cmp = 1; + else if (left && !right) + cmp = -1; + else if (cnode->ip == node->ip) + cmp = 0; + else + cmp = (cnode->ip < node->ip) ? -1 : 1; + + if (cmp != 0) + ret = cmp < 0 ? MATCH_LT : MATCH_GT; + + free_srcline(left); + free_srcline(right); + return ret; +} + static enum match_result match_chain(struct callchain_cursor_node *node, struct callchain_list *cnode) { struct symbol *sym = node->sym; u64 left, right; - if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) { + if (callchain_param.key == CCKEY_SRCLINE) { + enum match_result match = match_chain_srcline(node, cnode); + + if (match != MATCH_ERROR) + return match; + } + + if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { left = cnode->ms.sym->start; right = sym->start; } else { @@ -911,15 +1061,16 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso) { + bool show_addr = callchain_param.key == CCKEY_ADDRESS; + bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE; int printed; if (cl->ms.sym) { - if (callchain_param.key == CCKEY_ADDRESS && - cl->ms.map && !cl->srcline) + if (show_srcline && cl->ms.map && !cl->srcline) cl->srcline = get_srcline(cl->ms.map->dso, map__rip_2objdump(cl->ms.map, cl->ip), - cl->ms.sym, false); + cl->ms.sym, false, show_addr); if (cl->srcline) printed = scnprintf(bf, bfsize, "%s %s", cl->ms.sym->name, cl->srcline); @@ -1063,63 +1214,100 @@ int callchain_branch_counts(struct callchain_root *root, cycles_count); } -static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, - u64 branch_count, u64 predicted_count, - u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count) +static int counts_str_build(char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) { double predicted_percent = 0.0; const char *null_str = ""; char iter_str[32]; - char *str; - u64 cycles = 0; - - if (branch_count == 0) { - if (fp) - return fprintf(fp, " (calltrace)"); + char cycle_str[32]; + char *istr, *cstr; + u64 cycles; + if (branch_count == 0) return scnprintf(bf, bfsize, " (calltrace)"); - } + + cycles = cycles_count / branch_count; if (iter_count && samples_count) { - scnprintf(iter_str, sizeof(iter_str), - ", iterations:%" PRId64 "", - iter_count / samples_count); - str = iter_str; + if (cycles > 0) + scnprintf(iter_str, sizeof(iter_str), + " iterations:%" PRId64 "", + iter_count / samples_count); + else + scnprintf(iter_str, sizeof(iter_str), + "iterations:%" PRId64 "", + iter_count / samples_count); + istr = iter_str; + } else + istr = (char *)null_str; + + if (cycles > 0) { + scnprintf(cycle_str, sizeof(cycle_str), + "cycles:%" PRId64 "", cycles); + cstr = cycle_str; } else - str = (char *)null_str; + cstr = (char *)null_str; predicted_percent = predicted_count * 100.0 / branch_count; - cycles = cycles_count / branch_count; - if ((predicted_percent >= 100.0) && (abort_count == 0)) { - if (fp) - return fprintf(fp, " (cycles:%" PRId64 "%s)", - cycles, str); + if ((predicted_count == branch_count) && (abort_count == 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, " (%s%s)", cstr, istr); + else + return scnprintf(bf, bfsize, "%s", (char *)null_str); + } - return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)", - cycles, str); + if ((predicted_count < branch_count) && (abort_count == 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (predicted:%.1f%% %s%s)", + predicted_percent, cstr, istr); + else { + return scnprintf(bf, bfsize, + " (predicted:%.1f%%)", + predicted_percent); + } } - if ((predicted_percent < 100.0) && (abort_count == 0)) { - if (fp) - return fprintf(fp, - " (predicted:%.1f%%, cycles:%" PRId64 "%s)", - predicted_percent, cycles, str); + if ((predicted_count == branch_count) && (abort_count > 0)) { + if ((cycles > 0) || (istr != (char *)null_str)) + return scnprintf(bf, bfsize, + " (abort:%" PRId64 " %s%s)", + abort_count, cstr, istr); + else + return scnprintf(bf, bfsize, + " (abort:%" PRId64 ")", + abort_count); + } + if ((cycles > 0) || (istr != (char *)null_str)) return scnprintf(bf, bfsize, - " (predicted:%.1f%%, cycles:%" PRId64 "%s)", - predicted_percent, cycles, str); - } + " (predicted:%.1f%% abort:%" PRId64 " %s%s)", + predicted_percent, abort_count, cstr, istr); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%% abort:%" PRId64 ")", + predicted_percent, abort_count); +} + +static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) +{ + char str[128]; + + counts_str_build(str, sizeof(str), branch_count, + predicted_count, abort_count, cycles_count, + iter_count, samples_count); if (fp) - return fprintf(fp, - " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", - predicted_percent, abort_count, cycles, str); + return fprintf(fp, "%s", str); - return scnprintf(bf, bfsize, - " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", - predicted_percent, abort_count, cycles, str); + return scnprintf(bf, bfsize, "%s", str); } int callchain_list_counts__printf_value(struct callchain_node *node, diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 4f4b60f1558a827cbe34d9d387e6a91a917b58dd..c56c23dbbf72838b8758ec703759a6ad09f229a6 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -77,7 +77,8 @@ typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, enum chain_key { CCKEY_FUNCTION, - CCKEY_ADDRESS + CCKEY_ADDRESS, + CCKEY_SRCLINE }; enum chain_value { diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index eafbf11442b224f90ad9c5d704df75d86985f917..03347748f3fadc43f070fa0b6ef789f071c30d65 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -4,6 +4,7 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" +#include int nr_cgroups; @@ -27,8 +28,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen) path_v1[0] = '\0'; path_v2[0] = '\0'; - while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %" - STR(PATH_MAX)"s %*d %*d\n", + while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" + __stringify(PATH_MAX)"s %*d %*d\n", mountpoint, type, tokens) == 3) { if (!path_v1[0] && !strcmp(type, "cgroup")) { @@ -127,19 +128,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) goto found; n++; } - if (atomic_read(&cgrp->refcnt) == 0) + if (refcount_read(&cgrp->refcnt) == 0) free(cgrp); return -1; found: - atomic_inc(&cgrp->refcnt); + refcount_inc(&cgrp->refcnt); counter->cgrp = cgrp; return 0; } void close_cgroup(struct cgroup_sel *cgrp) { - if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) { + if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) { close(cgrp->fd); zfree(&cgrp->name); free(cgrp); diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 31f8dcdbd7efefac48353b90715c027cf2c13386..d91966b97cbd7235f9fd756c50cd41ba7d7fadae 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -1,14 +1,14 @@ #ifndef __CGROUP_H__ #define __CGROUP_H__ -#include +#include struct option; struct cgroup_sel { char *name; int fd; - atomic_t refcnt; + refcount_t refcnt; }; diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index f0dcd0ee0afaa0505470ca382a9d597a5e88fb3a..4b4f00df58a8253c46e3e912753f8d2d25c06044 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -1,3 +1,4 @@ +#include #include #include "util.h" #include "../perf.h" diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index d0d465953d36e16c07038ea69182e71c44db0558..94a5a7d829d5ba32efe646f9c04729ab363ac593 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -3,10 +3,4 @@ unsigned long perf_event_open_cloexec_flag(void); -#ifdef __GLIBC_PREREQ -#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) -int sched_getcpu(void) __THROW; -#endif -#endif - #endif /* __PERF_CLOEXEC_H */ diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index a93997f16dec3b2b9815e4bf298a0928a52f4a1a..52122bcc31701fef202cbdb95e5502841def750c 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -1,6 +1,8 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include + /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ #define COLOR_MAXLEN 24 diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 21b7ff382c3f0dfb2e0bff43074adae3c0b10973..7bc981b6bf296ae000407a787bc30aaa22fa574b 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -1,13 +1,15 @@ #include "comm.h" #include "util.h" +#include #include #include -#include +#include +#include struct comm_str { char *str; struct rb_node rb_node; - atomic_t refcnt; + refcount_t refcnt; }; /* Should perhaps be moved to struct machine */ @@ -16,13 +18,13 @@ static struct rb_root comm_str_root; static struct comm_str *comm_str__get(struct comm_str *cs) { if (cs) - atomic_inc(&cs->refcnt); + refcount_inc(&cs->refcnt); return cs; } static void comm_str__put(struct comm_str *cs) { - if (cs && atomic_dec_and_test(&cs->refcnt)) { + if (cs && refcount_dec_and_test(&cs->refcnt)) { rb_erase(&cs->rb_node, &comm_str_root); zfree(&cs->str); free(cs); @@ -43,7 +45,7 @@ static struct comm_str *comm_str__alloc(const char *str) return NULL; } - atomic_set(&cs->refcnt, 0); + refcount_set(&cs->refcnt, 1); return cs; } @@ -61,7 +63,7 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) cmp = strcmp(str, iter->str); if (!cmp) - return iter; + return comm_str__get(iter); if (cmp < 0) p = &(*p)->rb_left; @@ -95,8 +97,6 @@ struct comm *comm__new(const char *str, u64 timestamp, bool exec) return NULL; } - comm_str__get(comm->comm_str); - return comm; } @@ -108,7 +108,6 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec) if (!new) return -ENOMEM; - comm_str__get(new); comm_str__put(old); comm->comm_str = new; comm->start = timestamp; diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h new file mode 100644 index 0000000000000000000000000000000000000000..67fd1bb7c2b75f51309a7af539d5637db24954fe --- /dev/null +++ b/tools/perf/util/compress.h @@ -0,0 +1,12 @@ +#ifndef PERF_COMPRESS_H +#define PERF_COMPRESS_H + +#ifdef HAVE_ZLIB_SUPPORT +int gzip_decompress_to_file(const char *input, int output_fd); +#endif + +#ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_to_file(const char *input, int output_fd); +#endif + +#endif /* PERF_COMPRESS_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 0c7d5a4975cd4a8f798533cbd15660c058652d37..8d724f0fa5a81f09ac2547010179ad60f1ed39fd 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -8,12 +8,19 @@ * Copyright (C) Johannes Schindelin, 2005 * */ +#include +#include #include "util.h" #include "cache.h" #include #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ #include "config.h" +#include +#include +#include + +#include "sane_ctype.h" #define MAXNAME (256) @@ -627,6 +634,8 @@ static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; const char *home = NULL; + char *user_config; + struct stat st; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) @@ -637,35 +646,41 @@ static int perf_config_set__init(struct perf_config_set *set) } home = getenv("HOME"); - if (perf_config_global() && home) { - char *user_config = strdup(mkpath("%s/.perfconfig", home)); - struct stat st; - if (user_config == NULL) { - warning("Not enough memory to process %s/.perfconfig, " - "ignoring it.", home); - goto out; - } + /* + * Skip reading user config if: + * - there is no place to read it from (HOME) + * - we are asked not to (PERF_CONFIG_NOGLOBAL=1) + */ + if (!home || !*home || !perf_config_global()) + return 0; - if (stat(user_config, &st) < 0) { - if (errno == ENOENT) - ret = 0; - goto out_free; - } + user_config = strdup(mkpath("%s/.perfconfig", home)); + if (user_config == NULL) { + warning("Not enough memory to process %s/.perfconfig, " + "ignoring it.", home); + goto out; + } - ret = 0; + if (stat(user_config, &st) < 0) { + if (errno == ENOENT) + ret = 0; + goto out_free; + } - if (st.st_uid && (st.st_uid != geteuid())) { - warning("File %s not owned by current user or root, " - "ignoring it.", user_config); - goto out_free; - } + ret = 0; - if (st.st_size) - ret = perf_config_from_file(collect_config, user_config, set); -out_free: - free(user_config); + if (st.st_uid && (st.st_uid != geteuid())) { + warning("File %s not owned by current user or root, " + "ignoring it.", user_config); + goto out_free; } + + if (st.st_size) + ret = perf_config_from_file(collect_config, user_config, set); + +out_free: + free(user_config); out: return ret; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index e3fde313deb27371de3782eaa76e1e2ee232fdf8..c4af82ab7808c2049216acad5efb2824fecabb1f 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -1,6 +1,8 @@ +#include #include #include "evsel.h" #include "counts.h" +#include "util.h" struct perf_counts *perf_counts__new(int ncpus, int nthreads) { diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8c750493911369976d0fa171e74f3b678f62c054..37b3bb79ee08db66122c30f6ae5af2473f005d7a 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -3,11 +3,14 @@ #include "../perf.h" #include "cpumap.h" #include +#include #include #include #include #include "asm/bug.h" +#include "sane_ctype.h" + static int max_cpu_num; static int max_present_cpu_num; static int max_node_num; @@ -29,7 +32,7 @@ static struct cpu_map *cpu_map__default_new(void) cpus->map[i] = i; cpus->nr = nr_cpus; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -43,7 +46,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) if (cpus != NULL) { cpus->nr = nr_cpus; memcpy(cpus->map, tmp_cpus, payload_size); - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -252,7 +255,7 @@ struct cpu_map *cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; cpus->map[0] = -1; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -269,7 +272,7 @@ struct cpu_map *cpu_map__empty_new(int nr) for (i = 0; i < nr; i++) cpus->map[i] = -1; - atomic_set(&cpus->refcnt, 1); + refcount_set(&cpus->refcnt, 1); } return cpus; @@ -278,7 +281,7 @@ struct cpu_map *cpu_map__empty_new(int nr) static void cpu_map__delete(struct cpu_map *map) { if (map) { - WARN_ONCE(atomic_read(&map->refcnt) != 0, + WARN_ONCE(refcount_read(&map->refcnt) != 0, "cpu_map refcnt unbalanced\n"); free(map); } @@ -287,13 +290,13 @@ static void cpu_map__delete(struct cpu_map *map) struct cpu_map *cpu_map__get(struct cpu_map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } void cpu_map__put(struct cpu_map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) cpu_map__delete(map); } @@ -357,7 +360,7 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); - atomic_set(&c->refcnt, 1); + refcount_set(&c->refcnt, 1); *res = c; return 0; } @@ -673,3 +676,49 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size) pr_debug("cpumask list: %s\n", buf); return ret; } + +static char hex_char(unsigned char val) +{ + if (val < 10) + return val + '0'; + if (val < 16) + return val - 10 + 'a'; + return '?'; +} + +size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) +{ + int i, cpu; + char *ptr = buf; + unsigned char *bitmap; + int last_cpu = cpu_map__cpu(map, map->nr - 1); + + bitmap = zalloc((last_cpu + 7) / 8); + if (bitmap == NULL) { + buf[0] = '\0'; + return 0; + } + + for (i = 0; i < map->nr; i++) { + cpu = cpu_map__cpu(map, i); + bitmap[cpu / 8] |= 1 << (cpu % 8); + } + + for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { + unsigned char bits = bitmap[cpu / 8]; + + if (cpu % 8) + bits >>= 4; + else + bits &= 0xf; + + *ptr++ = hex_char(bits); + if ((cpu % 32) == 0 && cpu > 0) + *ptr++ = ','; + } + *ptr = '\0'; + free(bitmap); + + buf[size - 1] = '\0'; + return ptr - buf; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 1a0549af8f5c944b4fc2b8b619164a107dcf613d..6b8bff87481d090a0e72f7391772bb307d0e1f7c 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -3,13 +3,13 @@ #include #include -#include +#include #include "perf.h" #include "util/debug.h" struct cpu_map { - atomic_t refcnt; + refcount_t refcnt; int nr; int map[]; }; @@ -20,6 +20,7 @@ struct cpu_map *cpu_map__dummy_new(void); struct cpu_map *cpu_map__new_data(struct cpu_map_data *data); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size); +size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index d4a5a21c2a7e2e47596444665b5f5828874da337..4b261c2ec0f10a8e85b68a8a16832ee54ea177b9 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -3,7 +3,7 @@ * * No surprises, and works with signed and unsigned chars. */ -#include "util.h" +#include "sane_ctype.h" enum { S = GIT_SPACE, diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 4e6cbc99f08efc608c2d73ffb9054672d25fb7d9..89d50318833d94e751dc693e6d100b85fce41c28 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -7,7 +7,10 @@ * Released under the GPL v2. (and only v2, not any later version) */ +#include +#include #include +#include #include #include #include @@ -27,6 +30,7 @@ #include "evsel.h" #include "machine.h" #include "config.h" +#include "sane_ctype.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -1468,6 +1472,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, .lost = perf_event__process_lost, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, + .namespaces = perf_event__process_namespaces, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 60bfc9ca1e22f3a1b10d58169d2ed70ccfe040cc..e84bbc8ec058916c968ae940ae662746495efe67 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 03eb81f30d0d0d471fdff09e8779ed02fd73eb36..a5b3777ffee601aa28e358c4f9928fd16088d1c8 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -2,19 +2,26 @@ #include "../perf.h" +#include #include #include #include +#include #include #include - +#ifdef HAVE_BACKTRACE_SUPPORT +#include +#endif #include "cache.h" #include "color.h" #include "event.h" #include "debug.h" +#include "print_binary.h" #include "util.h" #include "target.h" +#include "sane_ctype.h" + int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; @@ -244,3 +251,31 @@ void perf_debug_setup(void) { libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } + +/* Obtain a backtrace and print it to stdout. */ +#ifdef HAVE_BACKTRACE_SUPPORT +void dump_stack(void) +{ + void *array[16]; + size_t size = backtrace(array, ARRAY_SIZE(array)); + char **strings = backtrace_symbols(array, size); + size_t i; + + printf("Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf("%s\n", strings[i]); + + free(strings); +} +#else +void dump_stack(void) {} +#endif + +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + signal(sig, SIG_DFL); + raise(sig); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 98832f5531d3d0931b8a21560e56330bf1db5cdc..8a23ea1a71c7cff1bc24cd3dd063d261b5fc18db 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -56,4 +56,7 @@ int perf_debug_option(const char *str); void perf_debug_setup(void); int perf_quiet_option(void); +void dump_stack(void); +void sighandler_dump_stack(int sig); + #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 3e6062ab2cdda23e9b74b54ef4f021031905249b..cb66d334f5328690072193b5a902df50076786dc 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -7,6 +7,8 @@ #include "demangle-java.h" +#include "sane_ctype.h" + enum { MODE_PREFIX = 0, MODE_CLASS = 1, diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c index 1647f285c6296d62ef031b5e880732fefa25863f..eec754243f4d3b257ae1013875215d7c6ec007f3 100644 --- a/tools/perf/util/drv_configs.c +++ b/tools/perf/util/drv_configs.c @@ -17,6 +17,7 @@ #include "evlist.h" #include "evsel.h" #include "pmu.h" +#include static int perf_evsel__apply_drv_configs(struct perf_evsel *evsel, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d38b62a700ca126c293756baa83dbc27df61e53a..4e7ab611377acd56c1be78cbd983058e5ce5142b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1,12 +1,20 @@ #include +#include #include #include +#include +#include +#include +#include +#include "compress.h" +#include "path.h" #include "symbol.h" #include "dso.h" #include "machine.h" #include "auxtrace.h" #include "util.h" #include "debug.h" +#include "string2.h" #include "vdso.h" static const char * const debuglink_paths[] = { @@ -240,6 +248,64 @@ bool dso__needs_decompress(struct dso *dso) dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } +static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf) +{ + int fd = -1; + struct kmod_path m; + + if (!dso__needs_decompress(dso)) + return -1; + + if (kmod_path__parse_ext(&m, dso->long_name)) + return -1; + + if (!m.comp) + goto out; + + fd = mkstemp(tmpbuf); + if (fd < 0) { + dso->load_errno = errno; + goto out; + } + + if (!decompress_to_file(m.ext, name, fd)) { + dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; + close(fd); + fd = -1; + } + +out: + free(m.ext); + return fd; +} + +int dso__decompress_kmodule_fd(struct dso *dso, const char *name) +{ + char tmpbuf[] = KMOD_DECOMP_NAME; + int fd; + + fd = decompress_kmodule(dso, name, tmpbuf); + unlink(tmpbuf); + return fd; +} + +int dso__decompress_kmodule_path(struct dso *dso, const char *name, + char *pathname, size_t len) +{ + char tmpbuf[] = KMOD_DECOMP_NAME; + int fd; + + fd = decompress_kmodule(dso, name, tmpbuf); + if (fd < 0) { + unlink(tmpbuf); + return -1; + } + + strncpy(pathname, tmpbuf, len); + close(fd); + return 0; +} + /* * Parses kernel module specified in @path and updates * @m argument like: @@ -327,6 +393,21 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, return 0; } +void dso__set_module_info(struct dso *dso, struct kmod_path *m, + struct machine *machine) +{ + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (m->kmod && m->comp) + dso->symtab_type++; + + dso__set_short_name(dso, strdup(m->name), true); +} + /* * Global list of open DSOs and the counter. */ @@ -373,7 +454,7 @@ static int do_open(char *name) static int __open_dso(struct dso *dso, struct machine *machine) { - int fd; + int fd = -EINVAL; char *root_dir = (char *)""; char *name = malloc(PATH_MAX); @@ -384,15 +465,30 @@ static int __open_dso(struct dso *dso, struct machine *machine) root_dir = machine->root_dir; if (dso__read_binary_type_filename(dso, dso->binary_type, - root_dir, name, PATH_MAX)) { - free(name); - return -EINVAL; - } + root_dir, name, PATH_MAX)) + goto out; if (!is_regular_file(name)) - return -EINVAL; + goto out; + + if (dso__needs_decompress(dso)) { + char newpath[KMOD_DECOMP_LEN]; + size_t len = sizeof(newpath); + + if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) { + fd = -dso->load_errno; + goto out; + } + + strcpy(name, newpath); + } fd = do_open(name); + + if (dso__needs_decompress(dso)) + unlink(name); + +out: free(name); return fd; } @@ -1109,7 +1205,7 @@ struct dso *dso__new(const char *name) INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); pthread_mutex_init(&dso->lock, NULL); - atomic_set(&dso->refcnt, 1); + refcount_set(&dso->refcnt, 1); } return dso; @@ -1147,13 +1243,13 @@ void dso__delete(struct dso *dso) struct dso *dso__get(struct dso *dso) { if (dso) - atomic_inc(&dso->refcnt); + refcount_inc(&dso->refcnt); return dso; } void dso__put(struct dso *dso) { - if (dso && atomic_dec_and_test(&dso->refcnt)) + if (dso && refcount_dec_and_test(&dso->refcnt)) dso__delete(dso); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ecc4bbd3f82e3e172a89480d04466ba95eab7b09..bd061ba7b47cc8eab2ff05e4027dc22990d7c7ef 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -1,7 +1,7 @@ #ifndef __PERF_DSO #define __PERF_DSO -#include +#include #include #include #include @@ -187,7 +187,7 @@ struct dso { void *priv; u64 db_id; }; - atomic_t refcnt; + refcount_t refcnt; char name[0]; }; @@ -244,6 +244,12 @@ bool is_supported_compression(const char *ext); bool is_kernel_module(const char *pathname, int cpumode); bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); +int dso__decompress_kmodule_fd(struct dso *dso, const char *name); +int dso__decompress_kmodule_path(struct dso *dso, const char *name, + char *pathname, size_t len); + +#define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX" +#define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME) struct kmod_path { char *name; @@ -259,6 +265,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, #define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) #define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) +void dso__set_module_info(struct dso *dso, struct kmod_path *m, + struct machine *machine); + /* * The dso__data_* external interface provides following functions: * dso__data_get_fd diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c new file mode 100644 index 0000000000000000000000000000000000000000..ffbdb19f05d01d2a2204551867cd9ef15da753aa --- /dev/null +++ b/tools/perf/util/dump-insn.c @@ -0,0 +1,14 @@ +#include +#include "dump-insn.h" + +/* Fallback code */ + +__weak +const char *dump_insn(struct perf_insn *x __maybe_unused, + u64 ip __maybe_unused, u8 *inbuf __maybe_unused, + int inlen __maybe_unused, int *lenp) +{ + if (lenp) + *lenp = 0; + return "?"; +} diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h new file mode 100644 index 0000000000000000000000000000000000000000..90fb115981cf5d3900d4eb1dc5202c2e51bf659c --- /dev/null +++ b/tools/perf/util/dump-insn.h @@ -0,0 +1,22 @@ +#ifndef __PERF_DUMP_INSN_H +#define __PERF_DUMP_INSN_H 1 + +#define MAXINSN 15 + +#include + +struct thread; + +struct perf_insn { + /* Initialized by callers: */ + struct thread *thread; + u8 cpumode; + bool is64bit; + int cpu; + /* Temporary */ + char out[256]; +}; + +const char *dump_insn(struct perf_insn *x, u64 ip, + u8 *inbuf, int inlen, int *lenp); +#endif diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 41e068e943499d2e7ce7a1794936eff09afd7077..f5acda13dcfa7daef429510fbfde922ea28a2789 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -17,10 +17,13 @@ * */ +#include +#include #include #include "util.h" #include "debug.h" #include "dwarf-aux.h" +#include "string2.h" /** * cu_find_realpath - Find the realpath of the target file diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 62bc4a86a970f8a7cda3cac3f7626e1014408027..c708395b3cb661933ed0357dfafd3528614870f7 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -8,6 +8,7 @@ #include #include #include +#include #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 075fc77286bf05feb5cca14fc3825f34d0fdaed1..9e21538c42ae96747b671e4f27a9f73ff83dc6ec 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,6 +1,7 @@ #include "cpumap.h" #include "env.h" #include "util.h" +#include struct perf_env perf_env; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4ea7ce72ed9c8e3fb92ffca44b3f51fb09f6b01f..dc5c3bb69d73882ebbac88dfec2d16c55b56355e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,15 +1,24 @@ +#include +#include +#include +#include #include -#include /* To get things like MAP_HUGETLB even on older libc headers */ +#include +#include +#include +#include /* To get things like MAP_HUGETLB even on older libc headers */ #include +#include #include "event.h" #include "debug.h" #include "hist.h" #include "machine.h" #include "sort.h" -#include "string.h" +#include "string2.h" #include "strlist.h" #include "thread.h" #include "thread_map.h" +#include "sane_ctype.h" #include "symbol/kallsyms.h" #include "asm/bug.h" #include "stat.h" @@ -31,6 +40,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", [PERF_RECORD_SWITCH] = "SWITCH", [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", + [PERF_RECORD_NAMESPACES] = "NAMESPACES", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -49,6 +59,16 @@ static const char *perf_event__names[] = { [PERF_RECORD_TIME_CONV] = "TIME_CONV", }; +static const char *perf_ns__names[] = { + [NET_NS_INDEX] = "net", + [UTS_NS_INDEX] = "uts", + [IPC_NS_INDEX] = "ipc", + [PID_NS_INDEX] = "pid", + [USER_NS_INDEX] = "user", + [MNT_NS_INDEX] = "mnt", + [CGROUP_NS_INDEX] = "cgroup", +}; + const char *perf_event__name(unsigned int id) { if (id >= ARRAY_SIZE(perf_event__names)) @@ -58,6 +78,13 @@ const char *perf_event__name(unsigned int id) return perf_event__names[id]; } +static const char *perf_ns__name(unsigned int id) +{ + if (id >= ARRAY_SIZE(perf_ns__names)) + return "UNKNOWN"; + return perf_ns__names[id]; +} + static int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, @@ -88,7 +115,7 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, int fd; size_t size = 0; ssize_t n; - char *nl, *name, *tgids, *ppids; + char *name, *tgids, *ppids; *tgid = -1; *ppid = -1; @@ -115,10 +142,10 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, ppids = strstr(bf, "PPid:"); if (name) { - name += 5; /* strlen("Name:") */ + char *nl; - while (*name && isspace(*name)) - ++name; + name += 5; /* strlen("Name:") */ + name = ltrim(name); nl = strchr(name, '\n'); if (nl) @@ -203,6 +230,58 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, return tgid; } +static void perf_event__get_ns_link_info(pid_t pid, const char *ns, + struct perf_ns_link_info *ns_link_info) +{ + struct stat64 st; + char proc_ns[128]; + + sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns); + if (stat64(proc_ns, &st) == 0) { + ns_link_info->dev = st.st_dev; + ns_link_info->ino = st.st_ino; + } +} + +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine) +{ + u32 idx; + struct perf_ns_link_info *ns_link_info; + + if (!tool || !tool->namespace_events) + return 0; + + memset(&event->namespaces, 0, (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size)); + + event->namespaces.pid = tgid; + event->namespaces.tid = pid; + + event->namespaces.nr_namespaces = NR_NAMESPACES; + + ns_link_info = event->namespaces.link_info; + + for (idx = 0; idx < event->namespaces.nr_namespaces; idx++) + perf_event__get_ns_link_info(pid, perf_ns__name(idx), + &ns_link_info[idx]); + + event->namespaces.header.type = PERF_RECORD_NAMESPACES; + + event->namespaces.header.size = (sizeof(event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + + if (perf_tool__process_synth_event(tool, event, machine, process) != 0) + return -1; + + return 0; +} + static int perf_event__synthesize_fork(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, pid_t ppid, @@ -255,8 +334,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/maps", - machine->root_dir, pid); + snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); fp = fopen(filename, "r"); if (fp == NULL) { @@ -434,8 +513,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool, static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, union perf_event *fork_event, + union perf_event *namespaces_event, pid_t pid, int full, - perf_event__handler_t process, + perf_event__handler_t process, struct perf_tool *tool, struct machine *machine, bool mmap_data, @@ -455,6 +535,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (tgid == -1) return -1; + if (perf_event__synthesize_namespaces(tool, namespaces_event, pid, + tgid, process, machine) < 0) + return -1; + + return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data, proc_map_timeout); @@ -488,6 +573,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, ppid, process, machine) < 0) break; + + if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid, + tgid, process, machine) < 0) + break; + /* * Send the prepared comm event */ @@ -516,6 +606,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, unsigned int proc_map_timeout) { union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; int err = -1, thread, j; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -530,10 +621,16 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + err = 0; for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, - fork_event, + fork_event, namespaces_event, thread_map__pid(threads, thread), 0, process, tool, machine, mmap_data, proc_map_timeout)) { @@ -559,7 +656,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, /* if not, generate events for it */ if (need_leader && __event__synthesize_thread(comm_event, mmap_event, - fork_event, + fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, mmap_data, proc_map_timeout)) { @@ -568,6 +665,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, } } } + free(namespaces_event); +out_free_fork: free(fork_event); out_free_mmap: free(mmap_event); @@ -587,6 +686,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, char proc_path[PATH_MAX]; struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; + union perf_event *namespaces_event; int err = -1; if (machine__is_default_guest(machine)) @@ -604,11 +704,17 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; + namespaces_event = malloc(sizeof(namespaces_event->namespaces) + + (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + + machine->id_hdr_size); + if (namespaces_event == NULL) + goto out_free_fork; + snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); proc = opendir(proc_path); if (proc == NULL) - goto out_free_fork; + goto out_free_namespaces; while ((dirent = readdir(proc)) != NULL) { char *end; @@ -620,13 +726,16 @@ int perf_event__synthesize_threads(struct perf_tool *tool, * We may race with exiting thread, so don't stop just because * one thread couldn't be synthesized. */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, - 1, process, tool, machine, mmap_data, + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data, proc_map_timeout); } err = 0; closedir(proc); +out_free_namespaces: + free(namespaces_event); out_free_fork: free(fork_event); out_free_mmap: @@ -659,15 +768,16 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } -u64 kallsyms__get_function_start(const char *kallsyms_filename, - const char *symbol_name) +int kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name, u64 *addr) { struct process_symbol_args args = { .name = symbol_name, }; if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0) - return 0; + return -1; - return args.start; + *addr = args.start; + return 0; } int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, @@ -1008,6 +1118,33 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp) +{ + size_t ret = 0; + struct perf_ns_link_info *ns_link_info; + u32 nr_namespaces, idx; + + ns_link_info = event->namespaces.link_info; + nr_namespaces = event->namespaces.nr_namespaces; + + ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[", + event->namespaces.pid, + event->namespaces.tid, + nr_namespaces); + + for (idx = 0; idx < nr_namespaces; idx++) { + if (idx && (idx % 4 == 0)) + ret += fprintf(fp, "\n\t\t "); + + ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx, + perf_ns__name(idx), (u64)ns_link_info[idx].dev, + (u64)ns_link_info[idx].ino, + ((idx + 1) != nr_namespaces) ? ", " : "]\n"); + } + + return ret; +} + int perf_event__process_comm(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -1016,6 +1153,14 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused, return machine__process_comm_event(machine, event, sample); } +int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_namespaces_event(machine, event, sample); +} + int perf_event__process_lost(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -1153,11 +1298,12 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused, size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n", + return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", - event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : ""); + event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) @@ -1196,6 +1342,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_MMAP: ret += perf_event__fprintf_mmap(event, fp); break; + case PERF_RECORD_NAMESPACES: + ret += perf_event__fprintf_namespaces(event, fp); + break; case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c735c53a26f8f6dcb37727299e0560d07a5f6ddb..7c3fa1c8cbcd4fdcde2074f354798adf59f01e67 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -3,9 +3,9 @@ #include #include +#include #include "../perf.h" -#include "map.h" #include "build-id.h" #include "perf_regs.h" @@ -39,6 +39,13 @@ struct comm_event { char comm[16]; }; +struct namespaces_event { + struct perf_event_header header; + u32 pid, tid; + u64 nr_namespaces; + struct perf_ns_link_info link_info[]; +}; + struct fork_event { struct perf_event_header header; u32 pid, ppid; @@ -222,7 +229,7 @@ struct build_id_event { enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, - PERF_RECORD_HEADER_EVENT_TYPE = 65, /* depreceated */ + PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ PERF_RECORD_HEADER_TRACING_DATA = 66, PERF_RECORD_HEADER_BUILD_ID = 67, PERF_RECORD_FINISHED_ROUND = 68, @@ -269,6 +276,7 @@ struct events_stats { u64 total_lost; u64 total_lost_samples; u64 total_aux_lost; + u64 total_aux_partial; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; @@ -485,6 +493,7 @@ union perf_event { struct mmap_event mmap; struct mmap2_event mmap2; struct comm_event comm; + struct namespaces_event namespaces; struct fork_event fork; struct lost_event lost; struct lost_samples_event lost_samples; @@ -587,6 +596,10 @@ int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_namespaces(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -636,6 +649,12 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(struct perf_tool *tool, + union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct machine *machine); + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -653,12 +672,21 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); -u64 kallsyms__get_function_start(const char *kallsyms_filename, - const char *symbol_name); +int kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name, u64 *addr); void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max); void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map, u16 type, int max); + +void event_attr_init(struct perf_event_attr *attr); + +int perf_event_paranoid(void); + +extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b601f2814a30e9d7d2b6f689b5e495659c34e600..46c0faf6c502b9ae31ac09901bbca31de61691bf 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -8,6 +8,8 @@ */ #include "util.h" #include +#include +#include #include #include "cpumap.h" #include "thread_map.h" @@ -15,12 +17,15 @@ #include "evlist.h" #include "evsel.h" #include "debug.h" +#include "units.h" #include "asm/bug.h" +#include #include #include "parse-events.h" #include +#include #include #include @@ -777,7 +782,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messu /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return NULL; head = perf_mmap__read_head(md); @@ -794,7 +799,7 @@ perf_mmap__read_backward(struct perf_mmap *md) /* * Check if event was unmapped due to a POLLHUP/POLLERR. */ - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return NULL; head = perf_mmap__read_head(md); @@ -856,7 +861,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md) { u64 head; - if (!atomic_read(&md->refcnt)) + if (!refcount_read(&md->refcnt)) return; head = perf_mmap__read_head(md); @@ -875,14 +880,14 @@ static bool perf_mmap__empty(struct perf_mmap *md) static void perf_mmap__get(struct perf_mmap *map) { - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); } static void perf_mmap__put(struct perf_mmap *md) { - BUG_ON(md->base && atomic_read(&md->refcnt) == 0); + BUG_ON(md->base && refcount_read(&md->refcnt) == 0); - if (atomic_dec_and_test(&md->refcnt)) + if (refcount_dec_and_test(&md->refcnt)) perf_mmap__munmap(md); } @@ -894,7 +899,7 @@ void perf_mmap__consume(struct perf_mmap *md, bool overwrite) perf_mmap__write_tail(md, old); } - if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) + if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) perf_mmap__put(md); } @@ -937,7 +942,7 @@ static void perf_mmap__munmap(struct perf_mmap *map) munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; - atomic_set(&map->refcnt, 0); + refcount_set(&map->refcnt, 0); } auxtrace_mmap__munmap(&map->auxtrace_mmap); } @@ -974,8 +979,19 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) if (!map) return NULL; - for (i = 0; i < evlist->nr_mmaps; i++) + for (i = 0; i < evlist->nr_mmaps; i++) { map[i].fd = -1; + /* + * When the perf_mmap() call is made we grab one refcount, plus + * one extra to let perf_evlist__mmap_consume() get the last + * events after all real references (perf_mmap__get()) are + * dropped. + * + * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and + * thus does perf_mmap__get() on it. + */ + refcount_set(&map[i].refcnt, 0); + } return map; } @@ -1001,7 +1017,7 @@ static int perf_mmap__mmap(struct perf_mmap *map, * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). */ - atomic_set(&map->refcnt, 2); + refcount_set(&map->refcnt, 2); map->prev = 0; map->mask = mp->mask; map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 389b9ccdf8c75db39d8e2163d6909a4db01e65a8..94cea4398a13aeefb1eff28fdd2defa3194209f0 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,7 +1,8 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 -#include +#include +#include #include #include #include @@ -10,6 +11,7 @@ #include "evsel.h" #include "util.h" #include "auxtrace.h" +#include #include struct pollfd; @@ -29,7 +31,7 @@ struct perf_mmap { void *base; int mask; int fd; - atomic_t refcnt; + refcount_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac59710b79e0b4cbee35cacc1e40a2102e091d8e..cda44b0e821c63baea1f9da52123184768fdd2f4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,16 +8,20 @@ */ #include +#include +#include #include #include #include #include #include #include +#include #include #include "asm/bug.h" #include "callchain.h" #include "cgroup.h" +#include "event.h" #include "evsel.h" #include "evlist.h" #include "util.h" @@ -30,6 +34,8 @@ #include "stat.h" #include "util/parse-branch-options.h" +#include "sane_ctype.h" + static struct { bool sample_id_all; bool exclude_guest; @@ -236,6 +242,10 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); perf_evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; + evsel->metric_expr = NULL; + evsel->metric_name = NULL; + evsel->metric_events = NULL; + evsel->collect_stat = false; } struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -263,8 +273,20 @@ struct perf_evsel *perf_evsel__new_cycles(void) struct perf_evsel *evsel; event_attr_init(&attr); + /* + * Unnamed union member, not supported as struct member named + * initializer in older compilers such as gcc 4.4.7 + * + * Just for probing the precise_ip: + */ + attr.sample_period = 1; perf_event_attr__set_max_precise_ip(&attr); + /* + * Now let the usual logic to set up the perf_event_attr defaults + * to kick in when we return and before perf_evsel__open() is called. + */ + attr.sample_period = 0; evsel = perf_evsel__new(&attr); if (evsel == NULL) @@ -932,6 +954,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; + if (opts->record_namespaces) + attr->namespaces = track; + if (opts->record_switch_events) attr->context_switch = track; @@ -1232,7 +1257,7 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0) + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) return -errno; return 0; @@ -1250,7 +1275,7 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) + if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) return -errno; perf_evsel__compute_deltas(evsel, cpu, thread, &count); @@ -2450,11 +2475,17 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, int err, char *msg, size_t size) { char sbuf[STRERR_BUFSIZE]; + int printed = 0; switch (err) { case EPERM: case EACCES: - return scnprintf(msg, size, + if (err == EPERM) + printed = scnprintf(msg, size, + "No permission to enable %s event.\n\n", + perf_evsel__name(evsel)); + + return scnprintf(msg + printed, size - printed, "You may not have permission to collect %sstats.\n\n" "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" "which controls use of the performance events system by\n" diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 06ef6f29efa12a6c1300eca641e9446a0a4d17f8..d101695c482cfb288db70b1f268372d4f4323f65 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -131,6 +131,11 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool merged_stat; + const char * metric_expr; + const char * metric_name; + struct perf_evsel **metric_events; + bool collect_stat; }; union u64_swap { diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 4ef5184819a0975d592aecff5204d9d273494b4f..583f3a602506f29f198d7153b8ad8ee8073a6fd9 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -1,10 +1,13 @@ +#include #include #include #include #include "evsel.h" #include "callchain.h" #include "map.h" +#include "strlist.h" #include "symbol.h" +#include "srcline.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) { @@ -166,6 +169,38 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (!print_oneline) printed += fprintf(fp, "\n"); + if (symbol_conf.inline_name && node->map) { + struct inline_node *inode; + + addr = map__rip_2objdump(node->map, node->ip), + inode = dso__parse_addr_inlines(node->map->dso, addr); + + if (inode) { + struct inline_list *ilist; + + list_for_each_entry(ilist, &inode->val, list) { + if (print_arrow) + printed += fprintf(fp, " <-"); + + /* IP is same, just skip it */ + if (print_ip) + printed += fprintf(fp, "%c%16s", + s, ""); + if (print_sym) + printed += fprintf(fp, " %s", + ilist->funcname); + if (print_srcline) + printed += fprintf(fp, "\n %s:%d", + ilist->filename, + ilist->line_nr); + if (!print_oneline) + printed += fprintf(fp, "\n"); + } + + inline_node__delete(inode); + } + } + if (symbol_conf.bt_stop_list && node->sym && strlist__has_entry(symbol_conf.bt_stop_list, diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h new file mode 100644 index 0000000000000000000000000000000000000000..9c2760a1a96ea8de6d1338e8b1a93b3696e770c8 --- /dev/null +++ b/tools/perf/util/expr.h @@ -0,0 +1,25 @@ +#ifndef PARSE_CTX_H +#define PARSE_CTX_H 1 + +#define EXPR_MAX_OTHER 8 +#define MAX_PARSE_ID EXPR_MAX_OTHER + +struct parse_id { + const char *name; + double val; +}; + +struct parse_ctx { + int num_ids; + struct parse_id ids[MAX_PARSE_ID]; +}; + +void expr__ctx_init(struct parse_ctx *ctx); +void expr__add_id(struct parse_ctx *ctx, const char *id, double val); +#ifndef IN_EXPR_Y +int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); +#endif +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_other); + +#endif diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y new file mode 100644 index 0000000000000000000000000000000000000000..954556bea36ef758ef31c5f7a1e723cc8fe50db0 --- /dev/null +++ b/tools/perf/util/expr.y @@ -0,0 +1,173 @@ +/* Simple expression parser */ +%{ +#include "util.h" +#include "util/debug.h" +#define IN_EXPR_Y 1 +#include "expr.h" +#include + +#define MAXIDLEN 256 +%} + +%pure-parser +%parse-param { double *final_val } +%parse-param { struct parse_ctx *ctx } +%parse-param { const char **pp } +%lex-param { const char **pp } + +%union { + double num; + char id[MAXIDLEN+1]; +} + +%token NUMBER +%token ID +%left '|' +%left '^' +%left '&' +%left '-' '+' +%left '*' '/' '%' +%left NEG NOT +%type expr + +%{ +static int expr__lex(YYSTYPE *res, const char **pp); + +static void expr__error(double *final_val __maybe_unused, + struct parse_ctx *ctx __maybe_unused, + const char **pp __maybe_unused, + const char *s) +{ + pr_debug("%s\n", s); +} + +static int lookup_id(struct parse_ctx *ctx, char *id, double *val) +{ + int i; + + for (i = 0; i < ctx->num_ids; i++) { + if (!strcasecmp(ctx->ids[i].name, id)) { + *val = ctx->ids[i].val; + return 0; + } + } + return -1; +} + +%} +%% + +all_expr: expr { *final_val = $1; } + ; + +expr: NUMBER + | ID { if (lookup_id(ctx, $1, &$$) < 0) { + pr_debug("%s not found", $1); + YYABORT; + } + } + | expr '+' expr { $$ = $1 + $3; } + | expr '-' expr { $$ = $1 - $3; } + | expr '*' expr { $$ = $1 * $3; } + | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; } + | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; } + | '-' expr %prec NEG { $$ = -$2; } + | '(' expr ')' { $$ = $2; } + ; + +%% + +static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) +{ + char *dst = res->id; + const char *s = p; + + while (isalnum(*p) || *p == '_' || *p == '.') { + if (p - s >= MAXIDLEN) + return -1; + *dst++ = *p++; + } + *dst = 0; + *pp = p; + return ID; +} + +static int expr__lex(YYSTYPE *res, const char **pp) +{ + int tok; + const char *s; + const char *p = *pp; + + while (isspace(*p)) + p++; + s = p; + switch (*p++) { + case 'a' ... 'z': + case 'A' ... 'Z': + return expr__symbol(res, p - 1, pp); + case '0' ... '9': case '.': + res->num = strtod(s, (char **)&p); + tok = NUMBER; + break; + default: + tok = *s; + break; + } + *pp = p; + return tok; +} + +/* Caller must make sure id is allocated */ +void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +{ + int idx; + assert(ctx->num_ids < MAX_PARSE_ID); + idx = ctx->num_ids++; + ctx->ids[idx].name = name; + ctx->ids[idx].val = val; +} + +void expr__ctx_init(struct parse_ctx *ctx) +{ + ctx->num_ids = 0; +} + +int expr__find_other(const char *p, const char *one, const char ***other, + int *num_otherp) +{ + const char *orig = p; + int err = -1; + int num_other; + + *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); + if (!*other) + return -1; + + num_other = 0; + for (;;) { + YYSTYPE val; + int tok = expr__lex(&val, &p); + if (tok == 0) { + err = 0; + break; + } + if (tok == ID && strcasecmp(one, val.id)) { + if (num_other >= EXPR_MAX_OTHER - 1) { + pr_debug("Too many extra events in %s\n", orig); + break; + } + (*other)[num_other] = strdup(val.id); + if (!(*other)[num_other]) + return -1; + num_other++; + } + } + (*other)[num_other] = NULL; + *num_otherp = num_other; + if (err) { + *num_otherp = 0; + free(*other); + *other = NULL; + } + return err; +} diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 05714d548584b30297892854469c584d280f3a8b..b5baff3007bbd477551cc2e62770e534d0919f33 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1,4 +1,8 @@ +#include +#include #include "util.h" +#include "string2.h" +#include #include #include #include @@ -7,11 +11,15 @@ #include #include #include +#include +#include #include +#include #include "evlist.h" #include "evsel.h" #include "header.h" +#include "memswap.h" #include "../perf.h" #include "trace-event.h" #include "session.h" @@ -26,6 +34,8 @@ #include #include "asm/bug.h" +#include "sane_ctype.h" + /* * magic2 = "PERFILE2" * must be a numerical value to let the endianness @@ -370,15 +380,11 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; - char proc[32]; u32 n; int i, ret; - /* - * actual atual path to perf binary - */ - sprintf(proc, "/proc/%d/exe", getpid()); - ret = readlink(proc, buf, sizeof(buf)); + /* actual path to perf binary */ + ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); if (ret <= 0) return -1; @@ -835,7 +841,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, /* * default get_cpuid(): nothing gets recorded - * actual implementation must be in arch/$(ARCH)/util/header.c + * actual implementation must be in arch/$(SRCARCH)/util/header.c */ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) { @@ -1463,8 +1469,16 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, cpumode)) - dso->kernel = dso_type; + if (dso_type != DSO_TYPE_USER) { + struct kmod_path m = { .name = NULL, }; + + if (!kmod_path__parse_name(&m, filename) && m.kmod) + dso__set_module_info(dso, &m, machine); + else + dso->kernel = dso_type; + + free(m.name); + } build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); @@ -2274,6 +2288,9 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) perf_header__process_sections(header, fd, &hd, perf_file_section__fprintf_info); + if (session->file->is_pipe) + return 0; + fprintf(fp, "# missing features: "); for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) { if (bit) diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index 2821f8d77e5208116cbd5832f9b9833d0764221e..1c88ad6425b8ce722a5fad4ebe0ee99509dc4462 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -1,21 +1,18 @@ #include "cache.h" #include "config.h" +#include #include #include #include "../builtin.h" #include "levenshtein.h" static int autocorrect; -static struct cmdnames aliases; static int perf_unknown_cmd_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "help.autocorrect")) autocorrect = perf_config_int(var,value); - /* Also use aliases for command lookup */ - if (!prefixcmp(var, "alias.")) - add_cmdname(&aliases, var + 6, strlen(var + 6)); return 0; } @@ -59,14 +56,12 @@ const char *help_unknown_cmd(const char *cmd) memset(&main_cmds, 0, sizeof(main_cmds)); memset(&other_cmds, 0, sizeof(main_cmds)); - memset(&aliases, 0, sizeof(aliases)); perf_config(perf_unknown_cmd_config, NULL); load_command_list("perf-", &main_cmds, &other_cmds); - if (add_cmd_list(&main_cmds, &aliases) < 0 || - add_cmd_list(&main_cmds, &other_cmds) < 0) { + if (add_cmd_list(&main_cmds, &other_cmds) < 0) { fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n"); goto end; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index eaf72a938fb423ed4ba46982c69324d2341839bc..cf0186a088c17f90c2357f1694af2241addf4eef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -3,12 +3,17 @@ #include "hist.h" #include "map.h" #include "session.h" +#include "namespaces.h" #include "sort.h" #include "evlist.h" #include "evsel.h" #include "annotate.h" +#include "srcline.h" +#include "thread.h" #include "ui/progress.h" +#include #include +#include static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); @@ -169,6 +174,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + hists__new_col_len(hists, HISTC_CGROUP_ID, 20); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); @@ -574,9 +580,14 @@ __hists__add_entry(struct hists *hists, bool sample_self, struct hist_entry_ops *ops) { + struct namespaces *ns = thread__namespaces(al->thread); struct hist_entry entry = { .thread = al->thread, .comm = thread__comm(al->thread), + .cgroup_id = { + .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0, + .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0, + }, .ms = { .map = al->map, .sym = al->sym, @@ -1129,6 +1140,11 @@ void hist_entry__delete(struct hist_entry *he) zfree(&he->mem_info); } + if (he->inline_node) { + inline_node__delete(he->inline_node); + he->inline_node = NULL; + } + zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) @@ -2447,7 +2463,7 @@ int parse_filter_percentage(const struct option *opt __maybe_unused, else if (!strcmp(arg, "absolute")) symbol_conf.filter_relative = false; else { - pr_debug("Invalud percentage: %s\n", arg); + pr_debug("Invalid percentage: %s\n", arg); return -1; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 28c216e3d5b72557825a8918ab2aa6d6369c40f8..ee3670a388df8f825fd6c79cb698e5fb6d7e5695 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -30,6 +30,7 @@ enum hist_column { HISTC_DSO, HISTC_THREAD, HISTC_COMM, + HISTC_CGROUP_ID, HISTC_PARENT, HISTC_CPU, HISTC_SOCKET, @@ -57,6 +58,7 @@ enum hist_column { HISTC_SRCLINE_FROM, HISTC_SRCLINE_TO, HISTC_TRACE, + HISTC_SYM_SIZE, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 6c2eb5da4afc0d290786881ebd9a7d9c5f83775f..b2834ac7b1f558fd8ecbf4b680cfa0d290ab5987 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -14,7 +14,9 @@ */ #include +#include #include +#include #include #include #include diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 4f3c758d875d6ce6855db7fa0731436f9bb06671..54818828023bf4ad0960165d26afce60fe7166d1 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -26,6 +26,7 @@ #include "insn.c" #include "intel-pt-insn-decoder.h" +#include "dump-insn.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small @@ -39,6 +40,8 @@ static void intel_pt_insn_decoder(struct insn *insn, enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; int ext; + intel_pt_insn->rel = 0; + if (insn_is_avx(insn)) { intel_pt_insn->op = INTEL_PT_OP_OTHER; intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; @@ -177,6 +180,29 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } +const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, + u8 *inbuf, int inlen, int *lenp) +{ + struct insn insn; + int n, i; + int left; + + insn_init(&insn, inbuf, inlen, x->is64bit); + insn_get_length(&insn); + if (!insn_complete(&insn) || insn.length > inlen) + return ""; + if (lenp) + *lenp = insn.length; + left = sizeof(x->out); + n = snprintf(x->out, left, "insn: "); + left -= n; + for (i = 0; i < insn.length; i++) { + n += snprintf(x->out + n, left, "%02x ", inbuf[i]); + left -= n; + } + return x->out; +} + const char *branch_name[] = { [INTEL_PT_OP_OTHER] = "Other", [INTEL_PT_OP_CALL] = "Call", diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index da20cd5612e97f53853a3fdfb79502cdfa5d3a6b..4c7718f87a0890ee64e2f78e0b97291b67ecf33e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -13,6 +13,7 @@ * */ +#include #include #include #include @@ -22,6 +23,7 @@ #include "../perf.h" #include "session.h" #include "machine.h" +#include "memswap.h" #include "sort.h" #include "tool.h" #include "event.h" diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index c9a941ef0f6dde0c88da23a6d233d120646c3257..9084930e17571f890710a63dbdd885b9ee65a2aa 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -9,13 +10,13 @@ #include #include #include +#include #include "util.h" #include "event.h" #include "debug.h" #include "evlist.h" #include "symbol.h" -#include "strlist.h" #include #include "tsc.h" @@ -25,6 +26,8 @@ #include "genelf.h" #include "../builtin.h" +#include "sane_ctype.h" + struct jit_buf_desc { struct perf_data_file *output; struct perf_session *session; @@ -181,7 +184,7 @@ jit_open(struct jit_buf_desc *jd, const char *name) jd->use_arch_timestamp); if (header.version > JITHEADER_VERSION) { - pr_err("wrong jitdump version %u, expected " STR(JITHEADER_VERSION), + pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION), header.version); goto error; } diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 824356488ce6a8c7d9ecda1748456b262f0a7114..c6a15f204c03827bec7fa4cfdf879d8d9d5b89fc 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -12,6 +12,7 @@ #include "llvm-utils.h" #include "config.h" #include "util.h" +#include #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 9ddea5cecd94b57f9d177f94f447d89f4716a5a8..4ca7c5c6cdcd0a9e2aeeefad5cd08c9fe9020c8f 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -1,6 +1,8 @@ +#include #include #include #include +#include "compress.h" #include "util.h" #include "debug.h" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 71c9720d49730066dc1100defee124a4d3c198b9..d7f31cb0a4cbeb41e6c02d58323a8848ff82cfc4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,3 +1,7 @@ +#include +#include +#include +#include #include "callchain.h" #include "debug.h" #include "event.h" @@ -10,9 +14,15 @@ #include "thread.h" #include "vdso.h" #include -#include +#include +#include +#include #include "unwind.h" #include "linux/hash.h" +#include "asm/bug.h" + +#include "sane_ctype.h" +#include static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); @@ -501,6 +511,37 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event return err; } +int machine__process_namespaces_event(struct machine *machine __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct thread *thread = machine__findnew_thread(machine, + event->namespaces.pid, + event->namespaces.tid); + int err = 0; + + WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES, + "\nWARNING: kernel seems to support more namespaces than perf" + " tool.\nTry updating the perf tool..\n\n"); + + WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES, + "\nWARNING: perf tool seems to support more namespaces than" + " the kernel.\nTry updating the kernel..\n\n"); + + if (dump_trace) + perf_event__fprintf_namespaces(event, stdout); + + if (thread == NULL || + thread__set_namespaces(thread, sample->time, &event->namespaces)) { + dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n"); + err = -1; + } + + thread__put(thread); + + return err; +} + int machine__process_lost_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { @@ -531,16 +572,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, if (dso == NULL) goto out_unlock; - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; - - /* _KMODULE_COMP should be next to _KMODULE */ - if (m->kmod && m->comp) - dso->symtab_type++; - - dso__set_short_name(dso, strdup(m->name), true); + dso__set_module_info(dso, m, machine); dso__set_long_name(dso, strdup(filename), true); } @@ -755,11 +787,11 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; * Returns the name of the start symbol in *symbol_name. Pass in NULL as * symbol_name if it's not that important. */ -static u64 machine__get_running_kernel_start(struct machine *machine, - const char **symbol_name) +static int machine__get_running_kernel_start(struct machine *machine, + const char **symbol_name, u64 *start) { char filename[PATH_MAX]; - int i; + int i, err = -1; const char *name; u64 addr = 0; @@ -769,21 +801,28 @@ static u64 machine__get_running_kernel_start(struct machine *machine, return 0; for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { - addr = kallsyms__get_function_start(filename, name); - if (addr) + err = kallsyms__get_function_start(filename, name, &addr); + if (!err) break; } + if (err) + return -1; + if (symbol_name) *symbol_name = name; - return addr; + *start = addr; + return 0; } int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; - u64 start = machine__get_running_kernel_start(machine, NULL); + u64 start = 0; + + if (machine__get_running_kernel_start(machine, NULL, &start)) + return -1; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); @@ -1144,8 +1183,8 @@ static int machine__create_modules(struct machine *machine) int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); - const char *name; - u64 addr; + const char *name = NULL; + u64 addr = 0; int ret; if (kernel == NULL) @@ -1170,8 +1209,7 @@ int machine__create_kernel_maps(struct machine *machine) */ map_groups__fixup_end(&machine->kmaps); - addr = machine__get_running_kernel_start(machine, &name); - if (!addr) { + if (machine__get_running_kernel_start(machine, &name, &addr)) { } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; @@ -1439,7 +1477,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, if (machine->last_match == th) machine->last_match = NULL; - BUG_ON(atomic_read(&th->refcnt) == 0); + BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) pthread_rwlock_wrlock(&machine->threads_lock); rb_erase_init(&th->rb_node, &machine->threads); @@ -1538,6 +1576,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_comm_event(machine, event, sample); break; case PERF_RECORD_MMAP: ret = machine__process_mmap_event(machine, event, sample); break; + case PERF_RECORD_NAMESPACES: + ret = machine__process_namespaces_event(machine, event, sample); break; case PERF_RECORD_MMAP2: ret = machine__process_mmap2_event(machine, event, sample); break; case PERF_RECORD_FORK: diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a28305029711c08f56c89385c85fa49611e55aa3..3cdb1340f9170ad388e827b218fae3cd541502fc 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -97,6 +97,9 @@ int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); +int machine__process_namespaces_event(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0a943e7b1ea7fc745485f016875395e91feca8c8..2179b2deb7307c749fb171e48c784e4bc158b028 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -9,13 +9,13 @@ #include /* To get things like MAP_HUGETLB even on older libc headers */ #include "map.h" #include "thread.h" -#include "strlist.h" #include "vdso.h" #include "build-id.h" #include "util.h" #include "debug.h" #include "machine.h" #include +#include "srcline.h" #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); @@ -141,7 +141,7 @@ void map__init(struct map *map, enum map_type type, RB_CLEAR_NODE(&map->rb_node); map->groups = NULL; map->erange_warned = false; - atomic_set(&map->refcnt, 1); + refcount_set(&map->refcnt, 1); } struct map *map__new(struct machine *machine, u64 start, u64 len, @@ -255,7 +255,7 @@ void map__delete(struct map *map) void map__put(struct map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) map__delete(map); } @@ -325,11 +325,6 @@ int map__load(struct map *map) return 0; } -int __weak arch__compare_symbol_names(const char *namea, const char *nameb) -{ - return strcmp(namea, nameb); -} - struct symbol *map__find_symbol(struct map *map, u64 addr) { if (map__load(map) < 0) @@ -354,7 +349,7 @@ struct map *map__clone(struct map *from) struct map *map = memdup(from, sizeof(*map)); if (map != NULL) { - atomic_set(&map->refcnt, 1); + refcount_set(&map->refcnt, 1); RB_CLEAR_NODE(&map->rb_node); dso__get(map->dso); map->groups = NULL; @@ -405,7 +400,8 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr), NULL, true); + map__rip_2objdump(map, addr), NULL, + true, true); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); @@ -485,7 +481,7 @@ void map_groups__init(struct map_groups *mg, struct machine *machine) maps__init(&mg->maps[i]); } mg->machine = machine; - atomic_set(&mg->refcnt, 1); + refcount_set(&mg->refcnt, 1); } static void __maps__purge(struct maps *maps) @@ -547,7 +543,7 @@ void map_groups__delete(struct map_groups *mg) void map_groups__put(struct map_groups *mg) { - if (mg && atomic_dec_and_test(&mg->refcnt)) + if (mg && refcount_dec_and_test(&mg->refcnt)) map_groups__delete(mg); } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index abdacf800c98a78148565f23b7d9b4c916cbacaf..f9e8ac8a52cde5fefb74cb87464c1346e3d08431 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -1,7 +1,7 @@ #ifndef __PERF_MAP_H #define __PERF_MAP_H -#include +#include #include #include #include @@ -51,7 +51,7 @@ struct map { struct dso *dso; struct map_groups *groups; - atomic_t refcnt; + refcount_t refcnt; }; struct kmap { @@ -67,7 +67,7 @@ struct maps { struct map_groups { struct maps maps[MAP__NR_TYPES]; struct machine *machine; - atomic_t refcnt; + refcount_t refcnt; }; struct map_groups *map_groups__new(struct machine *machine); @@ -77,7 +77,7 @@ bool map_groups__empty(struct map_groups *mg); static inline struct map_groups *map_groups__get(struct map_groups *mg) { if (mg) - atomic_inc(&mg->refcnt); + refcount_inc(&mg->refcnt); return mg; } @@ -130,13 +130,14 @@ struct thread; */ #define __map__for_each_symbol_by_name(map, sym_name, pos) \ for (pos = map__find_symbol_by_name(map, sym_name); \ - pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \ + pos && \ + !symbol__match_symbol_name(pos->name, sym_name, \ + SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \ pos = symbol__next_by_name(pos)) #define map__for_each_symbol_by_name(map, sym_name, pos) \ __map__for_each_symbol_by_name(map, sym_name, (pos)) -int arch__compare_symbol_names(const char *namea, const char *nameb); void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct machine *machine, u64 start, u64 len, @@ -150,7 +151,7 @@ struct map *map__clone(struct map *map); static inline struct map *map__get(struct map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 1d4ab53c60cae7b8075dd1bc3a0f1f37a3d9f08a..06f5a3a4295c52cac138ac2526afdac04ffbf0dc 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "mem-events.h" #include "debug.h" #include "symbol.h" @@ -205,8 +206,8 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) static const char * const snoop_access[] = { "N/A", "None", - "Miss", "Hit", + "Miss", "HitM", }; diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c new file mode 100644 index 0000000000000000000000000000000000000000..55f7faa8d9ec0696476c085adf96f20431a77350 --- /dev/null +++ b/tools/perf/util/memswap.c @@ -0,0 +1,24 @@ +#include +#include "memswap.h" +#include + +void mem_bswap_32(void *src, int byte_size) +{ + u32 *m = src; + while (byte_size > 0) { + *m = bswap_32(*m); + byte_size -= sizeof(u32); + ++m; + } +} + +void mem_bswap_64(void *src, int byte_size) +{ + u64 *m = src; + + while (byte_size > 0) { + *m = bswap_64(*m); + byte_size -= sizeof(u64); + ++m; + } +} diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h new file mode 100644 index 0000000000000000000000000000000000000000..7d1b1c34bb570265715774f8b09e152449f4decf --- /dev/null +++ b/tools/perf/util/memswap.h @@ -0,0 +1,7 @@ +#ifndef PERF_MEMSWAP_H_ +#define PERF_MEMSWAP_H_ + +void mem_bswap_64(void *src, int byte_size); +void mem_bswap_32(void *src, int byte_size); + +#endif /* PERF_MEMSWAP_H_ */ diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c new file mode 100644 index 0000000000000000000000000000000000000000..67dcbcc73c7dcdd95da8af4d45e3257d38123147 --- /dev/null +++ b/tools/perf/util/namespaces.c @@ -0,0 +1,37 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2017 Hari Bathini, IBM Corporation + */ + +#include "namespaces.h" +#include "util.h" +#include "event.h" +#include +#include +#include + +struct namespaces *namespaces__new(struct namespaces_event *event) +{ + struct namespaces *namespaces; + u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) * + sizeof(struct perf_ns_link_info)); + + namespaces = zalloc(sizeof(struct namespaces) + link_info_size); + if (!namespaces) + return NULL; + + namespaces->end_time = -1; + + if (event) + memcpy(namespaces->link_info, event->link_info, link_info_size); + + return namespaces; +} + +void namespaces__free(struct namespaces *namespaces) +{ + free(namespaces); +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h new file mode 100644 index 0000000000000000000000000000000000000000..468f1e9a1484c0a8e7c20213850990955732cd43 --- /dev/null +++ b/tools/perf/util/namespaces.h @@ -0,0 +1,26 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2017 Hari Bathini, IBM Corporation + */ + +#ifndef __PERF_NAMESPACES_H +#define __PERF_NAMESPACES_H + +#include "../perf.h" +#include + +struct namespaces_event; + +struct namespaces { + struct list_head list; + u64 end_time; + struct perf_ns_link_info link_info[]; +}; + +struct namespaces *namespaces__new(struct namespaces_event *event); +void namespaces__free(struct namespaces *namespaces); + +#endif /* __PERF_NAMESPACES_H */ diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fe84df1875aa9e231d63d56ba3417f242ea4e3b7..4de398cfb577177af2a2bff7a4212a5f569b1ee5 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -79,7 +81,7 @@ static union perf_event *dup_event(struct ordered_events *oe, static void free_dup_event(struct ordered_events *oe, union perf_event *event) { - if (oe->copy_on_queue) { + if (event && oe->copy_on_queue) { oe->cur_alloc_size -= event->header.size; free(event); } @@ -150,6 +152,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve list_move(&event->list, &oe->cache); oe->nr_events--; free_dup_event(oe, event->event); + event->event = NULL; } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 67a8aebc67ab492a9936ad56ae9655526f9d9781..01e779b91c8ef618984fcdb8d68d526b1870828a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,13 +1,18 @@ #include #include -#include "util.h" +#include +#include +#include +#include +#include "term.h" #include "../perf.h" #include "evlist.h" #include "evsel.h" #include #include "parse-events.h" #include -#include "string.h" +#include "string2.h" +#include "strlist.h" #include "symbol.h" #include "cache.h" #include "header.h" @@ -316,8 +321,9 @@ __add_event(struct list_head *list, int *idx, return NULL; (*idx)++; - evsel->cpus = cpu_map__get(cpus); - evsel->own_cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); + evsel->system_wide = !!cpus; if (name) evsel->name = strdup(name); @@ -1254,11 +1260,59 @@ int parse_events_add_pmu(struct parse_events_evlist *data, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->metric_expr = info.metric_expr; + evsel->metric_name = info.metric_name; } return evsel ? 0 : -ENOMEM; } +int parse_events_multi_pmu_add(struct parse_events_evlist *data, + char *str, struct list_head **listp) +{ + struct list_head *head; + struct parse_events_term *term; + struct list_head *list; + struct perf_pmu *pmu = NULL; + int ok = 0; + + *listp = NULL; + /* Add it for all PMUs that support the alias */ + list = malloc(sizeof(struct list_head)); + if (!list) + return -1; + INIT_LIST_HEAD(list); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + struct perf_pmu_alias *alias; + + list_for_each_entry(alias, &pmu->aliases, list) { + if (!strcasecmp(alias->name, str)) { + head = malloc(sizeof(struct list_head)); + if (!head) + return -1; + INIT_LIST_HEAD(head); + if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + str, 1, false, &str, NULL) < 0) + return -1; + list_add_tail(&term->list, head); + + if (!parse_events_add_pmu(data, list, + pmu->name, head)) { + pr_debug("%s -> %s/%s/\n", str, + pmu->name, alias->str); + ok++; + } + + parse_events_terms__delete(head); + } + } + } + if (!ok) + return -1; + *listp = list; + return 0; +} + int parse_events__modifier_group(struct list_head *list, char *event_mod) { @@ -2276,7 +2330,7 @@ void print_symbol_events(const char *event_glob, unsigned type, * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc) + bool long_desc, bool details_flag) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -2286,7 +2340,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); - print_pmu_events(event_glob, name_only, quiet_flag, long_desc); + print_pmu_events(event_glob, name_only, quiet_flag, long_desc, + details_flag); if (event_glob != NULL) return; @@ -2415,6 +2470,31 @@ int parse_events_term__clone(struct parse_events_term **new, return new_term(new, &temp, term->val.str, term->val.num); } +int parse_events_copy_term_list(struct list_head *old, + struct list_head **new) +{ + struct parse_events_term *term, *n; + int ret; + + if (!old) { + *new = NULL; + return 0; + } + + *new = malloc(sizeof(struct list_head)); + if (!*new) + return -ENOMEM; + INIT_LIST_HEAD(*new); + + list_for_each_entry (term, old, list) { + ret = parse_events_term__clone(&n, term); + if (ret) + return ret; + list_add_tail(&n->list, *new); + } + return 0; +} + void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1af6a267c21bfd3a437e3bed1f2122f96c4dced7..a235f4d6d5e514f60055db58ce5ebb0b8e723d0b 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -8,6 +8,7 @@ #include #include #include +#include struct list_head; struct perf_evsel; @@ -166,6 +167,14 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, int parse_events_add_pmu(struct parse_events_evlist *data, struct list_head *list, char *name, struct list_head *head_config); + +int parse_events_multi_pmu_add(struct parse_events_evlist *data, + char *str, + struct list_head **listp); + +int parse_events_copy_term_list(struct list_head *old, + struct list_head **new); + enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); @@ -175,7 +184,7 @@ void parse_events_evlist_error(struct parse_events_evlist *data, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc); + bool long_desc, bool details_flag); struct event_symbol { const char *symbol; @@ -196,4 +205,23 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +#ifdef HAVE_LIBELF_SUPPORT +/* + * If the probe point starts with '%', + * or starts with "sdt_" and has a ':' but no '=', + * then it should be a SDT/cached probe point. + */ +static inline bool is_sdt_event(char *str) +{ + return (str[0] == '%' || + (!strncmp(str, "sdt_", 4) && + !!strchr(str, ':') && !strchr(str, '='))); +} +#else +static inline bool is_sdt_event(char *str __maybe_unused) +{ + return false; +} +#endif /* HAVE_LIBELF_SUPPORT */ + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 30f018ea137096c22d6798e4bcd7723373db26af..04fd8c9af9f9578c0f8f44a3c8cb05cf087d992f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -226,68 +226,55 @@ event_pmu: PE_NAME opt_event_config { struct parse_events_evlist *data = _data; - struct list_head *list; + struct list_head *list, *orig_terms, *terms; + + if (parse_events_copy_term_list($2, &orig_terms)) + YYABORT; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + if (parse_events_add_pmu(data, list, $1, $2)) { + struct perf_pmu *pmu = NULL; + int ok = 0; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + char *name = pmu->name; + + if (!strncmp(name, "uncore_", 7) && + strncmp($1, "uncore_", 7)) + name += 7; + if (!strncmp($1, name, strlen($1))) { + if (parse_events_copy_term_list(orig_terms, &terms)) + YYABORT; + if (!parse_events_add_pmu(data, list, pmu->name, terms)) + ok++; + parse_events_terms__delete(terms); + } + } + if (!ok) + YYABORT; + } parse_events_terms__delete($2); + parse_events_terms__delete(orig_terms); $$ = list; } | PE_KERNEL_PMU_EVENT sep_dc { - struct parse_events_evlist *data = _data; - struct list_head *head; - struct parse_events_term *term; struct list_head *list; - struct perf_pmu *pmu = NULL; - int ok = 0; - - /* Add it for all PMUs that support the alias */ - ALLOC_LIST(list); - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - struct perf_pmu_alias *alias; - - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, $1)) { - ALLOC_LIST(head); - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, false, &@1, NULL)); - list_add_tail(&term->list, head); - - if (!parse_events_add_pmu(data, list, - pmu->name, head)) { - pr_debug("%s -> %s/%s/\n", $1, - pmu->name, alias->str); - ok++; - } - parse_events_terms__delete(head); - } - } - } - if (!ok) + if (parse_events_multi_pmu_add(_data, $1, &list) < 0) YYABORT; $$ = list; } | PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc { - struct parse_events_evlist *data = _data; - struct list_head *head; - struct parse_events_term *term; struct list_head *list; char pmu_name[128]; - snprintf(&pmu_name, 128, "%s-%s", $1, $3); - ALLOC_LIST(head); - ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - &pmu_name, 1, false, &@1, NULL)); - list_add_tail(&term->list, head); - - ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events_terms__delete(head); + snprintf(&pmu_name, 128, "%s-%s", $1, $3); + if (parse_events_multi_pmu_add(_data, pmu_name, &list) < 0) + YYABORT; $$ = list; } diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 7c7630be5a897d651bbda7eaf2e0c4addcf95a03..50ec3bc87a60442570d6e5a5d27acffc4e06ba26 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -11,8 +11,13 @@ * which is what it's designed for. */ #include "cache.h" -#include "util.h" +#include "path.h" +#include #include +#include +#include +#include +#include static char bad_path[] = "/bad-path/"; /* @@ -50,3 +55,24 @@ char *mkpath(const char *fmt, ...) return bad_path; return cleanup_path(pathname); } + +int path__join(char *bf, size_t size, const char *path1, const char *path2) +{ + return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2); +} + +int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3) +{ + return scnprintf(bf, size, "%s%s%s%s%s", path1, path1[0] ? "/" : "", + path2, path2[0] ? "/" : "", path3); +} + +bool is_regular_file(const char *file) +{ + struct stat st; + + if (stat(file, &st)) + return false; + + return S_ISREG(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h new file mode 100644 index 0000000000000000000000000000000000000000..9a276a58e3c2c8a789e43a24910596597048914f --- /dev/null +++ b/tools/perf/util/path.h @@ -0,0 +1,9 @@ +#ifndef _PERF_PATH_H +#define _PERF_PATH_H + +int path__join(char *bf, size_t size, const char *path1, const char *path2); +int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); + +bool is_regular_file(const char *file); + +#endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c index cb368306b12b7e023f1a2f3eb501529699f2fde6..d55092964da2f3fec32b61f6ef7906093d0945fe 100644 --- a/tools/perf/util/perf-hooks.c +++ b/tools/perf/util/perf-hooks.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "util/util.h" #include "util/debug.h" #include "util/perf-hooks.h" diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index c4023f22f287dd7fb6cdf4a395af2f0d631665fb..b2ae039eff85c4c7166fecaa713704edf952956c 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -6,6 +6,12 @@ const struct sample_reg __weak sample_reg_masks[] = { SMPL_REG_END }; +int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, + char **new_op __maybe_unused) +{ + return SDT_ARG_SKIP; +} + #ifdef HAVE_PERF_REGS_SUPPORT int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 679d6e493962267f7b219b88c05d06c61ef1d2ea..32b37d19dcc34b4bffffcdb0f755d4ea14cab55f 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,6 +15,13 @@ struct sample_reg { extern const struct sample_reg sample_reg_masks[]; +enum { + SDT_ARG_VALID = 0, + SDT_ARG_SKIP, +}; + +int arch_sdt_arg_parse_op(char *old_op, char **new_op); + #ifdef HAVE_PERF_REGS_SUPPORT #include diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 12f84dd2ac5dfddc9f7e124fa25b5da4bb2be021..ac16a9db1fb566ca14d272e33bc34e5166ea2b81 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include @@ -15,6 +17,7 @@ #include "header.h" #include "pmu-events/pmu-events.h" #include "cache.h" +#include "string2.h" struct perf_pmu_format { char *name; @@ -231,7 +234,9 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, - char *unit, char *perpkg) + char *unit, char *perpkg, + char *metric_expr, + char *metric_name) { struct perf_pmu_alias *alias; int ret; @@ -265,6 +270,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } + alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL; + alias->metric_name = metric_name ? strdup(metric_name): NULL; alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; @@ -294,7 +301,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, - NULL); + NULL, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -564,7 +571,9 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, (char *)pe->long_desc, (char *)pe->topic, - (char *)pe->unit, (char *)pe->perpkg); + (char *)pe->unit, (char *)pe->perpkg, + (char *)pe->metric_expr, + (char *)pe->metric_name); } out: @@ -991,6 +1000,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->metric_expr = NULL; + info->metric_name = NULL; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -1006,6 +1017,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (alias->per_pkg) info->per_pkg = true; + info->metric_expr = alias->metric_expr; + info->metric_name = alias->metric_name; list_del(&term->list); free(term); @@ -1100,6 +1113,8 @@ struct sevent { char *topic; char *str; char *pmu; + char *metric_expr; + char *metric_name; }; static int cmp_sevent(const void *a, const void *b) @@ -1136,13 +1151,12 @@ static void wordwrap(char *s, int start, int max, int corr) break; s += wlen; column += n; - while (isspace(*s)) - s++; + s = ltrim(s); } } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc) + bool long_desc, bool details_flag) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1198,6 +1212,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, aliases[j].topic = alias->topic; aliases[j].str = alias->str; aliases[j].pmu = pmu->name; + aliases[j].metric_expr = alias->metric_expr; + aliases[j].metric_name = alias->metric_name; j++; } if (pmu->selectable && @@ -1232,8 +1248,14 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, printf("%*s", 8, "["); wordwrap(aliases[j].desc, 8, columns, 0); printf("]\n"); - if (verbose > 0) - printf("%*s%s/%s/\n", 8, "", aliases[j].pmu, aliases[j].str); + if (details_flag) { + printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); + if (aliases[j].metric_name) + printf(" MetricName: %s", aliases[j].metric_name); + if (aliases[j].metric_expr) + printf(" MetricExpr: %s", aliases[j].metric_expr); + putchar('\n'); + } } else printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 00852ddc7741b068e212adf2eb3e5e2b0c56c872..ea7f450dc60928ab46e2288cf3acbdd35b31af38 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -31,6 +31,8 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; + const char *metric_expr; + const char *metric_name; double scale; bool per_pkg; bool snapshot; @@ -50,6 +52,8 @@ struct perf_pmu_alias { double scale; bool per_pkg; bool snapshot; + char *metric_expr; + char *metric_name; }; struct perf_pmu *perf_pmu__find(const char *name); @@ -76,7 +80,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc); + bool long_desc, bool details_flag); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c new file mode 100644 index 0000000000000000000000000000000000000000..e908177b997624e0ee2b45be5c3dffebbbea331c --- /dev/null +++ b/tools/perf/util/print_binary.c @@ -0,0 +1,55 @@ +#include "print_binary.h" +#include +#include "sane_ctype.h" + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra) +{ + size_t i, j, mask; + + if (!printer) + return; + + bytes_per_line = roundup_pow_of_two(bytes_per_line); + mask = bytes_per_line - 1; + + printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + for (i = 0; i < len; i++) { + if ((i & mask) == 0) { + printer(BINARY_PRINT_LINE_BEGIN, -1, extra); + printer(BINARY_PRINT_ADDR, i, extra); + } + + printer(BINARY_PRINT_NUM_DATA, data[i], extra); + + if (((i & mask) == mask) || i == len - 1) { + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_NUM_PAD, -1, extra); + + printer(BINARY_PRINT_SEP, i, extra); + for (j = i & ~mask; j <= i; j++) + printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_CHAR_PAD, i, extra); + printer(BINARY_PRINT_LINE_END, -1, extra); + } + } + printer(BINARY_PRINT_DATA_END, -1, extra); +} + +int is_printable_array(char *p, unsigned int len) +{ + unsigned int i; + + if (!p || !len || p[len - 1] != 0) + return 0; + + len--; + + for (i = 0; i < len; i++) { + if (!isprint(p[i]) && !isspace(p[i])) + return 0; + } + return 1; +} diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h new file mode 100644 index 0000000000000000000000000000000000000000..da0427263d2d62a4eea5204b9034cca3811fe0a7 --- /dev/null +++ b/tools/perf/util/print_binary.h @@ -0,0 +1,28 @@ +#ifndef PERF_PRINT_BINARY_H +#define PERF_PRINT_BINARY_H + +#include + +enum binary_printer_ops { + BINARY_PRINT_DATA_BEGIN, + BINARY_PRINT_LINE_BEGIN, + BINARY_PRINT_ADDR, + BINARY_PRINT_NUM_DATA, + BINARY_PRINT_NUM_PAD, + BINARY_PRINT_SEP, + BINARY_PRINT_CHAR_DATA, + BINARY_PRINT_CHAR_PAD, + BINARY_PRINT_LINE_END, + BINARY_PRINT_DATA_END, +}; + +typedef void (*print_binary_t)(enum binary_printer_ops op, + unsigned int val, void *extra); + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra); + +int is_printable_array(char *p, unsigned int len); + +#endif /* PERF_PRINT_BINARY_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28fb62c32678483cd6d54a9c88e620022e1f9d08..84e7e698411e6a80a39050514227cbb90556ca13 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include @@ -35,6 +36,7 @@ #include "util.h" #include "event.h" #include "strlist.h" +#include "strfilter.h" #include "debug.h" #include "cache.h" #include "color.h" @@ -46,8 +48,10 @@ #include "probe-finder.h" #include "probe-file.h" #include "session.h" +#include "string2.h" + +#include "sane_ctype.h" -#define MAX_CMDLEN 256 #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ @@ -757,7 +761,9 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - if (!tevs[i].point.address || tevs[i].point.retprobe) + if (!tevs[i].point.address) + continue; + if (tevs[i].point.retprobe && !kretprobe_offset_is_supported()) continue; /* If we found a wrong one, mark it by NULL symbol */ if (kprobe_warn_out_range(tevs[i].point.symbol, @@ -1339,14 +1345,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (!arg) return -EINVAL; - /* - * If the probe point starts with '%', - * or starts with "sdt_" and has a ':' but no '=', - * then it should be a SDT/cached probe point. - */ - if (arg[0] == '%' || - (!strncmp(arg, "sdt_", 4) && - !!strchr(arg, ':') && !strchr(arg, '='))) { + if (is_sdt_event(arg)) { pev->sdt = true; if (arg[0] == '%') arg++; @@ -1528,11 +1527,6 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -EINVAL; } - if (pp->retprobe && !pp->function) { - semantic_error("Return probe requires an entry function.\n"); - return -EINVAL; - } - if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) { semantic_error("Offset/Line/Lazy pattern can't be used with " "return probe.\n"); @@ -2841,7 +2835,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } /* Note that the symbols in the kmodule are not relocated */ - if (!pev->uprobes && !pp->retprobe && !pev->target) { + if (!pev->uprobes && !pev->target && + (!pp->retprobe || kretprobe_offset_is_supported())) { reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); @@ -3057,7 +3052,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, struct probe_trace_event *new_tevs; int ret = 0; - if (ntevs == 0) { + if (*ntevs == 0) { *tevs = *tevs2; *ntevs = ntevs2; *tevs2 = NULL; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 5d4e940614024ddf9a5d0420b8bdfa51ee07cd38..373842656fb6444f7ffdbe1aefc2f4a4add50818 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -3,8 +3,6 @@ #include #include "intlist.h" -#include "strlist.h" -#include "strfilter.h" /* Probe related configurations */ struct probe_conf { @@ -107,6 +105,8 @@ struct line_range { struct intlist *line_list; /* Visible lines */ }; +struct strlist; + /* List of variables */ struct variable_list { struct probe_trace_point point; /* Actual probepoint */ @@ -153,6 +153,9 @@ int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); + +struct strfilter; + int del_perf_probe_events(struct strfilter *filter); int show_perf_probe_event(const char *group, const char *event, diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 436b64731f65112f3c624a70a077f7370532da5a..d679389e627c1800c58b6b632ef65e67ff49d771 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -14,10 +14,15 @@ * GNU General Public License for more details. * */ +#include +#include +#include #include +#include #include "util.h" #include "event.h" #include "strlist.h" +#include "strfilter.h" #include "debug.h" #include "cache.h" #include "color.h" @@ -27,8 +32,11 @@ #include "probe-event.h" #include "probe-file.h" #include "session.h" +#include "perf_regs.h" +#include "string2.h" -#define MAX_CMDLEN 256 +/* 4096 - 2 ('\n' + '\0') */ +#define MAX_CMDLEN 4094 static void print_open_warning(int err, bool uprobe) { @@ -70,7 +78,7 @@ static void print_both_open_warning(int kerr, int uerr) } } -static int open_probe_events(const char *trace_file, bool readwrite) +int open_trace_file(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; int ret; @@ -92,12 +100,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("kprobe_events", readwrite); + return open_trace_file("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("uprobe_events", readwrite); + return open_trace_file("uprobe_events", readwrite); } int probe_file__open(int flag) @@ -687,6 +695,110 @@ static unsigned long long sdt_note__get_addr(struct sdt_note *note) : (unsigned long long)note->addr.a64[0]; } +static const char * const type_to_suffix[] = { + ":s64", "", "", "", ":s32", "", ":s16", ":s8", + "", ":u8", ":u16", "", ":u32", "", "", "", ":u64" +}; + +/* + * Isolate the string number and convert it into a decimal value; + * this will be an index to get suffix of the uprobe name (defining + * the type) + */ +static int sdt_arg_parse_size(char *n_ptr, const char **suffix) +{ + long type_idx; + + type_idx = strtol(n_ptr, NULL, 10); + if (type_idx < -8 || type_idx > 8) { + pr_debug4("Failed to get a valid sdt type\n"); + return -1; + } + + *suffix = type_to_suffix[type_idx + 8]; + return 0; +} + +static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg) +{ + char *op, *desc = strdup(arg), *new_op = NULL; + const char *suffix = ""; + int ret = -1; + + if (desc == NULL) { + pr_debug4("Allocation error\n"); + return ret; + } + + /* + * Argument is in N@OP format. N is size of the argument and OP is + * the actual assembly operand. N can be omitted; in that case + * argument is just OP(without @). + */ + op = strchr(desc, '@'); + if (op) { + op[0] = '\0'; + op++; + + if (sdt_arg_parse_size(desc, &suffix)) + goto error; + } else { + op = desc; + } + + ret = arch_sdt_arg_parse_op(op, &new_op); + + if (ret < 0) + goto error; + + if (ret == SDT_ARG_VALID) { + ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix); + if (ret < 0) + goto error; + } + + ret = 0; +error: + free(desc); + free(new_op); + return ret; +} + +static char *synthesize_sdt_probe_command(struct sdt_note *note, + const char *pathname, + const char *sdtgrp) +{ + struct strbuf buf; + char *ret = NULL, **args; + int i, args_count; + + if (strbuf_init(&buf, 32) < 0) + return NULL; + + if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx", + sdtgrp, note->name, pathname, + sdt_note__get_addr(note)) < 0) + goto error; + + if (!note->args) + goto out; + + if (note->args) { + args = argv_split(note->args, &args_count); + + for (i = 0; i < args_count; ++i) { + if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) + goto error; + } + } + +out: + ret = strbuf_detach(&buf, NULL); +error: + strbuf_release(&buf); + return ret; +} + int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) { struct probe_cache_entry *entry = NULL; @@ -723,11 +835,12 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) entry->pev.group = strdup(sdtgrp); list_add_tail(&entry->node, &pcache->entries); } - ret = asprintf(&buf, "p:%s/%s %s:0x%llx", - sdtgrp, note->name, pathname, - sdt_note__get_addr(note)); - if (ret < 0) + buf = synthesize_sdt_probe_command(note, pathname, sdtgrp); + if (!buf) { + ret = -ENOMEM; break; + } + strlist__add(entry->tevlist, buf); free(buf); entry = NULL; @@ -877,59 +990,72 @@ int probe_cache__show_all_caches(struct strfilter *filter) return 0; } +enum ftrace_readme { + FTRACE_README_PROBE_TYPE_X = 0, + FTRACE_README_KRETPROBE_OFFSET, + FTRACE_README_END, +}; + static struct { const char *pattern; - bool avail; - bool checked; -} probe_type_table[] = { -#define DEFINE_TYPE(idx, pat, def_avail) \ - [idx] = {.pattern = pat, .avail = (def_avail)} - DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true), - DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true), - DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false), - DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true), - DEFINE_TYPE(PROBE_TYPE_BITFIELD, - "* b@/", true), + bool avail; +} ftrace_readme_table[] = { +#define DEFINE_TYPE(idx, pat) \ + [idx] = {.pattern = pat, .avail = false} + DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), + DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), }; -bool probe_type_is_available(enum probe_type type) +static bool scan_ftrace_readme(enum ftrace_readme type) { + int fd; FILE *fp; char *buf = NULL; size_t len = 0; - bool target_line = false; - bool ret = probe_type_table[type].avail; + bool ret = false; + static bool scanned = false; - if (type >= PROBE_TYPE_END) - return false; - /* We don't have to check the type which supported by default */ - if (ret || probe_type_table[type].checked) - return ret; + if (scanned) + goto result; - if (asprintf(&buf, "%s/README", tracing_path) < 0) + fd = open_trace_file("README", false); + if (fd < 0) return ret; - fp = fopen(buf, "r"); - if (!fp) - goto end; - - zfree(&buf); - while (getline(&buf, &len, fp) > 0 && !ret) { - if (!target_line) { - target_line = !!strstr(buf, " type: "); - if (!target_line) - continue; - } else if (strstr(buf, "\t ") != buf) - break; - ret = strglobmatch(buf, probe_type_table[type].pattern); + fp = fdopen(fd, "r"); + if (!fp) { + close(fd); + return ret; } - /* Cache the result */ - probe_type_table[type].checked = true; - probe_type_table[type].avail = ret; + + while (getline(&buf, &len, fp) > 0) + for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++) + if (!ftrace_readme_table[i].avail) + ftrace_readme_table[i].avail = + strglobmatch(buf, ftrace_readme_table[i].pattern); + scanned = true; fclose(fp); -end: free(buf); - return ret; +result: + if (type >= FTRACE_README_END) + return false; + + return ftrace_readme_table[type].avail; +} + +bool probe_type_is_available(enum probe_type type) +{ + if (type >= PROBE_TYPE_END) + return false; + else if (type == PROBE_TYPE_X) + return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X); + + return true; +} + +bool kretprobe_offset_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET); } diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index eba44c3e9dca9ac670d06736237feb49fb21734b..5ecc9d3925dbb53c3190d8c6aa075c5e38bd3935 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -1,10 +1,11 @@ #ifndef __PROBE_FILE_H #define __PROBE_FILE_H -#include "strlist.h" -#include "strfilter.h" #include "probe-event.h" +struct strlist; +struct strfilter; + /* Cache of probe definitions */ struct probe_cache_entry { struct list_head node; @@ -35,11 +36,13 @@ enum probe_type { /* probe-file.c depends on libelf */ #ifdef HAVE_LIBELF_SUPPORT +int open_trace_file(const char *trace_file, bool readwrite); int probe_file__open(int flag); int probe_file__open_both(int *kfd, int *ufd, int flag); struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); + int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); @@ -64,6 +67,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, const char *group, const char *event); int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); +bool kretprobe_offset_is_supported(void); #else /* ! HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 57cd268d4275bd2c8a3f8f1e43f29fab1fe1a835..a5731de0e5ebc73ed3fbc35dd1b334e664f20b69 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include @@ -37,9 +38,11 @@ #include "debug.h" #include "intlist.h" #include "util.h" +#include "strlist.h" #include "symbol.h" #include "probe-finder.h" #include "probe-file.h" +#include "string2.h" /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 2956c51986529ee7481f922d488a449c0a7619a0..27f06155101298e1e5ab878d75b9944745c22bf4 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -2,9 +2,9 @@ #define _PROBE_FINDER_H #include -#include "util.h" #include "intlist.h" #include "probe-event.h" +#include "sane_ctype.h" #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 0546a430434735c902a8c23e45bea583349f9851..9f3b0d9754a896484fa193865d2141bd37a82ca1 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -21,8 +21,10 @@ util/cgroup.c util/parse-branch-options.c util/rblist.c util/counts.c +util/print_binary.c util/strlist.c util/trace-event.c ../lib/rbtree.c util/string.c util/symbol_fprintf.c +util/units.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a5fbc012e3df974adcec3581386ff80b6fdedf6a..c129e99114aedfcead61505802ec50aef0e22cd2 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -4,11 +4,25 @@ #include #include #include "evlist.h" +#include "callchain.h" #include "evsel.h" #include "event.h" #include "cpumap.h" +#include "print_binary.h" #include "thread_map.h" +/* + * Provide these two so that we don't have to link against callchain.c and + * start dragging hist.c, etc. + */ +struct callchain_param callchain_param; + +int parse_callchain_record(const char *arg __maybe_unused, + struct callchain_param *param __maybe_unused) +{ + return 0; +} + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index 293534c1a474352486df78087a4d452bcb96fe20..1ba8920151d8919e3d8b7b2a1236d7a82d27a939 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -1,3 +1,4 @@ +#include #include #include "strbuf.h" #include "quote.h" diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 98bf584853ea28e0fd9ab3828aa1e83f9d4010e5..d91bdf5a1aa45985040d6321c33ef56d7dcb3f18 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "cpumap.h" #include "parse-events.h" +#include #include #include "util.h" #include "cloexec.h" diff --git a/tools/perf/util/sane_ctype.h b/tools/perf/util/sane_ctype.h new file mode 100644 index 0000000000000000000000000000000000000000..4308c22c22ad14fc6674a10890da994800b62b97 --- /dev/null +++ b/tools/perf/util/sane_ctype.h @@ -0,0 +1,51 @@ +#ifndef _PERF_SANE_CTYPE_H +#define _PERF_SANE_CTYPE_H + +extern const char *graph_line; +extern const char *graph_dotted_line; +extern const char *spaces; +extern const char *dots; + +/* Sane ctype - no locale, and works with signed chars */ +#undef isascii +#undef isspace +#undef isdigit +#undef isxdigit +#undef isalpha +#undef isprint +#undef isalnum +#undef islower +#undef isupper +#undef tolower +#undef toupper + +extern unsigned char sane_ctype[256]; +#define GIT_SPACE 0x01 +#define GIT_DIGIT 0x02 +#define GIT_ALPHA 0x04 +#define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define GIT_PRINT_EXTRA 0x20 +#define GIT_PRINT 0x3E +#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) +#define isascii(x) (((x) & ~0x7f) == 0) +#define isspace(x) sane_istest(x,GIT_SPACE) +#define isdigit(x) sane_istest(x,GIT_DIGIT) +#define isxdigit(x) \ + (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') +#define isalpha(x) sane_istest(x,GIT_ALPHA) +#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define isprint(x) sane_istest(x,GIT_PRINT) +#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20)) +#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20)) +#define tolower(x) sane_case((unsigned char)(x), 0x20) +#define toupper(x) sane_case((unsigned char)(x), 0) + +static inline int sane_case(int x, int high) +{ + if (sane_istest(x, GIT_ALPHA)) + x = (x & ~0x20) | high; + return x; +} + +#endif /* _PERF_SANE_CTYPE_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index dff043a2958921f61e0e9ee7f803bdcfb9bfc5bf..7b79c413486b3a6ef8b0f6c32e330911c5ce7638 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -19,6 +19,7 @@ * */ +#include #include #include #include @@ -27,7 +28,9 @@ #include #include -#include "../util.h" +#include +/* perl needs the following define, right after including stdbool.h */ +#define HAS_BOOL #include #include diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 783326cfbaa6bfdaeef277dc9545f55213ffa942..40de3cb40d2100fe918f26795ae29ae702d62665 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -45,6 +46,7 @@ #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" +#include "print_binary.h" #include "stat.h" PyMODINIT_FUNC initperf_trace_context(void); @@ -1217,7 +1219,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "# be retrieved using Python functions of the form " "common_*(context).\n"); - fprintf(ofp, "# See the perf-trace-python Documentation for the list " + fprintf(ofp, "# See the perf-script-python Documentation for the list " "of available functions.\n\n"); fprintf(ofp, "import os\n"); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1dd617d116b5d844f23c88592f9dac9f061a7ff6..7dc1096264c575cbbd1cd41d07f041585a997453 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,8 @@ +#include +#include #include #include +#include #include #include @@ -8,6 +11,7 @@ #include "evlist.h" #include "evsel.h" +#include "memswap.h" #include "session.h" #include "tool.h" #include "sort.h" @@ -16,6 +20,7 @@ #include "perf_regs.h" #include "asm/bug.h" #include "auxtrace.h" +#include "thread.h" #include "thread-stack.h" #include "stat.h" @@ -139,8 +144,14 @@ struct perf_session *perf_session__new(struct perf_data_file *file, if (perf_session__open(session) < 0) goto out_close; - perf_session__set_id_hdr_size(session); - perf_session__set_comm_exec(session); + /* + * set session attributes that are present in perf.data + * but not in pipe-mode. + */ + if (!file->is_pipe) { + perf_session__set_id_hdr_size(session); + perf_session__set_comm_exec(session); + } } } else { session->machines.host.env = &perf_env; @@ -155,7 +166,11 @@ struct perf_session *perf_session__new(struct perf_data_file *file, pr_warning("Cannot read kernel map\n"); } - if (tool && tool->ordering_requires_timestamps && + /* + * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is + * processed, so perf_evlist__sample_id_all is not meaningful here. + */ + if ((!file || !file->is_pipe) && tool && tool->ordering_requires_timestamps && tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; @@ -1239,6 +1254,8 @@ static int machines__deliver_event(struct machines *machines, return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); + case PERF_RECORD_NAMESPACES: + return tool->namespaces(tool, event, sample, machine); case PERF_RECORD_FORK: return tool->fork(tool, event, sample, machine); case PERF_RECORD_EXIT: @@ -1258,9 +1275,12 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: - if (tool->aux == perf_event__process_aux && - (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) - evlist->stats.total_aux_lost += 1; + if (tool->aux == perf_event__process_aux) { + if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) + evlist->stats.total_aux_lost += 1; + if (event->aux.flags & PERF_AUX_FLAG_PARTIAL) + evlist->stats.total_aux_partial += 1; + } return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); @@ -1494,6 +1514,11 @@ int perf_session__register_idle_thread(struct perf_session *session) err = -1; } + if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) { + pr_err("problem inserting idle task.\n"); + err = -1; + } + /* machine__findnew_thread() got the thread, so put it */ thread__put(thread); return err; @@ -1548,6 +1573,23 @@ static void perf_session__warn_about_errors(const struct perf_session *session) stats->nr_events[PERF_RECORD_AUX]); } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_partial != 0) { + bool vmm_exclusive = false; + + (void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive", + &vmm_exclusive); + + ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n" + "Are you running a KVM guest in the background?%s\n\n", + stats->total_aux_partial, + stats->nr_events[PERF_RECORD_AUX], + vmm_exclusive ? + "\nReloading kvm_intel module with vmm_exclusive=0\n" + "will reduce the gaps to only guest's timeslices." : + ""); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " @@ -1628,6 +1670,7 @@ static int __perf_session__process_pipe_events(struct perf_session *session) buf = malloc(cur_size); if (!buf) return -errno; + ordered_events__set_copy_on_queue(oe, true); more: event = buf; err = readn(fd, event, sizeof(struct perf_event_header)); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 4bd758553450c8904860d19dd642f790310bc591..47b5e7dbcb18d420ea526508d0fbe0e1e3da7234 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -5,14 +5,14 @@ #include "event.h" #include "header.h" #include "machine.h" -#include "symbol.h" -#include "thread.h" #include "data.h" #include "ordered-events.h" +#include #include #include struct ip_callchain; +struct symbol; struct thread; struct auxtrace; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0ff622288d243c4edad03a904b6da3a52cf4da50..5762ae4e9e912e30dc158ee293c503373d80cc1f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1,12 +1,18 @@ +#include +#include +#include #include #include "sort.h" #include "hist.h" #include "comm.h" #include "symbol.h" +#include "thread.h" #include "evsel.h" #include "evlist.h" +#include "strlist.h" #include #include "mem-events.h" +#include regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; @@ -323,7 +329,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true); + he->ms.sym, true, true); } static int64_t @@ -366,7 +372,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) left->branch_info->srcline_from = get_srcline(map->dso, map__rip_2objdump(map, left->branch_info->from.al_addr), - left->branch_info->from.sym, true); + left->branch_info->from.sym, + true, true); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -376,7 +383,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->srcline_from = get_srcline(map->dso, map__rip_2objdump(map, right->branch_info->from.al_addr), - right->branch_info->from.sym, true); + right->branch_info->from.sym, + true, true); } return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -407,7 +415,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) left->branch_info->srcline_to = get_srcline(map->dso, map__rip_2objdump(map, left->branch_info->to.al_addr), - left->branch_info->from.sym, true); + left->branch_info->from.sym, + true, true); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -417,7 +426,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) right->branch_info->srcline_to = get_srcline(map->dso, map__rip_2objdump(map, right->branch_info->to.al_addr), - right->branch_info->to.sym, true); + right->branch_info->to.sym, + true, true); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -448,7 +458,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true); + e->ms.sym, false, true, true); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); @@ -536,6 +546,46 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort cgroup_id */ + +static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev) +{ + return (int64_t)(right_dev - left_dev); +} + +static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino) +{ + return (int64_t)(right_ino - left_ino); +} + +static int64_t +sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev); + if (ret != 0) + return ret; + + return _sort__cgroup_inode_cmp(right->cgroup_id.ino, + left->cgroup_id.ino); +} + +static int hist_entry__cgroup_id_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width __maybe_unused) +{ + return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev, + he->cgroup_id.ino); +} + +struct sort_entry sort_cgroup_id = { + .se_header = "cgroup id (dev/inode)", + .se_cmp = sort__cgroup_id_cmp, + .se_snprintf = hist_entry__cgroup_id_snprintf, + .se_width_idx = HISTC_CGROUP_ID, +}; + /* --sort socket */ static int64_t @@ -846,6 +896,9 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, static int64_t sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) { + if (!left->branch_info || !right->branch_info) + return cmp_null(left->branch_info, right->branch_info); + return left->branch_info->flags.cycles - right->branch_info->flags.cycles; } @@ -853,6 +906,8 @@ sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { + if (!he->branch_info) + return scnprintf(bf, size, "%-.*s", width, "N/A"); if (he->branch_info->flags.cycles == 0) return repsep_snprintf(bf, size, "%-*s", width, "-"); return repsep_snprintf(bf, size, "%-*hd", width, @@ -1396,6 +1451,46 @@ struct sort_entry sort_transaction = { .se_width_idx = HISTC_TRANSACTION, }; +/* --sort symbol_size */ + +static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r) +{ + int64_t size_l = sym_l != NULL ? symbol__size(sym_l) : 0; + int64_t size_r = sym_r != NULL ? symbol__size(sym_r) : 0; + + return size_l < size_r ? -1 : + size_l == size_r ? 0 : 1; +} + +static int64_t +sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__sym_size_cmp(right->ms.sym, left->ms.sym); +} + +static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf, + size_t bf_size, unsigned int width) +{ + if (sym) + return repsep_snprintf(bf, bf_size, "%*d", width, symbol__size(sym)); + + return repsep_snprintf(bf, bf_size, "%*s", width, "unknown"); +} + +static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__sym_size_snprintf(he->ms.sym, bf, size, width); +} + +struct sort_entry sort_sym_size = { + .se_header = "Symbol size", + .se_cmp = sort__sym_size_cmp, + .se_snprintf = hist_entry__sym_size_snprintf, + .se_width_idx = HISTC_SYM_SIZE, +}; + + struct sort_dimension { const char *name; struct sort_entry *entry; @@ -1418,6 +1513,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), DIM(SORT_TRACE, "trace", sort_trace), + DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), + DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 796c847e2f000d6722ed2cb35321e7f3b8b0489e..b7c75597e18fd226c190e8fbb81ef610690f8541 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -2,7 +2,7 @@ #define __PERF_SORT_H #include "../builtin.h" -#include "util.h" +#include #include "color.h" #include @@ -11,7 +11,6 @@ #include "symbol.h" #include "string.h" #include "callchain.h" -#include "strlist.h" #include "values.h" #include "../perf.h" @@ -21,7 +20,9 @@ #include #include "parse-events.h" #include "hist.h" -#include "thread.h" +#include "srcline.h" + +struct thread; extern regex_t parent_regex; extern const char *sort_order; @@ -54,6 +55,11 @@ struct he_stat { u32 nr_events; }; +struct namespace_id { + u64 dev; + u64 ino; +}; + struct hist_entry_diff { bool computed; union { @@ -91,6 +97,7 @@ struct hist_entry { struct map_symbol ms; struct thread *thread; struct comm *comm; + struct namespace_id cgroup_id; u64 ip; u64 transaction; s32 socket; @@ -122,6 +129,7 @@ struct hist_entry { }; char *srcline; char *srcfile; + struct inline_node *inline_node; struct symbol *parent; struct branch_info *branch_info; struct hists *hists; @@ -211,6 +219,8 @@ enum sort_type { SORT_GLOBAL_WEIGHT, SORT_TRANSACTION, SORT_TRACE, + SORT_SYM_SIZE, + SORT_CGROUP_ID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index b4db3f48e3b09784f00d610c8cbffea8c26e38c6..ebc88a74e67b7129cbbd6790c34f50dcefec0de1 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -7,11 +8,62 @@ #include "util/dso.h" #include "util/util.h" #include "util/debug.h" +#include "util/callchain.h" +#include "srcline.h" #include "symbol.h" bool srcline_full_filename; +static const char *dso__name(struct dso *dso) +{ + const char *dso_name; + + if (dso->symsrc_filename) + dso_name = dso->symsrc_filename; + else + dso_name = dso->long_name; + + if (dso_name[0] == '[') + return NULL; + + if (!strncmp(dso_name, "/tmp/perf-", 10)) + return NULL; + + return dso_name; +} + +static int inline_list__append(char *filename, char *funcname, int line_nr, + struct inline_node *node, struct dso *dso) +{ + struct inline_list *ilist; + char *demangled; + + ilist = zalloc(sizeof(*ilist)); + if (ilist == NULL) + return -1; + + ilist->filename = filename; + ilist->line_nr = line_nr; + + if (dso != NULL) { + demangled = dso__demangle_sym(dso, 0, funcname); + if (demangled == NULL) { + ilist->funcname = funcname; + } else { + ilist->funcname = demangled; + free(funcname); + } + } + + if (callchain_param.order == ORDER_CALLEE) + list_add_tail(&ilist->list, &node->val); + else + list_add(&ilist->list, &node->val); + + return 0; +} + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -151,9 +203,19 @@ static void addr2line_cleanup(struct a2l_data *a2l) #define MAX_INLINE_NEST 1024 +static int inline_list__append_dso_a2l(struct dso *dso, + struct inline_node *node) +{ + struct a2l_data *a2l = dso->a2l; + char *funcname = a2l->funcname ? strdup(a2l->funcname) : NULL; + char *filename = a2l->filename ? strdup(a2l->filename) : NULL; + + return inline_list__append(filename, funcname, a2l->line, node, dso); +} + static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line, struct dso *dso, - bool unwind_inlines) + bool unwind_inlines, struct inline_node *node) { int ret = 0; struct a2l_data *a2l = dso->a2l; @@ -173,23 +235,36 @@ static int addr2line(const char *dso_name, u64 addr, bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); - if (a2l->found && unwind_inlines) { + if (!a2l->found) + return 0; + + if (unwind_inlines) { int cnt = 0; + if (node && inline_list__append_dso_a2l(dso, node)) + return 0; + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, &a2l->funcname, &a2l->line) && - cnt++ < MAX_INLINE_NEST) - ; + cnt++ < MAX_INLINE_NEST) { + + if (node != NULL) { + if (inline_list__append_dso_a2l(dso, node)) + return 0; + // found at least one inline frame + ret = 1; + } + } } - if (a2l->found && a2l->filename) { - *file = strdup(a2l->filename); - *line = a2l->line; - - if (*file) - ret = 1; + if (file) { + *file = a2l->filename ? strdup(a2l->filename) : NULL; + ret = *file ? 1 : 0; } + if (line) + *line = a2l->line; + return ret; } @@ -205,18 +280,66 @@ void dso__free_a2l(struct dso *dso) dso->a2l = NULL; } +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso) +{ + struct inline_node *node; + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + return NULL; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + if (!addr2line(dso_name, addr, NULL, NULL, dso, TRUE, node)) + goto out_free_inline_node; + + if (list_empty(&node->val)) + goto out_free_inline_node; + + return node; + +out_free_inline_node: + inline_node__delete(node); + return NULL; +} + #else /* HAVE_LIBBFD_SUPPORT */ +static int filename_split(char *filename, unsigned int *line_nr) +{ + char *sep; + + sep = strchr(filename, '\n'); + if (sep) + *sep = '\0'; + + if (!strcmp(filename, "??:0")) + return 0; + + sep = strchr(filename, ':'); + if (sep) { + *sep++ = '\0'; + *line_nr = strtoul(sep, NULL, 0); + return 1; + } + + return 0; +} + static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, struct dso *dso __maybe_unused, - bool unwind_inlines __maybe_unused) + bool unwind_inlines __maybe_unused, + struct inline_node *node __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; char *filename = NULL; size_t len; - char *sep; int ret = 0; scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, @@ -233,23 +356,14 @@ static int addr2line(const char *dso_name, u64 addr, goto out; } - sep = strchr(filename, '\n'); - if (sep) - *sep = '\0'; - - if (!strcmp(filename, "??:0")) { - pr_debug("no debugging info in %s\n", dso_name); + ret = filename_split(filename, line_nr); + if (ret != 1) { free(filename); goto out; } - sep = strchr(filename, ':'); - if (sep) { - *sep++ = '\0'; - *file = filename; - *line_nr = strtoul(sep, NULL, 0); - ret = 1; - } + *file = filename; + out: pclose(fp); return ret; @@ -259,6 +373,58 @@ void dso__free_a2l(struct dso *dso __maybe_unused) { } +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso __maybe_unused) +{ + FILE *fp; + char cmd[PATH_MAX]; + struct inline_node *node; + char *filename = NULL; + size_t len; + unsigned int line_nr = 0; + + scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i %016"PRIx64, + dso_name, addr); + + fp = popen(cmd, "r"); + if (fp == NULL) { + pr_err("popen failed for %s\n", dso_name); + return NULL; + } + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + goto out; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + while (getline(&filename, &len, fp) != -1) { + if (filename_split(filename, &line_nr) != 1) { + free(filename); + goto out; + } + + if (inline_list__append(filename, NULL, line_nr, node, + NULL) != 0) + goto out; + + filename = NULL; + } + +out: + pclose(fp); + + if (list_empty(&node->val)) { + inline_node__delete(node); + return NULL; + } + + return node; +} + #endif /* HAVE_LIBBFD_SUPPORT */ /* @@ -268,7 +434,7 @@ void dso__free_a2l(struct dso *dso __maybe_unused) #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines) { char *file = NULL; unsigned line = 0; @@ -278,18 +444,11 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (!dso->has_srcline) goto out; - if (dso->symsrc_filename) - dso_name = dso->symsrc_filename; - else - dso_name = dso->long_name; - - if (dso_name[0] == '[') - goto out; - - if (!strncmp(dso_name, "/tmp/perf-", 10)) + dso_name = dso__name(dso); + if (dso_name == NULL) goto out; - if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines)) + if (!addr2line(dso_name, addr, &file, &line, dso, unwind_inlines, NULL)) goto out; if (asprintf(&srcline, "%s:%u", @@ -309,6 +468,11 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, dso->has_srcline = 0; dso__free_a2l(dso); } + + if (!show_addr) + return (show_sym && sym) ? + strndup(sym->name, sym->namelen) : NULL; + if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", addr - sym->start) < 0) @@ -325,7 +489,32 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym) + bool show_sym, bool show_addr) +{ + return __get_srcline(dso, addr, sym, show_sym, show_addr, false); +} + +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr) +{ + const char *dso_name; + + dso_name = dso__name(dso); + if (dso_name == NULL) + return NULL; + + return addr2inlines(dso_name, addr, dso); +} + +void inline_node__delete(struct inline_node *node) { - return __get_srcline(dso, addr, sym, show_sym, false); + struct inline_list *ilist, *tmp; + + list_for_each_entry_safe(ilist, tmp, &node->val, list) { + list_del_init(&ilist->list); + zfree(&ilist->filename); + zfree(&ilist->funcname); + free(ilist); + } + + free(node); } diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h new file mode 100644 index 0000000000000000000000000000000000000000..7b52ba88676ef6e90b692946fe243a115d261c27 --- /dev/null +++ b/tools/perf/util/srcline.h @@ -0,0 +1,34 @@ +#ifndef PERF_SRCLINE_H +#define PERF_SRCLINE_H + +#include +#include + +struct dso; +struct symbol; + +extern bool srcline_full_filename; +char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool show_addr); +char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, + bool show_sym, bool show_addr, bool unwind_inlines); +void free_srcline(char *srcline); + +#define SRCLINE_UNKNOWN ((char *) "??:0") + +struct inline_list { + char *filename; + char *funcname; + unsigned int line_nr; + struct list_head list; +}; + +struct inline_node { + u64 addr; + struct list_head val; +}; + +struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr); +void inline_node__delete(struct inline_node *node); + +#endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 8a2bbd2a4d828c83deb2f203d25d5e452752c90b..ac10cc675d39579bfca249abbc4353e7c9accc6e 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -3,6 +3,9 @@ #include "stat.h" #include "color.h" #include "pmu.h" +#include "rblist.h" +#include "evlist.h" +#include "expr.h" enum { CTX_BIT_USER = 1 << 0, @@ -41,13 +44,73 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct rblist runtime_saved_values; static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +struct saved_value { + struct rb_node rb_node; + struct perf_evsel *evsel; + int cpu; + int ctx; + struct stats stats; +}; + +static int saved_value_cmp(struct rb_node *rb_node, const void *entry) +{ + struct saved_value *a = container_of(rb_node, + struct saved_value, + rb_node); + const struct saved_value *b = entry; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + if (a->cpu != b->cpu) + return a->cpu - b->cpu; + return a->evsel - b->evsel; +} + +static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, + const void *entry) +{ + struct saved_value *nd = malloc(sizeof(struct saved_value)); + + if (!nd) + return NULL; + memcpy(nd, entry, sizeof(struct saved_value)); + return &nd->rb_node; +} + +static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, + int cpu, int ctx, + bool create) +{ + struct rb_node *nd; + struct saved_value dm = { + .cpu = cpu, + .ctx = ctx, + .evsel = evsel, + }; + nd = rblist__find(&runtime_saved_values, &dm); + if (nd) + return container_of(nd, struct saved_value, rb_node); + if (create) { + rblist__add_node(&runtime_saved_values, &dm); + nd = rblist__find(&runtime_saved_values, &dm); + if (nd) + return container_of(nd, struct saved_value, rb_node); + } + return NULL; +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); + rblist__init(&runtime_saved_values); + runtime_saved_values.node_cmp = saved_value_cmp; + runtime_saved_values.node_new = saved_value_new; + /* No delete for now */ } static int evsel_context(struct perf_evsel *evsel) @@ -70,6 +133,8 @@ static int evsel_context(struct perf_evsel *evsel) void perf_stat__reset_shadow_stats(void) { + struct rb_node *pos, *next; + memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); @@ -92,6 +157,15 @@ void perf_stat__reset_shadow_stats(void) memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); + + next = rb_first(&runtime_saved_values.entries); + while (next) { + pos = next; + next = rb_next(pos); + memset(&container_of(pos, struct saved_value, rb_node)->stats, + 0, + sizeof(struct stats)); + } } /* @@ -143,6 +217,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); + + if (counter->collect_stat) { + struct saved_value *v = saved_value_lookup(counter, cpu, ctx, + true); + update_stats(&v->stats, count[0]); + } } /* used for get_ratio_color() */ @@ -172,6 +252,95 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } +static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, + const char *name) +{ + struct perf_evsel *c2; + + evlist__for_each_entry (evsel_list, c2) { + if (!strcasecmp(c2->name, name)) + return c2; + } + return NULL; +} + +/* Mark MetricExpr target events and link events using them to them. */ +void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) +{ + struct perf_evsel *counter, *leader, **metric_events, *oc; + bool found; + const char **metric_names; + int i; + int num_metric_names; + + evlist__for_each_entry(evsel_list, counter) { + bool invalid = false; + + leader = counter->leader; + if (!counter->metric_expr) + continue; + metric_events = counter->metric_events; + if (!metric_events) { + if (expr__find_other(counter->metric_expr, counter->name, + &metric_names, &num_metric_names) < 0) + continue; + + metric_events = calloc(sizeof(struct perf_evsel *), + num_metric_names + 1); + if (!metric_events) + return; + counter->metric_events = metric_events; + } + + for (i = 0; i < num_metric_names; i++) { + found = false; + if (leader) { + /* Search in group */ + for_each_group_member (oc, leader) { + if (!strcasecmp(oc->name, metric_names[i])) { + found = true; + break; + } + } + } + if (!found) { + /* Search ignoring groups */ + oc = perf_stat__find_event(evsel_list, metric_names[i]); + } + if (!oc) { + /* Deduping one is good enough to handle duplicated PMUs. */ + static char *printed; + + /* + * Adding events automatically would be difficult, because + * it would risk creating groups that are not schedulable. + * perf stat doesn't understand all the scheduling constraints + * of events. So we ask the user instead to add the missing + * events. + */ + if (!printed || strcasecmp(printed, metric_names[i])) { + fprintf(stderr, + "Add %s event to groups to get metric expression for %s\n", + metric_names[i], + counter->name); + printed = strdup(metric_names[i]); + } + invalid = true; + continue; + } + metric_events[i] = oc; + oc->collect_stat = true; + } + metric_events[i] = NULL; + free(metric_names); + if (invalid) { + free(metric_events); + counter->metric_events = NULL; + counter->metric_expr = NULL; + } + } +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) @@ -614,6 +783,34 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); + } else if (evsel->metric_expr) { + struct parse_ctx pctx; + int i; + + expr__ctx_init(&pctx); + expr__add_id(&pctx, evsel->name, avg); + for (i = 0; evsel->metric_events[i]; i++) { + struct saved_value *v; + + v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); + if (!v) + break; + expr__add_id(&pctx, evsel->metric_events[i]->name, + avg_stats(&v->stats)); + } + if (!evsel->metric_events[i]) { + const char *p = evsel->metric_expr; + + if (expr__parse(&ratio, &pctx, &p) == 0) + print_metric(ctxp, NULL, "%8.1f", + evsel->metric_name ? + evsel->metric_name : + out->force_header ? evsel->name : "", + ratio); + else + print_metric(ctxp, NULL, NULL, "", 0); + } else + print_metric(ctxp, NULL, NULL, "", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0d51334a9b4628090f35ffe4da921b4e0ecb15b0..c58174443dc12c7fad840d8b67a34223e5283f9f 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,3 +1,5 @@ +#include +#include #include #include "stat.h" #include "evlist.h" diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c29bb94c48a4b6070a659e3674f4ae84694724f9..0a65ae23f49504874bf82134d4e9ed40d5408a7f 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -85,11 +85,13 @@ struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; new_line_t new_line; + bool force_header; }; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out); +void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 817593908d47a3d1ea27eb73080b94852e88d990..aafe908b82b5720b4487c242370ed326ec520e2c 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -1,15 +1,7 @@ #include "debug.h" #include "util.h" #include - -int prefixcmp(const char *str, const char *prefix) -{ - for (; ; str++, prefix++) - if (!*prefix) - return 0; - else if (*str != *prefix) - return (unsigned char)*prefix - (unsigned char)*str; -} +#include /* * Used as the default ->buf value, so that people can always assume diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index efb53772e0ecc8e6a9c9ea4493d37c6dc55d38bf..4dc0af669a3009f72726ff124913057413e1758f 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -1,7 +1,10 @@ #include "util.h" -#include "string.h" +#include "string2.h" #include "strfilter.h" +#include +#include "sane_ctype.h" + /* Operators */ static const char *OP_and = "&"; /* Logical AND */ static const char *OP_or = "|"; /* Logical OR */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index bddca519dd5854a0fd3319e85d54d7e638514b47..cca53b693a48d852450c00bf7ef6f90285d7ce4d 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,5 +1,9 @@ -#include "util.h" -#include "linux/string.h" +#include "string2.h" +#include +#include +#include + +#include "sane_ctype.h" #define K 1024LL /* @@ -99,8 +103,10 @@ static int count_argc(const char *str) void argv_free(char **argv) { char **p; - for (p = argv; *p; p++) - zfree(p); + for (p = argv; *p; p++) { + free(*p); + *p = NULL; + } free(argv); } @@ -120,7 +126,7 @@ void argv_free(char **argv) char **argv_split(const char *str, int *argcp) { int argc = count_argc(str); - char **argv = zalloc(sizeof(*argv) * (argc+1)); + char **argv = calloc(argc + 1, sizeof(*argv)); char **argvp; if (argv == NULL) @@ -322,12 +328,8 @@ char *strxfrchar(char *s, char from, char to) */ char *ltrim(char *s) { - int len = strlen(s); - - while (len && isspace(*s)) { - len--; + while (isspace(*s)) s++; - } return s; } @@ -381,7 +383,7 @@ char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints goto out_err_overflow; if (i > 0) - printed += snprintf(e + printed, size - printed, " %s ", or_and); + printed += scnprintf(e + printed, size - printed, " %s ", or_and); printed += scnprintf(e + printed, size - printed, "%s %s %d", var, eq_neq, ints[i]); } diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h new file mode 100644 index 0000000000000000000000000000000000000000..2f619681bd6a7084f5620ea077e8e69d1d659893 --- /dev/null +++ b/tools/perf/util/string2.h @@ -0,0 +1,42 @@ +#ifndef PERF_STRING_H +#define PERF_STRING_H + +#include +#include +#include + +s64 perf_atoll(const char *str); +char **argv_split(const char *str, int *argcp); +void argv_free(char **argv); +bool strglobmatch(const char *str, const char *pat); +bool strglobmatch_nocase(const char *str, const char *pat); +bool strlazymatch(const char *str, const char *pat); +static inline bool strisglob(const char *str) +{ + return strpbrk(str, "*?[") != NULL; +} +int strtailcmp(const char *s1, const char *s2); +char *strxfrchar(char *s, char from, char to); + +char *ltrim(char *s); +char *rtrim(char *s); + +static inline char *trim(char *s) +{ + return ltrim(rtrim(s)); +} + +char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); + +static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, true, nints, ints); +} + +static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) +{ + return asprintf_expr_inout_ints(var, false, nints, ints); +} + + +#endif /* PERF_STRING_H */ diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 0d3dfcb919b44936dfa42f2f437176f833afa39c..9de5434bb49ea355c25358c5441290b28ea0fb60 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -10,6 +10,7 @@ #include #include #include +#include static struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4e59ddeb4eda7cd8e75ec2c0c42e1fec4de0432a..502505cf236af30226b22ff82878098060f19862 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -10,8 +10,9 @@ #include "demangle-rust.h" #include "machine.h" #include "vdso.h" -#include #include "debug.h" +#include "sane_ctype.h" +#include #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ @@ -390,6 +391,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * return 0; } +char *dso__demangle_sym(struct dso *dso, int kmodule, char *elf_name) +{ + return demangle_sym(dso, kmodule, elf_name); +} + /* * Align offset to 4 bytes as needed for note name and descriptor data. */ @@ -631,43 +637,6 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } -static int decompress_kmodule(struct dso *dso, const char *name, - enum dso_binary_type type) -{ - int fd = -1; - char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; - struct kmod_path m; - - if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && - type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && - type != DSO_BINARY_TYPE__BUILD_ID_CACHE) - return -1; - - if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE) - name = dso->long_name; - - if (kmod_path__parse_ext(&m, name) || !m.comp) - return -1; - - fd = mkstemp(tmpbuf); - if (fd < 0) { - dso->load_errno = errno; - goto out; - } - - if (!decompress_to_file(m.ext, name, fd)) { - dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; - close(fd); - fd = -1; - } - - unlink(tmpbuf); - -out: - free(m.ext); - return fd; -} - bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -699,9 +668,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, int fd; if (dso__needs_decompress(dso)) { - fd = decompress_kmodule(dso, name, type); + fd = dso__decompress_kmodule_fd(dso, name); if (fd < 0) return -1; + + type = dso->symtab_type; } else { fd = open(name, O_RDONLY); if (fd < 0) { @@ -1828,7 +1799,7 @@ void kcore_extract__delete(struct kcore_extract *kce) static int populate_sdt_note(Elf **elf, const char *data, size_t len, struct list_head *sdt_notes) { - const char *provider, *name; + const char *provider, *name, *args; struct sdt_note *tmp = NULL; GElf_Ehdr ehdr; GElf_Addr base_off = 0; @@ -1887,6 +1858,25 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, goto out_free_prov; } + args = memchr(name, '\0', data + len - name); + + /* + * There is no argument if: + * - We reached the end of the note; + * - There is not enough room to hold a potential string; + * - The argument string is empty or just contains ':'. + */ + if (args == NULL || data + len - args < 2 || + args[1] == ':' || args[1] == '\0') + tmp->args = NULL; + else { + tmp->args = strdup(++args); + if (!tmp->args) { + ret = -ENOMEM; + goto out_free_name; + } + } + if (gelf_getclass(*elf) == ELFCLASS32) { memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr)); tmp->bit32 = true; @@ -1898,7 +1888,7 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, if (!gelf_getehdr(*elf, &ehdr)) { pr_debug("%s : cannot get elf header.\n", __func__); ret = -EBADF; - goto out_free_name; + goto out_free_args; } /* Adjust the prelink effect : @@ -1923,6 +1913,8 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, list_add_tail(&tmp->note_list, sdt_notes); return 0; +out_free_args: + free(tmp->args); out_free_name: free(tmp->name); out_free_prov: diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 11cdde9805455b52563af827a9d9d352bd2ede35..40bf5d4c0bfd4a6bc9aa4564dc9613e66ca1d7b6 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,6 +1,7 @@ #include "symbol.h" #include "util.h" +#include #include #include #include @@ -373,3 +374,10 @@ int kcore_copy(const char *from_dir __maybe_unused, void symbol__elf_init(void) { } + +char *dso__demangle_sym(struct dso *dso __maybe_unused, + int kmodule __maybe_unused, + char *elf_name __maybe_unused) +{ + return NULL; +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9b4d8ba22fed85f1f2bef6f5d47dc88cbb4df5d1..e7a98dbd2aed9133d99f38b71b3cac9fb297e0d6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,8 @@ #include "strlist.h" #include "intlist.h" #include "header.h" +#include "path.h" +#include "sane_ctype.h" #include #include @@ -87,6 +90,17 @@ static int prefix_underscores_count(const char *str) return tail - str; } +int __weak arch__compare_symbol_names(const char *namea, const char *nameb) +{ + return strcmp(namea, nameb); +} + +int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb, + unsigned int n) +{ + return strncmp(namea, nameb, n); +} + int __weak arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { @@ -396,8 +410,26 @@ static void symbols__sort_by_name(struct rb_root *symbols, } } +int symbol__match_symbol_name(const char *name, const char *str, + enum symbol_tag_include includes) +{ + const char *versioning; + + if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY && + (versioning = strstr(name, "@@"))) { + int len = strlen(str); + + if (len < versioning - name) + len = versioning - name; + + return arch__compare_symbol_names_n(name, str, len); + } else + return arch__compare_symbol_names(name, str); +} + static struct symbol *symbols__find_by_name(struct rb_root *symbols, - const char *name) + const char *name, + enum symbol_tag_include includes) { struct rb_node *n; struct symbol_name_rb_node *s = NULL; @@ -411,11 +443,11 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, int cmp; s = rb_entry(n, struct symbol_name_rb_node, rb_node); - cmp = arch__compare_symbol_names(name, s->sym.name); + cmp = symbol__match_symbol_name(s->sym.name, name, includes); - if (cmp < 0) + if (cmp > 0) n = n->rb_left; - else if (cmp > 0) + else if (cmp < 0) n = n->rb_right; else break; @@ -424,16 +456,17 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, if (n == NULL) return NULL; - /* return first symbol that has same name (if any) */ - for (n = rb_prev(n); n; n = rb_prev(n)) { - struct symbol_name_rb_node *tmp; + if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY) + /* return first symbol that has same name (if any) */ + for (n = rb_prev(n); n; n = rb_prev(n)) { + struct symbol_name_rb_node *tmp; - tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); - if (arch__compare_symbol_names(tmp->sym.name, s->sym.name)) - break; + tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); + if (arch__compare_symbol_names(tmp->sym.name, s->sym.name)) + break; - s = tmp; - } + s = tmp; + } return &s->sym; } @@ -463,7 +496,7 @@ void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { - if (dso->last_find_result[type].addr != addr) { + if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) { dso->last_find_result[type].addr = addr; dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); } @@ -500,7 +533,12 @@ struct symbol *symbol__next_by_name(struct symbol *sym) struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name) { - return symbols__find_by_name(&dso->symbol_names[type], name); + struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name, + SYMBOL_TAG_INCLUDE__NONE); + if (!s) + s = symbols__find_by_name(&dso->symbol_names[type], name, + SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); + return s; } void dso__sort_by_name(struct dso *dso, enum map_type type) @@ -1072,8 +1110,9 @@ static int validate_kcore_addresses(const char *kallsyms_filename, if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { u64 start; - start = kallsyms__get_function_start(kallsyms_filename, - kmap->ref_reloc_sym->name); + if (kallsyms__get_function_start(kallsyms_filename, + kmap->ref_reloc_sym->name, &start)) + return -ENOENT; if (start != kmap->ref_reloc_sym->addr) return -EINVAL; } @@ -1245,9 +1284,7 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) return 0; - addr = kallsyms__get_function_start(filename, - kmap->ref_reloc_sym->name); - if (!addr) + if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name, &addr)) return -1; *delta = addr - kmap->ref_reloc_sym->addr; @@ -1525,10 +1562,6 @@ int dso__load(struct dso *dso, struct map *map) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; - if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) - if (dso__build_id_is_kmod(dso, name, PATH_MAX)) - kmod = true; - if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod); else diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 6c358b7ed336c72fe9984c8f5b33fb3d200c638c..41ebba9a2eb2f3849635daf67125a851ff717d95 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -13,7 +13,7 @@ #include #include "build-id.h" #include "event.h" -#include "util.h" +#include "path.h" #ifdef HAVE_LIBELF_SUPPORT #include @@ -118,7 +118,8 @@ struct symbol_conf { show_ref_callgraph, hide_unresolved, raw_trace, - report_hierarchy; + report_hierarchy, + inline_name; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -305,6 +306,8 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map); +char *dso__demangle_sym(struct dso *dso, int kmodule, char *elf_name); + void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root *symbols); @@ -345,12 +348,24 @@ void arch__sym_update(struct symbol *s, GElf_Sym *sym); #define SYMBOL_A 0 #define SYMBOL_B 1 +int arch__compare_symbol_names(const char *namea, const char *nameb); +int arch__compare_symbol_names_n(const char *namea, const char *nameb, + unsigned int n); int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb); +enum symbol_tag_include { + SYMBOL_TAG_INCLUDE__NONE = 0, + SYMBOL_TAG_INCLUDE__DEFAULT_ONLY +}; + +int symbol__match_symbol_name(const char *namea, const char *nameb, + enum symbol_tag_include includes); + /* structure containing an SDT note's info */ struct sdt_note { char *name; /* name of the note*/ char *provider; /* provider name */ + char *args; bool bit32; /* whether the location is 32 bits? */ union { /* location, base and semaphore addrs */ Elf64_Addr a64[3]; diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c index 90b47d8aa19c41305f4b0823e459d6e60f19f3f5..8f254a74d97deed4abc357d2d527ff1453a7c4f1 100644 --- a/tools/perf/util/term.c +++ b/tools/perf/util/term.c @@ -1,4 +1,8 @@ -#include "util.h" +#include "term.h" +#include +#include +#include +#include void get_term_dimensions(struct winsize *ws) { diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index d3301529f6a762a17e5e390f8fd53286b5fc6145..dd17d6a38d3a1bc666a81c175e78295307ff40f9 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -15,6 +15,7 @@ #include #include +#include #include "thread.h" #include "event.h" #include "machine.h" diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f5af87f6666303df886d4c70fd2d565c1d0436e9..378c418ca0c173ddcc83425d590c32d3d71d2a39 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -1,12 +1,15 @@ #include "../perf.h" +#include #include #include #include +#include #include "session.h" #include "thread.h" #include "thread-stack.h" #include "util.h" #include "debug.h" +#include "namespaces.h" #include "comm.h" #include "unwind.h" @@ -40,6 +43,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->tid = tid; thread->ppid = -1; thread->cpu = -1; + INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); comm_str = malloc(32); @@ -53,7 +57,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) goto err_thread; list_add(&comm->list, &thread->comm_list); - atomic_set(&thread->refcnt, 1); + refcount_set(&thread->refcnt, 1); RB_CLEAR_NODE(&thread->rb_node); } @@ -66,7 +70,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) void thread__delete(struct thread *thread) { - struct comm *comm, *tmp; + struct namespaces *namespaces, *tmp_namespaces; + struct comm *comm, *tmp_comm; BUG_ON(!RB_EMPTY_NODE(&thread->rb_node)); @@ -76,7 +81,12 @@ void thread__delete(struct thread *thread) map_groups__put(thread->mg); thread->mg = NULL; } - list_for_each_entry_safe(comm, tmp, &thread->comm_list, list) { + list_for_each_entry_safe(namespaces, tmp_namespaces, + &thread->namespaces_list, list) { + list_del(&namespaces->list); + namespaces__free(namespaces); + } + list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) { list_del(&comm->list); comm__free(comm); } @@ -88,13 +98,13 @@ void thread__delete(struct thread *thread) struct thread *thread__get(struct thread *thread) { if (thread) - atomic_inc(&thread->refcnt); + refcount_inc(&thread->refcnt); return thread; } void thread__put(struct thread *thread) { - if (thread && atomic_dec_and_test(&thread->refcnt)) { + if (thread && refcount_dec_and_test(&thread->refcnt)) { /* * Remove it from the dead_threads list, as last reference * is gone. @@ -104,6 +114,38 @@ void thread__put(struct thread *thread) } } +struct namespaces *thread__namespaces(const struct thread *thread) +{ + if (list_empty(&thread->namespaces_list)) + return NULL; + + return list_first_entry(&thread->namespaces_list, struct namespaces, list); +} + +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event) +{ + struct namespaces *new, *curr = thread__namespaces(thread); + + new = namespaces__new(event); + if (!new) + return -ENOMEM; + + list_add(&new->list, &thread->namespaces_list); + + if (timestamp && curr) { + /* + * setns syscall must have changed few or all the namespaces + * of this thread. Update end time for the namespaces + * previously used. + */ + curr = list_next_entry(new, list); + curr->end_time = timestamp; + } + + return 0; +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 99263cb6e6b646bd987fc3ff410c2efa495f3944..4eb849e9098f5ca0115cb90527dd9f564476e4ea 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,7 +1,7 @@ #ifndef __PERF_THREAD_H #define __PERF_THREAD_H -#include +#include #include #include #include @@ -23,11 +23,12 @@ struct thread { pid_t tid; pid_t ppid; int cpu; - atomic_t refcnt; + refcount_t refcnt; char shortname[3]; bool comm_set; int comm_len; bool dead; /* if set thread has exited */ + struct list_head namespaces_list; struct list_head comm_list; u64 db_id; @@ -40,6 +41,7 @@ struct thread { }; struct machine; +struct namespaces; struct comm; struct thread *thread__new(pid_t pid, pid_t tid); @@ -62,6 +64,10 @@ static inline void thread__exited(struct thread *thread) thread->dead = true; } +struct namespaces *thread__namespaces(const struct thread *thread); +int thread__set_namespaces(struct thread *thread, u64 timestamp, + struct namespaces_event *event); + int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp, bool exec); static inline int thread__set_comm(struct thread *thread, const char *comm, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 7c3fcc538a705f9df580c33b6444512547b3f453..63ead7b06324c1615ceacb9c421b7f47129f3f5e 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include "string2.h" #include "strlist.h" #include #include @@ -66,7 +68,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) for (i = 0; i < items; i++) thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } for (i=0; inr = 1; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } return threads; @@ -105,7 +107,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) goto out_free_threads; threads->nr = 0; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); while ((dirent = readdir(proc)) != NULL) { char *end; @@ -235,7 +237,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) out: strlist__delete(slist); if (threads) - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); return threads; out_free_namelist: @@ -255,7 +257,7 @@ struct thread_map *thread_map__new_dummy(void) if (threads != NULL) { thread_map__set_pid(threads, 0, -1); threads->nr = 1; - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } return threads; } @@ -300,7 +302,7 @@ struct thread_map *thread_map__new_by_tid_str(const char *tid_str) } out: if (threads) - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); return threads; out_free_threads: @@ -326,7 +328,7 @@ static void thread_map__delete(struct thread_map *threads) if (threads) { int i; - WARN_ONCE(atomic_read(&threads->refcnt) != 0, + WARN_ONCE(refcount_read(&threads->refcnt) != 0, "thread map refcnt unbalanced\n"); for (i = 0; i < threads->nr; i++) free(thread_map__comm(threads, i)); @@ -337,13 +339,13 @@ static void thread_map__delete(struct thread_map *threads) struct thread_map *thread_map__get(struct thread_map *map) { if (map) - atomic_inc(&map->refcnt); + refcount_inc(&map->refcnt); return map; } void thread_map__put(struct thread_map *map) { - if (map && atomic_dec_and_test(&map->refcnt)) + if (map && refcount_dec_and_test(&map->refcnt)) thread_map__delete(map); } @@ -423,7 +425,7 @@ static void thread_map__copy_event(struct thread_map *threads, threads->map[i].comm = strndup(event->entries[i].comm, 16); } - atomic_set(&threads->refcnt, 1); + refcount_set(&threads->refcnt, 1); } struct thread_map *thread_map__new_event(struct thread_map_event *event) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index ea0ef08c6303413b6ececf2b93ec905914e558aa..bd34d7a0b9fa9fbcd92224e3f83208f7479cac09 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -3,7 +3,7 @@ #include #include -#include +#include struct thread_map_data { pid_t pid; @@ -11,7 +11,7 @@ struct thread_map_data { }; struct thread_map { - atomic_t refcnt; + refcount_t refcnt; int nr; struct thread_map_data map[]; }; diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index d1b21c72206d8b1a6a52697adb3091ff09e81ec2..5b5d0214debdcb99c563c84abe4d98746ff6a518 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -117,3 +117,28 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } + +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) +{ + u64 sec = timestamp / NSEC_PER_SEC; + u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; + + return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); +} + +int fetch_current_timestamp(char *buf, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index c1f197c4af6c9aa019cdebc9625b07020f30ad4e..8656be08513b178ab64e4c716ca2d4f0a9378900 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -1,6 +1,9 @@ #ifndef _TIME_UTILS_H_ #define _TIME_UTILS_H_ +#include +#include + struct perf_time_interval { u64 start, end; }; @@ -11,4 +14,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); + +int fetch_current_timestamp(char *buf, size_t sz); + #endif diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index ac2590a3de2d1586fa60a1893d9ec4233967b5e0..829471a1c6d76bc637c5fcefbeaec89b8d61d39a 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -40,6 +40,7 @@ struct perf_tool { event_op mmap, mmap2, comm, + namespaces, fork, exit, lost, @@ -66,6 +67,7 @@ struct perf_tool { event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; + bool namespace_events; }; #endif /* __PERF_TOOL_H */ diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index b2940c88734a01e280046f5c806c2a530fafc25a..9bdfb78a9a35d2b27a3c38bf5234af0af1f7985d 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include struct perf_evlist; struct perf_evsel; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index de0078e21408149fcf1105e357a3d829343233ff..746bbee645d98a69e3edabb73d92dd9a87da7422 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -21,13 +21,14 @@ #include #include #include -#include #include #include "../perf.h" #include "util.h" #include "trace-event.h" +#include "sane_ctype.h" + static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) { diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 27420159bf6944df76eb76a45943a4810e0ba972..8a9a677f75768205c47343c126813808e564a38a 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -192,7 +192,7 @@ static int read_ftrace_printk(struct pevent *pevent) if (!size) return 0; - buf = malloc(size); + buf = malloc(size + 1); if (buf == NULL) return -1; @@ -201,6 +201,8 @@ static int read_ftrace_printk(struct pevent *pevent) return -1; } + buf[size] = '\0'; + parse_ftrace_printk(pevent, buf, size); free(buf); diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c new file mode 100644 index 0000000000000000000000000000000000000000..4767ec2c5ef63cefbb03464fd244002ae6f5d54e --- /dev/null +++ b/tools/perf/util/units.c @@ -0,0 +1,68 @@ +#include "units.h" +#include +#include +#include +#include +#include +#include + +unsigned long parse_tag_value(const char *str, struct parse_tag *tags) +{ + struct parse_tag *i = tags; + + while (i->tag) { + char *s = strchr(str, i->tag); + + if (s) { + unsigned long int value; + char *endptr; + + value = strtoul(str, &endptr, 10); + if (s != endptr) + break; + + if (value > ULONG_MAX / i->mult) + break; + value *= i->mult; + return value; + } + i++; + } + + return (unsigned long) -1; +} + +unsigned long convert_unit(unsigned long value, char *unit) +{ + *unit = ' '; + + if (value > 1000) { + value /= 1000; + *unit = 'K'; + } + + if (value > 1000) { + value /= 1000; + *unit = 'M'; + } + + if (value > 1000) { + value /= 1000; + *unit = 'G'; + } + + return value; +} + +int unit_number__scnprintf(char *buf, size_t size, u64 n) +{ + char unit[4] = "BKMG"; + int i = 0; + + while (((n / 1024) > 1) && (i < 3)) { + n /= 1024; + i++; + } + + return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); +} diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h new file mode 100644 index 0000000000000000000000000000000000000000..f02c87317150e0d5f50296b33bb29fd96b6b9edb --- /dev/null +++ b/tools/perf/util/units.h @@ -0,0 +1,17 @@ +#ifndef PERF_UNIT_H +#define PERF_UNIT_H + +#include +#include + +struct parse_tag { + char tag; + int mult; +}; + +unsigned long parse_tag_value(const char *str, struct parse_tag *tags); + +unsigned long convert_unit(unsigned long value, char *unit); +int unit_number__scnprintf(char *buf, size_t size, u64 n); + +#endif /* PERF_UNIT_H */ diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 783a53fb7a4ed41c2fef9e90de26355e6b1bcc7e..7755a5e0fe5eb290c41502bf22725cce8a0b6935 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -12,6 +12,7 @@ #include "event.h" #include "perf_regs.h" #include "callchain.h" +#include "util.h" static char *debuginfo_path; @@ -38,6 +39,14 @@ static int __report_module(struct addr_location *al, u64 ip, return 0; mod = dwfl_addrmodule(ui->dwfl, ip); + if (mod) { + Dwarf_Addr s; + + dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); + if (s != al->map->start) + mod = 0; + } + if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, al->map->start, @@ -167,12 +176,24 @@ frame_callback(Dwfl_Frame *state, void *arg) { struct unwind_info *ui = arg; Dwarf_Addr pc; + bool isactivation; if (!dwfl_frame_pc(state, &pc, NULL)) { pr_err("%s", dwfl_errmsg(-1)); return DWARF_CB_ABORT; } + // report the module before we query for isactivation + report_module(pc, ui); + + if (!dwfl_frame_pc(state, &pc, &isactivation)) { + pr_err("%s", dwfl_errmsg(-1)); + return DWARF_CB_ABORT; + } + + if (!isactivation) + --pc; + return entry(pc, ui) || !(--ui->max_stack) ? DWARF_CB_ABORT : DWARF_CB_OK; } @@ -219,7 +240,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui); - if (err && !ui->max_stack) + if (err && ui->max_stack != max_stack) err = 0; /* diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 58328669ed16a7af3b475bb569b73310a48678df..4a2b269a7b3be24e766d892d475075984639b4cc 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -2,10 +2,12 @@ #define __PERF_UNWIND_LIBDW_H #include -#include "event.h" -#include "thread.h" #include "unwind.h" +struct machine; +struct perf_sample; +struct thread; + bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg); struct unwind_info { diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index bfb9b7987692ee583104c54f7756c43ca13cf52e..672c2ada9357a25054b4b1ba7eeec643505984e2 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -16,8 +16,10 @@ */ #include +#include #include #include +#include #include #include #include @@ -690,6 +692,17 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, while (!ret && (unw_step(&c) > 0) && i < max_stack) { unw_get_reg(&c, UNW_REG_IP, &ips[i]); + + /* + * Decrement the IP for any non-activation frames. + * this is required to properly find the srcline + * for caller frames. + * See also the documentation for dwfl_frame_pc(), + * which this code tries to replicate. + */ + if (unw_is_signal_frame(&c) <= 0) + --ips[i]; + ++i; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 61fb1e90ff5166c6c1f50f8446650cf302aa610d..bfbdcc6198c9d85c09f9859866ac7974e8dd61c7 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -1,10 +1,13 @@ #ifndef __UNWIND_H #define __UNWIND_H +#include #include -#include "event.h" -#include "symbol.h" -#include "thread.h" + +struct map; +struct perf_sample; +struct symbol; +struct thread; struct unwind_entry { struct map *map; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d8b45cea54d08233c3d743cc38c6c0ef7af90328..28c9f335006c962a5df924e30d9a45687775c201 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -3,38 +3,22 @@ #include "debug.h" #include #include +#include #include -#ifdef HAVE_BACKTRACE_SUPPORT -#include -#endif +#include +#include +#include #include #include #include #include #include -#include #include #include #include #include -#include "callchain.h" #include "strlist.h" -#define CALLCHAIN_PARAM_DEFAULT \ - .mode = CHAIN_GRAPH_ABS, \ - .min_percent = 0.5, \ - .order = ORDER_CALLEE, \ - .key = CCKEY_FUNCTION, \ - .value = CCVAL_PERCENT, \ - -struct callchain_param callchain_param = { - CALLCHAIN_PARAM_DEFAULT -}; - -struct callchain_param callchain_param_default = { - CALLCHAIN_PARAM_DEFAULT -}; - /* * XXX We need to find a better place for these things... */ @@ -269,28 +253,6 @@ int copyfile(const char *from, const char *to) return copyfile_mode(from, to, 0755); } -unsigned long convert_unit(unsigned long value, char *unit) -{ - *unit = ' '; - - if (value > 1000) { - value /= 1000; - *unit = 'K'; - } - - if (value > 1000) { - value /= 1000; - *unit = 'M'; - } - - if (value > 1000) { - value /= 1000; - *unit = 'G'; - } - - return value; -} - static ssize_t ion(bool is_read, int fd, void *buf, size_t n) { void *buf_start = buf; @@ -372,171 +334,6 @@ int hex2u64(const char *ptr, u64 *long_val) return p - ptr; } -/* Obtain a backtrace and print it to stdout. */ -#ifdef HAVE_BACKTRACE_SUPPORT -void dump_stack(void) -{ - void *array[16]; - size_t size = backtrace(array, ARRAY_SIZE(array)); - char **strings = backtrace_symbols(array, size); - size_t i; - - printf("Obtained %zd stack frames.\n", size); - - for (i = 0; i < size; i++) - printf("%s\n", strings[i]); - - free(strings); -} -#else -void dump_stack(void) {} -#endif - -void sighandler_dump_stack(int sig) -{ - psignal(sig, "perf"); - dump_stack(); - signal(sig, SIG_DFL); - raise(sig); -} - -int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) -{ - u64 sec = timestamp / NSEC_PER_SEC; - u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC; - - return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); -} - -unsigned long parse_tag_value(const char *str, struct parse_tag *tags) -{ - struct parse_tag *i = tags; - - while (i->tag) { - char *s; - - s = strchr(str, i->tag); - if (s) { - unsigned long int value; - char *endptr; - - value = strtoul(str, &endptr, 10); - if (s != endptr) - break; - - if (value > ULONG_MAX / i->mult) - break; - value *= i->mult; - return value; - } - i++; - } - - return (unsigned long) -1; -} - -int get_stack_size(const char *str, unsigned long *_size) -{ - char *endptr; - unsigned long size; - unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); - - size = strtoul(str, &endptr, 0); - - do { - if (*endptr) - break; - - size = round_up(size, sizeof(u64)); - if (!size || size > max_size) - break; - - *_size = size; - return 0; - - } while (0); - - pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", - max_size, str); - return -1; -} - -int parse_callchain_record(const char *arg, struct callchain_param *param) -{ - char *tok, *name, *saveptr = NULL; - char *buf; - int ret = -1; - - /* We need buffer that we know we can write to. */ - buf = malloc(strlen(arg) + 1); - if (!buf) - return -ENOMEM; - - strcpy(buf, arg); - - tok = strtok_r((char *)buf, ",", &saveptr); - name = tok ? : (char *)buf; - - do { - /* Framepointer style */ - if (!strncmp(name, "fp", sizeof("fp"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - param->record_mode = CALLCHAIN_FP; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph fp\n"); - break; - - /* Dwarf style */ - } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { - const unsigned long default_stack_dump_size = 8192; - - ret = 0; - param->record_mode = CALLCHAIN_DWARF; - param->dump_size = default_stack_dump_size; - - tok = strtok_r(NULL, ",", &saveptr); - if (tok) { - unsigned long size = 0; - - ret = get_stack_size(tok, &size); - param->dump_size = size; - } - } else if (!strncmp(name, "lbr", sizeof("lbr"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - param->record_mode = CALLCHAIN_LBR; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph lbr\n"); - break; - } else { - pr_err("callchain: Unknown --call-graph option " - "value: %s\n", arg); - break; - } - - } while (0); - - free(buf); - return ret; -} - -const char *get_filename_for_perf_kvm(void) -{ - const char *filename; - - if (perf_host && !perf_guest) - filename = strdup("perf.data.host"); - else if (!perf_host && perf_guest) - filename = strdup("perf.data.guest"); - else - filename = strdup("perf.data.kvm"); - - return filename; -} - int perf_event_paranoid(void) { int value; @@ -547,27 +344,6 @@ int perf_event_paranoid(void) return value; } -void mem_bswap_32(void *src, int byte_size) -{ - u32 *m = src; - while (byte_size > 0) { - *m = bswap_32(*m); - byte_size -= sizeof(u32); - ++m; - } -} - -void mem_bswap_64(void *src, int byte_size) -{ - u64 *m = src; - - while (byte_size > 0) { - *m = bswap_64(*m); - byte_size -= sizeof(u64); - ++m; - } -} - bool find_process(const char *name) { size_t len = strlen(name); @@ -696,7 +472,8 @@ const char *perf_tip(const char *dirpath) tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : "Tip: get more memory! ;-p"; + return errno == ENOENT ? NULL : + "Tip: check path of tips.txt or get more memory! ;-p"; if (strlist__nr_entries(tips) == 0) goto out; @@ -710,95 +487,3 @@ const char *perf_tip(const char *dirpath) return tip; } - -bool is_regular_file(const char *file) -{ - struct stat st; - - if (stat(file, &st)) - return false; - - return S_ISREG(st.st_mode); -} - -int fetch_current_timestamp(char *buf, size_t sz) -{ - struct timeval tv; - struct tm tm; - char dt[32]; - - if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) - return -1; - - if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) - return -1; - - scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); - - return 0; -} - -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra) -{ - size_t i, j, mask; - - if (!printer) - return; - - bytes_per_line = roundup_pow_of_two(bytes_per_line); - mask = bytes_per_line - 1; - - printer(BINARY_PRINT_DATA_BEGIN, 0, extra); - for (i = 0; i < len; i++) { - if ((i & mask) == 0) { - printer(BINARY_PRINT_LINE_BEGIN, -1, extra); - printer(BINARY_PRINT_ADDR, i, extra); - } - - printer(BINARY_PRINT_NUM_DATA, data[i], extra); - - if (((i & mask) == mask) || i == len - 1) { - for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_NUM_PAD, -1, extra); - - printer(BINARY_PRINT_SEP, i, extra); - for (j = i & ~mask; j <= i; j++) - printer(BINARY_PRINT_CHAR_DATA, data[j], extra); - for (j = 0; j < mask-(i & mask); j++) - printer(BINARY_PRINT_CHAR_PAD, i, extra); - printer(BINARY_PRINT_LINE_END, -1, extra); - } - } - printer(BINARY_PRINT_DATA_END, -1, extra); -} - -int is_printable_array(char *p, unsigned int len) -{ - unsigned int i; - - if (!p || !len || p[len - 1] != 0) - return 0; - - len--; - - for (i = 0; i < len; i++) { - if (!isprint(p[i]) && !isspace(p[i])) - return 0; - } - return 1; -} - -int unit_number__scnprintf(char *buf, size_t size, u64 n) -{ - char unit[4] = "BKMG"; - int i = 0; - - while (((n / 1024) > 1) && (i < 3)) { - n /= 1024; - i++; - } - - return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]); -} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c74708da857129356e2a308cd987dec34f371104..5dfb9bb6482d3103d494a9234fef50b308b3d7ef 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -1,125 +1,17 @@ #ifndef GIT_COMPAT_UTIL_H #define GIT_COMPAT_UTIL_H -#ifndef FLEX_ARRAY -/* - * See if our compiler is known to support flexible array members. - */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) -# define FLEX_ARRAY /* empty */ -#elif defined(__GNUC__) -# if (__GNUC__ >= 3) -# define FLEX_ARRAY /* empty */ -# else -# define FLEX_ARRAY 0 /* older GNU extension */ -# endif -#endif - -/* - * Otherwise, default to safer but a bit wasteful traditional style - */ -#ifndef FLEX_ARRAY -# define FLEX_ARRAY 1 -#endif -#endif - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) - -#ifdef __GNUC__ -#define TYPEOF(x) (__typeof__(x)) -#else -#define TYPEOF(x) -#endif - -#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) -#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ - -/* Approximation of the length of the decimal representation of this type. */ -#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) - #define _ALL_SOURCE 1 #define _BSD_SOURCE 1 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 -#define HAS_BOOL -#include -#include -#include -#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include "strlist.h" - -extern const char *graph_line; -extern const char *graph_dotted_line; -extern const char *spaces; -extern const char *dots; -extern char buildid_dir[]; - -/* On most systems would have given us this, but - * not on some systems (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#ifndef PRIuMAX -#define PRIuMAX "llu" -#endif - -#ifndef PRIu32 -#define PRIu32 "u" -#endif - -#ifndef PRIx32 -#define PRIx32 "x" -#endif - -#ifndef PATH_SEP -#define PATH_SEP ':' -#endif - -#ifndef STRIP_EXTENSION -#define STRIP_EXTENSION "" -#endif - -#ifndef has_dos_drive_prefix -#define has_dos_drive_prefix(path) 0 -#endif - -#ifndef is_dir_sep -#define is_dir_sep(c) ((c) == '/') -#endif #ifdef __GNUC__ #define NORETURN __attribute__((__noreturn__)) @@ -130,8 +22,6 @@ extern char buildid_dir[]; #endif #endif -#define PERF_GTK_DSO "libperf-gtk.so" - /* General helper functions */ void usage(const char *err) NORETURN; void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); @@ -140,25 +30,6 @@ void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); void set_warning_routine(void (*routine)(const char *err, va_list params)); -int prefixcmp(const char *str, const char *prefix); -void set_buildid_dir(const char *dir); - -#ifdef __GLIBC_PREREQ -#if __GLIBC_PREREQ(2, 1) -#define HAVE_STRCHRNUL -#endif -#endif - -#ifndef HAVE_STRCHRNUL -#define strchrnul gitstrchrnul -static inline char *gitstrchrnul(const char *s, int c) -{ - while (*s && *s != c) - s++; - return (char *)s; -} -#endif - static inline void *zalloc(size_t size) { return calloc(1, size); @@ -166,47 +37,8 @@ static inline void *zalloc(size_t size) #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isxdigit -#undef isalpha -#undef isprint -#undef isalnum -#undef islower -#undef isupper -#undef tolower -#undef toupper - -extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 -#define GIT_PRINT_EXTRA 0x20 -#define GIT_PRINT 0x3E -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isxdigit(x) \ - (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isprint(x) sane_istest(x,GIT_PRINT) -#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20)) -#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20)) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} +struct dirent; +struct strlist; int mkdir_p(char *path, mode_t mode); int rm_rf(const char *path); @@ -216,112 +48,17 @@ int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); -s64 perf_atoll(const char *str); -char **argv_split(const char *str, int *argcp); -void argv_free(char **argv); -bool strglobmatch(const char *str, const char *pat); -bool strglobmatch_nocase(const char *str, const char *pat); -bool strlazymatch(const char *str, const char *pat); -static inline bool strisglob(const char *str) -{ - return strpbrk(str, "*?[") != NULL; -} -int strtailcmp(const char *s1, const char *s2); -char *strxfrchar(char *s, char from, char to); -unsigned long convert_unit(unsigned long value, char *unit); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, void *buf, size_t n); -struct perf_event_attr; - -void event_attr_init(struct perf_event_attr *attr); - -#define _STR(x) #x -#define STR(x) _STR(x) - size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); -char *ltrim(char *s); -char *rtrim(char *s); - -static inline char *trim(char *s) -{ - return ltrim(rtrim(s)); -} - -void dump_stack(void); -void sighandler_dump_stack(int sig); - extern unsigned int page_size; extern int cacheline_size; -extern int sysctl_perf_event_max_stack; -extern int sysctl_perf_event_max_contexts_per_stack; - -struct parse_tag { - char tag; - int mult; -}; - -unsigned long parse_tag_value(const char *str, struct parse_tag *tags); - -#define SRCLINE_UNKNOWN ((char *) "??:0") - -static inline int path__join(char *bf, size_t size, - const char *path1, const char *path2) -{ - return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2); -} - -static inline int path__join3(char *bf, size_t size, - const char *path1, const char *path2, - const char *path3) -{ - return scnprintf(bf, size, "%s%s%s%s%s", - path1, path1[0] ? "/" : "", - path2, path2[0] ? "/" : "", path3); -} - -struct dso; -struct symbol; -extern bool srcline_full_filename; -char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym); -char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool unwind_inlines); -void free_srcline(char *srcline); - -int perf_event_paranoid(void); - -void mem_bswap_64(void *src, int byte_size); -void mem_bswap_32(void *src, int byte_size); - -const char *get_filename_for_perf_kvm(void); bool find_process(const char *name); -#ifdef HAVE_ZLIB_SUPPORT -int gzip_decompress_to_file(const char *input, int output_fd); -#endif - -#ifdef HAVE_LZMA_SUPPORT -int lzma_decompress_to_file(const char *input, int output_fd); -#endif - -char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); - -static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) -{ - return asprintf_expr_inout_ints(var, true, nints, ints); -} - -static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) -{ - return asprintf_expr_inout_ints(var, false, nints, ints); -} - -int get_stack_size(const char *str, unsigned long *_size); - int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) @@ -331,37 +68,9 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) const char *perf_tip(const char *dirpath); -bool is_regular_file(const char *file); -int fetch_current_timestamp(char *buf, size_t sz); -enum binary_printer_ops { - BINARY_PRINT_DATA_BEGIN, - BINARY_PRINT_LINE_BEGIN, - BINARY_PRINT_ADDR, - BINARY_PRINT_NUM_DATA, - BINARY_PRINT_NUM_PAD, - BINARY_PRINT_SEP, - BINARY_PRINT_CHAR_DATA, - BINARY_PRINT_CHAR_PAD, - BINARY_PRINT_LINE_END, - BINARY_PRINT_DATA_END, -}; - -typedef void (*print_binary_t)(enum binary_printer_ops, - unsigned int val, - void *extra); - -void print_binary(unsigned char *data, size_t len, - size_t bytes_per_line, print_binary_t printer, - void *extra); - -#if !defined(__GLIBC__) && !defined(__ANDROID__) -extern int sched_getcpu(void); +#ifndef HAVE_SCHED_GETCPU_SUPPORT +int sched_getcpu(void); #endif -int is_printable_array(char *p, unsigned int len); - -int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); - -int unit_number__scnprintf(char *buf, size_t size, u64 n); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 5074be4ed467bcad894ccb2ed9936d593f12907f..5de2e15e2eda9c343246431caf0ba68d9559e614 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -1,4 +1,7 @@ +#include +#include #include +#include #include "util.h" #include "values.h" @@ -108,24 +111,45 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, return i; } -static void perf_read_values__enlarge_counters(struct perf_read_values *values) +static int perf_read_values__enlarge_counters(struct perf_read_values *values) { - int i; + char **countername; + int i, counters_max = values->counters_max * 2; + u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid)); + + if (!counterrawid) { + pr_debug("failed to enlarge read_values rawid array"); + goto out_enomem; + } - values->counters_max *= 2; - values->counterrawid = realloc(values->counterrawid, - values->counters_max * sizeof(*values->counterrawid)); - values->countername = realloc(values->countername, - values->counters_max * sizeof(*values->countername)); - if (!values->counterrawid || !values->countername) - die("failed to enlarge read_values counters arrays"); + countername = realloc(values->countername, counters_max * sizeof(*values->countername)); + if (!countername) { + pr_debug("failed to enlarge read_values rawid array"); + goto out_free_rawid; + } for (i = 0; i < values->threads; i++) { - values->value[i] = realloc(values->value[i], - values->counters_max * sizeof(**values->value)); - if (!values->value[i]) - die("failed to enlarge read_values counters arrays"); + u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); + + if (value) { + pr_debug("failed to enlarge read_values ->values array"); + goto out_free_name; + } + + values->value[i] = value; } + + values->counters_max = counters_max; + values->counterrawid = counterrawid; + values->countername = countername; + + return 0; +out_free_name: + free(countername); +out_free_rawid: + free(counterrawid); +out_enomem: + return -ENOMEM; } static int perf_read_values__findnew_counter(struct perf_read_values *values, @@ -137,8 +161,11 @@ static int perf_read_values__findnew_counter(struct perf_read_values *values, if (values->counterrawid[i] == rawid) return i; - if (values->counters == values->counters_max) - perf_read_values__enlarge_counters(values); + if (values->counters == values->counters_max) { + i = perf_read_values__enlarge_counters(values); + if (i) + return i; + } i = values->counters++; values->counterrawid[i] = rawid; @@ -172,8 +199,10 @@ static void perf_read_values__display_pretty(FILE *fp, int *counterwidth; counterwidth = malloc(values->counters * sizeof(*counterwidth)); - if (!counterwidth) - die("failed to allocate counterwidth array"); + if (!counterwidth) { + fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n"); + return; + } tidwidth = 3; pidwidth = 3; for (j = 0; j < values->counters; j++) diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 7bdcad484225f13ed96ff07eb355a45acb997ac5..d3c39eec89a85b64f724bf27466c3dabc15a1029 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -1,4 +1,4 @@ - +#include #include #include #include diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c index c10ba41ef3f6298eb77e624f7f1b14f41555c36c..7251fdbabceda5e0bb88e4b9ae83f501ea9ad517 100644 --- a/tools/perf/util/xyarray.c +++ b/tools/perf/util/xyarray.c @@ -1,5 +1,7 @@ #include "xyarray.h" #include "util.h" +#include +#include struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) { diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 495a449fc25c35456c69f5e235fa8cbaff11d41c..1329d843eb7b4ef490517a8d631054151575106d 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -4,6 +4,7 @@ #include #include +#include "util/compress.h" #include "util/util.h" #include "util/debug.h" diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cba3d994995c69e28be42ef192ae9cf06b3fbe96 --- /dev/null +++ b/tools/power/acpi/.gitignore @@ -0,0 +1,4 @@ +acpidbg +acpidump +ec +include diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4d0ccc89e6c67b7a56b2fe5c2f4fb14e1e42e9a0 --- /dev/null +++ b/tools/power/pm-graph/Makefile @@ -0,0 +1,28 @@ +PREFIX ?= /usr +DESTDIR ?= + +all: + @echo "Nothing to build" + +install : + install -d $(DESTDIR)$(PREFIX)/lib/pm-graph + install analyze_suspend.py $(DESTDIR)$(PREFIX)/lib/pm-graph + install analyze_boot.py $(DESTDIR)$(PREFIX)/lib/pm-graph + + ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py $(DESTDIR)$(PREFIX)/bin/bootgraph + ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py $(DESTDIR)$(PREFIX)/bin/sleepgraph + + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 + install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 + +uninstall : + rm $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8 + rm $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8 + + rm $(DESTDIR)$(PREFIX)/bin/bootgraph + rm $(DESTDIR)$(PREFIX)/bin/sleepgraph + + rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py + rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py + rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph diff --git a/tools/power/pm-graph/analyze_boot.py b/tools/power/pm-graph/analyze_boot.py new file mode 100755 index 0000000000000000000000000000000000000000..3e1dcbbf1adcfec240ef238f76f7126eac61b9ab --- /dev/null +++ b/tools/power/pm-graph/analyze_boot.py @@ -0,0 +1,824 @@ +#!/usr/bin/python +# +# Tool for analyzing boot timing +# Copyright (c) 2013, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Authors: +# Todd Brandt +# +# Description: +# This tool is designed to assist kernel and OS developers in optimizing +# their linux stack's boot time. It creates an html representation of +# the kernel boot timeline up to the start of the init process. +# + +# ----------------- LIBRARIES -------------------- + +import sys +import time +import os +import string +import re +import platform +import shutil +from datetime import datetime, timedelta +from subprocess import call, Popen, PIPE +import analyze_suspend as aslib + +# ----------------- CLASSES -------------------- + +# Class: SystemValues +# Description: +# A global, single-instance container used to +# store system values and test parameters +class SystemValues(aslib.SystemValues): + title = 'BootGraph' + version = 2.0 + hostname = 'localhost' + testtime = '' + kernel = '' + dmesgfile = '' + ftracefile = '' + htmlfile = 'bootgraph.html' + outfile = '' + phoronix = False + addlogs = False + useftrace = False + usedevsrc = True + suspendmode = 'boot' + max_graph_depth = 2 + graph_filter = 'do_one_initcall' + reboot = False + manual = False + iscronjob = False + timeformat = '%.6f' + def __init__(self): + if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ): + self.phoronix = True + self.addlogs = True + self.outfile = os.environ['LOG_FILE'] + self.htmlfile = os.environ['LOG_FILE'] + self.hostname = platform.node() + self.testtime = datetime.now().strftime('%Y-%m-%d_%H:%M:%S') + if os.path.exists('/proc/version'): + fp = open('/proc/version', 'r') + val = fp.read().strip() + fp.close() + self.kernel = self.kernelVersion(val) + else: + self.kernel = 'unknown' + def kernelVersion(self, msg): + return msg.split()[2] + def kernelParams(self): + cmdline = 'initcall_debug log_buf_len=32M' + if self.useftrace: + cmdline += ' trace_buf_size=128M trace_clock=global '\ + 'trace_options=nooverwrite,funcgraph-abstime,funcgraph-cpu,'\ + 'funcgraph-duration,funcgraph-proc,funcgraph-tail,'\ + 'nofuncgraph-overhead,context-info,graph-time '\ + 'ftrace=function_graph '\ + 'ftrace_graph_max_depth=%d '\ + 'ftrace_graph_filter=%s' % \ + (self.max_graph_depth, self.graph_filter) + return cmdline + def setGraphFilter(self, val): + fp = open(self.tpath+'available_filter_functions') + master = fp.read().split('\n') + fp.close() + for i in val.split(','): + func = i.strip() + if func not in master: + doError('function "%s" not available for ftrace' % func) + self.graph_filter = val + def cronjobCmdString(self): + cmdline = '%s -cronjob' % os.path.abspath(sys.argv[0]) + args = iter(sys.argv[1:]) + for arg in args: + if arg in ['-h', '-v', '-cronjob', '-reboot']: + continue + elif arg in ['-o', '-dmesg', '-ftrace', '-filter']: + args.next() + continue + cmdline += ' '+arg + if self.graph_filter != 'do_one_initcall': + cmdline += ' -filter "%s"' % self.graph_filter + cmdline += ' -o "%s"' % os.path.abspath(self.htmlfile) + return cmdline + def manualRebootRequired(self): + cmdline = self.kernelParams() + print 'To generate a new timeline manually, follow these steps:\n' + print '1. Add the CMDLINE string to your kernel command line.' + print '2. Reboot the system.' + print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n' + print 'CMDLINE="%s"' % cmdline + sys.exit() + +sysvals = SystemValues() + +# Class: Data +# Description: +# The primary container for test data. +class Data(aslib.Data): + dmesg = {} # root data structure + start = 0.0 # test start + end = 0.0 # test end + dmesgtext = [] # dmesg text file in memory + testnumber = 0 + idstr = '' + html_device_id = 0 + valid = False + initstart = 0.0 + boottime = '' + phases = ['boot'] + do_one_initcall = False + def __init__(self, num): + self.testnumber = num + self.idstr = 'a' + self.dmesgtext = [] + self.dmesg = { + 'boot': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, 'color': '#dddddd'} + } + def deviceTopology(self): + return '' + def newAction(self, phase, name, start, end, ret, ulen): + # new device callback for a specific phase + self.html_device_id += 1 + devid = '%s%d' % (self.idstr, self.html_device_id) + list = self.dmesg[phase]['list'] + length = -1.0 + if(start >= 0 and end >= 0): + length = end - start + i = 2 + origname = name + while(name in list): + name = '%s[%d]' % (origname, i) + i += 1 + list[name] = {'name': name, 'start': start, 'end': end, + 'pid': 0, 'length': length, 'row': 0, 'id': devid, + 'ret': ret, 'ulen': ulen } + return name + def deviceMatch(self, cg): + if cg.end - cg.start == 0: + return True + list = self.dmesg['boot']['list'] + for devname in list: + dev = list[devname] + if cg.name == 'do_one_initcall': + if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0): + dev['ftrace'] = cg + self.do_one_initcall = True + return True + else: + if(cg.start > dev['start'] and cg.end < dev['end']): + if 'ftraces' not in dev: + dev['ftraces'] = [] + dev['ftraces'].append(cg) + return True + return False + +# ----------------- FUNCTIONS -------------------- + +# Function: loadKernelLog +# Description: +# Load a raw kernel log from dmesg +def loadKernelLog(): + data = Data(0) + data.dmesg['boot']['start'] = data.start = ktime = 0.0 + sysvals.stamp = { + 'time': datetime.now().strftime('%B %d %Y, %I:%M:%S %p'), + 'host': sysvals.hostname, + 'mode': 'boot', 'kernel': ''} + + devtemp = dict() + if(sysvals.dmesgfile): + lf = open(sysvals.dmesgfile, 'r') + else: + lf = Popen('dmesg', stdout=PIPE).stdout + for line in lf: + line = line.replace('\r\n', '') + idx = line.find('[') + if idx > 1: + line = line[idx:] + m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + if(not m): + continue + ktime = float(m.group('ktime')) + if(ktime > 120): + break + msg = m.group('msg') + data.end = data.initstart = ktime + data.dmesgtext.append(line) + if(ktime == 0.0 and re.match('^Linux version .*', msg)): + if(not sysvals.stamp['kernel']): + sysvals.stamp['kernel'] = sysvals.kernelVersion(msg) + continue + m = re.match('.* setting system clock to (?P.*) UTC.*', msg) + if(m): + bt = datetime.strptime(m.group('t'), '%Y-%m-%d %H:%M:%S') + bt = bt - timedelta(seconds=int(ktime)) + data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S') + sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p') + continue + m = re.match('^calling *(?P.*)\+.*', msg) + if(m): + devtemp[m.group('f')] = ktime + continue + m = re.match('^initcall *(?P.*)\+.* returned (?P.*) after (?P.*) usecs', msg) + if(m): + data.valid = True + f, r, t = m.group('f', 'r', 't') + if(f in devtemp): + data.newAction('boot', f, devtemp[f], ktime, int(r), int(t)) + data.end = ktime + del devtemp[f] + continue + if(re.match('^Freeing unused kernel memory.*', msg)): + break + + data.dmesg['boot']['end'] = data.end + lf.close() + return data + +# Function: loadTraceLog +# Description: +# Check if trace is available and copy to a temp file +def loadTraceLog(data): + # load the data to a temp file if none given + if not sysvals.ftracefile: + lib = aslib.sysvals + aslib.rootCheck(True) + if not lib.verifyFtrace(): + doError('ftrace not available') + if lib.fgetVal('current_tracer').strip() != 'function_graph': + doError('ftrace not configured for a boot callgraph') + sysvals.ftracefile = '/tmp/boot_ftrace.%s.txt' % os.getpid() + call('cat '+lib.tpath+'trace > '+sysvals.ftracefile, shell=True) + if not sysvals.ftracefile: + doError('No trace data available') + + # parse the trace log + ftemp = dict() + tp = aslib.TestProps() + tp.setTracerType('function_graph') + tf = open(sysvals.ftracefile, 'r') + for line in tf: + if line[0] == '#': + continue + m = re.match(tp.ftrace_line_fmt, line.strip()) + if(not m): + continue + m_time, m_proc, m_pid, m_msg, m_dur = \ + m.group('time', 'proc', 'pid', 'msg', 'dur') + if float(m_time) > data.end: + break + if(m_time and m_pid and m_msg): + t = aslib.FTraceLine(m_time, m_msg, m_dur) + pid = int(m_pid) + else: + continue + if t.fevent or t.fkprobe: + continue + key = (m_proc, pid) + if(key not in ftemp): + ftemp[key] = [] + ftemp[key].append(aslib.FTraceCallGraph(pid)) + cg = ftemp[key][-1] + if(cg.addLine(t)): + ftemp[key].append(aslib.FTraceCallGraph(pid)) + tf.close() + + # add the callgraph data to the device hierarchy + for key in ftemp: + proc, pid = key + for cg in ftemp[key]: + if len(cg.list) < 1 or cg.invalid: + continue + if(not cg.postProcess()): + print('Sanity check failed for %s-%d' % (proc, pid)) + continue + # match cg data to devices + if not data.deviceMatch(cg): + print ' BAD: %s %s-%d [%f - %f]' % (cg.name, proc, pid, cg.start, cg.end) + +# Function: colorForName +# Description: +# Generate a repeatable color from a list for a given name +def colorForName(name): + list = [ + ('c1', '#ec9999'), + ('c2', '#ffc1a6'), + ('c3', '#fff0a6'), + ('c4', '#adf199'), + ('c5', '#9fadea'), + ('c6', '#a699c1'), + ('c7', '#ad99b4'), + ('c8', '#eaffea'), + ('c9', '#dcecfb'), + ('c10', '#ffffea') + ] + i = 0 + total = 0 + count = len(list) + while i < len(name): + total += ord(name[i]) + i += 1 + return list[total % count] + +def cgOverview(cg, minlen): + stats = dict() + large = [] + for l in cg.list: + if l.fcall and l.depth == 1: + if l.length >= minlen: + large.append(l) + if l.name not in stats: + stats[l.name] = [0, 0.0] + stats[l.name][0] += (l.length * 1000.0) + stats[l.name][1] += 1 + return (large, stats) + +# Function: createBootGraph +# Description: +# Create the output html file from the resident test data +# Arguments: +# testruns: array of Data objects from parseKernelLog or parseTraceLog +# Output: +# True if the html file was created, false if it failed +def createBootGraph(data, embedded): + # html function templates + html_srccall = '

    {0}
    \n' + html_timetotal = '\n'\ + ''\ + '\n
    Time from Kernel Boot to start of User Mode: {0} ms
    \n' + + # device timeline + devtl = aslib.Timeline(100, 20) + + # write the test title and general info header + devtl.createHeader(sysvals, 'noftrace') + + # Generate the header for this timeline + t0 = data.start + tMax = data.end + tTotal = tMax - t0 + if(tTotal == 0): + print('ERROR: No timeline data') + return False + boot_time = '%.0f'%(tTotal*1000) + devtl.html += html_timetotal.format(boot_time) + + # determine the maximum number of rows we need to draw + phase = 'boot' + list = data.dmesg[phase]['list'] + devlist = [] + for devname in list: + d = aslib.DevItem(0, phase, list[devname]) + devlist.append(d) + devtl.getPhaseRows(devlist) + devtl.calcTotalRows() + + # draw the timeline background + devtl.createZoomBox() + boot = data.dmesg[phase] + length = boot['end']-boot['start'] + left = '%.3f' % (((boot['start']-t0)*100.0)/tTotal) + width = '%.3f' % ((length*100.0)/tTotal) + devtl.html += devtl.html_tblock.format(phase, left, width, devtl.scaleH) + devtl.html += devtl.html_phase.format('0', '100', \ + '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \ + 'white', '') + + # draw the device timeline + num = 0 + devstats = dict() + for devname in sorted(list): + cls, color = colorForName(devname) + dev = list[devname] + info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0, + dev['ulen']/1000.0, dev['ret']) + devstats[dev['id']] = {'info':info} + dev['color'] = color + height = devtl.phaseRowHeight(0, phase, dev['row']) + top = '%.6f' % ((dev['row']*height) + devtl.scaleH) + left = '%.6f' % (((dev['start']-t0)*100)/tTotal) + width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal) + length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000) + devtl.html += devtl.html_device.format(dev['id'], + devname+length+'kernel_mode', left, top, '%.3f'%height, + width, devname, ' '+cls, '') + rowtop = devtl.phaseRowTop(0, phase, dev['row']) + height = '%.6f' % (devtl.rowH / 2) + top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2)) + if data.do_one_initcall: + if('ftrace' not in dev): + continue + cg = dev['ftrace'] + large, stats = cgOverview(cg, 0.001) + devstats[dev['id']]['fstat'] = stats + for l in large: + left = '%f' % (((l.time-t0)*100)/tTotal) + width = '%f' % (l.length*100/tTotal) + title = '%s (%0.3fms)' % (l.name, l.length * 1000.0) + devtl.html += html_srccall.format(l.name, left, + top, height, width, title, 'x%d'%num) + num += 1 + continue + if('ftraces' not in dev): + continue + for cg in dev['ftraces']: + left = '%f' % (((cg.start-t0)*100)/tTotal) + width = '%f' % ((cg.end-cg.start)*100/tTotal) + cglen = (cg.end - cg.start) * 1000.0 + title = '%s (%0.3fms)' % (cg.name, cglen) + cg.id = 'x%d' % num + devtl.html += html_srccall.format(cg.name, left, + top, height, width, title, dev['id']+cg.id) + num += 1 + + # draw the time scale, try to make the number of labels readable + devtl.createTimeScale(t0, tMax, tTotal, phase) + devtl.html += '\n' + + # timeline is finished + devtl.html += '\n\n' + + if(sysvals.outfile == sysvals.htmlfile): + hf = open(sysvals.htmlfile, 'a') + else: + hf = open(sysvals.htmlfile, 'w') + + # add the css if this is not an embedded run + extra = '\ + .c1 {background:rgba(209,0,0,0.4);}\n\ + .c2 {background:rgba(255,102,34,0.4);}\n\ + .c3 {background:rgba(255,218,33,0.4);}\n\ + .c4 {background:rgba(51,221,0,0.4);}\n\ + .c5 {background:rgba(17,51,204,0.4);}\n\ + .c6 {background:rgba(34,0,102,0.4);}\n\ + .c7 {background:rgba(51,0,68,0.4);}\n\ + .c8 {background:rgba(204,255,204,0.4);}\n\ + .c9 {background:rgba(169,208,245,0.4);}\n\ + .c10 {background:rgba(255,255,204,0.4);}\n\ + .vt {transform:rotate(-60deg);transform-origin:0 0;}\n\ + table.fstat {table-layout:fixed;padding:150px 15px 0 0;font-size:10px;column-width:30px;}\n\ + .fstat th {width:55px;}\n\ + .fstat td {text-align:left;width:35px;}\n\ + .srccall {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\ + .srccall:hover {color:white;font-weight:bold;border:1px solid white;}\n' + if(not embedded): + aslib.addCSS(hf, sysvals, 1, False, extra) + + # write the device timeline + hf.write(devtl.html) + + # add boot specific html + statinfo = 'var devstats = {\n' + for n in sorted(devstats): + statinfo += '\t"%s": [\n\t\t"%s",\n' % (n, devstats[n]['info']) + if 'fstat' in devstats[n]: + funcs = devstats[n]['fstat'] + for f in sorted(funcs, key=funcs.get, reverse=True): + if funcs[f][0] < 0.01 and len(funcs) > 10: + break + statinfo += '\t\t"%f|%s|%d",\n' % (funcs[f][0], f, funcs[f][1]) + statinfo += '\t],\n' + statinfo += '};\n' + html = \ + '
    \n'\ + '\n'\ + '\n' + hf.write(html) + + # add the callgraph html + if(sysvals.usecallgraph): + aslib.addCallgraphs(sysvals, hf, data) + + # add the dmesg log as a hidden div + if sysvals.addlogs: + hf.write('\n') + + if(not embedded): + # write the footer and close + aslib.addScriptCode(hf, [data]) + hf.write('\n\n') + else: + # embedded out will be loaded in a page, skip the js + hf.write('' % \ + (data.start*1000, data.initstart*1000)) + hf.close() + return True + +# Function: updateCron +# Description: +# (restore=False) Set the tool to run automatically on reboot +# (restore=True) Restore the original crontab +def updateCron(restore=False): + if not restore: + sysvals.rootUser(True) + crondir = '/var/spool/cron/crontabs/' + cronfile = crondir+'root' + backfile = crondir+'root-analyze_boot-backup' + if not os.path.exists(crondir): + doError('%s not found' % crondir) + out = Popen(['which', 'crontab'], stdout=PIPE).stdout.read() + if not out: + doError('crontab not found') + # on restore: move the backup cron back into place + if restore: + if os.path.exists(backfile): + shutil.move(backfile, cronfile) + return + # backup current cron and install new one with reboot + if os.path.exists(cronfile): + shutil.move(cronfile, backfile) + else: + fp = open(backfile, 'w') + fp.close() + res = -1 + try: + fp = open(backfile, 'r') + op = open(cronfile, 'w') + for line in fp: + if '@reboot' not in line: + op.write(line) + continue + fp.close() + op.write('@reboot python %s\n' % sysvals.cronjobCmdString()) + op.close() + res = call('crontab %s' % cronfile, shell=True) + except Exception, e: + print 'Exception: %s' % str(e) + shutil.move(backfile, cronfile) + res = -1 + if res != 0: + doError('crontab failed') + +# Function: updateGrub +# Description: +# update grub.cfg for all kernels with our parameters +def updateGrub(restore=False): + # call update-grub on restore + if restore: + try: + call(['update-grub'], stderr=PIPE, stdout=PIPE, + env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'}) + except Exception, e: + print 'Exception: %s\n' % str(e) + return + # verify we can do this + sysvals.rootUser(True) + grubfile = '/etc/default/grub' + if not os.path.exists(grubfile): + print 'ERROR: Unable to set the kernel parameters via grub.\n' + sysvals.manualRebootRequired() + out = Popen(['which', 'update-grub'], stdout=PIPE).stdout.read() + if not out: + print 'ERROR: Unable to set the kernel parameters via grub.\n' + sysvals.manualRebootRequired() + + # extract the option and create a grub config without it + tgtopt = 'GRUB_CMDLINE_LINUX_DEFAULT' + cmdline = '' + tempfile = '/etc/default/grub.analyze_boot' + shutil.move(grubfile, tempfile) + res = -1 + try: + fp = open(tempfile, 'r') + op = open(grubfile, 'w') + cont = False + for line in fp: + line = line.strip() + if len(line) == 0 or line[0] == '#': + continue + opt = line.split('=')[0].strip() + if opt == tgtopt: + cmdline = line.split('=', 1)[1].strip('\\') + if line[-1] == '\\': + cont = True + elif cont: + cmdline += line.strip('\\') + if line[-1] != '\\': + cont = False + else: + op.write('%s\n' % line) + fp.close() + # if the target option value is in quotes, strip them + sp = '"' + val = cmdline.strip() + if val[0] == '\'' or val[0] == '"': + sp = val[0] + val = val.strip(sp) + cmdline = val + # append our cmd line options + if len(cmdline) > 0: + cmdline += ' ' + cmdline += sysvals.kernelParams() + # write out the updated target option + op.write('\n%s=%s%s%s\n' % (tgtopt, sp, cmdline, sp)) + op.close() + res = call('update-grub') + os.remove(grubfile) + except Exception, e: + print 'Exception: %s' % str(e) + res = -1 + # cleanup + shutil.move(tempfile, grubfile) + if res != 0: + doError('update-grub failed') + +# Function: doError +# Description: +# generic error function for catastrphic failures +# Arguments: +# msg: the error message to print +# help: True if printHelp should be called after, False otherwise +def doError(msg, help=False): + if help == True: + printHelp() + print 'ERROR: %s\n' % msg + sys.exit() + +# Function: printHelp +# Description: +# print out the help text +def printHelp(): + print('') + print('%s v%.1f' % (sysvals.title, sysvals.version)) + print('Usage: bootgraph ') + print('') + print('Description:') + print(' This tool reads in a dmesg log of linux kernel boot and') + print(' creates an html representation of the boot timeline up to') + print(' the start of the init process.') + print('') + print(' If no specific command is given the tool reads the current dmesg') + print(' and/or ftrace log and outputs bootgraph.html') + print('') + print('Options:') + print(' -h Print this help text') + print(' -v Print the current tool version') + print(' -addlogs Add the dmesg log to the html output') + print(' -o file Html timeline name (default: bootgraph.html)') + print(' [advanced]') + print(' -f Use ftrace to add function detail (default: disabled)') + print(' -callgraph Add callgraph detail, can be very large (default: disabled)') + print(' -maxdepth N limit the callgraph data to N call levels (default: 2)') + print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)') + print(' -timeprec N Number of significant digits in timestamps (0:S, 3:ms, [6:us])') + print(' -expandcg pre-expand the callgraph data in the html output (default: disabled)') + print(' -filter list Limit ftrace to comma-delimited list of functions (default: do_one_initcall)') + print(' [commands]') + print(' -reboot Reboot the machine automatically and generate a new timeline') + print(' -manual Show the requirements to generate a new timeline manually') + print(' -dmesg file Load a stored dmesg file (used with -ftrace)') + print(' -ftrace file Load a stored ftrace file (used with -dmesg)') + print(' -flistall Print all functions capable of being captured in ftrace') + print('') + return True + +# ----------------- MAIN -------------------- +# exec start (skipped if script is loaded as library) +if __name__ == '__main__': + # loop through the command line arguments + cmd = '' + simplecmds = ['-updategrub', '-flistall'] + args = iter(sys.argv[1:]) + for arg in args: + if(arg == '-h'): + printHelp() + sys.exit() + elif(arg == '-v'): + print("Version %.1f" % sysvals.version) + sys.exit() + elif(arg in simplecmds): + cmd = arg[1:] + elif(arg == '-f'): + sysvals.useftrace = True + elif(arg == '-callgraph'): + sysvals.useftrace = True + sysvals.usecallgraph = True + elif(arg == '-mincg'): + sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0) + elif(arg == '-timeprec'): + sysvals.setPrecision(aslib.getArgInt('-timeprec', args, 0, 6)) + elif(arg == '-maxdepth'): + sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000) + elif(arg == '-filter'): + try: + val = args.next() + except: + doError('No filter functions supplied', True) + aslib.rootCheck(True) + sysvals.setGraphFilter(val) + elif(arg == '-ftrace'): + try: + val = args.next() + except: + doError('No ftrace file supplied', True) + if(os.path.exists(val) == False): + doError('%s does not exist' % val) + sysvals.ftracefile = val + elif(arg == '-addlogs'): + sysvals.addlogs = True + elif(arg == '-expandcg'): + sysvals.cgexp = True + elif(arg == '-dmesg'): + try: + val = args.next() + except: + doError('No dmesg file supplied', True) + if(os.path.exists(val) == False): + doError('%s does not exist' % val) + if(sysvals.htmlfile == val or sysvals.outfile == val): + doError('Output filename collision') + sysvals.dmesgfile = val + elif(arg == '-o'): + try: + val = args.next() + except: + doError('No HTML filename supplied', True) + if(sysvals.dmesgfile == val or sysvals.ftracefile == val): + doError('Output filename collision') + sysvals.htmlfile = val + elif(arg == '-reboot'): + if sysvals.iscronjob: + doError('-reboot and -cronjob are incompatible') + sysvals.reboot = True + elif(arg == '-manual'): + sysvals.reboot = True + sysvals.manual = True + # remaining options are only for cron job use + elif(arg == '-cronjob'): + sysvals.iscronjob = True + if sysvals.reboot: + doError('-reboot and -cronjob are incompatible') + else: + doError('Invalid argument: '+arg, True) + + if cmd != '': + if cmd == 'updategrub': + updateGrub() + elif cmd == 'flistall': + sysvals.getFtraceFilterFunctions(False) + sys.exit() + + # update grub, setup a cronjob, and reboot + if sysvals.reboot: + if not sysvals.manual: + updateGrub() + updateCron() + call('reboot') + else: + sysvals.manualRebootRequired() + sys.exit() + + # disable the cronjob + if sysvals.iscronjob: + updateCron(True) + updateGrub(True) + + data = loadKernelLog() + if sysvals.useftrace: + loadTraceLog(data) + if sysvals.iscronjob: + try: + sysvals.fsetVal('0', 'tracing_on') + except: + pass + + if(sysvals.outfile and sysvals.phoronix): + fp = open(sysvals.outfile, 'w') + fp.write('pass %s initstart %.3f end %.3f boot %s\n' % + (data.valid, data.initstart*1000, data.end*1000, data.boottime)) + fp.close() + if(not data.valid): + if sysvals.dmesgfile: + doError('No initcall data found in %s' % sysvals.dmesgfile) + else: + doError('No initcall data found, is initcall_debug enabled?') + + print(' Host: %s' % sysvals.hostname) + print(' Test time: %s' % sysvals.testtime) + print(' Boot time: %s' % data.boottime) + print('Kernel Version: %s' % sysvals.kernel) + print(' Kernel start: %.3f' % (data.start * 1000)) + print(' init start: %.3f' % (data.initstart * 1000)) + + createBootGraph(data, sysvals.phoronix) diff --git a/scripts/analyze_suspend.py b/tools/power/pm-graph/analyze_suspend.py similarity index 91% rename from scripts/analyze_suspend.py rename to tools/power/pm-graph/analyze_suspend.py index 20cdb2bc1daec4fc1205c71c38c8d082f0602863..a9206e67fc1f1b0385b08323fbf36ebcb05e910c 100755 --- a/scripts/analyze_suspend.py +++ b/tools/power/pm-graph/analyze_suspend.py @@ -12,10 +12,6 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. -# # Authors: # Todd Brandt # @@ -23,7 +19,7 @@ # Home Page # https://01.org/suspendresume # Source repo -# https://github.com/01org/suspendresume +# https://github.com/01org/pm-graph # # Description: # This tool is designed to assist kernel and OS developers in optimizing @@ -71,14 +67,16 @@ from subprocess import call, Popen, PIPE # A global, single-instance container used to # store system values and test parameters class SystemValues: + title = 'SleepGraph' + version = '4.6' ansi = False - version = '4.5' verbose = False addlogs = False mindevlen = 0.0 mincglen = 0.0 cgphase = '' cgtest = -1 + max_graph_depth = 0 callloopmaxgap = 0.0001 callloopmaxlen = 0.005 srgap = 0 @@ -106,8 +104,8 @@ class SystemValues: ftracefile = '' htmlfile = '' embedded = False - rtcwake = False - rtcwaketime = 10 + rtcwake = True + rtcwaketime = 15 rtcpath = '' devicefilter = [] stamp = 0 @@ -235,6 +233,12 @@ class SystemValues: self.rtcpath = rtc if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()): self.ansi = True + def rootUser(self, fatal=False): + if 'USER' in os.environ and os.environ['USER'] == 'root': + return True + if fatal: + doError('This command must be run as root') + return False def setPrecision(self, num): if num < 0 or num > 6: return @@ -564,7 +568,7 @@ class SystemValues: self.fsetVal('global', 'trace_clock') # set trace buffer to a huge value self.fsetVal('nop', 'current_tracer') - self.fsetVal('100000', 'buffer_size_kb') + self.fsetVal('131073', 'buffer_size_kb') # go no further if this is just a status check if testing: return @@ -583,7 +587,7 @@ class SystemValues: self.fsetVal('nofuncgraph-overhead', 'trace_options') self.fsetVal('context-info', 'trace_options') self.fsetVal('graph-time', 'trace_options') - self.fsetVal('0', 'max_graph_depth') + self.fsetVal('%d' % self.max_graph_depth, 'max_graph_depth') cf = ['dpm_run_callback'] if(self.usetraceeventsonly): cf += ['dpm_prepare', 'dpm_complete'] @@ -639,6 +643,12 @@ class SystemValues: return '\x1B[%d;40m%s\x1B[m' % (color, str) sysvals = SystemValues() +suspendmodename = { + 'freeze': 'Freeze (S0)', + 'standby': 'Standby (S1)', + 'mem': 'Suspend (S3)', + 'disk': 'Hibernate (S4)' +} # Class: DevProps # Description: @@ -1013,6 +1023,8 @@ class Data: tmp = dict() for devname in list: dev = list[devname] + if dev['length'] == 0: + continue tmp[dev['start']] = devname for t in sorted(tmp): slist.append(tmp[t]) @@ -1477,12 +1489,14 @@ class FTraceLine: # Each instance is tied to a single device in a single phase, and is # comprised of an ordered list of FTraceLine objects class FTraceCallGraph: + id = '' start = -1.0 end = -1.0 list = [] invalid = False depth = 0 pid = 0 + name = '' def __init__(self, pid): self.start = -1.0 self.end = -1.0 @@ -1631,9 +1645,17 @@ class FTraceCallGraph: return True return False def postProcess(self, debug=False): + if len(self.list) > 0: + self.name = self.list[0].name stack = dict() cnt = 0 + last = 0 for l in self.list: + # ftrace bug: reported duration is not reliable + # check each leaf and clip it at max possible length + if(last and last.freturn and last.fcall): + if last.length > l.time - last.time: + last.length = l.time - last.time if(l.fcall and not l.freturn): stack[l.depth] = l cnt += 1 @@ -1643,11 +1665,12 @@ class FTraceCallGraph: print 'Post Process Error: Depth missing' l.debugPrint() return False - # transfer total time from return line to call line - stack[l.depth].length = l.length + # calculate call length from call/return lines + stack[l.depth].length = l.time - stack[l.depth].time stack.pop(l.depth) l.length = 0 cnt -= 1 + last = l if(cnt == 0): # trace caught the whole call tree return True @@ -1664,8 +1687,8 @@ class FTraceCallGraph: 'dpm_prepare': 'suspend_prepare', 'dpm_complete': 'resume_complete' } - if(self.list[0].name in borderphase): - p = borderphase[self.list[0].name] + if(self.name in borderphase): + p = borderphase[self.name] list = data.dmesg[p]['list'] for devname in list: dev = list[devname] @@ -1690,7 +1713,7 @@ class FTraceCallGraph: break return found def newActionFromFunction(self, data): - name = self.list[0].name + name = self.name if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']: return fs = self.start @@ -1710,7 +1733,7 @@ class FTraceCallGraph: phase, myname = out data.dmesg[phase]['list'][myname]['ftrace'] = self def debugPrint(self): - print('[%f - %f] %s (%d)') % (self.start, self.end, self.list[0].name, self.pid) + print('[%f - %f] %s (%d)') % (self.start, self.end, self.name, self.pid) for l in self.list: if(l.freturn and l.fcall): print('%f (%02d): %s(); (%.3f us)' % (l.time, \ @@ -1738,7 +1761,7 @@ class DevItem: # A container for a device timeline which calculates # all the html properties to display it correctly class Timeline: - html = {} + html = '' height = 0 # total timeline height scaleH = 20 # timescale (top) row height rowH = 30 # device row height @@ -1746,14 +1769,28 @@ class Timeline: rows = 0 # total timeline rows rowlines = dict() rowheight = dict() + html_tblock = '
    \n' + html_device = '
    {6}
    \n' + html_phase = '
    {5}
    \n' + html_phaselet = '
    \n' def __init__(self, rowheight, scaleheight): self.rowH = rowheight self.scaleH = scaleheight - self.html = { - 'header': '', - 'timeline': '', - 'legend': '', - } + self.html = '' + def createHeader(self, sv, suppress=''): + if(not sv.stamp['time']): + return + self.html += '' \ + % (sv.title, sv.version) + if sv.logmsg and 'log' not in suppress: + self.html += '' + if sv.addlogs and 'dmesg' not in suppress: + self.html += '' + if sv.addlogs and sv.ftracefile and 'ftrace' not in suppress: + self.html += '' + headline_stamp = '
    {0} {1} {2} {3}
    \n' + self.html += headline_stamp.format(sv.stamp['host'], sv.stamp['kernel'], + sv.stamp['mode'], sv.stamp['time']) # Function: getDeviceRows # Description: # determine how may rows the device funcs will take @@ -1880,10 +1917,8 @@ class Timeline: break top += self.rowheight[test][phase][i] return top - # Function: calcTotalRows - # Description: - # Calculate the heights and offsets for the header and rows def calcTotalRows(self): + # Calculate the heights and offsets for the header and rows maxrows = 0 standardphases = [] for t in self.rowlines: @@ -1901,6 +1936,20 @@ class Timeline: for t, p in standardphases: for i in sorted(self.rowheight[t][p]): self.rowheight[t][p][i] = self.bodyH/len(self.rowlines[t][p]) + def createZoomBox(self, mode='command', testcount=1): + # Create bounding box, add buttons + html_zoombox = '
    \n' + html_timeline = '
    \n
    \n' + html_devlist1 = '' + html_devlist2 = '\n' + if mode != 'command': + if testcount > 1: + self.html += html_devlist2 + self.html += html_devlist1.format('1') + else: + self.html += html_devlist1.format('') + self.html += html_zoombox + self.html += html_timeline.format('dmesg', self.height) # Function: createTimeScale # Description: # Create the timescale for a timeline block @@ -1913,7 +1962,7 @@ class Timeline: # The html code needed to display the time scale def createTimeScale(self, m0, mMax, tTotal, mode): timescale = '
    {1}
    \n' - rline = '
    Resume
    \n' + rline = '
    {0}
    \n' output = '
    \n' # set scale for timeline mTotal = mMax - m0 @@ -1926,21 +1975,20 @@ class Timeline: divEdge = (mTotal - tS*(divTotal-1))*100/mTotal for i in range(divTotal): htmlline = '' - if(mode == 'resume'): - pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal)) - val = '%0.fms' % (float(i)*tS*1000) - htmlline = timescale.format(pos, val) - if(i == 0): - htmlline = rline - else: + if(mode == 'suspend'): pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge) val = '%0.fms' % (float(i-divTotal+1)*tS*1000) if(i == divTotal - 1): - val = 'Suspend' + val = mode + htmlline = timescale.format(pos, val) + else: + pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal)) + val = '%0.fms' % (float(i)*tS*1000) htmlline = timescale.format(pos, val) + if(i == 0): + htmlline = rline.format(mode) output += htmlline - output += '
    \n' - return output + self.html += output+'
    \n' # Class: TestProps # Description: @@ -2009,7 +2057,7 @@ class ProcessMonitor: val['kern'] = kern if ujiff > 0 or kjiff > 0: running[pid] = ujiff + kjiff - result = process.wait() + process.wait() out = '' for pid in running: jiffies = running[pid] @@ -2071,26 +2119,6 @@ def parseStamp(line, data): if not sysvals.stamp: sysvals.stamp = data.stamp -# Function: diffStamp -# Description: -# compare the host, kernel, and mode fields in 3 stamps -# Arguments: -# stamp1: string array with mode, kernel, and host -# stamp2: string array with mode, kernel, and host -# Return: -# True if stamps differ, False if they're the same -def diffStamp(stamp1, stamp2): - if 'host' in stamp1 and 'host' in stamp2: - if stamp1['host'] != stamp2['host']: - return True - if 'kernel' in stamp1 and 'kernel' in stamp2: - if stamp1['kernel'] != stamp2['kernel']: - return True - if 'mode' in stamp1 and 'mode' in stamp2: - if stamp1['mode'] != stamp2['mode']: - return True - return False - # Function: doesTraceLogHaveTraceEvents # Description: # Quickly determine if the ftrace log has some or all of the trace events @@ -2722,7 +2750,7 @@ def parseTraceLog(): # create blocks for orphan cg data for sortkey in sorted(sortlist): cg = sortlist[sortkey] - name = cg.list[0].name + name = cg.name if sysvals.isCallgraphFunc(name): vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name)) cg.newActionFromFunction(data) @@ -3100,149 +3128,154 @@ def parseKernelLog(data): data.fixupInitcallsThatDidntReturn() return True +def callgraphHTML(sv, hf, num, cg, title, color, devid): + html_func_top = '
    \n\n' + html_func_start = '
    \n\n' + html_func_end = '
    \n' + html_func_leaf = '
    {0} {1}
    \n' + + cgid = devid + if cg.id: + cgid += cg.id + cglen = (cg.end - cg.start) * 1000 + if cglen < sv.mincglen: + return num + + fmt = '(%.3f ms @ '+sv.timeformat+' to '+sv.timeformat+')' + flen = fmt % (cglen, cg.start, cg.end) + hf.write(html_func_top.format(cgid, color, num, title, flen)) + num += 1 + for line in cg.list: + if(line.length < 0.000000001): + flen = '' + else: + fmt = '(%.3f ms @ '+sv.timeformat+')' + flen = fmt % (line.length*1000, line.time) + if(line.freturn and line.fcall): + hf.write(html_func_leaf.format(line.name, flen)) + elif(line.freturn): + hf.write(html_func_end) + else: + hf.write(html_func_start.format(num, line.name, flen)) + num += 1 + hf.write(html_func_end) + return num + +def addCallgraphs(sv, hf, data): + hf.write('
    \n') + # write out the ftrace data converted to html + num = 0 + for p in data.phases: + if sv.cgphase and p != sv.cgphase: + continue + list = data.dmesg[p]['list'] + for devname in data.sortedDevices(p): + dev = list[devname] + color = 'white' + if 'color' in data.dmesg[p]: + color = data.dmesg[p]['color'] + if 'color' in dev: + color = dev['color'] + name = devname + if(devname in sv.devprops): + name = sv.devprops[devname].altName(devname) + if sv.suspendmode in suspendmodename: + name += ' '+p + if('ftrace' in dev): + cg = dev['ftrace'] + num = callgraphHTML(sv, hf, num, cg, + name, color, dev['id']) + if('ftraces' in dev): + for cg in dev['ftraces']: + num = callgraphHTML(sv, hf, num, cg, + name+' → '+cg.name, color, dev['id']) + + hf.write('\n\n
    \n') + # Function: createHTMLSummarySimple # Description: # Create summary html file for a series of tests # Arguments: # testruns: array of Data objects from parseTraceLog -def createHTMLSummarySimple(testruns, htmlfile): - # print out the basic summary of all the tests - hf = open(htmlfile, 'w') - +def createHTMLSummarySimple(testruns, htmlfile, folder): # write the html header first (html head, css code, up to body start) html = '\n\n\n\ \n\ - AnalyzeSuspend Summary\n\ + SleepGraph Summary\n\ \n\n\n' # group test header - count = len(testruns) - headline_stamp = '
    {0} {1} {2} {3} ({4} tests)
    \n' - html += headline_stamp.format(sysvals.stamp['host'], - sysvals.stamp['kernel'], sysvals.stamp['mode'], - sysvals.stamp['time'], count) - - # check to see if all the tests have the same value - stampcolumns = False - for data in testruns: - if diffStamp(sysvals.stamp, data.stamp): - stampcolumns = True - break - + html += '
    %s (%d tests)
    \n' % (folder, len(testruns)) th = '\t{0}\n' td = '\t{0}\n' - tdlink = '\tClick Here\n' + tdlink = '\thtml\n' # table header - html += '\n\n' - html += th.format("Test #") - if stampcolumns: - html += th.format("Hostname") - html += th.format("Kernel Version") - html += th.format("Suspend Mode") - html += th.format("Test Time") - html += th.format("Suspend Time") - html += th.format("Resume Time") - html += th.format("Detail") - html += '\n' + html += '
    \n\n' + th.format('#') +\ + th.format('Mode') + th.format('Host') + th.format('Kernel') +\ + th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\ + th.format('Detail') + '\n' # test data, 1 row per test - sTimeAvg = 0.0 - rTimeAvg = 0.0 - num = 1 - for data in testruns: - # data.end is the end of post_resume - resumeEnd = data.dmesg['resume_complete']['end'] + avg = ''+\ + '\n' + sTimeAvg = rTimeAvg = 0.0 + mode = '' + num = 0 + for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'])): + if mode != data['mode']: + # test average line + if(num > 0): + sTimeAvg /= (num - 1) + rTimeAvg /= (num - 1) + html += avg.format('%d' % (num - 1), mode, + '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) + sTimeAvg = rTimeAvg = 0.0 + mode = data['mode'] + num = 1 + # alternate row color if num % 2 == 1: html += '\n' else: html += '\n' - - # test num - html += td.format("test %d" % num) + html += td.format("%d" % num) num += 1 - if stampcolumns: - # host name - val = "unknown" - if('host' in data.stamp): - val = data.stamp['host'] - html += td.format(val) - # host kernel + # basic info + for item in ['mode', 'host', 'kernel', 'time']: val = "unknown" - if('kernel' in data.stamp): - val = data.stamp['kernel'] + if(item in data): + val = data[item] html += td.format(val) - # suspend mode - val = "unknown" - if('mode' in data.stamp): - val = data.stamp['mode'] - html += td.format(val) - # test time - val = "unknown" - if('time' in data.stamp): - val = data.stamp['time'] - html += td.format(val) # suspend time - sTime = (data.tSuspended - data.start)*1000 + sTime = float(data['suspend']) sTimeAvg += sTime - html += td.format("%3.3f ms" % sTime) + html += td.format('%.3f ms' % sTime) # resume time - rTime = (resumeEnd - data.tResumed)*1000 + rTime = float(data['resume']) rTimeAvg += rTime - html += td.format("%3.3f ms" % rTime) + html += td.format('%.3f ms' % rTime) # link to the output html - html += tdlink.format(data.outfile) - - html += '\n' - - # last line: test average - if(count > 0): - sTimeAvg /= count - rTimeAvg /= count - html += '\n' - html += td.format('Average') # name - if stampcolumns: - html += td.format('') # host - html += td.format('') # kernel - html += td.format('') # mode - html += td.format('') # time - html += td.format("%3.3f ms" % sTimeAvg) # suspend time - html += td.format("%3.3f ms" % rTimeAvg) # resume time - html += td.format('') # output link - html += '\n' + html += tdlink.format(data['url']) + '\n' + # last test average line + if(num > 0): + sTimeAvg /= (num - 1) + rTimeAvg /= (num - 1) + html += avg.format('%d' % (num - 1), mode, + '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) # flush the data to file - hf.write(html+'
    Average of {0} {1} tests{2}{3}
    \n') - hf.write('\n\n') + hf = open(htmlfile, 'w') + hf.write(html+'\n\n\n') hf.close() -def htmlTitle(): - modename = { - 'freeze': 'Freeze (S0)', - 'standby': 'Standby (S1)', - 'mem': 'Suspend (S3)', - 'disk': 'Hibernate (S4)' - } - kernel = sysvals.stamp['kernel'] - host = sysvals.hostname[0].upper()+sysvals.hostname[1:] - mode = sysvals.suspendmode - if sysvals.suspendmode in modename: - mode = modename[sysvals.suspendmode] - return host+' '+mode+' '+kernel - def ordinal(value): suffix = 'th' if value < 10 or value > 19: @@ -3272,24 +3305,11 @@ def createHTML(testruns): kerror = True data.normalizeTime(testruns[-1].tSuspended) - x2changes = ['', 'absolute'] - if len(testruns) > 1: - x2changes = ['1', 'relative'] # html function templates - headline_version = '' % sysvals.version - headline_stamp = '
    {0} {1} {2} {3}
    \n' - html_devlist1 = '' % x2changes[0] - html_zoombox = '
    \n' - html_devlist2 = '\n' - html_timeline = '
    \n
    \n' - html_tblock = '
    \n' - html_device = '
    {6}
    \n' html_error = '
    ERROR→
    \n' html_traceevent = '
    {5}
    \n' html_cpuexec = '
    \n' - html_phase = '
    {5}
    \n' - html_phaselet = '
    \n' - html_legend = '
     {2}
    \n' + html_legend = '
     {2}
    \n' html_timetotal = '\n'\ ''\ ''\ @@ -3311,20 +3331,18 @@ def createHTML(testruns): '\n
    {2} Suspend Time: {0} ms{2} Resume Time: {1} ms
    \n' # html format variables - hoverZ = 'z-index:8;' - if sysvals.usedevsrc: - hoverZ = '' scaleH = 20 - scaleTH = 20 if kerror: scaleH = 40 - scaleTH = 60 # device timeline vprint('Creating Device Timeline...') devtl = Timeline(30, scaleH) + # write the test title and general info header + devtl.createHeader(sysvals) + # Generate the header for this timeline for data in testruns: tTotal = data.end - data.start @@ -3346,7 +3364,7 @@ def createHTML(testruns): if(len(testruns) > 1): testdesc = ordinal(data.testnumber+1)+' '+testdesc thtml = html_timetotal3.format(run_time, testdesc) - devtl.html['header'] += thtml + devtl.html += thtml elif data.fwValid: suspend_time = '%.0f'%(sktime + (data.fwSuspend/1000000.0)) resume_time = '%.0f'%(rktime + (data.fwResume/1000000.0)) @@ -3363,10 +3381,10 @@ def createHTML(testruns): else: thtml = html_timetotal2.format(suspend_time, low_time, \ resume_time, testdesc1, stitle, rtitle) - devtl.html['header'] += thtml + devtl.html += thtml sftime = '%.3f'%(data.fwSuspend / 1000000.0) rftime = '%.3f'%(data.fwResume / 1000000.0) - devtl.html['header'] += html_timegroups.format('%.3f'%sktime, \ + devtl.html += html_timegroups.format('%.3f'%sktime, \ sftime, rftime, '%.3f'%rktime, testdesc2, sysvals.suspendmode) else: suspend_time = '%.3f' % sktime @@ -3382,7 +3400,7 @@ def createHTML(testruns): else: thtml = html_timetotal2.format(suspend_time, low_time, \ resume_time, testdesc, stitle, rtitle) - devtl.html['header'] += thtml + devtl.html += thtml # time scale for potentially multiple datasets t0 = testruns[0].start @@ -3429,15 +3447,8 @@ def createHTML(testruns): devtl.getPhaseRows(threadlist, devtl.rows) devtl.calcTotalRows() - # create bounding box, add buttons - if sysvals.suspendmode != 'command': - devtl.html['timeline'] += html_devlist1 - if len(testruns) > 1: - devtl.html['timeline'] += html_devlist2 - devtl.html['timeline'] += html_zoombox - devtl.html['timeline'] += html_timeline.format('dmesg', devtl.height) - # draw the full timeline + devtl.createZoomBox(sysvals.suspendmode, len(testruns)) phases = {'suspend':[],'resume':[]} for phase in data.dmesg: if 'resume' in phase: @@ -3452,37 +3463,36 @@ def createHTML(testruns): # draw suspend and resume blocks separately bname = '%s%d' % (dir[0], data.testnumber) if dir == 'suspend': - m0 = testruns[data.testnumber].start - mMax = testruns[data.testnumber].tSuspended - mTotal = mMax - m0 + m0 = data.start + mMax = data.tSuspended left = '%f' % (((m0-t0)*100.0)/tTotal) else: - m0 = testruns[data.testnumber].tSuspended - mMax = testruns[data.testnumber].end + m0 = data.tSuspended + mMax = data.end # in an x2 run, remove any gap between blocks if len(testruns) > 1 and data.testnumber == 0: mMax = testruns[1].start - mTotal = mMax - m0 left = '%f' % ((((m0-t0)*100.0)+sysvals.srgap/2)/tTotal) + mTotal = mMax - m0 # if a timeline block is 0 length, skip altogether if mTotal == 0: continue width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal) - devtl.html['timeline'] += html_tblock.format(bname, left, width, devtl.scaleH) + devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH) for b in sorted(phases[dir]): # draw the phase color background phase = data.dmesg[b] length = phase['end']-phase['start'] left = '%f' % (((phase['start']-m0)*100.0)/mTotal) width = '%f' % ((length*100.0)/mTotal) - devtl.html['timeline'] += html_phase.format(left, width, \ + devtl.html += devtl.html_phase.format(left, width, \ '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \ data.dmesg[b]['color'], '') for e in data.errorinfo[dir]: # draw red lines for any kernel errors found t, err = e right = '%f' % (((mMax-t)*100.0)/mTotal) - devtl.html['timeline'] += html_error.format(right, err) + devtl.html += html_error.format(right, err) for b in sorted(phases[dir]): # draw the devices for this phase phaselist = data.dmesg[b]['list'] @@ -3496,7 +3506,7 @@ def createHTML(testruns): if 'htmlclass' in dev: xtraclass = dev['htmlclass'] if 'color' in dev: - xtrastyle = 'background-color:%s;' % dev['color'] + xtrastyle = 'background:%s;' % dev['color'] if(d in sysvals.devprops): name = sysvals.devprops[d].altName(d) xtraclass = sysvals.devprops[d].xtraClass() @@ -3521,7 +3531,7 @@ def createHTML(testruns): title += 'post_resume_process' else: title += b - devtl.html['timeline'] += html_device.format(dev['id'], \ + devtl.html += devtl.html_device.format(dev['id'], \ title, left, top, '%.3f'%rowheight, width, \ d+drv, xtraclass, xtrastyle) if('cpuexec' in dev): @@ -3535,7 +3545,7 @@ def createHTML(testruns): left = '%f' % (((start-m0)*100)/mTotal) width = '%f' % ((end-start)*100/mTotal) color = 'rgba(255, 0, 0, %f)' % j - devtl.html['timeline'] += \ + devtl.html += \ html_cpuexec.format(left, top, height, width, color) if('src' not in dev): continue @@ -3548,20 +3558,20 @@ def createHTML(testruns): xtrastyle = '' if e.color: xtrastyle = 'background:%s;' % e.color - devtl.html['timeline'] += \ + devtl.html += \ html_traceevent.format(e.title(), \ left, top, height, width, e.text(), '', xtrastyle) # draw the time scale, try to make the number of labels readable - devtl.html['timeline'] += devtl.createTimeScale(m0, mMax, tTotal, dir) - devtl.html['timeline'] += '
    \n' + devtl.createTimeScale(m0, mMax, tTotal, dir) + devtl.html += '
    \n' # timeline is finished - devtl.html['timeline'] += '
    \n
    \n' + devtl.html += '
    \n\n' # draw a legend which describes the phases by color if sysvals.suspendmode != 'command': data = testruns[-1] - devtl.html['legend'] = '
    \n' + devtl.html += '
    \n' pdelta = 100.0/len(data.phases) pmargin = pdelta / 4.0 for phase in data.phases: @@ -3571,127 +3581,41 @@ def createHTML(testruns): id += tmp[1][0] order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin) name = string.replace(phase, '_', '  ') - devtl.html['legend'] += html_legend.format(order, \ + devtl.html += html_legend.format(order, \ data.dmesg[phase]['color'], name, id) - devtl.html['legend'] += '
    \n' + devtl.html += '
    \n' hf = open(sysvals.htmlfile, 'w') - if not sysvals.cgexp: - cgchk = 'checked' - cgnchk = 'not(:checked)' - else: - cgchk = 'not(:checked)' - cgnchk = 'checked' - - # write the html header first (html head, css code, up to body start) - html_header = '\n\n\n\ - \n\ - '+htmlTitle()+'\n\ - \n\n\n' - # no header or css if its embedded if(sysvals.embedded): hf.write('pass True tSus %.3f tRes %.3f tLow %.3f fwvalid %s tSus %.3f tRes %.3f\n' % (data.tSuspended-data.start, data.end-data.tSuspended, data.tLow, data.fwValid, \ data.fwSuspend/1000000, data.fwResume/1000000)) else: - hf.write(html_header) - - # write the test title and general info header - if(sysvals.stamp['time'] != ""): - hf.write(headline_version) - if sysvals.logmsg: - hf.write('') - if sysvals.addlogs and sysvals.dmesgfile: - hf.write('') - if sysvals.addlogs and sysvals.ftracefile: - hf.write('') - hf.write(headline_stamp.format(sysvals.stamp['host'], - sysvals.stamp['kernel'], sysvals.stamp['mode'], \ - sysvals.stamp['time'])) + addCSS(hf, sysvals, len(testruns), kerror) # write the device timeline - hf.write(devtl.html['header']) - hf.write(devtl.html['timeline']) - hf.write(devtl.html['legend']) + hf.write(devtl.html) hf.write('
    \n') hf.write('\n') @@ -3701,52 +3625,7 @@ def createHTML(testruns): else: data = testruns[-1] if(sysvals.usecallgraph and not sysvals.embedded): - hf.write('
    \n') - # write out the ftrace data converted to html - html_func_top = '
    \n\n' - html_func_start = '
    \n\n' - html_func_end = '
    \n' - html_func_leaf = '
    {0} {1}
    \n' - num = 0 - for p in data.phases: - if sysvals.cgphase and p != sysvals.cgphase: - continue - list = data.dmesg[p]['list'] - for devname in data.sortedDevices(p): - if('ftrace' not in list[devname]): - continue - devid = list[devname]['id'] - cg = list[devname]['ftrace'] - clen = (cg.end - cg.start) * 1000 - if clen < sysvals.mincglen: - continue - fmt = '(%.3f ms @ '+sysvals.timeformat+' to '+sysvals.timeformat+')' - flen = fmt % (clen, cg.start, cg.end) - name = devname - if(devname in sysvals.devprops): - name = sysvals.devprops[devname].altName(devname) - if sysvals.suspendmode == 'command': - ftitle = name - else: - ftitle = name+' '+p - hf.write(html_func_top.format(devid, data.dmesg[p]['color'], \ - num, ftitle, flen)) - num += 1 - for line in cg.list: - if(line.length < 0.000000001): - flen = '' - else: - fmt = '(%.3f ms @ '+sysvals.timeformat+')' - flen = fmt % (line.length*1000, line.time) - if(line.freturn and line.fcall): - hf.write(html_func_leaf.format(line.name, flen)) - elif(line.freturn): - hf.write(html_func_end) - else: - hf.write(html_func_start.format(num, line.name, flen)) - num += 1 - hf.write(html_func_end) - hf.write('\n\n
    \n') + addCallgraphs(sysvals, hf, data) # add the test log as a hidden div if sysvals.logmsg: @@ -3788,6 +3667,100 @@ def createHTML(testruns): hf.close() return True +def addCSS(hf, sv, testcount=1, kerror=False, extra=''): + kernel = sv.stamp['kernel'] + host = sv.hostname[0].upper()+sv.hostname[1:] + mode = sv.suspendmode + if sv.suspendmode in suspendmodename: + mode = suspendmodename[sv.suspendmode] + title = host+' '+mode+' '+kernel + + # various format changes by flags + cgchk = 'checked' + cgnchk = 'not(:checked)' + if sv.cgexp: + cgchk = 'not(:checked)' + cgnchk = 'checked' + + hoverZ = 'z-index:8;' + if sv.usedevsrc: + hoverZ = '' + + devlistpos = 'absolute' + if testcount > 1: + devlistpos = 'relative' + + scaleTH = 20 + if kerror: + scaleTH = 60 + + # write the html header first (html head, css code, up to body start) + html_header = '\n\n\n\ + \n\ + '+title+'\n\ + \n\n\n' + hf.write(html_header) + # Function: addScriptCode # Description: # Adds the javascript code to the output html @@ -3809,7 +3782,7 @@ def addScriptCode(hf, testruns): ' var resolution = -1;\n'\ ' var dragval = [0, 0];\n'\ ' function redrawTimescale(t0, tMax, tS) {\n'\ - ' var rline = \'
    ←R
    \';\n'\ + ' var rline = \'
    \';\n'\ ' var tTotal = tMax - t0;\n'\ ' var list = document.getElementsByClassName("tblock");\n'\ ' for (var i = 0; i < list.length; i++) {\n'\ @@ -3824,19 +3797,23 @@ def addScriptCode(hf, testruns): ' var pos = 0.0, val = 0.0;\n'\ ' for (var j = 0; j < divTotal; j++) {\n'\ ' var htmlline = "";\n'\ - ' if(list[i].id[5] == "r") {\n'\ - ' pos = 100 - (((j)*tS*100)/mTotal);\n'\ - ' val = (j)*tS;\n'\ - ' htmlline = \'
    \'+val+\'ms
    \';\n'\ - ' if(j == 0)\n'\ - ' htmlline = rline;\n'\ - ' } else {\n'\ + ' var mode = list[i].id[5];\n'\ + ' if(mode == "s") {\n'\ ' pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\ ' val = (j-divTotal+1)*tS;\n'\ ' if(j == divTotal - 1)\n'\ ' htmlline = \'
    S→
    \';\n'\ ' else\n'\ ' htmlline = \'
    \'+val+\'ms
    \';\n'\ + ' } else {\n'\ + ' pos = 100 - (((j)*tS*100)/mTotal);\n'\ + ' val = (j)*tS;\n'\ + ' htmlline = \'
    \'+val+\'ms
    \';\n'\ + ' if(j == 0)\n'\ + ' if(mode == "r")\n'\ + ' htmlline = rline+"←R
    ";\n'\ + ' else\n'\ + ' htmlline = rline+"0ms";\n'\ ' }\n'\ ' html += htmlline;\n'\ ' }\n'\ @@ -4002,12 +3979,80 @@ def addScriptCode(hf, testruns): ' }\n'\ ' }\n'\ ' }\n'\ + ' if(typeof devstats !== \'undefined\')\n'\ + ' callDetail(this.id, this.title);\n'\ ' var cglist = document.getElementById("callgraphs");\n'\ ' if(!cglist) return;\n'\ ' var cg = cglist.getElementsByClassName("atop");\n'\ ' if(cg.length < 10) return;\n'\ ' for (var i = 0; i < cg.length; i++) {\n'\ - ' if(idlist.indexOf(cg[i].id) >= 0) {\n'\ + ' cgid = cg[i].id.split("x")[0]\n'\ + ' if(idlist.indexOf(cgid) >= 0) {\n'\ + ' cg[i].style.display = "block";\n'\ + ' } else {\n'\ + ' cg[i].style.display = "none";\n'\ + ' }\n'\ + ' }\n'\ + ' }\n'\ + ' function callDetail(devid, devtitle) {\n'\ + ' if(!(devid in devstats) || devstats[devid].length < 1)\n'\ + ' return;\n'\ + ' var list = devstats[devid];\n'\ + ' var tmp = devtitle.split(" ");\n'\ + ' var name = tmp[0], phase = tmp[tmp.length-1];\n'\ + ' var dd = document.getElementById(phase);\n'\ + ' var total = parseFloat(tmp[1].slice(1));\n'\ + ' var mlist = [];\n'\ + ' var maxlen = 0;\n'\ + ' var info = []\n'\ + ' for(var i in list) {\n'\ + ' if(list[i][0] == "@") {\n'\ + ' info = list[i].split("|");\n'\ + ' continue;\n'\ + ' }\n'\ + ' var tmp = list[i].split("|");\n'\ + ' var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]);\n'\ + ' var p = (t*100.0/total).toFixed(2);\n'\ + ' mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"];\n'\ + ' if(f.length > maxlen)\n'\ + ' maxlen = f.length;\n'\ + ' }\n'\ + ' var pad = 5;\n'\ + ' if(mlist.length == 0) pad = 30;\n'\ + ' var html = \'
    \'+name+\':\';\n'\ + ' if(info.length > 2)\n'\ + ' html += " start="+info[1]+", end="+info[2]+"";\n'\ + ' if(info.length > 3)\n'\ + ' html += ", length(w/o overhead)="+info[3]+" ms";\n'\ + ' if(info.length > 4)\n'\ + ' html += ", return="+info[4]+"";\n'\ + ' html += "
    ";\n'\ + ' if(mlist.length > 0) {\n'\ + ' html += \'\';\n'\ + ' for(var i in mlist)\n'\ + ' html += "";\n'\ + ' html += "";\n'\ + ' for(var i in mlist)\n'\ + ' html += "";\n'\ + ' html += "";\n'\ + ' for(var i in mlist)\n'\ + ' html += "";\n'\ + ' html += "";\n'\ + ' for(var i in mlist)\n'\ + ' html += "";\n'\ + ' html += "
    Function"+mlist[i][0]+"
    Calls"+mlist[i][1]+"
    Time(ms)"+mlist[i][2]+"
    Percent"+mlist[i][3]+"
    ";\n'\ + ' }\n'\ + ' dd.innerHTML = html;\n'\ + ' var height = (maxlen*5)+100;\n'\ + ' dd.style.height = height+"px";\n'\ + ' document.getElementById("devicedetail").style.height = height+"px";\n'\ + ' }\n'\ + ' function callSelect() {\n'\ + ' var cglist = document.getElementById("callgraphs");\n'\ + ' if(!cglist) return;\n'\ + ' var cg = cglist.getElementsByClassName("atop");\n'\ + ' for (var i = 0; i < cg.length; i++) {\n'\ + ' if(this.id == cg[i].id) {\n'\ ' cg[i].style.display = "block";\n'\ ' } else {\n'\ ' cg[i].style.display = "none";\n'\ @@ -4093,6 +4138,9 @@ def addScriptCode(hf, testruns): ' dev[i].onmouseover = deviceHover;\n'\ ' dev[i].onmouseout = deviceUnhover;\n'\ ' }\n'\ + ' var dev = dmesg.getElementsByClassName("srccall");\n'\ + ' for (var i = 0; i < dev.length; i++)\n'\ + ' dev[i].onclick = callSelect;\n'\ ' zoomTimeline();\n'\ ' });\n'\ '\n' @@ -4675,7 +4723,7 @@ def rootCheck(fatal): if(os.access(sysvals.powerfile, os.W_OK)): return True if fatal: - doError('This command must be run as root') + doError('This command requires sysfs mount and root access') return False # Function: getArgInt @@ -4767,51 +4815,62 @@ def runTest(subdir, testpath=''): cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1' call(cmd.format(os.environ['SUDO_USER'], sysvals.testdir), shell=True) +def find_in_html(html, strs, div=False): + for str in strs: + l = len(str) + i = html.find(str) + if i >= 0: + break + if i < 0: + return '' + if not div: + return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group() + n = html[i+l:].find('') + if n < 0: + return '' + return html[i+l:i+l+n] + # Function: runSummary # Description: # create a summary of tests in a sub-directory -def runSummary(subdir, output): - # get a list of ftrace output files - files = [] +def runSummary(subdir, local=True): + inpath = os.path.abspath(subdir) + outpath = inpath + if local: + outpath = os.path.abspath('.') + print('Generating a summary of folder "%s"' % inpath) + testruns = [] for dirname, dirnames, filenames in os.walk(subdir): for filename in filenames: - if(re.match('.*_ftrace.txt', filename)): - files.append("%s/%s" % (dirname, filename)) - - # process the files in order and get an array of data objects - testruns = [] - for file in sorted(files): - if output: - print("Test found in %s" % os.path.dirname(file)) - sysvals.ftracefile = file - sysvals.dmesgfile = file.replace('_ftrace.txt', '_dmesg.txt') - doesTraceLogHaveTraceEvents() - sysvals.usecallgraph = False - if not sysvals.usetraceeventsonly: - if(not os.path.exists(sysvals.dmesgfile)): - print("Skipping %s: not a valid test input" % file) + if(not re.match('.*.html', filename)): continue - else: - if output: - f = os.path.basename(sysvals.ftracefile) - d = os.path.basename(sysvals.dmesgfile) - print("\tInput files: %s and %s" % (f, d)) - testdata = loadKernelLog() - data = testdata[0] - parseKernelLog(data) - testdata = [data] - appendIncompleteTraceLog(testdata) - else: - if output: - print("\tInput file: %s" % os.path.basename(sysvals.ftracefile)) - testdata = parseTraceLog() - data = testdata[0] - data.normalizeTime(data.tSuspended) - link = file.replace(subdir+'/', '').replace('_ftrace.txt', '.html') - data.outfile = link - testruns.append(data) - - createHTMLSummarySimple(testruns, subdir+'/summary.html') + file = os.path.join(dirname, filename) + html = open(file, 'r').read(10000) + suspend = find_in_html(html, + ['Kernel Suspend: ', 'Kernel Suspend Time: ']) + resume = find_in_html(html, + ['Kernel Resume: ', 'Kernel Resume Time: ']) + line = find_in_html(html, ['
    '], True) + stmp = line.split() + if not suspend or not resume or len(stmp) < 4: + continue + data = { + 'host': stmp[0], + 'kernel': stmp[1], + 'mode': stmp[2], + 'time': string.join(stmp[3:], ' '), + 'suspend': suspend, + 'resume': resume, + 'url': os.path.relpath(file, outpath), + } + if len(stmp) == 7: + data['kernel'] = 'unknown' + data['mode'] = stmp[1] + data['time'] = string.join(stmp[2:], ' ') + testruns.append(data) + outfile = os.path.join(outpath, 'summary.html') + print('Summary file: %s' % outfile) + createHTMLSummarySimple(testruns, outfile, inpath) # Function: checkArgBool # Description: @@ -4869,9 +4928,14 @@ def configFromFile(file): sysvals.predelay = getArgInt('-predelay', value, 0, 60000, False) elif(opt.lower() == 'postdelay'): sysvals.postdelay = getArgInt('-postdelay', value, 0, 60000, False) + elif(opt.lower() == 'maxdepth'): + sysvals.max_graph_depth = getArgInt('-maxdepth', value, 0, 1000, False) elif(opt.lower() == 'rtcwake'): - sysvals.rtcwake = True - sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False) + if value.lower() == 'off': + sysvals.rtcwake = False + else: + sysvals.rtcwake = True + sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False) elif(opt.lower() == 'timeprec'): sysvals.setPrecision(getArgInt('-timeprec', value, 0, 6, False)) elif(opt.lower() == 'mindev'): @@ -4969,8 +5033,8 @@ def printHelp(): modes = getModes() print('') - print('AnalyzeSuspend v%s' % sysvals.version) - print('Usage: sudo analyze_suspend.py ') + print('%s v%s' % (sysvals.title, sysvals.version)) + print('Usage: sudo sleepgraph ') print('') print('Description:') print(' This tool is designed to assist kernel and OS developers in optimizing') @@ -4981,22 +5045,22 @@ def printHelp(): print(' a detailed view of which devices/subsystems are taking the most') print(' time in suspend/resume.') print('') + print(' If no specific command is given, the default behavior is to initiate') + print(' a suspend/resume and capture the dmesg/ftrace output as an html timeline.') + print('') print(' Generates output files in subdirectory: suspend-mmddyy-HHMMSS') print(' HTML output: _.html') print(' raw dmesg output: __dmesg.txt') print(' raw ftrace output: __ftrace.txt') print('') print('Options:') - print(' [general]') print(' -h Print this help text') print(' -v Print the current tool version') print(' -config fn Pull arguments and config options from file fn') print(' -verbose Print extra information during execution and analysis') - print(' -status Test to see if the system is enabled to run this tool') - print(' -modes List available suspend modes') print(' -m mode Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode) print(' -o subdir Override the output subdirectory') - print(' -rtcwake t Use rtcwake to autoresume after seconds (default: disabled)') + print(' -rtcwake t Wakeup t seconds after suspend, set t to "off" to disable (default: 15)') print(' -addlogs Add the dmesg and ftrace logs to the html output') print(' -srgap Add a visible gap in the timeline between sus/res (default: disabled)') print(' [advanced]') @@ -5012,23 +5076,25 @@ def printHelp(): print(' be created in a new subdirectory with a summary page.') print(' [debug]') print(' -f Use ftrace to create device callgraphs (default: disabled)') + print(' -maxdepth N limit the callgraph data to N call levels (default: 0=all)') print(' -expandcg pre-expand the callgraph data in the html output (default: disabled)') - print(' -flist Print the list of functions currently being captured in ftrace') - print(' -flistall Print all functions capable of being captured in ftrace') print(' -fadd file Add functions to be graphed in the timeline from a list in a text file') print(' -filter "d1,d2,..." Filter out all but this comma-delimited list of device names') print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)') print(' -cgphase P Only show callgraph data for phase P (e.g. suspend_late)') print(' -cgtest N Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)') print(' -timeprec N Number of significant digits in timestamps (0:S, [3:ms], 6:us)') - print(' [utilities]') + print(' [commands]') + print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)') + print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)') + print(' -summary directory Create a summary of all test in this dir') + print(' -modes List available suspend modes') + print(' -status Test to see if the system is enabled to run this tool') print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table') print(' -usbtopo Print out the current USB topology with power info') print(' -usbauto Enable autosuspend for all connected USB devices') - print(' [re-analyze data from previous runs]') - print(' -ftrace ftracefile Create HTML output using ftrace input') - print(' -dmesg dmesgfile Create HTML output using dmesg (not needed for kernel >= 3.15)') - print(' -summary directory Create a summary of all test in this dir') + print(' -flist Print the list of functions currently being captured in ftrace') + print(' -flistall Print all functions capable of being captured in ftrace') print('') return True @@ -5076,9 +5142,18 @@ if __name__ == '__main__': sysvals.useprocmon = True elif(arg == '-dev'): sysvals.usedevsrc = True + elif(arg == '-maxdepth'): + sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000) elif(arg == '-rtcwake'): - sysvals.rtcwake = True - sysvals.rtcwaketime = getArgInt('-rtcwake', args, 0, 3600) + try: + val = args.next() + except: + doError('No rtcwake time supplied', True) + if val.lower() == 'off': + sysvals.rtcwake = False + else: + sysvals.rtcwake = True + sysvals.rtcwaketime = getArgInt('-rtcwake', val, 0, 3600, False) elif(arg == '-timeprec'): sysvals.setPrecision(getArgInt('-timeprec', args, 0, 6)) elif(arg == '-mindev'): @@ -5201,7 +5276,6 @@ if __name__ == '__main__': elif(cmd == 'usbauto'): setUSBDevicesAuto() elif(cmd == 'summary'): - print("Generating a summary of folder \"%s\"" % cmdarg) runSummary(cmdarg, True) sys.exit() diff --git a/tools/power/pm-graph/bootgraph.8 b/tools/power/pm-graph/bootgraph.8 new file mode 100644 index 0000000000000000000000000000000000000000..55272a67b0e71cb1cf82f0dfbd05af739130b04c --- /dev/null +++ b/tools/power/pm-graph/bootgraph.8 @@ -0,0 +1,132 @@ +.TH BOOTGRAPH 8 +.SH NAME +bootgraph \- Kernel boot timing analysis +.SH SYNOPSIS +.ft B +.B bootgraph +.RB [ OPTIONS ] +.RB [ COMMAND ] +.SH DESCRIPTION +\fBbootgraph \fP reads the dmesg log from kernel boot and +creates an html representation of the initcall timeline up to the start +of the init process. +.PP +If no specific command is given, the tool reads the current dmesg log and +outputs bootgraph.html. +.PP +The tool can also augment the timeline with ftrace data on custom target +functions as well as full trace callgraphs. +.SH OPTIONS +.TP +\fB-h\fR +Print this help text +.TP +\fB-v\fR +Print the current tool version +.TP +\fB-addlogs\fR +Add the dmesg log to the html output. It will be viewable by +clicking a button in the timeline. +.TP +\fB-o \fIfile\fR +Override the HTML output filename (default: bootgraph.html) +.SS "Ftrace Debug" +.TP +\fB-f\fR +Use ftrace to add function detail (default: disabled) +.TP +\fB-callgraph\fR +Use ftrace to create initcall callgraphs (default: disabled). If -filter +is not used there will be one callgraph per initcall. This can produce +very large outputs, i.e. 10MB - 100MB. +.TP +\fB-maxdepth \fIlevel\fR +limit the callgraph trace depth to \fIlevel\fR (default: 2). This is +the best way to limit the output size when using -callgraph. +.TP +\fB-mincg \fIt\fR +Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0=all). +This reduces the html file size as there can be many tiny callgraphs +which are barely visible in the timeline. +The value is a float: e.g. 0.001 represents 1 us. +.TP +\fB-timeprec \fIn\fR +Number of significant digits in timestamps (0:S, 3:ms, [6:us]) +.TP +\fB-expandcg\fR +pre-expand the callgraph data in the html output (default: disabled) +.TP +\fB-filter \fI"func1,func2,..."\fR +Instead of tracing each initcall, trace a custom list of functions (default: do_one_initcall) + +.SH COMMANDS +.TP +\fB-reboot\fR +Reboot the machine and generate a new timeline automatically. Works in 4 steps. + 1. updates grub with the required kernel parameters + 2. installs a cron job which re-runs the tool after reboot + 3. reboots the system + 4. after startup, extracts the data and generates the timeline +.TP +\fB-manual\fR +Show the requirements to generate a new timeline manually. Requires 3 steps. + 1. append the string to the kernel command line via your native boot manager. + 2. reboot the system + 3. after startup, re-run the tool with the same arguments and no command +.TP +\fB-dmesg \fIfile\fR +Create HTML output from an existing dmesg file. +.TP +\fB-ftrace \fIfile\fR +Create HTML output from an existing ftrace file (used with -dmesg). +.TP +\fB-flistall\fR +Print all ftrace functions capable of being captured. These are all the +possible values you can add to trace via the -filter argument. + +.SH EXAMPLES +Create a timeline using the current dmesg log. +.IP +\f(CW$ bootgraph\fR +.PP +Create a timeline using the current dmesg and ftrace log. +.IP +\f(CW$ bootgraph -callgraph\fR +.PP +Create a timeline using the current dmesg, add the log to the html and change the name. +.IP +\f(CW$ bootgraph -addlogs -o myboot.html\fR +.PP +Capture a new boot timeline by automatically rebooting the machine. +.IP +\f(CW$ sudo bootgraph -reboot -addlogs -o latestboot.html\fR +.PP +Capture a new boot timeline with function trace data. +.IP +\f(CW$ sudo bootgraph -reboot -f\fR +.PP +Capture a new boot timeline with trace & callgraph data. Skip callgraphs smaller than 5ms. +.IP +\f(CW$ sudo bootgraph -reboot -callgraph -mincg 5\fR +.PP +Capture a new boot timeline with callgraph data over custom functions. +.IP +\f(CW$ sudo bootgraph -reboot -callgraph -filter "acpi_ps_parse_aml,msleep"\fR +.PP +Capture a brand new boot timeline with manual reboot. +.IP +\f(CW$ sudo bootgraph -callgraph -manual\fR +.IP +\f(CW$ vi /etc/default/grub # add the CMDLINE string to your kernel params\fR +.IP +\f(CW$ sudo reboot # reboot the machine\fR +.IP +\f(CW$ sudo bootgraph -callgraph # re-run the tool after restart\fR +.PP + +.SH "SEE ALSO" +dmesg(1), update-grub(8), crontab(1), reboot(8) +.PP +.SH AUTHOR +.nf +Written by Todd Brandt diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 new file mode 100644 index 0000000000000000000000000000000000000000..610e72ebbc0673d53a6df08db53180b045e8b2f6 --- /dev/null +++ b/tools/power/pm-graph/sleepgraph.8 @@ -0,0 +1,243 @@ +.TH SLEEPGRAPH 8 +.SH NAME +sleepgraph \- Suspend/Resume timing analysis +.SH SYNOPSIS +.ft B +.B sleepgraph +.RB [ OPTIONS ] +.RB [ COMMAND ] +.SH DESCRIPTION +\fBsleepgraph \fP is designed to assist kernel and OS developers +in optimizing their linux stack's suspend/resume time. Using a kernel +image built with a few extra options enabled, the tool will execute a +suspend and capture dmesg and ftrace data until resume is complete. +This data is transformed into a device timeline and an optional +callgraph to give a detailed view of which devices/subsystems are +taking the most time in suspend/resume. +.PP +If no specific command is given, the default behavior is to initiate +a suspend/resume. +.PP +Generates output files in subdirectory: suspend-yymmdd-HHMMSS + html timeline : _.html + raw dmesg file : __dmesg.txt + raw ftrace file : __ftrace.txt +.SH OPTIONS +.TP +\fB-h\fR +Print the help text. +.TP +\fB-v\fR +Print the current tool version. +.TP +\fB-verbose\fR +Print extra information during execution and analysis. +.TP +\fB-config \fIfile\fR +Pull arguments and config options from a file. +.TP +\fB-m \fImode\fR +Mode to initiate for suspend e.g. standby, freeze, mem (default: mem). +.TP +\fB-o \fIsubdir\fR +Override the output subdirectory. Use {date}, {time}, {hostname} for current values. +.sp +e.g. suspend-{hostname}-{date}-{time} +.TP +\fB-rtcwake \fIt\fR | off +Use rtcwake to autoresume after \fIt\fR seconds (default: 15). Set t to "off" to +disable rtcwake and require a user keypress to resume. +.TP +\fB-addlogs\fR +Add the dmesg and ftrace logs to the html output. They will be viewable by +clicking buttons in the timeline. + +.SS "Advanced" +.TP +\fB-cmd \fIstr\fR +Run the timeline over a custom suspend command, e.g. pm-suspend. By default +the tool forces suspend via /sys/power/state so this allows testing over +an OS's official suspend method. The output file will change to +hostname_command.html and will autodetect which suspend mode was triggered. +.TP +\fB-filter \fI"d1,d2,..."\fR +Filter out all but these device callbacks. These strings can be device names +or module names. e.g. 0000:00:02.0, ata5, i915, usb, etc. +.TP +\fB-mindev \fIt\fR +Discard all device callbacks shorter than \fIt\fR milliseconds (default: 0.0). +This reduces the html file size as there can be many tiny callbacks which are barely +visible. The value is a float: e.g. 0.001 represents 1 us. +.TP +\fB-proc\fR +Add usermode process info into the timeline (default: disabled). +.TP +\fB-dev\fR +Add kernel source calls and threads to the timeline (default: disabled). +.TP +\fB-x2\fR +Run two suspend/resumes back to back (default: disabled). +.TP +\fB-x2delay \fIt\fR +Include \fIt\fR ms delay between multiple test runs (default: 0 ms). +.TP +\fB-predelay \fIt\fR +Include \fIt\fR ms delay before 1st suspend (default: 0 ms). +.TP +\fB-postdelay \fIt\fR +Include \fIt\fR ms delay after last resume (default: 0 ms). +.TP +\fB-multi \fIn d\fR +Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will +be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}. + +.SS "Ftrace Debug" +.TP +\fB-f\fR +Use ftrace to create device callgraphs (default: disabled). This can produce +very large outputs, i.e. 10MB - 100MB. +.TP +\fB-maxdepth \fIlevel\fR +limit the callgraph trace depth to \fIlevel\fR (default: 0=all). This is +the best way to limit the output size when using callgraphs via -f. +.TP +\fB-expandcg\fR +pre-expand the callgraph data in the html output (default: disabled) +.TP +\fB-fadd \fIfile\fR +Add functions to be graphed in the timeline from a list in a text file +.TP +\fB-mincg \fIt\fR +Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0.0). +This reduces the html file size as there can be many tiny callgraphs +which are barely visible in the timeline. +The value is a float: e.g. 0.001 represents 1 us. +.TP +\fB-cgphase \fIp\fR +Only show callgraph data for phase \fIp\fR (e.g. suspend_late). +.TP +\fB-cgtest \fIn\fR +In an x2 run, only show callgraph data for test \fIn\fR (e.g. 0 or 1). +.TP +\fB-timeprec \fIn\fR +Number of significant digits in timestamps (0:S, [3:ms], 6:us). + +.SH COMMANDS +.TP +\fB-ftrace \fIfile\fR +Create HTML output from an existing ftrace file. +.TP +\fB-dmesg \fIfile\fR +Create HTML output from an existing dmesg file. +.TP +\fB-summary \fIindir\fR +Create a summary page of all tests in \fIindir\fR. Creates summary.html +in the current folder. The output page is a table of tests with +suspend and resume values sorted by suspend mode, host, and kernel. +Includes test averages by mode and links to the test html files. +.TP +\fB-modes\fR +List available suspend modes. +.TP +\fB-status\fR +Test to see if the system is able to run this tool. Use this along +with any options you intend to use to see if they will work. +.TP +\fB-fpdt\fR +Print out the contents of the ACPI Firmware Performance Data Table. +.TP +\fB-usbtopo\fR +Print out the current USB topology with power info. +.TP +\fB-usbauto\fR +Enable autosuspend for all connected USB devices. +.TP +\fB-flist\fR +Print the list of ftrace functions currently being captured. Functions +that are not available as symbols in the current kernel are shown in red. +By default, the tool traces a list of important suspend/resume functions +in order to better fill out the timeline. If the user has added their own +with -fadd they will also be checked. +.TP +\fB-flistall\fR +Print all ftrace functions capable of being captured. These are all the +possible values you can add to trace via the -fadd argument. + +.SH EXAMPLES +.SS "Simple Commands" +Check which suspend modes are currently supported. +.IP +\f(CW$ sleepgraph -modes\fR +.PP +Read the Firmware Performance Data Table (FPDT) +.IP +\f(CW$ sudo sleepgraph -fpdt\fR +.PP +Print out the current USB power topology +.IP +\f(CW$ sleepgraph -usbtopo +.PP +Verify that you can run a command with a set of arguments +.IP +\f(CW$ sudo sleepgraph -f -rtcwake 30 -status +.PP +Generate a summary of all timelines in a particular folder. +.IP +\f(CW$ sleepgraph -summary ~/workspace/myresults/\fR +.PP +Re-generate the html output from a previous run's dmesg and ftrace log. +.IP +\f(CW$ sleepgraph -dmesg myhost_mem_dmesg.txt -ftrace myhost_mem_ftrace.txt\fR +.PP + +.SS "Capturing Simple Timelines" +Execute a mem suspend with a 15 second wakeup. Include the logs in the html. +.IP +\f(CW$ sudo sleepgraph -rtcwake 15 -addlogs\fR +.PP +Execute a standby with a 15 second wakeup. Change the output folder name. +.IP +\f(CW$ sudo sleepgraph -m standby -rtcwake 15 -o "standby-{hostname}-{date}-{time}"\fR +.PP +Execute a freeze with no wakeup (require keypress). Change output folder name. +.IP +\f(CW$ sudo sleepgraph -m freeze -rtcwake off -o "freeze-{hostname}-{date}-{time}"\fR +.PP + +.SS "Capturing Advanced Timelines" +Execute a suspend & include dev mode source calls, limit callbacks to 5ms or larger. +.IP +\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -dev -mindev 5\fR +.PP +Run two suspends back to back, include a 500ms delay before, after, and in between runs. +.IP +\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -x2 -predelay 500 -x2delay 500 -postdelay 500\fR +.PP +Do a batch run of 10 freezes with 30 seconds delay between runs. +.IP +\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 10 30\fR +.PP +Execute a suspend using a custom command. +.IP +\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR +.PP + + +.SS "Capturing Timelines with Callgraph Data" +Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger. +.IP +\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f -maxdepth 5 -mincg 10\fR +.PP +Capture a full callgraph across all suspend, then filter the html by a single phase. +.IP +\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f\fR +.IP +\f(CW$ sleepgraph -dmesg host_mem_dmesg.txt -ftrace host_mem_ftrace.txt -f -cgphase resume +.PP + +.SH "SEE ALSO" +dmesg(1) +.PP +.SH AUTHOR +.nf +Written by Todd Brandt diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index fd706ac0f347fa8fcdba6321088a5e483c7c5e7c..0b24dd9d01ff2991045696fbcb153a9f3f6ce248 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -353,6 +353,14 @@ def split_csv(): os.system('grep -m 1 common_cpu cpu.csv > cpu{:0>3}.csv'.format(index)) os.system('grep CPU_{:0>3} cpu.csv >> cpu{:0>3}.csv'.format(index, index)) +def fix_ownership(path): + """Change the owner of the file to SUDO_UID, if required""" + + uid = os.environ.get('SUDO_UID') + gid = os.environ.get('SUDO_GID') + if uid is not None: + os.chown(path, int(uid), int(gid)) + def cleanup_data_files(): """ clean up existing data files """ @@ -518,12 +526,16 @@ else: if not os.path.exists('results'): os.mkdir('results') + # The regular user needs to own the directory, not root. + fix_ownership('results') os.chdir('results') if os.path.exists(testname): print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.') sys.exit() os.mkdir(testname) +# The regular user needs to own the directory, not root. +fix_ownership(testname) os.chdir(testname) # Temporary (or perhaps not) @@ -566,4 +578,9 @@ plot_scaled_cpu() plot_boost_cpu() plot_ghz_cpu() +# It is preferrable, but not necessary, that the regular user owns the files, not root. +for root, dirs, files in os.walk('.'): + for f in files: + fix_ownership(f) + os.chdir('../../') diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 621578aa12d6fe456134369b97968126c3a1981e..fc74db62fef4d11851422586a87eb5dee6b17176 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -43,6 +43,15 @@ ifneq ($(CC), clang) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif +# Hack to avoid type-punned warnings on old systems such as RHEL5: +# We should be changing CFLAGS and checking gcc version, but this +# will do for now and keep the above -Wstrict-aliasing=3 in place +# in newer systems. +# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h +ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3 +EXTRA_WARNINGS += -fno-strict-aliasing +endif + ifneq ($(findstring $(MAKEFLAGS), w),w) PRINT_DIR = --no-print-directory else diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 816f119c9b7b1584a587077a55e28691418ff706..8c590cd1171a99c2ff7d38fb1cf7879b052acc3a 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,9 @@ static char *output_file; static uint32_t speed = 500000; static uint16_t delay; static int verbose; +static int transfer_size; +static int iterations; +static int interval = 5; /* interval in seconds for showing transfer rate */ uint8_t default_tx[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -156,13 +160,13 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len) close(out_fd); } - if (verbose || !output_file) + if (verbose) hex_dump(rx, len, 32, "RX"); } static void print_usage(const char *prog) { - printf("Usage: %s [-DsbdlHOLC3]\n", prog); + printf("Usage: %s [-DsbdlHOLC3vpNR24SI]\n", prog); puts(" -D --device device to use (default /dev/spidev1.1)\n" " -s --speed max speed (Hz)\n" " -d --delay delay (usec)\n" @@ -180,7 +184,9 @@ static void print_usage(const char *prog) " -N --no-cs no chip select\n" " -R --ready slave pulls low to pause\n" " -2 --dual dual transfer\n" - " -4 --quad quad transfer\n"); + " -4 --quad quad transfer\n" + " -S --size transfer size\n" + " -I --iter iterations\n"); exit(1); } @@ -205,11 +211,13 @@ static void parse_opts(int argc, char *argv[]) { "dual", 0, 0, '2' }, { "verbose", 0, 0, 'v' }, { "quad", 0, 0, '4' }, + { "size", 1, 0, 'S' }, + { "iter", 1, 0, 'I' }, { NULL, 0, 0, 0 }, }; int c; - c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:v", + c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:vS:I:", lopts, NULL); if (c == -1) @@ -270,6 +278,12 @@ static void parse_opts(int argc, char *argv[]) case '4': mode |= SPI_TX_QUAD; break; + case 'S': + transfer_size = atoi(optarg); + break; + case 'I': + iterations = atoi(optarg); + break; default: print_usage(argv[0]); break; @@ -336,6 +350,57 @@ static void transfer_file(int fd, char *filename) close(tx_fd); } +static uint64_t _read_count; +static uint64_t _write_count; + +static void show_transfer_rate(void) +{ + static uint64_t prev_read_count, prev_write_count; + double rx_rate, tx_rate; + + rx_rate = ((_read_count - prev_read_count) * 8) / (interval*1000.0); + tx_rate = ((_write_count - prev_write_count) * 8) / (interval*1000.0); + + printf("rate: tx %.1fkbps, rx %.1fkbps\n", rx_rate, tx_rate); + + prev_read_count = _read_count; + prev_write_count = _write_count; +} + +static void transfer_buf(int fd, int len) +{ + uint8_t *tx; + uint8_t *rx; + int i; + + tx = malloc(len); + if (!tx) + pabort("can't allocate tx buffer"); + for (i = 0; i < len; i++) + tx[i] = random(); + + rx = malloc(len); + if (!rx) + pabort("can't allocate rx buffer"); + + transfer(fd, tx, rx, len); + + _write_count += len; + _read_count += len; + + if (mode & SPI_LOOP) { + if (memcmp(tx, rx, len)) { + fprintf(stderr, "transfer error !\n"); + hex_dump(tx, len, 32, "TX"); + hex_dump(rx, len, 32, "RX"); + exit(1); + } + } + + free(rx); + free(tx); +} + int main(int argc, char *argv[]) { int ret = 0; @@ -391,7 +456,25 @@ int main(int argc, char *argv[]) transfer_escaped_string(fd, input_tx); else if (input_file) transfer_file(fd, input_file); - else + else if (transfer_size) { + struct timespec last_stat; + + clock_gettime(CLOCK_MONOTONIC, &last_stat); + + while (iterations-- > 0) { + struct timespec current; + + transfer_buf(fd, transfer_size); + + clock_gettime(CLOCK_MONOTONIC, ¤t); + if (current.tv_sec - last_stat.tv_sec > interval) { + show_transfer_rate(); + last_stat = current; + } + } + printf("total: tx %.1fKB, rx %.1fKB\n", + _write_count/1024.0, _read_count/1024.0); + } else transfer(fd, default_tx, default_rx, sizeof(default_tx)); close(fd); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 405212be044a93f77db540eb0ee6d7ba4a62e825..d870520da68b9b9bc924ff035a57808890693385 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -28,7 +28,10 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o -obj-$(CONFIG_DEV_DAX) += dax.o +ifeq ($(CONFIG_DAX),m) +obj-$(CONFIG_DAX) += dax.o +endif +obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o @@ -48,9 +51,13 @@ nd_blk-y += config_check.o nd_e820-y := $(NVDIMM_SRC)/e820.o nd_e820-y += config_check.o -dax-y := $(DAX_SRC)/dax.o +dax-y := $(DAX_SRC)/super.o dax-y += config_check.o +device_dax-y := $(DAX_SRC)/device.o +device_dax-y += dax-dev.o +device_dax-y += config_check.o + dax_pmem-y := $(DAX_SRC)/pmem.o dax_pmem-y += config_check.o diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c new file mode 100644 index 0000000000000000000000000000000000000000..36ee3d8797c3bcdac889d2fa32d4c2280906f845 --- /dev/null +++ b/tools/testing/nvdimm/dax-dev.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include "test/nfit_test.h" +#include +#include "../../../drivers/dax/dax-private.h" + +phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, + unsigned long size) +{ + struct resource *res; + phys_addr_t addr; + int i; + + for (i = 0; i < dev_dax->num_resources; i++) { + res = &dev_dax->res[i]; + addr = pgoff * PAGE_SIZE + res->start; + if (addr >= res->start && addr <= res->end) + break; + pgoff -= PHYS_PFN(resource_size(res)); + } + + if (i < dev_dax->num_resources) { + res = &dev_dax->res[i]; + if (addr + size - 1 <= res->end) { + if (get_nfit_res(addr)) { + struct page *page; + + if (dev_dax->region->align > PAGE_SIZE) + return -1; + + page = vmalloc_to_page((void *)addr); + return PFN_PHYS(page_to_pfn(page)); + } else + return addr; + } + } + + return -1; +} diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index c9b8c48f85fc9c98bae254c2d4998066c36ccd25..b53596ad601bb4964231c58151308f61221868f5 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -15,13 +15,13 @@ #include #include -long pmem_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, pfn_t *pfn, long size) +long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) { - struct pmem_device *pmem = bdev->bd_queue->queuedata; - resource_size_t offset = sector * 512 + pmem->data_offset; + resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; - if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) + if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512, + PFN_PHYS(nr_pages)))) return -EIO; /* @@ -34,11 +34,10 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector, *kaddr = pmem->virt_addr + offset; page = vmalloc_to_page(pmem->virt_addr + offset); *pfn = page_to_pfn_t(page); - dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent, - "%s: sector: %#llx pfn: %#lx\n", __func__, - (unsigned long long) sector, page_to_pfn(page)); + pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n", + __func__, pmem, pgoff, page_to_pfn(page)); - return PAGE_SIZE; + return 1; } *kaddr = pmem->virt_addr + offset; @@ -49,6 +48,6 @@ long pmem_direct_access(struct block_device *bdev, sector_t sector, * requested range. */ if (unlikely(pmem->bb.count)) - return size; - return pmem->size - pmem->pfn_pad - offset; + return nr_pages; + return PHYS_PFN(pmem->size - pmem->pfn_pad - offset); } diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 798f176554338bdd87a091b3519b83c805abddbe..c2187178fb13335ce8c54c1787949e85871494aa 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -132,6 +132,7 @@ static u32 handle[] = { [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), [5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), + [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; @@ -728,8 +729,8 @@ static int nfit_test0_alloc(struct nfit_test *t) static int nfit_test1_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 - + sizeof(struct acpi_nfit_memory_map) - + offsetof(struct acpi_nfit_control_region, window_size); + + sizeof(struct acpi_nfit_memory_map) * 2 + + offsetof(struct acpi_nfit_control_region, window_size) * 2; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -906,6 +907,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region2 (spa1, dimm0) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; @@ -921,6 +923,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region3 (spa1, dimm1) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; @@ -951,6 +954,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region5 (spa1, dimm3) */ memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; @@ -1086,6 +1090,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; /* dcr-descriptor0: blk */ @@ -1384,6 +1389,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; /* mem-region16 (spa/bdw4, dimm4) */ memdev = nfit_buf + offset + @@ -1486,6 +1492,34 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; + offset += dcr->header.length; + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[6]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0; + memdev->region_index = 0+2; + memdev->region_size = SPA2_SIZE; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; + + /* dcr-descriptor1 */ + offset += sizeof(*memdev); + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = offsetof(struct acpi_nfit_control_region, + window_size); + dcr->region_index = 0+2; + dcr_common_init(dcr); + dcr->serial_number = ~handle[6]; + dcr->code = NFIT_FIC_BYTE; + dcr->windows = 0; + post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; @@ -1817,6 +1851,10 @@ static int nfit_test_probe(struct platform_device *pdev) if (rc) return rc; + rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc); + if (rc) + return rc; + if (nfit_test->setup != nfit_test0_setup) return 0; @@ -1907,7 +1945,7 @@ static __init int nfit_test_init(void) case 1: nfit_test->num_pm = 1; nfit_test->dcr_idx = NUM_DCR; - nfit_test->num_dcr = 1; + nfit_test->num_dcr = 2; nfit_test->alloc = nfit_test1_alloc; nfit_test->setup = nfit_test1_setup; break; @@ -1924,6 +1962,7 @@ static __init int nfit_test_init(void) put_device(&pdev->dev); goto err_register; } + get_device(&pdev->dev); rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rc) @@ -1942,6 +1981,10 @@ static __init int nfit_test_init(void) if (instances[i]) platform_device_unregister(&instances[i]->pdev); nfit_test_teardown(); + for (i = 0; i < NUM_NFITS; i++) + if (instances[i]) + put_device(&instances[i]->pdev.dev); + return rc; } @@ -1949,10 +1992,13 @@ static __exit void nfit_test_exit(void) { int i; - platform_driver_unregister(&nfit_test_driver); for (i = 0; i < NUM_NFITS; i++) platform_device_unregister(&instances[i]->pdev); + platform_driver_unregister(&nfit_test_driver); nfit_test_teardown(); + + for (i = 0; i < NUM_NFITS; i++) + put_device(&instances[i]->pdev.dev); class_destroy(nfit_test_dimm); } diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index f0600d20ce7d6f305d5d02b477fa804fcec91935..91750352459dfd9c3cc2bb81f99f5f9ef74ed7f3 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -1 +1,5 @@ kselftest +gpiogpio-event-mon +gpiogpio-hammer +gpioinclude/ +gpiolsgpio diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d8593f1251ecce4c750fc54086ba3fc2b0b7c90a..26ce4f7168be534de2eef4be6650de9617b03b33 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -39,7 +39,7 @@ TARGETS += x86 TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or -# "make quicktest=1 kselftest from top level Makefile +# "make quicktest=1 kselftest" from top level Makefile TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug @@ -133,4 +133,4 @@ clean: make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; -.PHONY: install +.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9af09e8099c0aae9fd6acd5d42cc5afb949871e9..f389b02d43a004e90aed9acd68148875af3beba2 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -8,16 +8,19 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -LDLIBS += -lcap +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +LDLIBS += -lcap -lelf -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ + test_align + +TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o TEST_PROGS := test_kmod.sh include ../lib.mk -BPFOBJ := $(OUTPUT)/bpf.o +BPFOBJ := $(OUTPUT)/libbpf.a $(TEST_GEN_PROGS): $(BPFOBJ) @@ -28,3 +31,11 @@ force: $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ + +CLANG ?= clang + +%.o: %.c + $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ + -I../../../../samples/bpf/ \ + -Wno-compare-distinct-pointer-types \ + -O2 -target bpf -c $< -o $@ diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h new file mode 100644 index 0000000000000000000000000000000000000000..487cbfb89beb5012816c7e4989b3f9ff4261995d --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_endian.h @@ -0,0 +1,42 @@ +#ifndef __BPF_ENDIAN__ +#define __BPF_ENDIAN__ + +#include + +/* LLVM's BPF target selects the endianness of the CPU + * it compiles on, or the user specifies (bpfel/bpfeb), + * respectively. The used __BYTE_ORDER__ is defined by + * the compiler, we cannot rely on __BYTE_ORDER from + * libc headers, since it doesn't reflect the actual + * requested byte order. + * + * Note, LLVM's BPF target has different __builtin_bswapX() + * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE + * in bpfel and bpfeb case, which means below, that we map + * to cpu_to_be16(). We could use it unconditionally in BPF + * case, but better not rely on it, so that this header here + * can be used from application and BPF program side, which + * use different targets. + */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __bpf_ntohs(x) __builtin_bswap16(x) +# define __bpf_htons(x) __builtin_bswap16(x) +# define __bpf_constant_ntohs(x) ___constant_swab16(x) +# define __bpf_constant_htons(x) ___constant_swab16(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __bpf_ntohs(x) (x) +# define __bpf_htons(x) (x) +# define __bpf_constant_ntohs(x) (x) +# define __bpf_constant_htons(x) (x) +#else +# error "Fix your compiler's __BYTE_ORDER__?!" +#endif + +#define bpf_htons(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_htons(x) : __bpf_htons(x)) +#define bpf_ntohs(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_ntohs(x) : __bpf_ntohs(x)) + +#endif /* __BPF_ENDIAN__ */ diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 84a5d1823f028344e7a4339e8aa5eb45506d797d..20ecbaa0d85d72b860678caf49c3e421105802b8 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -35,4 +35,11 @@ static inline unsigned int bpf_num_possible_cpus(void) return possible_cpus; } +#define __bpf_percpu_val_align __attribute__((__aligned__(8))) + +#define BPF_DECLARE_PERCPU(type, name) \ + struct { type v; /* padding */ } __bpf_percpu_val_align \ + name[bpf_num_possible_cpus()] +#define bpf_percpu(name, cpu) name[(cpu)].v + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h new file mode 100644 index 0000000000000000000000000000000000000000..719225b1662697f90ab04d9a0f6562e9b4bc34d0 --- /dev/null +++ b/tools/testing/selftests/bpf/gnu/stubs.h @@ -0,0 +1 @@ +/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */ diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h new file mode 100644 index 0000000000000000000000000000000000000000..51841848fbfe35aeecb30d35acdeee516201afaa --- /dev/null +++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h @@ -0,0 +1,22 @@ +#ifndef _UAPI_LINUX_TYPES_H +#define _UAPI_LINUX_TYPES_H + +#include + +/* copied from linux:include/uapi/linux/types.h */ +#define __bitwise +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + +#define __aligned_u64 __u64 __attribute__((aligned(8))) +#define __aligned_be64 __be64 __attribute__((aligned(8))) +#define __aligned_le64 __le64 __attribute__((aligned(8))) + +#endif /* _UAPI_LINUX_TYPES_H */ diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c new file mode 100644 index 0000000000000000000000000000000000000000..9644d4e069dec3e43ec654f6f44d933a02a7878a --- /dev/null +++ b/tools/testing/selftests/bpf/test_align.c @@ -0,0 +1,453 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "../../../include/linux/filter.h" + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#define MAX_INSNS 512 +#define MAX_MATCHES 16 + +struct bpf_align_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + enum { + UNDEF, + ACCEPT, + REJECT + } result; + enum bpf_prog_type prog_type; + const char *matches[MAX_MATCHES]; +}; + +static struct bpf_align_test tests[] = { + { + .descr = "mov", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 16), + BPF_MOV64_IMM(BPF_REG_3, 32), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + "2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + }, + }, + { + .descr = "shift", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_4, 32), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", + "2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + "3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", + "7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + "8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp", + "9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + }, + }, + { + .descr = "addsub", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", + "2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp", + "3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp", + "4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", + "5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp", + "6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + }, + }, + { + .descr = "mul", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", + "2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", + "3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + "4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp", + }, + }, + +#define PREP_PKT_POINTERS \ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \ + offsetof(struct __sk_buff, data)), \ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \ + offsetof(struct __sk_buff, data_end)) + +#define LOAD_UNKNOWN(DST_REG) \ + PREP_PKT_POINTERS, \ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \ + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0) + + { + .descr = "unknown shift", + .insns = { + LOAD_UNKNOWN(BPF_REG_3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1), + LOAD_UNKNOWN(BPF_REG_4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp", + "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp", + "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp", + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp", + "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp", + "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp", + "20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp", + "21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp", + "22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp", + "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp", + }, + }, + { + .descr = "unknown mul", + .insns = { + LOAD_UNKNOWN(BPF_REG_3), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_3), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp", + "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp", + "12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp", + "14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", + "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp", + "16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp" + }, + }, + { + .descr = "packet const offset", + .insns = { + PREP_PKT_POINTERS, + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + + BPF_MOV64_IMM(BPF_REG_0, 0), + + /* Skip over ethernet header. */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + "4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp", + "5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp", + "6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp", + "10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp", + "14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + "15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + }, + }, + { + .descr = "packet variable offset", + .insns = { + LOAD_UNKNOWN(BPF_REG_6), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + + /* First, add a constant to the R5 packet pointer, + * then a variable with a known alignment. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + /* Now, test in the other direction. Adding first + * the variable offset to R5, then the constant. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + /* Test multiple accumulations of unknown values + * into a packet pointer. + */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ + "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp", + + /* Offset is added to packet pointer R5, resulting in known + * auxiliary alignment and offset. + */ + "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (0) + + * reg->aux_off (14) which is 16. Then the variable + * offset is considered using reg->aux_off_align which + * is 4 and meets the load's requirements. + */ + "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + + /* Variable offset is added to R5 packet pointer, + * resulting in auxiliary alignment of 4. + */ + "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* Constant offset is added to R5, resulting in + * reg->off of 14. + */ + "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (14) which + * is 16. Then the variable offset is considered using + * reg->aux_off_align which is 4 and meets the load's + * requirements. + */ + "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp", + + /* Constant offset is added to R5 packet pointer, + * resulting in reg->off value of 14. + */ + "26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp", + /* Variable offset is added to R5, resulting in an + * auxiliary offset of 14, and an auxiliary alignment of 4. + */ + "27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* Constant is added to R5 again, setting reg->off to 4. */ + "28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* And once more we add a variable, which causes an accumulation + * of reg->off into reg->aux_off_align, with resulting value of + * 18. The auxiliary alignment stays at 4. + */ + "29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + /* At the time the word size load is performed from R5, + * it's total offset is NET_IP_ALIGN + reg->off (0) + + * reg->aux_off (18) which is 20. Then the variable offset + * is considered using reg->aux_off_align which is 4 and meets + * the load's requirements. + */ + "33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + }, + }, +}; + +static int probe_filter_length(const struct bpf_insn *fp) +{ + int len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static char bpf_vlog[32768]; + +static int do_test_single(struct bpf_align_test *test) +{ + struct bpf_insn *prog = test->insns; + int prog_type = test->prog_type; + int prog_len, i; + int fd_prog; + int ret; + + prog_len = probe_filter_length(prog); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, 1, "GPL", 0, + bpf_vlog, sizeof(bpf_vlog)); + if (fd_prog < 0) { + printf("Failed to load program.\n"); + printf("%s", bpf_vlog); + ret = 1; + } else { + ret = 0; + for (i = 0; i < MAX_MATCHES; i++) { + const char *t, *m = test->matches[i]; + + if (!m) + break; + t = strstr(bpf_vlog, m); + if (!t) { + printf("Failed to find match: %s\n", m); + ret = 1; + printf("%s", bpf_vlog); + break; + } + } + close(fd_prog); + } + return ret; +} + +static int do_test(unsigned int from, unsigned int to) +{ + int all_pass = 0; + int all_fail = 0; + unsigned int i; + + for (i = from; i < to; i++) { + struct bpf_align_test *test = &tests[i]; + int fail; + + printf("Test %3d: %s ... ", + i, test->descr); + fail = do_test_single(test); + if (fail) { + all_fail++; + printf("FAIL\n"); + } else { + all_pass++; + printf("PASS\n"); + } + } + printf("Results: %d pass %d fail\n", + all_pass, all_fail); + return 0; +} + +int main(int argc, char **argv) +{ + unsigned int from = 0, to = ARRAY_SIZE(tests); + + if (argc == 3) { + unsigned int l = atoi(argv[argc - 2]); + unsigned int u = atoi(argv[argc - 1]); + + if (l < to && u < to) { + from = l; + to = u + 1; + } + } else if (argc == 2) { + unsigned int t = atoi(argv[argc - 1]); + + if (t < to) { + from = t; + to = t + 1; + } + } + return do_test(from, to); +} diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h new file mode 100644 index 0000000000000000000000000000000000000000..e4cd252a1b205d53c80bde3058a273219bed54a4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_iptunnel_common.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _TEST_IPTNL_COMMON_H +#define _TEST_IPTNL_COMMON_H + +#include + +#define MAX_IPTNL_ENTRIES 256U + +struct vip { + union { + __u32 v6[4]; + __u32 v4; + } daddr; + __u16 dport; + __u16 family; + __u8 protocol; +}; + +struct iptnl_info { + union { + __u32 v6[4]; + __u32 v4; + } saddr; + union { + __u32 v6[4]; + __u32 v4; + } daddr; + __u16 family; + __u8 dmac[6]; +}; + +#endif diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c new file mode 100644 index 0000000000000000000000000000000000000000..1e10c9590991bfde9b35753d955591d2f66bb1c1 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb.c @@ -0,0 +1,473 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __always_inline __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static __always_inline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE; + __u32 key = RING_SIZE * vip_info->vip_num + hash; + __u32 *real_pos; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static __always_inline int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static __always_inline bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static __always_inline int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 00b0aff56e2e7256de150815e0da2eb0fb20d7b6..8c10c9180c1a6535f18caafcdf7195f69d04ecd2 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -22,7 +22,7 @@ #include "bpf_util.h" #define LOCAL_FREE_TARGET (128) -#define PERCPU_FREE_TARGET (16) +#define PERCPU_FREE_TARGET (4) static int nr_cpus; @@ -191,12 +191,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) - /* Ther percpu lru list (i.e each cpu has its own LRU - * list) does not have a local free list. Hence, - * it will only free old nodes till there is no free - * from the LRU list. Hence, this test does not apply - * to BPF_F_NO_COMMON_LRU - */ + /* This test is only applicable to common LRU list */ return; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, @@ -227,7 +222,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } /* Insert 1+tgt_free to 2*tgt_free @@ -273,12 +268,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) int next_cpu = 0; if (map_flags & BPF_F_NO_COMMON_LRU) - /* Ther percpu lru list (i.e each cpu has its own LRU - * list) does not have a local free list. Hence, - * it will only free old nodes till there is no free - * from the LRU list. Hence, this test does not apply - * to BPF_F_NO_COMMON_LRU - */ + /* This test is only applicable to common LRU list */ return; printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, @@ -290,11 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) assert(batch_size * 2 == tgt_free); map_size = tgt_free + batch_size; - if (map_flags & BPF_F_NO_COMMON_LRU) - lru_map_fd = create_map(map_type, map_flags, - map_size * nr_cpus); - else - lru_map_fd = create_map(map_type, map_flags, map_size); + lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size); @@ -341,7 +327,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); assert(value[0] == 4321); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } value[0] = 1234; @@ -361,7 +347,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } assert(map_equal(lru_map_fd, expected_map_fd)); @@ -387,6 +373,10 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) unsigned int map_size; int next_cpu = 0; + if (map_flags & BPF_F_NO_COMMON_LRU) + /* This test is only applicable to common LRU list */ + return; + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, map_flags); @@ -396,11 +386,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(batch_size * 2 == tgt_free); map_size = tgt_free * 2; - if (map_flags & BPF_F_NO_COMMON_LRU) - lru_map_fd = create_map(map_type, map_flags, - map_size * nr_cpus); - else - lru_map_fd = create_map(map_type, map_flags, map_size); + lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size); @@ -419,7 +405,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } /* Add 1+2*tgt_free to tgt_free*5/2 @@ -431,7 +417,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } assert(map_equal(lru_map_fd, expected_map_fd)); @@ -491,7 +477,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, - BPF_NOEXIST)); + BPF_NOEXIST)); } assert(map_equal(lru_map_fd, expected_map_fd)); @@ -566,6 +552,65 @@ static void test_lru_sanity5(int map_type, int map_flags) printf("Pass\n"); } +/* Test list rotation for BPF_F_NO_COMMON_LRU map */ +static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) +{ + int lru_map_fd, expected_map_fd; + unsigned long long key, value[nr_cpus]; + unsigned int map_size = tgt_free * 2; + int next_cpu = 0; + + if (!(map_flags & BPF_F_NO_COMMON_LRU)) + return; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, &next_cpu) != -1); + + expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size); + assert(expected_map_fd != -1); + + lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus); + assert(lru_map_fd != -1); + + value[0] = 1234; + + for (key = 1; key <= tgt_free; key++) { + assert(!bpf_map_update_elem(lru_map_fd, &key, value, + BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + } + + for (; key <= tgt_free * 2; key++) { + unsigned long long stable_key; + + /* Make ref bit sticky for key: [1, tgt_free] */ + for (stable_key = 1; stable_key <= tgt_free; stable_key++) { + /* Mark the ref bit */ + assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key, + value)); + } + assert(!bpf_map_update_elem(lru_map_fd, &key, value, + BPF_NOEXIST)); + } + + for (; key <= tgt_free * 3; key++) { + assert(!bpf_map_update_elem(lru_map_fd, &key, value, + BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + } + + assert(map_equal(lru_map_fd, expected_map_fd)); + + close(expected_map_fd); + close(lru_map_fd); + + printf("Pass\n"); +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -593,6 +638,7 @@ int main(int argc, char **argv) test_lru_sanity3(map_types[t], map_flags[f], tgt_free); test_lru_sanity4(map_types[t], map_flags[f], tgt_free); test_lru_sanity5(map_types[t], map_flags[f]); + test_lru_sanity6(map_types[t], map_flags[f], tgt_free); printf("\n"); } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index a0aa2009b0e0a81e65672eb795039a2172484c55..93314524de0d033ded2a00d71bbf500a0afbd60e 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -28,7 +28,7 @@ static int map_flags; static void test_hashmap(int task, void *data) { - long long key, next_key, value; + long long key, next_key, first_key, value; int fd; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), @@ -89,10 +89,13 @@ static void test_hashmap(int task, void *data) assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && + (first_key == 1 || first_key == 2)); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); + (next_key == first_key)); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && - (next_key == 1 || next_key == 2)); + (next_key == 1 || next_key == 2) && + (next_key != first_key)); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && errno == ENOENT); @@ -105,6 +108,8 @@ static void test_hashmap(int task, void *data) key = 0; /* Check that map is empty. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && errno == ENOENT); @@ -132,20 +137,20 @@ static void test_hashmap_sizes(int task, void *data) static void test_hashmap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); - long long value[nr_cpus]; - long long key, next_key; + BPF_DECLARE_PERCPU(long, value); + long long key, next_key, first_key; int expected_key_mask = 0; int fd, i; fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), - sizeof(value[0]), 2, map_flags); + sizeof(bpf_percpu(value, 0)), 2, map_flags); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); } for (i = 0; i < nr_cpus; i++) - value[i] = i + 100; + bpf_percpu(value, i) = i + 100; key = 1; /* Insert key=1 element. */ @@ -165,8 +170,9 @@ static void test_hashmap_percpu(int task, void *data) /* Check that key=1 can be found. Value could be 0 if the lookup * was run from a different CPU. */ - value[0] = 1; - assert(bpf_map_lookup_elem(fd, &key, value) == 0 && value[0] == 100); + bpf_percpu(value, 0) = 1; + assert(bpf_map_lookup_elem(fd, &key, value) == 0 && + bpf_percpu(value, 0) == 100); key = 2; /* Check that key=2 is not found. */ @@ -193,14 +199,20 @@ static void test_hashmap_percpu(int task, void *data) assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && + ((expected_key_mask & first_key) == first_key)); while (!bpf_map_get_next_key(fd, &key, &next_key)) { + if (first_key) { + assert(next_key == first_key); + first_key = 0; + } assert((expected_key_mask & next_key) == next_key); expected_key_mask &= ~next_key; assert(bpf_map_lookup_elem(fd, &next_key, value) == 0); for (i = 0; i < nr_cpus; i++) - assert(value[i] == i + 100); + assert(bpf_percpu(value, i) == i + 100); key = next_key; } @@ -219,6 +231,8 @@ static void test_hashmap_percpu(int task, void *data) key = 0; /* Check that map is empty. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && errno == ENOENT); @@ -264,6 +278,8 @@ static void test_arraymap(int task, void *data) assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && + next_key == 0); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && @@ -281,34 +297,36 @@ static void test_arraymap(int task, void *data) static void test_arraymap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); + BPF_DECLARE_PERCPU(long, values); int key, next_key, fd, i; - long values[nr_cpus]; fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(values[0]), 2, 0); + sizeof(bpf_percpu(values, 0)), 2, 0); if (fd < 0) { printf("Failed to create arraymap '%s'!\n", strerror(errno)); exit(1); } for (i = 0; i < nr_cpus; i++) - values[i] = i + 100; + bpf_percpu(values, i) = i + 100; key = 1; /* Insert key=1 element. */ assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0); - values[0] = 0; + bpf_percpu(values, 0) = 0; assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 && errno == EEXIST); /* Check that key=1 can be found. */ - assert(bpf_map_lookup_elem(fd, &key, values) == 0 && values[0] == 100); + assert(bpf_map_lookup_elem(fd, &key, values) == 0 && + bpf_percpu(values, 0) == 100); key = 0; /* Check that key=0 is also found and zero initialized. */ assert(bpf_map_lookup_elem(fd, &key, values) == 0 && - values[0] == 0 && values[nr_cpus - 1] == 0); + bpf_percpu(values, 0) == 0 && + bpf_percpu(values, nr_cpus - 1) == 0); /* Check that key=2 cannot be inserted due to max_entries limit. */ key = 2; @@ -319,6 +337,8 @@ static void test_arraymap_percpu(int task, void *data) assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT); /* Iterate over two elements. */ + assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && + next_key == 0); assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 && next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && @@ -336,15 +356,15 @@ static void test_arraymap_percpu(int task, void *data) static void test_arraymap_percpu_many_keys(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); + BPF_DECLARE_PERCPU(long, values); /* nr_keys is not too large otherwise the test stresses percpu * allocator more than anything else */ unsigned int nr_keys = 2000; - long values[nr_cpus]; int key, fd, i; fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(values[0]), nr_keys, 0); + sizeof(bpf_percpu(values, 0)), nr_keys, 0); if (fd < 0) { printf("Failed to create per-cpu arraymap '%s'!\n", strerror(errno)); @@ -352,19 +372,19 @@ static void test_arraymap_percpu_many_keys(void) } for (i = 0; i < nr_cpus; i++) - values[i] = i + 10; + bpf_percpu(values, i) = i + 10; for (key = 0; key < nr_keys; key++) assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0); for (key = 0; key < nr_keys; key++) { for (i = 0; i < nr_cpus; i++) - values[i] = 0; + bpf_percpu(values, i) = 0; assert(bpf_map_lookup_elem(fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) - assert(values[i] == i + 10); + assert(bpf_percpu(values, i) == i + 10); } close(fd); @@ -400,6 +420,8 @@ static void test_map_large(void) errno == E2BIG); /* Iterate through all elements. */ + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); + key.c = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); @@ -499,6 +521,7 @@ static void test_map_parallel(void) errno == EEXIST); /* Check that all elements were inserted. */ + assert(bpf_map_get_next_key(fd, NULL, &key) == 0); key = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); @@ -518,6 +541,7 @@ static void test_map_parallel(void) /* Nothing should be left. */ key = -1; + assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT); assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); } diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c new file mode 100644 index 0000000000000000000000000000000000000000..6e11ba11709e5cfcba232be08e706622515cdcef --- /dev/null +++ b/tools/testing/selftests/bpf/test_pkt_access.c @@ -0,0 +1,65 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define barrier() __asm__ __volatile__("": : :"memory") +int _version SEC("version") = 1; + +SEC("test1") +int process(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct tcphdr *tcp = NULL; + __u8 proto = 255; + __u64 ihl_len; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(eth + 1); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + tcp = (struct tcphdr *)((void *)(iph) + ihl_len); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1); + + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len); + } + + if (tcp) { + if (((void *)(tcp) + 20) > data_end || proto != 6) + return TC_ACT_SHOT; + barrier(); /* to force ordering of checks */ + if (((void *)(tcp) + 18) > data_end) + return TC_ACT_SHOT; + if (tcp->urg_ptr == 123) + return TC_ACT_OK; + } + + return TC_ACT_UNSPEC; +} diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c new file mode 100644 index 0000000000000000000000000000000000000000..b59f5ed4ae40d36bf9755fa844e63597c2a6f2c1 --- /dev/null +++ b/tools/testing/selftests/bpf/test_progs.c @@ -0,0 +1,299 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include +typedef __u16 __sum16; +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include "test_iptunnel_common.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +static int error_cnt, pass_cnt; + +#define MAGIC_BYTES 123 + +/* ipv4 test vector */ +static struct { + struct ethhdr eth; + struct iphdr iph; + struct tcphdr tcp; +} __packed pkt_v4 = { + .eth.h_proto = bpf_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = 6, + .iph.tot_len = bpf_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, +}; + +/* ipv6 test vector */ +static struct { + struct ethhdr eth; + struct ipv6hdr iph; + struct tcphdr tcp; +} __packed pkt_v6 = { + .eth.h_proto = bpf_htons(ETH_P_IPV6), + .iph.nexthdr = 6, + .iph.payload_len = bpf_htons(MAGIC_BYTES), + .tcp.urg_ptr = 123, +}; + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + error_cnt++; \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + pass_cnt++; \ + printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\ + } \ +}) + +static int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_program *prog; + struct bpf_object *obj; + int err; + + obj = bpf_object__open(file); + if (IS_ERR(obj)) { + error_cnt++; + return -ENOENT; + } + + prog = bpf_program__next(NULL, obj); + if (!prog) { + bpf_object__close(obj); + error_cnt++; + return -ENOENT; + } + + bpf_program__set_type(prog, type); + err = bpf_object__load(obj); + if (err) { + bpf_object__close(obj); + error_cnt++; + return -EINVAL; + } + + *pobj = obj; + *prog_fd = bpf_program__fd(prog); + return 0; +} + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + error_cnt++; + return -1; + } + return bpf_map__fd(map); +} + +static void test_pkt_access(void) +{ + const char *file = "./test_pkt_access.o"; + struct bpf_object *obj; + __u32 duration, retval; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) + return; + + err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || errno || retval, "ipv4", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || errno || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + bpf_object__close(obj); +} + +static void test_xdp(void) +{ + struct vip key4 = {.protocol = 6, .family = AF_INET}; + struct vip key6 = {.protocol = 6, .family = AF_INET6}; + struct iptnl_info value4 = {.family = AF_INET}; + struct iptnl_info value6 = {.family = AF_INET6}; + const char *file = "./test_xdp.o"; + struct bpf_object *obj; + char buf[128]; + struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr); + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + __u32 duration, retval, size; + int err, prog_fd, map_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) + return; + + map_fd = bpf_find_map(__func__, obj, "vip2tnl"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key4, &value4, 0); + bpf_map_update_elem(map_fd, &key6, &value6, 0); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + CHECK(err || errno || retval != XDP_TX || size != 74 || + iph->protocol != IPPROTO_IPIP, "ipv4", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != XDP_TX || size != 114 || + iph6->nexthdr != IPPROTO_IPV6, "ipv6", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); +out: + bpf_object__close(obj); +} + +#define MAGIC_VAL 0x1234 +#define NUM_ITER 100000 +#define VIP_NUM 5 + +static void test_l4lb(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + const char *file = "./test_l4lb.o"; + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) + return; + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + +static void test_tcp_estats(void) +{ + const char *file = "./test_tcp_estats.o"; + int err, prog_fd; + struct bpf_object *obj; + __u32 duration = 0; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + CHECK(err, "", "err %d errno %d\n", err, errno); + if (err) + return; + + bpf_object__close(obj); +} + +int main(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); + + test_pkt_access(); + test_xdp(); + test_l4lb(); + test_tcp_estats(); + + printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/test_tcp_estats.c new file mode 100644 index 0000000000000000000000000000000000000000..bee3bbecc0c4178762409d9489aa71e398a452f1 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_estats.c @@ -0,0 +1,258 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +/* This program shows clang/llvm is able to generate code pattern + * like: + * _tcp_send_active_reset: + * 0: bf 16 00 00 00 00 00 00 r6 = r1 + * ...... + * 335: b7 01 00 00 0f 00 00 00 r1 = 15 + * 336: 05 00 48 00 00 00 00 00 goto 72 + * + * LBB0_3: + * 337: b7 01 00 00 01 00 00 00 r1 = 1 + * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1 + * 408: b7 01 00 00 03 00 00 00 r1 = 3 + * + * LBB0_4: + * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2) + * 410: bf a7 00 00 00 00 00 00 r7 = r10 + * 411: 07 07 00 00 b8 ff ff ff r7 += -72 + * 412: bf 73 00 00 00 00 00 00 r3 = r7 + * 413: 0f 13 00 00 00 00 00 00 r3 += r1 + * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2 + * + * From the above code snippet, the code generated by the compiler + * is reasonable. The "r1" is assigned to different values in basic + * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4". + * The verifier should be able to handle such code patterns. + */ +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define TCP_ESTATS_MAGIC 0xBAADBEEF + +/* This test case needs "sock" and "pt_regs" data structure. + * Recursively, "sock" needs "sock_common" and "inet_sock". + * However, this is a unit test case only for + * verifier purpose without bpf program execution. + * We can safely mock much simpler data structures, basically + * only taking the necessary fields from kernel headers. + */ +typedef __u32 __bitwise __portpair; +typedef __u64 __bitwise __addrpair; + +struct sock_common { + unsigned short skc_family; + union { + __addrpair skc_addrpair; + struct { + __be32 skc_daddr; + __be32 skc_rcv_saddr; + }; + }; + union { + __portpair skc_portpair; + struct { + __be16 skc_dport; + __u16 skc_num; + }; + }; + struct in6_addr skc_v6_daddr; + struct in6_addr skc_v6_rcv_saddr; +}; + +struct sock { + struct sock_common __sk_common; +#define sk_family __sk_common.skc_family +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr +}; + +struct inet_sock { + struct sock sk; +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_dport sk.__sk_common.skc_dport + __be32 inet_saddr; + __be16 inet_sport; +}; + +struct pt_regs { + long di; +}; + +static inline struct inet_sock *inet_sk(const struct sock *sk) +{ + return (struct inet_sock *)sk; +} + +/* Define various data structures for state recording. + * Some fields are not used due to test simplification. + */ +enum tcp_estats_addrtype { + TCP_ESTATS_ADDRTYPE_IPV4 = 1, + TCP_ESTATS_ADDRTYPE_IPV6 = 2 +}; + +enum tcp_estats_event_type { + TCP_ESTATS_ESTABLISH, + TCP_ESTATS_PERIODIC, + TCP_ESTATS_TIMEOUT, + TCP_ESTATS_RETRANSMIT_TIMEOUT, + TCP_ESTATS_RETRANSMIT_OTHER, + TCP_ESTATS_SYN_RETRANSMIT, + TCP_ESTATS_SYNACK_RETRANSMIT, + TCP_ESTATS_TERM, + TCP_ESTATS_TX_RESET, + TCP_ESTATS_RX_RESET, + TCP_ESTATS_WRITE_TIMEOUT, + TCP_ESTATS_CONN_TIMEOUT, + TCP_ESTATS_ACK_LATENCY, + TCP_ESTATS_NEVENTS, +}; + +struct tcp_estats_event { + int pid; + int cpu; + unsigned long ts; + unsigned int magic; + enum tcp_estats_event_type event_type; +}; + +/* The below data structure is packed in order for + * llvm compiler to generate expected code. + */ +struct tcp_estats_conn_id { + unsigned int localaddressType; + struct { + unsigned char data[16]; + } localaddress; + struct { + unsigned char data[16]; + } remaddress; + unsigned short localport; + unsigned short remport; +} __attribute__((__packed__)); + +struct tcp_estats_basic_event { + struct tcp_estats_event event; + struct tcp_estats_conn_id conn_id; +}; + +struct bpf_map_def SEC("maps") ev_record_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcp_estats_basic_event), + .max_entries = 1024, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event, + enum tcp_estats_event_type type) +{ + event->magic = TCP_ESTATS_MAGIC; + event->ts = bpf_ktime_get_ns(); + event->event_type = type; +} + +static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from) +{ + to[0] = _(from[0]); + to[1] = _(from[1]); + to[2] = _(from[2]); + to[3] = _(from[3]); +} + +static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id, + __be32 *saddr, __be32 *daddr) +{ + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4; + + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr); + unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr); +} + +static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id, + __be32 *saddr, __be32 *daddr) +{ + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6; + + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32), + (__u8 *)(saddr + 1)); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2, + (__u8 *)(saddr + 2)); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3, + (__u8 *)(saddr + 3)); + + unaligned_u32_set(conn_id->remaddress.data, + (__u8 *)(daddr)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32), + (__u8 *)(daddr + 1)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2, + (__u8 *)(daddr + 2)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3, + (__u8 *)(daddr + 3)); +} + +static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id, + struct sock *sk) +{ + conn_id->localport = _(inet_sk(sk)->inet_sport); + conn_id->remport = _(inet_sk(sk)->inet_dport); + + if (_(sk->sk_family) == AF_INET6) + conn_id_ipv6_init(conn_id, + sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); + else + conn_id_ipv4_init(conn_id, + &inet_sk(sk)->inet_saddr, + &inet_sk(sk)->inet_daddr); +} + +static __always_inline void tcp_estats_init(struct sock *sk, + struct tcp_estats_event *event, + struct tcp_estats_conn_id *conn_id, + enum tcp_estats_event_type type) +{ + tcp_estats_ev_init(event, type); + tcp_estats_conn_id_init(conn_id, sk); +} + +static __always_inline void send_basic_event(struct sock *sk, + enum tcp_estats_event_type type) +{ + struct tcp_estats_basic_event ev; + __u32 key = bpf_get_prandom_u32(); + + memset(&ev, 0, sizeof(ev)); + tcp_estats_init(sk, &ev.event, &ev.conn_id, type); + bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + if (!arg->sock) + return 0; + + send_basic_event(arg->sock, TCP_ESTATS_TX_RESET); + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c848e90b64213128a248c9669a2a2796692c5776..cabb19b1e3718b289910fcc921c698f44f162768 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -46,8 +46,10 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 +#define MAX_NR_MAPS 4 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) +#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) struct bpf_test { const char *descr; @@ -55,6 +57,7 @@ struct bpf_test { int fixup_map1[MAX_FIXUPS]; int fixup_map2[MAX_FIXUPS]; int fixup_prog[MAX_FIXUPS]; + int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; enum { @@ -188,6 +191,86 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_ld_imm64 insn", .result = REJECT, }, + { + "test6 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "test7 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "test8 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "uses reserved fields", + .result = REJECT, + }, + { + "test9 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, 0, 1, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test10 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test11 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, + { + "test12 ld_imm64", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "not pointing to valid bpf_map", + .result = REJECT, + }, + { + "test13 ld_imm64", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), + BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, + }, { "no bpf_exit", .insns = { @@ -328,6 +411,30 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack", .result = REJECT, }, + { + "invalid fp arithmetic", + /* If this gets ever changed, make sure JITs can deal with it. */ + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 pointer arithmetic", + .result_unpriv = REJECT, + .errstr = "R1 invalid mem access", + .result = REJECT, + }, + { + "non-invalid fp arithmetic", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, { "invalid argument register", .insns = { @@ -770,6 +877,9 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, vlan_tci)), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -1795,6 +1905,20 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "unpriv: adding of fp", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "pointer arithmetic prohibited", + .result_unpriv = REJECT, + .errstr = "R1 invalid mem access", + .result = REJECT, + }, { "unpriv: cmp of stack pointer", .insns = { @@ -1809,16 +1933,22 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "unpriv: obfuscate stack pointer", + "stack pointer arithmetic", .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R2 pointer arithmetic", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -2466,6 +2596,49 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test16 (arith on data_end)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test17 (pruning, alignment)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_A(-6), + }, + .errstr = "misaligned packet access off 2+15+-4 size 4", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -3192,6 +3365,70 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, + { + "alu ops on ptr_to_map_value_or_null, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "alu ops on ptr_to_map_value_or_null, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "R4 invalid mem access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, { "invalid memory access with multiple map_lookup_elem calls", .insns = { @@ -4720,7 +4957,218 @@ static struct bpf_test tests[] = { .result = REJECT, .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - } + }, + { + "map in map access", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .result = ACCEPT, + }, + { + "invalid inner map pointer", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .errstr = "R1 type=inv expected=map_ptr", + .errstr_unpriv = "R1 pointer arithmetic prohibited", + .result = REJECT, + }, + { + "forgot null checking on the inner map pointer", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .errstr = "R1 type=map_value_or_null expected=map_ptr", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_abs: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "ld_ind: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 1), + BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, + }, + { + "ld_ind: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -4757,42 +5205,73 @@ static int create_prog_array(void) return fd; } +static int create_map_in_map(void) +{ + int inner_map_fd, outer_map_fd; + + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), + sizeof(int), 1, 0); + if (inner_map_fd < 0) { + printf("Failed to create array '%s'!\n", strerror(errno)); + return inner_map_fd; + } + + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, + sizeof(int), inner_map_fd, 1, 0); + if (outer_map_fd < 0) + printf("Failed to create array of maps '%s'!\n", + strerror(errno)); + + close(inner_map_fd); + + return outer_map_fd; +} + static char bpf_vlog[32768]; static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, - int *fd_f1, int *fd_f2, int *fd_f3) + int *map_fds) { int *fixup_map1 = test->fixup_map1; int *fixup_map2 = test->fixup_map2; int *fixup_prog = test->fixup_prog; + int *fixup_map_in_map = test->fixup_map_in_map; /* Allocating HTs with 1 elem is fine here, since we only test * for verifier and not do a runtime lookup, so the only thing * that really matters is value size in this case. */ if (*fixup_map1) { - *fd_f1 = create_map(sizeof(long long), 1); + map_fds[0] = create_map(sizeof(long long), 1); do { - prog[*fixup_map1].imm = *fd_f1; + prog[*fixup_map1].imm = map_fds[0]; fixup_map1++; } while (*fixup_map1); } if (*fixup_map2) { - *fd_f2 = create_map(sizeof(struct test_val), 1); + map_fds[1] = create_map(sizeof(struct test_val), 1); do { - prog[*fixup_map2].imm = *fd_f2; + prog[*fixup_map2].imm = map_fds[1]; fixup_map2++; } while (*fixup_map2); } if (*fixup_prog) { - *fd_f3 = create_prog_array(); + map_fds[2] = create_prog_array(); do { - prog[*fixup_prog].imm = *fd_f3; + prog[*fixup_prog].imm = map_fds[2]; fixup_prog++; } while (*fixup_prog); } + + if (*fixup_map_in_map) { + map_fds[3] = create_map_in_map(); + do { + prog[*fixup_map_in_map].imm = map_fds[3]; + fixup_map_in_map++; + } while (*fixup_map_in_map); + } } static void do_test_single(struct bpf_test *test, bool unpriv, @@ -4802,14 +5281,18 @@ static void do_test_single(struct bpf_test *test, bool unpriv, struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); int prog_type = test->prog_type; - int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1; + int map_fds[MAX_NR_MAPS]; const char *expected_err; + int i; + + for (i = 0; i < MAX_NR_MAPS; i++) + map_fds[i] = -1; - do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3); + do_test_fixup(test, prog, map_fds); - fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, "GPL", 0, bpf_vlog, - sizeof(bpf_vlog)); + fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog)); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; @@ -4848,9 +5331,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, " (NOTE: reject due to unknown alignment)" : ""); close_fds: close(fd_prog); - close(fd_f1); - close(fd_f2); - close(fd_f3); + for (i = 0; i < MAX_NR_MAPS; i++) + close(map_fds[i]); sched_yield(); return; fail_log: diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c new file mode 100644 index 0000000000000000000000000000000000000000..5e7df8bb5b5db914e098a73ab1275921cf68b094 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp.c @@ -0,0 +1,235 @@ +/* Copyright (c) 2016,2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_iptunnel_common.h" + +int _version SEC("version") = 1; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 256, +}; + +struct bpf_map_def SEC("maps") vip2tnl = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct iptnl_info), + .max_entries = MAX_IPTNL_ENTRIES, +}; + +static __always_inline void count_tx(__u32 protocol) +{ + __u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, void *data_end, + __u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + if (th + 1 > data_end) + return -1; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + if (uh + 1 > data_end) + return -1; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct iphdr *iph = data + sizeof(struct ethhdr); + __u16 *next_iph; + __u16 payload_len; + struct vip vip = {}; + int dport; + __u32 csum = 0; + int i; + + if (iph + 1 > data_end) + return XDP_DROP; + + dport = get_dport(iph + 1, data_end, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = bpf_ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + iph = data + sizeof(*new_eth); + old_eth = data + sizeof(*iph); + + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || + iph + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + + iph->version = 4; + iph->ihl = sizeof(*iph) >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = bpf_htons(payload_len + sizeof(*iph)); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph = (__u16 *)iph; +#pragma clang loop unroll(full) + for (i = 0; i < sizeof(*iph) >> 1; i++) + csum += *next_iph++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + __u16 payload_len; + struct vip vip = {}; + int dport; + + if (ip6h + 1 > data_end) + return XDP_DROP; + + dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + ip6h = data + sizeof(*new_eth); + old_eth = data + sizeof(*ip6h); + + if (new_eth + 1 > data_end || old_eth + 1 > data_end || + ip6h + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h)); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp_tx_iptunnel") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == bpf_htons(ETH_P_IP)) + return handle_ipv4(xdp); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + + return handle_ipv6(xdp); + else + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index 72aa103e4141d3b252c00a8bcf680edc4cd2dfbb..6b214b7b10fb7c1c7ad6330c3769fbd1c46838d7 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) ifeq ($(ARCH),x86) TEST_GEN_PROGS := breakpoint_test endif -ifeq ($(ARCH),aarch64) +ifneq (,$(filter $(ARCH),aarch64 arm64)) TEST_GEN_PROGS := breakpoint_test_arm64 endif diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config new file mode 100644 index 0000000000000000000000000000000000000000..27ff72ebd0f54ba4fec9d3a3a594a9e3608ee7e7 --- /dev/null +++ b/tools/testing/selftests/cpufreq/config @@ -0,0 +1,15 @@ +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config index ef8214661612f892070a3eaa788779e11235c600..8a1c9f949fe082889aa4057ed1886c16249ef2b9 100644 --- a/tools/testing/selftests/ftrace/config +++ b/tools/testing/selftests/ftrace/config @@ -1 +1,2 @@ +CONFIG_KPROBES=y CONFIG_FTRACE=y diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 52e3c4df28d6ff243a70190592d404eba2be23b7..717581145cfcf9dbf8ec129c81c26ce81d4d9ddf 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -16,6 +16,7 @@ echo " -k|--keep Keep passed test logs" echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" echo " -d|--debug Debug mode (trace all shell commands)" +echo " -l|--logdir Save logs on the " exit $1 } @@ -57,13 +58,17 @@ parse_opts() { # opts ;; --verbose|-v|-vv) VERBOSE=$((VERBOSE + 1)) - [ $1 == '-vv' ] && VERBOSE=$((VERBOSE + 1)) + [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1)) shift 1 ;; --debug|-d) DEBUG=1 shift 1 ;; + --logdir|-l) + LOG_DIR=$2 + shift 2 + ;; *.tc) if [ -f "$1" ]; then OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`" @@ -145,11 +150,16 @@ XFAILED_CASES= UNDEFINED_CASES= TOTAL_RESULT=0 +INSTANCE= CASENO=0 testcase() { # testfile CASENO=$((CASENO+1)) desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:` - prlog -n "[$CASENO]$desc" + prlog -n "[$CASENO]$INSTANCE$desc" +} + +test_on_instance() { # testfile + grep -q "^#[ \t]*flags:.*instance" $1 } eval_result() { # sigval @@ -266,6 +276,17 @@ for t in $TEST_CASES; do run_test $t done +# Test on instance loop +INSTANCE=" (instance) " +for t in $TEST_CASES; do + test_on_instance $t || continue + SAVED_TRACING_DIR=$TRACING_DIR + export TRACING_DIR=`mktemp -d $TRACING_DIR/instances/ftracetest.XXXXXX` + run_test $t + rmdir $TRACING_DIR + TRACING_DIR=$SAVED_TRACING_DIR +done + prlog "" prlog "# of passed: " `echo $PASSED_CASES | wc -w` prlog "# of failed: " `echo $FAILED_CASES | wc -w` diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc index bf9a7b037924f84dbbae5ad4bc7fd1e78f1a52b6..ebfce83f35b44b4fff4bfa6f58792115d912db51 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: Basic test for tracers +# flags: instance test -f available_tracers for t in `cat available_tracers`; do echo $t > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc index bde6625d9785fbc25073a69c9e8a003ff7a8a905..9e33f841812f304c5deed76bc605284dc4135895 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: Basic trace clock test +# flags: instance test -f trace_clock for c in `cat trace_clock | tr -d \[\]`; do echo $c > trace_clock diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index 87eb9d6dd4ca0e376faaa4eea161026417b2971f..283b45ecb199d54f819efcaa6e6a5973f7622744 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event tracing - enable/disable with event level files +# flags: instance do_reset() { echo > set_event diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc index d4ab27b522f8acdd941e6a65d89a2ace64c1f3a7..96c1a95be4f798f88838ae8882b87d89037707b2 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event tracing - restricts events based on pid +# flags: instance do_reset() { echo > set_event diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index ced27ef0638f2d3f21cd3a4fc436ebdd7bc8b259..b8fe2e5b9e674ff95023c14c30401440d6153c5e 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event tracing - enable/disable with subsystem level files +# flags: instance do_reset() { echo > set_event diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc new file mode 100644 index 0000000000000000000000000000000000000000..bab5ff7c607edbbe12fa7680626e6e260e0bb4e8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -0,0 +1,117 @@ +#!/bin/sh +# description: ftrace - function pid filters + +# Make sure that function pid matching filter works. +# Also test it on an instance directory + +if ! grep -q function available_tracers; then + echo "no function tracer configured" + exit_unsupported +fi + +if [ ! -f set_ftrace_pid ]; then + echo "set_ftrace_pid not found? Is function tracer not set?" + exit_unsupported +fi + +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is function tracer not set?" + exit_unsupported +fi + +do_function_fork=1 + +if [ ! -f options/function-fork ]; then + do_function_fork=0 + echo "no option for function-fork found. Option will not be tested." +fi + +read PID _ < /proc/self/stat + +if [ $do_function_fork -eq 1 ]; then + # default value of function-fork option + orig_value=`grep function-fork trace_options` +fi + +do_reset() { + reset_tracer + clear_trace + enable_tracing + echo > set_ftrace_filter + echo > set_ftrace_pid + + if [ $do_function_fork -eq 0 ]; then + return + fi + + echo $orig_value > trace_options +} + +fail() { # msg + do_reset + echo $1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 +} + +do_test() { + disable_tracing + + echo do_execve* > set_ftrace_filter + echo *do_fork >> set_ftrace_filter + + echo $PID > set_ftrace_pid + echo function > current_tracer + + if [ $do_function_fork -eq 1 ]; then + # don't allow children to be traced + echo nofunction-fork > trace_options + fi + + enable_tracing + yield + + count_pid=`cat trace | grep -v ^# | grep $PID | wc -l` + count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l` + + # count_other should be 0 + if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then + fail "PID filtering not working?" + fi + + disable_tracing + clear_trace + + if [ $do_function_fork -eq 0 ]; then + return + fi + + # allow children to be traced + echo function-fork > trace_options + + enable_tracing + yield + + count_pid=`cat trace | grep -v ^# | grep $PID | wc -l` + count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l` + + # count_other should NOT be 0 + if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then + fail "PID filtering not following fork?" + fi +} + +do_test + +mkdir instances/foo +cd instances/foo +do_test +cd ../../ +rmdir instances/foo + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc new file mode 100644 index 0000000000000000000000000000000000000000..aa31368851c952e2f6e9dc78dd4a4915a36450fc --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -0,0 +1,114 @@ +#!/bin/sh +# description: ftrace - test for function event triggers +# flags: instance +# +# Ftrace allows to add triggers to functions, such as enabling or disabling +# tracing, enabling or disabling trace events, or recording a stack trace +# within the ring buffer. +# +# This test is designed to test event triggers +# + +# The triggers are set within the set_ftrace_filter file +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is dynamic ftrace not set?" + exit_unsupported +fi + +do_reset() { + reset_ftrace_filter + reset_tracer + disable_events + clear_trace + enable_tracing +} + +fail() { # mesg + do_reset + echo $1 + exit $FAIL +} + +SLEEP_TIME=".1" + +do_reset + +echo "Testing function probes with events:" + +EVENT="sched:sched_switch" +EVENT_ENABLE="events/sched/sched_switch/enable" + +cnt_trace() { + grep -v '^#' trace | wc -l +} + +test_event_enabled() { + val=$1 + + e=`cat $EVENT_ENABLE` + if [ "$e" != $val ]; then + echo "Expected $val but found $e" + exit 1 + fi +} + +run_enable_disable() { + enable=$1 # enable + Enable=$2 # Enable + check_disable=$3 # 0 + check_enable_star=$4 # 1* + check_disable_star=$5 # 0* + + cnt=`cnt_trace` + if [ $cnt -ne 0 ]; then + fail "Found junk in trace file" + fi + + echo "$Enable event all the time" + + echo $check_disable > $EVENT_ENABLE + sleep $SLEEP_TIME + + test_event_enabled $check_disable + + echo "schedule:${enable}_event:$EVENT" > set_ftrace_filter + + echo " make sure it works 5 times" + + for i in `seq 5`; do + sleep $SLEEP_TIME + echo " test $i" + test_event_enabled $check_enable_star + + echo $check_disable > $EVENT_ENABLE + done + sleep $SLEEP_TIME + echo " make sure it's still works" + test_event_enabled $check_enable_star + + reset_ftrace_filter + + echo " make sure it only works 3 times" + + echo $check_disable > $EVENT_ENABLE + sleep $SLEEP_TIME + + echo "schedule:${enable}_event:$EVENT:3" > set_ftrace_filter + + for i in `seq 3`; do + sleep $SLEEP_TIME + echo " test $i" + test_event_enabled $check_enable_star + + echo $check_disable > $EVENT_ENABLE + done + + sleep $SLEEP_TIME + echo " make sure it stop working" + test_event_enabled $check_disable_star + + do_reset +} + +run_enable_disable enable Enable 0 "1*" "0*" +run_enable_disable disable Disable 1 "0*" "1*" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc new file mode 100644 index 0000000000000000000000000000000000000000..113b4d9bc7336f1023ddc8c1a6ba1c33e8a963e3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -0,0 +1,132 @@ +#!/bin/sh +# description: ftrace - test reading of set_ftrace_filter +# +# The set_ftrace_filter file of ftrace is used to list functions as well as +# triggers (probes) attached to functions. The code to read this file is not +# straight forward and has had various bugs in the past. This test is designed +# to add functions and triggers to that file in various ways and read that +# file in various ways (cat vs dd). +# + +# The triggers are set within the set_ftrace_filter file +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is dynamic ftrace not set?" + exit_unsupported +fi + +do_reset() { + reset_tracer + reset_ftrace_filter + disable_events + clear_trace + enable_tracing +} + +fail() { # mesg + do_reset + echo $1 + exit $FAIL +} + +do_reset + +FILTER=set_ftrace_filter +FUNC1="schedule" +FUNC2="do_IRQ" + +ALL_FUNCS="#### all functions enabled ####" + +test_func() { + if ! echo "$1" | grep -q "^$2\$"; then + return 0 + fi + echo "$1" | grep -v "^$2\$" + return 1 +} + +check_set_ftrace_filter() { + cat=`cat $FILTER` + dd1=`dd if=$FILTER bs=1 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'` + dd100=`dd if=$FILTER bs=100 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'` + + echo "Testing '$@'" + + while [ $# -gt 0 ]; do + echo "test $1" + if cat=`test_func "$cat" "$1"`; then + return 0 + fi + if dd1=`test_func "$dd1" "$1"`; then + return 0 + fi + if dd100=`test_func "$dd100" "$1"`; then + return 0 + fi + shift + done + + if [ -n "$cat" ]; then + return 0 + fi + if [ -n "$dd1" ]; then + return 0 + fi + if [ -n "$dd100" ]; then + return 0 + fi + return 1; +} + +if check_set_ftrace_filter "$ALL_FUNCS"; then + fail "Expected only $ALL_FUNCS" +fi + +echo "$FUNC1:traceoff" > set_ftrace_filter +if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited"; then + fail "Expected $ALL_FUNCS and $FUNC1:traceoff:unlimited" +fi + +echo "$FUNC1" > set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited"; then + fail "Expected $FUNC1 and $FUNC1:traceoff:unlimited" +fi + +echo "$FUNC2" >> set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited"; then + fail "Expected $FUNC1 $FUNC2 and $FUNC1:traceoff:unlimited" +fi + +echo "$FUNC2:traceoff" >> set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then + fail "Expected $FUNC1 $FUNC2 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited" +fi + +echo "$FUNC1" > set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then + fail "Expected $FUNC1 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited" +fi + +echo > set_ftrace_filter +if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then + fail "Expected $ALL_FUNCS $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited" +fi + +reset_ftrace_filter + +if check_set_ftrace_filter "$ALL_FUNCS"; then + fail "Expected $ALL_FUNCS" +fi + +echo "$FUNC1" > set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" ; then + fail "Expected $FUNC1" +fi + +echo "$FUNC2" >> set_ftrace_filter +if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then + fail "Expected $FUNC1 and $FUNC2" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc new file mode 100644 index 0000000000000000000000000000000000000000..c8e02ec01eaf25640a4471dc522b4bb8916c2130 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc @@ -0,0 +1,172 @@ +#!/bin/sh +# description: ftrace - test for function traceon/off triggers +# flags: instance +# +# Ftrace allows to add triggers to functions, such as enabling or disabling +# tracing, enabling or disabling trace events, or recording a stack trace +# within the ring buffer. +# +# This test is designed to test enabling and disabling tracing triggers +# + +# The triggers are set within the set_ftrace_filter file +if [ ! -f set_ftrace_filter ]; then + echo "set_ftrace_filter not found? Is dynamic ftrace not set?" + exit_unsupported +fi + +do_reset() { + reset_ftrace_filter + reset_tracer + disable_events + clear_trace + enable_tracing +} + +fail() { # mesg + do_reset + echo $1 + exit $FAIL +} + +SLEEP_TIME=".1" + +do_reset + +echo "Testing function probes with enabling disabling tracing:" + +cnt_trace() { + grep -v '^#' trace | wc -l +} + +echo '** DISABLE TRACING' +disable_tracing +clear_trace + +cnt=`cnt_trace` +if [ $cnt -ne 0 ]; then + fail "Found junk in trace" +fi + + +echo '** ENABLE EVENTS' + +echo 1 > events/enable + +echo '** ENABLE TRACING' +enable_tracing + +cnt=`cnt_trace` +if [ $cnt -eq 0 ]; then + fail "Nothing found in trace" +fi + +# powerpc uses .schedule +func="schedule" +x=`grep '^\.schedule$' available_filter_functions | wc -l` +if [ "$x" -eq 1 ]; then + func=".schedule" +fi + +echo '** SET TRACEOFF' + +echo "$func:traceoff" > set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 1 ]; then + fail "Did not find traceoff trigger" +fi + +cnt=`cnt_trace` +sleep $SLEEP_TIME +cnt2=`cnt_trace` + +if [ $cnt -ne $cnt2 ]; then + fail "Tracing is not stopped" +fi + +on=`cat tracing_on` +if [ $on != "0" ]; then + fail "Tracing is not off" +fi + +line1=`cat trace | tail -1` +sleep $SLEEP_TIME +line2=`cat trace | tail -1` + +if [ "$line1" != "$line2" ]; then + fail "Tracing file is still changing" +fi + +clear_trace + +cnt=`cnt_trace` +if [ $cnt -ne 0 ]; then + fail "Tracing is still happeing" +fi + +echo "!$func:traceoff" >> set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 0 ]; then + fail "traceoff trigger still exists" +fi + +on=`cat tracing_on` +if [ $on != "0" ]; then + fail "Tracing is started again" +fi + +echo "$func:traceon" > set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 1 ]; then + fail "traceon trigger not found" +fi + +cnt=`cnt_trace` +if [ $cnt -eq 0 ]; then + fail "Tracing did not start" +fi + +on=`cat tracing_on` +if [ $on != "1" ]; then + fail "Tracing was not enabled" +fi + + +echo "!$func:traceon" >> set_ftrace_filter + +cnt=`grep schedule set_ftrace_filter | wc -l` +if [ $cnt -ne 0 ]; then + fail "traceon trigger still exists" +fi + +check_sleep() { + val=$1 + sleep $SLEEP_TIME + cat set_ftrace_filter + on=`cat tracing_on` + if [ $on != "$val" ]; then + fail "Expected tracing_on to be $val, but it was $on" + fi +} + + +echo "$func:traceoff:3" > set_ftrace_filter +check_sleep "0" +echo 1 > tracing_on +check_sleep "0" +echo 1 > tracing_on +check_sleep "0" +echo 1 > tracing_on +check_sleep "1" +echo "!$func:traceoff:0" > set_ftrace_filter + +if grep -e traceon -e traceoff set_ftrace_filter; then + fail "Tracing on and off triggers still exist" +fi + +disable_events + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 91de1a8e4f197808215d431d31343071a4c4221f..f2019b37370d310cd845b6a42101d77cad77bc6b 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -30,6 +30,27 @@ reset_events_filter() { # reset all current setting filters done } +reset_ftrace_filter() { # reset all triggers in set_ftrace_filter + echo > set_ftrace_filter + grep -v '^#' set_ftrace_filter | while read t; do + tr=`echo $t | cut -d: -f2` + if [ "$tr" = "" ]; then + continue + fi + if [ $tr = "enable_event" -o $tr = "disable_event" ]; then + tr=`echo $t | cut -d: -f1-4` + limit=`echo $t | cut -d: -f5` + else + tr=`echo $t | cut -d: -f1-2` + limit=`echo $t | cut -d: -f3` + fi + if [ "$limit" != "unlimited" ]; then + tr="$tr:$limit" + fi + echo "!$tr" > set_ftrace_filter + done +} + disable_events() { echo 0 > events/enable } diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 4c5a061a5b4e6d522396714de21f64b871e23ab4..c73db7863adbf41b51bd6b88ca79881801d59943 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -75,9 +75,13 @@ rmdir foo if [ -d foo ]; then fail "foo still exists" fi -exit 0 - +mkdir foo +echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter +rmdir foo +if [ -d foo ]; then + fail "foo still exists" +fi instance_slam() { diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 0a78705b43b258a84195a038482ce6b0f4837778..c75faefb4fffcb617383f9aebab0d40270f58743 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -26,7 +26,7 @@ check_types() { test $X2 = $X3 test 0x$X3 = $3 - B4=`printf "%x" $4` + B4=`printf "%02x" $4` B3=`echo -n $X3 | tail -c 3 | head -c 2` test $B3 = $B4 } diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc new file mode 100644 index 0000000000000000000000000000000000000000..57abdf1caabf71fb1d6a42cd9403633fe0333729 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# description: Kretprobe dynamic event with maxactive + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +echo > kprobe_events + +# Test if we successfully reject unknown messages +if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi + +# Test if we successfully reject too big maxactive +if echo 'r1000000:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi + +# Test if we successfully reject unparsable numbers for maxactive +if echo 'r10fuzz:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi + +# Test for kretprobe with event name without maxactive +echo 'r:myprobeaccept inet_csk_accept' > kprobe_events +grep myprobeaccept kprobe_events +test -d events/kprobes/myprobeaccept +echo '-:myprobeaccept' >> kprobe_events + +# Test for kretprobe with event name with a small maxactive +echo 'r10:myprobeaccept inet_csk_accept' > kprobe_events +grep myprobeaccept kprobe_events +test -d events/kprobes/myprobeaccept +echo '-:myprobeaccept' >> kprobe_events + +# Test for kretprobe without event name without maxactive +echo 'r inet_csk_accept' > kprobe_events +grep inet_csk_accept kprobe_events +echo > kprobe_events + +# Test for kretprobe without event name with a small maxactive +echo 'r10 inet_csk_accept' > kprobe_events +grep inet_csk_accept kprobe_events +echo > kprobe_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc new file mode 100644 index 0000000000000000000000000000000000000000..f4d1ff785d67d0b62eb7941bef178e5b8c5c2ecc --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -0,0 +1,21 @@ +#!/bin/sh +# description: Register/unregister many kprobe events + +# ftrace fentry skip size depends on the machine architecture. +# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc +case `uname -m` in + x86_64|i[3456]86) OFFS=5;; + ppc*) OFFS=4;; + *) OFFS=0;; +esac + +echo "Setup up to 256 kprobes" +grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ +head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: + +echo 1 > events/kprobes/enable +echo 0 > events/kprobes/enable +echo > kprobe_events +echo "Waiting for unoptimizing & freeing" +sleep 5 +echo "Done" diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc index 1a9445021bf1e7a9016e71d2b3fc4deb7986f769..c5435adfdd932b38d7a09ae55a077db02c921717 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test event enable/disable trigger +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc index 514e466e198b5c0a46ce850a4621c822f7d6b15c..48849a8d577f7f2b475d0c9d0bfad2029d2a9d1b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test trigger filter +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index 400e98b64948f5b5a1f0f409e628c39b97f4355a..b7f86d10b549ab3a6838dbedbc85a0d10b7fb6ab 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test histogram modifiers +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index a00184cd9c959d901f5a7008690acb61a267d3d9..fb66f7d9339da57d3ecf188a7238b0d3065d6609 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test histogram trigger +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 3478b00ead57bdbc5487cfdc69ac0e7ecf9fa6c4..f9153087dd7cdcfabebfa55fa0e4c099072e4101 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -1,5 +1,6 @@ #!/bin/sh # description: event trigger - test multiple histogram triggers +# flags: instance do_reset() { reset_trigger diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 653c5cd9e44d6bc4fba89b79ae8d1d707993e542..e2fbb890aef9549281318a4241a2e5a8ad68b9d1 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -8,7 +8,7 @@ include ../lib.mk all: for DIR in $(SUBDIRS); do \ - BUILD_TARGET=$$OUTPUT/$$DIR; \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done @@ -22,7 +22,7 @@ override define INSTALL_RULE install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) @for SUBDIR in $(SUBDIRS); do \ - BUILD_TARGET=$$OUTPUT/$$SUBDIR; \ + BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \ mkdir $$BUILD_TARGET -p; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ done; @@ -32,9 +32,10 @@ override define EMIT_TESTS echo "./run.sh" endef -clean: +override define CLEAN for DIR in $(SUBDIRS); do \ - BUILD_TARGET=$$OUTPUT/$$DIR; \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done +endef diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 205e4d10e0853c7b8d20342b21111a8693701834..298929df97e695b5be640a47d51b84d173f5da5f 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -2,13 +2,20 @@ TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES) BINARIES := gpio-mockup-chardev +EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio +EXTRA_DIRS := ../gpioinclude/ +EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o +EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o +EXTRA_OBJS += ../gpiolsgpio.o include ../lib.mk all: $(BINARIES) -clean: - $(RM) $(BINARIES) +override define CLEAN + $(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS) + $(RM) -r $(EXTRA_DIRS) +endef CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ LDLIBS += -lmount -I/usr/include/libmount diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config new file mode 100644 index 0000000000000000000000000000000000000000..abaa6902b7b645a84fbed1e8834f809e492349d6 --- /dev/null +++ b/tools/testing/selftests/gpio/config @@ -0,0 +1,2 @@ +CONFIG_GPIOLIB=y +CONFIG_GPIO_MOCKUP=m diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 775c589ac3c0a24f470a8f53ae52f6c82cc17193..959273c3a52e10f5895735b047a3de389e4f161a 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -51,8 +51,12 @@ endef emit_tests: $(EMIT_TESTS) -clean: +define CLEAN $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) +endef + +clean: + $(CLEAN) $(OUTPUT)/%:%.c $(LINK.c) $^ $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config new file mode 100644 index 0000000000000000000000000000000000000000..126933bcc950428300a2a79f0112ca206d5c7e22 --- /dev/null +++ b/tools/testing/selftests/lib/config @@ -0,0 +1,3 @@ +CONFIG_TEST_PRINTF=m +CONFIG_TEST_BITMAP=m +CONFIG_PRIME_NUMBERS=m diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fbfe5d0d5c2e05028e6af86d8db76d95e41c4fc5..35cbb4cba4109551cc14beabcba39dff5d22e85c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,7 +5,7 @@ CFLAGS += -I../../../../usr/include/ reuseport_bpf_numa: LDFLAGS += -lnuma -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh new file mode 100755 index 0000000000000000000000000000000000000000..4e00568d70c2c398651675ad763f646453b45b3b --- /dev/null +++ b/tools/testing/selftests/net/netdevice.sh @@ -0,0 +1,200 @@ +#!/bin/sh +# +# This test is for checking network interface +# For the moment it tests only ethernet interface (but wifi could be easily added) +# +# We assume that all network driver are loaded +# if not they probably have failed earlier in the boot process and their logged error will be catched by another test +# + +# this function will try to up the interface +# if already up, nothing done +# arg1: network interface name +kci_net_start() +{ + netdev=$1 + + ip link show "$netdev" |grep -q UP + if [ $? -eq 0 ];then + echo "SKIP: $netdev: interface already up" + return 0 + fi + + ip link set "$netdev" up + if [ $? -ne 0 ];then + echo "FAIL: $netdev: Fail to up interface" + return 1 + else + echo "PASS: $netdev: set interface up" + NETDEV_STARTED=1 + fi + return 0 +} + +# this function will try to setup an IP and MAC address on a network interface +# Doing nothing if the interface was already up +# arg1: network interface name +kci_net_setup() +{ + netdev=$1 + + # do nothing if the interface was already up + if [ $NETDEV_STARTED -eq 0 ];then + return 0 + fi + + MACADDR='02:03:04:05:06:07' + ip link set dev $netdev address "$MACADDR" + if [ $? -ne 0 ];then + echo "FAIL: $netdev: Cannot set MAC address" + else + ip link show $netdev |grep -q "$MACADDR" + if [ $? -eq 0 ];then + echo "PASS: $netdev: set MAC address" + else + echo "FAIL: $netdev: Cannot set MAC address" + fi + fi + + #check that the interface did not already have an IP + ip address show "$netdev" |grep '^[[:space:]]*inet' + if [ $? -eq 0 ];then + echo "SKIP: $netdev: already have an IP" + return 0 + fi + + # TODO what ipaddr to set ? DHCP ? + echo "SKIP: $netdev: set IP address" + return 0 +} + +# test an ethtool command +# arg1: return code for not supported (see ethtool code source) +# arg2: summary of the command +# arg3: command to execute +kci_netdev_ethtool_test() +{ + if [ $# -le 2 ];then + echo "SKIP: $netdev: ethtool: invalid number of arguments" + return 1 + fi + $3 >/dev/null + ret=$? + if [ $ret -ne 0 ];then + if [ $ret -eq "$1" ];then + echo "SKIP: $netdev: ethtool $2 not supported" + else + echo "FAIL: $netdev: ethtool $2" + return 1 + fi + else + echo "PASS: $netdev: ethtool $2" + fi + return 0 +} + +# test ethtool commands +# arg1: network interface name +kci_netdev_ethtool() +{ + netdev=$1 + + #check presence of ethtool + ethtool --version 2>/dev/null >/dev/null + if [ $? -ne 0 ];then + echo "SKIP: ethtool not present" + return 1 + fi + + TMP_ETHTOOL_FEATURES="$(mktemp)" + if [ ! -e "$TMP_ETHTOOL_FEATURES" ];then + echo "SKIP: Cannot create a tmp file" + return 1 + fi + + ethtool -k "$netdev" > "$TMP_ETHTOOL_FEATURES" + if [ $? -ne 0 ];then + echo "FAIL: $netdev: ethtool list features" + rm "$TMP_ETHTOOL_FEATURES" + return 1 + fi + echo "PASS: $netdev: ethtool list features" + #TODO for each non fixed features, try to turn them on/off + rm "$TMP_ETHTOOL_FEATURES" + + kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev" + kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev" + return 0 +} + +# stop a netdev +# arg1: network interface name +kci_netdev_stop() +{ + netdev=$1 + + if [ $NETDEV_STARTED -eq 0 ];then + echo "SKIP: $netdev: interface kept up" + return 0 + fi + + ip link set "$netdev" down + if [ $? -ne 0 ];then + echo "FAIL: $netdev: stop interface" + return 1 + fi + echo "PASS: $netdev: stop interface" + return 0 +} + +# run all test on a netdev +# arg1: network interface name +kci_test_netdev() +{ + NETDEV_STARTED=0 + IFACE_TO_UPDOWN="$1" + IFACE_TO_TEST="$1" + #check for VLAN interface + MASTER_IFACE="$(echo $1 | cut -d@ -f2)" + if [ ! -z "$MASTER_IFACE" ];then + IFACE_TO_UPDOWN="$MASTER_IFACE" + IFACE_TO_TEST="$(echo $1 | cut -d@ -f1)" + fi + + NETDEV_STARTED=0 + kci_net_start "$IFACE_TO_UPDOWN" + + kci_net_setup "$IFACE_TO_TEST" + + kci_netdev_ethtool "$IFACE_TO_TEST" + + kci_netdev_stop "$IFACE_TO_UPDOWN" + return 0 +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +ip -Version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without the ip tool" + exit 0 +fi + +TMP_LIST_NETDEV="$(mktemp)" +if [ ! -e "$TMP_LIST_NETDEV" ];then + echo "FAIL: Cannot create a tmp file" + exit 1 +fi + +ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV" +while read netdev +do + kci_test_netdev "$netdev" +done < "$TMP_LIST_NETDEV" + +rm "$TMP_LIST_NETDEV" +exit 0 diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 4124593696862fcb370fee15bcf7e34e11cdf9e0..989f917068d1ccfa1dd67ed8d2353b4b13904d2b 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -71,18 +71,17 @@ /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ -static int sock_fanout_open(uint16_t typeflags, int num_packets) +static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { int fd, val; - fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); if (fd < 0) { perror("socket packet"); exit(1); } - /* fanout group ID is always 0: tests whether old groups are deleted */ - val = ((int) typeflags) << 16; + val = (((int) typeflags) << 16) | group_id; if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { if (close(fd)) { perror("close packet"); @@ -95,6 +94,38 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets) return fd; } +static void sock_fanout_set_cbpf(int fd) +{ + struct sock_filter bpf_filter[] = { + BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */ + BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */ + }; + struct sock_fprog bpf_prog; + + bpf_prog.filter = bpf_filter; + bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog, + sizeof(bpf_prog))) { + perror("fanout data cbpf"); + exit(1); + } +} + +static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id) +{ + int sockopt; + socklen_t sockopt_len = sizeof(sockopt); + + if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &sockopt, &sockopt_len)) { + perror("failed to getsockopt"); + exit(1); + } + *typeflags = sockopt >> 16; + *group_id = sockopt & 0xfffff; +} + static void sock_fanout_set_ebpf(int fd) { const int len_off = __builtin_offsetof(struct __sk_buff, len); @@ -223,26 +254,26 @@ static void test_control_group(void) fprintf(stderr, "test: control multiple sockets\n"); - fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } if (sock_fanout_open(PACKET_FANOUT_HASH | - PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) { + PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); exit(1); } if (sock_fanout_open(PACKET_FANOUT_HASH | - PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) { + PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag ro\n"); exit(1); } - if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) { + if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong mode\n"); exit(1); } - fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20); + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[1] == -1) { fprintf(stderr, "ERROR: failed to join group\n"); exit(1); @@ -253,6 +284,61 @@ static void test_control_group(void) } } +/* Test creating a unique fanout group ids */ +static void test_unique_fanout_group_ids(void) +{ + int fds[3]; + uint16_t typeflags, first_group_id, second_group_id; + + fprintf(stderr, "test: unique ids\n"); + + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_UNIQUEID, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed to create a unique id group.\n"); + exit(1); + } + + sock_fanout_getopts(fds[0], &typeflags, &first_group_id); + if (typeflags != PACKET_FANOUT_HASH) { + fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags); + exit(1); + } + + if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) { + fprintf(stderr, "ERROR: joined group with wrong type.\n"); + exit(1); + } + + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id); + if (fds[1] == -1) { + fprintf(stderr, + "ERROR: failed to join previously created group.\n"); + exit(1); + } + + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH | + PACKET_FANOUT_FLAG_UNIQUEID, 0); + if (fds[2] == -1) { + fprintf(stderr, + "ERROR: failed to create a second unique id group.\n"); + exit(1); + } + + sock_fanout_getopts(fds[2], &typeflags, &second_group_id); + if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID, + second_group_id) != -1) { + fprintf(stderr, + "ERROR: specified a group id when requesting unique id\n"); + exit(1); + } + + if (close(fds[0]) || close(fds[1]) || close(fds[2])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + static int test_datapath(uint16_t typeflags, int port_off, const int expect1[], const int expect2[]) { @@ -263,14 +349,14 @@ static int test_datapath(uint16_t typeflags, int port_off, fprintf(stderr, "test: datapath 0x%hx\n", typeflags); - fds[0] = sock_fanout_open(typeflags, 20); - fds[1] = sock_fanout_open(typeflags, 20); + fds[0] = sock_fanout_open(typeflags, 0); + fds[1] = sock_fanout_open(typeflags, 0); if (fds[0] == -1 || fds[1] == -1) { fprintf(stderr, "ERROR: failed open\n"); exit(1); } if (type == PACKET_FANOUT_CBPF) - sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA); + sock_fanout_set_cbpf(fds[0]); else if (type == PACKET_FANOUT_EBPF) sock_fanout_set_ebpf(fds[0]); @@ -331,10 +417,12 @@ int main(int argc, char **argv) const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; + const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; int port_off = 2, tries = 5, ret; test_control_single(); test_control_group(); + test_unique_fanout_group_ids(); /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, @@ -365,6 +453,9 @@ int main(int argc, char **argv) ret |= test_datapath(PACKET_FANOUT_CPU, port_off, expect_cpu1[0], expect_cpu1[1]); + ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off, + expect_uniqueid[0], expect_uniqueid[1]); + if (ret) return 1; diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index a77da88bf9469d515713e1d1f0f6d318544ac458..7d990d6c861b5863f23fe1d29523707c48ac28d5 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -38,7 +38,7 @@ # define __maybe_unused __attribute__ ((__unused__)) #endif -static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) +static __maybe_unused void pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that * are based on the contents of create_payload() @@ -76,23 +76,16 @@ static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) }; struct sock_fprog bpf_prog; - if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA) - bpf_filter[5].code = 0x16; /* RET A */ - bpf_prog.filter = bpf_filter; bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); - if (setsockopt(fd, lvl, optnum, &bpf_prog, + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, sizeof(bpf_prog))) { perror("setsockopt SO_ATTACH_FILTER"); exit(1); } } -static __maybe_unused void pair_udp_setfilter(int fd) -{ - sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER); -} - static __maybe_unused void pair_udp_open(int fds[], uint16_t port) { struct sockaddr_in saddr, daddr; diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index bf13fc2297aab48fad929415dbee9bc9f10532a2..72c3ac2323e15faaeaf2d160063facfa74323a7c 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,12 +8,13 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) export CFLAGS SUB_DIRS = alignment \ benchmarks \ + cache_shape \ copyloops \ context_switch \ dscr \ @@ -59,12 +60,13 @@ override define EMIT_TESTS done; endef -clean: +override define CLEAN @for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \ done; rm -f tags +endef tags: find . -name '*.c' -o -name '*.h' | xargs ctags diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ec1848434be5483a6dc86ac73c6dee5565d7d471 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore @@ -0,0 +1 @@ +cache_shape diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..b24485ab30e2e8f97b6ab4565b3d5b5fbd5cccea --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := cache_shape + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/cache_shape/cache_shape.c b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c new file mode 100644 index 0000000000000000000000000000000000000000..29ec07eba7f96256cf64b480e2dbc4c9f741e711 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c @@ -0,0 +1,125 @@ +/* + * Copyright 2017, Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#ifndef AT_L1I_CACHESIZE +#define AT_L1I_CACHESIZE 40 +#define AT_L1I_CACHEGEOMETRY 41 +#define AT_L1D_CACHESIZE 42 +#define AT_L1D_CACHEGEOMETRY 43 +#define AT_L2_CACHESIZE 44 +#define AT_L2_CACHEGEOMETRY 45 +#define AT_L3_CACHESIZE 46 +#define AT_L3_CACHEGEOMETRY 47 +#endif + +static void print_size(const char *label, uint32_t val) +{ + printf("%s cache size: %#10x %10dB %10dK\n", label, val, val, val / 1024); +} + +static void print_geo(const char *label, uint32_t val) +{ + uint16_t assoc; + + printf("%s line size: %#10x ", label, val & 0xFFFF); + + assoc = val >> 16; + if (assoc) + printf("%u-way", assoc); + else + printf("fully"); + + printf(" associative\n"); +} + +static int test_cache_shape() +{ + static char buffer[4096]; + ElfW(auxv_t) *p; + int found; + + FAIL_IF(read_auxv(buffer, sizeof(buffer))); + + found = 0; + + p = find_auxv_entry(AT_L1I_CACHESIZE, buffer); + if (p) { + found++; + print_size("L1I ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L1I_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L1I ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L1D_CACHESIZE, buffer); + if (p) { + found++; + print_size("L1D ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L1D_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L1D ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L2_CACHESIZE, buffer); + if (p) { + found++; + print_size("L2 ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L2_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L2 ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L3_CACHESIZE, buffer); + if (p) { + found++; + print_size("L3 ", (uint32_t)p->a_un.a_val); + } + + p = find_auxv_entry(AT_L3_CACHEGEOMETRY, buffer); + if (p) { + found++; + print_geo("L3 ", (uint32_t)p->a_un.a_val); + } + + /* If we found none we're probably on a system where they don't exist */ + SKIP_IF(found == 0); + + /* But if we found any, we expect to find them all */ + FAIL_IF(found != 8); + + return 0; +} + +int main(void) +{ + return test_harness(test_cache_shape, "cache_shape"); +} diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 53405e8a52abf45cc5c7b9a880e860951732513e..735815b3ad7f539489db43bcf2dbc013af38614e 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -24,7 +24,11 @@ typedef uint8_t u8; void test_harness_set_timeout(uint64_t time); int test_harness(int (test_function)(void), char *name); -extern void *get_auxv_entry(int type); + +int read_auxv(char *buf, ssize_t buf_size); +void *find_auxv_entry(int type, char *auxv); +void *get_auxv_entry(int type); + int pick_online_cpu(void); static inline bool have_hwcap(unsigned long ftr) diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 427621792229e68c7fcdaff89f28782bda14c1da..2f1f7b013293491b2845f68fa03dbc591c8d5412 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -11,3 +11,4 @@ tm-signal-context-chk-fpu tm-signal-context-chk-gpr tm-signal-context-chk-vmx tm-signal-context-chk-vsx +tm-vmx-unavail diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 5576ee6a51f21bb32a0ede4337d9da049e37fd42..958c11c14acd017a6c3afe79f02d9c6e8fad2240 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -2,7 +2,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu tm-signal-context-chk-vmx tm-signal-context-chk-vsx TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS) + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail \ + $(SIGNAL_CONTEXT_CHK_TESTS) include ../../lib.mk @@ -13,6 +14,7 @@ CFLAGS += -mhtm $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread +$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index d9c49f41515e704994ed1669dbe0a859a6dadd79..e79ccd6aada1a9acf10bb2860e76ddb444415d52 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -42,12 +42,12 @@ int test_body(void) printf("Check DSCR TM context switch: "); fflush(stdout); for (;;) { - rv = 1; asm __volatile__ ( /* set a known value into the DSCR */ "ld 3, %[dscr1];" "mtspr %[sprn_dscr], 3;" + "li %[rv], 1;" /* start and suspend a transaction */ "tbegin.;" "beq 1f;" diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c new file mode 100644 index 0000000000000000000000000000000000000000..137185ba4937b8f73b0d0ada357f7776c2295618 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -0,0 +1,118 @@ +/* + * Copyright 2017, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * Original: Breno Leitao & + * Gustavo Bueno Romero + * Edited: Michael Neuling + * + * Force VMX unavailable during a transaction and see if it corrupts + * the checkpointed VMX register state after the abort. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tm.h" +#include "utils.h" + +int passed; + +void *worker(void *unused) +{ + __int128 vmx0; + uint64_t texasr; + + asm goto ( + "li 3, 1;" /* Stick non-zero value in VMX0 */ + "std 3, 0(%[vmx0_ptr]);" + "lvx 0, 0, %[vmx0_ptr];" + + /* Wait here a bit so we get scheduled out 255 times */ + "lis 3, 0x3fff;" + "1: ;" + "addi 3, 3, -1;" + "cmpdi 3, 0;" + "bne 1b;" + + /* Kernel will hopefully turn VMX off now */ + + "tbegin. ;" + "beq failure;" + + /* Cause VMX unavail. Any VMX instruction */ + "vaddcuw 0,0,0;" + + "tend. ;" + "b %l[success];" + + /* Check VMX0 sanity after abort */ + "failure: ;" + "lvx 1, 0, %[vmx0_ptr];" + "vcmpequb. 2, 0, 1;" + "bc 4, 24, %l[value_mismatch];" + "b %l[value_match];" + : + : [vmx0_ptr] "r"(&vmx0) + : "r3" + : success, value_match, value_mismatch + ); + + /* HTM aborted and VMX0 is corrupted */ +value_mismatch: + texasr = __builtin_get_texasr(); + + printf("\n\n==============\n\n"); + printf("Failure with error: %lx\n", _TEXASR_FAILURE_CODE(texasr)); + printf("Summary error : %lx\n", _TEXASR_FAILURE_SUMMARY(texasr)); + printf("TFIAR exact : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr)); + + passed = 0; + return NULL; + + /* HTM aborted but VMX0 is correct */ +value_match: +// printf("!"); + return NULL; + +success: +// printf("."); + return NULL; +} + +int tm_vmx_unavail_test() +{ + int threads; + pthread_t *thread; + + SKIP_IF(!have_htm()); + + passed = 1; + + threads = sysconf(_SC_NPROCESSORS_ONLN) * 4; + thread = malloc(sizeof(pthread_t)*threads); + if (!thread) + return EXIT_FAILURE; + + for (uint64_t i = 0; i < threads; i++) + pthread_create(&thread[i], NULL, &worker, NULL); + + for (uint64_t i = 0; i < threads; i++) + pthread_join(thread[i], NULL); + + free(thread); + + return passed ? EXIT_SUCCESS : EXIT_FAILURE; +} + + +int main(int argc, char **argv) +{ + return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test"); +} diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index dcf74184bfd0a7f475fc496d840eccc52a46a723..d46916867a6fbcf627bd1fe2d45574decc8c9de8 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -19,45 +19,64 @@ static char auxv[4096]; -void *get_auxv_entry(int type) +int read_auxv(char *buf, ssize_t buf_size) { - ElfW(auxv_t) *p; - void *result; ssize_t num; - int fd; + int rc, fd; fd = open("/proc/self/auxv", O_RDONLY); if (fd == -1) { perror("open"); - return NULL; + return -errno; } - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); + num = read(fd, buf, buf_size); if (num < 0) { perror("read"); + rc = -EIO; goto out; } - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); + if (num > buf_size) { + printf("overflowed auxv buffer\n"); + rc = -EOVERFLOW; goto out; } + rc = 0; +out: + close(fd); + return rc; +} + +void *find_auxv_entry(int type, char *auxv) +{ + ElfW(auxv_t) *p; + p = (ElfW(auxv_t) *)auxv; while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } + if (p->a_type == type) + return p; p++; } -out: - close(fd); - return result; + + return NULL; +} + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + + if (read_auxv(auxv, sizeof(auxv))) + return NULL; + + p = find_auxv_entry(type, auxv); + if (p) + return (void *)p->a_un.a_val; + + return NULL; } int pick_online_cpu(void) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ea6e373edc27255efe7dccccd86b12b9eb97c189..93eede4e8fbea050ce79bf67a1f04807acd53a31 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -170,7 +170,7 @@ qemu_append="`identify_qemu_append "$QEMU"`" # Pull in Kconfig-fragment boot parameters boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" # Generate kernel-version-specific boot parameters -boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`" +boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" if test -n "$TORTURE_BUILDONLY" then diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile index de51f439d4a6a3832755c8272d608566a04f6c93..9fc78e5e5451038dd0ce4b0c4fa25a95e7707249 100644 --- a/tools/testing/selftests/splice/Makefile +++ b/tools/testing/selftests/splice/Makefile @@ -4,5 +4,4 @@ all: $(TEST_PROGS) $(EXTRA) include ../lib.mk -clean: - rm -fr $(TEST_PROGS) $(EXTRA) +EXTRA_CLEAN := $(EXTRA) diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile index 87ac400507c005db3affec4a395f49960b1126ef..4981c6b6d050e95b77fa1540640a4b71770179f2 100644 --- a/tools/testing/selftests/sync/Makefile +++ b/tools/testing/selftests/sync/Makefile @@ -20,5 +20,4 @@ TESTS += sync_stress_merge.o sync_test: $(OBJS) $(TESTS) -clean: - $(RM) sync_test $(OBJS) $(TESTS) +EXTRA_CLEAN := sync_test $(OBJS) $(TESTS) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index b90e50c36f9f99a49c414fc351cf0d6f3f1ebf16..5fa1d7e9a9158ab6e3edee87ac8388236159e5e0 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -3,7 +3,7 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) LDFLAGS += -lrt -lpthread # these are all "safe" tests that don't modify -# system time or require escalated privledges +# system time or require escalated privileges TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ inconsistency-check raw_skew threadtest rtctest @@ -14,7 +14,7 @@ TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew include ../lib.mk -# these tests require escalated privledges +# these tests require escalated privileges # and may modify the system time or trigger # other behavior like suspend run_destructive_tests: run_tests diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index fd88e3025bedc17d44c1537b8279aa6068d3ff47..5ff165373f8ba1b51502ca28a7d0324dc671a4e9 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -159,7 +159,7 @@ int main(int argv, char **argc) } - printf("Running Asyncrhonous Switching Tests...\n"); + printf("Running Asynchronous Switching Tests...\n"); pid = fork(); if (!pid) return run_tests(60); diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 41642ba5e318a153d805720e47475436817be53e..cbb29e41ef2b3af6f0b53c18e54d30489c2fb395 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -15,21 +15,15 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += userfaultfd_hugetlb -TEST_GEN_FILES += userfaultfd_shmem TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += virtual_address_range TEST_PROGS := run_vmtests include ../lib.mk -$(OUTPUT)/userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h - -$(OUTPUT)/userfaultfd_hugetlb: userfaultfd.c ../../../../usr/include/linux/kernel.h - $(CC) $(CFLAGS) -DHUGETLB_TEST -O2 -o $@ $< -lpthread - -$(OUTPUT)/userfaultfd_shmem: userfaultfd.c ../../../../usr/include/linux/kernel.h - $(CC) $(CFLAGS) -DSHMEM_TEST -O2 -o $@ $< -lpthread +$(OUTPUT)/userfaultfd: ../../../../usr/include/linux/kernel.h +$(OUTPUT)/userfaultfd: LDLIBS += -lpthread $(OUTPUT)/mlock-random-test: LDLIBS += -lcap diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 698c7ed28a26719878b28131c7070adee2ff26fd..1c0d76cb5adfd9698502b62bbea1ccea5bb074e9 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -1 +1,2 @@ +CONFIG_SYSVIPC=y CONFIG_USERFAULTFD=y diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index addcd6fc1ecc674684bd8c68e4839faa134433f9..77687ab59f77321b3858128c553d73271ac1b618 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -62,7 +62,7 @@ int main(void) void *addr; int ret; - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0); + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); exit(1); diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c index ff0cda2b19c97fd0ad7f75906e9c348094934a7b..e5dbc87b4297778520bf1cc4a2475774410f1bca 100644 --- a/tools/testing/selftests/vm/mlock2-tests.c +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -293,7 +293,7 @@ static int test_mlock_lock() unsigned long page_size = getpagesize(); map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (map == MAP_FAILED) { perror("test_mlock_locked mmap"); goto out; @@ -402,7 +402,7 @@ static int test_mlock_onfault() unsigned long page_size = getpagesize(); map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (map == MAP_FAILED) { perror("test_mlock_locked mmap"); goto out; @@ -445,7 +445,7 @@ static int test_lock_onfault_of_present() uint64_t page1_flags, page2_flags; map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (map == MAP_FAILED) { perror("test_mlock_locked mmap"); goto out; @@ -492,7 +492,7 @@ static int test_munlockall() unsigned long page_size = getpagesize(); map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (map == MAP_FAILED) { perror("test_munlockall mmap"); @@ -518,7 +518,7 @@ static int test_munlockall() munmap(map, 2 * page_size); map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (map == MAP_FAILED) { perror("test_munlockall second mmap"); @@ -573,7 +573,7 @@ static int test_vma_management(bool call_mlock) struct vm_boundaries page3; map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (map == MAP_FAILED) { perror("mmap()"); return ret; diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c index 0ae458f32fdb90c0ad0ac7c05086c5e4f34d7ed4..7f96a5c2e2924ccfe0ec3b67b617ab014c56c81c 100644 --- a/tools/testing/selftests/vm/on-fault-limit.c +++ b/tools/testing/selftests/vm/on-fault-limit.c @@ -26,7 +26,7 @@ static int test_limit(void) } map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, 0, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); if (map != MAP_FAILED) printf("mmap should have failed, but didn't\n"); else { diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index c92f6cf31d0a85714ae519601902dd274774de03..07548a1fa901ca109dad1e4e89accdfaa1815677 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -49,9 +49,9 @@ fi mkdir $mnt mount -t hugetlbfs none $mnt -echo "--------------------" +echo "---------------------" echo "running hugepage-mmap" -echo "--------------------" +echo "---------------------" ./hugepage-mmap if [ $? -ne 0 ]; then echo "[FAIL]" @@ -77,9 +77,9 @@ fi echo $shmmax > /proc/sys/kernel/shmmax echo $shmall > /proc/sys/kernel/shmall -echo "--------------------" +echo "-------------------" echo "running map_hugetlb" -echo "--------------------" +echo "-------------------" ./map_hugetlb if [ $? -ne 0 ]; then echo "[FAIL]" @@ -92,10 +92,10 @@ echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing." -echo "--------------------" +echo "-------------------" echo "running userfaultfd" -echo "--------------------" -./userfaultfd 128 32 +echo "-------------------" +./userfaultfd anon 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -103,11 +103,11 @@ else echo "[PASS]" fi -echo "----------------------------" +echo "---------------------------" echo "running userfaultfd_hugetlb" -echo "----------------------------" -# 258MB total huge pages == 128MB src and 128MB dst -./userfaultfd_hugetlb 128 32 $mnt/ufd_test_file +echo "---------------------------" +# 256MB total huge pages == 128MB src and 128MB dst +./userfaultfd hugetlb 128 32 $mnt/ufd_test_file if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -116,10 +116,10 @@ else fi rm -f $mnt/ufd_test_file -echo "----------------------------" +echo "-------------------------" echo "running userfaultfd_shmem" -echo "----------------------------" -./userfaultfd_shmem 128 32 +echo "-------------------------" +./userfaultfd shmem 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -143,9 +143,9 @@ else echo "[PASS]" fi -echo "--------------------" +echo "----------------------" echo "running on-fault-limit" -echo "--------------------" +echo "----------------------" sudo -u nobody ./on-fault-limit if [ $? -ne 0 ]; then echo "[FAIL]" @@ -165,4 +165,15 @@ else echo "[PASS]" fi +echo "-----------------------------" +echo "running virtual_address_range" +echo "-----------------------------" +./virtual_address_range +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c index 0bc737a75150bf225f6b3508182341536b66c88e..88a2ab535e0129106dcdca2d7acb4235cd0557ce 100644 --- a/tools/testing/selftests/vm/thuge-gen.c +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -146,7 +146,7 @@ void test_mmap(unsigned long size, unsigned flags) before = read_free(size); map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, 0, 0); + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0); if (map == (char *)-1) err("mmap"); memset(map, 0xff, size*NUM_PAGES); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e9449c8018887785e9ab17219019423244e1b2b0..1eae79ae5b4e93f8862732b13fd2bd57d08d168f 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -77,10 +77,13 @@ static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; #define BOUNCE_POLL (1<<3) static int bounces; -#ifdef HUGETLB_TEST +#define TEST_ANON 1 +#define TEST_HUGETLB 2 +#define TEST_SHMEM 3 +static int test_type; + static int huge_fd; static char *huge_fd_off0; -#endif static unsigned long long *count_verify; static int uffd, uffd_flags, finished, *pipefd; static char *area_src, *area_dst; @@ -102,14 +105,7 @@ pthread_attr_t attr; ~(unsigned long)(sizeof(unsigned long long) \ - 1))) -#if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) - -/* Anonymous memory */ -#define EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE)) - -static int release_pages(char *rel_area) +static int anon_release_pages(char *rel_area) { int ret = 0; @@ -121,7 +117,7 @@ static int release_pages(char *rel_area) return ret; } -static void allocate_area(void **alloc_area) +static void anon_allocate_area(void **alloc_area) { if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) { fprintf(stderr, "out of memory\n"); @@ -129,14 +125,9 @@ static void allocate_area(void **alloc_area) } } -#else /* HUGETLB_TEST or SHMEM_TEST */ - -#define EXPECTED_IOCTLS UFFD_API_RANGE_IOCTLS_BASIC - -#ifdef HUGETLB_TEST /* HugeTLB memory */ -static int release_pages(char *rel_area) +static int hugetlb_release_pages(char *rel_area) { int ret = 0; @@ -152,7 +143,7 @@ static int release_pages(char *rel_area) } -static void allocate_area(void **alloc_area) +static void hugetlb_allocate_area(void **alloc_area) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_HUGETLB, huge_fd, @@ -167,10 +158,8 @@ static void allocate_area(void **alloc_area) huge_fd_off0 = *alloc_area; } -#elif defined(SHMEM_TEST) - /* Shared memory */ -static int release_pages(char *rel_area) +static int shmem_release_pages(char *rel_area) { int ret = 0; @@ -182,7 +171,7 @@ static int release_pages(char *rel_area) return ret; } -static void allocate_area(void **alloc_area) +static void shmem_allocate_area(void **alloc_area) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); @@ -192,11 +181,35 @@ static void allocate_area(void **alloc_area) } } -#else /* SHMEM_TEST */ -#error "Undefined test type" -#endif /* HUGETLB_TEST */ - -#endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */ +struct uffd_test_ops { + unsigned long expected_ioctls; + void (*allocate_area)(void **alloc_area); + int (*release_pages)(char *rel_area); +}; + +#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ + (1 << _UFFDIO_COPY) | \ + (1 << _UFFDIO_ZEROPAGE)) + +static struct uffd_test_ops anon_uffd_test_ops = { + .expected_ioctls = ANON_EXPECTED_IOCTLS, + .allocate_area = anon_allocate_area, + .release_pages = anon_release_pages, +}; + +static struct uffd_test_ops shmem_uffd_test_ops = { + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .allocate_area = shmem_allocate_area, + .release_pages = shmem_release_pages, +}; + +static struct uffd_test_ops hugetlb_uffd_test_ops = { + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .allocate_area = hugetlb_allocate_area, + .release_pages = hugetlb_release_pages, +}; + +static struct uffd_test_ops *uffd_test_ops; static int my_bcmp(char *str1, char *str2, size_t n) { @@ -505,7 +518,7 @@ static int stress(unsigned long *userfaults) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - if (release_pages(area_src)) + if (uffd_test_ops->release_pages(area_src)) return 1; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -577,12 +590,12 @@ static int faulting_process(void) { unsigned long nr; unsigned long long count; + unsigned long split_nr_pages; -#ifndef HUGETLB_TEST - unsigned long split_nr_pages = (nr_pages + 1) / 2; -#else - unsigned long split_nr_pages = nr_pages; -#endif + if (test_type != TEST_HUGETLB) + split_nr_pages = (nr_pages + 1) / 2; + else + split_nr_pages = nr_pages; for (nr = 0; nr < split_nr_pages; nr++) { count = *area_count(area_dst, nr); @@ -594,7 +607,9 @@ static int faulting_process(void) } } -#ifndef HUGETLB_TEST + if (test_type == TEST_HUGETLB) + return 0; + area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, area_src); if (area_dst == MAP_FAILED) @@ -610,7 +625,7 @@ static int faulting_process(void) } } - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; for (nr = 0; nr < nr_pages; nr++) { @@ -618,8 +633,6 @@ static int faulting_process(void) fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); } -#endif /* HUGETLB_TEST */ - return 0; } @@ -627,7 +640,9 @@ static int uffdio_zeropage(int ufd, unsigned long offset) { struct uffdio_zeropage uffdio_zeropage; int ret; - unsigned long has_zeropage = EXPECTED_IOCTLS & (1 << _UFFDIO_ZEROPAGE); + unsigned long has_zeropage; + + has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); if (offset >= nr_pages * page_size) fprintf(stderr, "unexpected offset %lu\n", @@ -675,7 +690,7 @@ static int userfaultfd_zeropage_test(void) printf("testing UFFDIO_ZEROPAGE: "); fflush(stdout); - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; if (userfaultfd_open(0) < 0) @@ -686,7 +701,7 @@ static int userfaultfd_zeropage_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) fprintf(stderr, "register failure\n"), exit(1); - expected_ioctls = EXPECTED_IOCTLS; + expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) fprintf(stderr, @@ -716,7 +731,7 @@ static int userfaultfd_events_test(void) printf("testing events (fork, remap, remove): "); fflush(stdout); - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP | @@ -731,7 +746,7 @@ static int userfaultfd_events_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) fprintf(stderr, "register failure\n"), exit(1); - expected_ioctls = EXPECTED_IOCTLS; + expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) fprintf(stderr, @@ -773,10 +788,10 @@ static int userfaultfd_stress(void) int err; unsigned long userfaults[nr_cpus]; - allocate_area((void **)&area_src); + uffd_test_ops->allocate_area((void **)&area_src); if (!area_src) return 1; - allocate_area((void **)&area_dst); + uffd_test_ops->allocate_area((void **)&area_dst); if (!area_dst) return 1; @@ -856,7 +871,7 @@ static int userfaultfd_stress(void) fprintf(stderr, "register failure\n"); return 1; } - expected_ioctls = EXPECTED_IOCTLS; + expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { fprintf(stderr, @@ -888,7 +903,7 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. */ - if (release_pages(area_dst)) + if (uffd_test_ops->release_pages(area_dst)) return 1; /* bounce pass */ @@ -934,36 +949,6 @@ static int userfaultfd_stress(void) return userfaultfd_zeropage_test() || userfaultfd_events_test(); } -#ifndef HUGETLB_TEST - -int main(int argc, char **argv) -{ - if (argc < 3) - fprintf(stderr, "Usage: \n"), exit(1); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - page_size = sysconf(_SC_PAGE_SIZE); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) - fprintf(stderr, "Impossible to run this test\n"), exit(2); - nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / - nr_cpus; - if (!nr_pages_per_cpu) { - fprintf(stderr, "invalid MiB\n"); - fprintf(stderr, "Usage: \n"), exit(1); - } - bounces = atoi(argv[2]); - if (bounces <= 0) { - fprintf(stderr, "invalid bounces\n"); - fprintf(stderr, "Usage: \n"), exit(1); - } - nr_pages = nr_pages_per_cpu * nr_cpus; - printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); -} - -#else /* HUGETLB_TEST */ - /* * Copied from mlock2-tests.c */ @@ -988,48 +973,78 @@ unsigned long default_huge_page_size(void) return hps; } -int main(int argc, char **argv) +static void set_test_type(const char *type) { - if (argc < 4) - fprintf(stderr, "Usage: \n"), - exit(1); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - page_size = default_huge_page_size(); + if (!strcmp(type, "anon")) { + test_type = TEST_ANON; + uffd_test_ops = &anon_uffd_test_ops; + } else if (!strcmp(type, "hugetlb")) { + test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; + } else if (!strcmp(type, "shmem")) { + test_type = TEST_SHMEM; + uffd_test_ops = &shmem_uffd_test_ops; + } else { + fprintf(stderr, "Unknown test type: %s\n", type), exit(1); + } + + if (test_type == TEST_HUGETLB) + page_size = default_huge_page_size(); + else + page_size = sysconf(_SC_PAGE_SIZE); + if (!page_size) - fprintf(stderr, "Unable to determine huge page size\n"), + fprintf(stderr, "Unable to determine page size\n"), exit(2); if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 > page_size) fprintf(stderr, "Impossible to run this test\n"), exit(2); - nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / +} + +int main(int argc, char **argv) +{ + if (argc < 4) + fprintf(stderr, "Usage: [hugetlbfs_file]\n"), + exit(1); + + set_test_type(argv[1]); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / nr_cpus; if (!nr_pages_per_cpu) { fprintf(stderr, "invalid MiB\n"); fprintf(stderr, "Usage: \n"), exit(1); } - bounces = atoi(argv[2]); + + bounces = atoi(argv[3]); if (bounces <= 0) { fprintf(stderr, "invalid bounces\n"); fprintf(stderr, "Usage: \n"), exit(1); } nr_pages = nr_pages_per_cpu * nr_cpus; - huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755); - if (huge_fd < 0) { - fprintf(stderr, "Open of %s failed", argv[3]); - perror("open"); - exit(1); - } - if (ftruncate(huge_fd, 0)) { - fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); - perror("ftruncate"); - exit(1); + + if (test_type == TEST_HUGETLB) { + if (argc < 5) + fprintf(stderr, "Usage: hugetlb \n"), + exit(1); + huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); + if (huge_fd < 0) { + fprintf(stderr, "Open of %s failed", argv[3]); + perror("open"); + exit(1); + } + if (ftruncate(huge_fd, 0)) { + fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]); + perror("ftruncate"); + exit(1); + } } printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } -#endif #else /* __NR_userfaultfd */ #warning "missing __NR_userfaultfd definition" diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c new file mode 100644 index 0000000000000000000000000000000000000000..3b02aa6eb9da7f91d1b8b02bb5f547b5d33bdbcb --- /dev/null +++ b/tools/testing/selftests/vm/virtual_address_range.c @@ -0,0 +1,122 @@ +/* + * Copyright 2017, Anshuman Khandual, IBM Corp. + * Licensed under GPLv2. + * + * Works on architectures which support 128TB virtual + * address range and beyond. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Maximum address range mapped with a single mmap() + * call is little bit more than 16GB. Hence 16GB is + * chosen as the single chunk size for address space + * mapping. + */ +#define MAP_CHUNK_SIZE 17179869184UL /* 16GB */ + +/* + * Address space till 128TB is mapped without any hint + * and is enabled by default. Address space beyond 128TB + * till 512TB is obtained by passing hint address as the + * first argument into mmap() system call. + * + * The process heap address space is divided into two + * different areas one below 128TB and one above 128TB + * till it reaches 512TB. One with size 128TB and the + * other being 384TB. + */ +#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */ +#define NR_CHUNKS_384TB 24576UL /* Number of 16GB chunks for 384TB */ + +#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ + +static char *hind_addr(void) +{ + int bits = 48 + rand() % 15; + + return (char *) (1UL << bits); +} + +static int validate_addr(char *ptr, int high_addr) +{ + unsigned long addr = (unsigned long) ptr; + + if (high_addr) { + if (addr < ADDR_MARK_128TB) { + printf("Bad address %lx\n", addr); + return 1; + } + return 0; + } + + if (addr > ADDR_MARK_128TB) { + printf("Bad address %lx\n", addr); + return 1; + } + return 0; +} + +static int validate_lower_address_hint(void) +{ + char *ptr; + + ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ | + PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr == MAP_FAILED) + return 0; + + return 1; +} + +int main(int argc, char *argv[]) +{ + char *ptr[NR_CHUNKS_128TB]; + char *hptr[NR_CHUNKS_384TB]; + char *hint; + unsigned long i, lchunks, hchunks; + + for (i = 0; i < NR_CHUNKS_128TB; i++) { + ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (ptr[i] == MAP_FAILED) { + if (validate_lower_address_hint()) + return 1; + break; + } + + if (validate_addr(ptr[i], 0)) + return 1; + } + lchunks = i; + + for (i = 0; i < NR_CHUNKS_384TB; i++) { + hint = hind_addr(); + hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (hptr[i] == MAP_FAILED) + break; + + if (validate_addr(hptr[i], 1)) + return 1; + } + hchunks = i; + + for (i = 0; i < lchunks; i++) + munmap(ptr[i], MAP_CHUNK_SIZE); + + for (i = 0; i < hchunks; i++) + munmap(hptr[i], MAP_CHUNK_SIZE); + + return 0; +} diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index 6983d05097e25e2afcaeb788ed306ab19c104495..a74c9d739d0745b2aaa644c0c467c6eaba1194a4 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -24,9 +24,11 @@ const char v = 'V'; static void keep_alive(void) { int dummy; + int ret; - printf("."); - ioctl(fd, WDIOC_KEEPALIVE, &dummy); + ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy); + if (!ret) + printf("."); } /* @@ -51,6 +53,7 @@ int main(int argc, char *argv[]) int flags; unsigned int ping_rate = 1; int ret; + int i; setbuf(stdout, NULL); @@ -61,31 +64,35 @@ int main(int argc, char *argv[]) exit(-1); } - if (argc > 1) { - if (!strncasecmp(argv[1], "-d", 2)) { - flags = WDIOS_DISABLECARD; - ioctl(fd, WDIOC_SETOPTIONS, &flags); - printf("Watchdog card disabled.\n"); - goto end; - } else if (!strncasecmp(argv[1], "-e", 2)) { - flags = WDIOS_ENABLECARD; - ioctl(fd, WDIOC_SETOPTIONS, &flags); - printf("Watchdog card enabled.\n"); - goto end; - } else if (!strncasecmp(argv[1], "-t", 2) && argv[2]) { - flags = atoi(argv[2]); - ioctl(fd, WDIOC_SETTIMEOUT, &flags); - printf("Watchdog timeout set to %u seconds.\n", flags); - goto end; - } else if (!strncasecmp(argv[1], "-p", 2) && argv[2]) { - ping_rate = strtoul(argv[2], NULL, 0); - printf("Watchdog ping rate set to %u seconds.\n", ping_rate); - } else { - printf("-d to disable, -e to enable, -t to set " \ - "the timeout,\n-p to set the ping rate, and \n"); - printf("run by itself to tick the card.\n"); - goto end; - } + for (i = 1; i < argc; i++) { + if (!strncasecmp(argv[i], "-d", 2)) { + flags = WDIOS_DISABLECARD; + ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); + if (!ret) + printf("Watchdog card disabled.\n"); + } else if (!strncasecmp(argv[i], "-e", 2)) { + flags = WDIOS_ENABLECARD; + ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); + if (!ret) + printf("Watchdog card enabled.\n"); + } else if (!strncasecmp(argv[i], "-t", 2) && argv[2]) { + flags = atoi(argv[i + 1]); + ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); + if (!ret) + printf("Watchdog timeout set to %u seconds.\n", flags); + i++; + } else if (!strncasecmp(argv[i], "-p", 2) && argv[2]) { + ping_rate = strtoul(argv[i + 1], NULL, 0); + printf("Watchdog ping rate set to %u seconds.\n", ping_rate); + i++; + } else { + printf("-d to disable, -e to enable, -t to set " + "the timeout,\n-p to set the ping rate, and "); + printf("run by itself to tick the card.\n"); + printf("Parameters are parsed left-to-right in real-time.\n"); + printf("Example: %s -d -t 10 -p 5 -e\n", argv[0]); + goto end; + } } printf("Watchdog Ticking Away!\n"); diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore index 15034fef9698fcb71e72f2cc763ade6491247551..7757f73ff9a32397c97718d3302749b4d93a6016 100644 --- a/tools/testing/selftests/x86/.gitignore +++ b/tools/testing/selftests/x86/.gitignore @@ -1,2 +1,15 @@ *_32 *_64 +single_step_syscall +sysret_ss_attrs +syscall_nt +ptrace_syscall +test_mremap_vdso +check_initial_reg_state +sigreturn +ldt_gdt +iopl +mpx-mini-test +ioperm +protection_keys +test_vdso diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 38e0a9ca5d71154c4d1d32ad7af9ca9154893a33..97f187e2663f3adaf37a50674c36aafec11d291f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -40,8 +40,7 @@ all_32: $(BINARIES_32) all_64: $(BINARIES_64) -clean: - $(RM) $(BINARIES_32) $(BINARIES_64) +EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) $(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index f6121612e769f5600d1cc0920037ee4c6ee0bf92..b9a22f18566ae27eef76b22590448e2725b15e12 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -409,6 +409,51 @@ static void *threadproc(void *ctx) } } +#ifdef __i386__ + +#ifndef SA_RESTORE +#define SA_RESTORER 0x04000000 +#endif + +/* + * The UAPI header calls this 'struct sigaction', which conflicts with + * glibc. Sigh. + */ +struct fake_ksigaction { + void *handler; /* the real type is nasty */ + unsigned long sa_flags; + void (*sa_restorer)(void); + unsigned char sigset[8]; +}; + +static void fix_sa_restorer(int sig) +{ + struct fake_ksigaction ksa; + + if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) { + /* + * glibc has a nasty bug: it sometimes writes garbage to + * sa_restorer. This interacts quite badly with anything + * that fiddles with SS because it can trigger legacy + * stack switching. Patch it up. See: + * + * https://sourceware.org/bugzilla/show_bug.cgi?id=21269 + */ + if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) { + ksa.sa_restorer = NULL; + if (syscall(SYS_rt_sigaction, sig, &ksa, NULL, + sizeof(ksa.sigset)) != 0) + err(1, "rt_sigaction"); + } + } +} +#else +static void fix_sa_restorer(int sig) +{ + /* 64-bit glibc works fine. */ +} +#endif + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -420,6 +465,7 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), if (sigaction(sig, &sa, 0)) err(1, "sigaction"); + fix_sa_restorer(sig); } static jmp_buf jmpbuf; diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 616ee967333933778eaf54b84c41fcce59a5e9f4..a8df159a8924829695c0e56289f4ad29a598ac22 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -404,8 +404,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) dprintf2("info->si_lower: %p\n", __si_bounds_lower(si)); dprintf2("info->si_upper: %p\n", __si_bounds_upper(si)); - check_siginfo_vs_shadow(si); - for (i = 0; i < 8; i++) dprintf3("[%d]: %p\n", i, si_addr_ptr[i]); switch (br_reason) { @@ -416,6 +414,9 @@ void handler(int signum, siginfo_t *si, void *vucontext) exit(5); case 1: /* #BR MPX bounds exception */ /* these are normal and we expect to see them */ + + check_siginfo_vs_shadow(si); + dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n", status, (void *)ip, si->si_addr); num_bnd_chk++; diff --git a/tools/usb/.gitignore b/tools/usb/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..1b74489814351c4d273cad5123fc971658d1c1ee --- /dev/null +++ b/tools/usb/.gitignore @@ -0,0 +1,2 @@ +ffs-test +testusb diff --git a/tools/usb/usbip/README b/tools/usb/usbip/README index 5eb2b6c7722b2489552deec5a840564ad4e4745e..7844490fc603dfe9dd5fd7abe2dd0d25a600eb5a 100644 --- a/tools/usb/usbip/README +++ b/tools/usb/usbip/README @@ -244,7 +244,7 @@ Detach the imported device: - See 'Debug Tips' on the project wiki. - http://usbip.wiki.sourceforge.net/how-to-debug-usbip - usbip-host.ko must be bound to the target device. - - See /proc/bus/usb/devices and find "Driver=..." lines of the device. + - See /sys/kernel/debug/usb/devices and find "Driver=..." lines of the device. - Target USB gadget must be bound to vudc (using USB gadget susbsys, not usbip bind command) - Shutdown firewall. diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c index ac73710473deaecd079d9846f3a9f5239b8055df..1517a232ab18798ba748d2c9bf16a4a3f6fa9ef5 100644 --- a/tools/usb/usbip/libsrc/usbip_common.c +++ b/tools/usb/usbip/libsrc/usbip_common.c @@ -215,9 +215,16 @@ int read_usb_interface(struct usbip_usb_device *udev, int i, struct usbip_usb_interface *uinf) { char busid[SYSFS_BUS_ID_SIZE]; + int size; struct udev_device *sif; - sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i); + size = snprintf(busid, sizeof(busid), "%s:%d.%d", + udev->busid, udev->bConfigurationValue, i); + if (size < 0 || (unsigned int)size >= sizeof(busid)) { + err("busid length %i >= %lu or < 0", size, + (long unsigned)sizeof(busid)); + return -1; + } sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid); if (!sif) { diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index 9d415228883deb286ec684d22a311940aae943a3..6ff7b601f854562090b348097963554113e3fb75 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -40,13 +40,20 @@ struct udev *udev_context; static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) { char status_attr_path[SYSFS_PATH_MAX]; + int size; int fd; int length; char status; int value = 0; - snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status", - udev->path); + size = snprintf(status_attr_path, sizeof(status_attr_path), + "%s/usbip_status", udev->path); + if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) { + err("usbip_status path length %i >= %lu or < 0", size, + (long unsigned)sizeof(status_attr_path)); + return -1; + } + fd = open(status_attr_path, O_RDONLY); if (fd < 0) { @@ -218,6 +225,7 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) { char attr_name[] = "usbip_sockfd"; char sockfd_attr_path[SYSFS_PATH_MAX]; + int size; char sockfd_buff[30]; int ret; @@ -237,10 +245,20 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd) } /* only the first interface is true */ - snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", - edev->udev.path, attr_name); + size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", + edev->udev.path, attr_name); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) { + err("exported device path length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_attr_path)); + return -1; + } - snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) { + err("socket length %i >= %lu or < 0", size, + (long unsigned)sizeof(sockfd_buff)); + return -1; + } ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff, strlen(sockfd_buff)); diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index ad9204773533f4e3d6961dc2a9261720a39a1a44..f659c146cdc8410001a3da7197f2a47566e63a35 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -123,33 +123,15 @@ static int refresh_imported_device_list(void) static int get_nports(void) { - char *c; - int nports = 0; - const char *attr_status; + const char *attr_nports; - attr_status = udev_device_get_sysattr_value(vhci_driver->hc_device, - "status"); - if (!attr_status) { - err("udev_device_get_sysattr_value failed"); + attr_nports = udev_device_get_sysattr_value(vhci_driver->hc_device, "nports"); + if (!attr_nports) { + err("udev_device_get_sysattr_value nports failed"); return -1; } - /* skip a header line */ - c = strchr(attr_status, '\n'); - if (!c) - return 0; - c++; - - while (*c != '\0') { - /* go to the next line */ - c = strchr(c, '\n'); - if (!c) - return nports; - c++; - nports += 1; - } - - return nports; + return (int)strtoul(attr_nports, NULL, 10); } /* diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c index d7599d9435298286d63642067144c280c6110002..73d8eee8130b3db2121849dead5b7c064b0247c6 100644 --- a/tools/usb/usbip/src/usbip.c +++ b/tools/usb/usbip/src/usbip.c @@ -176,6 +176,8 @@ int main(int argc, char *argv[]) break; case '?': printf("usbip: invalid option\n"); + /* Terminate after printing error */ + /* FALLTHRU */ default: usbip_usage(); goto out; diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 9377c8b4ac167723de43088e96e5b5f0effcf4b2..d8f534025b7f7bd3cb27d663f78de9b232fa85d3 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -57,6 +57,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool ctx, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index f31353fac5415d8b9f5614e6f46f71a8f062f09b..453ca3c211933d12a5b7933852043f7154434fe6 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -20,6 +20,7 @@ int runcycles = 10000000; int max_outstanding = INT_MAX; int batch = 1; +int param = 0; bool do_sleep = false; bool do_relax = false; @@ -86,7 +87,7 @@ void set_affinity(const char *arg) cpu = strtol(arg, &endptr, 0); assert(!*endptr); - assert(cpu >= 0 || cpu < CPU_SETSIZE); + assert(cpu >= 0 && cpu < CPU_SETSIZE); self = pthread_self(); CPU_ZERO(&cpuset); @@ -246,6 +247,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'b', }, + { + .name = "param", + .has_arg = required_argument, + .val = 'p', + }, { .name = "sleep", .has_arg = no_argument, @@ -274,6 +280,7 @@ static void help(void) " [--run-cycles C (default: %d)]" " [--batch b]" " [--outstanding o]" + " [--param p]" " [--sleep]" " [--relax]" " [--exit]" @@ -328,6 +335,12 @@ int main(int argc, char **argv) assert(c > 0 && c < INT_MAX); max_outstanding = c; break; + case 'p': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + param = c; + break; case 'b': c = strtol(optarg, &endptr, 0); assert(!*endptr); diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 14142faf040b7e81a1c38a983aa76d9ae50ee4e1..90b0133004e17ddbc48b90a9eab2e6a489da13b0 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -10,6 +10,8 @@ #include +extern int param; + extern bool do_exit; #if defined(__x86_64__) || defined(__i386__) diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index 635b07b4fdd3949c7883a2775575c0ff4d8ce228..7b22f1b20652082b606e4ee55dae31b56f670ef3 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -97,6 +97,9 @@ void alloc_ring(void) { int ret = ptr_ring_init(&array, ring_size, 0); assert(!ret); + /* Hacky way to poke at ring internals. Useful for testing though. */ + if (param) + array.batch = param; } /* guest side */ diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index e0445898f08fa981d372d4de6bad4eebe265cb15..0fecaec90d0d69cf622555104cf9b5f66001b68d 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -100,7 +100,7 @@ static void vq_info_add(struct vdev_info *dev, int num) vring_init(&info->vring, num, info->ring, 4096); info->vq = vring_new_virtqueue(info->idx, info->vring.num, 4096, &dev->vdev, - true, info->ring, + true, false, info->ring, vq_notify, vq_callback, "test"); assert(info->vq); info->vq->priv = info; @@ -202,7 +202,7 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, test = 0; r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); - fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious); + fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious); } const char optstring[] = "h"; diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 5f94f51056781e1f8a662be8db245cfd0285b5a8..9476c616d0642c5e2f0162d4fabd3945a98d4f32 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -314,7 +314,8 @@ static int parallel_test(u64 features, err(1, "Could not set affinity to cpu %u", first_cpu); vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true, - guest_map, fast_vringh ? no_notify_host + false, guest_map, + fast_vringh ? no_notify_host : parallel_notify_host, never_callback_guest, "guest vq"); @@ -479,7 +480,7 @@ int main(int argc, char *argv[]) memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN)); /* Set up guest side. */ - vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, + vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false, __user_addr_min, never_notify_host, never_callback_guest, "guest vq"); @@ -663,7 +664,7 @@ int main(int argc, char *argv[]) /* Force creation of direct, which we modify. */ __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC); vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, - __user_addr_min, + false, __user_addr_min, never_notify_host, never_callback_guest, "guest vq"); diff --git a/usr/Kconfig b/usr/Kconfig index 6278f135256d424c153d3fadc48f8636ecd24ec1..ad0543e21760562d94bc0a44676700b1179c1787 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -21,6 +21,16 @@ config INITRAMFS_SOURCE If you are not sure, leave it blank. +config INITRAMFS_FORCE + bool "Ignore the initramfs passed by the bootloader" + depends on CMDLINE_EXTEND || CMDLINE_FORCE + help + This option causes the kernel to ignore the initramfs image + (or initrd image) passed to it by the bootloader. This is + analogous to CMDLINE_FORCE, which is found on some architectures, + and is useful if you cannot or don't want to change the image + your bootloader passes to the kernel. + config INITRAMFS_ROOT_UID int "User ID to map to 0 (user root)" depends on INITRAMFS_SOURCE!="" @@ -210,6 +220,7 @@ config INITRAMFS_COMPRESSION_LZ4 endchoice config INITRAMFS_COMPRESSION + depends on INITRAMFS_SOURCE!="" string default "" if INITRAMFS_COMPRESSION_NONE default ".gz" if INITRAMFS_COMPRESSION_GZIP diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 35d7100e0815c4fc34acdebc17e794dfd569dbcf..5976609ef27cb9bf47ca997e08be38833d51f5d3 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -184,28 +184,47 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) return cval <= now; } +/* + * Reflect the timer output level into the kvm_run structure + */ +void kvm_timer_update_run(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the device bitmap with the timer states */ + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | + KVM_ARM_DEV_EL1_PTIMER); + if (vtimer->irq.level) + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; + if (ptimer->irq.level) + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; +} + static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx) { int ret; - BUG_ON(!vgic_initialized(vcpu->kvm)); - timer_ctx->active_cleared_last = false; timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); - WARN_ON(ret); + if (likely(irqchip_in_kernel(vcpu->kvm))) { + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer_ctx->irq.irq, + timer_ctx->irq.level); + WARN_ON(ret); + } } /* * Check if there was a change in the timer state (should we raise or lower * the line level to the GIC). */ -static int kvm_timer_update_state(struct kvm_vcpu *vcpu) +static void kvm_timer_update_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); @@ -217,16 +236,14 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) * because the guest would never see the interrupt. Instead wait * until we call this function from kvm_timer_flush_hwstate. */ - if (!vgic_initialized(vcpu->kvm) || !timer->enabled) - return -ENODEV; + if (unlikely(!timer->enabled)) + return; if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); - - return 0; } /* Schedule the background timer for the emulated timer. */ @@ -286,25 +303,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) timer_disarm(timer); } -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the host, - * and inject an interrupt if that was the case. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; - if (kvm_timer_update_state(vcpu)) - return; - - /* Set the background timer for the physical timer emulation. */ - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); - /* * If we enter the guest with the virtual input level to the VGIC * asserted, then we have already told the VGIC what we need to, and @@ -356,11 +360,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) vtimer->active_cleared_last = !phys_active; } +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool vlevel, plevel; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; + + return vtimer->irq.level != vlevel || + ptimer->irq.level != plevel; +} + +static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * To prevent continuously exiting from the guest, we mask the + * physical interrupt such that the guest can make forward progress. + * Once we detect the output level being deasserted, we unmask the + * interrupt again so that we exit from the guest when the timer + * fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, 0); +} + +/** + * kvm_timer_flush_hwstate - prepare timers before running the vcpu + * @vcpu: The vcpu pointer + * + * Check if the virtual timer has expired while we were running in the host, + * and inject an interrupt if that was the case, making sure the timer is + * masked or disabled on the host so that we keep executing. Also schedule a + * software timer for the physical timer if it is enabled. + */ +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + if (unlikely(!timer->enabled)) + return; + + kvm_timer_update_state(vcpu); + + /* Set the background timer for the physical timer emulation. */ + kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_timer_flush_hwstate_user(vcpu); + else + kvm_timer_flush_hwstate_vgic(vcpu); +} + /** * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer has expired while we were running in the guest, + * Check if any of the timers have expired while we were running in the guest, * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) @@ -560,6 +625,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (timer->enabled) return 0; + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ + if (!irqchip_in_kernel(vcpu->kvm)) + goto no_vgic; + + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -583,8 +655,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (ret) return ret; +no_vgic: timer->enabled = 1; - return 0; } diff --git a/arch/arm/kvm/arm.c b/virt/kvm/arm/arm.c similarity index 97% rename from arch/arm/kvm/arm.c rename to virt/kvm/arm/arm.c index 314eb6abe1ff9879272fdae542c2e0043f3759eb..3417e184c8e144d32e4d1fa3983b454754b51670 100644 --- a/arch/arm/kvm/arm.c +++ b/virt/kvm/arm/arm.c @@ -53,7 +53,6 @@ __asm__(".arch_extension virt"); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); static kvm_cpu_context_t __percpu *kvm_host_cpu_state; -static unsigned long hyp_default_vectors; /* Per-CPU variable containing the currently running vcpu. */ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); @@ -209,9 +208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; case KVM_CAP_ARM_SET_DEVICE_ADDR: r = 1; break; @@ -230,6 +226,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = kvm->arch.vgic.msis_require_devid; break; + case KVM_CAP_ARM_USER_IRQ: + /* + * 1: EL1_VTIMER, EL1_PTIMER, and PMU. + * (bump this number if adding more devices) + */ + r = 1; + break; default: r = kvm_arch_dev_ioctl_check_extension(kvm, ext); break; @@ -329,7 +332,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_arm_reset_debug_ptr(vcpu); - return 0; + return kvm_vgic_vcpu_init(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -351,15 +354,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); kvm_arm_set_running_vcpu(vcpu); + + kvm_vgic_load(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - /* - * The arch-generic KVM code expects the cpu field of a vcpu to be -1 - * if the vcpu is no longer assigned to a cpu. This is used for the - * optimized make_all_cpus_request path. - */ + kvm_vgic_put(vcpu); + vcpu->cpu = -1; kvm_arm_set_running_vcpu(NULL); @@ -517,13 +519,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Enable the arch timers only if we have an in-kernel VGIC - * and it has been properly initialized, since we cannot handle - * interrupts from the virtual timer with a userspace gic. - */ - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) - ret = kvm_timer_enable(vcpu); + ret = kvm_timer_enable(vcpu); return ret; } @@ -633,16 +629,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * non-preemptible context. */ preempt_disable(); + kvm_pmu_flush_hwstate(vcpu); + kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); local_irq_disable(); /* - * Re-check atomic conditions + * If we have a singal pending, or need to notify a userspace + * irqchip about timer or PMU level changes, then we exit (and + * update the timer level state in kvm_timer_update_run + * below). */ - if (signal_pending(current)) { + if (signal_pending(current) || + kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; } @@ -714,6 +717,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = handle_exit(vcpu, run, ret); } + /* Tell userspace about in-kernel device output levels */ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { + kvm_timer_update_run(vcpu); + kvm_pmu_update_run(vcpu); + } + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); return ret; @@ -1112,8 +1121,16 @@ static void cpu_init_hyp_mode(void *dummy) kvm_arm_init_debug(); } +static void cpu_hyp_reset(void) +{ + if (!is_kernel_in_hyp_mode()) + __hyp_reset_vectors(); +} + static void cpu_hyp_reinit(void) { + cpu_hyp_reset(); + if (is_kernel_in_hyp_mode()) { /* * __cpu_init_stage2() is safe to call even if the PM @@ -1121,21 +1138,13 @@ static void cpu_hyp_reinit(void) */ __cpu_init_stage2(); } else { - if (__hyp_get_vectors() == hyp_default_vectors) - cpu_init_hyp_mode(NULL); + cpu_init_hyp_mode(NULL); } if (vgic_present) kvm_vgic_init_cpu_hardware(); } -static void cpu_hyp_reset(void) -{ - if (!is_kernel_in_hyp_mode()) - __cpu_reset_hyp_mode(hyp_default_vectors, - kvm_get_idmap_start()); -} - static void _kvm_arch_hardware_enable(void *discard) { if (!__this_cpu_read(kvm_arm_hardware_enabled)) { @@ -1318,12 +1327,6 @@ static int init_hyp_mode(void) if (err) goto out_err; - /* - * It is probably enough to obtain the default on one - * CPU. It's unlikely to be different on the others. - */ - hyp_default_vectors = __hyp_get_vectors(); - /* * Allocate stack pages for Hypervisor-mode */ diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index c8aeb7b91ec89bdf023151fc3ba5456be67f4d53..a3f18d3623661ae6204750fa9aa8639a5fde7617 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -22,49 +22,6 @@ #include #include -static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, - void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 eisr0, eisr1; - int i; - bool expect_mi; - - expect_mi = !!(cpu_if->vgic_hcr & GICH_HCR_UIE); - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - expect_mi |= (!(cpu_if->vgic_lr[i] & GICH_LR_HW) && - (cpu_if->vgic_lr[i] & GICH_LR_EOI)); - } - - if (expect_mi) { - cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); - - if (cpu_if->vgic_misr & GICH_MISR_EOI) { - eisr0 = readl_relaxed(base + GICH_EISR0); - if (unlikely(nr_lr > 32)) - eisr1 = readl_relaxed(base + GICH_EISR1); - else - eisr1 = 0; - } else { - eisr0 = eisr1 = 0; - } - } else { - cpu_if->vgic_misr = 0; - eisr0 = eisr1 = 0; - } - -#ifdef CONFIG_CPU_BIG_ENDIAN - cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; -#else - cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; -#endif -} - static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; @@ -87,13 +44,10 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - + for (i = 0; i < used_lrs; i++) { if (cpu_if->vgic_elrsr & (1UL << i)) cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; else @@ -110,26 +64,20 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_dist *vgic = &kvm->arch.vgic; void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; if (!base) return; - cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); - - if (vcpu->arch.vgic_cpu.live_lrs) { + if (used_lrs) { cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - save_maint_int_state(vcpu, base); save_elrsr(vcpu, base); save_lrs(vcpu, base); writel_relaxed(0, base + GICH_HCR); - - vcpu->arch.vgic_cpu.live_lrs = 0; } else { - cpu_if->vgic_eisr = 0; cpu_if->vgic_elrsr = ~0UL; - cpu_if->vgic_misr = 0; cpu_if->vgic_apr = 0; } } @@ -141,32 +89,20 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_dist *vgic = &kvm->arch.vgic; void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; - u64 live_lrs = 0; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; if (!base) return; - - for (i = 0; i < nr_lr; i++) - if (cpu_if->vgic_lr[i] & GICH_LR_STATE) - live_lrs |= 1UL << i; - - if (live_lrs) { + if (used_lrs) { writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - for (i = 0; i < nr_lr; i++) { - if (!(live_lrs & (1UL << i))) - continue; - + for (i = 0; i < used_lrs; i++) { writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); } } - - writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); - vcpu->arch.vgic_cpu.live_lrs = live_lrs; } #ifdef CONFIG_ARM64 diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 3947095cc0a1cec88e2ad2f5fa3a3e5ea81111c1..87940364570bcbff87cfa4447cb555e96612355b 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -22,7 +22,7 @@ #include #define vtr_to_max_lr_idx(v) ((v) & 0xf) -#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) +#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) { @@ -118,66 +118,32 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } -static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, int nr_lr) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - int i; - bool expect_mi; - - expect_mi = !!(cpu_if->vgic_hcr & ICH_HCR_UIE); - - for (i = 0; i < nr_lr; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; - - expect_mi |= (!(cpu_if->vgic_lr[i] & ICH_LR_HW) && - (cpu_if->vgic_lr[i] & ICH_LR_EOI)); - } - - if (expect_mi) { - cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); - - if (cpu_if->vgic_misr & ICH_MISR_EOI) - cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); - else - cpu_if->vgic_eisr = 0; - } else { - cpu_if->vgic_misr = 0; - cpu_if->vgic_eisr = 0; - } -} - void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 val; /* * Make sure stores to the GIC via the memory mapped interface * are now visible to the system register interface. */ - if (!cpu_if->vgic_sre) + if (!cpu_if->vgic_sre) { dsb(st); + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); - - if (vcpu->arch.vgic_cpu.live_lrs) { + if (used_lrs) { int i; - u32 max_lr_idx, nr_pri_bits; + u32 nr_pre_bits; cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); - nr_pri_bits = vtr_to_nr_pri_bits(val); - - save_maint_int_state(vcpu, max_lr_idx + 1); - - for (i = 0; i <= max_lr_idx; i++) { - if (!(vcpu->arch.vgic_cpu.live_lrs & (1UL << i))) - continue; + nr_pre_bits = vtr_to_nr_pre_bits(val); + for (i = 0; i < used_lrs; i++) { if (cpu_if->vgic_elrsr & (1 << i)) cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; else @@ -186,7 +152,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) __gic_v3_set_lr(0, i); } - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); @@ -196,7 +162,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); } - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); @@ -205,11 +171,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) default: cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); } - - vcpu->arch.vgic_cpu.live_lrs = 0; } else { - cpu_if->vgic_misr = 0; - cpu_if->vgic_eisr = 0; cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; @@ -234,9 +196,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 val; - u32 max_lr_idx, nr_pri_bits; - u16 live_lrs = 0; + u32 nr_pre_bits; int i; /* @@ -245,28 +207,22 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * delivered as a FIQ to the guest, with potentially fatal * consequences. So we must make sure that ICC_SRE_EL1 has * been actually programmed with the value we want before - * starting to mess with the rest of the GIC. + * starting to mess with the rest of the GIC, and VMCR_EL2 in + * particular. */ if (!cpu_if->vgic_sre) { write_gicreg(0, ICC_SRE_EL1); isb(); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); } val = read_gicreg(ICH_VTR_EL2); - max_lr_idx = vtr_to_max_lr_idx(val); - nr_pri_bits = vtr_to_nr_pri_bits(val); + nr_pre_bits = vtr_to_nr_pre_bits(val); - for (i = 0; i <= max_lr_idx; i++) { - if (cpu_if->vgic_lr[i] & ICH_LR_STATE) - live_lrs |= (1 << i); - } - - write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - - if (live_lrs) { + if (used_lrs) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); @@ -276,7 +232,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); } - switch (nr_pri_bits) { + switch (nr_pre_bits) { case 7: write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); @@ -286,12 +242,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); } - for (i = 0; i <= max_lr_idx; i++) { - if (!(live_lrs & (1 << i))) - continue; - + for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); - } } /* @@ -303,7 +255,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) isb(); dsb(sy); } - vcpu->arch.vgic_cpu.live_lrs = live_lrs; /* * Prevent the guest from touching the GIC system registers if @@ -326,3 +277,13 @@ u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } + +u64 __hyp_text __vgic_v3_read_vmcr(void) +{ + return read_gicreg(ICH_VMCR_EL2); +} + +void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +{ + write_gicreg(vmcr, ICH_VMCR_EL2); +} diff --git a/arch/arm/kvm/mmio.c b/virt/kvm/arm/mmio.c similarity index 100% rename from arch/arm/kvm/mmio.c rename to virt/kvm/arm/mmio.c diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c new file mode 100644 index 0000000000000000000000000000000000000000..e2e5effba2a999577edb338fdc0c2cf5c9a1e63f --- /dev/null +++ b/virt/kvm/arm/mmu.c @@ -0,0 +1,1987 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" + +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; +static pgd_t *merged_hyp_pgd; +static DEFINE_MUTEX(kvm_hyp_pgd_mutex); + +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + +#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) +#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + +#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) +#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) + +static bool memslot_is_logging(struct kvm_memory_slot *memslot) +{ + return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); +} + +/** + * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries + */ +void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); +} + +static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); +} + +/* + * D-Cache management functions. They take the page table entries by + * value, as they are flushing the cache using the kernel mapping (or + * kmap on 32bit). + */ +static void kvm_flush_dcache_pte(pte_t pte) +{ + __kvm_flush_dcache_pte(pte); +} + +static void kvm_flush_dcache_pmd(pmd_t pmd) +{ + __kvm_flush_dcache_pmd(pmd); +} + +static void kvm_flush_dcache_pud(pud_t pud) +{ + __kvm_flush_dcache_pud(pud); +} + +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + +/** + * stage2_dissolve_pmd() - clear and flush huge PMD entry + * @kvm: pointer to kvm structure. + * @addr: IPA + * @pmd: pmd pointer for IPA + * + * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all + * pages in the range dirty. + */ +static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +{ + if (!pmd_thp_or_huge(*pmd)) + return; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); +} + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + int min, int max) +{ + void *page; + + BUG_ON(max > KVM_NR_MEM_OBJS); + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < max) { + page = (void *)__get_free_page(PGALLOC_GFP); + if (!page) + return -ENOMEM; + cache->objects[cache->nobjs++] = page; + } + return 0; +} + +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); +} + +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) +{ + void *p; + + BUG_ON(!mc || !mc->nobjs); + p = mc->objects[--mc->nobjs]; + return p; +} + +static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +{ + pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); + stage2_pgd_clear(pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + stage2_pud_free(pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +{ + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); + VM_BUG_ON(stage2_pud_huge(*pud)); + stage2_pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + stage2_pmd_free(pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +/* + * Unmapping vs dcache management: + * + * If a guest maps certain memory pages as uncached, all writes will + * bypass the data cache and go directly to RAM. However, the CPUs + * can still speculate reads (not writes) and fill cache lines with + * data. + * + * Those cache lines will be *clean* cache lines though, so a + * clean+invalidate operation is equivalent to an invalidate + * operation, because no cache lines are marked dirty. + * + * Those clean cache lines could be filled prior to an uncached write + * by the guest, and the cache coherent IO subsystem would therefore + * end up writing old data to disk. + * + * This is why right after unmapping a page/section and invalidating + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure + * the IO subsystem will never hit in the cache. + */ +static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + pte_t old_pte = *pte; + + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + + /* No need to invalidate the cache for device mappings */ + if (!kvm_is_device_pfn(pte_pfn(old_pte))) + kvm_flush_dcache_pte(old_pte); + + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (stage2_pte_table_empty(start_pte)) + clear_stage2_pmd_entry(kvm, pmd, start_addr); +} + +static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = stage2_pmd_offset(pud, addr); + do { + next = stage2_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { + pmd_t old_pmd = *pmd; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pmd(old_pmd); + + put_page(virt_to_page(pmd)); + } else { + unmap_stage2_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); + + if (stage2_pmd_table_empty(start_pmd)) + clear_stage2_pud_entry(kvm, pud, start_addr); +} + +static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; + + start_pud = pud = stage2_pud_offset(pgd, addr); + do { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + if (stage2_pud_huge(*pud)) { + pud_t old_pud = *pud; + + stage2_pud_clear(pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_flush_dcache_pud(old_pud); + put_page(virt_to_page(pud)); + } else { + unmap_stage2_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); + + if (stage2_pud_table_empty(start_pud)) + clear_stage2_pgd_entry(kvm, pgd, start_addr); +} + +/** + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range + * @kvm: The VM pointer + * @start: The intermediate physical base address of the range to unmap + * @size: The size of the area to unmap + * + * Clear a range of stage-2 mappings, lowering the various ref-counts. Must + * be called while holding mmu_lock (unless for freeing the stage2 pgd before + * destroying the VM), otherwise another faulting VCPU may come in and mess + * with things behind our backs. + */ +static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + assert_spin_locked(&kvm->mmu_lock); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); + do { + /* + * Make sure the page table is still active, as another thread + * could have possibly freed the page table, while we released + * the lock. + */ + if (!READ_ONCE(kvm->arch.pgd)) + break; + next = stage2_pgd_addr_end(addr, end); + if (!stage2_pgd_none(*pgd)) + unmap_stage2_puds(kvm, pgd, addr, next); + /* + * If the range is too large, release the kvm->mmu_lock + * to prevent starvation and lockup detector warnings. + */ + if (next != end) + cond_resched_lock(&kvm->mmu_lock); + } while (pgd++, addr = next, addr != end); +} + +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte))) + kvm_flush_dcache_pte(*pte); + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = stage2_pmd_offset(pud, addr); + do { + next = stage2_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) + kvm_flush_dcache_pmd(*pmd); + else + stage2_flush_ptes(kvm, pmd, addr, next); + } + } while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = stage2_pud_offset(pgd, addr); + do { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + if (stage2_pud_huge(*pud)) + kvm_flush_dcache_pud(*pud); + else + stage2_flush_pmds(kvm, pud, addr, next); + } + } while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + stage2_pgd_index(addr); + do { + next = stage2_pgd_addr_end(addr, end); + stage2_flush_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. + */ +static void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + +static void clear_hyp_pgd_entry(pgd_t *pgd) +{ + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); + pgd_clear(pgd); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_hyp_pud_entry(pud_t *pud) +{ + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_hyp_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (hyp_pte_table_empty(start_pte)) + clear_hyp_pmd_entry(pmd); +} + +static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* Hyp doesn't use huge pmds */ + if (!pmd_none(*pmd)) + unmap_hyp_ptes(pmd, addr, next); + } while (pmd++, addr = next, addr != end); + + if (hyp_pmd_table_empty(start_pmd)) + clear_hyp_pud_entry(pud); +} + +static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + /* Hyp doesn't use huge puds */ + if (!pud_none(*pud)) + unmap_hyp_pmds(pud, addr, next); + } while (pud++, addr = next, addr != end); + + if (hyp_pud_table_empty(start_pud)) + clear_hyp_pgd_entry(pgd); +} + +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + /* + * We don't unmap anything from HYP, except at the hyp tear down. + * Hence, we don't have to invalidate the TLBs here. + */ + pgd = pgdp + pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + if (!pgd_none(*pgd)) + unmap_hyp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the vmalloc range (from + * VMALLOC_START to VMALLOC_END). + * + * boot_hyp_pgd should only map two pages for the init code. + */ +void free_hyp_pgds(void) +{ + unsigned long addr; + + mutex_lock(&kvm_hyp_pgd_mutex); + + if (boot_hyp_pgd) { + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) { + unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); + + free_pages((unsigned long)hyp_pgd, hyp_pgd_order); + hyp_pgd = NULL; + } + if (merged_hyp_pgd) { + clear_page(merged_hyp_pgd); + free_page((unsigned long)merged_hyp_pgd); + merged_hyp_pgd = NULL; + } + + mutex_unlock(&kvm_hyp_pgd_mutex); +} + +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pte_t *pte; + unsigned long addr; + + addr = start; + do { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + kvm_flush_dcache_to_poc(pte, sizeof(*pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); +} + +static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pmd_t *pmd; + pte_t *pte; + unsigned long addr, next; + + addr = start; + do { + pmd = pmd_offset(pud, addr); + + BUG_ON(pmd_sect(*pmd)); + + if (pmd_none(*pmd)) { + pte = pte_alloc_one_kernel(NULL, addr); + if (!pte) { + kvm_err("Cannot allocate Hyp pte\n"); + return -ENOMEM; + } + pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); + } + + next = pmd_addr_end(addr, end); + + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + +static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pud_t *pud; + pmd_t *pmd; + unsigned long addr, next; + int ret; + + addr = start; + do { + pud = pud_offset(pgd, addr); + + if (pud_none_or_clear_bad(pud)) { + pmd = pmd_alloc_one(NULL, addr); + if (!pmd) { + kvm_err("Cannot allocate Hyp pmd\n"); + return -ENOMEM; + } + pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); + } + + next = pud_addr_end(addr, end); + ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); + if (ret) + return ret; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + +static int __create_hyp_mappings(pgd_t *pgdp, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + pgd_t *pgd; + pud_t *pud; + unsigned long addr, next; + int err = 0; + + mutex_lock(&kvm_hyp_pgd_mutex); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + pgd_index(addr); + + if (pgd_none(*pgd)) { + pud = pud_alloc_one(NULL, addr); + if (!pud) { + kvm_err("Cannot allocate Hyp pud\n"); + err = -ENOMEM; + goto out; + } + pgd_populate(NULL, pgd, pud); + get_page(virt_to_page(pgd)); + kvm_flush_dcache_to_poc(pgd, sizeof(*pgd)); + } + + next = pgd_addr_end(addr, end); + err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot); + if (err) + goto out; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); +out: + mutex_unlock(&kvm_hyp_pgd_mutex); + return err; +} + +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + +/** + * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode + * @from: The virtual kernel start address of the range + * @to: The virtual kernel end address of the range (exclusive) + * @prot: The protection to be applied to this range + * + * The same virtual address as the kernel virtual address is also used + * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying + * physical pages. + */ +int create_hyp_mappings(void *from, void *to, pgprot_t prot) +{ + phys_addr_t phys_addr; + unsigned long virt_addr; + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); + + if (is_kernel_in_hyp_mode()) + return 0; + + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); + + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, virt_addr, + virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + prot); + if (err) + return err; + } + + return 0; +} + +/** + * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode + * @from: The kernel start VA of the range + * @to: The kernel end VA of the range (exclusive) + * @phys_addr: The physical start address which gets mapped + * + * The resulting HYP VA is the same as the kernel VA, modulo + * HYP_PAGE_OFFSET. + */ +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) +{ + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); + + if (is_kernel_in_hyp_mode()) + return 0; + + /* Check for a valid kernel IO mapping */ + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); +} + +/** + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. + * @kvm: The KVM struct pointer for the VM. + * + * Allocates only the stage-2 HW PGD level table(s) (can support either full + * 40-bit input addresses or limited to 32-bit input addresses). Clears the + * allocated pages. + * + * Note we don't need locking here as this is only called when the VM is + * created, which can only be done once. + */ +int kvm_alloc_stage2_pgd(struct kvm *kvm) +{ + pgd_t *pgd; + + if (kvm->arch.pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + + /* Allocate the HW PGD, making sure that each page gets its own refcount */ + pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); + if (!pgd) + return -ENOMEM; + + kvm->arch.pgd = pgd; + return 0; +} + +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any reguler RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + down_read(¤t->mm->mmap_sem); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + up_read(¤t->mm->mmap_sem); + srcu_read_unlock(&kvm->srcu, idx); +} + +/** + * kvm_free_stage2_pgd - free all stage-2 tables + * @kvm: The KVM struct pointer for the VM. + * + * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all + * underlying level-2 and level-3 tables before freeing the actual level-1 table + * and setting the struct pointer to NULL. + */ +void kvm_free_stage2_pgd(struct kvm *kvm) +{ + void *pgd = NULL; + + spin_lock(&kvm->mmu_lock); + if (kvm->arch.pgd) { + unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + pgd = READ_ONCE(kvm->arch.pgd); + kvm->arch.pgd = NULL; + } + spin_unlock(&kvm->mmu_lock); + + /* Free the HW pgd, one page at a time */ + if (pgd) + free_pages_exact(pgd, S2_PGD_SIZE); +} + +static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = kvm->arch.pgd + stage2_pgd_index(addr); + if (WARN_ON(stage2_pgd_none(*pgd))) { + if (!cache) + return NULL; + pud = mmu_memory_cache_alloc(cache); + stage2_pgd_populate(pgd, pud); + get_page(virt_to_page(pgd)); + } + + return stage2_pud_offset(pgd, addr); +} + +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = stage2_get_pud(kvm, cache, addr); + if (!pud) + return NULL; + + if (stage2_pud_none(*pud)) { + if (!cache) + return NULL; + pmd = mmu_memory_cache_alloc(cache); + stage2_pud_populate(pud, pmd); + get_page(virt_to_page(pud)); + } + + return stage2_pmd_offset(pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + /* + * Mapping in huge pages should only happen through a fault. If a + * page is merged into a transparent huge page, the individual + * subpages of that huge page should be unmapped through MMU + * notifiers before we get here. + * + * Merging of CompoundPages is not supported; they should become + * splitting first, unmapped, merged, and mapped back in on-demand. + */ + VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); + + old_pmd = *pmd; + if (pmd_present(old_pmd)) { + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pmd)); + } + + kvm_set_pmd(pmd, *new_pmd); + return 0; +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, + unsigned long flags) +{ + pmd_t *pmd; + pte_t *pte, old_pte; + bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP; + bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE; + + VM_BUG_ON(logging_active && !cache); + + /* Create stage-2 page table mapping - Levels 0 and 1 */ + pmd = stage2_get_pmd(kvm, cache, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* + * While dirty page logging - dissolve huge PMD, then continue on to + * allocate page. + */ + if (logging_active) + stage2_dissolve_pmd(kvm, addr, pmd); + + /* Create stage-2 page mappings - Level 2 */ + if (pmd_none(*pmd)) { + if (!cache) + return 0; /* ignore calls from kvm_set_spte_hva */ + pte = mmu_memory_cache_alloc(cache); + pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); + } + + pte = pte_offset_kernel(pmd, addr); + + if (iomap && pte_present(*pte)) + return -EFAULT; + + /* Create 2nd stage page table mapping - Level 3 */ + old_pte = *pte; + if (pte_present(old_pte)) { + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pte)); + } + + kvm_set_pte(pte, *new_pte); + return 0; +} + +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); + return 1; + } + return 0; +} +#else +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + return __ptep_test_and_clear_young(pte); +} +#endif + +static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pmd); +} + +/** + * kvm_phys_addr_ioremap - map a device range to guest IPA + * + * @kvm: The KVM pointer + * @guest_ipa: The IPA at which to insert the mapping + * @pa: The physical address of the device + * @size: The size of the mapping + */ +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size, bool writable) +{ + phys_addr_t addr, end; + int ret = 0; + unsigned long pfn; + struct kvm_mmu_memory_cache cache = { 0, }; + + end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK; + pfn = __phys_to_pfn(pa); + + for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { + pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); + + if (writable) + pte = kvm_s2pte_mkwrite(pte); + + ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); + if (ret) + goto out; + spin_lock(&kvm->mmu_lock); + ret = stage2_set_pte(kvm, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); + spin_unlock(&kvm->mmu_lock); + if (ret) + goto out; + + pfn++; + } + +out: + mmu_free_memory_cache(&cache); + return ret; +} + +static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) +{ + kvm_pfn_t pfn = *pfnp; + gfn_t gfn = *ipap >> PAGE_SHIFT; + + if (PageTransCompoundMap(pfn_to_page(pfn))) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + +static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + +/** + * stage2_wp_ptes - write protect PMD range + * @pmd: pointer to pmd entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + if (!kvm_s2pte_readonly(pte)) + kvm_set_s2pte_readonly(pte); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +/** + * stage2_wp_pmds - write protect PUD range + * @pud: pointer to pud entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = stage2_pmd_offset(pud, addr); + + do { + next = stage2_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { + if (!kvm_s2pmd_readonly(pmd)) + kvm_set_s2pmd_readonly(pmd); + } else { + stage2_wp_ptes(pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +/** + * stage2_wp_puds - write protect PGD range + * @pgd: pointer to pgd entry + * @addr: range start address + * @end: range end address + * + * Process PUD entries, for a huge PUD we cause a panic. + */ +static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = stage2_pud_offset(pgd, addr); + do { + next = stage2_pud_addr_end(addr, end); + if (!stage2_pud_none(*pud)) { + /* TODO:PUD not supported, revisit later if supported */ + BUG_ON(stage2_pud_huge(*pud)); + stage2_wp_pmds(pud, addr, next); + } + } while (pud++, addr = next, addr != end); +} + +/** + * stage2_wp_range() - write protect stage2 memory region range + * @kvm: The KVM pointer + * @addr: Start address of range + * @end: End address of range + */ +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +{ + pgd_t *pgd; + phys_addr_t next; + + pgd = kvm->arch.pgd + stage2_pgd_index(addr); + do { + /* + * Release kvm_mmu_lock periodically if the memory region is + * large. Otherwise, we may see kernel panics with + * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, + * CONFIG_LOCKDEP. Additionally, holding the lock too long + * will also starve other vCPUs. We have to also make sure + * that the page tables are not freed while we released + * the lock. + */ + cond_resched_lock(&kvm->mmu_lock); + if (!READ_ONCE(kvm->arch.pgd)) + break; + next = stage2_pgd_addr_end(addr, end); + if (stage2_pgd_present(*pgd)) + stage2_wp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot + * @kvm: The KVM pointer + * @slot: The memory slot to write protect + * + * Called to start logging dirty pages after memory region + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns + * all present PMD and PTEs are write protected in the memory region. + * Afterwards read of dirty page log can be called. + * + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, + * serializing operations for VM memory regions. + */ +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_wp_range(kvm, start, end); + spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); +} + +/** + * kvm_mmu_write_protect_pt_masked() - write protect dirty pages + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory + * slot to be write protected + * + * Walks bits set in mask write protects the associated pte's. Caller must + * acquire kvm_mmu_lock. + */ +static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + phys_addr_t base_gfn = slot->base_gfn + gfn_offset; + phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + + stage2_wp_range(kvm, start, end); +} + +/* + * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected + * dirty pages. + * + * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to + * enable dirty logging for them. + */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); +} + +static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, + unsigned long size) +{ + __coherent_cache_guest_page(vcpu, pfn, size); +} + +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_memory_slot *memslot, unsigned long hva, + unsigned long fault_status) +{ + int ret; + bool write_fault, writable, hugetlb = false, force_pte = false; + unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + kvm_pfn_t pfn; + pgprot_t mem_type = PAGE_S2; + bool logging_active = memslot_is_logging(memslot); + unsigned long flags = 0; + + write_fault = kvm_is_write_fault(vcpu); + if (fault_status == FSC_PERM && !write_fault) { + kvm_err("Unexpected L2 read permission error\n"); + return -EFAULT; + } + + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (unlikely(!vma)) { + kvm_err("Failed to find VMA for hva 0x%lx\n", hva); + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + + if (is_vm_hugetlb_page(vma) && !logging_active) { + hugetlb = true; + gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; + } else { + /* + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. + */ + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) + force_pte = true; + } + up_read(¤t->mm->mmap_sem); + + /* We need minimum second+third level pages */ + ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); + if (ret) + return ret; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* + * Ensure the read of mmu_notifier_seq happens before we call + * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk + * the page we just got a reference to gets unmapped before we have a + * chance to grab the mmu_lock, which ensure that if the page gets + * unmapped afterwards, the call to kvm_unmap_hva will take it away + * from us again properly. This smp_rmb() interacts with the smp_wmb() + * in kvm_mmu_notifier_invalidate_. + */ + smp_rmb(); + + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); + if (is_error_noslot_pfn(pfn)) + return -EFAULT; + + if (kvm_is_device_pfn(pfn)) { + mem_type = PAGE_S2_DEVICE; + flags |= KVM_S2PTE_FLAG_IS_IOMAP; + } else if (logging_active) { + /* + * Faults on pages in a memslot with logging enabled + * should not be mapped with huge pages (it introduces churn + * and performance degradation), so force a pte mapping. + */ + force_pte = true; + flags |= KVM_S2_FLAG_LOGGING_ACTIVE; + + /* + * Only actually map the page as writable if this was a write + * fault. + */ + if (!write_fault) + writable = false; + } + + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) + goto out_unlock; + + if (!hugetlb && !force_pte) + hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); + + if (hugetlb) { + pmd_t new_pmd = pfn_pmd(pfn, mem_type); + new_pmd = pmd_mkhuge(new_pmd); + if (writable) { + new_pmd = kvm_s2pmd_mkwrite(new_pmd); + kvm_set_pfn_dirty(pfn); + } + coherent_cache_guest_page(vcpu, pfn, PMD_SIZE); + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, mem_type); + + if (writable) { + new_pte = kvm_s2pte_mkwrite(new_pte); + kvm_set_pfn_dirty(pfn); + mark_page_dirty(kvm, gfn); + } + coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE); + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + } + +out_unlock: + spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); + kvm_release_pfn_clean(pfn); + return ret; +} + +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), + * so there is no need for atomic (pte|pmd)_mkyoung operations. + */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pmd_t *pmd; + pte_t *pte; + kvm_pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + goto out; + + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + goto out; + } + + pte = pte_offset_kernel(pmd, fault_ipa); + if (pte_none(*pte)) /* Nothing there either */ + goto out; + + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + +/** + * kvm_handle_guest_abort - handles all 2nd stage aborts + * @vcpu: the VCPU pointer + * @run: the kvm_run structure + * + * Any abort that gets to the host is almost guaranteed to be caused by a + * missing second stage translation table entry, which can mean that either the + * guest simply needs more memory and we must allocate an appropriate page or it + * can mean that the guest tried to access I/O memory, which is emulated by user + * space. The distinction is based on the IPA causing the fault and whether this + * memory region has been registered as standard RAM by user space. + */ +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long fault_status; + phys_addr_t fault_ipa; + struct kvm_memory_slot *memslot; + unsigned long hva; + bool is_iabt, write_fault, writable; + gfn_t gfn; + int ret, idx; + + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { + kvm_inject_vabt(vcpu); + return 1; + } + + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + kvm_vcpu_get_hfar(vcpu), fault_ipa); + + /* Check the stage-2 fault is trans. fault or write fault */ + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; + } + + idx = srcu_read_lock(&vcpu->kvm->srcu); + + gfn = fault_ipa >> PAGE_SHIFT; + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(vcpu); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + if (is_iabt) { + /* Prefetch Abort on I/O address */ + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + goto out_unlock; + } + + /* + * Check for a cache maintenance operation. Since we + * ended-up here, we know it is outside of any memory + * slot. But we can't find out if that is for a device, + * or if the guest is just being stupid. The only thing + * we know for sure is that this range cannot be cached. + * + * So let's assume that the guest is just being + * cautious, and skip the instruction. + */ + if (kvm_vcpu_dabt_is_cm(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + ret = 1; + goto out_unlock; + } + + /* + * The IPA is reported as [MAX:12], so we need to + * complement it with the bottom 12 bits from the + * faulting VA. This is always 12 bits, irrespective + * of the page size. + */ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); + ret = io_mem_abort(vcpu, run, fault_ipa); + goto out_unlock; + } + + /* Userspace should not be able to register out-of-bounds IPAs */ + VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); + if (ret == 0) + ret = 1; +out_unlock: + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} + +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, u64 size, + void *data), + void *data) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int ret = 0; + + slots = kvm_memslots(kvm); + + /* we only care about the pages that the guest sees */ + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gpa; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + + gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT; + ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data); + } + + return ret; +} + +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + unmap_stage2_range(kvm, gpa, size); + return 0; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + unsigned long end = hva + PAGE_SIZE; + + if (!kvm->arch.pgd) + return 0; + + trace_kvm_unmap_hva(hva); + handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + + trace_kvm_unmap_hva_range(start, end); + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + return 0; +} + +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pte_t *pte = (pte_t *)data; + + WARN_ON(size != PAGE_SIZE); + /* + * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE + * flag clear because MMU notifiers will have unmapped a huge PMD before + * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and + * therefore stage2_set_pte() never needs to clear out a huge PMD + * through this calling path. + */ + stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; +} + + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + pte_t stage2_pte; + + if (!kvm->arch.pgd) + return; + + trace_kvm_set_spte_hva(hva); + stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2); + handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); +} + +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE); + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ + return stage2_pmdp_test_and_clear_young(pmd); + + pte = pte_offset_kernel(pmd, gpa); + if (pte_none(*pte)) + return 0; + + return stage2_ptep_test_and_clear_young(pte); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE); + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ + return pmd_young(*pmd); + + pte = pte_offset_kernel(pmd, gpa); + if (!pte_none(*pte)) /* Just a page... */ + return pte_young(*pte); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); +} + +phys_addr_t kvm_mmu_get_httbr(void) +{ + if (__kvm_cpu_uses_extended_idmap()) + return virt_to_phys(merged_hyp_pgd); + else + return virt_to_phys(hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + +static int kvm_map_idmap_text(pgd_t *pgd) +{ + int err; + + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP_EXEC); + if (err) + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + + return err; +} + +int kvm_mmu_init(void) +{ + int err; + + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); + + /* + * We rely on the linker script to ensure at build time that the HYP + * init code does not cross a page boundary. + */ + BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); + + kvm_info("IDMAP page: %lx\n", hyp_idmap_start); + kvm_info("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + + if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && + hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { + /* + * The idmap page is intersecting with the VA space, + * it is not safe to continue further. + */ + kvm_err("IDMAP intersecting with HYP VA, unable to continue\n"); + err = -EINVAL; + goto out; + } + + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + if (!hyp_pgd) { + kvm_err("Hyp mode PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + if (__kvm_cpu_uses_extended_idmap()) { + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + hyp_pgd_order); + if (!boot_hyp_pgd) { + kvm_err("Hyp boot PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + err = kvm_map_idmap_text(boot_hyp_pgd); + if (err) + goto out; + + merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!merged_hyp_pgd) { + kvm_err("Failed to allocate extra HYP pgd\n"); + goto out; + } + __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd, + hyp_idmap_start); + } else { + err = kvm_map_idmap_text(hyp_pgd); + if (err) + goto out; + } + + return 0; +out: + free_hyp_pgds(); + return err; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be be tracked while the + * memory slot is write protected. + */ + if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) + kvm_mmu_wp_memory_region(kvm, mem->slot); +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + hva_t hva = mem->userspace_addr; + hva_t reg_end = hva + mem->memory_size; + bool writable = !(mem->flags & KVM_MEM_READONLY); + int ret = 0; + + if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && + change != KVM_MR_FLAGS_ONLY) + return 0; + + /* + * Prevent userspace from creating a memory region outside of the IPA + * space addressable by the KVM guest IPA space. + */ + if (memslot->base_gfn + memslot->npages >= + (KVM_PHYS_SIZE >> PAGE_SHIFT)) + return -EFAULT; + + down_read(¤t->mm->mmap_sem); + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we can map + * any of them right now. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Mapping a read-only VMA is only allowed if the + * memory region is configured as read-only. + */ + if (writable && !(vma->vm_flags & VM_WRITE)) { + ret = -EPERM; + break; + } + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (vma->vm_flags & VM_PFNMAP) { + gpa_t gpa = mem->guest_phys_addr + + (vm_start - mem->userspace_addr); + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; + + /* IO region dirty page logging not allowed */ + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EINVAL; + goto out; + } + + ret = kvm_phys_addr_ioremap(kvm, gpa, pa, + vm_end - vm_start, + writable); + if (ret) + break; + } + hva = vm_end; + } while (hva < reg_end); + + if (change == KVM_MR_FLAGS_ONLY) + goto out; + + spin_lock(&kvm->mmu_lock); + if (ret) + unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); +out: + up_read(¤t->mm->mmap_sem); + return ret; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + kvm_free_stage2_pgd(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + gpa_t gpa = slot->base_gfn << PAGE_SHIFT; + phys_addr_t size = slot->npages << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); +} + +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + * + * Main problems: + * - S/W ops are local to a CPU (not broadcast) + * - We have line migration behind our back (speculation) + * - System caches don't support S/W at all (damn!) + * + * In the face of the above, the best we can do is to try and convert + * S/W ops to VA ops. Because the guest is not allowed to infer the + * S/W to PA mapping, it can only use S/W to nuke the whole cache, + * which is a rather good thing for us. + * + * Also, it is only used when turning caches on/off ("The expected + * usage of the cache maintenance instructions that operate by set/way + * is associated with the cache maintenance instructions associated + * with the powerdown and powerup of caches, if this is required by + * the implementation."). + * + * We use the following policy: + * + * - If we trap a S/W operation, we enable VM trapping to detect + * caches being turned on/off, and do a full clean. + * + * - We flush the caches on both caches being turned on and off. + * + * - Once the caches are enabled, we stop trapping VM ops. + */ +void kvm_set_way_flush(struct kvm_vcpu *vcpu) +{ + unsigned long hcr = vcpu_get_hcr(vcpu); + + /* + * If this is the first time we do a S/W operation + * (i.e. HCR_TVM not set) flush the whole memory, and set the + * VM trapping. + * + * Otherwise, rely on the VM trapping to wait for the MMU + + * Caches to be turned off. At that point, we'll be able to + * clean the caches again. + */ + if (!(hcr & HCR_TVM)) { + trace_kvm_set_way_flush(*vcpu_pc(vcpu), + vcpu_has_cache_enabled(vcpu)); + stage2_flush_vm(vcpu->kvm); + vcpu_set_hcr(vcpu, hcr | HCR_TVM); + } +} + +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) +{ + bool now_enabled = vcpu_has_cache_enabled(vcpu); + + /* + * If switching the MMU+caches on, need to invalidate the caches. + * If switching it off, need to clean the caches. + * Clean + invalidate does the trick always. + */ + if (now_enabled != was_enabled) + stage2_flush_vm(vcpu->kvm); + + /* Caches are now on, stop trapping VM ops (until a S/W op) */ + if (now_enabled) + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); + + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); +} diff --git a/arch/arm/kvm/perf.c b/virt/kvm/arm/perf.c similarity index 100% rename from arch/arm/kvm/perf.c rename to virt/kvm/arm/perf.c diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 69ccce308458a4c3de79ab1b6ffceee0f2cfb47d..4b43e7f3b15801916d35e60101938e79df41b26c 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -230,13 +230,44 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) return; overflow = !!kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level != overflow) { - pmu->irq_level = overflow; - kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret; + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow); + WARN_ON(ret); } } +bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + return pmu->irq_level != run_level; +} + +/* + * Reflect the PMU overflow interrupt output level into the kvm_run structure + */ +void kvm_pmu_update_run(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the timer bitmap for user space */ + regs->device_irq_level &= ~KVM_ARM_DEV_PMU; + if (vcpu->arch.pmu.irq_level) + regs->device_irq_level |= KVM_ARM_DEV_PMU; +} + /** * kvm_pmu_flush_hwstate - flush pmu state to cpu * @vcpu: The vcpu pointer diff --git a/arch/arm/kvm/psci.c b/virt/kvm/arm/psci.c similarity index 98% rename from arch/arm/kvm/psci.c rename to virt/kvm/arm/psci.c index c2b131527a643f6e6808b78f8c53e9751d0f71a9..a08d7a93aebbece7777f042ec3881ad088104a4d 100644 --- a/arch/arm/kvm/psci.c +++ b/virt/kvm/arm/psci.c @@ -208,9 +208,10 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - int ret = 1; + struct kvm *kvm = vcpu->kvm; unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; + int ret = 1; switch (psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: @@ -230,7 +231,9 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; case PSCI_0_2_FN_CPU_ON: case PSCI_0_2_FN64_CPU_ON: + mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN64_AFFINITY_INFO: @@ -279,6 +282,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -288,7 +292,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: + mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 37d8b98867d5a04fd814ac89be30c1a44e0f213c..f7dc5ddd6847ba09a6ced07403cba502bb4f8e4b 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -7,26 +7,250 @@ #define TRACE_SYSTEM kvm /* - * Tracepoints for vgic + * Tracepoints for entry/exit to guest */ -TRACE_EVENT(vgic_update_irq_pending, - TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), - TP_ARGS(vcpu_id, irq, level), +TRACE_EVENT(kvm_entry, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), TP_STRUCT__entry( - __field( unsigned long, vcpu_id ) - __field( __u32, irq ) - __field( bool, level ) + __field( unsigned long, vcpu_pc ) ), TP_fast_assign( - __entry->vcpu_id = vcpu_id; - __entry->irq = irq; + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc), + TP_ARGS(idx, exit_reason, vcpu_pc), + + TP_STRUCT__entry( + __field( int, idx ) + __field( unsigned int, exit_reason ) + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->idx = idx; + __entry->exit_reason = exit_reason; + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", + __print_symbolic(__entry->idx, kvm_arm_exception_type), + __entry->exit_reason, + __print_symbolic(__entry->exit_reason, kvm_arm_exception_class), + __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_guest_fault, + TP_PROTO(unsigned long vcpu_pc, unsigned long hsr, + unsigned long hxfar, + unsigned long long ipa), + TP_ARGS(vcpu_pc, hsr, hxfar, ipa), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, hsr ) + __field( unsigned long, hxfar ) + __field( unsigned long long, ipa ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->hsr = hsr; + __entry->hxfar = hxfar; + __entry->ipa = ipa; + ), + + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + +TRACE_EVENT(kvm_irq_line, + TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), + TP_ARGS(type, vcpu_idx, irq_num, level), + + TP_STRUCT__entry( + __field( unsigned int, type ) + __field( int, vcpu_idx ) + __field( int, irq_num ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->vcpu_idx = vcpu_idx; + __entry->irq_num = irq_num; __entry->level = level; ), - TP_printk("VCPU: %ld, IRQ %d, level: %d", - __entry->vcpu_id, __entry->irq, __entry->level) + TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d", + (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" : + (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" : + (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN", + __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level) +); + +TRACE_EVENT(kvm_mmio_emulate, + TP_PROTO(unsigned long vcpu_pc, unsigned long instr, + unsigned long cpsr), + TP_ARGS(vcpu_pc, instr, cpsr), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, instr ) + __field( unsigned long, cpsr ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->instr = instr; + __entry->cpsr = cpsr; + ), + + TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", + __entry->vcpu_pc, __entry->instr, __entry->cpsr) +); + +TRACE_EVENT(kvm_unmap_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_unmap_hva_range, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier unmap range: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_set_spte_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_set_way_flush, + TP_PROTO(unsigned long vcpu_pc, bool cache), + TP_ARGS(vcpu_pc, cache), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, cache ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->cache = cache; + ), + + TP_printk("S/W flush at 0x%016lx (cache %s)", + __entry->vcpu_pc, __entry->cache ? "on" : "off") +); + +TRACE_EVENT(kvm_toggle_cache, + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), + TP_ARGS(vcpu_pc, was, now), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, was ) + __field( bool, now ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->was = was; + __entry->now = now; + ), + + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", + __entry->vcpu_pc, __entry->was ? "on" : "off", + __entry->now ? "on" : "off") ); /* diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h new file mode 100644 index 0000000000000000000000000000000000000000..ed3229282888680e6d8af1ad9fa86540db42156f --- /dev/null +++ b/virt/kvm/arm/vgic/trace.h @@ -0,0 +1,37 @@ +#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VGIC_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_VGIC_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 702f8108608d053d43a6eff18ef8be84c3589994..3a0b8999f011c6e3e1ff6ea7699577879a651fd3 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -24,7 +24,12 @@ /* * Initialization rules: there are multiple stages to the vgic - * initialization, both for the distributor and the CPU interfaces. + * initialization, both for the distributor and the CPU interfaces. The basic + * idea is that even though the VGIC is not functional or not requested from + * user space, the critical path of the run loop can still call VGIC functions + * that just won't do anything, without them having to check additional + * initialization flags to ensure they don't look at uninitialized data + * structures. * * Distributor: * @@ -39,23 +44,67 @@ * * CPU Interface: * - * - kvm_vgic_cpu_early_init(): initialization of static data that + * - kvm_vgic_vcpu_early_init(): initialization of static data that * doesn't depend on any sizing information or emulation type. No * allocation is allowed there. */ /* EARLY INIT */ -/* - * Those 2 functions should not be needed anymore but they - * still are called from arm.c +/** + * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures + * @kvm: The VM whose VGIC districutor should be initialized + * + * Only do initialization of static structures that don't require any + * allocation or sizing information from userspace. vgic_init() called + * kvm_vgic_dist_init() which takes care of the rest. */ void kvm_vgic_early_init(struct kvm *kvm) { + struct vgic_dist *dist = &kvm->arch.vgic; + + INIT_LIST_HEAD(&dist->lpi_list_head); + spin_lock_init(&dist->lpi_list_lock); } +/** + * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures + * @vcpu: The VCPU whose VGIC data structures whould be initialized + * + * Only do initialization, but do not actually enable the VGIC CPU interface + * yet. + */ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int i; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + spin_lock_init(&vgic_cpu->ap_list_lock); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. + */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + irq->targets = 1U << vcpu->vcpu_id; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } } /* CREATION */ @@ -148,9 +197,6 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; - INIT_LIST_HEAD(&dist->lpi_list_head); - spin_lock_init(&dist->lpi_list_lock); - dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); if (!dist->spis) return -ENOMEM; @@ -181,41 +227,31 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) } /** - * kvm_vgic_vcpu_init: initialize the vcpu data structures and - * enable the VCPU interface - * @vcpu: the VCPU which's VGIC should be initialized + * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs + * @vcpu: pointer to the VCPU being created and initialized */ -static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int i; + int ret = 0; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - spin_lock_init(&vgic_cpu->ap_list_lock); + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; /* - * Enable and configure all SGIs to be edge-triggered and - * configure all PPIs as level-triggered. + * If we are creating a VCPU with a GICv3 we must also register the + * KVM io device for the redistributor that belongs to this VCPU. */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - irq->intid = i; - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; - kref_init(&irq->refcount); - if (vgic_irq_is_sgi(i)) { - /* SGIs */ - irq->enabled = 1; - irq->config = VGIC_CONFIG_EDGE; - } else { - /* PPIs */ - irq->config = VGIC_CONFIG_LEVEL; - } + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + mutex_lock(&vcpu->kvm->lock); + ret = vgic_register_redist_iodev(vcpu); + mutex_unlock(&vcpu->kvm->lock); } + return ret; +} + +static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) +{ if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_enable(vcpu); else @@ -253,7 +289,7 @@ int vgic_init(struct kvm *kvm) dist->msis_require_devid = true; kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_init(vcpu); + kvm_vgic_vcpu_enable(vcpu); ret = kvm_vgic_setup_default_irq_routing(kvm); if (ret) @@ -262,6 +298,18 @@ int vgic_init(struct kvm *kvm) vgic_debug_init(kvm); dist->initialized = true; + + /* + * If we're initializing GICv2 on-demand when first running the VCPU + * then we need to load the VGIC state onto the CPU. We can detect + * this easily by checking if we are in between vcpu_load and vcpu_put + * when we just initialized the VGIC. + */ + preempt_disable(); + vcpu = kvm_arm_get_running_vcpu(); + if (vcpu) + kvm_vgic_load(vcpu); + preempt_enable(); out: return ret; } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 8d1da1af4b09e47c174cf7151b37b98666f03953..2dff288b3a668e401d924e52cee10b9e85ecd8df 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -33,6 +34,12 @@ #include "vgic.h" #include "vgic-mmio.h" +static int vgic_its_save_tables_v0(struct vgic_its *its); +static int vgic_its_restore_tables_v0(struct vgic_its *its); +static int vgic_its_commit_v0(struct vgic_its *its); +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu); + /* * Creates a new (reference to a) struct vgic_irq for a given LPI. * If this LPI is already mapped on another ITS, we increase its refcount @@ -40,10 +47,12 @@ * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. * This function returns a pointer to the _unlocked_ structure. */ -static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) +static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, + struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + int ret; /* In this case there is no put, since we keep the reference. */ if (irq) @@ -60,6 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) irq->config = VGIC_CONFIG_EDGE; kref_init(&irq->refcount); irq->intid = intid; + irq->target_vcpu = vcpu; spin_lock(&dist->lpi_list_lock); @@ -91,6 +101,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) out_unlock: spin_unlock(&dist->lpi_list_lock); + /* + * We "cache" the configuration table entries in our struct vgic_irq's. + * However we only have those structs for mapped IRQs, so we read in + * the respective config data from memory here upon mapping the LPI. + */ + ret = update_lpi_config(kvm, irq, NULL); + if (ret) + return ERR_PTR(ret); + + ret = vgic_v3_lpi_sync_pending_status(kvm, irq); + if (ret) + return ERR_PTR(ret); + return irq; } @@ -99,6 +122,8 @@ struct its_device { /* the head for the list of ITTEs */ struct list_head itt_head; + u32 num_eventid_bits; + gpa_t itt_addr; u32 device_id; }; @@ -114,8 +139,8 @@ struct its_collection { #define its_is_collection_mapped(coll) ((coll) && \ ((coll)->target_addr != COLLECTION_NOT_MAPPED)) -struct its_itte { - struct list_head itte_list; +struct its_ite { + struct list_head ite_list; struct vgic_irq *irq; struct its_collection *collection; @@ -123,6 +148,50 @@ struct its_itte { u32 event_id; }; +/** + * struct vgic_its_abi - ITS abi ops and settings + * @cte_esz: collection table entry size + * @dte_esz: device table entry size + * @ite_esz: interrupt translation table entry size + * @save tables: save the ITS tables into guest RAM + * @restore_tables: restore the ITS internal structs from tables + * stored in guest RAM + * @commit: initialize the registers which expose the ABI settings, + * especially the entry sizes + */ +struct vgic_its_abi { + int cte_esz; + int dte_esz; + int ite_esz; + int (*save_tables)(struct vgic_its *its); + int (*restore_tables)(struct vgic_its *its); + int (*commit)(struct vgic_its *its); +}; + +static const struct vgic_its_abi its_table_abi_versions[] = { + [0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8, + .save_tables = vgic_its_save_tables_v0, + .restore_tables = vgic_its_restore_tables_v0, + .commit = vgic_its_commit_v0, + }, +}; + +#define NR_ITS_ABIS ARRAY_SIZE(its_table_abi_versions) + +inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its) +{ + return &its_table_abi_versions[its->abi_rev]; +} + +int vgic_its_set_abi(struct vgic_its *its, int rev) +{ + const struct vgic_its_abi *abi; + + its->abi_rev = rev; + abi = vgic_its_get_abi(its); + return abi->commit(its); +} + /* * Find and returns a device in the device table for an ITS. * Must be called with the its_lock mutex held. @@ -143,27 +212,27 @@ static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) * Device ID/Event ID pair on an ITS. * Must be called with the its_lock mutex held. */ -static struct its_itte *find_itte(struct vgic_its *its, u32 device_id, +static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, u32 event_id) { struct its_device *device; - struct its_itte *itte; + struct its_ite *ite; device = find_its_device(its, device_id); if (device == NULL) return NULL; - list_for_each_entry(itte, &device->itt_head, itte_list) - if (itte->event_id == event_id) - return itte; + list_for_each_entry(ite, &device->itt_head, ite_list) + if (ite->event_id == event_id) + return ite; return NULL; } /* To be used as an iterator this macro misses the enclosing parentheses */ -#define for_each_lpi_its(dev, itte, its) \ +#define for_each_lpi_its(dev, ite, its) \ list_for_each_entry(dev, &(its)->device_list, dev_list) \ - list_for_each_entry(itte, &(dev)->itt_head, itte_list) + list_for_each_entry(ite, &(dev)->itt_head, ite_list) /* * We only implement 48 bits of PA at the moment, although the ITS @@ -171,11 +240,14 @@ static struct its_itte *find_itte(struct vgic_its *its, u32 device_id, */ #define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) #define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) -#define PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) -#define PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) #define GIC_LPI_OFFSET 8192 +#define VITS_TYPER_IDBITS 16 +#define VITS_TYPER_DEVBITS 16 +#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1) +#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1) + /* * Finds and returns a collection in the ITS collection table. * Must be called with the its_lock mutex held. @@ -204,7 +276,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id) static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, struct kvm_vcpu *filter_vcpu) { - u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); + u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); u8 prop; int ret; @@ -229,13 +301,13 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, } /* - * Create a snapshot of the current LPI list, so that we can enumerate all - * LPIs without holding any lock. - * Returns the array length and puts the kmalloc'ed array into intid_ptr. + * Create a snapshot of the current LPIs targeting @vcpu, so that we can + * enumerate those LPIs without holding any lock. + * Returns their number and puts the kmalloc'ed array into intid_ptr. */ -static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) +static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) { - struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_irq *irq; u32 *intids; int irq_count = dist->lpi_list_count, i = 0; @@ -254,14 +326,14 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) spin_lock(&dist->lpi_list_lock); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { /* We don't need to "get" the IRQ, as we hold the list lock. */ - intids[i] = irq->intid; - if (++i == irq_count) - break; + if (irq->target_vcpu != vcpu) + continue; + intids[i++] = irq->intid; } spin_unlock(&dist->lpi_list_lock); *intid_ptr = intids; - return irq_count; + return i; } /* @@ -270,18 +342,18 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) * Needs to be called whenever either the collection for a LPIs has * changed or the collection itself got retargeted. */ -static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte) +static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) { struct kvm_vcpu *vcpu; - if (!its_is_collection_mapped(itte->collection)) + if (!its_is_collection_mapped(ite->collection)) return; - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); - spin_lock(&itte->irq->irq_lock); - itte->irq->target_vcpu = vcpu; - spin_unlock(&itte->irq->irq_lock); + spin_lock(&ite->irq->irq_lock); + ite->irq->target_vcpu = vcpu; + spin_unlock(&ite->irq->irq_lock); } /* @@ -292,13 +364,13 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, struct its_collection *coll) { struct its_device *device; - struct its_itte *itte; + struct its_ite *ite; - for_each_lpi_its(device, itte, its) { - if (!itte->collection || coll != itte->collection) + for_each_lpi_its(device, ite, its) { + if (!ite->collection || coll != ite->collection) continue; - update_affinity_itte(kvm, itte); + update_affinity_ite(kvm, ite); } } @@ -310,20 +382,20 @@ static u32 max_lpis_propbaser(u64 propbaser) } /* - * Scan the whole LPI pending table and sync the pending bit in there + * Sync the pending table pending bit of LPIs targeting @vcpu * with our own data structures. This relies on the LPI being * mapped before. */ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) { - gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); struct vgic_irq *irq; int last_byte_offset = -1; int ret = 0; u32 *intids; int nr_irqs, i; - nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids); + nr_irqs = vgic_copy_lpi_list(vcpu, &intids); if (nr_irqs < 0) return nr_irqs; @@ -364,6 +436,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) { + const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 reg = GITS_TYPER_PLPIS; /* @@ -374,8 +447,9 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, * To avoid memory waste in the guest, we keep the number of IDBits and * DevBits low - as least for the time being. */ - reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT; - reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT; + reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT; + reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT; + reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT; return extract_bytes(reg, addr & 7, len); } @@ -384,7 +458,23 @@ static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) { - return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); + u32 val; + + val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK; + val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM; + return val; +} + +static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 rev = GITS_IIDR_REV(val); + + if (rev >= NR_ITS_ABIS) + return -EINVAL; + return vgic_its_set_abi(its, rev); } static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, @@ -425,25 +515,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid) { struct kvm_vcpu *vcpu; - struct its_itte *itte; + struct its_ite *ite; if (!its->enabled) return -EBUSY; - itte = find_itte(its, devid, eventid); - if (!itte || !its_is_collection_mapped(itte->collection)) + ite = find_ite(its, devid, eventid); + if (!ite || !its_is_collection_mapped(ite->collection)) return E_ITS_INT_UNMAPPED_INTERRUPT; - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); if (!vcpu) return E_ITS_INT_UNMAPPED_INTERRUPT; if (!vcpu->arch.vgic_cpu.lpis_enabled) return -EBUSY; - spin_lock(&itte->irq->irq_lock); - itte->irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, itte->irq); + spin_lock(&ite->irq->irq_lock); + ite->irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, ite->irq); return 0; } @@ -511,15 +601,15 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) } /* Requires the its_lock to be held. */ -static void its_free_itte(struct kvm *kvm, struct its_itte *itte) +static void its_free_ite(struct kvm *kvm, struct its_ite *ite) { - list_del(&itte->itte_list); + list_del(&ite->ite_list); /* This put matches the get in vgic_add_lpi. */ - if (itte->irq) - vgic_put_irq(kvm, itte->irq); + if (ite->irq) + vgic_put_irq(kvm, ite->irq); - kfree(itte); + kfree(ite); } static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) @@ -529,9 +619,11 @@ static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) #define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) #define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) +#define its_cmd_get_size(cmd) (its_cmd_mask_field(cmd, 1, 0, 5) + 1) #define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) #define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) #define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) +#define its_cmd_get_ittaddr(cmd) (its_cmd_mask_field(cmd, 2, 8, 44) << 8) #define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) #define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) @@ -544,17 +636,17 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, { u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); - struct its_itte *itte; + struct its_ite *ite; - itte = find_itte(its, device_id, event_id); - if (itte && itte->collection) { + ite = find_ite(its, device_id, event_id); + if (ite && ite->collection) { /* * Though the spec talks about removing the pending state, we * don't bother here since we clear the ITTE anyway and the * pending state is a property of the ITTE struct. */ - its_free_itte(kvm, itte); + its_free_ite(kvm, ite); return 0; } @@ -572,26 +664,26 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, u32 event_id = its_cmd_get_id(its_cmd); u32 coll_id = its_cmd_get_collection(its_cmd); struct kvm_vcpu *vcpu; - struct its_itte *itte; + struct its_ite *ite; struct its_collection *collection; - itte = find_itte(its, device_id, event_id); - if (!itte) + ite = find_ite(its, device_id, event_id); + if (!ite) return E_ITS_MOVI_UNMAPPED_INTERRUPT; - if (!its_is_collection_mapped(itte->collection)) + if (!its_is_collection_mapped(ite->collection)) return E_ITS_MOVI_UNMAPPED_COLLECTION; collection = find_collection(its, coll_id); if (!its_is_collection_mapped(collection)) return E_ITS_MOVI_UNMAPPED_COLLECTION; - itte->collection = collection; + ite->collection = collection; vcpu = kvm_get_vcpu(kvm, collection->target_addr); - spin_lock(&itte->irq->irq_lock); - itte->irq->target_vcpu = vcpu; - spin_unlock(&itte->irq->irq_lock); + spin_lock(&ite->irq->irq_lock); + ite->irq->target_vcpu = vcpu; + spin_unlock(&ite->irq->irq_lock); return 0; } @@ -600,16 +692,31 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, * Check whether an ID can be stored into the corresponding guest table. * For a direct table this is pretty easy, but gets a bit nasty for * indirect tables. We check whether the resulting guest physical address - * is actually valid (covered by a memslot and guest accessbible). + * is actually valid (covered by a memslot and guest accessible). * For this we have to read the respective first level entry. */ -static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) +static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, + gpa_t *eaddr) { int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + int esz = GITS_BASER_ENTRY_SIZE(baser); int index; - u64 indirect_ptr; gfn_t gfn; - int esz = GITS_BASER_ENTRY_SIZE(baser); + + switch (type) { + case GITS_BASER_TYPE_DEVICE: + if (id >= BIT_ULL(VITS_TYPER_DEVBITS)) + return false; + break; + case GITS_BASER_TYPE_COLLECTION: + /* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */ + if (id >= BIT_ULL(16)) + return false; + break; + default: + return false; + } if (!(baser & GITS_BASER_INDIRECT)) { phys_addr_t addr; @@ -620,6 +727,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) addr = BASER_ADDRESS(baser) + id * esz; gfn = addr >> PAGE_SHIFT; + if (eaddr) + *eaddr = addr; return kvm_is_visible_gfn(its->dev->kvm, gfn); } @@ -652,6 +761,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) indirect_ptr += index * esz; gfn = indirect_ptr >> PAGE_SHIFT; + if (eaddr) + *eaddr = indirect_ptr; return kvm_is_visible_gfn(its->dev->kvm, gfn); } @@ -661,7 +772,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its, { struct its_collection *collection; - if (!vgic_its_check_id(its, its->baser_coll_table, coll_id)) + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) return E_ITS_MAPC_COLLECTION_OOR; collection = kzalloc(sizeof(*collection), GFP_KERNEL); @@ -679,7 +790,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) { struct its_collection *collection; struct its_device *device; - struct its_itte *itte; + struct its_ite *ite; /* * Clearing the mapping for that collection ID removes the @@ -690,15 +801,34 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) if (!collection) return; - for_each_lpi_its(device, itte, its) - if (itte->collection && - itte->collection->collection_id == coll_id) - itte->collection = NULL; + for_each_lpi_its(device, ite, its) + if (ite->collection && + ite->collection->collection_id == coll_id) + ite->collection = NULL; list_del(&collection->coll_list); kfree(collection); } +/* Must be called with its_lock mutex held */ +static struct its_ite *vgic_its_alloc_ite(struct its_device *device, + struct its_collection *collection, + u32 lpi_id, u32 event_id) +{ + struct its_ite *ite; + + ite = kzalloc(sizeof(*ite), GFP_KERNEL); + if (!ite) + return ERR_PTR(-ENOMEM); + + ite->event_id = event_id; + ite->collection = collection; + ite->lpi = lpi_id; + + list_add_tail(&ite->ite_list, &device->itt_head); + return ite; +} + /* * The MAPTI and MAPI commands map LPIs to ITTEs. * Must be called with its_lock mutex held. @@ -709,16 +839,20 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_itte *itte; + struct its_ite *ite; + struct kvm_vcpu *vcpu = NULL; struct its_device *device; struct its_collection *collection, *new_coll = NULL; - int lpi_nr; struct vgic_irq *irq; + int lpi_nr; device = find_its_device(its, device_id); if (!device) return E_ITS_MAPTI_UNMAPPED_DEVICE; + if (event_id >= BIT_ULL(device->num_eventid_bits)) + return E_ITS_MAPTI_ID_OOR; + if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) lpi_nr = its_cmd_get_physical_id(its_cmd); else @@ -728,7 +862,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, return E_ITS_MAPTI_PHYSICALID_OOR; /* If there is an existing mapping, behavior is UNPREDICTABLE. */ - if (find_itte(its, device_id, event_id)) + if (find_ite(its, device_id, event_id)) return 0; collection = find_collection(its, coll_id); @@ -739,36 +873,24 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, new_coll = collection; } - itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); - if (!itte) { + ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id); + if (IS_ERR(ite)) { if (new_coll) vgic_its_free_collection(its, coll_id); - return -ENOMEM; + return PTR_ERR(ite); } - itte->event_id = event_id; - list_add_tail(&itte->itte_list, &device->itt_head); - - itte->collection = collection; - itte->lpi = lpi_nr; + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); - irq = vgic_add_lpi(kvm, lpi_nr); + irq = vgic_add_lpi(kvm, lpi_nr, vcpu); if (IS_ERR(irq)) { if (new_coll) vgic_its_free_collection(its, coll_id); - its_free_itte(kvm, itte); + its_free_ite(kvm, ite); return PTR_ERR(irq); } - itte->irq = irq; - - update_affinity_itte(kvm, itte); - - /* - * We "cache" the configuration table entries in out struct vgic_irq's. - * However we only have those structs for mapped IRQs, so we read in - * the respective config data from memory here upon mapping the LPI. - */ - update_lpi_config(kvm, itte->irq, NULL); + ite->irq = irq; return 0; } @@ -776,20 +898,40 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, /* Requires the its_lock to be held. */ static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) { - struct its_itte *itte, *temp; + struct its_ite *ite, *temp; /* * The spec says that unmapping a device with still valid * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, * since we cannot leave the memory unreferenced. */ - list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list) - its_free_itte(kvm, itte); + list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) + its_free_ite(kvm, ite); list_del(&device->dev_list); kfree(device); } +/* Must be called with its_lock mutex held */ +static struct its_device *vgic_its_alloc_device(struct vgic_its *its, + u32 device_id, gpa_t itt_addr, + u8 num_eventid_bits) +{ + struct its_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return ERR_PTR(-ENOMEM); + + device->device_id = device_id; + device->itt_addr = itt_addr; + device->num_eventid_bits = num_eventid_bits; + INIT_LIST_HEAD(&device->itt_head); + + list_add_tail(&device->dev_list, &its->device_list); + return device; +} + /* * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). * Must be called with the its_lock mutex held. @@ -799,11 +941,16 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, { u32 device_id = its_cmd_get_deviceid(its_cmd); bool valid = its_cmd_get_validbit(its_cmd); + u8 num_eventid_bits = its_cmd_get_size(its_cmd); + gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); struct its_device *device; - if (!vgic_its_check_id(its, its->baser_device_table, device_id)) + if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL)) return E_ITS_MAPD_DEVICE_OOR; + if (valid && num_eventid_bits > VITS_TYPER_IDBITS) + return E_ITS_MAPD_ITTSIZE_OOR; + device = find_its_device(its, device_id); /* @@ -821,14 +968,10 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, if (!valid) return 0; - device = kzalloc(sizeof(struct its_device), GFP_KERNEL); - if (!device) - return -ENOMEM; - - device->device_id = device_id; - INIT_LIST_HEAD(&device->itt_head); - - list_add_tail(&device->dev_list, &its->device_list); + device = vgic_its_alloc_device(its, device_id, itt_addr, + num_eventid_bits); + if (IS_ERR(device)) + return PTR_ERR(device); return 0; } @@ -883,14 +1026,14 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, { u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); - struct its_itte *itte; + struct its_ite *ite; - itte = find_itte(its, device_id, event_id); - if (!itte) + ite = find_ite(its, device_id, event_id); + if (!ite) return E_ITS_CLEAR_UNMAPPED_INTERRUPT; - itte->irq->pending_latch = false; + ite->irq->pending_latch = false; return 0; } @@ -904,14 +1047,14 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, { u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); - struct its_itte *itte; + struct its_ite *ite; - itte = find_itte(its, device_id, event_id); - if (!itte) + ite = find_ite(its, device_id, event_id); + if (!ite) return E_ITS_INV_UNMAPPED_INTERRUPT; - return update_lpi_config(kvm, itte->irq, NULL); + return update_lpi_config(kvm, ite->irq, NULL); } /* @@ -938,7 +1081,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, vcpu = kvm_get_vcpu(kvm, collection->target_addr); - irq_count = vgic_copy_lpi_list(kvm, &intids); + irq_count = vgic_copy_lpi_list(vcpu, &intids); if (irq_count < 0) return irq_count; @@ -1213,6 +1356,33 @@ static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, return extract_bytes(its->creadr, addr & 0x7, len); } +static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 cmd_offset; + int ret = 0; + + mutex_lock(&its->cmd_lock); + + if (its->enabled) { + ret = -EBUSY; + goto out; + } + + cmd_offset = ITS_CMD_OFFSET(val); + if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + ret = -EINVAL; + goto out; + } + + its->creadr = cmd_offset; +out: + mutex_unlock(&its->cmd_lock); + return ret; +} + #define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, struct vgic_its *its, @@ -1241,6 +1411,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, gpa_t addr, unsigned int len, unsigned long val) { + const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 entry_size, device_type; u64 reg, *regptr, clearbits = 0; @@ -1251,12 +1422,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, switch (BASER_INDEX(addr)) { case 0: regptr = &its->baser_device_table; - entry_size = 8; + entry_size = abi->dte_esz; device_type = GITS_BASER_TYPE_DEVICE; break; case 1: regptr = &its->baser_coll_table; - entry_size = 8; + entry_size = abi->cte_esz; device_type = GITS_BASER_TYPE_COLLECTION; clearbits = GITS_BASER_INDIRECT; break; @@ -1317,6 +1488,16 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, .its_write = wr, \ } +#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ + .uaccess_its_write = uwr, \ +} + static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len, unsigned long val) { @@ -1327,8 +1508,9 @@ static struct vgic_register_region its_registers[] = { REGISTER_ITS_DESC(GITS_CTLR, vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_IIDR, - vgic_mmio_read_its_iidr, its_mmio_write_wi, 4, + REGISTER_ITS_DESC_UACCESS(GITS_IIDR, + vgic_mmio_read_its_iidr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_iidr, 4, VGIC_ACCESS_32bit), REGISTER_ITS_DESC(GITS_TYPER, vgic_mmio_read_its_typer, its_mmio_write_wi, 8, @@ -1339,8 +1521,9 @@ static struct vgic_register_region its_registers[] = { REGISTER_ITS_DESC(GITS_CWRITER, vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), - REGISTER_ITS_DESC(GITS_CREADR, - vgic_mmio_read_its_creadr, its_mmio_write_wi, 8, + REGISTER_ITS_DESC_UACCESS(GITS_CREADR, + vgic_mmio_read_its_creadr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_creadr, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_ITS_DESC(GITS_BASER, vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, @@ -1357,17 +1540,19 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu) its_sync_lpi_pending_table(vcpu); } -static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) +static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its, + u64 addr) { struct vgic_io_device *iodev = &its->iodev; int ret; - if (!its->initialized) - return -EBUSY; - - if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) - return -ENXIO; + mutex_lock(&kvm->slots_lock); + if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -EBUSY; + goto out; + } + its->vgic_its_base = addr; iodev->regions = its_registers; iodev->nr_regions = ARRAY_SIZE(its_registers); kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); @@ -1375,9 +1560,9 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) iodev->base_addr = its->vgic_its_base; iodev->iodev_type = IODEV_ITS; iodev->its = its; - mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, KVM_VGIC_V3_ITS_SIZE, &iodev->dev); +out: mutex_unlock(&kvm->slots_lock); return ret; @@ -1387,7 +1572,6 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ - ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | \ GITS_BASER_PAGE_SIZE_64K) #define INITIAL_PROPBASER_VALUE \ @@ -1415,7 +1599,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) INIT_LIST_HEAD(&its->collection_list); dev->kvm->arch.vgic.has_its = true; - its->initialized = false; its->enabled = false; its->dev = dev; @@ -1427,16 +1610,23 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) dev->private = its; - return 0; + return vgic_its_set_abi(its, NR_ITS_ABIS - 1); +} + +static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev) +{ + struct its_ite *ite, *tmp; + + list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list) + its_free_ite(kvm, ite); + list_del(&dev->dev_list); + kfree(dev); } static void vgic_its_destroy(struct kvm_device *kvm_dev) { struct kvm *kvm = kvm_dev->kvm; struct vgic_its *its = kvm_dev->private; - struct its_device *dev; - struct its_itte *itte; - struct list_head *dev_cur, *dev_temp; struct list_head *cur, *temp; /* @@ -1447,25 +1637,710 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev) return; mutex_lock(&its->its_lock); - list_for_each_safe(dev_cur, dev_temp, &its->device_list) { - dev = container_of(dev_cur, struct its_device, dev_list); - list_for_each_safe(cur, temp, &dev->itt_head) { - itte = (container_of(cur, struct its_itte, itte_list)); - its_free_itte(kvm, itte); - } - list_del(dev_cur); - kfree(dev); + list_for_each_safe(cur, temp, &its->device_list) { + struct its_device *dev; + + dev = list_entry(cur, struct its_device, dev_list); + vgic_its_free_device(kvm, dev); } list_for_each_safe(cur, temp, &its->collection_list) { + struct its_collection *coll; + + coll = list_entry(cur, struct its_collection, coll_list); list_del(cur); - kfree(container_of(cur, struct its_collection, coll_list)); + kfree(coll); } mutex_unlock(&its->its_lock); kfree(its); } +int vgic_its_has_attr_regs(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + gpa_t offset = attr->attr; + int align; + + align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7; + + if (offset & align) + return -EINVAL; + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) + return -ENXIO; + + return 0; +} + +int vgic_its_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + const struct vgic_register_region *region; + struct vgic_its *its; + gpa_t addr, offset; + unsigned int len; + int align, ret = 0; + + its = dev->private; + offset = attr->attr; + + /* + * Although the spec supports upper/lower 32-bit accesses to + * 64-bit ITS registers, the userspace ABI requires 64-bit + * accesses to all 64-bit wide registers. We therefore only + * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID + * registers + */ + if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4)) + align = 0x3; + else + align = 0x7; + + if (offset & align) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -ENXIO; + goto out; + } + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) { + ret = -ENXIO; + goto out; + } + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + addr = its->vgic_its_base + offset; + + len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; + + if (is_write) { + if (region->uaccess_its_write) + ret = region->uaccess_its_write(dev->kvm, its, addr, + len, *reg); + else + region->its_write(dev->kvm, its, addr, len, *reg); + } else { + *reg = region->its_read(dev->kvm, its, addr, len); + } + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static u32 compute_next_devid_offset(struct list_head *h, + struct its_device *dev) +{ + struct its_device *next; + u32 next_offset; + + if (list_is_last(&dev->dev_list, h)) + return 0; + next = list_next_entry(dev, dev_list); + next_offset = next->device_id - dev->device_id; + + return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET); +} + +static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite) +{ + struct its_ite *next; + u32 next_offset; + + if (list_is_last(&ite->ite_list, h)) + return 0; + next = list_next_entry(ite, ite_list); + next_offset = next->event_id - ite->event_id; + + return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET); +} + +/** + * entry_fn_t - Callback called on a table entry restore path + * @its: its handle + * @id: id of the entry + * @entry: pointer to the entry + * @opaque: pointer to an opaque data + * + * Return: < 0 on error, 0 if last element was identified, id offset to next + * element otherwise + */ +typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, + void *opaque); + +/** + * scan_its_table - Scan a contiguous table in guest RAM and applies a function + * to each entry + * + * @its: its handle + * @base: base gpa of the table + * @size: size of the table in bytes + * @esz: entry size in bytes + * @start_id: the ID of the first entry in the table + * (non zero for 2d level tables) + * @fn: function to apply on each entry + * + * Return: < 0 on error, 0 if last element was identified, 1 otherwise + * (the last element may not be found on second level tables) + */ +static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, + int start_id, entry_fn_t fn, void *opaque) +{ + void *entry = kzalloc(esz, GFP_KERNEL); + struct kvm *kvm = its->dev->kvm; + unsigned long len = size; + int id = start_id; + gpa_t gpa = base; + int ret; + + while (len > 0) { + int next_offset; + size_t byte_offset; + + ret = kvm_read_guest(kvm, gpa, entry, esz); + if (ret) + goto out; + + next_offset = fn(its, id, entry, opaque); + if (next_offset <= 0) { + ret = next_offset; + goto out; + } + + byte_offset = next_offset * esz; + id += next_offset; + gpa += byte_offset; + len -= byte_offset; + } + ret = 1; + +out: + kfree(entry); + return ret; +} + +/** + * vgic_its_save_ite - Save an interrupt translation entry at @gpa + */ +static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, + struct its_ite *ite, gpa_t gpa, int ite_esz) +{ + struct kvm *kvm = its->dev->kvm; + u32 next_offset; + u64 val; + + next_offset = compute_next_eventid_offset(&dev->itt_head, ite); + val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | + ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) | + ite->collection->collection_id; + val = cpu_to_le64(val); + return kvm_write_guest(kvm, gpa, &val, ite_esz); +} + +/** + * vgic_its_restore_ite - restore an interrupt translation entry + * @event_id: id used for indexing + * @ptr: pointer to the ITE entry + * @opaque: pointer to the its_device + */ +static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, + void *ptr, void *opaque) +{ + struct its_device *dev = (struct its_device *)opaque; + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + struct kvm_vcpu *vcpu = NULL; + u64 val; + u64 *p = (u64 *)ptr; + struct vgic_irq *irq; + u32 coll_id, lpi_id; + struct its_ite *ite; + u32 offset; + + val = *p; + + val = le64_to_cpu(val); + + coll_id = val & KVM_ITS_ITE_ICID_MASK; + lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT; + + if (!lpi_id) + return 1; /* invalid entry, no choice but to scan next entry */ + + if (lpi_id < VGIC_MIN_LPI) + return -EINVAL; + + offset = val >> KVM_ITS_ITE_NEXT_SHIFT; + if (event_id + offset >= BIT_ULL(dev->num_eventid_bits)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (!collection) + return -EINVAL; + + ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id); + if (IS_ERR(ite)) + return PTR_ERR(ite); + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_id, vcpu); + if (IS_ERR(irq)) + return PTR_ERR(irq); + ite->irq = irq; + + return offset; +} + +static int vgic_its_ite_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct its_ite *itea = container_of(a, struct its_ite, ite_list); + struct its_ite *iteb = container_of(b, struct its_ite, ite_list); + + if (itea->event_id < iteb->event_id) + return -1; + else + return 1; +} + +static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = device->itt_addr; + struct its_ite *ite; + int ret; + int ite_esz = abi->ite_esz; + + list_sort(NULL, &device->itt_head, vgic_its_ite_cmp); + + list_for_each_entry(ite, &device->itt_head, ite_list) { + gpa_t gpa = base + ite->event_id * ite_esz; + + ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + if (ret) + return ret; + } + return 0; +} + +static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = dev->itt_addr; + int ret; + int ite_esz = abi->ite_esz; + size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz; + + ret = scan_its_table(its, base, max_size, ite_esz, 0, + vgic_its_restore_ite, dev); + + return ret; +} + +/** + * vgic_its_save_dte - Save a device table entry at a given GPA + * + * @its: ITS handle + * @dev: ITS device + * @ptr: GPA + */ +static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, + gpa_t ptr, int dte_esz) +{ + struct kvm *kvm = its->dev->kvm; + u64 val, itt_addr_field; + u32 next_offset; + + itt_addr_field = dev->itt_addr >> 8; + next_offset = compute_next_devid_offset(&its->device_list, dev); + val = (1ULL << KVM_ITS_DTE_VALID_SHIFT | + ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) | + (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | + (dev->num_eventid_bits - 1)); + val = cpu_to_le64(val); + return kvm_write_guest(kvm, ptr, &val, dte_esz); +} + +/** + * vgic_its_restore_dte - restore a device table entry + * + * @its: its handle + * @id: device id the DTE corresponds to + * @ptr: kernel VA where the 8 byte DTE is located + * @opaque: unused + * + * Return: < 0 on error, 0 if the dte is the last one, id offset to the + * next dte otherwise + */ +static int vgic_its_restore_dte(struct vgic_its *its, u32 id, + void *ptr, void *opaque) +{ + struct its_device *dev; + gpa_t itt_addr; + u8 num_eventid_bits; + u64 entry = *(u64 *)ptr; + bool valid; + u32 offset; + int ret; + + entry = le64_to_cpu(entry); + + valid = entry >> KVM_ITS_DTE_VALID_SHIFT; + num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1; + itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK) + >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8; + + if (!valid) + return 1; + + /* dte entry is valid */ + offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + + dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = vgic_its_restore_itt(its, dev); + if (ret) { + vgic_its_free_device(its->dev->kvm, dev); + return ret; + } + + return offset; +} + +static int vgic_its_device_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct its_device *deva = container_of(a, struct its_device, dev_list); + struct its_device *devb = container_of(b, struct its_device, dev_list); + + if (deva->device_id < devb->device_id) + return -1; + else + return 1; +} + +/** + * vgic_its_save_device_tables - Save the device table and all ITT + * into guest RAM + * + * L1/L2 handling is hidden by vgic_its_check_id() helper which directly + * returns the GPA of the device entry + */ +static int vgic_its_save_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + struct its_device *dev; + int dte_esz = abi->dte_esz; + u64 baser; + + baser = its->baser_device_table; + + list_sort(NULL, &its->device_list, vgic_its_device_cmp); + + list_for_each_entry(dev, &its->device_list, dev_list) { + int ret; + gpa_t eaddr; + + if (!vgic_its_check_id(its, baser, + dev->device_id, &eaddr)) + return -EINVAL; + + ret = vgic_its_save_itt(its, dev); + if (ret) + return ret; + + ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * handle_l1_dte - callback used for L1 device table entries (2 stage case) + * + * @its: its handle + * @id: index of the entry in the L1 table + * @addr: kernel VA + * @opaque: unused + * + * L1 table entries are scanned by steps of 1 entry + * Return < 0 if error, 0 if last dte was found when scanning the L2 + * table, +1 otherwise (meaning next L1 entry must be scanned) + */ +static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, + void *opaque) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int l2_start_id = id * (SZ_64K / abi->dte_esz); + u64 entry = *(u64 *)addr; + int dte_esz = abi->dte_esz; + gpa_t gpa; + int ret; + + entry = le64_to_cpu(entry); + + if (!(entry & KVM_ITS_L1E_VALID_MASK)) + return 1; + + gpa = entry & KVM_ITS_L1E_ADDR_MASK; + + ret = scan_its_table(its, gpa, SZ_64K, dte_esz, + l2_start_id, vgic_its_restore_dte, NULL); + + if (ret <= 0) + return ret; + + return 1; +} + +/** + * vgic_its_restore_device_tables - Restore the device table and all ITT + * from guest RAM to internal data structs + */ +static int vgic_its_restore_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + int l1_esz, ret; + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + gpa_t l1_gpa; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + l1_gpa = BASER_ADDRESS(baser); + + if (baser & GITS_BASER_INDIRECT) { + l1_esz = GITS_LVL1_ENTRY_SIZE; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + handle_l1_dte, NULL); + } else { + l1_esz = abi->dte_esz; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + vgic_its_restore_dte, NULL); + } + + if (ret > 0) + ret = -EINVAL; + + return ret; +} + +static int vgic_its_save_cte(struct vgic_its *its, + struct its_collection *collection, + gpa_t gpa, int esz) +{ + u64 val; + + val = (1ULL << KVM_ITS_CTE_VALID_SHIFT | + ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | + collection->collection_id); + val = cpu_to_le64(val); + return kvm_write_guest(its->dev->kvm, gpa, &val, esz); +} + +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +{ + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + u32 target_addr, coll_id; + u64 val; + int ret; + + BUG_ON(esz > sizeof(val)); + ret = kvm_read_guest(kvm, gpa, &val, esz); + if (ret) + return ret; + val = le64_to_cpu(val); + if (!(val & KVM_ITS_CTE_VALID_MASK)) + return 0; + + target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); + coll_id = val & KVM_ITS_CTE_ICID_MASK; + + if (target_addr >= atomic_read(&kvm->online_vcpus)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (collection) + return -EEXIST; + ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + return 1; +} + +/** + * vgic_its_save_collection_table - Save the collection table into + * guest RAM + */ +static int vgic_its_save_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + struct its_collection *collection; + u64 val; + gpa_t gpa; + size_t max_size, filled = 0; + int ret, cte_esz = abi->cte_esz; + + gpa = BASER_ADDRESS(its->baser_coll_table); + if (!gpa) + return 0; + + max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + if (ret) + return ret; + gpa += cte_esz; + filled += cte_esz; + } + + if (filled == max_size) + return 0; + + /* + * table is not fully filled, add a last dummy element + * with valid bit unset + */ + val = 0; + BUG_ON(cte_esz > sizeof(val)); + ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz); + return ret; +} + +/** + * vgic_its_restore_collection_table - reads the collection table + * in guest memory and restores the ITS internal state. Requires the + * BASER registers to be restored before. + */ +static int vgic_its_restore_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int cte_esz = abi->cte_esz; + size_t max_size, read = 0; + gpa_t gpa; + int ret; + + if (!(its->baser_coll_table & GITS_BASER_VALID)) + return 0; + + gpa = BASER_ADDRESS(its->baser_coll_table); + + max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K; + + while (read < max_size) { + ret = vgic_its_restore_cte(its, gpa, cte_esz); + if (ret <= 0) + break; + gpa += cte_esz; + read += cte_esz; + } + return ret; +} + +/** + * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM + * according to v0 ABI + */ +static int vgic_its_save_tables_v0(struct vgic_its *its) +{ + struct kvm *kvm = its->dev->kvm; + int ret; + + mutex_lock(&kvm->lock); + mutex_lock(&its->its_lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + ret = vgic_its_save_device_tables(its); + if (ret) + goto out; + + ret = vgic_its_save_collection_table(its); + +out: + unlock_all_vcpus(kvm); + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return ret; +} + +/** + * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM + * to internal data structs according to V0 ABI + * + */ +static int vgic_its_restore_tables_v0(struct vgic_its *its) +{ + struct kvm *kvm = its->dev->kvm; + int ret; + + mutex_lock(&kvm->lock); + mutex_lock(&its->its_lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + ret = vgic_its_restore_collection_table(its); + if (ret) + goto out; + + ret = vgic_its_restore_device_tables(its); +out: + unlock_all_vcpus(kvm); + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + + return ret; +} + +static int vgic_its_commit_v0(struct vgic_its *its) +{ + const struct vgic_its_abi *abi; + + abi = vgic_its_get_abi(its); + its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + + its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + + its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + return 0; +} + static int vgic_its_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -1480,8 +2355,14 @@ static int vgic_its_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return 0; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + return 0; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + return 0; } break; + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: + return vgic_its_has_attr_regs(dev, attr); } return -ENXIO; } @@ -1509,18 +2390,30 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (ret) return ret; - its->vgic_its_base = addr; - - return 0; + return vgic_register_its_iodev(dev->kvm, its, addr); } - case KVM_DEV_ARM_VGIC_GRP_CTRL: + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - its->initialized = true; - + /* Nothing to do */ return 0; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + return abi->save_tables(its); + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + return abi->restore_tables(its); } - break; + } + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_its_attr_regs_access(dev, attr, ®, true); + } } return -ENXIO; } @@ -1541,10 +2434,20 @@ static int vgic_its_get_attr(struct kvm_device *dev, if (copy_to_user(uaddr, &addr, sizeof(addr))) return -EFAULT; break; + } + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + int ret; + + ret = vgic_its_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } default: return -ENXIO; } - } return 0; } @@ -1563,30 +2466,3 @@ int kvm_vgic_register_its_device(void) return kvm_register_device_ops(&kvm_arm_vgic_its_ops, KVM_DEV_TYPE_ARM_VGIC_ITS); } - -/* - * Registers all ITSes with the kvm_io_bus framework. - * To follow the existing VGIC initialization sequence, this has to be - * done as late as possible, just before the first VCPU runs. - */ -int vgic_register_its_iodevs(struct kvm *kvm) -{ - struct kvm_device *dev; - int ret = 0; - - list_for_each_entry(dev, &kvm->devices, vm_node) { - if (dev->ops != &kvm_arm_vgic_its_ops) - continue; - - ret = vgic_register_its_iodev(kvm, dev->private); - if (ret) - return ret; - /* - * We don't need to care about tearing down previously - * registered ITSes, as the kvm_io_bus framework removes - * them for us if the VM gets destroyed. - */ - } - - return ret; -} diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index d181d2baee9c4d6c5a5d93a569110e9030bc5e7a..10ae6f394b718d8e49805d23481974b0526764d1 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -37,6 +37,14 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, return 0; } +static int vgic_check_type(struct kvm *kvm, int type_needed) +{ + if (kvm->arch.vgic.vgic_model != type_needed) + return -ENODEV; + else + return 0; +} + /** * kvm_vgic_addr - set or get vgic VM base addresses * @kvm: pointer to the vm struct @@ -57,40 +65,41 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - int type_needed; phys_addr_t *addr_ptr, alignment; mutex_lock(&kvm->lock); switch (type) { case KVM_VGIC_V2_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_4K; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_cpu_base; alignment = SZ_4K; break; case KVM_VGIC_V3_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_64K; break; case KVM_VGIC_V3_ADDR_TYPE_REDIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + if (write) { + r = vgic_v3_set_redist_base(kvm, *addr); + goto out; + } addr_ptr = &vgic->vgic_redist_base; - alignment = SZ_64K; break; default: r = -ENODEV; - goto out; } - if (vgic->vgic_model != type_needed) { - r = -ENODEV; + if (r) goto out; - } if (write) { r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); @@ -259,13 +268,13 @@ static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) } } -static void unlock_all_vcpus(struct kvm *kvm) +void unlock_all_vcpus(struct kvm *kvm) { unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); } /* Returns true if all vcpus were locked, false otherwise */ -static bool lock_all_vcpus(struct kvm *kvm) +bool lock_all_vcpus(struct kvm *kvm) { struct kvm_vcpu *tmp_vcpu; int c; @@ -580,6 +589,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev, reg = tmp32; return vgic_v3_attr_regs_access(dev, attr, ®, true); } + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + int ret; + + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + ret = vgic_v3_save_pending_tables(dev->kvm); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return ret; + } + break; + } } return -ENXIO; } @@ -658,6 +685,8 @@ static int vgic_v3_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return 0; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + return 0; } } return -ENXIO; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 0a4283ed9aa735e55e476bdea0a19ed159952646..63e0bbdcddcc3e5e59163c08a5691dd36c416564 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -226,7 +226,13 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, switch (addr & 0xff) { case GIC_CPU_CTRL: - val = vmcr.ctlr; + val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT; + val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT; + val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT; + val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT; + val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT; + val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT; + break; case GIC_CPU_PRIMASK: /* @@ -267,7 +273,13 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, switch (addr & 0xff) { case GIC_CPU_CTRL: - vmcr.ctlr = val; + vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0); + vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1); + vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl); + vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn); + vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR); + vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS); + break; case GIC_CPU_PRIMASK: /* diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 6afb3b484886336b963cbcc1cb6d2d7cb89ba2d1..201d5e2e973dd4504c1f41747b2f07bb797ade11 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -556,67 +556,136 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) return SZ_64K; } -int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) +/** + * vgic_register_redist_iodev - register a single redist iodev + * @vcpu: The VCPU to which the redistributor belongs + * + * Register a KVM iodev for this VCPU's redistributor using the address + * provided. + * + * Return 0 on success, -ERRNO otherwise. + */ +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu; - int c, ret = 0; + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; + gpa_t rd_base, sgi_base; + int ret; - kvm_for_each_vcpu(c, vcpu, kvm) { - gpa_t rd_base = redist_base_address + c * SZ_64K * 2; - gpa_t sgi_base = rd_base + SZ_64K; - struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; - struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; - - kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); - rd_dev->base_addr = rd_base; - rd_dev->iodev_type = IODEV_REDIST; - rd_dev->regions = vgic_v3_rdbase_registers; - rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); - rd_dev->redist_vcpu = vcpu; + /* + * We may be creating VCPUs before having set the base address for the + * redistributor region, in which case we will come back to this + * function for all VCPUs when the base address is set. Just return + * without doing any work for now. + */ + if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base)) + return 0; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, - SZ_64K, &rd_dev->dev); - mutex_unlock(&kvm->slots_lock); + if (!vgic_v3_check_base(kvm)) + return -EINVAL; - if (ret) - break; + rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset; + sgi_base = rd_base + SZ_64K; - kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); - sgi_dev->base_addr = sgi_base; - sgi_dev->iodev_type = IODEV_REDIST; - sgi_dev->regions = vgic_v3_sgibase_registers; - sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); - sgi_dev->redist_vcpu = vcpu; + kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); + rd_dev->base_addr = rd_base; + rd_dev->iodev_type = IODEV_REDIST; + rd_dev->regions = vgic_v3_rdbase_registers; + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); + rd_dev->redist_vcpu = vcpu; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, - SZ_64K, &sgi_dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &rd_dev->dev); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, + SZ_64K, &rd_dev->dev); + mutex_unlock(&kvm->slots_lock); + + if (ret) + return ret; + + kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); + sgi_dev->base_addr = sgi_base; + sgi_dev->iodev_type = IODEV_REDIST; + sgi_dev->regions = vgic_v3_sgibase_registers; + sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); + sgi_dev->redist_vcpu = vcpu; + + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, + SZ_64K, &sgi_dev->dev); + if (ret) { + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &rd_dev->dev); + goto out; + } + + vgic->vgic_redist_free_offset += 2 * SZ_64K; +out: + mutex_unlock(&kvm->slots_lock); + return ret; +} + +static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; + + kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev); + kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev); +} + +static int vgic_register_all_redist_iodevs(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c, ret = 0; + + kvm_for_each_vcpu(c, vcpu, kvm) { + ret = vgic_register_redist_iodev(vcpu); + if (ret) break; - } } if (ret) { /* The current c failed, so we start with the previous one. */ + mutex_lock(&kvm->slots_lock); for (c--; c >= 0; c--) { - struct vgic_cpu *vgic_cpu; - vcpu = kvm_get_vcpu(kvm, c); - vgic_cpu = &vcpu->arch.vgic_cpu; - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &vgic_cpu->rd_iodev.dev); - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &vgic_cpu->sgi_iodev.dev); + vgic_unregister_redist_iodev(vcpu); } + mutex_unlock(&kvm->slots_lock); } return ret; } +int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr) +{ + struct vgic_dist *vgic = &kvm->arch.vgic; + int ret; + + /* vgic_check_ioaddr makes sure we don't do this twice */ + ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K); + if (ret) + return ret; + + vgic->vgic_redist_base = addr; + if (!vgic_v3_check_base(kvm)) { + vgic->vgic_redist_base = VGIC_ADDR_UNDEF; + return -EINVAL; + } + + /* + * Register iodevs for each existing VCPU. Adding more VCPUs + * afterwards will register the iodevs when needed. + */ + ret = vgic_register_all_redist_iodevs(kvm); + if (ret) + return ret; + + return 0; +} + int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) { const struct vgic_register_region *region; diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 2a5db135272215d5c9d4bfa544b7d3ed11a9b9c3..1c17b2a2f105c0a11cbe1083088e71d94c3e3b51 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -446,13 +446,12 @@ static int match_region(const void *key, const void *elt) return 0; } -/* Find the proper register handler entry given a certain address offset. */ -static const struct vgic_register_region * -vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, - unsigned int offset) +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset) { - return bsearch((void *)(uintptr_t)offset, region, nr_regions, - sizeof(region[0]), match_region); + return bsearch((void *)(uintptr_t)offset, regions, nr_regions, + sizeof(regions[0]), match_region); } void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 98bb566b660a29c85614beb22fe01659f5e74513..ea4171acdef3b2da35326fb2ded7dd1b37b8eaa1 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -36,8 +36,13 @@ struct vgic_register_region { }; unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); - void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len, unsigned long val); + union { + void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; }; extern struct kvm_io_device_ops kvm_io_gic_ops; @@ -192,4 +197,9 @@ u64 vgic_sanitise_shareability(u64 reg); u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, u64 (*sanitise_fn)(u64)); +/* Find the proper register handler entry given a certain address offset */ +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset); + #endif diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index b637d9c7afe3ff51b9e8dfdcd947ee37ef2df029..e4187e52bb26e65c87743c760fb5c651eedb5ea7 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -22,20 +22,6 @@ #include "vgic.h" -/* - * Call this function to convert a u64 value to an unsigned long * bitmask - * in a way that works on both 32-bit and 64-bit LE and BE platforms. - * - * Warning: Calling this function may modify *val. - */ -static unsigned long *u64_to_bitmask(u64 *val) -{ -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 - *val = (*val >> 32) | (*val << 32); -#endif - return (unsigned long *)val; -} - static inline void vgic_v2_write_lr(int lr, u32 val) { void __iomem *base = kvm_vgic_global_state.vctrl_base; @@ -51,45 +37,17 @@ void vgic_v2_init_lrs(void) vgic_v2_write_lr(i, 0); } -void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; - if (cpuif->vgic_misr & GICH_MISR_EOI) { - u64 eisr = cpuif->vgic_eisr; - unsigned long *eisr_bmap = u64_to_bitmask(&eisr); - int lr; - - for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) { - u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID; - - WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE); - - /* Only SPIs require notification */ - if (vgic_valid_spi(vcpu->kvm, intid)) - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); - } - } - - /* check and disable underflow maintenance IRQ */ - cpuif->vgic_hcr &= ~GICH_HCR_UIE; - - /* - * In the next iterations of the vcpu loop, if we sync the - * vgic state after flushing it, but before entering the guest - * (this happens for pending signals and vmid rollovers), then - * make sure we don't pick up any old maintenance interrupts - * here. - */ - cpuif->vgic_eisr = 0; + cpuif->vgic_hcr |= GICH_HCR_UIE; } -void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) +static bool lr_signals_eoi_mi(u32 lr_val) { - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; - - cpuif->vgic_hcr |= GICH_HCR_UIE; + return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) && + !(lr_val & GICH_LR_HW); } /* @@ -101,14 +59,22 @@ void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) */ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) { - struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; int lr; - for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { + cpuif->vgic_hcr &= ~GICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; u32 intid = val & GICH_LR_VIRTUALID; struct vgic_irq *irq; + /* Notify fds when the guest EOI'ed a level-triggered SPI */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); spin_lock(&irq->irq_lock); @@ -141,6 +107,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } + + vgic_cpu->used_lrs = 0; } /* @@ -181,6 +149,13 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->hw) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active && irq_is_pending(irq)) + val &= ~GICH_LR_PENDING_BIT; } else { if (irq->config == VGIC_CONFIG_LEVEL) val |= GICH_LR_EOI; @@ -199,9 +174,21 @@ void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; u32 vmcr; - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; + vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) & + GICH_VMCR_ENABLE_GRP0_MASK; + vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) & + GICH_VMCR_ENABLE_GRP1_MASK; + vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) & + GICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) & + GICH_VMCR_FIQ_EN_MASK; + vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) & + GICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) & + GICH_VMCR_EOI_MODE_MASK; vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & @@ -209,15 +196,29 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; + cpu_if->vgic_vmcr = vmcr; } void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >> + GICH_VMCR_ENABLE_GRP0_SHIFT; + vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >> + GICH_VMCR_ENABLE_GRP1_SHIFT; + vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >> + GICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >> + GICH_VMCR_FIQ_EN_SHIFT; + vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >> + GICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >> + GICH_VMCR_EOI_MODE_SHIFT; - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> - GICH_VMCR_CTRL_SHIFT; vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> @@ -390,3 +391,19 @@ int vgic_v2_probe(const struct gic_kvm_info *info) return ret; } + +void vgic_v2_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + + writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + + cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); +} diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index be0f4c3e0142e04216cb28e1f965487d52d0b4c9..030248e669f65acd5e0155fbbef189d96e7cf7e3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,59 +21,29 @@ #include "vgic.h" -void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - u32 model = vcpu->kvm->arch.vgic.vgic_model; - - if (cpuif->vgic_misr & ICH_MISR_EOI) { - unsigned long eisr_bmap = cpuif->vgic_eisr; - int lr; - - for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) { - u32 intid; - u64 val = cpuif->vgic_lr[lr]; - - if (model == KVM_DEV_TYPE_ARM_VGIC_V3) - intid = val & ICH_LR_VIRTUAL_ID_MASK; - else - intid = val & GICH_LR_VIRTUALID; - - WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE); - - /* Only SPIs require notification */ - if (vgic_valid_spi(vcpu->kvm, intid)) - kvm_notify_acked_irq(vcpu->kvm, 0, - intid - VGIC_NR_PRIVATE_IRQS); - } - /* - * In the next iterations of the vcpu loop, if we sync - * the vgic state after flushing it, but before - * entering the guest (this happens for pending - * signals and vmid rollovers), then make sure we - * don't pick up any old maintenance interrupts here. - */ - cpuif->vgic_eisr = 0; - } - - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr |= ICH_HCR_UIE; } -void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) +static bool lr_signals_eoi_mi(u64 lr_val) { - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - - cpuif->vgic_hcr |= ICH_HCR_UIE; + return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) && + !(lr_val & ICH_LR_HW); } void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) { - struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 model = vcpu->kvm->arch.vgic.vgic_model; int lr; - for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) { + cpuif->vgic_hcr &= ~ICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; u32 intid; struct vgic_irq *irq; @@ -82,6 +52,12 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) intid = val & ICH_LR_VIRTUAL_ID_MASK; else intid = val & GICH_LR_VIRTUALID; + + /* Notify fds when the guest EOI'ed a level-triggered IRQ */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); if (!irq) /* An LPI could have been unmapped. */ continue; @@ -117,6 +93,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); } + + vgic_cpu->used_lrs = 0; } /* Requires the irq to be locked already */ @@ -149,6 +127,13 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->hw) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active && irq_is_pending(irq)) + val &= ~ICH_LR_PENDING_BIT; } else { if (irq->config == VGIC_CONFIG_LEVEL) val |= ICH_LR_EOI; @@ -173,35 +158,58 @@ void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; u32 vmcr; - /* - * Ignore the FIQen bit, because GIC emulation always implies - * SRE=1 which means the vFIQEn bit is also RES1. - */ - vmcr = ((vmcrp->ctlr >> ICC_CTLR_EL1_EOImode_SHIFT) << - ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; - vmcr |= (vmcrp->ctlr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & + ICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & + ICH_VMCR_FIQ_EN_MASK; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcr = ICH_VMCR_FIQ_EN_MASK; + } + + vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; - vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; + cpu_if->vgic_vmcr = vmcr; } void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) { - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; - /* - * Ignore the FIQen bit, because GIC emulation always implies - * SRE=1 which means the vFIQEn bit is also RES1. - */ - vmcrp->ctlr = ((vmcr >> ICH_VMCR_EOIM_SHIFT) << - ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; - vmcrp->ctlr |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + vmcr = cpu_if->vgic_vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> + ICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> + ICH_VMCR_FIQ_EN_SHIFT; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcrp->fiqen = 1; + vmcrp->ackctl = 0; + } + + vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; @@ -252,19 +260,125 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr = ICH_HCR_EN; } -/* check for overlapping regions and for regions crossing the end of memory */ -static bool vgic_v3_check_base(struct kvm *kvm) +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) +{ + struct kvm_vcpu *vcpu; + int byte_offset, bit_nr; + gpa_t pendbase, ptr; + bool status; + u8 val; + int ret; + +retry: + vcpu = irq->target_vcpu; + if (!vcpu) + return 0; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + ret = kvm_read_guest(kvm, ptr, &val, 1); + if (ret) + return ret; + + status = val & (1 << bit_nr); + + spin_lock(&irq->irq_lock); + if (irq->target_vcpu != vcpu) { + spin_unlock(&irq->irq_lock); + goto retry; + } + irq->pending_latch = status; + vgic_queue_irq_unlock(vcpu->kvm, irq); + + if (status) { + /* clear consumed data */ + val &= ~(1 << bit_nr); + ret = kvm_write_guest(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_its_save_pending_tables - Save the pending tables into guest RAM + * kvm lock and all vcpu lock must be held + */ +int vgic_v3_save_pending_tables(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int last_byte_offset = -1; + struct vgic_irq *irq; + int ret; + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + int byte_offset, bit_nr; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr; + bool stored; + u8 val; + + vcpu = irq->target_vcpu; + if (!vcpu) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + if (byte_offset != last_byte_offset) { + ret = kvm_read_guest(kvm, ptr, &val, 1); + if (ret) + return ret; + last_byte_offset = byte_offset; + } + + stored = val & (1U << bit_nr); + if (stored == irq->pending_latch) + continue; + + if (irq->pending_latch) + val |= 1 << bit_nr; + else + val &= ~(1 << bit_nr); + + ret = kvm_write_guest(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/* + * Check for overlapping regions and for regions crossing the end of memory + * for base addresses which have already been set. + */ +bool vgic_v3_check_base(struct kvm *kvm) { struct vgic_dist *d = &kvm->arch.vgic; gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE; redist_size *= atomic_read(&kvm->online_vcpus); - if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) + if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) return false; - if (d->vgic_redist_base + redist_size < d->vgic_redist_base) + + if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) && + d->vgic_redist_base + redist_size < d->vgic_redist_base) return false; + /* Both base addresses must be set to check if they overlap */ + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(d->vgic_redist_base)) + return true; + if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base) return true; if (d->vgic_redist_base + redist_size <= d->vgic_dist_base) @@ -309,20 +423,6 @@ int vgic_v3_map_resources(struct kvm *kvm) goto out; } - ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base); - if (ret) { - kvm_err("Unable to register VGICv3 redist MMIO regions\n"); - goto out; - } - - if (vgic_has_its(kvm)) { - ret = vgic_register_its_iodevs(kvm); - if (ret) { - kvm_err("Unable to register VGIC ITS MMIO regions\n"); - goto out; - } - } - dist->ready = true; out: @@ -386,3 +486,24 @@ int vgic_v3_probe(const struct gic_kvm_info *info) return 0; } + +void vgic_v3_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (likely(cpu_if->vgic_sre)) + kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + if (likely(cpu_if->vgic_sre)) + cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); +} diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 654dfd40e449cb54afccdc19792b80a7cf59664a..83b24d20ff8f817e688548d69e50e00d25699210 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -21,7 +21,7 @@ #include "vgic.h" #define CREATE_TRACE_POINTS -#include "../trace.h" +#include "trace.h" #ifdef CONFIG_DEBUG_SPINLOCK #define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) @@ -29,7 +29,9 @@ #define DEBUG_SPINLOCK_BUG_ON(p) #endif -struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,}; +struct vgic_global kvm_vgic_global_state __ro_after_init = { + .gicv3_cpuif = STATIC_KEY_FALSE_INIT, +}; /* * Locking order is always: @@ -527,14 +529,6 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) spin_unlock(&vgic_cpu->ap_list_lock); } -static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu) -{ - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_process_maintenance(vcpu); - else - vgic_v3_process_maintenance(vcpu); -} - static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) { if (kvm_vgic_global_state.type == VGIC_V2) @@ -601,10 +595,8 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) { - vgic_set_underflow(vcpu); + if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) vgic_sort_ap_list(vcpu); - } list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); @@ -623,8 +615,12 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) next: spin_unlock(&irq->irq_lock); - if (count == kvm_vgic_global_state.nr_lr) + if (count == kvm_vgic_global_state.nr_lr) { + if (!list_is_last(&irq->ap_list, + &vgic_cpu->ap_list_head)) + vgic_set_underflow(vcpu); break; + } } vcpu->arch.vgic_cpu.used_lrs = count; @@ -637,18 +633,30 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - if (unlikely(!vgic_initialized(vcpu->kvm))) + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + /* An empty ap_list_head implies used_lrs == 0 */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; - vgic_process_maintenance_interrupt(vcpu); - vgic_fold_lr_state(vcpu); + if (vgic_cpu->used_lrs) + vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); } /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { - if (unlikely(!vgic_initialized(vcpu->kvm))) + /* + * If there are no virtual interrupts active or pending for this + * VCPU, then there is no work to do and we can bail out without + * taking any lock. There is a potential race with someone injecting + * interrupts to the VCPU, but it is a benign race as the VCPU will + * either observe the new interrupt before or after doing this check, + * and introducing additional synchronization mechanism doesn't change + * this. + */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); @@ -656,6 +664,28 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); } +void kvm_vgic_load(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_load(vcpu); + else + vgic_v3_load(vcpu); +} + +void kvm_vgic_put(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_put(vcpu); + else + vgic_v3_put(vcpu); +} + int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 6cf557e9f71807f05017d10af8074bd4416c151c..bba7fa22a7f7c41a5d1fa88c02fb15c158992ccd 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -73,6 +73,29 @@ KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) +/* + * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt, + * below macros are defined for ITS table entry encoding. + */ +#define KVM_ITS_CTE_VALID_SHIFT 63 +#define KVM_ITS_CTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_CTE_RDBASE_SHIFT 16 +#define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_ITE_NEXT_SHIFT 48 +#define KVM_ITS_ITE_PINTID_SHIFT 16 +#define KVM_ITS_ITE_PINTID_MASK GENMASK_ULL(47, 16) +#define KVM_ITS_ITE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_DTE_VALID_SHIFT 63 +#define KVM_ITS_DTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_DTE_NEXT_SHIFT 49 +#define KVM_ITS_DTE_NEXT_MASK GENMASK_ULL(62, 49) +#define KVM_ITS_DTE_ITTADDR_SHIFT 5 +#define KVM_ITS_DTE_ITTADDR_MASK GENMASK_ULL(48, 5) +#define KVM_ITS_DTE_SIZE_MASK GENMASK_ULL(4, 0) +#define KVM_ITS_L1E_VALID_MASK BIT_ULL(63) +/* we only support 64 kB translation table page size */ +#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) + static inline bool irq_is_pending(struct vgic_irq *irq) { if (irq->config == VGIC_CONFIG_EDGE) @@ -88,14 +111,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq) * registers regardless of the hardware backed GIC used. */ struct vgic_vmcr { - u32 ctlr; + u32 grpen0; + u32 grpen1; + + u32 ackctl; + u32 fiqen; + u32 cbpr; + u32 eoim; + u32 abpr; u32 bpr; u32 pmr; /* Priority mask field in the GICC_PMR and * ICC_PMR_EL1 priority field format */ - /* Below member variable are valid only for GICv3 */ - u32 grpen0; - u32 grpen1; }; struct vgic_reg_attr { @@ -119,7 +146,6 @@ void vgic_kick_vcpus(struct kvm *kvm); int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment); -void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); @@ -138,6 +164,8 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type); void vgic_v2_init_lrs(void); +void vgic_v2_load(struct kvm_vcpu *vcpu); +void vgic_v2_put(struct kvm_vcpu *vcpu); static inline void vgic_get_irq_kref(struct vgic_irq *irq) { @@ -147,7 +175,6 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq) kref_get(&irq->refcount); } -void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); @@ -157,9 +184,15 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_enable(struct kvm_vcpu *vcpu); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); -int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); +int vgic_v3_save_pending_tables(struct kvm *kvm); +int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr); +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +bool vgic_v3_check_base(struct kvm *kvm); + +void vgic_v3_load(struct kvm_vcpu *vcpu); +void vgic_v3_put(struct kvm_vcpu *vcpu); -int vgic_register_its_iodevs(struct kvm *kvm); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); @@ -184,4 +217,7 @@ int vgic_init(struct kvm *kvm); int vgic_debug_init(struct kvm *kvm); int vgic_debug_destroy(struct kvm *kvm); +bool lock_all_vcpus(struct kvm *kvm); +void unlock_all_vcpus(struct kvm *kvm); + #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 4d28a9ddbee01077fea01beeeae5523917822da9..a8d540398bbd0350b8b969820d7a11d528cf672d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); - kvm_vcpu_request_scan_ioapic(kvm); + kvm_arch_post_irq_ack_notifier_list_update(kvm); } void kvm_unregister_irq_ack_notifier(struct kvm *kvm, @@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); synchronize_srcu(&kvm->irq_srcu); - kvm_vcpu_request_scan_ioapic(kvm); + kvm_arch_post_irq_ack_notifier_list_update(kvm); } #endif diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 3bcc9990adf79eb9367a6fee61a7fd9683233a35..31e40c9e81df41de6f8cadb076515677ac5cae9c 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - int r = -EINVAL; struct kvm_kernel_irq_routing_entry *ei; + int r; /* * Do not allow GSI to be mapped to the same irqchip more than once. @@ -153,26 +153,30 @@ static int setup_routing_entry(struct kvm *kvm, if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) - return r; + return -EINVAL; e->gsi = ue->gsi; e->type = ue->type; r = kvm_set_routing_entry(kvm, e, ue); if (r) - goto out; + return r; if (e->type == KVM_IRQ_ROUTING_IRQCHIP) rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi; hlist_add_head(&e->link, &rt->map[e->gsi]); - r = 0; -out: - return r; + + return 0; } void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm) { } +bool __weak kvm_arch_can_set_irq_routing(struct kvm *kvm) +{ + return true; +} + int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *ue, unsigned nr, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 88257b311cb579b5b720330456f99fbec97a58ac..f0fe9d02f6bb2c47d909cf493ad6e19778cc666c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -165,6 +165,24 @@ void vcpu_put(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(vcpu_put); +/* TODO: merge with kvm_arch_vcpu_should_kick */ +static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req) +{ + int mode = kvm_vcpu_exiting_guest_mode(vcpu); + + /* + * We need to wait for the VCPU to reenable interrupts and get out of + * READING_SHADOW_PAGE_TABLES mode. + */ + if (req & KVM_REQUEST_WAIT) + return mode != OUTSIDE_GUEST_MODE; + + /* + * Need to kick a running VCPU, but otherwise there is nothing to do. + */ + return mode == IN_GUEST_MODE; +} + static void ack_flush(void *_completed) { } @@ -174,6 +192,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) int i, cpu, me; cpumask_var_t cpus; bool called = true; + bool wait = req & KVM_REQUEST_WAIT; struct kvm_vcpu *vcpu; zalloc_cpumask_var(&cpus, GFP_ATOMIC); @@ -183,17 +202,17 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) kvm_make_request(req, vcpu); cpu = vcpu->cpu; - /* Set ->requests bit before we read ->mode. */ - smp_mb__after_atomic(); + if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) + continue; if (cpus != NULL && cpu != -1 && cpu != me && - kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE) + kvm_request_needs_ipi(vcpu, req)) cpumask_set_cpu(cpu, cpus); } if (unlikely(cpus == NULL)) - smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); + smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait); else if (!cpumask_empty(cpus)) - smp_call_function_many(cpus, ack_flush, NULL, 1); + smp_call_function_many(cpus, ack_flush, NULL, wait); else called = false; put_cpu(); @@ -504,7 +523,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void) int i; struct kvm_memslots *slots; - slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) return NULL; @@ -689,18 +708,6 @@ static struct kvm *kvm_create_vm(unsigned long type) return ERR_PTR(r); } -/* - * Avoid using vmalloc for a small buffer. - * Should not be used when the size is statically known. - */ -void *kvm_kvzalloc(unsigned long size) -{ - if (size > PAGE_SIZE) - return vzalloc(size); - else - return kzalloc(size, GFP_KERNEL); -} - static void kvm_destroy_devices(struct kvm *kvm) { struct kvm_device *dev, *tmp; @@ -782,7 +789,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); - memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); + memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL); if (!memslot->dirty_bitmap) return -ENOMEM; @@ -1008,7 +1015,7 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); + slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) goto out_free; memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); @@ -1019,8 +1026,6 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, as_id, slots); - /* slot was deleted or moved, clear iommu mapping */ - kvm_iommu_unmap_pages(kvm, &old); /* From this point no new shadow pages pointing to a deleted, * or moved, memslot will be created. * @@ -1055,21 +1060,6 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_memslot(kvm, &old, &new); kvfree(old_memslots); - - /* - * IOMMU mapping: New slots need to be mapped. Old slots need to be - * un-mapped and re-mapped if their base changes. Since base change - * unmapping is handled above with slot deletion, mapping alone is - * needed here. Anything else the iommu might care about for existing - * slots (size changes, userspace addr changes and read-only flag - * changes) is disallowed above, so any other attribute changes getting - * here can be skipped. - */ - if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) { - r = kvm_iommu_map_pages(kvm, &new); - return r; - } - return 0; out_slots: @@ -1973,18 +1963,18 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, return 0; } -int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); +EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); -int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len) +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); int r; gpa_t gpa = ghc->gpa + offset; @@ -1994,7 +1984,7 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_ __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_vcpu_write_guest(vcpu, gpa, data, len); + return kvm_write_guest(kvm, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2006,19 +1996,19 @@ int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_ return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); +EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); -int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); + return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); +EXPORT_SYMBOL_GPL(kvm_write_guest_cached); -int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); + struct kvm_memslots *slots = kvm_memslots(kvm); int r; BUG_ON(len > ghc->len); @@ -2027,7 +2017,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); + return kvm_read_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2038,7 +2028,7 @@ int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *g return 0; } -EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); +EXPORT_SYMBOL_GPL(kvm_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { @@ -2212,8 +2202,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_block); -#ifndef CONFIG_S390 -void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) +bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { struct swait_queue_head *wqp; @@ -2221,11 +2210,14 @@ void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) if (swait_active(wqp)) { swake_up(wqp); ++vcpu->stat.halt_wakeup; + return true; } + return false; } EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up); +#ifndef CONFIG_S390 /* * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. */ @@ -2234,7 +2226,9 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) int me; int cpu = vcpu->cpu; - kvm_vcpu_wake_up(vcpu); + if (kvm_vcpu_wake_up(vcpu)) + return; + me = get_cpu(); if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) if (kvm_arch_vcpu_should_kick(vcpu)) @@ -2366,7 +2360,7 @@ static int kvm_vcpu_fault(struct vm_fault *vmf) else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) page = virt_to_page(vcpu->arch.pio_data); #endif -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif @@ -2842,10 +2836,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, #endif - -#ifdef CONFIG_KVM_XICS - [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, -#endif }; int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) @@ -2935,6 +2925,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: return 1; +#ifdef CONFIG_KVM_MMIO + case KVM_CAP_COALESCED_MMIO: + return KVM_COALESCED_MMIO_PAGE_OFFSET; +#endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; @@ -2984,7 +2978,7 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_get_dirty_log(kvm, &log); break; } -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO case KVM_REGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; @@ -3067,6 +3061,8 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&routing, argp, sizeof(routing))) goto out; r = -EINVAL; + if (!kvm_arch_can_set_irq_routing(kvm)) + goto out; if (routing.nr > KVM_MAX_IRQ_ROUTES) goto out; if (routing.flags) @@ -3176,7 +3172,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) kvm = kvm_create_vm(type); if (IS_ERR(kvm)) return PTR_ERR(kvm); -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO r = kvm_coalesced_mmio_init(kvm); if (r < 0) { kvm_put_kvm(kvm); @@ -3229,7 +3225,7 @@ static long kvm_dev_ioctl(struct file *filp, #ifdef CONFIG_X86 r += PAGE_SIZE; /* pio data page */ #endif -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO r += PAGE_SIZE; /* coalesced mmio ring page */ #endif break; @@ -3715,7 +3711,7 @@ static const struct file_operations vm_stat_get_per_vm_fops = { .release = kvm_debugfs_release, .read = simple_attr_read, .write = simple_attr_write, - .llseek = generic_file_llseek, + .llseek = no_llseek, }; static int vcpu_stat_get_per_vm(void *data, u64 *val) @@ -3760,7 +3756,7 @@ static const struct file_operations vcpu_stat_get_per_vm_fops = { .release = kvm_debugfs_release, .read = simple_attr_read, .write = simple_attr_write, - .llseek = generic_file_llseek, + .llseek = no_llseek, }; static const struct file_operations *stat_fops_per_vm[] = { diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index d32f239eb47133ae3f57ab0ed52a71d5c0478b8e..37d9118fd84be07bc119d40efe58b8edc1f2a889 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -20,6 +20,10 @@ #include #include "vfio.h" +#ifdef CONFIG_SPAPR_TCE_IOMMU +#include +#endif + struct kvm_vfio_group { struct list_head node; struct vfio_group *vfio_group; @@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group) return ret > 0; } +#ifdef CONFIG_SPAPR_TCE_IOMMU +static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group) +{ + int (*fn)(struct vfio_group *); + int ret = -EINVAL; + + fn = symbol_get(vfio_external_user_iommu_id); + if (!fn) + return ret; + + ret = fn(vfio_group); + + symbol_put(vfio_external_user_iommu_id); + + return ret; +} + +static struct iommu_group *kvm_vfio_group_get_iommu_group( + struct vfio_group *group) +{ + int group_id = kvm_vfio_external_user_iommu_id(group); + + if (group_id < 0) + return NULL; + + return iommu_group_get_by_id(group_id); +} + +static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm, + struct vfio_group *vfio_group) +{ + struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group); + + if (WARN_ON_ONCE(!grp)) + return; + + kvm_spapr_tce_release_iommu_group(kvm, grp); + iommu_group_put(grp); +} +#endif + /* * Groups can use the same or different IOMMU domains. If the same then * adding a new group may change the coherency of groups we've previously @@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group); +#endif kvm_vfio_group_set_kvm(vfio_group, NULL); kvm_vfio_group_put_external_user(vfio_group); @@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) kvm_vfio_update_coherency(dev); return ret; + +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: { + struct kvm_vfio_spapr_tce param; + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + struct fd f; + struct iommu_group *grp; + + if (copy_from_user(¶m, (void __user *)arg, + sizeof(struct kvm_vfio_spapr_tce))) + return -EFAULT; + + f = fdget(param.groupfd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + grp = kvm_vfio_group_get_iommu_group(vfio_group); + if (WARN_ON_ONCE(!grp)) { + kvm_vfio_group_put_external_user(vfio_group); + return -EIO; + } + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + ret = kvm_spapr_tce_attach_iommu_group(dev->kvm, + param.tablefd, grp); + break; + } + + mutex_unlock(&kv->lock); + + iommu_group_put(grp); + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } +#endif /* CONFIG_SPAPR_TCE_IOMMU */ } return -ENXIO; @@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_VFIO_GROUP_ADD: case KVM_DEV_VFIO_GROUP_DEL: +#ifdef CONFIG_SPAPR_TCE_IOMMU + case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: +#endif return 0; } @@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev) struct kvm_vfio_group *kvg, *tmp; list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group); +#endif kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node);